Files
2026-03-28 13:59:36 +01:00

47 lines
1.5 KiB
Python

"""Parser factory — selects the right parser based on file extension."""
from pathlib import Path
from bincio.extract.models import ParsedActivity
from bincio.extract.parsers.base import BaseParser
from bincio.extract.parsers.fit import FitParser
from bincio.extract.parsers.gpx import GpxParser
from bincio.extract.parsers.tcx import TcxParser
# Every file extension the factory accepts: each base format, either bare
# or wrapped in a ".gz" suffix (e.g. ".fit" and ".fit.gz").
SUPPORTED = {
    base + wrapper
    for base in (".fit", ".gpx", ".tcx")
    for wrapper in ("", ".gz")
}
# Dispatch table mapping a base extension to the parser class that handles it.
# Keys never include ".gz": parse_file() looks entries up using the extension
# returned by _base_ext(), which has already stripped a trailing ".gz".
_PARSERS: dict[str, type[BaseParser]] = {
    ".fit": FitParser,
    ".gpx": GpxParser,
    ".tcx": TcxParser,
}
def _base_ext(path: Path) -> str:
"""Return the meaningful extension, stripping .gz if present."""
if path.suffix == ".gz":
return Path(path.stem).suffix # e.g. ".fit" from "ride.fit.gz"
return path.suffix
def is_supported(path: Path) -> bool:
    """Return True if the extension of *path* is listed in ``SUPPORTED``.

    A name ending in ".gz" is checked by its compound extension (its last
    two suffixes joined, e.g. ".fit.gz"); any other name is checked by its
    final suffix alone. Matching is case-sensitive.
    """
    if path.suffix == ".gz":
        # Gzip-wrapped: compare e.g. ".fit.gz", not just ".gz".
        ext = "".join(path.suffixes[-2:])
    else:
        ext = path.suffix
    return ext in SUPPORTED
def parse_file(path: Path) -> ParsedActivity:
    """Parse the activity file at *path* with the parser for its extension.

    The parser class is chosen from ``_PARSERS`` using the base extension of
    the file name (a trailing ".gz" is ignored for this lookup). The file is
    read through ``BaseParser._read_file``, which yields both the raw bytes
    and the content bytes handed to the parser (presumably the decompressed
    payload for ".gz" files; confirm against ``BaseParser``).

    Returns:
        The parsed activity, with ``source_hash`` set to the SHA-256 of the
        raw bytes and ``source_file`` set to the file's name.

    Raises:
        ValueError: If no parser is registered for the file's extension.
            This is checked before the file is read.
    """
    parser_cls = _PARSERS.get(_base_ext(path))
    if parser_cls is None:
        raise ValueError(f"Unsupported file type: {path.name!r}")

    raw_bytes, content_bytes = BaseParser._read_file(path)
    activity = parser_cls().parse(path, content_bytes)

    # Hash the *original* bytes (still compressed for .gz input) so that
    # deduplication keys on the file as stored, not on its decoded content.
    activity.source_hash = BaseParser._sha256(raw_bytes)
    activity.source_file = path.name
    return activity