47 lines
1.5 KiB
Python
47 lines
1.5 KiB
Python
"""Parser factory — selects the right parser based on file extension."""
|
|
|
|
from pathlib import Path
|
|
|
|
from bincio.extract.models import ParsedActivity
|
|
from bincio.extract.parsers.base import BaseParser
|
|
from bincio.extract.parsers.fit import FitParser
|
|
from bincio.extract.parsers.gpx import GpxParser
|
|
from bincio.extract.parsers.tcx import TcxParser
|
|
|
|
# Parser class registered for each base (uncompressed) file extension.
_PARSERS: dict[str, type[BaseParser]] = {
    ".fit": FitParser,
    ".gpx": GpxParser,
    ".tcx": TcxParser,
}

# Supported extensions (including .gz variants). Derived from _PARSERS so the
# two tables cannot drift apart when a parser is added or removed.
SUPPORTED: set[str] = {ext + gz for ext in _PARSERS for gz in ("", ".gz")}
|
|
|
|
|
|
def _base_ext(path: Path) -> str:
|
|
"""Return the meaningful extension, stripping .gz if present."""
|
|
if path.suffix == ".gz":
|
|
return Path(path.stem).suffix # e.g. ".fit" from "ride.fit.gz"
|
|
return path.suffix
|
|
|
|
|
|
def is_supported(path: Path) -> bool:
    """Report whether ``path`` carries one of the SUPPORTED extensions.

    Only the file name is examined; the file is not opened.

    Args:
        path: Path of the candidate file.

    Returns:
        True if the extension (or, for ``.gz`` files, the compound extension
        such as ``.fit.gz``) is a member of ``SUPPORTED``; False otherwise.
    """
    if path.suffix == ".gz":
        # For compressed files compare the last two suffixes together,
        # e.g. "ride.fit.gz" -> ".fit.gz".
        ext = "".join(path.suffixes[-2:])
    else:
        ext = path.suffix
    return ext in SUPPORTED
|
|
|
|
|
|
def parse_file(path: Path) -> ParsedActivity:
    """Parse an activity file, handling .gz transparently.

    The parser is chosen from ``_PARSERS`` using the file's base extension
    (see ``_base_ext``).

    Args:
        path: Location of the activity file.

    Returns:
        The activity returned by the selected parser, with ``source_hash``
        and ``source_file`` set on it.

    Raises:
        ValueError: If no parser is registered for the file's extension.
    """
    parser_cls = _PARSERS.get(_base_ext(path))
    if parser_cls is None:
        raise ValueError(f"Unsupported file type: {path.name!r}")

    # _read_file yields two byte strings: the file as stored on disk and the
    # content handed to the parser (presumably decompressed for .gz inputs;
    # confirm against BaseParser).
    raw_bytes, content_bytes = BaseParser._read_file(path)
    activity = parser_cls().parse(path, content_bytes)

    # Attach hash of the *original* bytes (compressed if .gz) for dedup
    activity.source_hash = BaseParser._sha256(raw_bytes)
    activity.source_file = path.name
    return activity
|