Parallelize extraction; fix TCX files
This commit is contained in:
@@ -8,18 +8,24 @@ from lxml import etree
|
||||
from bincio.extract.models import DataPoint, ParsedActivity
|
||||
from bincio.extract.sport import normalise_sport
|
||||
|
||||
_NS = {
|
||||
_NS_HTTP = {
|
||||
"tcx": "http://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2",
|
||||
"ext": "http://www.garmin.com/xmlschemas/ActivityExtension/v2",
|
||||
}
|
||||
_NS_HTTPS = {
|
||||
"tcx": "https://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2",
|
||||
"ext": "https://www.garmin.com/xmlschemas/ActivityExtension/v2",
|
||||
}
|
||||
|
||||
|
||||
class TcxParser:
|
||||
def parse(self, path: Path, raw_bytes: bytes) -> ParsedActivity:
|
||||
# Some exporters (e.g. Garmin) prepend whitespace before the XML
|
||||
# declaration, which is technically invalid. Strip it.
|
||||
# Some exporters prepend whitespace before the XML declaration. Strip it.
|
||||
root = etree.fromstring(raw_bytes.lstrip())
|
||||
|
||||
# Garmin sometimes uses https:// instead of http:// in the namespace URI.
|
||||
_NS = _NS_HTTPS if b"https://www.garmin.com" in raw_bytes else _NS_HTTP
|
||||
|
||||
activities = root.findall(".//tcx:Activity", _NS)
|
||||
if not activities:
|
||||
raise ValueError(f"No Activity elements found in {path.name}")
|
||||
|
||||
Reference in New Issue
Block a user