Files
bincio-activity/bincio/extract/parsers/tcx.py
T
Davide Scaini 872651f471 metrics: replace naive elevation accumulation with hysteresis dead-band
GPS jitter and barometric quantization noise caused systematic overestimation
of elevation gain — in extreme cases 100% of reported gain was sub-1m noise.

Implements source-aware hysteresis: elevation is only committed when it
deviates from the last committed value by ≥5m (barometric) or ≥10m (GPS/GPX/TCX).

- ParsedActivity gains `altitude_source` field ("barometric"/"gps"/"unknown")
- FIT parser sets "barometric" when enhanced_altitude is present, else "gps"
- GPX and TCX parsers always set "gps"
- metrics._elevation() uses the threshold matching the source
- 5 new parametric tests covering flat GPS noise, threshold differences, and real climbs
2026-04-20 20:29:20 +02:00

112 lines
4.4 KiB
Python

"""TCX (Training Center XML) file parser."""
from datetime import datetime, timezone
from pathlib import Path
from lxml import etree
from bincio.extract.models import DataPoint, ParsedActivity
from bincio.extract.sport import normalise_sport, normalise_sub_sport
_NS_HTTP = {
"tcx": "http://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2",
"ext": "http://www.garmin.com/xmlschemas/ActivityExtension/v2",
}
_NS_HTTPS = {
"tcx": "https://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2",
"ext": "https://www.garmin.com/xmlschemas/ActivityExtension/v2",
}
class TcxParser:
def parse(self, path: Path, raw_bytes: bytes) -> ParsedActivity:
# Some exporters prepend whitespace before the XML declaration. Strip it.
root = etree.fromstring(raw_bytes.lstrip())
# Garmin sometimes uses https:// instead of http:// in the namespace URI.
_NS = _NS_HTTPS if b"https://www.garmin.com" in raw_bytes else _NS_HTTP
activities = root.findall(".//tcx:Activity", _NS)
if not activities:
raise ValueError(f"No Activity elements found in {path.name}")
# Use the first activity
act = activities[0]
sport_attr = act.get("Sport", "Biking")
sport = normalise_sport(sport_attr)
sub_sport = normalise_sub_sport(sport_attr)
points: list[DataPoint] = []
for tp in act.findall(".//tcx:Trackpoint", _NS):
ts_el = tp.find("tcx:Time", _NS)
if ts_el is None or not ts_el.text:
continue
ts = _parse_ts(ts_el.text)
lat, lon = None, None
pos = tp.find("tcx:Position", _NS)
if pos is not None:
lat_el = pos.find("tcx:LatitudeDegrees", _NS)
lon_el = pos.find("tcx:LongitudeDegrees", _NS)
lat = float(lat_el.text) if lat_el is not None and lat_el.text else None
lon = float(lon_el.text) if lon_el is not None and lon_el.text else None
ele_el = tp.find("tcx:AltitudeMeters", _NS)
hr_el = tp.find(".//tcx:HeartRateBpm/tcx:Value", _NS)
cad_el = tp.find("tcx:Cadence", _NS)
dist_el = tp.find("tcx:DistanceMeters", _NS)
# Extensions (speed, watts)
speed_el = tp.find(".//ext:Speed", _NS)
power_el = tp.find(".//ext:Watts", _NS)
dp = DataPoint(
timestamp=ts,
lat=lat,
lon=lon,
elevation_m=float(ele_el.text) if ele_el is not None and ele_el.text else None,
hr_bpm=int(float(hr_el.text)) if hr_el is not None and hr_el.text else None,
cadence_rpm=int(float(cad_el.text)) if cad_el is not None and cad_el.text else None,
distance_m=float(dist_el.text) if dist_el is not None and dist_el.text else None,
speed_kmh=float(speed_el.text) * 3.6 if speed_el is not None and speed_el.text else None,
power_w=int(float(power_el.text)) if power_el is not None and power_el.text else None,
)
points.append(dp)
if not points:
raise ValueError(f"No trackpoints found in {path.name}")
return ParsedActivity(
points=points,
sport=sport,
sub_sport=sub_sport,
started_at=points[0].timestamp,
source_file=path.name,
source_hash="",
altitude_source="gps",
)
def _parse_ts(s: str) -> datetime:
# ISO 8601 with or without fractional seconds, with Z or numeric offset (+02:00)
import re as _re
# Strip trailing Z → assume UTC
if s.endswith("Z"):
s = s[:-1]
for fmt in ("%Y-%m-%dT%H:%M:%S.%f", "%Y-%m-%dT%H:%M:%S"):
try:
return datetime.strptime(s, fmt).replace(tzinfo=timezone.utc)
except ValueError:
continue
# Numeric offset like +02:00, -05:30, or +0200 — parse with %z then convert to UTC
m = _re.match(r"^(.+)([+-]\d{2}:?\d{2})$", s)
if m:
body, off = m.group(1), m.group(2).replace(":", "")
for fmt in ("%Y-%m-%dT%H:%M:%S.%f", "%Y-%m-%dT%H:%M:%S"):
try:
dt = datetime.strptime(body + off, fmt + "%z")
return dt.astimezone(timezone.utc).replace(tzinfo=timezone.utc)
except ValueError:
continue
raise ValueError(f"Cannot parse timestamp: {s!r}")