from datetime import datetime, timezone

# Any timestamp before this is almost certainly an uninitialised sensor value
# (epoch 0, FIT "no-data" sentinel, RTC not yet synced, etc.).
_MIN_TIMESTAMP = datetime(2000, 1, 1, tzinfo=timezone.utc)


def _is_bogus_timestamp(ts: datetime) -> bool:
    """Return True when *ts* predates ``_MIN_TIMESTAMP``.

    ``_MIN_TIMESTAMP`` is timezone-aware, and Python refuses to order a naive
    datetime against an aware one (``TypeError``).  A naive value is therefore
    interpreted as UTC for the purpose of this comparison only; the caller's
    object is not modified.
    """
    if ts.tzinfo is None or ts.utcoffset() is None:
        ts = ts.replace(tzinfo=timezone.utc)
    return ts < _MIN_TIMESTAMP


def strip_bogus_leading_points(points: list["DataPoint"]) -> list["DataPoint"]:
    """Drop leading points whose timestamp predates the year 2000.

    FIT files occasionally emit a record with timestamp=0 (or another
    pre-2000 value) as an uninitialised sentinel before the real data
    begins. Keeping such a point as points[0] produces a 1970 start
    time and an absurdly large duration_s.

    Only the leading run is removed: once the first plausible timestamp is
    seen, every later point is kept, even if its timestamp is pre-2000.

    Args:
        points: Parsed points in file order. Each element must expose a
            ``timestamp`` attribute holding a ``datetime`` (aware or naive;
            naive values are compared as if they were UTC).

    Returns:
        A new list starting at the first point whose timestamp is on or
        after 2000-01-01 UTC. Empty if *points* is empty or every point is
        bogus. The input list is never mutated.
    """
    first_valid = next(
        (
            index
            for index, point in enumerate(points)
            if not _is_bogus_timestamp(point.timestamp)
        ),
        len(points),  # no plausible point at all -> slice yields []
    )
    return points[first_valid:]
bincio.extract.models import DataPoint, ParsedActivity +from bincio.extract.models import DataPoint, ParsedActivity, strip_bogus_leading_points from bincio.extract.parsers.base import BaseParser from bincio.extract.sport import normalise_sport, normalise_sub_sport @@ -38,6 +38,7 @@ class GpxParser(BaseParser): _apply_extensions(pt, dp) points.append(dp) + points = strip_bogus_leading_points(points) if not points: raise ValueError(f"No trackpoints found in {path.name}") diff --git a/bincio/extract/parsers/tcx.py b/bincio/extract/parsers/tcx.py index 1c60f2b..254b42c 100644 --- a/bincio/extract/parsers/tcx.py +++ b/bincio/extract/parsers/tcx.py @@ -5,7 +5,7 @@ from pathlib import Path from lxml import etree -from bincio.extract.models import DataPoint, ParsedActivity +from bincio.extract.models import DataPoint, ParsedActivity, strip_bogus_leading_points from bincio.extract.sport import normalise_sport, normalise_sub_sport _NS_HTTP = { @@ -73,6 +73,7 @@ class TcxParser: ) points.append(dp) + points = strip_bogus_leading_points(points) if not points: raise ValueError(f"No trackpoints found in {path.name}")