perf: spatial 10 m downsampling for timeseries

Extract _haversine_m from the inline block in _gps_speed_kmh, add
_spatial_downsample (keep one sample per 10 m traveled, GPS haversine
primary / speed×Δt fallback, indoor activities unchanged), and wire it
into build_timeseries() after the 1 s dedup loop.

Add --downsample-timeseries migration flag to bincio render that applies
the same downsampling to existing stored timeseries files without
re-extracting from original FIT/GPX files.
This commit is contained in:
Davide Scaini
2026-05-19 20:11:00 +02:00
parent 835968e8fe
commit 84eff1f3b0
2 changed files with 163 additions and 4 deletions
+65 -4
View File
@@ -8,6 +8,68 @@ from typing import Optional
from bincio.extract.models import DataPoint
def _haversine_m(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
    """Return the great-circle distance in metres between two GPS points.

    Applies the haversine formula on a sphere of radius 6 371 000 m.
    All four arguments are in decimal degrees. NaN inputs propagate to a
    NaN result.
    """
    dlat = radians(lat2 - lat1)
    dlon = radians(lon2 - lon1)
    a = sin(dlat / 2) ** 2 + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon / 2) ** 2
    # Floating-point rounding can leave `a` a hair outside [0, 1] for
    # (near-)antipodal points, which would make sqrt() raise ValueError.
    # Plain comparisons are used (not min/max) so that NaN is left untouched.
    if a > 1.0:
        a = 1.0
    elif a < 0.0:
        a = 0.0
    return 2 * 6_371_000.0 * atan2(sqrt(a), sqrt(1 - a))
# Default spacing, in metres of distance traveled, between retained samples.
_SPATIAL_RESOLUTION_M = 10.0


def _spatial_downsample(
    sampled: list[DataPoint],
    resolution_m: float = _SPATIAL_RESOLUTION_M,
) -> list[DataPoint]:
    """Keep one sample per `resolution_m` of cumulative distance traveled.

    Distance source priority for each consecutive pair of points:
      1. GPS haversine (lat/lon present on both points)
      2. speed_kmh × Δt (fallback when GPS is absent or gapped); if the
         current point has no speed, the last known speed is reused.

    If no point carries GPS and no point carries speed, `sampled` is
    returned unchanged (same list object). The same applies to inputs
    with fewer than two points. Otherwise a new list is returned that
    always contains the first and last points.

    A distance increment that is NaN, infinite or negative is counted as
    zero, so a single corrupt sample cannot poison the running total and
    cause the remainder of the track to be dropped.
    """
    from math import isfinite  # local import keeps this block self-contained

    if len(sampled) < 2:
        return sampled

    has_gps = any(p.lat is not None and p.lon is not None for p in sampled)
    has_speed = any(p.speed_kmh is not None for p in sampled)
    if not has_gps and not has_speed:
        return sampled

    result: list[DataPoint] = [sampled[0]]
    cum_dist = 0.0    # total distance traveled so far, in metres
    last_kept = 0.0   # value of cum_dist at the most recently kept point
    prev_speed = 0.0  # last known usable speed (km/h) for the fallback path

    for prev, cur in zip(sampled, sampled[1:]):
        gps_on_both = (
            prev.lat is not None and prev.lon is not None
            and cur.lat is not None and cur.lon is not None
        )
        if gps_on_both:
            dist_m = _haversine_m(prev.lat, prev.lon, cur.lat, cur.lon)
        else:
            dt = (cur.timestamp - prev.timestamp).total_seconds()
            spd = cur.speed_kmh if cur.speed_kmh is not None else prev_speed
            # km/h -> m/s; out-of-order timestamps contribute no distance.
            dist_m = (spd / 3.6) * max(dt, 0)

        # Track the last usable speed on every sample, so a later GPS gap
        # without speed data can fall back to it.
        if cur.speed_kmh is not None and isfinite(cur.speed_kmh):
            prev_speed = cur.speed_kmh

        # A NaN increment would make every later comparison false.
        if not isfinite(dist_m) or dist_m < 0:
            dist_m = 0.0

        cum_dist += dist_m
        if cum_dist - last_kept >= resolution_m:
            result.append(cur)
            last_kept = cum_dist

    # Always retain the final point, without duplicating it.
    if result[-1] is not sampled[-1]:
        result.append(sampled[-1])

    return result
def _gps_speed_kmh(
lat_vals: list[Optional[float]],
lon_vals: list[Optional[float]],
@@ -24,10 +86,7 @@ def _gps_speed_kmh(
dt = ts_vals[i] - ts_vals[i - 1]
if la0 is None or lo0 is None or la1 is None or lo1 is None or dt <= 0:
continue
dlat = radians(la1 - la0)
dlon = radians(lo1 - lo0)
a = sin(dlat / 2) ** 2 + cos(radians(la0)) * cos(radians(la1)) * sin(dlon / 2) ** 2
d_km = 2 * 6371.0 * atan2(sqrt(a), sqrt(1 - a))
d_km = _haversine_m(la0, lo0, la1, lo1) / 1000.0
raw[i] = d_km / dt * 3600.0
# 5-point centred moving average (skip None anchors)
@@ -69,6 +128,8 @@ def build_timeseries(
sampled.append(p)
last_t = t
sampled = _spatial_downsample(sampled)
ts_vals = [int((p.timestamp - started_at).total_seconds()) for p in sampled]
lat_vals = [round(p.lat, 7) if p.lat is not None else None for p in sampled] if include_gps else None
lon_vals = [round(p.lon, 7) if p.lon is not None else None for p in sampled] if include_gps else None