perf: spatial 10 m downsampling for timeseries
Extract `_haversine_m` from the inline block in `_gps_speed_kmh`, and add `_spatial_downsample`, which keeps one sample per 10 m traveled (GPS haversine distance is the primary source, speed × Δt is the fallback; indoor activities are left unchanged). Wire it into `build_timeseries()` after the 1 s dedup loop. Add a `--downsample-timeseries` migration flag to `bincio render` that applies the same downsampling to existing stored timeseries files without re-extracting from the original FIT/GPX files.
This commit is contained in:
@@ -8,6 +8,68 @@ from typing import Optional
|
||||
from bincio.extract.models import DataPoint
|
||||
|
||||
|
||||
def _haversine_m(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
|
||||
"""Great-circle distance in metres between two GPS points."""
|
||||
dlat = radians(lat2 - lat1)
|
||||
dlon = radians(lon2 - lon1)
|
||||
a = sin(dlat / 2) ** 2 + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon / 2) ** 2
|
||||
return 2 * 6_371_000.0 * atan2(sqrt(a), sqrt(1 - a))
|
||||
|
||||
|
||||
# Default spacing, in metres of distance traveled, between samples retained by
# _spatial_downsample (used as the default for its `resolution_m` parameter).
_SPATIAL_RESOLUTION_M = 10.0
|
||||
|
||||
|
||||
def _spatial_downsample(
    sampled: list[DataPoint],
    resolution_m: float = _SPATIAL_RESOLUTION_M,
) -> list[DataPoint]:
    """Keep one sample per `resolution_m` of cumulative distance traveled.

    Distance source priority, evaluated per consecutive pair of points:
      1. GPS haversine (lat/lon present on both points of the pair)
      2. speed_kmh × Δt (fallback when GPS is absent or gapped); when the
         current point has no speed, the most recent known speed is reused

    Args:
        sampled: Points in chronological order.
        resolution_m: Minimum cumulative distance, in metres, between two
            retained points.

    Returns:
        The thinned list. The first and last points are always retained. If
        there are fewer than two points, or no point carries GPS or speed data
        (e.g. an indoor activity), `sampled` itself is returned unchanged.
    """
    if len(sampled) < 2:
        return sampled

    has_gps = any(p.lat is not None and p.lon is not None for p in sampled)
    has_speed = any(p.speed_kmh is not None for p in sampled)
    if not has_gps and not has_speed:
        return sampled

    first = sampled[0]
    result: list[DataPoint] = [first]
    cum_dist = 0.0   # total distance traveled so far, in metres
    last_kept = 0.0  # value of cum_dist at the most recently retained point
    # Seed the carried-forward speed from the first sample so that a speed gap
    # immediately after it is not integrated at 0 km/h.
    prev_speed = first.speed_kmh if first.speed_kmh is not None else 0.0

    for prev, cur in zip(sampled, sampled[1:]):
        if (prev.lat is not None and prev.lon is not None
                and cur.lat is not None and cur.lon is not None):
            dist_m = _haversine_m(prev.lat, prev.lon, cur.lat, cur.lon)
        else:
            dt = (cur.timestamp - prev.timestamp).total_seconds()
            spd = cur.speed_kmh if cur.speed_kmh is not None else prev_speed
            # km/h -> m/s; a negative Δt (out-of-order samples) contributes 0.
            dist_m = (spd / 3.6) * max(dt, 0)

        if cur.speed_kmh is not None:
            prev_speed = cur.speed_kmh

        cum_dist += dist_m
        if cum_dist - last_kept >= resolution_m:
            result.append(cur)
            last_kept = cum_dist

    # Always keep the final point, even if it is closer than `resolution_m`
    # to the previously retained one.
    if result[-1] is not sampled[-1]:
        result.append(sampled[-1])

    return result
|
||||
|
||||
|
||||
def _gps_speed_kmh(
|
||||
lat_vals: list[Optional[float]],
|
||||
lon_vals: list[Optional[float]],
|
||||
@@ -24,10 +86,7 @@ def _gps_speed_kmh(
|
||||
dt = ts_vals[i] - ts_vals[i - 1]
|
||||
if la0 is None or lo0 is None or la1 is None or lo1 is None or dt <= 0:
|
||||
continue
|
||||
dlat = radians(la1 - la0)
|
||||
dlon = radians(lo1 - lo0)
|
||||
a = sin(dlat / 2) ** 2 + cos(radians(la0)) * cos(radians(la1)) * sin(dlon / 2) ** 2
|
||||
d_km = 2 * 6371.0 * atan2(sqrt(a), sqrt(1 - a))
|
||||
d_km = _haversine_m(la0, lo0, la1, lo1) / 1000.0
|
||||
raw[i] = d_km / dt * 3600.0
|
||||
|
||||
# 5-point centred moving average (skip None anchors)
|
||||
@@ -69,6 +128,8 @@ def build_timeseries(
|
||||
sampled.append(p)
|
||||
last_t = t
|
||||
|
||||
sampled = _spatial_downsample(sampled)
|
||||
|
||||
ts_vals = [int((p.timestamp - started_at).total_seconds()) for p in sampled]
|
||||
lat_vals = [round(p.lat, 7) if p.lat is not None else None for p in sampled] if include_gps else None
|
||||
lon_vals = [round(p.lon, 7) if p.lon is not None else None for p in sampled] if include_gps else None
|
||||
|
||||
Reference in New Issue
Block a user