perf: spatial 10 m downsampling for timeseries

Extract _haversine_m from the inline block in _gps_speed_kmh, add
_spatial_downsample (keep one sample per 10 m traveled, GPS haversine
primary / speed×Δt fallback, indoor activities unchanged), and wire it
into build_timeseries() after the 1 s dedup loop.

Add --downsample-timeseries migration flag to bincio render that applies
the same downsampling to existing stored timeseries files without
re-extracting from original FIT/GPX files.
This commit is contained in:
Davide Scaini
2026-05-19 20:11:00 +02:00
parent 835968e8fe
commit 84eff1f3b0
2 changed files with 163 additions and 4 deletions
+98
View File
@@ -517,6 +517,96 @@ def _backfill_speed(data: Path, handle: str | None = None) -> None:
console.print(f" [cyan]{user_dir.name}[/cyan]: {updated} timeseries updated with GPS speed")
def _spatial_keep_indices(t_vals, lat_vals, lon_vals, spd_vals,
                          haversine_m, resolution_m):
    """Return the ascending sample indices that survive spatial downsampling.

    Walks the samples in order, accumulating travelled distance, and keeps a
    sample each time at least ``resolution_m`` has accumulated since the last
    kept sample. The first and last samples are always kept.

    Args:
        t_vals: Per-sample timestamps; only differences between neighbours
            are used, and only on the speed fallback path.
        lat_vals, lon_vals: Per-sample coordinates, or an empty/None value
            when the channel is absent. Individual entries may be None.
        spd_vals: Per-sample speed, divided by 3.6 before being multiplied by
            the time delta; empty/None when absent. Entries may be None.
        haversine_m: Callable ``(lat0, lon0, lat1, lon1) -> distance``.
        resolution_m: Minimum accumulated distance between kept samples.

    Returns:
        A list of indices into the channels, always starting with 0 and
        ending with ``len(t_vals) - 1``.
    """
    n = len(t_vals)
    lat_vals = lat_vals or ()
    lon_vals = lon_vals or ()
    spd_vals = spd_vals or ()

    def _at(seq, idx):
        # Absent (empty) channels behave like a channel full of None.
        return seq[idx] if idx < len(seq) else None

    kept: list[int] = [0]
    cum_dist = last_kept = prev_speed = 0.0
    for i in range(1, n):
        la0, lo0 = _at(lat_vals, i - 1), _at(lon_vals, i - 1)
        la1, lo1 = _at(lat_vals, i), _at(lon_vals, i)
        if (la0 is not None and lo0 is not None
                and la1 is not None and lo1 is not None):
            # Primary path: both endpoints have a GPS fix.
            dist_m = haversine_m(la0, lo0, la1, lo1)
        else:
            # Fallback path: integrate speed over the time step. The last
            # known speed is carried forward across gaps; only samples that
            # take this fallback path refresh it.
            spd_i = _at(spd_vals, i)
            if spd_i is not None:
                prev_speed = spd_i
            t0, t1 = t_vals[i - 1], t_vals[i]
            # A missing timestamp contributes no distance instead of raising.
            dt = t1 - t0 if t0 is not None and t1 is not None else 0
            # Clamp negative steps (out-of-order timestamps) to zero.
            dist_m = (prev_speed / 3.6) * max(dt, 0)
        cum_dist += dist_m
        if cum_dist - last_kept >= resolution_m:
            kept.append(i)
            last_kept = cum_dist
    if kept[-1] != n - 1:
        kept.append(n - 1)
    return kept


def _downsample_timeseries(data: Path, handle: str | None = None) -> None:
    """Apply 10 m spatial downsampling to all stored timeseries files in activities/.

    Reads the parallel JSON arrays, computes which indices to keep using the
    same distance logic as _spatial_downsample, slices every known channel,
    and writes the file back. Run bincio render --no-build afterward so
    _merge_edits regenerates _merged/ from the smaller source files.

    Files that cannot be read or parsed, or whose channels do not all have
    the same length as ``t``, are left untouched and reported separately
    rather than aborting the whole migration.

    Args:
        data: Root of the data store.
        handle: When given, only ``data / handle`` is processed; otherwise
            every directory returned by ``_user_dirs(data)``.
    """
    import json

    from bincio.extract.timeseries import _haversine_m, _SPATIAL_RESOLUTION_M

    channels = ("t", "lat", "lon", "elevation_m", "speed_kmh",
                "hr_bpm", "cadence_rpm", "power_w", "temperature_c")

    targets = [data / handle] if handle else _user_dirs(data)
    for user_dir in targets:
        acts_dir = user_dir / "activities"
        if not acts_dir.exists():
            continue

        updated = skipped = malformed = 0
        for ts_path in sorted(acts_dir.glob("*.timeseries.json")):
            try:
                ts = json.loads(ts_path.read_text(encoding="utf-8"))
            except (OSError, ValueError):
                # ValueError covers both invalid JSON and invalid UTF-8.
                malformed += 1
                continue
            if not isinstance(ts, dict):
                malformed += 1
                continue

            # Only non-empty channels take part; empty/missing ones are
            # treated as absent and left as they are in the file.
            present = {key: ts[key] for key in channels if ts.get(key)}
            if not all(isinstance(ch, list) for ch in present.values()):
                malformed += 1
                continue

            t_vals = present.get("t", [])
            n = len(t_vals)
            if n < 2:
                skipped += 1
                continue
            # Slicing by index is only meaningful when the arrays really are
            # parallel; refuse to rewrite a file where they are not.
            if any(len(ch) != n for ch in present.values()):
                malformed += 1
                continue

            lat_vals = present.get("lat", [])
            lon_vals = present.get("lon", [])
            spd_vals = present.get("speed_kmh", [])
            has_gps = any(v is not None for v in lat_vals)
            has_speed = any(v is not None for v in spd_vals)
            if not has_gps and not has_speed:
                # No way to estimate distance travelled.
                skipped += 1
                continue

            kept = _spatial_keep_indices(
                t_vals, lat_vals, lon_vals, spd_vals,
                _haversine_m, _SPATIAL_RESOLUTION_M,
            )
            if len(kept) >= n:
                skipped += 1
                continue  # already sparse (very short / indoor / rest-stop heavy)

            for key, ch in present.items():
                ts[key] = [ch[i] for i in kept]
            ts_path.write_text(
                json.dumps(ts, indent=2, ensure_ascii=False), encoding="utf-8"
            )
            updated += 1

        console.print(
            f" [cyan]{user_dir.name}[/cyan]: "
            f"{updated} downsampled, {skipped} skipped (indoor / short / already sparse)"
        )
        if malformed:
            console.print(
                f" [yellow]{user_dir.name}[/yellow]: "
                f"{malformed} timeseries file(s) unreadable or malformed, left untouched"
            )
@click.command()
@click.option("--config", "config_path", default=None,
help="Path to extract_config.yaml (reads output.dir from it).")
@@ -549,6 +639,9 @@ def _backfill_speed(data: Path, handle: str | None = None) -> None:
@click.option("--backfill-speed", "backfill_speed", is_flag=True,
help="Compute GPS-derived speed for timeseries where the device didn't record "
"per-second speed (run once to enable speed map coloring on older activities).")
@click.option("--downsample-timeseries", "downsample_timeseries", is_flag=True,
help="Apply 10 m spatial downsampling to all stored timeseries files "
"(run once after deploying the downsampling code).")
def render(
config_path: Optional[str],
data_dir: Optional[str],
@@ -563,6 +656,7 @@ def render(
recompute_vam: bool,
backfill_vam_summary: bool,
backfill_speed: bool,
downsample_timeseries: bool,
) -> None:
"""Build (or serve) the BincioActivity static site from a BAS data store."""
@@ -592,6 +686,10 @@ def render(
console.print("Backfilling GPS-derived speed into timeseries…")
_backfill_speed(data, handle=handle)
if downsample_timeseries:
console.print("Applying spatial downsampling to timeseries…")
_downsample_timeseries(data, handle=handle)
_merge_edits(data, handle=handle)
_rebuild_athlete_json(data, handle=handle)
_bake_tracks(data, handle=handle)