perf: spatial 10 m downsampling for timeseries
Extract _haversine_m from the inline block in _gps_speed_kmh, add _spatial_downsample (keep one sample per 10 m traveled, GPS haversine primary / speed×Δt fallback, indoor activities unchanged), and wire it into build_timeseries() after the 1 s dedup loop. Add --downsample-timeseries migration flag to bincio render that applies the same downsampling to existing stored timeseries files without re-extracting from original FIT/GPX files.
This commit is contained in:
@@ -8,6 +8,68 @@ from typing import Optional
|
||||
from bincio.extract.models import DataPoint
|
||||
|
||||
|
||||
def _haversine_m(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
|
||||
"""Great-circle distance in metres between two GPS points."""
|
||||
dlat = radians(lat2 - lat1)
|
||||
dlon = radians(lon2 - lon1)
|
||||
a = sin(dlat / 2) ** 2 + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon / 2) ** 2
|
||||
return 2 * 6_371_000.0 * atan2(sqrt(a), sqrt(1 - a))
|
||||
|
||||
|
||||
# Default spacing, in metres of cumulative distance travelled, between
# consecutive samples kept by _spatial_downsample.
_SPATIAL_RESOLUTION_M = 10.0
|
||||
|
||||
|
||||
def _spatial_downsample(
    sampled: list[DataPoint],
    resolution_m: float = _SPATIAL_RESOLUTION_M,
) -> list[DataPoint]:
    """Thin `sampled` to one point per `resolution_m` of distance travelled.

    The distance of each step between consecutive points comes from:
      1. the GPS haversine distance, when both points carry lat and lon;
      2. otherwise speed_kmh × Δt, reusing the last known speed when the
         current point has none.

    When no point has a GPS fix and no point has a speed (indoor, no speed
    data), `sampled` is returned unchanged. The first and last points are
    always retained.
    """
    if len(sampled) < 2:
        return sampled

    def _has_fix(point: DataPoint) -> bool:
        return point.lat is not None and point.lon is not None

    gps_available = any(_has_fix(p) for p in sampled)
    if not gps_available and all(p.speed_kmh is None for p in sampled):
        return sampled

    kept: list[DataPoint] = [sampled[0]]
    travelled_m = 0.0
    travelled_at_last_keep = 0.0
    last_known_speed = 0.0

    for before, point in zip(sampled, sampled[1:]):
        elapsed_s = (point.timestamp - before.timestamp).total_seconds()

        if gps_available and _has_fix(before) and _has_fix(point):
            step_m = _haversine_m(before.lat, before.lon, point.lat, point.lon)
        else:
            # No usable GPS pair: integrate speed over the (non-negative) gap.
            speed = last_known_speed if point.speed_kmh is None else point.speed_kmh
            step_m = (speed / 3.6) * max(elapsed_s, 0)

        if point.speed_kmh is not None:
            last_known_speed = point.speed_kmh

        travelled_m += step_m
        if travelled_m - travelled_at_last_keep >= resolution_m:
            kept.append(point)
            travelled_at_last_keep = travelled_m

    # Guarantee the final sample survives even if it fell inside a bucket.
    if kept[-1] is not sampled[-1]:
        kept.append(sampled[-1])

    return kept
|
||||
|
||||
|
||||
def _gps_speed_kmh(
|
||||
lat_vals: list[Optional[float]],
|
||||
lon_vals: list[Optional[float]],
|
||||
@@ -24,10 +86,7 @@ def _gps_speed_kmh(
|
||||
dt = ts_vals[i] - ts_vals[i - 1]
|
||||
if la0 is None or lo0 is None or la1 is None or lo1 is None or dt <= 0:
|
||||
continue
|
||||
dlat = radians(la1 - la0)
|
||||
dlon = radians(lo1 - lo0)
|
||||
a = sin(dlat / 2) ** 2 + cos(radians(la0)) * cos(radians(la1)) * sin(dlon / 2) ** 2
|
||||
d_km = 2 * 6371.0 * atan2(sqrt(a), sqrt(1 - a))
|
||||
d_km = _haversine_m(la0, lo0, la1, lo1) / 1000.0
|
||||
raw[i] = d_km / dt * 3600.0
|
||||
|
||||
# 5-point centred moving average (skip None anchors)
|
||||
@@ -69,6 +128,8 @@ def build_timeseries(
|
||||
sampled.append(p)
|
||||
last_t = t
|
||||
|
||||
sampled = _spatial_downsample(sampled)
|
||||
|
||||
ts_vals = [int((p.timestamp - started_at).total_seconds()) for p in sampled]
|
||||
lat_vals = [round(p.lat, 7) if p.lat is not None else None for p in sampled] if include_gps else None
|
||||
lon_vals = [round(p.lon, 7) if p.lon is not None else None for p in sampled] if include_gps else None
|
||||
|
||||
@@ -517,6 +517,96 @@ def _backfill_speed(data: Path, handle: str | None = None) -> None:
|
||||
console.print(f" [cyan]{user_dir.name}[/cyan]: {updated} timeseries updated with GPS speed")
|
||||
|
||||
|
||||
def _timeseries_keep_indices(
    t_vals: list,
    lat_vals: list,
    lon_vals: list,
    spd_vals: list,
    resolution_m: float,
) -> list[int] | None:
    """Pick the sample indices that survive spatial downsampling.

    Mirrors the distance logic of ``_spatial_downsample``: GPS haversine when
    both consecutive samples have lat and lon, otherwise ``speed_kmh × Δt``
    with the last known speed as fallback. Index 0 and the last index are
    always kept.

    Each of ``lat_vals``, ``lon_vals`` and ``spd_vals`` must be either empty
    or the same length as ``t_vals`` (the caller validates this).

    Returns:
        The indices to keep, or None when no sample has a complete GPS fix
        and no sample has a speed value (no distance source available).
    """
    from bincio.extract.timeseries import _haversine_m

    # A fix needs both coordinates; checking lat alone would let a file with
    # lat but no lon collapse to its first and last samples.
    has_gps = any(
        la is not None and lo is not None for la, lo in zip(lat_vals, lon_vals)
    )
    has_speed = any(v is not None for v in spd_vals)
    if not has_gps and not has_speed:
        return None

    last = len(t_vals) - 1
    kept: list[int] = [0]
    cum_dist = last_kept = prev_speed = 0.0

    for i in range(1, last + 1):
        if has_gps:
            la0, lo0 = lat_vals[i - 1], lon_vals[i - 1]
            la1, lo1 = lat_vals[i], lon_vals[i]
        else:
            la0 = lo0 = la1 = lo1 = None
        cur_speed = spd_vals[i] if spd_vals else None

        if (la0 is not None and lo0 is not None
                and la1 is not None and lo1 is not None):
            dist_m = _haversine_m(la0, lo0, la1, lo1)
        else:
            spd = cur_speed if cur_speed is not None else prev_speed
            dt = t_vals[i] - t_vals[i - 1]
            dist_m = (spd / 3.6) * max(dt, 0)

        if cur_speed is not None:
            prev_speed = cur_speed

        cum_dist += dist_m
        if cum_dist - last_kept >= resolution_m:
            kept.append(i)
            last_kept = cum_dist

    if kept[-1] != last:
        kept.append(last)
    return kept


def _downsample_timeseries(data: Path, handle: str | None = None) -> None:
    """Apply 10 m spatial downsampling to all stored timeseries files in activities/.

    For every ``*.timeseries.json`` under ``<user>/activities``, reads the
    parallel JSON arrays, computes which indices to keep using the same
    distance logic as ``_spatial_downsample``, slices every known channel,
    and writes the file back in place.

    A file is left untouched and counted as skipped when it cannot be read
    or parsed, is not a JSON object, has fewer than two samples, has a
    populated channel whose length differs from ``t``, has neither a GPS fix
    nor speed data, or would not lose any sample.

    Run ``bincio render --no-build`` afterward so ``_merge_edits`` regenerates
    ``_merged/`` from the smaller source files.

    NOTE(review): running this again on already-downsampled files can thin
    them further, because distances are then measured between kept samples.

    Args:
        data: Root directory containing the user directories.
        handle: When given, only ``data / handle`` is processed; otherwise
            every directory returned by ``_user_dirs(data)``.
    """
    import json

    from bincio.extract.timeseries import _SPATIAL_RESOLUTION_M

    # NOTE(review): only these keys are sliced; any other per-sample array
    # stored in the file would be left at full length — confirm the schema.
    _CHANNELS = ("t", "lat", "lon", "elevation_m", "speed_kmh",
                 "hr_bpm", "cadence_rpm", "power_w", "temperature_c")

    targets = [data / handle] if handle else _user_dirs(data)
    for user_dir in targets:
        acts_dir = user_dir / "activities"
        if not acts_dir.exists():
            continue
        updated = skipped = 0
        for ts_path in sorted(acts_dir.glob("*.timeseries.json")):
            try:
                ts = json.loads(ts_path.read_text(encoding="utf-8"))
            except (OSError, ValueError):
                # Unreadable or invalid JSON/encoding: leave the file alone.
                skipped += 1
                continue
            if not isinstance(ts, dict):
                skipped += 1
                continue

            t_vals = ts.get("t") or []
            if not isinstance(t_vals, list) or len(t_vals) < 2:
                skipped += 1
                continue
            n = len(t_vals)

            # Populated channels must be parallel to "t"; slicing a shorter
            # one would raise IndexError and abort the whole migration.
            channels = {key: ts[key] for key in _CHANNELS if ts.get(key)}
            if any(not isinstance(ch, list) or len(ch) != n
                   for ch in channels.values()):
                skipped += 1
                continue

            kept = _timeseries_keep_indices(
                t_vals,
                channels.get("lat", []),
                channels.get("lon", []),
                channels.get("speed_kmh", []),
                _SPATIAL_RESOLUTION_M,
            )
            if kept is None or len(kept) >= n:
                skipped += 1
                continue  # no distance source, or already sparse

            for key, ch in channels.items():
                ts[key] = [ch[i] for i in kept]

            ts_path.write_text(
                json.dumps(ts, indent=2, ensure_ascii=False), encoding="utf-8"
            )
            updated += 1

        console.print(
            f"  [cyan]{user_dir.name}[/cyan]: "
            f"{updated} downsampled, {skipped} skipped (indoor / short / already sparse)"
        )
|
||||
|
||||
|
||||
@click.command()
|
||||
@click.option("--config", "config_path", default=None,
|
||||
help="Path to extract_config.yaml (reads output.dir from it).")
|
||||
@@ -549,6 +639,9 @@ def _backfill_speed(data: Path, handle: str | None = None) -> None:
|
||||
@click.option("--backfill-speed", "backfill_speed", is_flag=True,
|
||||
help="Compute GPS-derived speed for timeseries where the device didn't record "
|
||||
"per-second speed (run once to enable speed map coloring on older activities).")
|
||||
@click.option("--downsample-timeseries", "downsample_timeseries", is_flag=True,
|
||||
help="Apply 10 m spatial downsampling to all stored timeseries files "
|
||||
"(run once after deploying the downsampling code).")
|
||||
def render(
|
||||
config_path: Optional[str],
|
||||
data_dir: Optional[str],
|
||||
@@ -563,6 +656,7 @@ def render(
|
||||
recompute_vam: bool,
|
||||
backfill_vam_summary: bool,
|
||||
backfill_speed: bool,
|
||||
downsample_timeseries: bool,
|
||||
) -> None:
|
||||
"""Build (or serve) the BincioActivity static site from a BAS data store."""
|
||||
|
||||
@@ -592,6 +686,10 @@ def render(
|
||||
console.print("Backfilling GPS-derived speed into timeseries…")
|
||||
_backfill_speed(data, handle=handle)
|
||||
|
||||
if downsample_timeseries:
|
||||
console.print("Applying spatial downsampling to timeseries…")
|
||||
_downsample_timeseries(data, handle=handle)
|
||||
|
||||
_merge_edits(data, handle=handle)
|
||||
_rebuild_athlete_json(data, handle=handle)
|
||||
_bake_tracks(data, handle=handle)
|
||||
|
||||
Reference in New Issue
Block a user