def _haversine_m(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
    """Return the great-circle distance in metres between two GPS points.

    Coordinates are given in decimal degrees. The haversine formula is
    evaluated on a sphere of radius 6 371 000 m.

    The intermediate haversine term is clamped to [0, 1]: floating-point
    rounding can push it marginally above 1 for (near-)antipodal points, and
    out-of-range latitudes from corrupt fixes can push it below 0. Either
    case would otherwise make ``sqrt`` raise ``ValueError``.
    """
    dlat = radians(lat2 - lat1)
    dlon = radians(lon2 - lon1)
    a = sin(dlat / 2) ** 2 + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon / 2) ** 2
    a = min(1.0, max(0.0, a))  # keep both sqrt() arguments non-negative
    return 2 * 6_371_000.0 * atan2(sqrt(a), sqrt(1 - a))


# Default spacing, in metres of travelled distance, between retained samples.
_SPATIAL_RESOLUTION_M = 10.0


def _spatial_downsample(
    sampled: list[DataPoint],
    resolution_m: float = _SPATIAL_RESOLUTION_M,
) -> list[DataPoint]:
    """Keep one sample per `resolution_m` of cumulative distance traveled.

    Distance source priority, evaluated per pair of consecutive points:
      1. GPS haversine (lat/lon present on both consecutive points)
      2. speed_kmh × Δt (fallback when GPS is absent or gapped); when the
         current point has no speed, the most recent known speed is reused
         (0 before any speed has been seen). Negative Δt counts as 0.

    Args:
        sampled: Points in recording order. Each point is read for its
            ``lat``, ``lon``, ``speed_kmh`` and ``timestamp`` attributes.
        resolution_m: Minimum cumulative distance, in metres, between two
            retained points.

    Returns:
        A new list holding the first point, every point at which at least
        `resolution_m` has accumulated since the previously kept point, and
        the last point. If `sampled` has fewer than two points, or carries
        neither GPS nor speed data (e.g. indoor activity), the input list
        itself is returned unchanged.
    """
    if len(sampled) < 2:
        return sampled

    has_gps = any(p.lat is not None and p.lon is not None for p in sampled)
    has_speed = any(p.speed_kmh is not None for p in sampled)
    if not has_gps and not has_speed:
        return sampled

    points = iter(sampled)
    prev = next(points)
    result: list[DataPoint] = [prev]
    cum_dist = 0.0
    last_kept = 0.0
    prev_speed = 0.0

    for cur in points:
        if (prev.lat is not None and prev.lon is not None
                and cur.lat is not None and cur.lon is not None):
            dist_m = _haversine_m(prev.lat, prev.lon, cur.lat, cur.lon)
        else:
            # No usable GPS pair: integrate speed over the elapsed time.
            dt = (cur.timestamp - prev.timestamp).total_seconds()
            spd = cur.speed_kmh if cur.speed_kmh is not None else prev_speed
            dist_m = (spd / 3.6) * max(dt, 0)

        if cur.speed_kmh is not None:
            prev_speed = cur.speed_kmh

        cum_dist += dist_m
        if cum_dist - last_kept >= resolution_m:
            result.append(cur)
            last_kept = cum_dist

        prev = cur

    # The final point is always retained so the track ends where it ended.
    if result[-1] is not sampled[-1]:
        result.append(sampled[-1])

    return result
def _spatial_keep_indices(
    t_vals: list,
    lat_vals: list,
    lon_vals: list,
    spd_vals: list,
    resolution_m: float,
    distance_fn,
) -> list[int]:
    """Return the sample indices to keep for spatial downsampling.

    Walks the parallel arrays once, accumulating travelled distance: the
    result of ``distance_fn(lat0, lon0, lat1, lon1)`` when both neighbouring
    samples have coordinates, otherwise speed (km/h) × Δt using the last
    known speed. An index is kept each time at least `resolution_m` has
    accumulated since the previously kept index. Index 0 and the last index
    are always kept.

    `lat_vals`, `lon_vals` and `spd_vals` may be empty or shorter than
    `t_vals`; missing entries are treated as ``None``.
    """
    def at(seq: list, i: int):
        # Tolerate absent or ragged channels instead of raising IndexError.
        return seq[i] if i < len(seq) else None

    n = len(t_vals)
    if n < 2:
        return list(range(n))

    kept: list[int] = [0]
    cum_dist = last_kept = prev_speed = 0.0

    for i in range(1, n):
        la0, lo0 = at(lat_vals, i - 1), at(lon_vals, i - 1)
        la1, lo1 = at(lat_vals, i), at(lon_vals, i)
        spd = at(spd_vals, i)

        if (la0 is not None and lo0 is not None
                and la1 is not None and lo1 is not None):
            dist_m = distance_fn(la0, lo0, la1, lo1)
        else:
            dt = t_vals[i] - t_vals[i - 1]
            dist_m = ((spd if spd is not None else prev_speed) / 3.6) * max(dt, 0)

        if spd is not None:
            prev_speed = spd

        cum_dist += dist_m
        if cum_dist - last_kept >= resolution_m:
            kept.append(i)
            last_kept = cum_dist

    if kept[-1] != n - 1:
        kept.append(n - 1)
    return kept


def _downsample_timeseries(data: Path, handle: str | None = None) -> None:
    """Apply 10 m spatial downsampling to all stored timeseries files in activities/.

    Reads the parallel JSON arrays, computes which indices to keep using the
    same distance logic as _spatial_downsample, slices every channel, and
    writes the file back. Run bincio render --no-build afterward so _merge_edits
    regenerates _merged/ from the smaller source files.

    Files are rewritten in place. Files that cannot be read or parsed, or
    whose top-level JSON value is not an object, are left untouched and
    reported separately. Channels shorter than ``t`` are padded with
    ``None`` for the missing samples rather than aborting the whole run.

    Args:
        data: Root of the data store; user directories live directly below.
        handle: When given, only ``data / handle`` is processed; otherwise
            every directory returned by ``_user_dirs(data)``.
    """
    import json
    from bincio.extract.timeseries import _haversine_m, _SPATIAL_RESOLUTION_M

    _CHANNELS = ("t", "lat", "lon", "elevation_m", "speed_kmh",
                 "hr_bpm", "cadence_rpm", "power_w", "temperature_c")

    targets = [data / handle] if handle else _user_dirs(data)
    for user_dir in targets:
        acts_dir = user_dir / "activities"
        if not acts_dir.exists():
            continue
        updated = skipped = unreadable = 0
        for ts_path in sorted(acts_dir.glob("*.timeseries.json")):
            try:
                ts = json.loads(ts_path.read_text(encoding="utf-8"))
            except (OSError, ValueError):
                # ValueError covers both invalid JSON and invalid UTF-8.
                unreadable += 1
                continue
            if not isinstance(ts, dict):
                unreadable += 1
                continue

            t_vals = ts.get("t") or []
            lat_vals = ts.get("lat") or []
            lon_vals = ts.get("lon") or []
            spd_vals = ts.get("speed_kmh") or []
            n = len(t_vals)
            if n < 2:
                skipped += 1
                continue

            has_gps = any(v is not None for v in lat_vals)
            has_speed = any(v is not None for v in spd_vals)
            if not has_gps and not has_speed:
                skipped += 1
                continue

            kept = _spatial_keep_indices(
                t_vals, lat_vals, lon_vals, spd_vals,
                _SPATIAL_RESOLUTION_M, _haversine_m,
            )

            if len(kept) >= n:
                skipped += 1
                continue  # already sparse (very short / indoor / rest-stop heavy)

            for key in _CHANNELS:
                ch = ts.get(key)
                if isinstance(ch, list) and ch:
                    ts[key] = [ch[i] if i < len(ch) else None for i in kept]

            ts_path.write_text(
                json.dumps(ts, indent=2, ensure_ascii=False), encoding="utf-8"
            )
            updated += 1

        console.print(
            f"  [cyan]{user_dir.name}[/cyan]: "
            f"{updated} downsampled, {skipped} skipped (indoor / short / already sparse)"
        )
        if unreadable:
            console.print(
                f"  [cyan]{user_dir.name}[/cyan]: "
                f"{unreadable} unreadable or malformed timeseries file(s) left untouched"
            )
handle=handle) + if downsample_timeseries: + console.print("Applying spatial downsampling to timeseries…") + _downsample_timeseries(data, handle=handle) + _merge_edits(data, handle=handle) _rebuild_athlete_json(data, handle=handle) _bake_tracks(data, handle=handle)