# Source: bincio-activity/scripts/backfill.py
# Snapshot: 2026-03-30 10:53:51 +02:00 · 226 lines · 8.0 KiB · Python

"""Backfill MMP and best-effort records into existing BAS activity JSONs.
Reads 1Hz timeseries (power_w, speed_kmh, elevation_m) from already-extracted
detail JSONs — no need to re-parse source FIT/GPX/TCX files.
Run once after upgrading to the MMP + records extract pipeline, or whenever
the computation logic changes and you want to refresh all activities.
Usage:
    uv run python scripts/backfill.py [--data-dir ~/src/bincio_data] [--dry-run] [--force]
"""
import json
import sys
from pathlib import Path
import click
from rich.console import Console
from rich.progress import BarColumn, MofNCompleteColumn, Progress, TextColumn, TimeElapsedColumn
# Shared Rich console; main() uses it for status messages and the progress bar.
console = Console()
# ── MMP ───────────────────────────────────────────────────────────────────────
MMP_DURATIONS_S = [1, 2, 5, 10, 15, 20, 30, 60, 120, 180, 300, 600, 1200, 1800, 3600]
def _compute_mmp(power_w: list) -> list[list[int]] | None:
samples = [w for w in power_w if w is not None]
if len(samples) < 2:
return None
n = len(samples)
results = []
for d in MMP_DURATIONS_S:
if d > n:
break
window_sum = sum(samples[:d])
best = window_sum
for i in range(1, n - d + 1):
window_sum += samples[i + d - 1] - samples[i - 1]
if window_sum > best:
best = window_sum
results.append([d, round(best / d)])
return results if results else None
# ── Best efforts ──────────────────────────────────────────────────────────────
# Target distances in kilometres for the best-effort search, keyed by sport.
# A sport without an entry here gets no best-effort records.
BEST_EFFORT_DISTANCES: dict[str, list[float]] = {
    # 400 m, 1 km, 1 mile, 5 km, 10 km, half marathon, marathon
    "running": [0.4, 1.0, 1.609, 5.0, 10.0, 21.097, 42.195],
    "cycling": [5.0, 10.0, 20.0, 50.0, 100.0],
    # 100 m through 2 km
    "swimming": [0.1, 0.2, 0.5, 1.0, 2.0],
}
def _fastest_time(speed_kmh: list, target_km: float) -> int | None:
left = 0
window_dist = 0.0
best_s = None
for right, spd in enumerate(speed_kmh):
window_dist += (spd or 0.0) / 3600.0
while window_dist >= target_km and left <= right:
window_s = right - left + 1
if best_s is None or window_s < best_s:
best_s = window_s
window_dist -= (speed_kmh[left] or 0.0) / 3600.0
left += 1
return best_s
def _compute_best_efforts(speed_kmh: list, sport: str) -> list[list[float]] | None:
    """Compute the fastest time for each target distance of a sport.

    Args:
        speed_kmh: Speed samples in km/h; entries may be ``None``.
        sport: Key into ``BEST_EFFORT_DISTANCES`` selecting the target distances.

    Returns:
        A list of ``[distance_km, seconds]`` pairs for every target distance
        the activity actually covers, or ``None`` when the sport has no
        targets, there is no speed data, or no target was reached.
    """
    distances = BEST_EFFORT_DISTANCES.get(sport)
    if not (distances and speed_kmh):
        return None

    efforts = [
        [km, seconds]
        for km in distances
        if (seconds := _fastest_time(speed_kmh, km)) is not None
    ]
    return efforts or None
def _compute_best_climb(elevation_m: list) -> float | None:
valid = [e for e in elevation_m if e is not None]
if len(valid) < 2:
return None
max_gain = current = 0.0
for a, b in zip(valid, valid[1:]):
current = max(0.0, current + (b - a))
if current > max_gain:
max_gain = current
return round(max_gain, 1) if max_gain > 0 else None
# ── Main ──────────────────────────────────────────────────────────────────────
# Derived fields that are copied from activity detail JSONs into index.json.
_BACKFILL_FIELDS = ("mmp", "best_efforts", "best_climb_m")


def _is_unset(field: str, value: object) -> bool:
    """Return True when *value* counts as "not computed yet" for *field*.

    ``best_efforts`` distinguishes ``None`` (never computed) from ``[]``
    (computed, nothing found); the other fields treat any falsy value as unset.
    """
    if field == "best_efforts":
        return value is None
    return not value


def _backfill_detail(detail: dict, force: bool) -> bool:
    """Fill the derived fields of one activity dict in place.

    A field is (re)computed when it is missing/``None`` or when *force* is set.
    Returns True only if the dict actually changed, so callers can avoid
    rewriting files whose content would be identical.
    """
    sport = detail.get("sport", "other")
    ts = detail.get("timeseries") or {}
    changed = False

    if force or detail.get("mmp") is None:
        mmp = _compute_mmp(ts.get("power_w") or [])
        # No power data: leave whatever is stored untouched.
        if mmp is not None and detail.get("mmp") != mmp:
            detail["mmp"] = mmp
            changed = True

    if force or detail.get("best_efforts") is None:
        efforts = _compute_best_efforts(ts.get("speed_kmh") or [], sport)
        # The key is stored even when the result is None, but a None that is
        # already stored must not count as a change on every later run.
        if "best_efforts" not in detail or detail["best_efforts"] != efforts:
            detail["best_efforts"] = efforts
            changed = True

    if sport == "cycling" and (force or detail.get("best_climb_m") is None):
        climb = _compute_best_climb(ts.get("elevation_m") or [])
        if climb is not None and detail.get("best_climb_m") != climb:
            detail["best_climb_m"] = climb
            changed = True

    return changed


@click.command()
@click.option("--data-dir", default="~/src/bincio_data", show_default=True)
@click.option("--dry-run", is_flag=True)
@click.option("--force", is_flag=True, help="Recompute even if fields already present.")
def main(data_dir: str, dry_run: bool, force: bool) -> None:
    """Backfill mmp, best_efforts, and best_climb_m into existing activity JSONs."""
    # Steps: (1) update every activity detail JSON, (2) copy the derived fields
    # into index.json, (3) rebuild athlete.json, (4) re-run the merge step.
    # With --dry-run only step 1 is simulated and nothing is written.
    data = Path(data_dir).expanduser()
    acts_dir = data / "activities"
    if not acts_dir.exists():
        console.print(f"[red]Activities dir not found: {acts_dir}[/red]")
        sys.exit(1)

    jsons = sorted(acts_dir.glob("*.json"))
    console.print(f"Found [bold]{len(jsons)}[/bold] activity JSONs in {acts_dir}")

    # "skipped" counts unreadable files and activities that needed no change.
    updated = skipped = 0
    # Activity id -> derived fields, collected here so the files do not have to
    # be read a second time when index.json is patched.
    lookup: dict[str, dict] = {}

    with Progress(
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        MofNCompleteColumn(),
        TimeElapsedColumn(),
        console=console,
    ) as progress:
        task = progress.add_task("Backfilling…", total=len(jsons))
        for path in jsons:
            progress.advance(task)
            try:
                # JSON is written with ensure_ascii=False, so pin the encoding
                # instead of relying on the platform default.
                detail = json.loads(path.read_text(encoding="utf-8"))
            except (OSError, ValueError) as exc:
                # ValueError covers both invalid JSON and undecodable bytes.
                # markup=False: file names / messages may contain "[...]".
                console.print(f"Skipping {path.name}: {exc}", style="yellow", markup=False)
                skipped += 1
                continue
            if not isinstance(detail, dict):
                console.print(
                    f"Skipping {path.name}: not a JSON object", style="yellow", markup=False
                )
                skipped += 1
                continue

            if _backfill_detail(detail, force):
                if not dry_run:
                    path.write_text(
                        json.dumps(detail, indent=2, ensure_ascii=False),
                        encoding="utf-8",
                    )
                updated += 1
            else:
                skipped += 1

            act_id = detail.get("id")
            if isinstance(act_id, (str, int)):
                lookup[act_id] = {field: detail.get(field) for field in _BACKFILL_FIELDS}

    console.print(
        f"\n[green]Done.[/green] "
        f"Updated [bold]{updated}[/bold], skipped [bold]{skipped}[/bold]."
    )
    if dry_run:
        console.print("[yellow]Dry run — nothing written.[/yellow]")
        return

    # Patch index.json summaries
    console.print("Patching index.json summaries…")
    index_path = data / "index.json"
    if not index_path.exists():
        console.print(f"[red]index.json not found: {index_path}[/red]")
        sys.exit(1)
    index = json.loads(index_path.read_text(encoding="utf-8"))

    patched = 0
    for summary in index.get("activities", []):
        row = lookup.get(summary.get("id"))
        if not row:
            continue
        for field in _BACKFILL_FIELDS:
            new_value = row.get(field)
            if _is_unset(field, new_value):
                continue
            current = summary.get(field)
            # Fill gaps always; with --force also replace stale values so the
            # index reflects what was just recomputed in the detail files.
            if _is_unset(field, current) or (force and current != new_value):
                summary[field] = new_value
                patched += 1
    index_path.write_text(json.dumps(index, indent=2, ensure_ascii=False), encoding="utf-8")
    console.print(f"  {patched} fields patched in index.json.")

    # Rebuild athlete.json
    console.print("Rebuilding athlete.json…")
    # Imported here so that --dry-run works without the bincio package loaded.
    from bincio.extract.writer import write_athlete_json

    owner = index.get("owner", {})
    athlete_cfg = {k: v for k, v in (owner.get("athlete") or {}).items() if v is not None}
    write_athlete_json(index.get("activities", []), data, athlete_cfg)
    console.print("  athlete.json written.")

    # Re-merge
    console.print("Running merge_all…")
    from bincio.render.merge import merge_all

    n = merge_all(data)
    console.print(f"  merge_all done ({n} sidecars).")
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()