From 084c652fddcf141cc15c3434b1c70e763a6cd3fe Mon Sep 17 00:00:00 2001 From: Davide Scaini Date: Thu, 9 Apr 2026 15:27:00 +0200 Subject: [PATCH] fixing stuff after splitting jsons --- bincio/extract/writer.py | 6 +- site/src/components/StatsView.svelte | 2 +- site/src/lib/dataloader.ts | 17 +++- site/src/pages/activity/[id].astro | 75 ++++++++++++-- tests/test_merge.py | 86 +++++++++++++++- tests/test_writer.py | 146 ++++++++++++++++++++++++++- 6 files changed, 315 insertions(+), 17 deletions(-) diff --git a/bincio/extract/writer.py b/bincio/extract/writer.py index 370d619..8648bc6 100644 --- a/bincio/extract/writer.py +++ b/bincio/extract/writer.py @@ -50,8 +50,10 @@ def write_activity( source = _infer_source(activity) has_gps = metrics.bbox is not None and privacy not in ("no_gps", "private") - # Build timeseries once — written to a separate file to keep detail JSON small - timeseries = build_timeseries(activity.points, activity.started_at, privacy) + # Build timeseries once — written to a separate file to keep detail JSON small. + # Treat an empty timeseries (no points) as None so no file is created. + _ts = build_timeseries(activity.points, activity.started_at, privacy) + timeseries = _ts if _ts.get("t") else None tag = activity.source_hash[-8:] if activity.source_hash else "unknown" # ── detail JSON ────────────────────────────────────────────────────────── diff --git a/site/src/components/StatsView.svelte b/site/src/components/StatsView.svelte index 5bde982..72a9de0 100644 --- a/site/src/components/StatsView.svelte +++ b/site/src/components/StatsView.svelte @@ -430,7 +430,7 @@
{#each tooltipActivities as a} diff --git a/site/src/lib/dataloader.ts b/site/src/lib/dataloader.ts index 45a998e..46b9f40 100644 --- a/site/src/lib/dataloader.ts +++ b/site/src/lib/dataloader.ts @@ -186,17 +186,24 @@ export async function loadTimeseries( ): Promise { try { let url: string; + // Strip the leading "activities/" from timeseriesUrl so we can append it + // to whatever directory the detail JSON lives in. + const filename = timeseriesUrl.replace(/^activities\//, ''); + if (timeseriesUrl.startsWith('http')) { url = timeseriesUrl; } else if (detailUrl.startsWith('http')) { - // detailUrl is absolute — resolve timeseries relative to its directory + // absolute detailUrl (browser shard resolution) → same directory const dir = detailUrl.substring(0, detailUrl.lastIndexOf('/') + 1); - // timeseriesUrl is "activities/id.timeseries.json" — strip leading "activities/" - // because dir already ends with "activities/" - const filename = timeseriesUrl.replace(/^activities\//, ''); url = `${dir}${filename}`; } else { - url = `${baseUrl}data/${timeseriesUrl}`; + // relative detailUrl — may be plain ("activities/{id}.json", single-user) + // or prefixed ("dave/_merged/activities/{id}.json", multi-user SSG prop). + // In both cases, resolve the timeseries file from the same directory. + const dir = detailUrl.includes('/') + ? detailUrl.substring(0, detailUrl.lastIndexOf('/') + 1) + : ''; + url = `${baseUrl}data/${dir}${filename}`; } return await fetchJSON(url); } catch { diff --git a/site/src/pages/activity/[id].astro b/site/src/pages/activity/[id].astro index 36cb8d5..273c53d 100644 --- a/site/src/pages/activity/[id].astro +++ b/site/src/pages/activity/[id].astro @@ -1,5 +1,5 @@ --- -import { readFileSync } from 'node:fs'; +import { readFileSync, readdirSync, existsSync } from 'node:fs'; import { join, resolve } from 'node:path'; import Base from '../../layouts/Base.astro'; import ActivityDetail from '../../components/ActivityDetail.svelte'; @@ -49,12 +49,73 @@ export async function getStaticPaths() { const activities = readActivities(join(dataDir, 'index.json')); const athlete = root.owner?.athlete ?? null; - return activities - .filter(a => a.privacy !== 'private' && a.id) - .map(a => ({ - params: { id: a.id }, - props: { activity: a, athlete }, - })); + // Build the map from the index first + const byId = new Map( + activities + .filter(a => a.privacy !== 'private' && a.id) + .map(a => [a.id, { activity: a, athlete }]) + ); + + // Fallback: scan _merged/activities/ directories for any JSON files not yet + // covered by the index (e.g. shard read failures, recently added activities). + try { + const userDirs = readdirSync(dataDir, { withFileTypes: true }) + .filter(d => d.isDirectory() && !d.name.startsWith('_') && !d.name.startsWith('.')) + .map(d => d.name); + + for (const handle of userDirs) { + // Prefer _merged, fall back to plain activities dir + const mergedActs = join(dataDir, handle, '_merged', 'activities'); + const plainActs = join(dataDir, handle, 'activities'); + const actsDir = existsSync(mergedActs) ? mergedActs : (existsSync(plainActs) ? plainActs : null); + if (!actsDir) continue; + + const urlPrefix = existsSync(mergedActs) + ? `${handle}/_merged/` + : `${handle}/`; + + for (const file of readdirSync(actsDir)) { + if (!file.endsWith('.json') || file.endsWith('.timeseries.json')) continue; + const id = file.slice(0, -5); // strip .json + if (byId.has(id)) continue; // already covered by the index + try { + const detail = JSON.parse(readFileSync(join(actsDir, file), 'utf-8')); + if (detail.privacy === 'private') continue; + // Build a minimal ActivitySummary from the detail file + const a: ActivitySummary = { + id, + title: detail.title ?? id, + sport: detail.sport ?? 'other', + sub_sport: detail.sub_sport ?? null, + started_at: detail.started_at ?? '', + distance_m: detail.distance_m ?? null, + duration_s: detail.duration_s ?? null, + moving_time_s: detail.moving_time_s ?? null, + elevation_gain_m: detail.elevation_gain_m ?? null, + avg_speed_kmh: detail.avg_speed_kmh ?? null, + max_speed_kmh: detail.max_speed_kmh ?? null, + avg_hr_bpm: detail.avg_hr_bpm ?? null, + max_hr_bpm: detail.max_hr_bpm ?? null, + avg_cadence_rpm: detail.avg_cadence_rpm ?? null, + avg_power_w: detail.avg_power_w ?? null, + mmp: detail.mmp ?? null, + source: detail.source ?? null, + privacy: detail.privacy ?? 'public', + detail_url: `${urlPrefix}activities/${file}`, + track_url: detail.bbox ? `${urlPrefix}activities/${id}.geojson` : null, + preview_coords: null, + handle, + }; + byId.set(id, { activity: a, athlete }); + } catch { /* skip malformed files */ } + } + } + } catch { /* ignore scan errors */ } + + return [...byId.values()].map(({ activity: a, athlete: ath }) => ({ + params: { id: a.id }, + props: { activity: a, athlete: ath }, + })); } catch { return []; } diff --git a/tests/test_merge.py b/tests/test_merge.py index 686c38e..e53e2cf 100644 --- a/tests/test_merge.py +++ b/tests/test_merge.py @@ -6,7 +6,7 @@ from pathlib import Path import pytest -from bincio.render.merge import apply_sidecar, merge_all, parse_sidecar +from bincio.render.merge import apply_sidecar, merge_all, merge_one, parse_sidecar # ── parse_sidecar ───────────────────────────────────────────────────────────── @@ -204,3 +204,87 @@ def test_merge_all_idempotent(data_dir): (data_dir / "_merged" / "activities" / "2024-01-01T080000Z-morning-ride.json").read_text() ) assert data["title"] == "Renamed" + + +# ── timeseries file handling ────────────────────────────────────────────────── + + +@pytest.fixture() +def data_dir_with_timeseries(tmp_path): + """data_dir fixture extended with .timeseries.json sidecar files.""" + acts = tmp_path / "activities" + acts.mkdir() + ACT_ID = "2024-01-01T080000Z-morning-ride" + detail = { + "id": ACT_ID, "title": "Morning Ride", "sport": "cycling", + "started_at": "2024-01-01T08:00:00Z", + "description": "", "privacy": "public", "custom": {}, + "timeseries_url": f"activities/{ACT_ID}.timeseries.json", + } + ts_data = {"t": [0, 1], "lat": [45.0, 45.1], "lon": [7.0, 7.1], + "elevation_m": [300.0, 301.0], "speed_kmh": [None, None], + "hr_bpm": [None, None], "cadence_rpm": [None, None], + "power_w": [None, None], "temperature_c": [None, None]} + (acts / f"{ACT_ID}.json").write_text(json.dumps(detail)) + (acts / f"{ACT_ID}.timeseries.json").write_text(json.dumps(ts_data)) + index = {"activities": [ + {"id": ACT_ID, "title": "Morning Ride", "sport": "cycling", + "started_at": "2024-01-01T08:00:00Z", "privacy": "public", "custom": {}}, + ]} + (tmp_path / "index.json").write_text(json.dumps(index)) + return tmp_path, ACT_ID + + +def test_merge_all_symlinks_timeseries(data_dir_with_timeseries): + """merge_all should symlink .timeseries.json alongside the detail JSON.""" + data_dir, act_id = data_dir_with_timeseries + merge_all(data_dir) + + ts_dest = data_dir / "_merged" / "activities" / f"{act_id}.timeseries.json" + assert ts_dest.exists(), "timeseries file not present in _merged" + assert ts_dest.is_symlink(), "timeseries file should be a symlink (no merge needed)" + + # Points to the original + src = data_dir / "activities" / f"{act_id}.timeseries.json" + assert ts_dest.resolve() == src.resolve() + + +def test_merge_all_timeseries_survives_sidecar(data_dir_with_timeseries): + """When a sidecar is applied (detail JSON is rewritten), the timeseries + symlink should still be created alongside it.""" + data_dir, act_id = data_dir_with_timeseries + edits = data_dir / "edits" + edits.mkdir() + (edits / f"{act_id}.md").write_text("---\ntitle: Renamed\n---\n") + merge_all(data_dir) + + detail_dest = data_dir / "_merged" / "activities" / f"{act_id}.json" + ts_dest = data_dir / "_merged" / "activities" / f"{act_id}.timeseries.json" + + assert not detail_dest.is_symlink(), "sidecar detail should be a copy, not symlink" + assert ts_dest.exists(), "timeseries should still be present after sidecar merge" + assert ts_dest.is_symlink(), "timeseries should remain a symlink" + + +def test_merge_one_symlinks_timeseries(data_dir_with_timeseries): + """merge_one should symlink the .timeseries.json file for the given activity.""" + data_dir, act_id = data_dir_with_timeseries + merged_acts = data_dir / "_merged" / "activities" + merged_acts.mkdir(parents=True) + + merge_one(data_dir, act_id) + + ts_dest = merged_acts / f"{act_id}.timeseries.json" + assert ts_dest.exists() + assert ts_dest.is_symlink() + + +def test_merge_all_idempotent_with_timeseries(data_dir_with_timeseries): + """Running merge_all twice should not break timeseries symlinks.""" + data_dir, act_id = data_dir_with_timeseries + merge_all(data_dir) + merge_all(data_dir) + + ts_dest = data_dir / "_merged" / "activities" / f"{act_id}.timeseries.json" + assert ts_dest.exists() + assert ts_dest.is_symlink() diff --git a/tests/test_writer.py b/tests/test_writer.py index f914fbb..410788e 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -1,4 +1,16 @@ -from bincio.extract.writer import make_activity_id, build_summary, _slugify +import json +from pathlib import Path + +import pytest + +from bincio.extract.writer import ( + make_activity_id, + build_summary, + write_activity, + finalize_pending, + cleanup_pending, + _slugify, +) from bincio.extract.metrics import ComputedMetrics from bincio.extract.models import ParsedActivity, DataPoint from datetime import datetime, timezone @@ -49,6 +61,138 @@ def test_id_utc_conversion(): assert make_activity_id(act) == "2024-06-01T073012Z" +def _dummy_metrics(**overrides): + defaults = dict( + distance_m=10000.0, duration_s=3600, moving_time_s=3500, + elevation_gain_m=100.0, elevation_loss_m=95.0, + avg_speed_kmh=10.0, max_speed_kmh=20.0, + avg_hr_bpm=None, max_hr_bpm=None, + avg_cadence_rpm=None, avg_power_w=None, max_power_w=None, + bbox=None, start_latlng=None, end_latlng=None, + mmp=None, best_efforts=None, best_climb_m=None, + ) + defaults.update(overrides) + return ComputedMetrics(**defaults) + + +# ── write_activity (timeseries split) ──────────────────────────────────────── + +def test_write_activity_creates_timeseries_file(tmp_path): + """write_activity should produce a separate .timeseries.json and + set timeseries_url in the detail JSON (no embedded timeseries).""" + ts = datetime(2024, 6, 1, 7, 30, 12, tzinfo=timezone.utc) + # Activity with one data point so timeseries is non-null + act = ParsedActivity( + points=[DataPoint(timestamp=ts, lat=45.0, lon=7.0, elevation_m=300.0)], + sport="cycling", + started_at=ts, + source_file="test.fit", + source_hash="sha256:" + "a" * 56, + ) + metrics = _dummy_metrics() + activity_id = write_activity(act, metrics, tmp_path) + + detail_path = tmp_path / "activities" / f"{activity_id}.json" + ts_path = tmp_path / "activities" / f"{activity_id}.timeseries.json" + + assert detail_path.exists(), "detail JSON not created" + assert ts_path.exists(), "timeseries JSON not created" + + detail = json.loads(detail_path.read_text()) + assert "timeseries" not in detail, "timeseries must NOT be embedded in detail" + assert detail["timeseries_url"] == f"activities/{activity_id}.timeseries.json" + + ts_data = json.loads(ts_path.read_text()) + assert "t" in ts_data, "timeseries file must have 't' array" + + +def test_write_activity_no_points_no_timeseries_file(tmp_path): + """An activity with no data points should produce no timeseries file + and timeseries_url should be None.""" + ts = datetime(2024, 6, 1, 7, 30, 12, tzinfo=timezone.utc) + act = ParsedActivity( + points=[], + sport="cycling", + started_at=ts, + source_file="test.fit", + source_hash="sha256:" + "b" * 56, + ) + metrics = _dummy_metrics() + activity_id = write_activity(act, metrics, tmp_path) + + detail = json.loads((tmp_path / "activities" / f"{activity_id}.json").read_text()) + ts_path = tmp_path / "activities" / f"{activity_id}.timeseries.json" + + assert detail["timeseries_url"] is None + assert not ts_path.exists() + + +def test_write_activity_pending_creates_pending_timeseries(tmp_path): + """pending=True should create .pending.timeseries.json alongside .pending.json.""" + ts = datetime(2024, 6, 1, 7, 30, 12, tzinfo=timezone.utc) + act = ParsedActivity( + points=[DataPoint(timestamp=ts, lat=45.0, lon=7.0)], + sport="cycling", + started_at=ts, + source_file="test.fit", + source_hash="sha256:" + "c" * 56, + ) + metrics = _dummy_metrics() + activity_id = write_activity(act, metrics, tmp_path, pending=True) + + acts_dir = tmp_path / "activities" + tag = "c" * 8 + assert (acts_dir / f"{activity_id}.{tag}.pending.json").exists() + assert (acts_dir / f"{activity_id}.{tag}.pending.timeseries.json").exists() + + +def test_finalize_pending_promotes_timeseries(tmp_path): + """finalize_pending should rename the pending timeseries file to its final path.""" + ts = datetime(2024, 6, 1, 7, 30, 12, tzinfo=timezone.utc) + act = ParsedActivity( + points=[DataPoint(timestamp=ts, lat=45.0, lon=7.0)], + sport="cycling", + started_at=ts, + source_file="test.fit", + source_hash="sha256:" + "d" * 56, + ) + metrics = _dummy_metrics() + activity_id = write_activity(act, metrics, tmp_path, pending=True) + source_hash = "sha256:" + "d" * 56 + + final_id = finalize_pending(tmp_path, activity_id, source_hash) + + acts_dir = tmp_path / "activities" + assert (acts_dir / f"{final_id}.json").exists() + assert (acts_dir / f"{final_id}.timeseries.json").exists() + + # Pending files must be gone + tag = "d" * 8 + assert not (acts_dir / f"{activity_id}.{tag}.pending.timeseries.json").exists() + + +def test_cleanup_pending_removes_timeseries(tmp_path): + """cleanup_pending should remove the pending timeseries file.""" + ts = datetime(2024, 6, 1, 7, 30, 12, tzinfo=timezone.utc) + act = ParsedActivity( + points=[DataPoint(timestamp=ts, lat=45.0, lon=7.0)], + sport="cycling", + started_at=ts, + source_file="test.fit", + source_hash="sha256:" + "e" * 56, + ) + metrics = _dummy_metrics() + activity_id = write_activity(act, metrics, tmp_path, pending=True) + source_hash = "sha256:" + "e" * 56 + + cleanup_pending(tmp_path, activity_id, source_hash) + + tag = "e" * 8 + acts_dir = tmp_path / "activities" + assert not (acts_dir / f"{activity_id}.{tag}.pending.json").exists() + assert not (acts_dir / f"{activity_id}.{tag}.pending.timeseries.json").exists() + + def test_build_summary_required_fields(): """build_summary should include all fields needed by the schema.""" act = _dummy_activity("Test Ride")