From 084c652fddcf141cc15c3434b1c70e763a6cd3fe Mon Sep 17 00:00:00 2001
From: Davide Scaini <davide.scaini@alumni.cern>
Date: Thu, 9 Apr 2026 15:27:00 +0200
Subject: [PATCH] Fix writer, site and tests after splitting timeseries out of detail JSON

---
 bincio/extract/writer.py             |   6 +-
 site/src/components/StatsView.svelte |   2 +-
 site/src/lib/dataloader.ts           |  17 +++-
 site/src/pages/activity/[id].astro   |  75 ++++++++++++--
 tests/test_merge.py                  |  86 +++++++++++++++-
 tests/test_writer.py                 | 146 ++++++++++++++++++++++++++-
 6 files changed, 315 insertions(+), 17 deletions(-)
diff --git a/bincio/extract/writer.py b/bincio/extract/writer.py
index 370d619..8648bc6 100644
--- a/bincio/extract/writer.py
+++ b/bincio/extract/writer.py
@@ -50,8 +50,10 @@ def write_activity(
     source = _infer_source(activity)
     has_gps = metrics.bbox is not None and privacy not in ("no_gps", "private")
 
-    # Build timeseries once — written to a separate file to keep detail JSON small
-    timeseries = build_timeseries(activity.points, activity.started_at, privacy)
+    # Build timeseries once — written to a separate file to keep detail JSON small.
+    # Treat an empty timeseries (no points) as None so no file is created.
+    _ts = build_timeseries(activity.points, activity.started_at, privacy)
+    timeseries = _ts if _ts.get("t") else None
     tag = activity.source_hash[-8:] if activity.source_hash else "unknown"
 
     # ── detail JSON ──────────────────────────────────────────────────────────
diff --git a/site/src/components/StatsView.svelte b/site/src/components/StatsView.svelte
index 5bde982..72a9de0 100644
--- a/site/src/components/StatsView.svelte
+++ b/site/src/components/StatsView.svelte
@@ -430,7 +430,7 @@
     <div class="flex flex-col gap-1">
       {#each tooltipActivities as a}
         <a
-          href="{import.meta.env.BASE_URL}activity/{a.id}/"
+          href={a.detail_url ? `${import.meta.env.BASE_URL}activity/${a.id}/` : `${import.meta.env.BASE_URL}activity/local/?id=${a.id}`}
           class="flex flex-col gap-0.5 rounded-lg px-2 py-1.5 hover:bg-zinc-800 transition-colors"
         >
           <span class="text-sm font-medium text-white truncate">
diff --git a/site/src/lib/dataloader.ts b/site/src/lib/dataloader.ts
index 45a998e..46b9f40 100644
--- a/site/src/lib/dataloader.ts
+++ b/site/src/lib/dataloader.ts
@@ -186,17 +186,24 @@ export async function loadTimeseries(
 ): Promise<Timeseries | null> {
   try {
     let url: string;
+    // Strip the leading "activities/" from timeseriesUrl so we can append it
+    // to whatever directory the detail JSON lives in.
+    const filename = timeseriesUrl.replace(/^activities\//, '');
+
     if (timeseriesUrl.startsWith('http')) {
       url = timeseriesUrl;
     } else if (detailUrl.startsWith('http')) {
-      // detailUrl is absolute — resolve timeseries relative to its directory
+      // absolute detailUrl (browser shard resolution) → same directory
       const dir = detailUrl.substring(0, detailUrl.lastIndexOf('/') + 1);
-      // timeseriesUrl is "activities/id.timeseries.json" — strip leading "activities/"
-      // because dir already ends with "activities/"
-      const filename = timeseriesUrl.replace(/^activities\//, '');
       url = `${dir}${filename}`;
     } else {
-      url = `${baseUrl}data/${timeseriesUrl}`;
+      // relative detailUrl — may be plain ("activities/{id}.json", single-user)
+      // or prefixed ("dave/_merged/activities/{id}.json", multi-user SSG prop).
+      // In both cases, resolve the timeseries file from the same directory.
+      const dir = detailUrl.includes('/')
+        ? detailUrl.substring(0, detailUrl.lastIndexOf('/') + 1)
+        : '';
+      url = `${baseUrl}data/${dir}${filename}`;
     }
     return await fetchJSON<Timeseries>(url);
   } catch {
diff --git a/site/src/pages/activity/[id].astro b/site/src/pages/activity/[id].astro
index 36cb8d5..273c53d 100644
--- a/site/src/pages/activity/[id].astro
+++ b/site/src/pages/activity/[id].astro
@@ -1,5 +1,5 @@
 ---
-import { readFileSync } from 'node:fs';
+import { readFileSync, readdirSync, existsSync } from 'node:fs';
 import { join, resolve } from 'node:path';
 import Base from '../../layouts/Base.astro';
 import ActivityDetail from '../../components/ActivityDetail.svelte';
@@ -49,12 +49,73 @@ export async function getStaticPaths() {
     const activities = readActivities(join(dataDir, 'index.json'));
     const athlete = root.owner?.athlete ?? null;
 
-    return activities
-      .filter(a => a.privacy !== 'private' && a.id)
-      .map(a => ({
-        params: { id: a.id },
-        props: { activity: a, athlete },
-      }));
+    // Build the map from the index first
+    const byId = new Map(
+      activities
+        .filter(a => a.privacy !== 'private' && a.id)
+        .map(a => [a.id, { activity: a, athlete }])
+    );
+
+    // Fallback: scan _merged/activities/ directories for any JSON files not yet
+    // covered by the index (e.g. shard read failures, recently added activities).
+    try {
+      const userDirs = readdirSync(dataDir, { withFileTypes: true })
+        .filter(d => d.isDirectory() && !d.name.startsWith('_') && !d.name.startsWith('.'))
+        .map(d => d.name);
+
+      for (const handle of userDirs) {
+        // Prefer _merged, fall back to plain activities dir
+        const mergedActs = join(dataDir, handle, '_merged', 'activities');
+        const plainActs  = join(dataDir, handle, 'activities');
+        const actsDir    = existsSync(mergedActs) ? mergedActs : (existsSync(plainActs) ? plainActs : null);
+        if (!actsDir) continue;
+
+        const urlPrefix = existsSync(mergedActs)
+          ? `${handle}/_merged/`
+          : `${handle}/`;
+
+        for (const file of readdirSync(actsDir)) {
+          if (!file.endsWith('.json') || file.endsWith('.timeseries.json')) continue;
+          const id = file.slice(0, -5); // strip .json
+          if (byId.has(id)) continue;   // already covered by the index
+          try {
+            const detail = JSON.parse(readFileSync(join(actsDir, file), 'utf-8'));
+            if (detail.privacy === 'private') continue;
+            // Build a minimal ActivitySummary from the detail file
+            const a: ActivitySummary = {
+              id,
+              title:            detail.title ?? id,
+              sport:            detail.sport  ?? 'other',
+              sub_sport:        detail.sub_sport ?? null,
+              started_at:       detail.started_at ?? '',
+              distance_m:       detail.distance_m   ?? null,
+              duration_s:       detail.duration_s   ?? null,
+              moving_time_s:    detail.moving_time_s ?? null,
+              elevation_gain_m: detail.elevation_gain_m ?? null,
+              avg_speed_kmh:    detail.avg_speed_kmh ?? null,
+              max_speed_kmh:    detail.max_speed_kmh ?? null,
+              avg_hr_bpm:       detail.avg_hr_bpm ?? null,
+              max_hr_bpm:       detail.max_hr_bpm ?? null,
+              avg_cadence_rpm:  detail.avg_cadence_rpm ?? null,
+              avg_power_w:      detail.avg_power_w ?? null,
+              mmp:              detail.mmp ?? null,
+              source:           detail.source ?? null,
+              privacy:          detail.privacy ?? 'public',
+              detail_url:       `${urlPrefix}activities/${file}`,
+              track_url:        detail.bbox ? `${urlPrefix}activities/${id}.geojson` : null,
+              preview_coords:   null,
+              handle,
+            };
+            byId.set(id, { activity: a, athlete });
+          } catch { /* skip malformed files */ }
+        }
+      }
+    } catch { /* ignore scan errors */ }
+
+    return [...byId.values()].map(({ activity: a, athlete: ath }) => ({
+      params: { id: a.id },
+      props: { activity: a, athlete: ath },
+    }));
   } catch {
     return [];
   }
diff --git a/tests/test_merge.py b/tests/test_merge.py
index 686c38e..e53e2cf 100644
--- a/tests/test_merge.py
+++ b/tests/test_merge.py
@@ -6,7 +6,7 @@ from pathlib import Path
 
 import pytest
 
-from bincio.render.merge import apply_sidecar, merge_all, parse_sidecar
+from bincio.render.merge import apply_sidecar, merge_all, merge_one, parse_sidecar
 
 
 # ── parse_sidecar ─────────────────────────────────────────────────────────────
@@ -204,3 +204,87 @@ def test_merge_all_idempotent(data_dir):
         (data_dir / "_merged" / "activities" / "2024-01-01T080000Z-morning-ride.json").read_text()
     )
     assert data["title"] == "Renamed"
+
+
+# ── timeseries file handling ──────────────────────────────────────────────────
+
+
+@pytest.fixture()
+def data_dir_with_timeseries(tmp_path):
+    """Data dir holding one public ride plus its split-out timeseries file.
+
+    Returns a ``(data_dir, activity_id)`` tuple.
+    """
+    act_id = "2024-01-01T080000Z-morning-ride"
+    common = {"id": act_id, "title": "Morning Ride", "sport": "cycling",
+              "started_at": "2024-01-01T08:00:00Z"}
+    detail = {**common, "description": "", "privacy": "public", "custom": {},
+              "timeseries_url": f"activities/{act_id}.timeseries.json"}
+    summary = {**common, "privacy": "public", "custom": {}}
+    # Two samples; the sensor channels are present but hold only None.
+    blank = ("speed_kmh", "hr_bpm", "cadence_rpm", "power_w", "temperature_c")
+    samples = {"t": [0, 1], "lat": [45.0, 45.1], "lon": [7.0, 7.1],
+               "elevation_m": [300.0, 301.0], **{k: [None, None] for k in blank}}
+
+    acts_dir = tmp_path / "activities"
+    acts_dir.mkdir()
+    (acts_dir / f"{act_id}.json").write_text(json.dumps(detail))
+    (acts_dir / f"{act_id}.timeseries.json").write_text(json.dumps(samples))
+    index_doc = {"activities": [summary]}
+    (tmp_path / "index.json").write_text(json.dumps(index_doc))
+    return tmp_path, act_id
+
+
+def test_merge_all_symlinks_timeseries(data_dir_with_timeseries):
+    """After merge_all, _merged holds a symlink to the original timeseries."""
+    root, activity = data_dir_with_timeseries
+    ts_name = f"{activity}.timeseries.json"
+    original = root / "activities" / ts_name
+    merged = root / "_merged" / "activities" / ts_name
+
+    merge_all(root)
+
+    assert merged.exists(), "timeseries file not present in _merged"
+    assert merged.is_symlink(), "timeseries file should be a symlink (no merge needed)"
+    assert merged.resolve() == original.resolve()  # link targets the source file
+
+
+def test_merge_all_timeseries_survives_sidecar(data_dir_with_timeseries):
+    """A sidecar edit turns the merged detail JSON into a real file; the
+    timeseries next to it must still be linked in."""
+    root, activity = data_dir_with_timeseries
+    sidecar = root / "edits" / f"{activity}.md"
+    sidecar.parent.mkdir()
+    sidecar.write_text("---\ntitle: Renamed\n---\n")
+    merge_all(root)
+
+    merged = root / "_merged" / "activities"
+    merged_detail = merged / f"{activity}.json"
+    merged_ts = merged / f"{activity}.timeseries.json"
+    assert not merged_detail.is_symlink(), "sidecar detail should be a copy, not symlink"
+    assert merged_ts.exists(), "timeseries should still be present after sidecar merge"
+    assert merged_ts.is_symlink(), "timeseries should remain a symlink"
+
+
+def test_merge_one_symlinks_timeseries(data_dir_with_timeseries):
+    """Merging a single activity links its timeseries file into _merged."""
+    root, activity = data_dir_with_timeseries
+    out_dir = root / "_merged" / "activities"
+    out_dir.mkdir(parents=True)
+    linked_ts = out_dir / f"{activity}.timeseries.json"
+
+    merge_one(root, activity)
+
+    assert linked_ts.exists()
+    assert linked_ts.is_symlink()
+
+
+def test_merge_all_idempotent_with_timeseries(data_dir_with_timeseries):
+    """A repeated merge_all leaves the timeseries symlink intact."""
+    root, activity = data_dir_with_timeseries
+    linked_ts = root / "_merged" / "activities" / f"{activity}.timeseries.json"
+    for _ in range(2):
+        merge_all(root)
+
+    assert linked_ts.exists()
+    assert linked_ts.is_symlink()
diff --git a/tests/test_writer.py b/tests/test_writer.py
index f914fbb..410788e 100644
--- a/tests/test_writer.py
+++ b/tests/test_writer.py
@@ -1,4 +1,16 @@
-from bincio.extract.writer import make_activity_id, build_summary, _slugify
+import json
+from pathlib import Path
+
+import pytest
+
+from bincio.extract.writer import (
+    make_activity_id,
+    build_summary,
+    write_activity,
+    finalize_pending,
+    cleanup_pending,
+    _slugify,
+)
 from bincio.extract.metrics import ComputedMetrics
 from bincio.extract.models import ParsedActivity, DataPoint
 from datetime import datetime, timezone
@@ -49,6 +61,138 @@ def test_id_utc_conversion():
     assert make_activity_id(act) == "2024-06-01T073012Z"
 
 
+def _dummy_metrics(**overrides):
+    """Build a ComputedMetrics with fixed values; keyword args replace fields."""
+    unset = dict.fromkeys((
+        "avg_hr_bpm", "max_hr_bpm", "avg_cadence_rpm", "avg_power_w", "max_power_w",
+        "bbox", "start_latlng", "end_latlng", "mmp", "best_efforts", "best_climb_m",
+    ))
+    measured = {
+        "distance_m": 10000.0, "duration_s": 3600, "moving_time_s": 3500,
+        "elevation_gain_m": 100.0, "elevation_loss_m": 95.0,
+        "avg_speed_kmh": 10.0, "max_speed_kmh": 20.0,
+    }
+    return ComputedMetrics(**{**measured, **unset, **overrides})
+
+
+# ── write_activity (timeseries split) ────────────────────────────────────────
+
+def test_write_activity_creates_timeseries_file(tmp_path):
+    """A non-pending write emits <id>.json plus <id>.timeseries.json; the
+    detail JSON references the latter by URL and embeds no samples."""
+    start = datetime(2024, 6, 1, 7, 30, 12, tzinfo=timezone.utc)
+    # A single sample is enough for the timeseries to be non-empty.
+    sample = DataPoint(timestamp=start, lat=45.0, lon=7.0, elevation_m=300.0)
+    parsed = ParsedActivity(
+        points=[sample],
+        sport="cycling",
+        started_at=start,
+        source_file="test.fit",
+        source_hash="sha256:" + "a" * 56,
+    )
+
+    activity_id = write_activity(parsed, _dummy_metrics(), tmp_path)
+
+    out_dir = tmp_path / "activities"
+    detail_file = out_dir / f"{activity_id}.json"
+    series_file = out_dir / f"{activity_id}.timeseries.json"
+    assert detail_file.exists(), "detail JSON not created"
+    assert series_file.exists(), "timeseries JSON not created"
+
+    detail_doc = json.loads(detail_file.read_text())
+    assert "timeseries" not in detail_doc, "timeseries must NOT be embedded in detail"
+    assert detail_doc["timeseries_url"] == f"activities/{activity_id}.timeseries.json"
+    # The split-out file carries the time axis.
+    assert "t" in json.loads(series_file.read_text()), "timeseries file must have 't' array"
+
+
+def test_write_activity_no_points_no_timeseries_file(tmp_path):
+    """With zero data points, write_activity emits only the detail JSON and
+    stores a null timeseries_url in it."""
+    start = datetime(2024, 6, 1, 7, 30, 12, tzinfo=timezone.utc)
+    empty = ParsedActivity(
+        points=[],
+        sport="cycling",
+        started_at=start,
+        source_file="test.fit",
+        source_hash="sha256:" + "b" * 56,
+    )
+
+    activity_id = write_activity(empty, _dummy_metrics(), tmp_path)
+
+    out_dir = tmp_path / "activities"
+    detail_doc = json.loads((out_dir / f"{activity_id}.json").read_text())
+    assert detail_doc["timeseries_url"] is None
+    # No samples, so no timeseries file at all.
+    assert not (out_dir / f"{activity_id}.timeseries.json").exists()
+
+
+def test_write_activity_pending_creates_pending_timeseries(tmp_path):
+    """A pending write stores both files under hash-tagged .pending names."""
+    start = datetime(2024, 6, 1, 7, 30, 12, tzinfo=timezone.utc)
+    digest = "sha256:" + "c" * 56
+    parsed = ParsedActivity(
+        points=[DataPoint(timestamp=start, lat=45.0, lon=7.0)],
+        sport="cycling",
+        started_at=start,
+        source_file="test.fit",
+        source_hash=digest,
+    )
+
+    activity_id = write_activity(parsed, _dummy_metrics(), tmp_path, pending=True)
+
+    pending_stem = f"{activity_id}.{digest[-8:]}"  # last 8 hash chars tag the file
+    for suffix in (".pending.json", ".pending.timeseries.json"):
+        assert (tmp_path / "activities" / f"{pending_stem}{suffix}").exists()
+
+
+def test_finalize_pending_promotes_timeseries(tmp_path):
+    """finalize_pending promotes both pending files and leaves none behind."""
+    start = datetime(2024, 6, 1, 7, 30, 12, tzinfo=timezone.utc)
+    source_hash = "sha256:" + "d" * 56
+    act = ParsedActivity(
+        points=[DataPoint(timestamp=start, lat=45.0, lon=7.0)],
+        sport="cycling",
+        started_at=start,
+        source_file="test.fit",
+        source_hash=source_hash,
+    )
+    activity_id = write_activity(act, _dummy_metrics(), tmp_path, pending=True)
+
+    final_id = finalize_pending(tmp_path, activity_id, source_hash)
+
+    acts_dir = tmp_path / "activities"
+    assert (acts_dir / f"{final_id}.json").exists()
+    assert (acts_dir / f"{final_id}.timeseries.json").exists()
+
+    # Neither pending file may survive; the tag is the last 8 hash characters.
+    pending_stem = f"{activity_id}.{source_hash[-8:]}.pending"
+    assert not (acts_dir / f"{pending_stem}.json").exists()
+    assert not (acts_dir / f"{pending_stem}.timeseries.json").exists()
+
+
+def test_cleanup_pending_removes_timeseries(tmp_path):
+    """Discarding a pending activity deletes its detail and timeseries files."""
+    start = datetime(2024, 6, 1, 7, 30, 12, tzinfo=timezone.utc)
+    digest = "sha256:" + "e" * 56
+    parsed = ParsedActivity(
+        points=[DataPoint(timestamp=start, lat=45.0, lon=7.0)],
+        sport="cycling",
+        started_at=start,
+        source_file="test.fit",
+        source_hash=digest,
+    )
+    activity_id = write_activity(parsed, _dummy_metrics(), tmp_path, pending=True)
+
+    cleanup_pending(tmp_path, activity_id, digest)
+
+    # Pending names embed the final eight characters of the source hash.
+    pending_stem = f"{activity_id}.{digest[-8:]}.pending"
+    leftovers = [tmp_path / "activities" / f"{pending_stem}{ext}"
+                 for ext in (".json", ".timeseries.json")]
+    assert not any(p.exists() for p in leftovers)
+
+
 def test_build_summary_required_fields():
     """build_summary should include all fields needed by the schema."""
     act = _dummy_activity("Test Ride")