fixing stuff after splitting jsons

This commit is contained in:
Davide Scaini
2026-04-09 15:27:00 +02:00
parent 8118f6f316
commit 084c652fdd
6 changed files with 315 additions and 17 deletions
+4 -2
View File
@@ -50,8 +50,10 @@ def write_activity(
source = _infer_source(activity)
has_gps = metrics.bbox is not None and privacy not in ("no_gps", "private")
# Build timeseries once — written to a separate file to keep detail JSON small
timeseries = build_timeseries(activity.points, activity.started_at, privacy)
# Build timeseries once — written to a separate file to keep detail JSON small.
# Treat an empty timeseries (no points) as None so no file is created.
_ts = build_timeseries(activity.points, activity.started_at, privacy)
timeseries = _ts if _ts.get("t") else None
tag = activity.source_hash[-8:] if activity.source_hash else "unknown"
# ── detail JSON ──────────────────────────────────────────────────────────
+1 -1
View File
@@ -430,7 +430,7 @@
<div class="flex flex-col gap-1">
{#each tooltipActivities as a}
<a
href="{import.meta.env.BASE_URL}activity/{a.id}/"
href={a.detail_url ? `${import.meta.env.BASE_URL}activity/${a.id}/` : `${import.meta.env.BASE_URL}activity/local/?id=${a.id}`}
class="flex flex-col gap-0.5 rounded-lg px-2 py-1.5 hover:bg-zinc-800 transition-colors"
>
<span class="text-sm font-medium text-white truncate">
+12 -5
View File
@@ -186,17 +186,24 @@ export async function loadTimeseries(
): Promise<Timeseries | null> {
try {
let url: string;
// Strip the leading "activities/" from timeseriesUrl so we can append it
// to whatever directory the detail JSON lives in.
const filename = timeseriesUrl.replace(/^activities\//, '');
if (timeseriesUrl.startsWith('http')) {
url = timeseriesUrl;
} else if (detailUrl.startsWith('http')) {
// detailUrl is absolute — resolve timeseries relative to its directory
// absolute detailUrl (browser shard resolution) → same directory
const dir = detailUrl.substring(0, detailUrl.lastIndexOf('/') + 1);
// timeseriesUrl is "activities/id.timeseries.json" — strip leading "activities/"
// because dir already ends with "activities/"
const filename = timeseriesUrl.replace(/^activities\//, '');
url = `${dir}${filename}`;
} else {
url = `${baseUrl}data/${timeseriesUrl}`;
// relative detailUrl — may be plain ("activities/{id}.json", single-user)
// or prefixed ("dave/_merged/activities/{id}.json", multi-user SSG prop).
// In both cases, resolve the timeseries file from the same directory.
const dir = detailUrl.includes('/')
? detailUrl.substring(0, detailUrl.lastIndexOf('/') + 1)
: '';
url = `${baseUrl}data/${dir}${filename}`;
}
return await fetchJSON<Timeseries>(url);
} catch {
+65 -4
View File
@@ -1,5 +1,5 @@
---
import { readFileSync } from 'node:fs';
import { readFileSync, readdirSync, existsSync } from 'node:fs';
import { join, resolve } from 'node:path';
import Base from '../../layouts/Base.astro';
import ActivityDetail from '../../components/ActivityDetail.svelte';
@@ -49,11 +49,72 @@ export async function getStaticPaths() {
const activities = readActivities(join(dataDir, 'index.json'));
const athlete = root.owner?.athlete ?? null;
return activities
// Build the map from the index first
const byId = new Map(
activities
.filter(a => a.privacy !== 'private' && a.id)
.map(a => ({
.map(a => [a.id, { activity: a, athlete }])
);
// Fallback: scan _merged/activities/ directories for any JSON files not yet
// covered by the index (e.g. shard read failures, recently added activities).
try {
const userDirs = readdirSync(dataDir, { withFileTypes: true })
.filter(d => d.isDirectory() && !d.name.startsWith('_') && !d.name.startsWith('.'))
.map(d => d.name);
for (const handle of userDirs) {
// Prefer _merged, fall back to plain activities dir
const mergedActs = join(dataDir, handle, '_merged', 'activities');
const plainActs = join(dataDir, handle, 'activities');
const actsDir = existsSync(mergedActs) ? mergedActs : (existsSync(plainActs) ? plainActs : null);
if (!actsDir) continue;
const urlPrefix = existsSync(mergedActs)
? `${handle}/_merged/`
: `${handle}/`;
for (const file of readdirSync(actsDir)) {
if (!file.endsWith('.json') || file.endsWith('.timeseries.json')) continue;
const id = file.slice(0, -5); // strip .json
if (byId.has(id)) continue; // already covered by the index
try {
const detail = JSON.parse(readFileSync(join(actsDir, file), 'utf-8'));
if (detail.privacy === 'private') continue;
// Build a minimal ActivitySummary from the detail file
const a: ActivitySummary = {
id,
title: detail.title ?? id,
sport: detail.sport ?? 'other',
sub_sport: detail.sub_sport ?? null,
started_at: detail.started_at ?? '',
distance_m: detail.distance_m ?? null,
duration_s: detail.duration_s ?? null,
moving_time_s: detail.moving_time_s ?? null,
elevation_gain_m: detail.elevation_gain_m ?? null,
avg_speed_kmh: detail.avg_speed_kmh ?? null,
max_speed_kmh: detail.max_speed_kmh ?? null,
avg_hr_bpm: detail.avg_hr_bpm ?? null,
max_hr_bpm: detail.max_hr_bpm ?? null,
avg_cadence_rpm: detail.avg_cadence_rpm ?? null,
avg_power_w: detail.avg_power_w ?? null,
mmp: detail.mmp ?? null,
source: detail.source ?? null,
privacy: detail.privacy ?? 'public',
detail_url: `${urlPrefix}activities/${file}`,
track_url: detail.bbox ? `${urlPrefix}activities/${id}.geojson` : null,
preview_coords: null,
handle,
};
byId.set(id, { activity: a, athlete });
} catch { /* skip malformed files */ }
}
}
} catch { /* ignore scan errors */ }
return [...byId.values()].map(({ activity: a, athlete: ath }) => ({
params: { id: a.id },
props: { activity: a, athlete },
props: { activity: a, athlete: ath },
}));
} catch {
return [];
+85 -1
View File
@@ -6,7 +6,7 @@ from pathlib import Path
import pytest
from bincio.render.merge import apply_sidecar, merge_all, parse_sidecar
from bincio.render.merge import apply_sidecar, merge_all, merge_one, parse_sidecar
# ── parse_sidecar ─────────────────────────────────────────────────────────────
@@ -204,3 +204,87 @@ def test_merge_all_idempotent(data_dir):
(data_dir / "_merged" / "activities" / "2024-01-01T080000Z-morning-ride.json").read_text()
)
assert data["title"] == "Renamed"
# ── timeseries file handling ──────────────────────────────────────────────────
@pytest.fixture()
def data_dir_with_timeseries(tmp_path):
    """Create a data directory with one activity and its timeseries file.

    Writes ``activities/<id>.json``, ``activities/<id>.timeseries.json`` and
    ``index.json`` under ``tmp_path``.

    Returns:
        A ``(data_dir, activity_id)`` tuple, where ``data_dir`` is ``tmp_path``.
    """
    activity_id = "2024-01-01T080000Z-morning-ride"
    activities_dir = tmp_path / "activities"
    activities_dir.mkdir()

    # The detail document points at the timeseries file through timeseries_url.
    detail_doc = {
        "id": activity_id,
        "title": "Morning Ride",
        "sport": "cycling",
        "started_at": "2024-01-01T08:00:00Z",
        "description": "",
        "privacy": "public",
        "custom": {},
        "timeseries_url": f"activities/{activity_id}.timeseries.json",
    }
    # Two samples; every channel other than t/lat/lon/elevation is all-None.
    timeseries_doc = {
        "t": [0, 1],
        "lat": [45.0, 45.1],
        "lon": [7.0, 7.1],
        "elevation_m": [300.0, 301.0],
        "speed_kmh": [None, None],
        "hr_bpm": [None, None],
        "cadence_rpm": [None, None],
        "power_w": [None, None],
        "temperature_c": [None, None],
    }
    index_doc = {
        "activities": [
            {
                "id": activity_id,
                "title": "Morning Ride",
                "sport": "cycling",
                "started_at": "2024-01-01T08:00:00Z",
                "privacy": "public",
                "custom": {},
            },
        ]
    }

    (activities_dir / f"{activity_id}.json").write_text(json.dumps(detail_doc))
    (activities_dir / f"{activity_id}.timeseries.json").write_text(json.dumps(timeseries_doc))
    (tmp_path / "index.json").write_text(json.dumps(index_doc))
    return tmp_path, activity_id
def test_merge_all_symlinks_timeseries(data_dir_with_timeseries):
    """After merge_all, the timeseries file in _merged is a symlink to the original."""
    root, activity_id = data_dir_with_timeseries
    ts_name = f"{activity_id}.timeseries.json"

    merge_all(root)

    merged_ts = root / "_merged" / "activities" / ts_name
    assert merged_ts.exists(), "timeseries file not present in _merged"
    assert merged_ts.is_symlink(), "timeseries file should be a symlink (no merge needed)"

    # The link must resolve to the file under the source activities directory.
    original_ts = root / "activities" / ts_name
    assert merged_ts.resolve() == original_ts.resolve()
def test_merge_all_timeseries_survives_sidecar(data_dir_with_timeseries):
    """With a sidecar edit present, the merged detail JSON is a regular file
    while the timeseries file next to it is still a symlink."""
    root, activity_id = data_dir_with_timeseries

    # Add a sidecar that renames the activity.
    edits_dir = root / "edits"
    edits_dir.mkdir()
    sidecar = edits_dir / f"{activity_id}.md"
    sidecar.write_text("---\ntitle: Renamed\n---\n")

    merge_all(root)

    merged_dir = root / "_merged" / "activities"
    merged_detail = merged_dir / f"{activity_id}.json"
    merged_ts = merged_dir / f"{activity_id}.timeseries.json"
    assert not merged_detail.is_symlink(), "sidecar detail should be a copy, not symlink"
    assert merged_ts.exists(), "timeseries should still be present after sidecar merge"
    assert merged_ts.is_symlink(), "timeseries should remain a symlink"
def test_merge_one_symlinks_timeseries(data_dir_with_timeseries):
    """merge_one on a single activity leaves a timeseries symlink in _merged/activities."""
    root, activity_id = data_dir_with_timeseries

    # The merged activities directory is created up front, before merge_one runs.
    merged_dir = root / "_merged" / "activities"
    merged_dir.mkdir(parents=True)

    merge_one(root, activity_id)

    merged_ts = merged_dir / f"{activity_id}.timeseries.json"
    assert merged_ts.exists()
    assert merged_ts.is_symlink()
def test_merge_all_idempotent_with_timeseries(data_dir_with_timeseries):
    """A second merge_all run leaves the timeseries symlink present and intact."""
    root, activity_id = data_dir_with_timeseries

    for _ in range(2):
        merge_all(root)

    merged_ts = root / "_merged" / "activities" / f"{activity_id}.timeseries.json"
    assert merged_ts.exists()
    assert merged_ts.is_symlink()
+145 -1
View File
@@ -1,4 +1,16 @@
from bincio.extract.writer import make_activity_id, build_summary, _slugify
import json
from pathlib import Path
import pytest
from bincio.extract.writer import (
make_activity_id,
build_summary,
write_activity,
finalize_pending,
cleanup_pending,
_slugify,
)
from bincio.extract.metrics import ComputedMetrics
from bincio.extract.models import ParsedActivity, DataPoint
from datetime import datetime, timezone
@@ -49,6 +61,138 @@ def test_id_utc_conversion():
assert make_activity_id(act) == "2024-06-01T073012Z"
def _dummy_metrics(**overrides):
    """Build a ComputedMetrics populated with fixed placeholder values.

    Any keyword argument replaces the placeholder of the same name.
    """
    fields = {
        "distance_m": 10000.0,
        "duration_s": 3600,
        "moving_time_s": 3500,
        "elevation_gain_m": 100.0,
        "elevation_loss_m": 95.0,
        "avg_speed_kmh": 10.0,
        "max_speed_kmh": 20.0,
        "avg_hr_bpm": None,
        "max_hr_bpm": None,
        "avg_cadence_rpm": None,
        "avg_power_w": None,
        "max_power_w": None,
        "bbox": None,
        "start_latlng": None,
        "end_latlng": None,
        "mmp": None,
        "best_efforts": None,
        "best_climb_m": None,
    }
    return ComputedMetrics(**{**fields, **overrides})
# ── write_activity (timeseries split) ────────────────────────────────────────
def test_write_activity_creates_timeseries_file(tmp_path):
    """write_activity writes the timeseries to its own file and references it
    from the detail JSON through timeseries_url instead of embedding it."""
    started = datetime(2024, 6, 1, 7, 30, 12, tzinfo=timezone.utc)
    # A single data point is enough to make the timeseries non-empty.
    single_point = DataPoint(timestamp=started, lat=45.0, lon=7.0, elevation_m=300.0)
    activity = ParsedActivity(
        points=[single_point],
        sport="cycling",
        started_at=started,
        source_file="test.fit",
        source_hash="sha256:" + "a" * 56,
    )

    activity_id = write_activity(activity, _dummy_metrics(), tmp_path)

    acts_dir = tmp_path / "activities"
    detail_path = acts_dir / f"{activity_id}.json"
    ts_path = acts_dir / f"{activity_id}.timeseries.json"
    assert detail_path.exists(), "detail JSON not created"
    assert ts_path.exists(), "timeseries JSON not created"

    detail = json.loads(detail_path.read_text())
    assert "timeseries" not in detail, "timeseries must NOT be embedded in detail"
    assert detail["timeseries_url"] == f"activities/{activity_id}.timeseries.json"

    ts_data = json.loads(ts_path.read_text())
    assert "t" in ts_data, "timeseries file must have 't' array"
def test_write_activity_no_points_no_timeseries_file(tmp_path):
    """With an empty points list, timeseries_url is None and no timeseries
    file is written."""
    started = datetime(2024, 6, 1, 7, 30, 12, tzinfo=timezone.utc)
    activity = ParsedActivity(
        points=[],
        sport="cycling",
        started_at=started,
        source_file="test.fit",
        source_hash="sha256:" + "b" * 56,
    )

    activity_id = write_activity(activity, _dummy_metrics(), tmp_path)

    acts_dir = tmp_path / "activities"
    detail = json.loads((acts_dir / f"{activity_id}.json").read_text())
    ts_path = acts_dir / f"{activity_id}.timeseries.json"
    assert detail["timeseries_url"] is None
    assert not ts_path.exists()
def test_write_activity_pending_creates_pending_timeseries(tmp_path):
    """With pending=True, both the detail and the timeseries files are written
    under their ``.<tag>.pending`` names."""
    started = datetime(2024, 6, 1, 7, 30, 12, tzinfo=timezone.utc)
    activity = ParsedActivity(
        points=[DataPoint(timestamp=started, lat=45.0, lon=7.0)],
        sport="cycling",
        started_at=started,
        source_file="test.fit",
        source_hash="sha256:" + "c" * 56,
    )

    activity_id = write_activity(activity, _dummy_metrics(), tmp_path, pending=True)

    # The tag is the last eight characters of the source hash.
    tag = "c" * 8
    pending_stem = f"{activity_id}.{tag}.pending"
    acts_dir = tmp_path / "activities"
    assert (acts_dir / f"{pending_stem}.json").exists()
    assert (acts_dir / f"{pending_stem}.timeseries.json").exists()
def test_finalize_pending_promotes_timeseries(tmp_path):
    """finalize_pending should move both pending files (detail and timeseries)
    to their final paths, leaving no pending file behind."""
    ts = datetime(2024, 6, 1, 7, 30, 12, tzinfo=timezone.utc)
    source_hash = "sha256:" + "d" * 56
    act = ParsedActivity(
        points=[DataPoint(timestamp=ts, lat=45.0, lon=7.0)],
        sport="cycling",
        started_at=ts,
        source_file="test.fit",
        source_hash=source_hash,
    )
    metrics = _dummy_metrics()
    activity_id = write_activity(act, metrics, tmp_path, pending=True)

    final_id = finalize_pending(tmp_path, activity_id, source_hash)

    acts_dir = tmp_path / "activities"
    assert (acts_dir / f"{final_id}.json").exists()
    assert (acts_dir / f"{final_id}.timeseries.json").exists()
    # Pending files must be gone — check the detail JSON as well as the
    # timeseries, so a promotion that leaves a stale pending detail is caught.
    tag = "d" * 8
    assert not (acts_dir / f"{activity_id}.{tag}.pending.json").exists()
    assert not (acts_dir / f"{activity_id}.{tag}.pending.timeseries.json").exists()
def test_cleanup_pending_removes_timeseries(tmp_path):
    """cleanup_pending deletes both the pending detail JSON and the pending
    timeseries file."""
    started = datetime(2024, 6, 1, 7, 30, 12, tzinfo=timezone.utc)
    activity = ParsedActivity(
        points=[DataPoint(timestamp=started, lat=45.0, lon=7.0)],
        sport="cycling",
        started_at=started,
        source_file="test.fit",
        source_hash="sha256:" + "e" * 56,
    )
    activity_id = write_activity(activity, _dummy_metrics(), tmp_path, pending=True)

    cleanup_pending(tmp_path, activity_id, "sha256:" + "e" * 56)

    pending_stem = f"{activity_id}.{'e' * 8}.pending"
    acts_dir = tmp_path / "activities"
    assert not (acts_dir / f"{pending_stem}.json").exists()
    assert not (acts_dir / f"{pending_stem}.timeseries.json").exists()
def test_build_summary_required_fields():
"""build_summary should include all fields needed by the schema."""
act = _dummy_activity("Test Ride")