fixing stuff after splitting jsons
This commit is contained in:
+85
-1
@@ -6,7 +6,7 @@ from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from bincio.render.merge import apply_sidecar, merge_all, parse_sidecar
|
||||
from bincio.render.merge import apply_sidecar, merge_all, merge_one, parse_sidecar
|
||||
|
||||
|
||||
# ── parse_sidecar ─────────────────────────────────────────────────────────────
|
||||
@@ -204,3 +204,87 @@ def test_merge_all_idempotent(data_dir):
|
||||
(data_dir / "_merged" / "activities" / "2024-01-01T080000Z-morning-ride.json").read_text()
|
||||
)
|
||||
assert data["title"] == "Renamed"
|
||||
|
||||
|
||||
# ── timeseries file handling ──────────────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def data_dir_with_timeseries(tmp_path):
    """Build a data directory whose single activity has a split-out timeseries.

    Files written under ``tmp_path``:

    * ``activities/<id>.json`` -- detail JSON whose ``timeseries_url`` names
      the sibling timeseries file,
    * ``activities/<id>.timeseries.json`` -- a two-sample timeseries,
    * ``index.json`` -- an index listing that one activity.

    Returns:
        A ``(tmp_path, activity_id)`` tuple.
    """
    act_id = "2024-01-01T080000Z-morning-ride"
    started_at = "2024-01-01T08:00:00Z"

    acts = tmp_path / "activities"
    acts.mkdir()

    def dump(path, payload):
        # Explicit encoding keeps the fixture independent of the platform's
        # default locale encoding.
        path.write_text(json.dumps(payload), encoding="utf-8")

    detail = {
        "id": act_id,
        "title": "Morning Ride",
        "sport": "cycling",
        "started_at": started_at,
        "description": "",
        "privacy": "public",
        "custom": {},
        "timeseries_url": f"activities/{act_id}.timeseries.json",
    }

    ts_data = {
        "t": [0, 1],
        "lat": [45.0, 45.1],
        "lon": [7.0, 7.1],
        "elevation_m": [300.0, 301.0],
    }
    # The remaining channels carry no readings for either sample.
    for channel in ("speed_kmh", "hr_bpm", "cadence_rpm", "power_w", "temperature_c"):
        ts_data[channel] = [None, None]

    index = {
        "activities": [
            {
                "id": act_id,
                "title": "Morning Ride",
                "sport": "cycling",
                "started_at": started_at,
                "privacy": "public",
                "custom": {},
            },
        ],
    }

    dump(acts / f"{act_id}.json", detail)
    dump(acts / f"{act_id}.timeseries.json", ts_data)
    dump(tmp_path / "index.json", index)
    return tmp_path, act_id
|
||||
|
||||
|
||||
def test_merge_all_symlinks_timeseries(data_dir_with_timeseries):
    """After merge_all, _merged must hold a symlink to the source timeseries."""
    root, activity_id = data_dir_with_timeseries
    ts_name = f"{activity_id}.timeseries.json"
    original = root / "activities" / ts_name
    merged = root / "_merged" / "activities" / ts_name

    merge_all(root)

    assert merged.exists(), "timeseries file not present in _merged"
    assert merged.is_symlink(), "timeseries file should be a symlink (no merge needed)"

    # The link has to resolve to the file under activities/.
    assert merged.resolve() == original.resolve()
|
||||
|
||||
|
||||
def test_merge_all_timeseries_survives_sidecar(data_dir_with_timeseries):
    """A sidecar edit must not prevent the timeseries symlink from being made.

    With a sidecar present the merged detail JSON is expected to be a real
    file rather than a symlink, while the timeseries stays a symlink.
    """
    root, activity_id = data_dir_with_timeseries

    sidecar_dir = root / "edits"
    sidecar_dir.mkdir()
    sidecar = sidecar_dir / f"{activity_id}.md"
    sidecar.write_text("---\ntitle: Renamed\n---\n")

    merge_all(root)

    merged_acts = root / "_merged" / "activities"
    detail_dest = merged_acts / f"{activity_id}.json"
    ts_dest = merged_acts / f"{activity_id}.timeseries.json"

    assert not detail_dest.is_symlink(), "sidecar detail should be a copy, not symlink"
    assert ts_dest.exists(), "timeseries should still be present after sidecar merge"
    assert ts_dest.is_symlink(), "timeseries should remain a symlink"
|
||||
|
||||
|
||||
def test_merge_one_symlinks_timeseries(data_dir_with_timeseries):
    """merge_one must link the activity's .timeseries.json into _merged."""
    root, activity_id = data_dir_with_timeseries

    # The merged activities directory is created up front, before merge_one runs.
    merged_acts = root / "_merged" / "activities"
    merged_acts.mkdir(parents=True)

    merge_one(root, activity_id)

    ts_link = merged_acts / f"{activity_id}.timeseries.json"
    assert ts_link.exists()
    assert ts_link.is_symlink()
|
||||
|
||||
|
||||
def test_merge_all_idempotent_with_timeseries(data_dir_with_timeseries):
    """A second merge_all run must leave the timeseries symlink intact."""
    root, activity_id = data_dir_with_timeseries

    for _ in range(2):
        merge_all(root)

    ts_link = root / "_merged" / "activities" / f"{activity_id}.timeseries.json"
    assert ts_link.exists()
    assert ts_link.is_symlink()
|
||||
|
||||
+145
-1
@@ -1,4 +1,16 @@
|
||||
from bincio.extract.writer import make_activity_id, build_summary, _slugify
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from bincio.extract.writer import (
|
||||
make_activity_id,
|
||||
build_summary,
|
||||
write_activity,
|
||||
finalize_pending,
|
||||
cleanup_pending,
|
||||
_slugify,
|
||||
)
|
||||
from bincio.extract.metrics import ComputedMetrics
|
||||
from bincio.extract.models import ParsedActivity, DataPoint
|
||||
from datetime import datetime, timezone
|
||||
@@ -49,6 +61,138 @@ def test_id_utc_conversion():
|
||||
assert make_activity_id(act) == "2024-06-01T073012Z"
|
||||
|
||||
|
||||
def _dummy_metrics(**overrides):
    """Return a ComputedMetrics filled with fixed sample values.

    Any keyword argument replaces the sample value of the same name (or is
    passed through as an extra field) before ComputedMetrics is constructed.
    """
    sample = {
        "distance_m": 10000.0,
        "duration_s": 3600,
        "moving_time_s": 3500,
        "elevation_gain_m": 100.0,
        "elevation_loss_m": 95.0,
        "avg_speed_kmh": 10.0,
        "max_speed_kmh": 20.0,
        "avg_hr_bpm": None,
        "max_hr_bpm": None,
        "avg_cadence_rpm": None,
        "avg_power_w": None,
        "max_power_w": None,
        "bbox": None,
        "start_latlng": None,
        "end_latlng": None,
        "mmp": None,
        "best_efforts": None,
        "best_climb_m": None,
    }
    # Later entries win, so overrides take precedence over the samples.
    return ComputedMetrics(**{**sample, **overrides})
|
||||
|
||||
|
||||
# ── write_activity (timeseries split) ────────────────────────────────────────
|
||||
|
||||
def test_write_activity_creates_timeseries_file(tmp_path):
    """write_activity must emit a standalone .timeseries.json file.

    The detail JSON is expected to reference it through ``timeseries_url``
    and must not carry an embedded ``timeseries`` key.
    """
    started = datetime(2024, 6, 1, 7, 30, 12, tzinfo=timezone.utc)
    # A single data point is enough for the timeseries to be non-null.
    point = DataPoint(timestamp=started, lat=45.0, lon=7.0, elevation_m=300.0)
    activity = ParsedActivity(
        points=[point],
        sport="cycling",
        started_at=started,
        source_file="test.fit",
        source_hash="sha256:" + "a" * 56,
    )

    activity_id = write_activity(activity, _dummy_metrics(), tmp_path)

    acts_dir = tmp_path / "activities"
    detail_path = acts_dir / f"{activity_id}.json"
    ts_path = acts_dir / f"{activity_id}.timeseries.json"

    assert detail_path.exists(), "detail JSON not created"
    assert ts_path.exists(), "timeseries JSON not created"

    detail = json.loads(detail_path.read_text())
    assert "timeseries" not in detail, "timeseries must NOT be embedded in detail"
    assert detail["timeseries_url"] == f"activities/{activity_id}.timeseries.json"

    ts_payload = json.loads(ts_path.read_text())
    assert "t" in ts_payload, "timeseries file must have 't' array"
|
||||
|
||||
|
||||
def test_write_activity_no_points_no_timeseries_file(tmp_path):
    """Without data points, no timeseries file is written and the URL is None."""
    started = datetime(2024, 6, 1, 7, 30, 12, tzinfo=timezone.utc)
    activity = ParsedActivity(
        points=[],
        sport="cycling",
        started_at=started,
        source_file="test.fit",
        source_hash="sha256:" + "b" * 56,
    )

    activity_id = write_activity(activity, _dummy_metrics(), tmp_path)

    acts_dir = tmp_path / "activities"
    detail_path = acts_dir / f"{activity_id}.json"
    detail = json.loads(detail_path.read_text())
    ts_path = acts_dir / f"{activity_id}.timeseries.json"

    assert detail["timeseries_url"] is None
    assert not ts_path.exists()
|
||||
|
||||
|
||||
def test_write_activity_pending_creates_pending_timeseries(tmp_path):
    """With pending=True, both .pending.json and .pending.timeseries.json appear."""
    started = datetime(2024, 6, 1, 7, 30, 12, tzinfo=timezone.utc)
    activity = ParsedActivity(
        points=[DataPoint(timestamp=started, lat=45.0, lon=7.0)],
        sport="cycling",
        started_at=started,
        source_file="test.fit",
        source_hash="sha256:" + "c" * 56,
    )

    activity_id = write_activity(activity, _dummy_metrics(), tmp_path, pending=True)

    acts_dir = tmp_path / "activities"
    tag = "c" * 8
    # Detail file first, then its timeseries companion.
    for suffix in ("pending.json", "pending.timeseries.json"):
        assert (acts_dir / f"{activity_id}.{tag}.{suffix}").exists()
|
||||
|
||||
|
||||
def test_finalize_pending_promotes_timeseries(tmp_path):
    """finalize_pending should move the pending files to their final paths.

    After finalizing, ``<final_id>.json`` and ``<final_id>.timeseries.json``
    must exist and neither pending file may be left behind.
    """
    ts = datetime(2024, 6, 1, 7, 30, 12, tzinfo=timezone.utc)
    # Built once so write_activity and finalize_pending see the same hash.
    source_hash = "sha256:" + "d" * 56
    act = ParsedActivity(
        points=[DataPoint(timestamp=ts, lat=45.0, lon=7.0)],
        sport="cycling",
        started_at=ts,
        source_file="test.fit",
        source_hash=source_hash,
    )
    metrics = _dummy_metrics()
    activity_id = write_activity(act, metrics, tmp_path, pending=True)

    acts_dir = tmp_path / "activities"
    # NOTE(review): the tag looks like the first 8 hex chars of the source
    # hash -- confirm against write_activity.
    tag = "d" * 8
    pending_detail = acts_dir / f"{activity_id}.{tag}.pending.json"
    pending_ts = acts_dir / f"{activity_id}.{tag}.pending.timeseries.json"

    final_id = finalize_pending(tmp_path, activity_id, source_hash)

    assert (acts_dir / f"{final_id}.json").exists()
    assert (acts_dir / f"{final_id}.timeseries.json").exists()

    # Pending files must be gone: both the detail JSON and its timeseries.
    assert not pending_detail.exists()
    assert not pending_ts.exists()
|
||||
|
||||
|
||||
def test_cleanup_pending_removes_timeseries(tmp_path):
    """cleanup_pending must delete the pending detail and timeseries files."""
    started = datetime(2024, 6, 1, 7, 30, 12, tzinfo=timezone.utc)
    digest = "sha256:" + "e" * 56
    activity = ParsedActivity(
        points=[DataPoint(timestamp=started, lat=45.0, lon=7.0)],
        sport="cycling",
        started_at=started,
        source_file="test.fit",
        source_hash=digest,
    )
    activity_id = write_activity(activity, _dummy_metrics(), tmp_path, pending=True)

    cleanup_pending(tmp_path, activity_id, digest)

    tag = "e" * 8
    acts_dir = tmp_path / "activities"
    # Detail file first, then its timeseries companion.
    for suffix in ("pending.json", "pending.timeseries.json"):
        assert not (acts_dir / f"{activity_id}.{tag}.{suffix}").exists()
|
||||
|
||||
|
||||
def test_build_summary_required_fields():
|
||||
"""build_summary should include all fields needed by the schema."""
|
||||
act = _dummy_activity("Test Ride")
|
||||
|
||||
Reference in New Issue
Block a user