second pass. low

This commit is contained in:
Davide Scaini
2026-04-01 19:00:28 +02:00
parent 3d364c3992
commit bd5831c2fd
11 changed files with 277 additions and 62 deletions
+7 -2
View File
@@ -554,10 +554,15 @@ async def upload_activity(file: UploadFile = File(...)) -> JSONResponse:
if suffix not in _SUPPORTED_SUFFIXES: if suffix not in _SUPPORTED_SUFFIXES:
raise HTTPException(400, f"Unsupported file type '{Path(name).suffix}'. Expected FIT, GPX, or TCX.") raise HTTPException(400, f"Unsupported file type '{Path(name).suffix}'. Expected FIT, GPX, or TCX.")
_MAX_UPLOAD_BYTES = 50 * 1024 * 1024 # 50 MB
contents = await file.read()
if len(contents) > _MAX_UPLOAD_BYTES:
raise HTTPException(413, f"File too large ({len(contents)} bytes). Maximum is 50 MB.")
staging = dd / "_uploads" staging = dd / "_uploads"
staging.mkdir(exist_ok=True) staging.mkdir(exist_ok=True)
staged = staging / name staged = staging / name
staged.write_bytes(await file.read()) staged.write_bytes(contents)
try: try:
from bincio.extract.metrics import compute from bincio.extract.metrics import compute
@@ -592,7 +597,7 @@ async def upload_activity(file: UploadFile = File(...)) -> JSONResponse:
except HTTPException: except HTTPException:
raise raise
except Exception as exc: except Exception as exc:
raise HTTPException(422, str(exc)) raise HTTPException(422, f"Failed to process activity file: {type(exc).__name__}")
finally: finally:
staged.unlink(missing_ok=True) staged.unlink(missing_ok=True)
+65 -21
View File
@@ -46,6 +46,9 @@ def _process_file(path: Path) -> dict:
"""Runs inside a worker process. Only receives a Path (tiny pickle). """Runs inside a worker process. Only receives a Path (tiny pickle).
All heavy shared data (_known_hashes, _strava_lookup, etc.) is already All heavy shared data (_known_hashes, _strava_lookup, etc.) is already
in the worker's memory from the initializer — zero per-task overhead. in the worker's memory from the initializer — zero per-task overhead.
Writes to pending files (not final paths) so the main process can
arbitrate collisions and pick the best version.
""" """
from bincio.extract.metrics import compute from bincio.extract.metrics import compute
from bincio.extract.parsers.factory import parse_file from bincio.extract.parsers.factory import parse_file
@@ -80,11 +83,17 @@ def _process_file(path: Path) -> dict:
activity, metrics, _output_dir, activity, metrics, _output_dir,
privacy=_privacy, privacy=_privacy,
rdp_epsilon=_rdp_epsilon, rdp_epsilon=_rdp_epsilon,
pending=True,
) )
summary = build_summary(activity, metrics, activity_id, _privacy) summary = build_summary(activity, metrics, activity_id, _privacy)
except Exception as exc: except Exception as exc:
return {"status": "error", "path": str(path), "error": str(exc)} return {"status": "error", "path": str(path), "error": str(exc)}
# Quality signals for the main process to compare competing results
sensor_channels = sum(1 for v in [
metrics.avg_hr_bpm, metrics.avg_power_w, metrics.avg_cadence_rpm,
] if v is not None)
return { return {
"status": "ok", "status": "ok",
"summary": summary, "summary": summary,
@@ -94,6 +103,8 @@ def _process_file(path: Path) -> dict:
"distance_m": metrics.distance_m, "distance_m": metrics.distance_m,
"source": summary.get("source"), "source": summary.get("source"),
"mmp": metrics.mmp, "mmp": metrics.mmp,
"point_count": len(activity.points),
"sensor_channels": sensor_channels,
} }
@@ -177,6 +188,8 @@ def extract(
summaries: list[dict] = [] summaries: list[dict] = []
errors: list[tuple[str, str]] = [] errors: list[tuple[str, str]] = []
skipped = 0 skipped = 0
# Collect all pending results, grouped by activity_id for collision arbitration
pending_by_id: dict[str, list[dict]] = {}
with Progress( with Progress(
TextColumn("[progress.description]{task.description}"), TextColumn("[progress.description]{task.description}"),
@@ -202,30 +215,61 @@ def extract(
elif result["status"] == "error": elif result["status"] == "error":
errors.append((result["path"], result["error"])) errors.append((result["path"], result["error"]))
else: else:
# Near-duplicate check — must be sequential (stateful) pending_by_id.setdefault(result["id"], []).append(result)
from datetime import datetime
started_at = datetime.fromisoformat(result["started_at"])
near_id = dedup.find_near_duplicate(started_at, result["distance_m"])
if near_id: # ── Arbitrate collisions and finalize pending files ───────────────────────
canonical = dedup.pick_canonical(near_id, result.get("source")) from bincio.extract.writer import (
if canonical != "__new__": activity_quality, cleanup_pending, finalize_pending, write_athlete_json, write_index,
_patch_duplicate_of(cfg.output_dir, result["id"], near_id) )
skipped += 1
continue
_patch_duplicate_of(cfg.output_dir, near_id, result["id"])
dedup._records[near_id].duplicate_of = result["id"]
dedup.register(ActivityRecord( for activity_id, candidates in pending_by_id.items():
id=result["id"], # Pick the best candidate by quality score
source_hash=result["hash"], candidates.sort(key=activity_quality, reverse=True)
started_at=started_at, winner = candidates[0]
distance_m=result["distance_m"],
source=result.get("source"), # Clean up losing candidates' pending files
)) for loser in candidates[1:]:
summaries.append(result["summary"]) cleanup_pending(cfg.output_dir, activity_id, loser["hash"])
skipped += 1
# Near-duplicate check against already-known activities
from datetime import datetime
started_at = datetime.fromisoformat(winner["started_at"])
near_id = dedup.find_near_duplicate(started_at, winner["distance_m"])
if near_id:
canonical = dedup.pick_canonical(near_id, winner.get("source"))
if canonical != "__new__":
# Existing is better — finalize winner as duplicate, then patch it
final_id = finalize_pending(cfg.output_dir, activity_id, winner["hash"])
_patch_duplicate_of(cfg.output_dir, final_id, near_id)
skipped += 1
continue
# New is better — patch the existing one as duplicate
final_id = finalize_pending(cfg.output_dir, activity_id, winner["hash"])
_patch_duplicate_of(cfg.output_dir, near_id, final_id)
dedup._records[near_id].duplicate_of = final_id
else:
final_id = finalize_pending(cfg.output_dir, activity_id, winner["hash"])
# Update summary with the finalized ID (may include hash suffix)
summary = winner["summary"]
if final_id != activity_id:
summary = dict(summary)
summary["id"] = final_id
summary["detail_url"] = f"activities/{final_id}.json"
if summary.get("track_url"):
summary["track_url"] = f"activities/{final_id}.geojson"
dedup.register(ActivityRecord(
id=final_id,
source_hash=winner["hash"],
started_at=started_at,
distance_m=winner["distance_m"],
source=winner.get("source"),
))
summaries.append(summary)
from bincio.extract.writer import write_athlete_json, write_index
existing = _load_existing_summaries(cfg.output_dir) existing = _load_existing_summaries(cfg.output_dir)
merged = {s["id"]: s for s in existing} merged = {s["id"]: s for s in existing}
for s in summaries: for s in summaries:
+2
View File
@@ -76,6 +76,8 @@ def _apply_extensions(pt: gpxpy.gpx.GPXTrackPoint, dp: DataPoint) -> None:
dp.temperature_c = float(val) dp.temperature_c = float(val)
elif tag == "speed": elif tag == "speed":
dp.speed_kmh = float(val) * 3.6 # m/s → km/h dp.speed_kmh = float(val) * 3.6 # m/s → km/h
elif tag in ("pwr", "power", "watts"):
dp.power_w = int(float(val))
def _strip_ns(tag: str) -> str: def _strip_ns(tag: str) -> str:
+2 -2
View File
@@ -97,8 +97,8 @@ def _parse_ts(s: str) -> datetime:
return datetime.strptime(s, fmt).replace(tzinfo=timezone.utc) return datetime.strptime(s, fmt).replace(tzinfo=timezone.utc)
except ValueError: except ValueError:
continue continue
# Numeric offset like +02:00 or -05:30 — parse with %z then convert to UTC # Numeric offset like +02:00, -05:30, or +0200 — parse with %z then convert to UTC
m = _re.match(r"^(.+)([+-]\d{2}:\d{2})$", s) m = _re.match(r"^(.+)([+-]\d{2}:?\d{2})$", s)
if m: if m:
body, off = m.group(1), m.group(2).replace(":", "") body, off = m.group(1), m.group(2).replace(":", "")
for fmt in ("%Y-%m-%dT%H:%M:%S.%f", "%Y-%m-%dT%H:%M:%S"): for fmt in ("%Y-%m-%dT%H:%M:%S.%f", "%Y-%m-%dT%H:%M:%S"):
+6 -2
View File
@@ -67,8 +67,12 @@ def build_geojson(
if p.lon is not None and p.lat is not None if p.lon is not None and p.lat is not None
] ]
# Parallel speed array for gradient coloring # Parallel speed array for gradient coloring — same filter as coordinates
speeds = [round(p.speed_kmh, 2) if p.speed_kmh is not None else None for p in simplified] speeds = [
round(p.speed_kmh, 2) if p.speed_kmh is not None else None
for p in simplified
if p.lon is not None and p.lat is not None
]
return { return {
"type": "Feature", "type": "Feature",
+101 -11
View File
@@ -33,8 +33,16 @@ def write_activity(
privacy: str = "public", privacy: str = "public",
duplicate_of: str | None = None, duplicate_of: str | None = None,
rdp_epsilon: float = 0.0001, rdp_epsilon: float = 0.0001,
pending: bool = False,
) -> str: ) -> str:
"""Write {id}.json and (if GPS available) {id}.geojson. Returns the ID.""" """Write {id}.json and (if GPS available) {id}.geojson. Returns the ID.
When pending=True, writes to a uniquely-named pending file
({id}.{hash[:8]}.pending.json) instead of the final path. This avoids
race conditions when multiple workers process activities with the same ID.
The main process is responsible for promoting pending files to final paths
via finalize_pending().
"""
activity_id = make_activity_id(activity) activity_id = make_activity_id(activity)
acts_dir = output_dir / "activities" acts_dir = output_dir / "activities"
acts_dir.mkdir(parents=True, exist_ok=True) acts_dir.mkdir(parents=True, exist_ok=True)
@@ -82,26 +90,108 @@ def write_activity(
"custom": {}, "custom": {},
} }
json_path = acts_dir / f"{activity_id}.json" if pending:
# Collision guard: if a *different* activity already has this ID, append a # Write to a unique pending file — no collision possible
# short hash suffix to disambiguate (same hash = idempotent re-extract). tag = activity.source_hash[-8:] if activity.source_hash else "unknown"
if json_path.exists(): json_path = acts_dir / f"{activity_id}.{tag}.pending.json"
existing = json.loads(json_path.read_text(encoding="utf-8")) else:
if existing.get("source_hash") != activity.source_hash: json_path = acts_dir / f"{activity_id}.json"
activity_id = f"{activity_id}-{activity.source_hash[-6:]}" # Legacy non-pending path: collision guard for callers that don't use
json_path = acts_dir / f"{activity_id}.json" # the pending workflow (e.g. edit server upload_activity)
detail["id"] = activity_id if json_path.exists():
existing = json.loads(json_path.read_text(encoding="utf-8"))
if existing.get("source_hash") != activity.source_hash:
activity_id = f"{activity_id}-{activity.source_hash[-6:]}"
json_path = acts_dir / f"{activity_id}.json"
detail["id"] = activity_id
json_path.write_text(json.dumps(detail, indent=2, ensure_ascii=False)) json_path.write_text(json.dumps(detail, indent=2, ensure_ascii=False))
# ── GeoJSON track ──────────────────────────────────────────────────────── # ── GeoJSON track ────────────────────────────────────────────────────────
if has_gps: if has_gps:
geojson = build_geojson(activity.points, activity_id, epsilon=rdp_epsilon) geojson = build_geojson(activity.points, activity_id, epsilon=rdp_epsilon)
geojson_path = acts_dir / f"{activity_id}.geojson" if pending:
geojson_path = acts_dir / f"{activity_id}.{tag}.pending.geojson"
else:
geojson_path = acts_dir / f"{activity_id}.geojson"
geojson_path.write_text(json.dumps(geojson, indent=2, ensure_ascii=False)) geojson_path.write_text(json.dumps(geojson, indent=2, ensure_ascii=False))
return activity_id return activity_id
def activity_quality(result: dict) -> int:
    """Score a worker result so competing versions of one activity can be ranked.

    The score is the sum of three terms, from most to least significant:

    * the source's entry in ``_SOURCE_QUALITY`` (0 when the source is missing
      or not listed), multiplied by 100;
    * the number of sensor channels reported by the worker, multiplied by 10;
    * the point count, capped at 50000 and integer-divided by 100.

    Args:
        result: Result dict produced by a worker; reads the ``"source"``,
            ``"sensor_channels"`` and ``"point_count"`` keys.

    Returns:
        The quality score; higher means a better candidate.
    """
    from bincio.extract.dedup import _SOURCE_QUALITY

    # A missing or None source is looked up as "" so it falls back to rank 0.
    source_rank = _SOURCE_QUALITY.get(result.get("source") or "", 0)
    channel_count = result.get("sensor_channels", 0)
    # Cap the point count so a huge track cannot outweigh the other signals.
    capped_points = min(result.get("point_count", 0), 50000)

    return source_rank * 100 + channel_count * 10 + capped_points // 100
def finalize_pending(output_dir: Path, activity_id: str, source_hash: str) -> str:
    """Promote a pending activity file (and its GeoJSON) to the final path.

    The pending files are expected at
    ``output_dir / "activities" / "{activity_id}.{tag}.pending.json"`` and
    ``...pending.geojson``, where ``tag`` is the last 8 characters of
    ``source_hash`` (or ``"unknown"`` when ``source_hash`` is empty).

    If ``{activity_id}.json`` already exists and records a different
    ``source_hash``, the new files are stored under
    ``{activity_id}-{source_hash[-6:]}`` instead, and the ``id`` stored inside
    the JSON and the GeoJSON properties is rewritten to match. An existing
    final file with the same ``source_hash`` is overwritten.

    Missing pending files are skipped silently; the final ID is still returned.

    Args:
        output_dir: Root output directory containing ``activities/``.
        activity_id: ID the pending files were written under.
        source_hash: Source hash of the activity being finalized.

    Returns:
        The final activity ID, which carries a hash suffix on collision.
    """
    acts_dir = output_dir / "activities"
    tag = source_hash[-8:] if source_hash else "unknown"
    pending_json = acts_dir / f"{activity_id}.{tag}.pending.json"
    pending_geojson = acts_dir / f"{activity_id}.{tag}.pending.geojson"

    final_id = activity_id
    final_json = acts_dir / f"{final_id}.json"

    # Collision check: a *different* activity already owns this ID.
    if final_json.exists():
        existing = json.loads(final_json.read_text(encoding="utf-8"))
        if existing.get("source_hash") != source_hash:
            final_id = f"{activity_id}-{source_hash[-6:]}"
            final_json = acts_dir / f"{final_id}.json"
    id_changed = final_id != activity_id

    if pending_json.exists():
        if id_changed:
            detail = json.loads(pending_json.read_text(encoding="utf-8"))
            detail["id"] = final_id
            # Explicit UTF-8: the payload is ensure_ascii=False and is read
            # back as UTF-8, so the locale default encoding must not be used.
            pending_json.write_text(
                json.dumps(detail, indent=2, ensure_ascii=False),
                encoding="utf-8",
            )
        # replace() overwrites an existing target on every platform, whereas
        # rename() raises on Windows when the target already exists.
        pending_json.replace(final_json)

    if pending_geojson.exists():
        if id_changed:
            geo = json.loads(pending_geojson.read_text(encoding="utf-8"))
            # Tolerate a GeoJSON feature without a "properties" object.
            geo.setdefault("properties", {})["id"] = final_id
            pending_geojson.write_text(
                json.dumps(geo, indent=2, ensure_ascii=False),
                encoding="utf-8",
            )
        pending_geojson.replace(acts_dir / f"{final_id}.geojson")

    return final_id
def cleanup_pending(output_dir: Path, activity_id: str, source_hash: str) -> None:
    """Delete the pending JSON and GeoJSON files of a candidate that lost arbitration.

    The files are located under ``output_dir / "activities"`` and named
    ``{activity_id}.{tag}.pending.json`` / ``.pending.geojson``, where ``tag``
    is the last 8 characters of ``source_hash`` (``"unknown"`` when it is
    empty). Files that do not exist are ignored.
    """
    short_hash = source_hash[-8:] if source_hash else "unknown"
    stem = f"{activity_id}.{short_hash}"
    activities = output_dir / "activities"

    (activities / f"{stem}.pending.json").unlink(missing_ok=True)
    (activities / f"{stem}.pending.geojson").unlink(missing_ok=True)
def build_summary( def build_summary(
activity: ParsedActivity, activity: ParsedActivity,
metrics: ComputedMetrics, metrics: ComputedMetrics,
+11 -3
View File
@@ -1,6 +1,6 @@
<script lang="ts"> <script lang="ts">
import * as Plot from '@observablehq/plot'; import * as Plot from '@observablehq/plot';
import { onMount } from 'svelte'; import { onMount, onDestroy } from 'svelte';
import type { Timeseries, AthleteZones } from '../lib/types'; import type { Timeseries, AthleteZones } from '../lib/types';
export let timeseries: Timeseries; export let timeseries: Timeseries;
@@ -82,8 +82,15 @@
// Range handles — reset whenever the metric or chart type changes // Range handles — reset whenever the metric or chart type changes
let trimMin = 0; let trimMin = 0;
let trimMax = 100; let trimMax = 100;
$: if (dataMin !== undefined) resetTrim(dataMin, dataMax); let lastResetTab: Tab | null = null;
function resetTrim(lo: number, hi: number) { trimMin = lo; trimMax = hi; } $: {
// Reset trim on tab change OR when data range changes
if (activeTab !== lastResetTab || trimMin < dataMin || trimMax > dataMax) {
trimMin = dataMin;
trimMax = dataMax;
lastResetTab = activeTab;
}
}
$: step = (dataMax - dataMin) / 200 || 1; $: step = (dataMax - dataMin) / 200 || 1;
@@ -116,6 +123,7 @@
// ── Rendering ──────────────────────────────────────────────────────────── // ── Rendering ────────────────────────────────────────────────────────────
onMount(() => { renderChart(); }); onMount(() => { renderChart(); });
onDestroy(() => { chart?.remove(); chart = null; });
$: if (chartEl) { $: if (chartEl) {
activeTab; xMode; chartType; histData; histThresholds; alignZones; activeTab; xMode; chartType; histData; histThresholds; alignZones;
+3 -1
View File
@@ -27,7 +27,9 @@
} }
onMount(async () => { onMount(async () => {
activeTab = (new URLSearchParams(window.location.search).get('tab') as Tab) ?? 'power'; const TABS: Tab[] = ['power', 'records', 'profile'];
const rawTab = new URLSearchParams(window.location.search).get('tab');
activeTab = TABS.includes(rawTab as Tab) ? (rawTab as Tab) : 'power';
mounted = true; mounted = true;
try { try {
const [athleteRes, indexRes] = await Promise.all([ const [athleteRes, indexRes] = await Promise.all([
+19 -19
View File
@@ -55,7 +55,7 @@ def test_parse_sidecar_frontmatter_only(tmp_path):
# ── apply_sidecar ───────────────────────────────────────────────────────────── # ── apply_sidecar ─────────────────────────────────────────────────────────────
BASE_DETAIL = { BASE_DETAIL = {
"id": "2024-01-01T08:00:00Z_cycling", "id": "2024-01-01T080000Z-morning-ride",
"title": "Morning Ride", "title": "Morning Ride",
"sport": "cycling", "sport": "cycling",
"started_at": "2024-01-01T08:00:00Z", "started_at": "2024-01-01T08:00:00Z",
@@ -118,21 +118,21 @@ def data_dir(tmp_path):
acts = tmp_path / "activities" acts = tmp_path / "activities"
acts.mkdir() acts.mkdir()
# Two activities # Two activities
for act_id, title in [ for act_id, title, sport, started_at in [
("2024-01-01T08:00:00Z_cycling", "Morning Ride"), ("2024-01-01T080000Z-morning-ride", "Morning Ride", "cycling", "2024-01-01T08:00:00Z"),
("2024-01-02T09:00:00Z_running", "Easy Run"), ("2024-01-02T090000Z-easy-run", "Easy Run", "running", "2024-01-02T09:00:00Z"),
]: ]:
detail = { detail = {
"id": act_id, "title": title, "sport": act_id.split("_")[1], "id": act_id, "title": title, "sport": sport,
"started_at": act_id.split("_")[0], "started_at": started_at,
"description": "", "privacy": "public", "custom": {}, "description": "", "privacy": "public", "custom": {},
} }
(acts / f"{act_id}.json").write_text(json.dumps(detail)) (acts / f"{act_id}.json").write_text(json.dumps(detail))
# Index # Index
index = {"activities": [ index = {"activities": [
{"id": "2024-01-01T08:00:00Z_cycling", "title": "Morning Ride", {"id": "2024-01-01T080000Z-morning-ride", "title": "Morning Ride",
"sport": "cycling", "started_at": "2024-01-01T08:00:00Z", "privacy": "public", "custom": {}}, "sport": "cycling", "started_at": "2024-01-01T08:00:00Z", "privacy": "public", "custom": {}},
{"id": "2024-01-02T09:00:00Z_running", "title": "Easy Run", {"id": "2024-01-02T090000Z-easy-run", "title": "Easy Run",
"sport": "running", "started_at": "2024-01-02T09:00:00Z", "privacy": "public", "custom": {}}, "sport": "running", "started_at": "2024-01-02T09:00:00Z", "privacy": "public", "custom": {}},
]} ]}
(tmp_path / "index.json").write_text(json.dumps(index)) (tmp_path / "index.json").write_text(json.dumps(index))
@@ -145,20 +145,20 @@ def test_merge_all_no_sidecars(data_dir):
merged = data_dir / "_merged" merged = data_dir / "_merged"
assert merged.exists() assert merged.exists()
# Unmodified files are symlinked # Unmodified files are symlinked
detail_link = merged / "activities" / "2024-01-01T08:00:00Z_cycling.json" detail_link = merged / "activities" / "2024-01-01T080000Z-morning-ride.json"
assert detail_link.is_symlink() assert detail_link.is_symlink()
def test_merge_all_applies_sidecar(data_dir): def test_merge_all_applies_sidecar(data_dir):
edits = data_dir / "edits" edits = data_dir / "edits"
edits.mkdir() edits.mkdir()
(edits / "2024-01-01T08:00:00Z_cycling.md").write_text( (edits / "2024-01-01T080000Z-morning-ride.md").write_text(
"---\ntitle: Epic Ride\nhighlight: true\n---\n\nWhat a day!" "---\ntitle: Epic Ride\nhighlight: true\n---\n\nWhat a day!"
) )
n = merge_all(data_dir) n = merge_all(data_dir)
assert n == 1 assert n == 1
merged_json = data_dir / "_merged" / "activities" / "2024-01-01T08:00:00Z_cycling.json" merged_json = data_dir / "_merged" / "activities" / "2024-01-01T080000Z-morning-ride.json"
assert not merged_json.is_symlink() assert not merged_json.is_symlink()
data = json.loads(merged_json.read_text()) data = json.loads(merged_json.read_text())
assert data["title"] == "Epic Ride" assert data["title"] == "Epic Ride"
@@ -166,41 +166,41 @@ def test_merge_all_applies_sidecar(data_dir):
assert data["description"] == "What a day!" assert data["description"] == "What a day!"
# Untouched activity is still a symlink # Untouched activity is still a symlink
run_link = data_dir / "_merged" / "activities" / "2024-01-02T09:00:00Z_running.json" run_link = data_dir / "_merged" / "activities" / "2024-01-02T090000Z-easy-run.json"
assert run_link.is_symlink() assert run_link.is_symlink()
def test_merge_all_private_filtered_from_index(data_dir): def test_merge_all_private_filtered_from_index(data_dir):
edits = data_dir / "edits" edits = data_dir / "edits"
edits.mkdir() edits.mkdir()
(edits / "2024-01-01T08:00:00Z_cycling.md").write_text("---\nprivate: true\n---\n") (edits / "2024-01-01T080000Z-morning-ride.md").write_text("---\nprivate: true\n---\n")
merge_all(data_dir) merge_all(data_dir)
index = json.loads((data_dir / "_merged" / "index.json").read_text()) index = json.loads((data_dir / "_merged" / "index.json").read_text())
ids = [a["id"] for a in index["activities"]] ids = [a["id"] for a in index["activities"]]
assert "2024-01-01T08:00:00Z_cycling" not in ids assert "2024-01-01T080000Z-morning-ride" not in ids
assert "2024-01-02T09:00:00Z_running" in ids assert "2024-01-02T090000Z-easy-run" in ids
def test_merge_all_highlight_sorts_first(data_dir): def test_merge_all_highlight_sorts_first(data_dir):
edits = data_dir / "edits" edits = data_dir / "edits"
edits.mkdir() edits.mkdir()
# Highlight the older activity — it should appear first # Highlight the older activity — it should appear first
(edits / "2024-01-01T08:00:00Z_cycling.md").write_text("---\nhighlight: true\n---\n") (edits / "2024-01-01T080000Z-morning-ride.md").write_text("---\nhighlight: true\n---\n")
merge_all(data_dir) merge_all(data_dir)
index = json.loads((data_dir / "_merged" / "index.json").read_text()) index = json.loads((data_dir / "_merged" / "index.json").read_text())
ids = [a["id"] for a in index["activities"]] ids = [a["id"] for a in index["activities"]]
assert ids[0] == "2024-01-01T08:00:00Z_cycling" assert ids[0] == "2024-01-01T080000Z-morning-ride"
def test_merge_all_idempotent(data_dir): def test_merge_all_idempotent(data_dir):
edits = data_dir / "edits" edits = data_dir / "edits"
edits.mkdir() edits.mkdir()
(edits / "2024-01-01T08:00:00Z_cycling.md").write_text("---\ntitle: Renamed\n---\n") (edits / "2024-01-01T080000Z-morning-ride.md").write_text("---\ntitle: Renamed\n---\n")
merge_all(data_dir) merge_all(data_dir)
merge_all(data_dir) # second run should not error or double-apply merge_all(data_dir) # second run should not error or double-apply
data = json.loads( data = json.loads(
(data_dir / "_merged" / "activities" / "2024-01-01T08:00:00Z_cycling.json").read_text() (data_dir / "_merged" / "activities" / "2024-01-01T080000Z-morning-ride.json").read_text()
) )
assert data["title"] == "Renamed" assert data["title"] == "Renamed"
+10
View File
@@ -11,6 +11,16 @@ def test_running_variants():
assert normalise_sport(raw) == "running", raw assert normalise_sport(raw) == "running", raw
def test_skiing_variants():
    """Each listed skiing spelling normalises to the canonical "skiing" sport."""
    variants = ("skiing", "alpine_skiing", "nordic_skiing", "backcountry_ski")
    for raw in variants:
        # The raw value is the assertion message so a failure names the variant.
        assert normalise_sport(raw) == "skiing", raw
def test_swimming_variants():
    """Each listed swimming spelling normalises to the canonical "swimming" sport."""
    variants = ("swimming", "swim", "open_water_swimming", "lap_swimming")
    for raw in variants:
        # The raw value is the assertion message so a failure names the variant.
        assert normalise_sport(raw) == "swimming", raw
def test_unknown_falls_back_to_other(): def test_unknown_falls_back_to_other():
assert normalise_sport("yoga") == "other" assert normalise_sport("yoga") == "other"
assert normalise_sport(None) == "other" assert normalise_sport(None) == "other"
+51 -1
View File
@@ -1,4 +1,5 @@
from bincio.extract.writer import make_activity_id, _slugify from bincio.extract.writer import make_activity_id, build_summary, _slugify
from bincio.extract.metrics import ComputedMetrics
from bincio.extract.models import ParsedActivity, DataPoint from bincio.extract.models import ParsedActivity, DataPoint
from datetime import datetime, timezone from datetime import datetime, timezone
@@ -31,3 +32,52 @@ def test_slugify():
assert _slugify("Morning Ride!") == "morning-ride" assert _slugify("Morning Ride!") == "morning-ride"
assert _slugify(" Vélo ") == "velo" # é → e via NFKD + ASCII assert _slugify(" Vélo ") == "velo" # é → e via NFKD + ASCII
assert _slugify("") == "" assert _slugify("") == ""
def test_id_utc_conversion():
    """Non-UTC timestamps should be converted to UTC in the ID."""
    from datetime import timedelta

    # 09:30:12 at UTC+02:00 is 07:30:12 UTC.
    local_start = datetime(
        2024, 6, 1, 9, 30, 12, tzinfo=timezone(timedelta(hours=2))
    )
    activity = ParsedActivity(
        points=[DataPoint(timestamp=local_start)],
        sport="cycling",
        started_at=local_start,
        source_file="test.fit",
        source_hash="sha256:abc",
    )

    expected_id = "2024-06-01T073012Z"
    assert make_activity_id(activity) == expected_id
def test_build_summary_required_fields():
"""build_summary should include all fields needed by the schema."""
act = _dummy_activity("Test Ride")
metrics = ComputedMetrics(
distance_m=10000.0,
duration_s=3600,
moving_time_s=3500,
elevation_gain_m=100.0,
elevation_loss_m=95.0,
avg_speed_kmh=10.0,
max_speed_kmh=20.0,
avg_hr_bpm=None,
max_hr_bpm=None,
avg_cadence_rpm=None,
avg_power_w=None,
max_power_w=None,
bbox=None,
start_latlng=None,
end_latlng=None,
mmp=None,
best_efforts=None,
best_climb_m=None,
)
summary = build_summary(act, metrics, "2024-06-01T073012Z-test-ride")
# Required fields per schema
assert summary["id"] == "2024-06-01T073012Z-test-ride"
assert summary["title"] == "Test Ride"
assert summary["sport"] == "cycling"
assert "started_at" in summary
assert "privacy" in summary
assert "detail_url" in summary
assert "track_url" in summary