second pass. low
This commit is contained in:
@@ -554,10 +554,15 @@ async def upload_activity(file: UploadFile = File(...)) -> JSONResponse:
|
|||||||
if suffix not in _SUPPORTED_SUFFIXES:
|
if suffix not in _SUPPORTED_SUFFIXES:
|
||||||
raise HTTPException(400, f"Unsupported file type '{Path(name).suffix}'. Expected FIT, GPX, or TCX.")
|
raise HTTPException(400, f"Unsupported file type '{Path(name).suffix}'. Expected FIT, GPX, or TCX.")
|
||||||
|
|
||||||
|
_MAX_UPLOAD_BYTES = 50 * 1024 * 1024 # 50 MB
|
||||||
|
contents = await file.read()
|
||||||
|
if len(contents) > _MAX_UPLOAD_BYTES:
|
||||||
|
raise HTTPException(413, f"File too large ({len(contents)} bytes). Maximum is 50 MB.")
|
||||||
|
|
||||||
staging = dd / "_uploads"
|
staging = dd / "_uploads"
|
||||||
staging.mkdir(exist_ok=True)
|
staging.mkdir(exist_ok=True)
|
||||||
staged = staging / name
|
staged = staging / name
|
||||||
staged.write_bytes(await file.read())
|
staged.write_bytes(contents)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from bincio.extract.metrics import compute
|
from bincio.extract.metrics import compute
|
||||||
@@ -592,7 +597,7 @@ async def upload_activity(file: UploadFile = File(...)) -> JSONResponse:
|
|||||||
except HTTPException:
|
except HTTPException:
|
||||||
raise
|
raise
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
raise HTTPException(422, str(exc))
|
raise HTTPException(422, f"Failed to process activity file: {type(exc).__name__}")
|
||||||
finally:
|
finally:
|
||||||
staged.unlink(missing_ok=True)
|
staged.unlink(missing_ok=True)
|
||||||
|
|
||||||
|
|||||||
+57
-13
@@ -46,6 +46,9 @@ def _process_file(path: Path) -> dict:
|
|||||||
"""Runs inside a worker process. Only receives a Path (tiny pickle).
|
"""Runs inside a worker process. Only receives a Path (tiny pickle).
|
||||||
All heavy shared data (_known_hashes, _strava_lookup, etc.) is already
|
All heavy shared data (_known_hashes, _strava_lookup, etc.) is already
|
||||||
in the worker's memory from the initializer — zero per-task overhead.
|
in the worker's memory from the initializer — zero per-task overhead.
|
||||||
|
|
||||||
|
Writes to pending files (not final paths) so the main process can
|
||||||
|
arbitrate collisions and pick the best version.
|
||||||
"""
|
"""
|
||||||
from bincio.extract.metrics import compute
|
from bincio.extract.metrics import compute
|
||||||
from bincio.extract.parsers.factory import parse_file
|
from bincio.extract.parsers.factory import parse_file
|
||||||
@@ -80,11 +83,17 @@ def _process_file(path: Path) -> dict:
|
|||||||
activity, metrics, _output_dir,
|
activity, metrics, _output_dir,
|
||||||
privacy=_privacy,
|
privacy=_privacy,
|
||||||
rdp_epsilon=_rdp_epsilon,
|
rdp_epsilon=_rdp_epsilon,
|
||||||
|
pending=True,
|
||||||
)
|
)
|
||||||
summary = build_summary(activity, metrics, activity_id, _privacy)
|
summary = build_summary(activity, metrics, activity_id, _privacy)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
return {"status": "error", "path": str(path), "error": str(exc)}
|
return {"status": "error", "path": str(path), "error": str(exc)}
|
||||||
|
|
||||||
|
# Quality signals for the main process to compare competing results
|
||||||
|
sensor_channels = sum(1 for v in [
|
||||||
|
metrics.avg_hr_bpm, metrics.avg_power_w, metrics.avg_cadence_rpm,
|
||||||
|
] if v is not None)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"status": "ok",
|
"status": "ok",
|
||||||
"summary": summary,
|
"summary": summary,
|
||||||
@@ -94,6 +103,8 @@ def _process_file(path: Path) -> dict:
|
|||||||
"distance_m": metrics.distance_m,
|
"distance_m": metrics.distance_m,
|
||||||
"source": summary.get("source"),
|
"source": summary.get("source"),
|
||||||
"mmp": metrics.mmp,
|
"mmp": metrics.mmp,
|
||||||
|
"point_count": len(activity.points),
|
||||||
|
"sensor_channels": sensor_channels,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -177,6 +188,8 @@ def extract(
|
|||||||
summaries: list[dict] = []
|
summaries: list[dict] = []
|
||||||
errors: list[tuple[str, str]] = []
|
errors: list[tuple[str, str]] = []
|
||||||
skipped = 0
|
skipped = 0
|
||||||
|
# Collect all pending results, grouped by activity_id for collision arbitration
|
||||||
|
pending_by_id: dict[str, list[dict]] = {}
|
||||||
|
|
||||||
with Progress(
|
with Progress(
|
||||||
TextColumn("[progress.description]{task.description}"),
|
TextColumn("[progress.description]{task.description}"),
|
||||||
@@ -202,30 +215,61 @@ def extract(
|
|||||||
elif result["status"] == "error":
|
elif result["status"] == "error":
|
||||||
errors.append((result["path"], result["error"]))
|
errors.append((result["path"], result["error"]))
|
||||||
else:
|
else:
|
||||||
# Near-duplicate check — must be sequential (stateful)
|
pending_by_id.setdefault(result["id"], []).append(result)
|
||||||
|
|
||||||
|
# ── Arbitrate collisions and finalize pending files ───────────────────────
|
||||||
|
from bincio.extract.writer import (
|
||||||
|
activity_quality, cleanup_pending, finalize_pending, write_athlete_json, write_index,
|
||||||
|
)
|
||||||
|
|
||||||
|
for activity_id, candidates in pending_by_id.items():
|
||||||
|
# Pick the best candidate by quality score
|
||||||
|
candidates.sort(key=activity_quality, reverse=True)
|
||||||
|
winner = candidates[0]
|
||||||
|
|
||||||
|
# Clean up losing candidates' pending files
|
||||||
|
for loser in candidates[1:]:
|
||||||
|
cleanup_pending(cfg.output_dir, activity_id, loser["hash"])
|
||||||
|
skipped += 1
|
||||||
|
|
||||||
|
# Near-duplicate check against already-known activities
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
started_at = datetime.fromisoformat(result["started_at"])
|
started_at = datetime.fromisoformat(winner["started_at"])
|
||||||
near_id = dedup.find_near_duplicate(started_at, result["distance_m"])
|
near_id = dedup.find_near_duplicate(started_at, winner["distance_m"])
|
||||||
|
|
||||||
if near_id:
|
if near_id:
|
||||||
canonical = dedup.pick_canonical(near_id, result.get("source"))
|
canonical = dedup.pick_canonical(near_id, winner.get("source"))
|
||||||
if canonical != "__new__":
|
if canonical != "__new__":
|
||||||
_patch_duplicate_of(cfg.output_dir, result["id"], near_id)
|
# Existing is better — finalize winner as duplicate, then patch it
|
||||||
|
final_id = finalize_pending(cfg.output_dir, activity_id, winner["hash"])
|
||||||
|
_patch_duplicate_of(cfg.output_dir, final_id, near_id)
|
||||||
skipped += 1
|
skipped += 1
|
||||||
continue
|
continue
|
||||||
_patch_duplicate_of(cfg.output_dir, near_id, result["id"])
|
# New is better — patch the existing one as duplicate
|
||||||
dedup._records[near_id].duplicate_of = result["id"]
|
final_id = finalize_pending(cfg.output_dir, activity_id, winner["hash"])
|
||||||
|
_patch_duplicate_of(cfg.output_dir, near_id, final_id)
|
||||||
|
dedup._records[near_id].duplicate_of = final_id
|
||||||
|
else:
|
||||||
|
final_id = finalize_pending(cfg.output_dir, activity_id, winner["hash"])
|
||||||
|
|
||||||
|
# Update summary with the finalized ID (may include hash suffix)
|
||||||
|
summary = winner["summary"]
|
||||||
|
if final_id != activity_id:
|
||||||
|
summary = dict(summary)
|
||||||
|
summary["id"] = final_id
|
||||||
|
summary["detail_url"] = f"activities/{final_id}.json"
|
||||||
|
if summary.get("track_url"):
|
||||||
|
summary["track_url"] = f"activities/{final_id}.geojson"
|
||||||
|
|
||||||
dedup.register(ActivityRecord(
|
dedup.register(ActivityRecord(
|
||||||
id=result["id"],
|
id=final_id,
|
||||||
source_hash=result["hash"],
|
source_hash=winner["hash"],
|
||||||
started_at=started_at,
|
started_at=started_at,
|
||||||
distance_m=result["distance_m"],
|
distance_m=winner["distance_m"],
|
||||||
source=result.get("source"),
|
source=winner.get("source"),
|
||||||
))
|
))
|
||||||
summaries.append(result["summary"])
|
summaries.append(summary)
|
||||||
|
|
||||||
from bincio.extract.writer import write_athlete_json, write_index
|
|
||||||
existing = _load_existing_summaries(cfg.output_dir)
|
existing = _load_existing_summaries(cfg.output_dir)
|
||||||
merged = {s["id"]: s for s in existing}
|
merged = {s["id"]: s for s in existing}
|
||||||
for s in summaries:
|
for s in summaries:
|
||||||
|
|||||||
@@ -76,6 +76,8 @@ def _apply_extensions(pt: gpxpy.gpx.GPXTrackPoint, dp: DataPoint) -> None:
|
|||||||
dp.temperature_c = float(val)
|
dp.temperature_c = float(val)
|
||||||
elif tag == "speed":
|
elif tag == "speed":
|
||||||
dp.speed_kmh = float(val) * 3.6 # m/s → km/h
|
dp.speed_kmh = float(val) * 3.6 # m/s → km/h
|
||||||
|
elif tag in ("pwr", "power", "watts"):
|
||||||
|
dp.power_w = int(float(val))
|
||||||
|
|
||||||
|
|
||||||
def _strip_ns(tag: str) -> str:
|
def _strip_ns(tag: str) -> str:
|
||||||
|
|||||||
@@ -97,8 +97,8 @@ def _parse_ts(s: str) -> datetime:
|
|||||||
return datetime.strptime(s, fmt).replace(tzinfo=timezone.utc)
|
return datetime.strptime(s, fmt).replace(tzinfo=timezone.utc)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
continue
|
continue
|
||||||
# Numeric offset like +02:00 or -05:30 — parse with %z then convert to UTC
|
# Numeric offset like +02:00, -05:30, or +0200 — parse with %z then convert to UTC
|
||||||
m = _re.match(r"^(.+)([+-]\d{2}:\d{2})$", s)
|
m = _re.match(r"^(.+)([+-]\d{2}:?\d{2})$", s)
|
||||||
if m:
|
if m:
|
||||||
body, off = m.group(1), m.group(2).replace(":", "")
|
body, off = m.group(1), m.group(2).replace(":", "")
|
||||||
for fmt in ("%Y-%m-%dT%H:%M:%S.%f", "%Y-%m-%dT%H:%M:%S"):
|
for fmt in ("%Y-%m-%dT%H:%M:%S.%f", "%Y-%m-%dT%H:%M:%S"):
|
||||||
|
|||||||
@@ -67,8 +67,12 @@ def build_geojson(
|
|||||||
if p.lon is not None and p.lat is not None
|
if p.lon is not None and p.lat is not None
|
||||||
]
|
]
|
||||||
|
|
||||||
# Parallel speed array for gradient coloring
|
# Parallel speed array for gradient coloring — same filter as coordinates
|
||||||
speeds = [round(p.speed_kmh, 2) if p.speed_kmh is not None else None for p in simplified]
|
speeds = [
|
||||||
|
round(p.speed_kmh, 2) if p.speed_kmh is not None else None
|
||||||
|
for p in simplified
|
||||||
|
if p.lon is not None and p.lat is not None
|
||||||
|
]
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"type": "Feature",
|
"type": "Feature",
|
||||||
|
|||||||
@@ -33,8 +33,16 @@ def write_activity(
|
|||||||
privacy: str = "public",
|
privacy: str = "public",
|
||||||
duplicate_of: str | None = None,
|
duplicate_of: str | None = None,
|
||||||
rdp_epsilon: float = 0.0001,
|
rdp_epsilon: float = 0.0001,
|
||||||
|
pending: bool = False,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Write {id}.json and (if GPS available) {id}.geojson. Returns the ID."""
|
"""Write {id}.json and (if GPS available) {id}.geojson. Returns the ID.
|
||||||
|
|
||||||
|
When pending=True, writes to a uniquely-named pending file
|
||||||
|
({id}.{hash[:8]}.pending.json) instead of the final path. This avoids
|
||||||
|
race conditions when multiple workers process activities with the same ID.
|
||||||
|
The main process is responsible for promoting pending files to final paths
|
||||||
|
via finalize_pending().
|
||||||
|
"""
|
||||||
activity_id = make_activity_id(activity)
|
activity_id = make_activity_id(activity)
|
||||||
acts_dir = output_dir / "activities"
|
acts_dir = output_dir / "activities"
|
||||||
acts_dir.mkdir(parents=True, exist_ok=True)
|
acts_dir.mkdir(parents=True, exist_ok=True)
|
||||||
@@ -82,26 +90,108 @@ def write_activity(
|
|||||||
"custom": {},
|
"custom": {},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if pending:
|
||||||
|
# Write to a unique pending file — no collision possible
|
||||||
|
tag = activity.source_hash[-8:] if activity.source_hash else "unknown"
|
||||||
|
json_path = acts_dir / f"{activity_id}.{tag}.pending.json"
|
||||||
|
else:
|
||||||
json_path = acts_dir / f"{activity_id}.json"
|
json_path = acts_dir / f"{activity_id}.json"
|
||||||
# Collision guard: if a *different* activity already has this ID, append a
|
# Legacy non-pending path: collision guard for callers that don't use
|
||||||
# short hash suffix to disambiguate (same hash = idempotent re-extract).
|
# the pending workflow (e.g. edit server upload_activity)
|
||||||
if json_path.exists():
|
if json_path.exists():
|
||||||
existing = json.loads(json_path.read_text(encoding="utf-8"))
|
existing = json.loads(json_path.read_text(encoding="utf-8"))
|
||||||
if existing.get("source_hash") != activity.source_hash:
|
if existing.get("source_hash") != activity.source_hash:
|
||||||
activity_id = f"{activity_id}-{activity.source_hash[-6:]}"
|
activity_id = f"{activity_id}-{activity.source_hash[-6:]}"
|
||||||
json_path = acts_dir / f"{activity_id}.json"
|
json_path = acts_dir / f"{activity_id}.json"
|
||||||
detail["id"] = activity_id
|
detail["id"] = activity_id
|
||||||
|
|
||||||
json_path.write_text(json.dumps(detail, indent=2, ensure_ascii=False))
|
json_path.write_text(json.dumps(detail, indent=2, ensure_ascii=False))
|
||||||
|
|
||||||
# ── GeoJSON track ────────────────────────────────────────────────────────
|
# ── GeoJSON track ────────────────────────────────────────────────────────
|
||||||
if has_gps:
|
if has_gps:
|
||||||
geojson = build_geojson(activity.points, activity_id, epsilon=rdp_epsilon)
|
geojson = build_geojson(activity.points, activity_id, epsilon=rdp_epsilon)
|
||||||
|
if pending:
|
||||||
|
geojson_path = acts_dir / f"{activity_id}.{tag}.pending.geojson"
|
||||||
|
else:
|
||||||
geojson_path = acts_dir / f"{activity_id}.geojson"
|
geojson_path = acts_dir / f"{activity_id}.geojson"
|
||||||
geojson_path.write_text(json.dumps(geojson, indent=2, ensure_ascii=False))
|
geojson_path.write_text(json.dumps(geojson, indent=2, ensure_ascii=False))
|
||||||
|
|
||||||
return activity_id
|
return activity_id
|
||||||
|
|
||||||
|
|
||||||
|
def activity_quality(result: dict) -> int:
|
||||||
|
"""Compute a quality score for an activity result from a worker.
|
||||||
|
|
||||||
|
Higher is better. Used by the main process to pick the best version
|
||||||
|
when multiple workers produce results for the same activity ID.
|
||||||
|
"""
|
||||||
|
from bincio.extract.dedup import _SOURCE_QUALITY
|
||||||
|
|
||||||
|
score = 0
|
||||||
|
# Source type quality (FIT > GPX > TCX)
|
||||||
|
score += _SOURCE_QUALITY.get(result.get("source") or "", 0) * 100
|
||||||
|
# Sensor channel count
|
||||||
|
score += result.get("sensor_channels", 0) * 10
|
||||||
|
# Point count (more data = better)
|
||||||
|
score += min(result.get("point_count", 0), 50000) // 100
|
||||||
|
return score
|
||||||
|
|
||||||
|
|
||||||
|
def finalize_pending(output_dir: Path, activity_id: str, source_hash: str) -> str:
|
||||||
|
"""Promote a pending file to its final path via atomic rename.
|
||||||
|
|
||||||
|
If another activity already occupies the ID (different source_hash),
|
||||||
|
the pending file is disambiguated with a hash suffix.
|
||||||
|
|
||||||
|
Returns the final activity_id (may include suffix).
|
||||||
|
"""
|
||||||
|
acts_dir = output_dir / "activities"
|
||||||
|
tag = source_hash[-8:] if source_hash else "unknown"
|
||||||
|
|
||||||
|
pending_json = acts_dir / f"{activity_id}.{tag}.pending.json"
|
||||||
|
pending_geojson = acts_dir / f"{activity_id}.{tag}.pending.geojson"
|
||||||
|
|
||||||
|
final_id = activity_id
|
||||||
|
final_json = acts_dir / f"{final_id}.json"
|
||||||
|
|
||||||
|
# Check for ID collision with a different activity
|
||||||
|
if final_json.exists():
|
||||||
|
existing = json.loads(final_json.read_text(encoding="utf-8"))
|
||||||
|
if existing.get("source_hash") != source_hash:
|
||||||
|
final_id = f"{activity_id}-{source_hash[-6:]}"
|
||||||
|
final_json = acts_dir / f"{final_id}.json"
|
||||||
|
|
||||||
|
# Update the ID inside the JSON if it changed
|
||||||
|
if final_id != activity_id and pending_json.exists():
|
||||||
|
detail = json.loads(pending_json.read_text(encoding="utf-8"))
|
||||||
|
detail["id"] = final_id
|
||||||
|
pending_json.write_text(json.dumps(detail, indent=2, ensure_ascii=False))
|
||||||
|
|
||||||
|
# Atomic rename: pending → final
|
||||||
|
if pending_json.exists():
|
||||||
|
pending_json.rename(final_json)
|
||||||
|
|
||||||
|
final_geojson = acts_dir / f"{final_id}.geojson"
|
||||||
|
if pending_geojson.exists():
|
||||||
|
# Update the ID in GeoJSON properties too
|
||||||
|
if final_id != activity_id:
|
||||||
|
geo = json.loads(pending_geojson.read_text(encoding="utf-8"))
|
||||||
|
geo["properties"]["id"] = final_id
|
||||||
|
pending_geojson.write_text(json.dumps(geo, indent=2, ensure_ascii=False))
|
||||||
|
pending_geojson.rename(final_geojson)
|
||||||
|
|
||||||
|
return final_id
|
||||||
|
|
||||||
|
|
||||||
|
def cleanup_pending(output_dir: Path, activity_id: str, source_hash: str) -> None:
|
||||||
|
"""Remove pending files for a losing activity (the one not chosen as canonical)."""
|
||||||
|
acts_dir = output_dir / "activities"
|
||||||
|
tag = source_hash[-8:] if source_hash else "unknown"
|
||||||
|
for suffix in (".pending.json", ".pending.geojson"):
|
||||||
|
p = acts_dir / f"{activity_id}.{tag}{suffix}"
|
||||||
|
p.unlink(missing_ok=True)
|
||||||
|
|
||||||
|
|
||||||
def build_summary(
|
def build_summary(
|
||||||
activity: ParsedActivity,
|
activity: ParsedActivity,
|
||||||
metrics: ComputedMetrics,
|
metrics: ComputedMetrics,
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
import * as Plot from '@observablehq/plot';
|
import * as Plot from '@observablehq/plot';
|
||||||
import { onMount } from 'svelte';
|
import { onMount, onDestroy } from 'svelte';
|
||||||
import type { Timeseries, AthleteZones } from '../lib/types';
|
import type { Timeseries, AthleteZones } from '../lib/types';
|
||||||
|
|
||||||
export let timeseries: Timeseries;
|
export let timeseries: Timeseries;
|
||||||
@@ -82,8 +82,15 @@
|
|||||||
// Range handles — reset whenever the metric or chart type changes
|
// Range handles — reset whenever the metric or chart type changes
|
||||||
let trimMin = 0;
|
let trimMin = 0;
|
||||||
let trimMax = 100;
|
let trimMax = 100;
|
||||||
$: if (dataMin !== undefined) resetTrim(dataMin, dataMax);
|
let lastResetTab: Tab | null = null;
|
||||||
function resetTrim(lo: number, hi: number) { trimMin = lo; trimMax = hi; }
|
$: {
|
||||||
|
// Reset trim on tab change OR when data range changes
|
||||||
|
if (activeTab !== lastResetTab || trimMin < dataMin || trimMax > dataMax) {
|
||||||
|
trimMin = dataMin;
|
||||||
|
trimMax = dataMax;
|
||||||
|
lastResetTab = activeTab;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
$: step = (dataMax - dataMin) / 200 || 1;
|
$: step = (dataMax - dataMin) / 200 || 1;
|
||||||
|
|
||||||
@@ -116,6 +123,7 @@
|
|||||||
|
|
||||||
// ── Rendering ────────────────────────────────────────────────────────────
|
// ── Rendering ────────────────────────────────────────────────────────────
|
||||||
onMount(() => { renderChart(); });
|
onMount(() => { renderChart(); });
|
||||||
|
onDestroy(() => { chart?.remove(); chart = null; });
|
||||||
|
|
||||||
$: if (chartEl) {
|
$: if (chartEl) {
|
||||||
activeTab; xMode; chartType; histData; histThresholds; alignZones;
|
activeTab; xMode; chartType; histData; histThresholds; alignZones;
|
||||||
|
|||||||
@@ -27,7 +27,9 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
onMount(async () => {
|
onMount(async () => {
|
||||||
activeTab = (new URLSearchParams(window.location.search).get('tab') as Tab) ?? 'power';
|
const TABS: Tab[] = ['power', 'records', 'profile'];
|
||||||
|
const rawTab = new URLSearchParams(window.location.search).get('tab');
|
||||||
|
activeTab = TABS.includes(rawTab as Tab) ? (rawTab as Tab) : 'power';
|
||||||
mounted = true;
|
mounted = true;
|
||||||
try {
|
try {
|
||||||
const [athleteRes, indexRes] = await Promise.all([
|
const [athleteRes, indexRes] = await Promise.all([
|
||||||
|
|||||||
+19
-19
@@ -55,7 +55,7 @@ def test_parse_sidecar_frontmatter_only(tmp_path):
|
|||||||
# ── apply_sidecar ─────────────────────────────────────────────────────────────
|
# ── apply_sidecar ─────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
BASE_DETAIL = {
|
BASE_DETAIL = {
|
||||||
"id": "2024-01-01T08:00:00Z_cycling",
|
"id": "2024-01-01T080000Z-morning-ride",
|
||||||
"title": "Morning Ride",
|
"title": "Morning Ride",
|
||||||
"sport": "cycling",
|
"sport": "cycling",
|
||||||
"started_at": "2024-01-01T08:00:00Z",
|
"started_at": "2024-01-01T08:00:00Z",
|
||||||
@@ -118,21 +118,21 @@ def data_dir(tmp_path):
|
|||||||
acts = tmp_path / "activities"
|
acts = tmp_path / "activities"
|
||||||
acts.mkdir()
|
acts.mkdir()
|
||||||
# Two activities
|
# Two activities
|
||||||
for act_id, title in [
|
for act_id, title, sport, started_at in [
|
||||||
("2024-01-01T08:00:00Z_cycling", "Morning Ride"),
|
("2024-01-01T080000Z-morning-ride", "Morning Ride", "cycling", "2024-01-01T08:00:00Z"),
|
||||||
("2024-01-02T09:00:00Z_running", "Easy Run"),
|
("2024-01-02T090000Z-easy-run", "Easy Run", "running", "2024-01-02T09:00:00Z"),
|
||||||
]:
|
]:
|
||||||
detail = {
|
detail = {
|
||||||
"id": act_id, "title": title, "sport": act_id.split("_")[1],
|
"id": act_id, "title": title, "sport": sport,
|
||||||
"started_at": act_id.split("_")[0],
|
"started_at": started_at,
|
||||||
"description": "", "privacy": "public", "custom": {},
|
"description": "", "privacy": "public", "custom": {},
|
||||||
}
|
}
|
||||||
(acts / f"{act_id}.json").write_text(json.dumps(detail))
|
(acts / f"{act_id}.json").write_text(json.dumps(detail))
|
||||||
# Index
|
# Index
|
||||||
index = {"activities": [
|
index = {"activities": [
|
||||||
{"id": "2024-01-01T08:00:00Z_cycling", "title": "Morning Ride",
|
{"id": "2024-01-01T080000Z-morning-ride", "title": "Morning Ride",
|
||||||
"sport": "cycling", "started_at": "2024-01-01T08:00:00Z", "privacy": "public", "custom": {}},
|
"sport": "cycling", "started_at": "2024-01-01T08:00:00Z", "privacy": "public", "custom": {}},
|
||||||
{"id": "2024-01-02T09:00:00Z_running", "title": "Easy Run",
|
{"id": "2024-01-02T090000Z-easy-run", "title": "Easy Run",
|
||||||
"sport": "running", "started_at": "2024-01-02T09:00:00Z", "privacy": "public", "custom": {}},
|
"sport": "running", "started_at": "2024-01-02T09:00:00Z", "privacy": "public", "custom": {}},
|
||||||
]}
|
]}
|
||||||
(tmp_path / "index.json").write_text(json.dumps(index))
|
(tmp_path / "index.json").write_text(json.dumps(index))
|
||||||
@@ -145,20 +145,20 @@ def test_merge_all_no_sidecars(data_dir):
|
|||||||
merged = data_dir / "_merged"
|
merged = data_dir / "_merged"
|
||||||
assert merged.exists()
|
assert merged.exists()
|
||||||
# Unmodified files are symlinked
|
# Unmodified files are symlinked
|
||||||
detail_link = merged / "activities" / "2024-01-01T08:00:00Z_cycling.json"
|
detail_link = merged / "activities" / "2024-01-01T080000Z-morning-ride.json"
|
||||||
assert detail_link.is_symlink()
|
assert detail_link.is_symlink()
|
||||||
|
|
||||||
|
|
||||||
def test_merge_all_applies_sidecar(data_dir):
|
def test_merge_all_applies_sidecar(data_dir):
|
||||||
edits = data_dir / "edits"
|
edits = data_dir / "edits"
|
||||||
edits.mkdir()
|
edits.mkdir()
|
||||||
(edits / "2024-01-01T08:00:00Z_cycling.md").write_text(
|
(edits / "2024-01-01T080000Z-morning-ride.md").write_text(
|
||||||
"---\ntitle: Epic Ride\nhighlight: true\n---\n\nWhat a day!"
|
"---\ntitle: Epic Ride\nhighlight: true\n---\n\nWhat a day!"
|
||||||
)
|
)
|
||||||
n = merge_all(data_dir)
|
n = merge_all(data_dir)
|
||||||
assert n == 1
|
assert n == 1
|
||||||
|
|
||||||
merged_json = data_dir / "_merged" / "activities" / "2024-01-01T08:00:00Z_cycling.json"
|
merged_json = data_dir / "_merged" / "activities" / "2024-01-01T080000Z-morning-ride.json"
|
||||||
assert not merged_json.is_symlink()
|
assert not merged_json.is_symlink()
|
||||||
data = json.loads(merged_json.read_text())
|
data = json.loads(merged_json.read_text())
|
||||||
assert data["title"] == "Epic Ride"
|
assert data["title"] == "Epic Ride"
|
||||||
@@ -166,41 +166,41 @@ def test_merge_all_applies_sidecar(data_dir):
|
|||||||
assert data["description"] == "What a day!"
|
assert data["description"] == "What a day!"
|
||||||
|
|
||||||
# Untouched activity is still a symlink
|
# Untouched activity is still a symlink
|
||||||
run_link = data_dir / "_merged" / "activities" / "2024-01-02T09:00:00Z_running.json"
|
run_link = data_dir / "_merged" / "activities" / "2024-01-02T090000Z-easy-run.json"
|
||||||
assert run_link.is_symlink()
|
assert run_link.is_symlink()
|
||||||
|
|
||||||
|
|
||||||
def test_merge_all_private_filtered_from_index(data_dir):
|
def test_merge_all_private_filtered_from_index(data_dir):
|
||||||
edits = data_dir / "edits"
|
edits = data_dir / "edits"
|
||||||
edits.mkdir()
|
edits.mkdir()
|
||||||
(edits / "2024-01-01T08:00:00Z_cycling.md").write_text("---\nprivate: true\n---\n")
|
(edits / "2024-01-01T080000Z-morning-ride.md").write_text("---\nprivate: true\n---\n")
|
||||||
merge_all(data_dir)
|
merge_all(data_dir)
|
||||||
|
|
||||||
index = json.loads((data_dir / "_merged" / "index.json").read_text())
|
index = json.loads((data_dir / "_merged" / "index.json").read_text())
|
||||||
ids = [a["id"] for a in index["activities"]]
|
ids = [a["id"] for a in index["activities"]]
|
||||||
assert "2024-01-01T08:00:00Z_cycling" not in ids
|
assert "2024-01-01T080000Z-morning-ride" not in ids
|
||||||
assert "2024-01-02T09:00:00Z_running" in ids
|
assert "2024-01-02T090000Z-easy-run" in ids
|
||||||
|
|
||||||
|
|
||||||
def test_merge_all_highlight_sorts_first(data_dir):
|
def test_merge_all_highlight_sorts_first(data_dir):
|
||||||
edits = data_dir / "edits"
|
edits = data_dir / "edits"
|
||||||
edits.mkdir()
|
edits.mkdir()
|
||||||
# Highlight the older activity — it should appear first
|
# Highlight the older activity — it should appear first
|
||||||
(edits / "2024-01-01T08:00:00Z_cycling.md").write_text("---\nhighlight: true\n---\n")
|
(edits / "2024-01-01T080000Z-morning-ride.md").write_text("---\nhighlight: true\n---\n")
|
||||||
merge_all(data_dir)
|
merge_all(data_dir)
|
||||||
|
|
||||||
index = json.loads((data_dir / "_merged" / "index.json").read_text())
|
index = json.loads((data_dir / "_merged" / "index.json").read_text())
|
||||||
ids = [a["id"] for a in index["activities"]]
|
ids = [a["id"] for a in index["activities"]]
|
||||||
assert ids[0] == "2024-01-01T08:00:00Z_cycling"
|
assert ids[0] == "2024-01-01T080000Z-morning-ride"
|
||||||
|
|
||||||
|
|
||||||
def test_merge_all_idempotent(data_dir):
|
def test_merge_all_idempotent(data_dir):
|
||||||
edits = data_dir / "edits"
|
edits = data_dir / "edits"
|
||||||
edits.mkdir()
|
edits.mkdir()
|
||||||
(edits / "2024-01-01T08:00:00Z_cycling.md").write_text("---\ntitle: Renamed\n---\n")
|
(edits / "2024-01-01T080000Z-morning-ride.md").write_text("---\ntitle: Renamed\n---\n")
|
||||||
merge_all(data_dir)
|
merge_all(data_dir)
|
||||||
merge_all(data_dir) # second run should not error or double-apply
|
merge_all(data_dir) # second run should not error or double-apply
|
||||||
data = json.loads(
|
data = json.loads(
|
||||||
(data_dir / "_merged" / "activities" / "2024-01-01T08:00:00Z_cycling.json").read_text()
|
(data_dir / "_merged" / "activities" / "2024-01-01T080000Z-morning-ride.json").read_text()
|
||||||
)
|
)
|
||||||
assert data["title"] == "Renamed"
|
assert data["title"] == "Renamed"
|
||||||
|
|||||||
@@ -11,6 +11,16 @@ def test_running_variants():
|
|||||||
assert normalise_sport(raw) == "running", raw
|
assert normalise_sport(raw) == "running", raw
|
||||||
|
|
||||||
|
|
||||||
|
def test_skiing_variants():
|
||||||
|
for raw in ("skiing", "alpine_skiing", "nordic_skiing", "backcountry_ski"):
|
||||||
|
assert normalise_sport(raw) == "skiing", raw
|
||||||
|
|
||||||
|
|
||||||
|
def test_swimming_variants():
|
||||||
|
for raw in ("swimming", "swim", "open_water_swimming", "lap_swimming"):
|
||||||
|
assert normalise_sport(raw) == "swimming", raw
|
||||||
|
|
||||||
|
|
||||||
def test_unknown_falls_back_to_other():
|
def test_unknown_falls_back_to_other():
|
||||||
assert normalise_sport("yoga") == "other"
|
assert normalise_sport("yoga") == "other"
|
||||||
assert normalise_sport(None) == "other"
|
assert normalise_sport(None) == "other"
|
||||||
|
|||||||
+51
-1
@@ -1,4 +1,5 @@
|
|||||||
from bincio.extract.writer import make_activity_id, _slugify
|
from bincio.extract.writer import make_activity_id, build_summary, _slugify
|
||||||
|
from bincio.extract.metrics import ComputedMetrics
|
||||||
from bincio.extract.models import ParsedActivity, DataPoint
|
from bincio.extract.models import ParsedActivity, DataPoint
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
@@ -31,3 +32,52 @@ def test_slugify():
|
|||||||
assert _slugify("Morning Ride!") == "morning-ride"
|
assert _slugify("Morning Ride!") == "morning-ride"
|
||||||
assert _slugify(" Vélo ") == "velo" # é → e via NFKD + ASCII
|
assert _slugify(" Vélo ") == "velo" # é → e via NFKD + ASCII
|
||||||
assert _slugify("") == ""
|
assert _slugify("") == ""
|
||||||
|
|
||||||
|
|
||||||
|
def test_id_utc_conversion():
|
||||||
|
"""Non-UTC timestamps should be converted to UTC in the ID."""
|
||||||
|
from datetime import timedelta
|
||||||
|
tz_plus2 = timezone(timedelta(hours=2))
|
||||||
|
ts = datetime(2024, 6, 1, 9, 30, 12, tzinfo=tz_plus2) # 07:30:12 UTC
|
||||||
|
act = ParsedActivity(
|
||||||
|
points=[DataPoint(timestamp=ts)],
|
||||||
|
sport="cycling",
|
||||||
|
started_at=ts,
|
||||||
|
source_file="test.fit",
|
||||||
|
source_hash="sha256:abc",
|
||||||
|
)
|
||||||
|
assert make_activity_id(act) == "2024-06-01T073012Z"
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_summary_required_fields():
|
||||||
|
"""build_summary should include all fields needed by the schema."""
|
||||||
|
act = _dummy_activity("Test Ride")
|
||||||
|
metrics = ComputedMetrics(
|
||||||
|
distance_m=10000.0,
|
||||||
|
duration_s=3600,
|
||||||
|
moving_time_s=3500,
|
||||||
|
elevation_gain_m=100.0,
|
||||||
|
elevation_loss_m=95.0,
|
||||||
|
avg_speed_kmh=10.0,
|
||||||
|
max_speed_kmh=20.0,
|
||||||
|
avg_hr_bpm=None,
|
||||||
|
max_hr_bpm=None,
|
||||||
|
avg_cadence_rpm=None,
|
||||||
|
avg_power_w=None,
|
||||||
|
max_power_w=None,
|
||||||
|
bbox=None,
|
||||||
|
start_latlng=None,
|
||||||
|
end_latlng=None,
|
||||||
|
mmp=None,
|
||||||
|
best_efforts=None,
|
||||||
|
best_climb_m=None,
|
||||||
|
)
|
||||||
|
summary = build_summary(act, metrics, "2024-06-01T073012Z-test-ride")
|
||||||
|
# Required fields per schema
|
||||||
|
assert summary["id"] == "2024-06-01T073012Z-test-ride"
|
||||||
|
assert summary["title"] == "Test Ride"
|
||||||
|
assert summary["sport"] == "cycling"
|
||||||
|
assert "started_at" in summary
|
||||||
|
assert "privacy" in summary
|
||||||
|
assert "detail_url" in summary
|
||||||
|
assert "track_url" in summary
|
||||||
|
|||||||
Reference in New Issue
Block a user