From 7dcb1e6dd0eeef9ec9b7a073ff3b6db404e395df Mon Sep 17 00:00:00 2001
From: Davide Scaini
Date: Thu, 9 Apr 2026 12:03:06 +0200
Subject: [PATCH] refactor: extract/ingest facade, merge_one, deduplicate ops constants
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add bincio/extract/ingest.py as a facade over the extract internals
  (ingest_parsed, strava_sync), reducing coupling from 6+ imports to one
- Add merge_one() to merge.py — fast single-activity path for interactive
  edits (rewrites one file + index, skips full directory rebuild)
- Rewrite edit/ops.py to delegate to the new facade; fix broken
  run_strava_sync return (was referencing undefined locals)
- Remove duplicated SPORTS, STAT_PANELS, VALID_ACTIVITY_ID from
  edit/server.py — now imported from ops.py
---
 bincio/edit/ops.py       |  78 +++-------------------
 bincio/edit/server.py    |  11 +---
 bincio/extract/ingest.py | 137 +++++++++++++++++++++++++++++++++++++++
 bincio/render/merge.py   |  78 ++++++++++++++++++++++
 bincio/serve/server.py   |   2 +-
 5 files changed, 229 insertions(+), 77 deletions(-)
 create mode 100644 bincio/extract/ingest.py

diff --git a/bincio/edit/ops.py b/bincio/edit/ops.py
index dc95fc4..fddddb5 100644
--- a/bincio/edit/ops.py
+++ b/bincio/edit/ops.py
@@ -7,12 +7,15 @@ No FastAPI, no globals — all context is passed as explicit arguments.
from __future__ import annotations import json -import time +import re from pathlib import Path from typing import Any +# ── Shared constants (imported by edit/server.py and serve/server.py) ───────── + SPORTS = ["cycling", "running", "hiking", "walking", "swimming", "skiing", "other"] STAT_PANELS = ["elevation", "speed", "heart_rate", "cadence", "power"] +VALID_ACTIVITY_ID = re.compile(r'^[a-zA-Z0-9][a-zA-Z0-9\-]{0,250}$') def apply_sidecar_edit(activity_id: str, payload: dict[str, Any], data_dir: Path) -> None: @@ -51,8 +54,8 @@ def apply_sidecar_edit(activity_id: str, payload: dict[str, Any], data_dir: Path sidecar_path.write_text(content, encoding="utf-8") - from bincio.render.merge import merge_all - merge_all(data_dir) + from bincio.render.merge import merge_one + merge_one(data_dir, activity_id) def run_strava_sync(data_dir: Path, client_id: str, client_secret: str) -> dict[str, Any]: @@ -69,72 +72,11 @@ def run_strava_sync(data_dir: Path, client_id: str, client_secret: str) -> dict[ Raises: RuntimeError: If Strava credentials are missing or API calls fail. 
""" - if not client_id or not client_secret: - raise RuntimeError("Strava not configured (missing client_id or client_secret)") - - from bincio.extract.strava_api import ( - StravaError, - ensure_fresh, - fetch_activities, - fetch_streams, - save_token, - strava_meta_to_partial, - strava_to_parsed, - ) - - try: - token = ensure_fresh(data_dir, client_id, client_secret) - except StravaError as e: - raise RuntimeError(str(e)) from e - - after: int | None = token.get("last_sync_at") - try: - activities = fetch_activities(token["access_token"], after=after) - except StravaError as e: - raise RuntimeError(str(e)) from e - - from bincio.extract.metrics import compute - from bincio.extract.writer import build_summary, make_activity_id, write_activity, write_index + from bincio.extract.ingest import strava_sync as _strava_sync from bincio.render.merge import merge_all - index_path = data_dir / "index.json" - if index_path.exists(): - index_data = json.loads(index_path.read_text(encoding="utf-8")) - else: - index_data = {"owner": {"handle": "unknown"}, "activities": []} - owner = index_data.get("owner", {}) - summaries: dict[str, dict] = {s["id"]: s for s in index_data.get("activities", [])} - - imported = 0 - skipped = 0 - errors: list[str] = [] - - for meta in activities: - try: - activity_id = make_activity_id(strava_meta_to_partial(meta)) - if (data_dir / "activities" / f"{activity_id}.json").exists(): - skipped += 1 - continue - streams = fetch_streams(token["access_token"], meta["id"]) - parsed = strava_to_parsed(meta, streams) - metrics = compute(parsed) - write_activity(parsed, metrics, data_dir, privacy="public", rdp_epsilon=0.0001) - summaries[activity_id] = build_summary(parsed, metrics, activity_id, "public") - imported += 1 - except Exception as exc: - errors.append(f"{meta.get('id')}: {type(exc).__name__}") - - if imported: - write_index(list(summaries.values()), data_dir, owner) + result = _strava_sync(data_dir, client_id, client_secret) + if 
result["imported"]: merge_all(data_dir) - token["last_sync_at"] = int(time.time()) - save_token(data_dir, token) - - return { - "ok": True, - "imported": imported, - "skipped": skipped, - "error_count": len(errors), - "errors": errors[:5], - } + return result diff --git a/bincio/edit/server.py b/bincio/edit/server.py index ea2f7f3..e9ddb1a 100644 --- a/bincio/edit/server.py +++ b/bincio/edit/server.py @@ -3,7 +3,6 @@ from __future__ import annotations import json -import re import shutil from pathlib import Path from typing import Any @@ -12,6 +11,8 @@ from fastapi import FastAPI, File, HTTPException, Request, UploadFile from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import HTMLResponse, JSONResponse, RedirectResponse +from bincio.edit.ops import SPORTS, STAT_PANELS, VALID_ACTIVITY_ID + # Populated by the CLI before uvicorn starts data_dir: Path | None = None site_url: str = "http://localhost:4321" @@ -28,18 +29,12 @@ app.add_middleware( allow_headers=["Content-Type"], ) -_VALID_ACTIVITY_ID = re.compile(r'^[a-zA-Z0-9][a-zA-Z0-9\-]{0,250}$') - - def _check_id(activity_id: str) -> str: """Reject activity IDs that contain path traversal sequences.""" - if not _VALID_ACTIVITY_ID.match(activity_id): + if not VALID_ACTIVITY_ID.match(activity_id): raise HTTPException(400, "Invalid activity ID") return activity_id -SPORTS = ["cycling", "running", "hiking", "walking", "swimming", "skiing", "other"] -STAT_PANELS = ["elevation", "speed", "heart_rate", "cadence", "power"] - # ── HTML UI ─────────────────────────────────────────────────────────────────── diff --git a/bincio/extract/ingest.py b/bincio/extract/ingest.py new file mode 100644 index 0000000..84406eb --- /dev/null +++ b/bincio/extract/ingest.py @@ -0,0 +1,137 @@ +"""Facade for writing a parsed or Strava-sourced activity into a BAS data store. + +Callers (edit/ops.py) import from here instead of reaching into extract.metrics, +extract.writer, and extract.strava_api individually. 
If the internal structure +of the extract package changes, only this file needs updating. +""" + +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any, Optional + +from bincio.extract.models import ParsedActivity + + +def ingest_parsed( + parsed: ParsedActivity, + data_dir: Path, + privacy: str = "public", + rdp_epsilon: float = 0.0001, +) -> str: + """Compute metrics, write activity files, and update index.json. + + Args: + parsed: Activity produced by any parser or Strava converter. + data_dir: Per-user output directory (contains activities/, index.json). + privacy: BAS privacy level — "public", "no_gps", or "private". + rdp_epsilon: RDP simplification threshold in degrees. + + Returns: + The BAS activity ID of the written activity. + + Raises: + FileExistsError: If an activity with the same ID already exists. + """ + from bincio.extract.metrics import compute + from bincio.extract.writer import ( + build_summary, + make_activity_id, + write_activity, + write_index, + ) + + activity_id = make_activity_id(parsed) + if (data_dir / "activities" / f"{activity_id}.json").exists(): + raise FileExistsError(f"Activity already exists: {activity_id}") + + metrics = compute(parsed) + write_activity(parsed, metrics, data_dir, privacy=privacy, rdp_epsilon=rdp_epsilon) + summary = build_summary(parsed, metrics, activity_id, privacy) + + index_path = data_dir / "index.json" + if index_path.exists(): + index_data = json.loads(index_path.read_text(encoding="utf-8")) + else: + index_data = {"owner": {"handle": "unknown"}, "activities": []} + owner = index_data.get("owner", {}) + summaries: dict[str, Any] = {s["id"]: s for s in index_data.get("activities", [])} + summaries[activity_id] = summary + write_index(list(summaries.values()), data_dir, owner) + + return activity_id + + +def strava_sync( + data_dir: Path, + client_id: str, + client_secret: str, +) -> dict[str, Any]: + """Fetch new Strava activities and ingest them into 
data_dir. + + Args: + data_dir: Per-user data directory. + client_id: Strava OAuth client ID. + client_secret: Strava OAuth client secret. + + Returns: + Dict with keys: ok, imported, skipped, error_count, errors. + + Raises: + RuntimeError: If Strava credentials are missing or API calls fail. + """ + import time + + from bincio.extract.strava_api import ( + StravaError, + ensure_fresh, + fetch_activities, + fetch_streams, + save_token, + strava_meta_to_partial, + strava_to_parsed, + ) + from bincio.extract.writer import make_activity_id + + if not client_id or not client_secret: + raise RuntimeError("Strava not configured (missing client_id or client_secret)") + + try: + token = ensure_fresh(data_dir, client_id, client_secret) + except StravaError as e: + raise RuntimeError(str(e)) from e + + after: Optional[int] = token.get("last_sync_at") + try: + activities = fetch_activities(token["access_token"], after=after) + except StravaError as e: + raise RuntimeError(str(e)) from e + + imported = 0 + skipped = 0 + errors: list[str] = [] + + for meta in activities: + try: + activity_id = make_activity_id(strava_meta_to_partial(meta)) + if (data_dir / "activities" / f"{activity_id}.json").exists(): + skipped += 1 + continue + streams = fetch_streams(token["access_token"], meta["id"]) + parsed = strava_to_parsed(meta, streams) + ingest_parsed(parsed, data_dir, privacy="public", rdp_epsilon=0.0001) + imported += 1 + except Exception as exc: + errors.append(f"{meta.get('id')}: {type(exc).__name__}") + + token["last_sync_at"] = int(time.time()) + save_token(data_dir, token) + + return { + "ok": True, + "imported": imported, + "skipped": skipped, + "error_count": len(errors), + "errors": errors[:5], + } diff --git a/bincio/render/merge.py b/bincio/render/merge.py index 53befd3..fb8bfb6 100644 --- a/bincio/render/merge.py +++ b/bincio/render/merge.py @@ -74,6 +74,84 @@ def _apply_sidecar_summary(summary: dict, fm: dict) -> dict: return s +def merge_one(data_dir: Path, 
activity_id: str) -> None: + """Apply (or remove) sidecar overrides for a single activity. + + Updates data_dir/_merged/activities/{id}.json and rewrites + _merged/index.json. Faster than merge_all() for interactive edits + because it touches only one activity file instead of rebuilding the + whole _merged/activities/ directory. + + Use merge_all() for bulk operations (first run, Strava sync, etc.). + """ + edits_dir = data_dir / "edits" + acts_dir = data_dir / "activities" + merged_dir = data_dir / "_merged" + merged_acts = merged_dir / "activities" + merged_acts.mkdir(parents=True, exist_ok=True) + + src = acts_dir / f"{activity_id}.json" + if not src.exists(): + return + + dest = merged_acts / f"{activity_id}.json" + + # Determine if a sidecar or image list applies to this activity + sidecar_path = edits_dir / f"{activity_id}.md" if edits_dir.exists() else None + images_dir = edits_dir / "images" / activity_id if edits_dir.exists() else None + has_sidecar = sidecar_path is not None and sidecar_path.exists() + image_files: list[str] = [] + if images_dir and images_dir.exists(): + image_files = sorted( + p.name for p in images_dir.iterdir() + if p.is_file() and not p.name.startswith(".") + ) + + needs_merge = has_sidecar or bool(image_files) + + # Remove the old dest (symlink or file) before writing the new one + if dest.exists() or dest.is_symlink(): + dest.unlink() + + if needs_merge: + detail = json.loads(src.read_text(encoding="utf-8")) + if has_sidecar: + fm, body = parse_sidecar(sidecar_path) # type: ignore[arg-type] + detail = apply_sidecar(detail, fm, body) + if image_files: + detail["custom"] = dict(detail.get("custom") or {}) + detail["custom"]["images"] = image_files + dest.write_text(json.dumps(detail, indent=2, ensure_ascii=False)) + else: + dest.symlink_to(src.resolve()) + + # Rewrite index — load the full sidecar map so all summaries stay consistent + index_path = data_dir / "index.json" + if not index_path.exists(): + return + + all_sidecars: 
dict[str, tuple[dict, str]] = {} + if edits_dir and edits_dir.exists(): + for md_path in edits_dir.glob("*.md"): + all_sidecars[md_path.stem] = parse_sidecar(md_path) + + index = json.loads(index_path.read_text(encoding="utf-8")) + activities = [] + for s in index.get("activities", []): + aid = s.get("id", "") + if aid in all_sidecars: + fm, _ = all_sidecars[aid] + s = _apply_sidecar_summary(s, fm) + activities.append(s) + + activities = [a for a in activities if a.get("privacy") != "private"] + activities.sort(key=lambda a: a.get("started_at", ""), reverse=True) + activities.sort(key=lambda a: 0 if a.get("custom", {}).get("highlight") else 1) + + index["activities"] = activities + (merged_dir / "index.json").write_text(json.dumps(index, indent=2, ensure_ascii=False)) + + def merge_all(data_dir: Path) -> int: """Build data_dir/_merged/ with all sidecar overrides applied. diff --git a/bincio/serve/server.py b/bincio/serve/server.py index 3a8427f..6e1135a 100644 --- a/bincio/serve/server.py +++ b/bincio/serve/server.py @@ -70,7 +70,7 @@ app.add_middleware( ) _VALID_HANDLE = re.compile(r'^[a-z0-9][a-z0-9_-]{0,29}$') -_VALID_ACTIVITY_ID = re.compile(r'^[a-zA-Z0-9][a-zA-Z0-9\-]{0,250}$') +from bincio.edit.ops import VALID_ACTIVITY_ID as _VALID_ACTIVITY_ID _SESSION_COOKIE = "bincio_session" _COOKIE_MAX_AGE = 30 * 86400 # 30 days