From 17f36889f33c85204b2790a6800a5cedbbb4c997 Mon Sep 17 00:00:00 2001 From: Davide Scaini Date: Mon, 6 Apr 2026 12:38:41 +0200 Subject: [PATCH] sync strava data from web ui --- CHANGELOG.md | 37 ++++++ bincio/edit/cli.py | 39 +++++- bincio/edit/server.py | 117 ++++++++++++++++- bincio/extract/strava_api.py | 211 +++++++++++++++++++++++++++++++ site/src/layouts/Base.astro | 235 +++++++++++++++++++++++++++++++---- 5 files changed, 605 insertions(+), 34 deletions(-) create mode 100644 bincio/extract/strava_api.py diff --git a/CHANGELOG.md b/CHANGELOG.md index c33ec88..2b6ffc0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,42 @@ # Changelog +## [Unreleased] — 2026-04-06 + +### New feature — Strava sync from UI + +- **`bincio/extract/strava_api.py`** (new) — Strava OAuth + activity API integration: + OAuth URL generation, authorization code exchange, token refresh, paged activity list + fetching, stream fetching (time, latlng, altitude, HR, cadence, power, velocity), and + conversion of the API response directly to `ParsedActivity` (no file download needed). + Token stored in `<data_dir>/strava_token.json`; `last_sync_at` tracks incremental syncs. + +- **`bincio/edit/server.py`** — four new endpoints: + - `GET /api/strava/status` — returns `{configured, connected, last_sync}` for the UI + - `GET /api/strava/auth-url` — returns the OAuth URL for the popup window + - `GET /api/strava/callback` — exchanges auth code, saves token, redirects to site with `?strava=connected` + - `POST /api/strava/sync` — fetches activities since `last_sync_at`, runs extract pipeline, + updates `index.json`, runs `merge_all()`, and updates `last_sync_at` in the token file + +- **`bincio/edit/cli.py`** — `--strava-client-id` and `--strava-client-secret` flags added + (also read from `STRAVA_CLIENT_ID` / `STRAVA_CLIENT_SECRET` env vars). Strava sync is + disabled (the `auth-url` and `sync` endpoints return 400) when credentials are not provided. 
+ +- **`site/src/layouts/Base.astro`** — upload modal redesigned with a "choose source" screen: + two buttons — "Upload file" (existing drag-and-drop) and "Sync from Strava". Strava button + shows "Not configured" when the server lacks credentials, or opens an OAuth popup window. + After connecting, a "Sync now" button triggers the sync and reloads the feed on import. + +**Setup:** register `http://localhost:4041/api/strava/callback` as an allowed redirect URI +in your Strava app settings, then run: +``` +bincio edit --strava-client-id YOUR_ID --strava-client-secret YOUR_SECRET +# or via env vars: STRAVA_CLIENT_ID=... STRAVA_CLIENT_SECRET=... bincio edit +``` + +**Note on the upload button:** the button is visible whenever `PUBLIC_EDIT_URL` is set in +`site/.env`, regardless of whether the edit server is running. This is intentional — the env +var is the "edit mode enabled" flag. Remove it from `.env` to hide the button. + ## [Unreleased] — 2026-04-01 ### Security fixes (second-pass audit) diff --git a/bincio/edit/cli.py b/bincio/edit/cli.py index 55406c7..afe0d62 100644 --- a/bincio/edit/cli.py +++ b/bincio/edit/cli.py @@ -20,11 +20,17 @@ console = Console() help="URL of the Astro dev server (for the Back link).") @click.option("--config", "config_path", default=None, help="Path to extract_config.yaml (reads output.dir from it).") +@click.option("--strava-client-id", default=None, envvar="STRAVA_CLIENT_ID", + help="Strava API client ID (enables Strava sync in the UI). Also reads STRAVA_CLIENT_ID env var.") +@click.option("--strava-client-secret", default=None, envvar="STRAVA_CLIENT_SECRET", + help="Strava API client secret. Also reads STRAVA_CLIENT_SECRET env var.") def edit( data_dir: Optional[str], port: int, site_url: str, config_path: Optional[str], + strava_client_id: Optional[str], + strava_client_secret: Optional[str], ) -> None: """Start a local web UI for editing activity sidecar files. 
@@ -46,6 +52,13 @@ def edit( ) data = _resolve_data_dir(data_dir, config_path) + + # Fall back to extract_config.yaml for Strava credentials + if not strava_client_id or not strava_client_secret: + cfg_strava = _load_config(config_path).get("import", {}).get("strava", {}) + strava_client_id = strava_client_id or str(cfg_strava.get("client_id") or "") + strava_client_secret = strava_client_secret or str(cfg_strava.get("client_secret") or "") + console.print(f"Data dir: [cyan]{data}[/cyan]") console.print(f"Edit UI: [cyan]http://localhost:{port}/edit/[/cyan]") console.print(f"Site URL: [cyan]{site_url}[/cyan]") @@ -54,20 +67,34 @@ def edit( import bincio.edit.server as srv srv.data_dir = data srv.site_url = site_url + srv.strava_client_id = strava_client_id or "" + srv.strava_client_secret = strava_client_secret or "" + + if strava_client_id: + console.print(f"Strava sync: [green]enabled[/green] (client {strava_client_id})") + else: + console.print("Strava sync: [yellow]disabled[/yellow] (pass --strava-client-id to enable)") uvicorn.run(srv.app, host="127.0.0.1", port=port, log_level="warning") +def _load_config(config_path: Optional[str]) -> dict: + """Load extract_config.yaml — explicit path first, then cwd auto-discovery.""" + import yaml + for cfg in filter(None, [config_path and Path(config_path), Path("extract_config.yaml")]): + if Path(cfg).exists(): + return yaml.safe_load(Path(cfg).read_text()) or {} + return {} + + def _resolve_data_dir(explicit: Optional[str], config_path: Optional[str]) -> Path: if explicit: return Path(explicit).expanduser().resolve() - if config_path and Path(config_path).exists(): - import yaml - raw = yaml.safe_load(Path(config_path).read_text()) or {} - out = raw.get("output", {}).get("dir") - if out: - return Path(out).expanduser().resolve() + raw = _load_config(config_path) + out = raw.get("output", {}).get("dir") + if out: + return Path(out).expanduser().resolve() default = Path.cwd() / "bincio_data" if default.exists(): diff 
--git a/bincio/edit/server.py b/bincio/edit/server.py index ac39035..3f9018d 100644 --- a/bincio/edit/server.py +++ b/bincio/edit/server.py @@ -5,16 +5,19 @@ from __future__ import annotations import json import re import shutil +import time from pathlib import Path from typing import Any -from fastapi import FastAPI, File, HTTPException, UploadFile +from fastapi import FastAPI, File, HTTPException, Request, UploadFile from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import HTMLResponse, JSONResponse, RedirectResponse # Populated by the CLI before uvicorn starts data_dir: Path | None = None site_url: str = "http://localhost:4321" +strava_client_id: str = "" +strava_client_secret: str = "" app = FastAPI(title="BincioActivity Edit Server", docs_url=None, redoc_url=None) @@ -618,3 +621,115 @@ async def delete_image(activity_id: str, filename: str) -> JSONResponse: if not any(target.parent.iterdir()): shutil.rmtree(target.parent) return JSONResponse({"ok": True}) + + +# ── Strava sync ─────────────────────────────────────────────────────────────── + +@app.get("/api/strava/status") +async def strava_status() -> JSONResponse: + """Return whether Strava is configured and whether a token is stored.""" + dd = _get_data_dir() + from bincio.extract.strava_api import load_token + token = load_token(dd) + return JSONResponse({ + "configured": bool(strava_client_id), + "connected": token is not None, + "last_sync": token.get("last_sync_at") if token else None, + }) + + +@app.get("/api/strava/auth-url") +async def strava_auth_url(request: Request) -> JSONResponse: + """Return the Strava OAuth URL the browser should open.""" + if not strava_client_id: + raise HTTPException(400, "Strava client ID not configured. 
Pass --strava-client-id to bincio edit.") + redirect_uri = str(request.url_for("strava_callback")) + from bincio.extract.strava_api import auth_url + return JSONResponse({"url": auth_url(strava_client_id, redirect_uri)}) + + +@app.get("/api/strava/callback", name="strava_callback") +async def strava_callback(code: str = "", error: str = "") -> RedirectResponse: + """Strava OAuth callback — exchange code for token then redirect to the site.""" + if error or not code: + return RedirectResponse(f"{site_url}?strava=error") + if not strava_client_id or not strava_client_secret: + return RedirectResponse(f"{site_url}?strava=error") + dd = _get_data_dir() + from bincio.extract.strava_api import StravaError, exchange_code, save_token + try: + token = exchange_code(strava_client_id, strava_client_secret, code) + except StravaError: + return RedirectResponse(f"{site_url}?strava=error") + # Stamp last_sync_at at connect time so the first sync only fetches new activities + token.setdefault("last_sync_at", int(time.time())) + save_token(dd, token) + return RedirectResponse(f"{site_url}?strava=connected") + + +@app.post("/api/strava/sync") +async def strava_sync() -> JSONResponse: + """Fetch new Strava activities since last sync and add them to the data store.""" + if not strava_client_id or not strava_client_secret: + raise HTTPException(400, "Strava not configured. 
Pass --strava-client-id and --strava-client-secret to bincio edit.") + dd = _get_data_dir() + + from bincio.extract.strava_api import ( + StravaError, ensure_fresh, fetch_activities, fetch_streams, + save_token, strava_to_parsed, + ) + try: + token = ensure_fresh(dd, strava_client_id, strava_client_secret) + except StravaError as e: + raise HTTPException(502, str(e)) + + after: int | None = token.get("last_sync_at") + try: + activities = fetch_activities(token["access_token"], after=after) + except StravaError as e: + raise HTTPException(502, str(e)) + + from bincio.extract.metrics import compute + from bincio.extract.writer import build_summary, make_activity_id, write_activity, write_index + from bincio.extract.strava_api import strava_meta_to_partial + from bincio.render.merge import merge_all + + # Load existing index once + index_path = dd / "index.json" + if index_path.exists(): + index_data = json.loads(index_path.read_text(encoding="utf-8")) + else: + index_data = {"owner": {"handle": "unknown"}, "activities": []} + owner = index_data.get("owner", {}) + summaries: dict[str, dict] = {s["id"]: s for s in index_data.get("activities", [])} + + imported = 0 + skipped = 0 + errors: list[str] = [] + + for meta in activities: + try: + # Compute ID from meta alone (no API call) to skip already-known activities + activity_id = make_activity_id(strava_meta_to_partial(meta)) + if (dd / "activities" / f"{activity_id}.json").exists(): + skipped += 1 + continue + + # Only fetch streams for genuinely new activities + streams = fetch_streams(token["access_token"], meta["id"]) + parsed = strava_to_parsed(meta, streams) + metrics = compute(parsed) + write_activity(parsed, metrics, dd, privacy="public", rdp_epsilon=0.0001) + summaries[activity_id] = build_summary(parsed, metrics, activity_id, "public") + imported += 1 + except Exception as exc: + errors.append(f"{meta.get('id')}: {type(exc).__name__}") + + if imported: + write_index(list(summaries.values()), dd, owner) + 
merge_all(dd) + + token["last_sync_at"] = int(time.time()) + save_token(dd, token) + + return JSONResponse({"ok": True, "imported": imported, "skipped": skipped, "errors": errors[:5]}) diff --git a/bincio/extract/strava_api.py b/bincio/extract/strava_api.py new file mode 100644 index 0000000..1367c2d --- /dev/null +++ b/bincio/extract/strava_api.py @@ -0,0 +1,211 @@ +"""Strava OAuth + activity API sync. + +Token is stored in <data_dir>/strava_token.json: + {access_token, refresh_token, expires_at, last_sync_at?} + +Usage: + 1. Build an auth URL and redirect the user to it. + 2. Exchange the returned code for a token (exchange_code). + 3. On subsequent syncs, call ensure_fresh() then fetch_activities() + fetch_streams(). + 4. Convert each result to ParsedActivity with strava_to_parsed(). +""" + +from __future__ import annotations + +import hashlib +import json +import time +import urllib.error +import urllib.parse +import urllib.request +from datetime import datetime, timezone +from pathlib import Path +from typing import Optional + +from bincio.extract.models import DataPoint, LapData, ParsedActivity +from bincio.extract.sport import normalise_sport + +_TOKEN_FILE = "strava_token.json" +_AUTH_URL = "https://www.strava.com/oauth/authorize" +_TOKEN_URL = "https://www.strava.com/oauth/token" +_API_BASE = "https://www.strava.com/api/v3" + + +class StravaError(Exception): + pass + + +# ── OAuth helpers ────────────────────────────────────────────────────────────── + +def auth_url(client_id: str, redirect_uri: str) -> str: + """Return the Strava OAuth authorization URL.""" + params = urllib.parse.urlencode({ + "client_id": client_id, + "redirect_uri": redirect_uri, + "response_type": "code", + "scope": "activity:read_all", + "approval_prompt": "auto", + }) + return f"{_AUTH_URL}?{params}" + + +def exchange_code(client_id: str, client_secret: str, code: str) -> dict: + """Exchange an authorization code for access + refresh tokens.""" + data = urllib.parse.urlencode({ + "client_id": 
client_id, + "client_secret": client_secret, + "code": code, + "grant_type": "authorization_code", + }).encode() + req = urllib.request.Request(_TOKEN_URL, data=data, method="POST") + try: + with urllib.request.urlopen(req) as resp: + return json.loads(resp.read()) + except urllib.error.HTTPError as e: + raise StravaError(f"Token exchange failed: {e.code} {e.read().decode()[:200]}") + + +def _refresh(client_id: str, client_secret: str, refresh_token: str) -> dict: + data = urllib.parse.urlencode({ + "client_id": client_id, + "client_secret": client_secret, + "refresh_token": refresh_token, + "grant_type": "refresh_token", + }).encode() + req = urllib.request.Request(_TOKEN_URL, data=data, method="POST") + try: + with urllib.request.urlopen(req) as resp: + return json.loads(resp.read()) + except urllib.error.HTTPError as e: + raise StravaError(f"Token refresh failed: {e.code} {e.read().decode()[:200]}") + + +# ── Token storage ────────────────────────────────────────────────────────────── + +def load_token(data_dir: Path) -> Optional[dict]: + p = data_dir / _TOKEN_FILE + if not p.exists(): + return None + try: + return json.loads(p.read_text()) + except Exception: + return None + + +def save_token(data_dir: Path, token: dict) -> None: + (data_dir / _TOKEN_FILE).write_text(json.dumps(token, indent=2)) + + +def ensure_fresh(data_dir: Path, client_id: str, client_secret: str) -> dict: + """Load the stored token, refresh if expiring soon, persist and return it.""" + token = load_token(data_dir) + if token is None: + raise StravaError("Not connected to Strava") + if time.time() > token.get("expires_at", 0) - 60: + refreshed = _refresh(client_id, client_secret, token["refresh_token"]) + token.update(refreshed) + save_token(data_dir, token) + return token + + +# ── API calls ────────────────────────────────────────────────────────────────── + +def _api_get(url: str, access_token: str) -> dict | list: + req = urllib.request.Request(url, headers={"Authorization": f"Bearer 
{access_token}"}) + try: + with urllib.request.urlopen(req) as resp: + return json.loads(resp.read()) + except urllib.error.HTTPError as e: + raise StravaError(f"Strava API {e.code}: {e.read().decode()[:200]}") + + +def fetch_activities(access_token: str, after: Optional[int] = None) -> list[dict]: + """Fetch all activity summaries, paged, optionally after a Unix timestamp.""" + results: list[dict] = [] + page = 1 + while True: + params: dict = {"per_page": 200, "page": page} + if after: + params["after"] = after + qs = urllib.parse.urlencode(params) + batch = _api_get(f"{_API_BASE}/athlete/activities?{qs}", access_token) + if not isinstance(batch, list) or not batch: + break + results.extend(batch) + if len(batch) < 200: + break + page += 1 + return results + + +def fetch_streams(access_token: str, activity_id: int) -> dict: + """Fetch time-series streams for a single activity.""" + keys = "time,latlng,altitude,heartrate,cadence,watts,velocity_smooth" + result = _api_get( + f"{_API_BASE}/activities/{activity_id}/streams?keys={keys}&key_by_type=true", + access_token, + ) + return result if isinstance(result, dict) else {} + + +# ── Model conversion ─────────────────────────────────────────────────────────── + +def strava_meta_to_partial(meta: dict) -> ParsedActivity: + """Build a minimal ParsedActivity from activity meta (no streams) — enough to compute the ID.""" + started_at = datetime.fromisoformat(meta["start_date"].replace("Z", "+00:00")) + return ParsedActivity( + points=[], + sport=normalise_sport(meta.get("sport_type") or meta.get("type") or ""), + started_at=started_at, + source_file=f"strava:{meta['id']}", + source_hash="", + title=meta.get("name") or None, + ) + + +def strava_to_parsed(meta: dict, streams: dict) -> ParsedActivity: + """Convert a Strava activity summary + streams dict to ParsedActivity.""" + started_at = datetime.fromisoformat(meta["start_date"].replace("Z", "+00:00")) + start_ts = started_at.timestamp() + + time_data = 
streams.get("time", {}).get("data", []) + latlng_data = streams.get("latlng", {}).get("data", []) + alt_data = streams.get("altitude", {}).get("data", []) + hr_data = streams.get("heartrate", {}).get("data", []) + cad_data = streams.get("cadence", {}).get("data", []) + pwr_data = streams.get("watts", {}).get("data", []) + vel_data = streams.get("velocity_smooth", {}).get("data", []) + + def _get(lst: list, i: int): + return lst[i] if i < len(lst) else None + + points: list[DataPoint] = [] + for i, t_offset in enumerate(time_data): + ll = _get(latlng_data, i) + lat, lon = (ll[0], ll[1]) if ll else (None, None) + vel = _get(vel_data, i) + points.append(DataPoint( + timestamp=datetime.fromtimestamp(start_ts + t_offset, tz=timezone.utc), + lat=lat, + lon=lon, + elevation_m=_get(alt_data, i), + hr_bpm=_get(hr_data, i), + cadence_rpm=_get(cad_data, i), + power_w=_get(pwr_data, i), + speed_kmh=(vel * 3.6) if vel is not None else None, + )) + + # Deterministic source hash based on the Strava activity ID + source = f"strava:{meta['id']}" + source_hash = "sha256:" + hashlib.sha256(source.encode()).hexdigest() + + return ParsedActivity( + points=points, + sport=normalise_sport(meta.get("sport_type") or meta.get("type") or ""), + started_at=started_at, + source_file=source, + source_hash=source_hash, + title=meta.get("name") or None, + description=meta.get("description") or None, + strava_id=str(meta["id"]), + ) diff --git a/site/src/layouts/Base.astro b/site/src/layouts/Base.astro index 3f9cf8f..71dd0e4 100644 --- a/site/src/layouts/Base.astro +++ b/site/src/layouts/Base.astro @@ -124,28 +124,80 @@ const baseUrl = import.meta.env.BASE_URL ?? '/'; {editUrl && ( - +