sync strava data from web ui

This commit is contained in:
Davide Scaini
2026-04-06 12:38:41 +02:00
parent ad2710e759
commit 17f36889f3
5 changed files with 605 additions and 34 deletions
+33 -6
View File
@@ -20,11 +20,17 @@ console = Console()
help="URL of the Astro dev server (for the Back link).")
@click.option("--config", "config_path", default=None,
help="Path to extract_config.yaml (reads output.dir from it).")
@click.option("--strava-client-id", default=None, envvar="STRAVA_CLIENT_ID",
help="Strava API client ID (enables Strava sync in the UI). Also reads STRAVA_CLIENT_ID env var.")
@click.option("--strava-client-secret", default=None, envvar="STRAVA_CLIENT_SECRET",
help="Strava API client secret. Also reads STRAVA_CLIENT_SECRET env var.")
def edit(
data_dir: Optional[str],
port: int,
site_url: str,
config_path: Optional[str],
strava_client_id: Optional[str],
strava_client_secret: Optional[str],
) -> None:
"""Start a local web UI for editing activity sidecar files.
@@ -46,6 +52,13 @@ def edit(
)
data = _resolve_data_dir(data_dir, config_path)
# Fall back to extract_config.yaml for Strava credentials
if not strava_client_id or not strava_client_secret:
cfg_strava = _load_config(config_path).get("import", {}).get("strava", {})
strava_client_id = strava_client_id or str(cfg_strava.get("client_id") or "")
strava_client_secret = strava_client_secret or str(cfg_strava.get("client_secret") or "")
console.print(f"Data dir: [cyan]{data}[/cyan]")
console.print(f"Edit UI: [cyan]http://localhost:{port}/edit/<activity-id>[/cyan]")
console.print(f"Site URL: [cyan]{site_url}[/cyan]")
@@ -54,20 +67,34 @@ def edit(
import bincio.edit.server as srv
srv.data_dir = data
srv.site_url = site_url
srv.strava_client_id = strava_client_id or ""
srv.strava_client_secret = strava_client_secret or ""
if strava_client_id:
console.print(f"Strava sync: [green]enabled[/green] (client {strava_client_id})")
else:
console.print("Strava sync: [yellow]disabled[/yellow] (pass --strava-client-id to enable)")
uvicorn.run(srv.app, host="127.0.0.1", port=port, log_level="warning")
def _load_config(config_path: Optional[str]) -> dict:
"""Load extract_config.yaml — explicit path first, then cwd auto-discovery."""
import yaml
for cfg in filter(None, [config_path and Path(config_path), Path("extract_config.yaml")]):
if Path(cfg).exists():
return yaml.safe_load(Path(cfg).read_text()) or {}
return {}
def _resolve_data_dir(explicit: Optional[str], config_path: Optional[str]) -> Path:
if explicit:
return Path(explicit).expanduser().resolve()
if config_path and Path(config_path).exists():
import yaml
raw = yaml.safe_load(Path(config_path).read_text()) or {}
out = raw.get("output", {}).get("dir")
if out:
return Path(out).expanduser().resolve()
raw = _load_config(config_path)
out = raw.get("output", {}).get("dir")
if out:
return Path(out).expanduser().resolve()
default = Path.cwd() / "bincio_data"
if default.exists():
+116 -1
View File
@@ -5,16 +5,19 @@ from __future__ import annotations
import json
import re
import shutil
import time
from pathlib import Path
from typing import Any
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi import FastAPI, File, HTTPException, Request, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse, JSONResponse, RedirectResponse
# Module-level settings. They are populated by the CLI before uvicorn starts,
# and the request handlers read them as globals.
# Root directory of the activity data store (None until the CLI sets it).
data_dir: Path | None = None
# Base URL of the site; the Strava OAuth callback redirects back to it.
site_url: str = "http://localhost:4321"
# Strava API credentials. An empty client ID makes the status endpoint report
# Strava as not configured, and the auth/sync endpoints reject requests.
strava_client_id: str = ""
strava_client_secret: str = ""
# The ASGI application; docs_url/redoc_url are None to turn off the
# auto-generated documentation pages.
app = FastAPI(title="BincioActivity Edit Server", docs_url=None, redoc_url=None)
@@ -618,3 +621,115 @@ async def delete_image(activity_id: str, filename: str) -> JSONResponse:
if not any(target.parent.iterdir()):
shutil.rmtree(target.parent)
return JSONResponse({"ok": True})
# ── Strava sync ───────────────────────────────────────────────────────────────
@app.get("/api/strava/status")
async def strava_status() -> JSONResponse:
    """Report Strava configuration and connection state.

    The JSON body has three keys: ``configured`` (a client ID was supplied),
    ``connected`` (a stored token could be loaded) and ``last_sync`` (the
    stored token's ``last_sync_at`` value, or ``None`` when unavailable).
    """
    store = _get_data_dir()
    from bincio.extract.strava_api import load_token

    stored = load_token(store)
    if stored:
        last_sync = stored.get("last_sync_at")
    else:
        last_sync = None
    payload = {
        "configured": bool(strava_client_id),
        "connected": stored is not None,
        "last_sync": last_sync,
    }
    return JSONResponse(payload)
@app.get("/api/strava/auth-url")
async def strava_auth_url(request: Request) -> JSONResponse:
    """Build the Strava OAuth URL the browser should be sent to.

    The redirect URI is derived from the incoming request so that it points
    at this server's ``strava_callback`` route.

    Raises:
        HTTPException: 400 when no Strava client ID has been configured.
    """
    if strava_client_id:
        callback_uri = str(request.url_for("strava_callback"))
        from bincio.extract.strava_api import auth_url

        target = auth_url(strava_client_id, callback_uri)
        return JSONResponse({"url": target})
    raise HTTPException(400, "Strava client ID not configured. Pass --strava-client-id to bincio edit.")
@app.get("/api/strava/callback", name="strava_callback")
async def strava_callback(code: str = "", error: str = "") -> RedirectResponse:
    """Strava OAuth callback: exchange the code for a token, then redirect.

    Args:
        code: Authorization code supplied by Strava on success.
        error: Error marker supplied by Strava when the user denied access.

    Returns:
        A redirect to ``<site_url>?strava=connected`` once the token has been
        stored, or to ``<site_url>?strava=error`` for any failure (denied
        access, missing credentials, failed or malformed token exchange).
    """
    failure_url = f"{site_url}?strava=error"
    if error or not code:
        return RedirectResponse(failure_url)
    if not strava_client_id or not strava_client_secret:
        return RedirectResponse(failure_url)
    dd = _get_data_dir()
    from bincio.extract.strava_api import StravaError, exchange_code, save_token

    # NOTE(review): exchange_code performs blocking network I/O inside this
    # coroutine; acceptable for a single-user local tool, but worth confirming.
    try:
        token = exchange_code(strava_client_id, strava_client_secret, code)
    except (StravaError, OSError, ValueError):
        # OSError covers urllib's URLError and socket timeouts; ValueError
        # covers a non-JSON response body. The browser must always land back
        # on the site rather than on a bare 500 page.
        return RedirectResponse(failure_url)
    # Never persist a payload that cannot act as a token.
    if not isinstance(token, dict) or not token.get("access_token"):
        return RedirectResponse(failure_url)
    # Stamp last_sync_at at connect time so the first sync only fetches new activities
    token.setdefault("last_sync_at", int(time.time()))
    save_token(dd, token)
    return RedirectResponse(f"{site_url}?strava=connected")
@app.post("/api/strava/sync")
async def strava_sync() -> JSONResponse:
    """Fetch new Strava activities since the last sync and store them.

    Activities whose output file already exists are skipped without fetching
    their streams. Per-activity failures are collected instead of aborting
    the whole sync.

    Returns:
        JSON with ``ok``, the ``imported`` and ``skipped`` counts, and at most
        five ``errors`` entries of the form ``"<strava id>: <exception type>"``.

    Raises:
        HTTPException: 400 when Strava credentials are not configured; 502
            when the token cannot be refreshed or the activity list cannot
            be fetched.
    """
    if not strava_client_id or not strava_client_secret:
        raise HTTPException(400, "Strava not configured. Pass --strava-client-id and --strava-client-secret to bincio edit.")
    dd = _get_data_dir()

    from bincio.extract.strava_api import (
        StravaError, ensure_fresh, fetch_activities, fetch_streams,
        save_token, strava_to_parsed,
    )

    try:
        token = ensure_fresh(dd, strava_client_id, strava_client_secret)
    except StravaError as e:
        raise HTTPException(502, str(e)) from e

    after: int | None = token.get("last_sync_at")
    # Captured BEFORE listing so that anything recorded while this sync is
    # running still falls after the stored cursor on the next sync.
    sync_started_at = int(time.time())
    try:
        activities = fetch_activities(token["access_token"], after=after)
    except StravaError as e:
        raise HTTPException(502, str(e)) from e

    from bincio.extract.metrics import compute
    from bincio.extract.writer import build_summary, make_activity_id, write_activity, write_index
    from bincio.extract.strava_api import strava_meta_to_partial
    from bincio.render.merge import merge_all

    # Load existing index once
    index_path = dd / "index.json"
    if index_path.exists():
        index_data = json.loads(index_path.read_text(encoding="utf-8"))
    else:
        index_data = {"owner": {"handle": "unknown"}, "activities": []}
    owner = index_data.get("owner", {})
    summaries: dict[str, dict] = {s["id"]: s for s in index_data.get("activities", [])}

    imported = 0
    skipped = 0
    errors: list[str] = []

    for meta in activities:
        # Deliberately broad: one bad activity must not abort the whole sync.
        try:
            # Compute ID from meta alone (no API call) to skip already-known activities
            activity_id = make_activity_id(strava_meta_to_partial(meta))
            if (dd / "activities" / f"{activity_id}.json").exists():
                skipped += 1
                continue
            # Only fetch streams for genuinely new activities
            streams = fetch_streams(token["access_token"], meta["id"])
            parsed = strava_to_parsed(meta, streams)
            metrics = compute(parsed)
            write_activity(parsed, metrics, dd, privacy="public", rdp_epsilon=0.0001)
            summaries[activity_id] = build_summary(parsed, metrics, activity_id, "public")
            imported += 1
        except Exception as exc:
            errors.append(f"{meta.get('id')}: {type(exc).__name__}")

    if imported:
        write_index(list(summaries.values()), dd, owner)
        merge_all(dd)

    # Only advance the cursor after a fully clean run. Moving it past failed
    # activities (e.g. a rate-limited stream fetch) would hide them from the
    # "after" filter forever; leaving it in place lets the next sync retry
    # them while already-imported ones are skipped via the file check above.
    if not errors:
        token["last_sync_at"] = sync_started_at
        save_token(dd, token)
    return JSONResponse({"ok": True, "imported": imported, "skipped": skipped, "errors": errors[:5]})
+211
View File
@@ -0,0 +1,211 @@
"""Strava OAuth + activity API sync.
Token is stored in <data_dir>/strava_token.json:
{access_token, refresh_token, expires_at, last_sync_at?}
Usage:
1. Build an auth URL and redirect the user to it.
2. Exchange the returned code for a token (exchange_code).
3. On subsequent syncs, call ensure_fresh() then fetch_activities() + fetch_streams().
4. Convert each result to ParsedActivity with strava_to_parsed().
"""
from __future__ import annotations
import hashlib
import json
import time
import urllib.error
import urllib.parse
import urllib.request
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional
from bincio.extract.models import DataPoint, LapData, ParsedActivity
from bincio.extract.sport import normalise_sport
# File name of the persisted OAuth token, stored directly inside the data dir.
_TOKEN_FILE = "strava_token.json"
# Browser-facing OAuth authorization page.
_AUTH_URL = "https://www.strava.com/oauth/authorize"
# Endpoint used for both the code exchange and the token refresh POSTs.
_TOKEN_URL = "https://www.strava.com/oauth/token"
# Common prefix of the REST API calls made by this module.
_API_BASE = "https://www.strava.com/api/v3"
class StravaError(Exception):
    """Raised when a Strava OAuth or API request fails, or no token is stored."""
# ── OAuth helpers ──────────────────────────────────────────────────────────────
def auth_url(client_id: str, redirect_uri: str) -> str:
    """Build the Strava OAuth authorization URL for the given client.

    The URL requests the ``activity:read_all`` scope with the ``code``
    response type and sends the user back to ``redirect_uri``.
    """
    query = {
        "client_id": client_id,
        "redirect_uri": redirect_uri,
        "response_type": "code",
        "scope": "activity:read_all",
        "approval_prompt": "auto",
    }
    encoded = urllib.parse.urlencode(query)
    return _AUTH_URL + "?" + encoded
def exchange_code(client_id: str, client_secret: str, code: str, timeout: float = 30.0) -> dict:
    """Exchange an authorization code for access + refresh tokens.

    Args:
        client_id: Strava API client ID.
        client_secret: Strava API client secret.
        code: Authorization code received on the OAuth callback.
        timeout: Seconds to wait for the token endpoint before giving up.

    Returns:
        The decoded JSON token payload.

    Raises:
        StravaError: On an HTTP error status, a network failure or timeout,
            or a response body that is not valid JSON.
    """
    data = urllib.parse.urlencode({
        "client_id": client_id,
        "client_secret": client_secret,
        "code": code,
        "grant_type": "authorization_code",
    }).encode()
    req = urllib.request.Request(_TOKEN_URL, data=data, method="POST")
    try:
        # Without a timeout a stalled connection would hang the caller forever.
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            return json.loads(resp.read())
    except urllib.error.HTTPError as e:
        body = e.read().decode(errors="replace")[:200]
        raise StravaError(f"Token exchange failed: {e.code} {body}") from e
    except OSError as e:
        # URLError (DNS failure, refused connection) and socket timeouts are
        # OSError subclasses; callers only handle StravaError.
        raise StravaError(f"Token exchange failed: {e}") from e
    except ValueError as e:
        raise StravaError("Token exchange failed: response was not valid JSON") from e
def _refresh(client_id: str, client_secret: str, refresh_token: str, timeout: float = 30.0) -> dict:
    """Trade a refresh token for a new token payload.

    Args:
        client_id: Strava API client ID.
        client_secret: Strava API client secret.
        refresh_token: Refresh token from the stored token.
        timeout: Seconds to wait for the token endpoint before giving up.

    Returns:
        The decoded JSON token payload.

    Raises:
        StravaError: On an HTTP error status, a network failure or timeout,
            or a response body that is not valid JSON.
    """
    data = urllib.parse.urlencode({
        "client_id": client_id,
        "client_secret": client_secret,
        "refresh_token": refresh_token,
        "grant_type": "refresh_token",
    }).encode()
    req = urllib.request.Request(_TOKEN_URL, data=data, method="POST")
    try:
        # Without a timeout a stalled connection would hang the caller forever.
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            return json.loads(resp.read())
    except urllib.error.HTTPError as e:
        body = e.read().decode(errors="replace")[:200]
        raise StravaError(f"Token refresh failed: {e.code} {body}") from e
    except OSError as e:
        # URLError and socket timeouts are OSError subclasses; callers only
        # handle StravaError.
        raise StravaError(f"Token refresh failed: {e}") from e
    except ValueError as e:
        raise StravaError("Token refresh failed: response was not valid JSON") from e
# ── Token storage ──────────────────────────────────────────────────────────────
def load_token(data_dir: Path) -> Optional[dict]:
    """Read the stored Strava token from ``data_dir``.

    Args:
        data_dir: Directory that holds the token file.

    Returns:
        The token dict, or ``None`` when the file is missing, unreadable,
        not valid JSON, or does not contain a JSON object.
    """
    try:
        # Read directly instead of exists()-then-read: one syscall fewer and
        # no window in which the file can vanish between the two steps.
        token = json.loads((data_dir / _TOKEN_FILE).read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError: missing or unreadable file. ValueError: bad JSON or encoding.
        return None
    # Callers use dict methods on the result; treat anything else as absent.
    return token if isinstance(token, dict) else None
def save_token(data_dir: Path, token: dict) -> None:
    """Persist ``token`` as pretty-printed JSON inside ``data_dir``.

    The payload is written to a temporary sibling file and then renamed over
    the real one, so an interrupted write cannot leave a truncated token file
    behind.

    Args:
        data_dir: Directory that holds the token file.
        token: Token payload to store.
    """
    target = data_dir / _TOKEN_FILE
    scratch = target.with_name(target.name + ".tmp")
    scratch.write_text(json.dumps(token, indent=2), encoding="utf-8")
    try:
        # The file holds OAuth secrets; keep it private to the owner.
        scratch.chmod(0o600)
    except OSError:
        # Best effort only: some filesystems do not support permission bits.
        pass
    scratch.replace(target)
def ensure_fresh(data_dir: Path, client_id: str, client_secret: str) -> dict:
    """Load the stored token, refresh it if expiring soon, persist and return it.

    Args:
        data_dir: Directory that holds the token file.
        client_id: Strava API client ID, used for the refresh request.
        client_secret: Strava API client secret, used for the refresh request.

    Returns:
        The (possibly refreshed) token dict.

    Raises:
        StravaError: When no token is stored, the stored token has no
            refresh token, or the refresh request fails.
    """
    token = load_token(data_dir)
    if token is None:
        raise StravaError("Not connected to Strava")
    # Refresh one minute early so the token cannot expire mid-request.
    if time.time() > token.get("expires_at", 0) - 60:
        refresh_token = token.get("refresh_token")
        if not refresh_token:
            # Raise the module's own error type: callers only handle
            # StravaError, so a bare KeyError would surface as a crash.
            raise StravaError("Stored Strava token has no refresh token; reconnect to Strava")
        token.update(_refresh(client_id, client_secret, refresh_token))
        save_token(data_dir, token)
    return token
# ── API calls ──────────────────────────────────────────────────────────────────
def _api_get(url: str, access_token: str, timeout: float = 30.0) -> dict | list:
    """GET ``url`` with a bearer token and return the decoded JSON body.

    Args:
        url: Full request URL.
        access_token: OAuth access token sent in the Authorization header.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        StravaError: On an HTTP error status, a network failure or timeout,
            or a response body that is not valid JSON.
    """
    req = urllib.request.Request(url, headers={"Authorization": f"Bearer {access_token}"})
    try:
        # Without a timeout a stalled connection would hang the caller forever.
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            return json.loads(resp.read())
    except urllib.error.HTTPError as e:
        body = e.read().decode(errors="replace")[:200]
        raise StravaError(f"Strava API {e.code}: {body}") from e
    except OSError as e:
        # URLError and socket timeouts are OSError subclasses; callers only
        # handle StravaError.
        raise StravaError(f"Strava API request failed: {e}") from e
    except ValueError as e:
        raise StravaError("Strava API returned a response that was not valid JSON") from e
def fetch_activities(access_token: str, after: Optional[int] = None) -> list[dict]:
    """Collect activity summaries across all result pages.

    Pages of 200 are requested until a page comes back empty, is not a list,
    or is shorter than a full page. When ``after`` is truthy it is sent as
    the ``after`` query parameter.
    """
    page_size = 200
    collected: list[dict] = []
    page_no = 1
    more = True
    while more:
        query: dict = {"per_page": page_size, "page": page_no}
        if after:
            query["after"] = after
        encoded = urllib.parse.urlencode(query)
        batch = _api_get(f"{_API_BASE}/athlete/activities?{encoded}", access_token)
        if not (isinstance(batch, list) and batch):
            break
        collected.extend(batch)
        # A short page means the server has nothing further to return.
        more = len(batch) >= page_size
        page_no += 1
    return collected
def fetch_streams(access_token: str, activity_id: int) -> dict:
    """Request the time-series streams of one activity.

    Returns the decoded response when it is a dict, otherwise an empty dict.
    """
    keys = "time,latlng,altitude,heartrate,cadence,watts,velocity_smooth"
    url = f"{_API_BASE}/activities/{activity_id}/streams?keys={keys}&key_by_type=true"
    payload = _api_get(url, access_token)
    if isinstance(payload, dict):
        return payload
    return {}
# ── Model conversion ───────────────────────────────────────────────────────────
def strava_meta_to_partial(meta: dict) -> ParsedActivity:
    """Create a stream-less ParsedActivity from activity meta.

    It carries no data points and an empty source hash; it exists so an
    activity ID can be computed without fetching the streams.
    """
    iso_start = meta["start_date"].replace("Z", "+00:00")
    raw_sport = meta.get("sport_type") or meta.get("type") or ""
    fields = {
        "points": [],
        "sport": normalise_sport(raw_sport),
        "started_at": datetime.fromisoformat(iso_start),
        "source_file": f"strava:{meta['id']}",
        "source_hash": "",
        "title": meta.get("name") or None,
    }
    return ParsedActivity(**fields)
def strava_to_parsed(meta: dict, streams: dict) -> ParsedActivity:
    """Turn a Strava activity summary plus its streams into a ParsedActivity.

    One DataPoint is produced per entry of the ``time`` stream; the other
    streams are read at the same index, and a point gets ``None`` for any
    stream that is shorter or absent.
    """
    started_at = datetime.fromisoformat(meta["start_date"].replace("Z", "+00:00"))
    epoch_start = started_at.timestamp()

    def _series(key: str) -> list:
        # Absent stream -> empty list, so every index lookup yields None.
        return streams.get(key, {}).get("data", [])

    def _at(values: list, idx: int):
        if idx < len(values):
            return values[idx]
        return None

    offsets = _series("time")
    positions = _series("latlng")
    altitudes = _series("altitude")
    heart_rates = _series("heartrate")
    cadences = _series("cadence")
    powers = _series("watts")
    velocities = _series("velocity_smooth")

    points: list[DataPoint] = []
    for idx, offset in enumerate(offsets):
        pair = _at(positions, idx)
        if pair:
            lat, lon = pair[0], pair[1]
        else:
            lat, lon = None, None
        velocity = _at(velocities, idx)
        # The velocity value is multiplied by 3.6 to fill the km/h field.
        speed_kmh = None if velocity is None else velocity * 3.6
        points.append(DataPoint(
            timestamp=datetime.fromtimestamp(epoch_start + offset, tz=timezone.utc),
            lat=lat,
            lon=lon,
            elevation_m=_at(altitudes, idx),
            hr_bpm=_at(heart_rates, idx),
            cadence_rpm=_at(cadences, idx),
            power_w=_at(powers, idx),
            speed_kmh=speed_kmh,
        ))

    # The hash is derived from the Strava activity ID alone, so it stays the
    # same across re-imports of the same activity.
    source = f"strava:{meta['id']}"
    digest = hashlib.sha256(source.encode()).hexdigest()

    return ParsedActivity(
        points=points,
        sport=normalise_sport(meta.get("sport_type") or meta.get("type") or ""),
        started_at=started_at,
        source_file=source,
        source_hash="sha256:" + digest,
        title=meta.get("name") or None,
        description=meta.get("description") or None,
        strava_id=str(meta["id"]),
    )