improve configs, update docs
This commit is contained in:
@@ -0,0 +1 @@
|
||||
"""BincioActivity importers — pull data from external platforms."""
|
||||
@@ -0,0 +1,134 @@
|
||||
"""bincio import — CLI command group for external platform importers."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import click
|
||||
from rich.console import Console
|
||||
|
||||
console = Console()
|
||||
|
||||
|
||||
@click.group("import")
|
||||
def import_group() -> None:
|
||||
"""Import activities from external platforms (Strava, Garmin, …)."""
|
||||
|
||||
|
||||
@import_group.command("strava")
|
||||
@click.option("--client-id", default=None, envvar="STRAVA_CLIENT_ID",
|
||||
help="Strava API client ID. Falls back to import.strava.client_id in extract_config.yaml.")
|
||||
@click.option("--client-secret", default=None, envvar="STRAVA_CLIENT_SECRET",
|
||||
help="Strava API client secret. Falls back to import.strava.client_secret in extract_config.yaml.")
|
||||
@click.option("--output", "output_dir", default=None,
|
||||
help="BAS data store directory (default: from config or ~/bincio_data).")
|
||||
@click.option("--config", "config_path", default=None,
|
||||
help="Path to extract_config.yaml (default: ./extract_config.yaml).")
|
||||
@click.option("--since", default=None, metavar="YYYY-MM-DD",
|
||||
help="Only import activities after this date (default: incremental from last sync).")
|
||||
@click.option("--reauth", is_flag=True, default=False,
|
||||
help="Force re-authorization even if valid tokens exist.")
|
||||
def strava_cmd(
|
||||
client_id: Optional[str],
|
||||
client_secret: Optional[str],
|
||||
output_dir: Optional[str],
|
||||
config_path: Optional[str],
|
||||
since: Optional[str],
|
||||
reauth: bool,
|
||||
) -> None:
|
||||
"""Import activities from Strava.
|
||||
|
||||
Credentials are resolved in this order:
|
||||
1. --client-id / --client-secret flags
|
||||
2. STRAVA_CLIENT_ID / STRAVA_CLIENT_SECRET environment variables
|
||||
3. import.strava.client_id / client_secret in extract_config.yaml
|
||||
|
||||
Tokens are saved to ~/.config/bincio/strava.json and refreshed automatically.
|
||||
|
||||
\b
|
||||
How to get API credentials (takes ~2 minutes, no approval needed):
|
||||
1. Go to strava.com/settings/api
|
||||
2. Create an application (name/website can be anything;
|
||||
Authorization Callback Domain: localhost)
|
||||
3. Copy the Client ID and Client Secret into extract_config.yaml:
|
||||
|
||||
\b
|
||||
import:
|
||||
strava:
|
||||
client_id: 12345
|
||||
client_secret: your_secret_here
|
||||
|
||||
\b
|
||||
Examples:
|
||||
bincio import strava # uses extract_config.yaml
|
||||
bincio import strava --since 2025-01-01 # only activities from 2025
|
||||
bincio import strava --reauth # force fresh OAuth flow
|
||||
"""
|
||||
try:
|
||||
import requests # noqa: F401
|
||||
except ImportError:
|
||||
raise click.ClickException(
|
||||
"requests is required for the Strava importer.\n"
|
||||
"Install with: uv sync --extra strava"
|
||||
)
|
||||
|
||||
from bincio.import_.strava import StravaClient, TOKENS_FILE, sync as strava_sync
|
||||
|
||||
# Load config to get credentials + output dir if not given on CLI
|
||||
cfg = _load_config(config_path)
|
||||
|
||||
# Resolve credentials: CLI flag > env var (already consumed by click) > config file
|
||||
if not client_id and cfg and cfg.strava:
|
||||
client_id = cfg.strava.client_id or None
|
||||
if not client_secret and cfg and cfg.strava:
|
||||
client_secret = cfg.strava.client_secret or None
|
||||
|
||||
if not client_id or not client_secret:
|
||||
raise click.UsageError(
|
||||
"Strava client ID and secret are required.\n"
|
||||
"Add them to extract_config.yaml under import.strava, or pass --client-id/--client-secret."
|
||||
)
|
||||
|
||||
out = _resolve_output(output_dir, cfg)
|
||||
console.print(f"Output dir: [cyan]{out}[/cyan]")
|
||||
|
||||
if reauth and TOKENS_FILE.exists():
|
||||
TOKENS_FILE.unlink()
|
||||
console.print("Removed saved tokens — starting fresh OAuth flow.")
|
||||
|
||||
client = StravaClient(client_id, client_secret, console)
|
||||
client.authenticate()
|
||||
|
||||
since_dt = None
|
||||
if since:
|
||||
from datetime import datetime, timezone
|
||||
try:
|
||||
since_dt = datetime.strptime(since, "%Y-%m-%d").replace(tzinfo=timezone.utc)
|
||||
except ValueError:
|
||||
raise click.BadParameter(f"Expected YYYY-MM-DD, got {since!r}", param_hint="--since")
|
||||
|
||||
strava_sync(client, out, since_dt, console)
|
||||
|
||||
|
||||
def _load_config(config_path: Optional[str]):
|
||||
"""Load extract_config.yaml if available; return None if not found."""
|
||||
from bincio.extract.config import load_config
|
||||
candidates = []
|
||||
if config_path:
|
||||
candidates.append(Path(config_path))
|
||||
candidates.append(Path("extract_config.yaml"))
|
||||
for p in candidates:
|
||||
if p.exists():
|
||||
return load_config(p)
|
||||
return None
|
||||
|
||||
|
||||
def _resolve_output(explicit: Optional[str], cfg) -> Path:
|
||||
if explicit:
|
||||
return Path(explicit).expanduser().resolve()
|
||||
if cfg and cfg.output_dir:
|
||||
return cfg.output_dir
|
||||
default = Path.home() / "bincio_data"
|
||||
console.print(f"[yellow]No output dir specified; using [cyan]{default}[/cyan]")
|
||||
return default
|
||||
@@ -0,0 +1,388 @@
|
||||
"""Strava API importer for BincioActivity.
|
||||
|
||||
Converts Strava activities + streams into BAS JSON using the same extract
|
||||
pipeline (ParsedActivity → compute() → write_activity()) as local files.
|
||||
|
||||
OAuth tokens are stored in ~/.config/bincio/strava.json and refreshed
|
||||
automatically. No server needed — the OAuth dance uses a one-shot local
|
||||
callback server (same pattern as `gh auth login`).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import dataclasses
|
||||
import hashlib
|
||||
import json
|
||||
import secrets
|
||||
import time
|
||||
import webbrowser
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from http.server import BaseHTTPRequestHandler, HTTPServer
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from urllib.parse import parse_qs, urlencode, urlparse
|
||||
|
||||
from rich.console import Console
|
||||
from rich.progress import BarColumn, MofNCompleteColumn, Progress, TextColumn, TimeElapsedColumn
|
||||
|
||||
from bincio.extract.models import DataPoint, ParsedActivity
|
||||
from bincio.extract.sport import normalise_sport
|
||||
|
||||
STRAVA_AUTH_URL = "https://www.strava.com/oauth/authorize"
|
||||
STRAVA_TOKEN_URL = "https://www.strava.com/oauth/token"
|
||||
STRAVA_API_BASE = "https://www.strava.com/api/v3"
|
||||
|
||||
TOKENS_FILE = Path.home() / ".config" / "bincio" / "strava.json"
|
||||
SYNC_FILE = "_strava_sync.json" # lives in output_dir
|
||||
CALLBACK_PORT = 8976
|
||||
STREAM_KEYS = "time,latlng,altitude,heartrate,cadence,watts,velocity_smooth"
|
||||
|
||||
|
||||
# ── API client ────────────────────────────────────────────────────────────────
|
||||
|
||||
class StravaClient:
|
||||
def __init__(self, client_id: str, client_secret: str, console: Console) -> None:
|
||||
self.client_id = client_id
|
||||
self.client_secret = client_secret
|
||||
self._console = console
|
||||
self._tokens: dict = {}
|
||||
self._15min_used = 0
|
||||
self._daily_used = 0
|
||||
|
||||
# ── auth ──────────────────────────────────────────────────────────────────
|
||||
|
||||
def authenticate(self) -> None:
|
||||
"""Load saved tokens (refreshing if needed) or run the OAuth dance."""
|
||||
if TOKENS_FILE.exists():
|
||||
saved = json.loads(TOKENS_FILE.read_text(encoding="utf-8"))
|
||||
if saved.get("client_id") == self.client_id:
|
||||
self._tokens = saved
|
||||
self._ensure_fresh()
|
||||
self._console.print("[green]✓[/green] Authenticated via saved tokens.")
|
||||
return
|
||||
self._oauth_dance()
|
||||
|
||||
def _ensure_fresh(self) -> None:
|
||||
if time.time() > self._tokens.get("expires_at", 0) - 60:
|
||||
self._refresh()
|
||||
|
||||
def _refresh(self) -> None:
|
||||
import requests
|
||||
r = requests.post(STRAVA_TOKEN_URL, data={
|
||||
"client_id": self.client_id,
|
||||
"client_secret": self.client_secret,
|
||||
"grant_type": "refresh_token",
|
||||
"refresh_token": self._tokens["refresh_token"],
|
||||
}, timeout=30)
|
||||
r.raise_for_status()
|
||||
self._tokens.update(r.json())
|
||||
self._tokens["client_id"] = self.client_id
|
||||
self._save_tokens()
|
||||
|
||||
def _oauth_dance(self) -> None:
|
||||
"""Open browser for OAuth2 authorization, receive callback."""
|
||||
import requests
|
||||
state = secrets.token_urlsafe(16)
|
||||
code_holder: dict[str, str] = {}
|
||||
|
||||
class _Handler(BaseHTTPRequestHandler):
|
||||
def do_GET(self) -> None:
|
||||
qs = parse_qs(urlparse(self.path).query)
|
||||
if qs.get("state", [None])[0] == state:
|
||||
code_holder["code"] = qs.get("code", [None])[0] or ""
|
||||
self.send_response(200)
|
||||
self.end_headers()
|
||||
self.wfile.write(
|
||||
b"<html><body style='font-family:sans-serif;padding:2rem'>"
|
||||
b"<h2>Authorized! You can close this tab.</h2></body></html>"
|
||||
)
|
||||
|
||||
def log_message(self, *_: Any) -> None:
|
||||
pass
|
||||
|
||||
server = HTTPServer(("127.0.0.1", CALLBACK_PORT), _Handler)
|
||||
|
||||
params = urlencode({
|
||||
"client_id": self.client_id,
|
||||
"redirect_uri": f"http://localhost:{CALLBACK_PORT}/callback",
|
||||
"response_type": "code",
|
||||
"scope": "activity:read_all",
|
||||
"state": state,
|
||||
})
|
||||
url = f"{STRAVA_AUTH_URL}?{params}"
|
||||
self._console.print(f"Opening browser for Strava authorization…")
|
||||
self._console.print(f"If nothing opens, visit: [cyan]{url}[/cyan]")
|
||||
webbrowser.open(url)
|
||||
|
||||
server.handle_request() # blocks until one request received
|
||||
server.server_close()
|
||||
|
||||
code = code_holder.get("code")
|
||||
if not code:
|
||||
raise RuntimeError("Authorization failed — no code received from Strava.")
|
||||
|
||||
r = requests.post(STRAVA_TOKEN_URL, data={
|
||||
"client_id": self.client_id,
|
||||
"client_secret": self.client_secret,
|
||||
"code": code,
|
||||
"grant_type": "authorization_code",
|
||||
}, timeout=30)
|
||||
r.raise_for_status()
|
||||
self._tokens = r.json()
|
||||
self._tokens["client_id"] = self.client_id
|
||||
self._save_tokens()
|
||||
self._console.print("[green]✓[/green] Authorized!")
|
||||
|
||||
def _save_tokens(self) -> None:
|
||||
TOKENS_FILE.parent.mkdir(parents=True, exist_ok=True)
|
||||
TOKENS_FILE.write_text(json.dumps(self._tokens, indent=2), encoding="utf-8")
|
||||
|
||||
# ── HTTP ──────────────────────────────────────────────────────────────────
|
||||
|
||||
def _get(self, path: str, **params: Any) -> Any:
|
||||
import requests as req
|
||||
self._ensure_fresh()
|
||||
headers = {"Authorization": f"Bearer {self._tokens['access_token']}"}
|
||||
|
||||
while True:
|
||||
r = req.get(f"{STRAVA_API_BASE}{path}", params=params, headers=headers, timeout=30)
|
||||
|
||||
# Track rate limits
|
||||
usage = r.headers.get("X-RateLimit-Usage", "")
|
||||
if usage:
|
||||
parts = usage.split(",")
|
||||
if len(parts) == 2:
|
||||
self._15min_used = int(parts[0])
|
||||
self._daily_used = int(parts[1])
|
||||
|
||||
if r.status_code == 429:
|
||||
self._console.print("[yellow]Rate limit reached, waiting 60 s…[/yellow]")
|
||||
time.sleep(60)
|
||||
continue
|
||||
|
||||
r.raise_for_status()
|
||||
|
||||
limit_hdr = r.headers.get("X-RateLimit-Limit", "")
|
||||
if limit_hdr:
|
||||
lparts = limit_hdr.split(",")
|
||||
if len(lparts) == 2:
|
||||
l15 = int(lparts[0])
|
||||
if self._15min_used > int(l15 * 0.85):
|
||||
self._console.print(
|
||||
f"[yellow]Warning:[/yellow] {self._15min_used}/{l15} requests used this 15-min window."
|
||||
)
|
||||
|
||||
return r.json()
|
||||
|
||||
# ── API calls ─────────────────────────────────────────────────────────────
|
||||
|
||||
def get_activities(self, after: int | None = None, per_page: int = 200) -> list[dict]:
|
||||
"""Fetch full paginated activity list. `after` is a Unix timestamp."""
|
||||
activities: list[dict] = []
|
||||
page = 1
|
||||
while True:
|
||||
params: dict[str, Any] = {"per_page": per_page, "page": page}
|
||||
if after is not None:
|
||||
params["after"] = after
|
||||
batch: list[dict] = self._get("/athlete/activities", **params)
|
||||
if not batch:
|
||||
break
|
||||
activities.extend(batch)
|
||||
if len(batch) < per_page:
|
||||
break
|
||||
page += 1
|
||||
return activities
|
||||
|
||||
def get_streams(self, activity_id: int) -> dict[str, list]:
|
||||
"""Return {stream_type: [values...]}. Empty dict on any failure."""
|
||||
try:
|
||||
data: dict = self._get(
|
||||
f"/activities/{activity_id}/streams",
|
||||
keys=STREAM_KEYS,
|
||||
key_by_type="true",
|
||||
)
|
||||
return {k: v["data"] for k, v in data.items() if isinstance(v, dict) and "data" in v}
|
||||
except Exception:
|
||||
return {}
|
||||
|
||||
|
||||
# ── conversion ────────────────────────────────────────────────────────────────
|
||||
|
||||
def _strava_to_parsed(act: dict, streams: dict[str, list]) -> ParsedActivity:
|
||||
"""Build a ParsedActivity from a Strava activity dict + its streams."""
|
||||
started_at = datetime.fromisoformat(act["start_date"].replace("Z", "+00:00"))
|
||||
|
||||
sport = normalise_sport(act.get("sport_type") or act.get("type") or "")
|
||||
|
||||
times = streams.get("time", []) # seconds since start
|
||||
latlngs = streams.get("latlng", []) # [[lat, lon], ...]
|
||||
altitudes = streams.get("altitude", []) # metres
|
||||
heartrates = streams.get("heartrate", []) # bpm
|
||||
cadences = streams.get("cadence", []) # rpm
|
||||
watts = streams.get("watts", []) # W
|
||||
velocities = streams.get("velocity_smooth", []) # m/s
|
||||
|
||||
points: list[DataPoint] = []
|
||||
for i, t in enumerate(times):
|
||||
ll = latlngs[i] if i < len(latlngs) else None
|
||||
points.append(DataPoint(
|
||||
timestamp = started_at + timedelta(seconds=int(t)),
|
||||
lat = float(ll[0]) if ll else None,
|
||||
lon = float(ll[1]) if ll else None,
|
||||
elevation_m = float(altitudes[i]) if i < len(altitudes) else None,
|
||||
hr_bpm = int(heartrates[i]) if i < len(heartrates) else None,
|
||||
cadence_rpm = int(cadences[i]) if i < len(cadences) else None,
|
||||
power_w = int(watts[i]) if i < len(watts) else None,
|
||||
speed_kmh = float(velocities[i]) * 3.6 if i < len(velocities) else None,
|
||||
))
|
||||
|
||||
strava_id = str(act["id"])
|
||||
source_hash = "sha256:" + hashlib.sha256(f"strava:{strava_id}".encode()).hexdigest()
|
||||
|
||||
return ParsedActivity(
|
||||
points = points,
|
||||
sport = sport,
|
||||
started_at = started_at,
|
||||
source_file = f"strava_{strava_id}",
|
||||
source_hash = source_hash,
|
||||
title = act.get("name") or None,
|
||||
strava_id = strava_id,
|
||||
)
|
||||
|
||||
|
||||
def _patch_from_summary(metrics: Any, act: dict) -> Any:
|
||||
"""Fill None metric fields using Strava activity summary values.
|
||||
|
||||
Useful for activities without streams (manual entries, indoor rides with
|
||||
no sensors) where compute() returns _empty().
|
||||
"""
|
||||
patches: dict[str, Any] = {}
|
||||
if metrics.distance_m is None and act.get("distance"):
|
||||
patches["distance_m"] = float(act["distance"])
|
||||
if metrics.moving_time_s is None and act.get("moving_time"):
|
||||
patches["moving_time_s"] = int(act["moving_time"])
|
||||
if metrics.duration_s is None and act.get("elapsed_time"):
|
||||
patches["duration_s"] = int(act["elapsed_time"])
|
||||
if metrics.elevation_gain_m is None and act.get("total_elevation_gain"):
|
||||
patches["elevation_gain_m"] = float(act["total_elevation_gain"])
|
||||
if metrics.avg_hr_bpm is None and act.get("average_heartrate"):
|
||||
patches["avg_hr_bpm"] = int(act["average_heartrate"])
|
||||
if metrics.max_hr_bpm is None and act.get("max_heartrate"):
|
||||
patches["max_hr_bpm"] = int(act["max_heartrate"])
|
||||
if metrics.avg_power_w is None and act.get("average_watts"):
|
||||
patches["avg_power_w"] = int(act["average_watts"])
|
||||
if metrics.avg_cadence_rpm is None and act.get("average_cadence"):
|
||||
patches["avg_cadence_rpm"] = int(act["average_cadence"])
|
||||
if metrics.avg_speed_kmh is None and act.get("average_speed"):
|
||||
patches["avg_speed_kmh"] = float(act["average_speed"]) * 3.6
|
||||
return dataclasses.replace(metrics, **patches) if patches else metrics
|
||||
|
||||
|
||||
# ── main sync ─────────────────────────────────────────────────────────────────
|
||||
|
||||
def sync(
|
||||
client: StravaClient,
|
||||
output_dir: Path,
|
||||
since: datetime | None,
|
||||
console: Console,
|
||||
) -> None:
|
||||
"""Fetch new Strava activities and write BAS JSON files.
|
||||
|
||||
Idempotent: already-imported Strava IDs (tracked in _strava_sync.json)
|
||||
are skipped. `since` overrides the auto-detected checkpoint.
|
||||
"""
|
||||
from bincio.extract.metrics import compute
|
||||
from bincio.extract.writer import build_summary, make_activity_id, write_activity, write_index
|
||||
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# ── load sync state ───────────────────────────────────────────────────────
|
||||
sync_path = output_dir / SYNC_FILE
|
||||
sync_state: dict = json.loads(sync_path.read_text(encoding="utf-8")) if sync_path.exists() else {}
|
||||
imported_ids: set[str] = set(sync_state.get("imported_ids", []))
|
||||
|
||||
# ── determine `after` timestamp ───────────────────────────────────────────
|
||||
after_ts: int | None = None
|
||||
if since:
|
||||
after_ts = int(since.timestamp())
|
||||
elif sync_state.get("last_sync"):
|
||||
# 1-hour overlap to catch delayed Strava saves
|
||||
last = datetime.fromisoformat(sync_state["last_sync"])
|
||||
after_ts = int((last - timedelta(hours=1)).timestamp())
|
||||
# else: full sync (first run)
|
||||
|
||||
# ── fetch activity list ───────────────────────────────────────────────────
|
||||
since_label = f" since {since.date()}" if since else (" (incremental)" if after_ts else " (full sync)")
|
||||
console.print(f"Fetching Strava activity list{since_label}…")
|
||||
all_acts = client.get_activities(after=after_ts)
|
||||
new_acts = [a for a in all_acts if str(a["id"]) not in imported_ids]
|
||||
|
||||
console.print(
|
||||
f"Found [bold]{len(new_acts)}[/bold] new activities "
|
||||
f"([bold]{len(all_acts) - len(new_acts)}[/bold] already imported)."
|
||||
)
|
||||
if not new_acts:
|
||||
console.print("[green]All up to date.[/green]")
|
||||
return
|
||||
|
||||
# ── load existing index ───────────────────────────────────────────────────
|
||||
index_path = output_dir / "index.json"
|
||||
if index_path.exists():
|
||||
index_data = json.loads(index_path.read_text(encoding="utf-8"))
|
||||
else:
|
||||
index_data = {"owner": {"handle": "strava_user"}, "activities": []}
|
||||
owner = index_data.get("owner", {})
|
||||
summaries: dict[str, dict] = {s["id"]: s for s in index_data.get("activities", [])}
|
||||
|
||||
# ── import loop ───────────────────────────────────────────────────────────
|
||||
errors: list[tuple[str, str]] = []
|
||||
imported = 0
|
||||
|
||||
with Progress(
|
||||
TextColumn("[progress.description]{task.description}"),
|
||||
BarColumn(),
|
||||
MofNCompleteColumn(),
|
||||
TimeElapsedColumn(),
|
||||
console=console,
|
||||
) as progress:
|
||||
task = progress.add_task("Importing…", total=len(new_acts))
|
||||
|
||||
for act in new_acts:
|
||||
progress.advance(task)
|
||||
strava_id = str(act["id"])
|
||||
try:
|
||||
streams = client.get_streams(act["id"])
|
||||
parsed = _strava_to_parsed(act, streams)
|
||||
metrics = compute(parsed)
|
||||
metrics = _patch_from_summary(metrics, act)
|
||||
act_id = make_activity_id(parsed)
|
||||
write_activity(parsed, metrics, output_dir, privacy="public")
|
||||
summaries[act_id] = build_summary(parsed, metrics, act_id, "public")
|
||||
imported_ids.add(strava_id)
|
||||
imported += 1
|
||||
except Exception as exc:
|
||||
errors.append((strava_id, str(exc)))
|
||||
|
||||
# ── write index + sync state ──────────────────────────────────────────────
|
||||
write_index(list(summaries.values()), output_dir, owner)
|
||||
|
||||
sync_state["imported_ids"] = sorted(imported_ids)
|
||||
sync_state["last_sync"] = datetime.now(timezone.utc).isoformat()
|
||||
sync_path.write_text(json.dumps(sync_state, indent=2), encoding="utf-8")
|
||||
|
||||
# Trigger merge if sidecar edits directory exists
|
||||
if (output_dir / "edits").exists():
|
||||
from bincio.render.merge import merge_all
|
||||
merge_all(output_dir)
|
||||
|
||||
console.print(
|
||||
f"\n[green]Done.[/green] "
|
||||
f"Imported [bold]{imported}[/bold] activities, "
|
||||
f"errors [bold]{len(errors)}[/bold]."
|
||||
)
|
||||
if errors:
|
||||
console.print("\n[red]Errors:[/red]")
|
||||
for sid, msg in errors[:20]:
|
||||
console.print(f" Strava {sid}: {msg}")
|
||||
if len(errors) > 20:
|
||||
console.print(f" … and {len(errors) - 20} more.")
|
||||
Reference in New Issue
Block a user