diff --git a/CHEATSHEET.md b/CHEATSHEET.md index 9a85c34..0dbb861 100644 --- a/CHEATSHEET.md +++ b/CHEATSHEET.md @@ -25,11 +25,24 @@ uv run bincio extract --since 2025-01-01 # only files newer than date uv run bincio extract --file ride.gpx # single file → JSON on stdout uv run bincio extract --input ~/rides \ --output ~/bincio_data # override config paths +uv run bincio extract --dev 50 # dev mode: 50 files → /tmp/bincio_dev/ ``` Re-extraction is safe — unchanged files are skipped (hash-based dedup). To force a full re-extract: `rm -rf ~/bincio_data && uv run bincio extract` +### Dev mode + +`--dev N` samples N files evenly across the full file list (spread by date and format) +and writes to `/tmp/bincio_dev/` so your real data is never touched. Use it for fast +iteration on UI or pipeline changes: + +```bash +uv run bincio extract --dev 50 +uv run bincio import strava --dev 50 # N most recent Strava activities +uv run bincio render --serve --data-dir /tmp/bincio_dev +``` + --- ## Import from Strava @@ -54,6 +67,7 @@ uv run bincio import strava uv run bincio import strava --since 2025-01-01 # explicit date cutoff uv run bincio import strava --reauth # force new OAuth flow uv run bincio import strava --output ~/other_dir # override output dir +uv run bincio import strava --dev 50 # dev mode: 50 most recent → /tmp/bincio_dev/ ``` Credentials resolution order: diff --git a/CLAUDE.md b/CLAUDE.md index d34e1f1..b71a631 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -85,19 +85,22 @@ site/ Astro project ## How to run ```bash -# Extract from local files +# Fast dev loop (50-file sample → /tmp/bincio_dev/, no real data touched) +uv run bincio extract --dev 50 +uv run bincio import strava --dev 50 # 50 most recent Strava activities +uv run bincio render --serve --data-dir /tmp/bincio_dev + +# Full extract from local files cd ~/src/bincio_activity -uv run bincio extract --input ~/src/cycling_data_davide/activities --output /tmp/bincio_test +uv run bincio extract # uses 
extract_config.yaml # Import from Strava (credentials in extract_config.yaml under import.strava) uv sync --extra strava uv run bincio import strava # first run opens browser for OAuth uv run bincio import strava # subsequent runs are incremental -# Site dev server -cd site -ln -sf /tmp/bincio_test public/data # symlink data -npm run dev +# Site dev server (render handles symlink + merge automatically) +uv run bincio render --serve # Edit server (enables drawer + file upload in the site) uv run bincio edit --data-dir ~/bincio_data # port 4041 diff --git a/bincio/extract/cli.py b/bincio/extract/cli.py index 60655d9..473fe88 100644 --- a/bincio/extract/cli.py +++ b/bincio/extract/cli.py @@ -113,6 +113,8 @@ def _process_file(path: Path) -> dict: help="Only process files modified after this date.") @click.option("--workers", default=None, type=int, help="Parallel worker processes (default: CPU count).") +@click.option("--dev", "dev_sample", default=None, type=int, metavar="N", + help="Dev mode: sample N files evenly across the full list, output to /tmp/bincio_dev/.") def extract( config_path: Optional[str], input_dir: Optional[str], @@ -120,6 +122,7 @@ def extract( single_file: Optional[str], since: Optional[str], workers: Optional[int], + dev_sample: Optional[int], ) -> None: """Parse GPX/FIT/TCX files and write BAS JSON data store.""" @@ -128,13 +131,25 @@ def extract( return cfg = _resolve_config(config_path, input_dir, output_dir) + + if dev_sample is not None: + cfg.output_dir = Path("/tmp/bincio_dev") + cfg.incremental = False + console.print(f"[yellow]Dev mode:[/yellow] sampling {dev_sample} files → [cyan]{cfg.output_dir}[/cyan]") + cfg.output_dir.mkdir(parents=True, exist_ok=True) files = _collect_files(cfg, since) if not files: console.print("[yellow]No supported files found.[/yellow]") return - console.print(f"Found [bold]{len(files)}[/bold] activity files.") + + if dev_sample is not None: + total = len(files) + files = _sample_diverse(files, dev_sample) + 
console.print(f"Sampled [bold]{len(files)}[/bold] files from {total} total.") + else: + console.print(f"Found [bold]{len(files)}[/bold] activity files.") # Build strava lookup once (serialised dict, sent to workers via initializer) strava_lookup: dict = {} @@ -314,6 +329,15 @@ def _load_existing_summaries(output_dir: Path) -> list[dict]: return [] +def _sample_diverse(files: list[Path], n: int) -> list[Path]: + """Return n files sampled evenly across the sorted list for date/format diversity.""" + if len(files) <= n: + return files + files = sorted(files) + step = len(files) / n + return [files[int(i * step)] for i in range(n)] + + def _patch_duplicate_of(output_dir: Path, activity_id: str, canonical_id: str) -> None: p = output_dir / "activities" / f"{activity_id}.json" if not p.exists(): diff --git a/bincio/extract/parsers/gpx.py b/bincio/extract/parsers/gpx.py index 665ff04..bdf85df 100644 --- a/bincio/extract/parsers/gpx.py +++ b/bincio/extract/parsers/gpx.py @@ -8,7 +8,7 @@ import gpxpy.gpx from bincio.extract.models import DataPoint, ParsedActivity from bincio.extract.parsers.base import BaseParser -from bincio.extract.sport import normalise_sport +from bincio.extract.sport import normalise_sport, normalise_sub_sport # Known GPX extension namespaces _NS_GARMIN = "http://www.garmin.com/xmlschemas/TrackPointExtension/v1" @@ -41,14 +41,15 @@ class GpxParser(BaseParser): if not points: raise ValueError(f"No trackpoints found in {path.name}") - sport = normalise_sport( - (gpx.tracks[0].type if gpx.tracks else None) or "cycling" - ) + raw_sport = (gpx.tracks[0].type if gpx.tracks else None) or "cycling" + sport = normalise_sport(raw_sport) + sub_sport = normalise_sub_sport(raw_sport) started_at = points[0].timestamp return ParsedActivity( points=points, sport=sport, + sub_sport=sub_sport, started_at=started_at, source_file=path.name, source_hash="", # set by factory diff --git a/bincio/extract/parsers/tcx.py b/bincio/extract/parsers/tcx.py index c80e9d1..1d1387f 
100644 --- a/bincio/extract/parsers/tcx.py +++ b/bincio/extract/parsers/tcx.py @@ -6,7 +6,7 @@ from pathlib import Path from lxml import etree from bincio.extract.models import DataPoint, ParsedActivity -from bincio.extract.sport import normalise_sport +from bincio.extract.sport import normalise_sport, normalise_sub_sport _NS_HTTP = { "tcx": "http://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2", @@ -33,7 +33,8 @@ class TcxParser: # Use the first activity act = activities[0] sport_attr = act.get("Sport", "Biking") - sport = normalise_sport(sport_attr) + sport = normalise_sport(sport_attr) + sub_sport = normalise_sub_sport(sport_attr) points: list[DataPoint] = [] for tp in act.findall(".//tcx:Trackpoint", _NS): @@ -78,6 +79,7 @@ class TcxParser: return ParsedActivity( points=points, sport=sport, + sub_sport=sub_sport, started_at=points[0].timestamp, source_file=path.name, source_hash="", diff --git a/bincio/extract/sport.py b/bincio/extract/sport.py index b333559..e7b9a2b 100644 --- a/bincio/extract/sport.py +++ b/bincio/extract/sport.py @@ -47,10 +47,13 @@ _MAPPING: dict[str, str] = { "skiing": "skiing", "cross_country_skiing": "skiing", "nordic_skiing": "skiing", + "nordic_ski": "skiing", "downhill_skiing": "skiing", "alpine_skiing": "skiing", + "alpine_ski": "skiing", "skate_skiing": "skiing", "backcountry_skiing": "skiing", + "backcountry_ski": "skiing", # swimming "swimming": "swimming", "swim": "swimming", @@ -58,13 +61,63 @@ _MAPPING: dict[str, str] = { "lap_swimming": "swimming", } +_SUB_SPORT_MAPPING: dict[str, str] = { + # cycling + "road_biking": "road", + "road_cycling": "road", + "mountain_biking": "mountain", + "mountain_bike_ride": "mountain", + "gravel_cycling": "gravel", + "gravel_ride": "gravel", + "cyclocross": "gravel", + "indoor_cycling": "indoor", + "indoor_ride": "indoor", + "virtual_ride": "indoor", + # running + "trail_running": "trail", + "trail_run": "trail", + "treadmill_running": "indoor", + "treadmill": "indoor", + "indoor_run": 
"indoor", + "virtual_run": "indoor", + "track_run": "track", + # skiing + "cross_country_skiing": "nordic", + "nordic_skiing": "nordic", + "nordic_ski": "nordic", + "skate_skiing": "nordic", + "backcountry_skiing": "nordic", + "backcountry_ski": "nordic", + "downhill_skiing": "alpine", + "alpine_skiing": "alpine", + "alpine_ski": "alpine", + # swimming + "open_water_swimming": "open_water", + "lap_swimming": "pool", +} + BAS_SPORTS = {"cycling", "running", "hiking", "walking", "swimming", "skiing", "other"} +def _normalise_key(raw: object) -> str: + key = str(raw).strip() + # CamelCase → snake_case ("MountainBikeRide" → "mountain_bike_ride") + key = re.sub(r"([A-Z])", r"_\1", key).lower().lstrip("_") + key = key.replace(" ", "_").replace("-", "_") + return re.sub(r"^\d+", "", key) + + def normalise_sport(raw: object) -> str: if raw is None: return "other" - key = str(raw).lower().strip().replace(" ", "_").replace("-", "_") - # Strip leading date-like prefixes e.g. "20231117outdoor_run" → "outdoor_run" - key = re.sub(r"^\d+", "", key) - return _MAPPING.get(key, "other") + return _MAPPING.get(_normalise_key(raw), "other") + + +def normalise_sub_sport(raw: object) -> str | None: + """Infer sub_sport from a raw sport type string (e.g. 'mountain_bike_ride' → 'mountain'). + + Returns None when no sub_sport is implied (e.g. plain 'ride', 'run'). 
+ """ + if raw is None: + return None + return _SUB_SPORT_MAPPING.get(_normalise_key(raw)) diff --git a/bincio/import_/cli.py b/bincio/import_/cli.py index 8070a01..2017e9f 100644 --- a/bincio/import_/cli.py +++ b/bincio/import_/cli.py @@ -29,6 +29,8 @@ def import_group() -> None: help="Only import activities after this date (default: incremental from last sync).") @click.option("--reauth", is_flag=True, default=False, help="Force re-authorization even if valid tokens exist.") +@click.option("--dev", "dev_sample", default=None, type=int, metavar="N", + help="Dev mode: import only the N most recent activities, output to /tmp/bincio_dev/.") def strava_cmd( client_id: Optional[str], client_secret: Optional[str], @@ -36,6 +38,7 @@ def strava_cmd( config_path: Optional[str], since: Optional[str], reauth: bool, + dev_sample: Optional[int], ) -> None: """Import activities from Strava. @@ -90,7 +93,11 @@ def strava_cmd( "Add them to extract_config.yaml under import.strava, or pass --client-id/--client-secret." 
) - out = _resolve_output(output_dir, cfg) + if dev_sample is not None: + out = Path("/tmp/bincio_dev") + console.print(f"[yellow]Dev mode:[/yellow] importing {dev_sample} activities → [cyan]{out}[/cyan]") + else: + out = _resolve_output(output_dir, cfg) console.print(f"Output dir: [cyan]{out}[/cyan]") if reauth and TOKENS_FILE.exists(): @@ -108,7 +115,7 @@ def strava_cmd( except ValueError: raise click.BadParameter(f"Expected YYYY-MM-DD, got {since!r}", param_hint="--since") - strava_sync(client, out, since_dt, console) + strava_sync(client, out, since_dt, console, limit=dev_sample) def _load_config(config_path: Optional[str]): diff --git a/bincio/import_/strava.py b/bincio/import_/strava.py index 89a13f0..83cbdce 100644 --- a/bincio/import_/strava.py +++ b/bincio/import_/strava.py @@ -26,7 +26,7 @@ from rich.console import Console from rich.progress import BarColumn, MofNCompleteColumn, Progress, TextColumn, TimeElapsedColumn from bincio.extract.models import DataPoint, ParsedActivity -from bincio.extract.sport import normalise_sport +from bincio.extract.sport import normalise_sport, normalise_sub_sport STRAVA_AUTH_URL = "https://www.strava.com/oauth/authorize" STRAVA_TOKEN_URL = "https://www.strava.com/oauth/token" @@ -214,7 +214,9 @@ def _strava_to_parsed(act: dict, streams: dict[str, list]) -> ParsedActivity: """Build a ParsedActivity from a Strava activity dict + its streams.""" started_at = datetime.fromisoformat(act["start_date"].replace("Z", "+00:00")) - sport = normalise_sport(act.get("sport_type") or act.get("type") or "") + raw_sport = act.get("sport_type") or act.get("type") or "" + sport = normalise_sport(raw_sport) + sub_sport = normalise_sub_sport(raw_sport) times = streams.get("time", []) # seconds since start latlngs = streams.get("latlng", []) # [[lat, lon], ...] 
@@ -244,6 +246,7 @@ def _strava_to_parsed(act: dict, streams: dict[str, list]) -> ParsedActivity: return ParsedActivity( points = points, sport = sport, + sub_sport = sub_sport, started_at = started_at, source_file = f"strava_{strava_id}", source_hash = source_hash, @@ -287,6 +290,7 @@ def sync( output_dir: Path, since: datetime | None, console: Console, + limit: int | None = None, ) -> None: """Fetch new Strava activities and write BAS JSON files. @@ -323,6 +327,9 @@ def sync( f"Found [bold]{len(new_acts)}[/bold] new activities " f"([bold]{len(all_acts) - len(new_acts)}[/bold] already imported)." ) + if limit is not None and len(new_acts) > limit: + new_acts = new_acts[:limit] + console.print(f"[yellow]Dev mode:[/yellow] capped to {limit} activities.") if not new_acts: console.print("[green]All up to date.[/green]") return diff --git a/site/src/components/ActivityDetail.svelte b/site/src/components/ActivityDetail.svelte index 3bd7389..ac4d9a8 100644 --- a/site/src/components/ActivityDetail.svelte +++ b/site/src/components/ActivityDetail.svelte @@ -158,8 +158,16 @@ class="text-xs font-medium px-2 py-0.5 rounded-full" style="background:{color}22;color:{color}" > - {sportIcon(activity.sport)} {sportLabel(activity.sport, activity.sub_sport)} + {sportIcon(activity.sport)} {sportLabel(activity.sport)} + {#if activity.sub_sport && activity.sub_sport !== 'generic'} + + {sportLabel(activity.sport, activity.sub_sport).split(' ')[0]} + + {/if} {formatDate(activity.started_at)} · {formatTime(activity.started_at)} diff --git a/site/src/lib/format.ts b/site/src/lib/format.ts index 458ce36..7da3cdf 100644 --- a/site/src/lib/format.ts +++ b/site/src/lib/format.ts @@ -75,10 +75,24 @@ export function sportColor(sport: Sport): string { return SPORT_COLORS[sport] ?? 
/** Display names for known sub_sport values; unknown values fall back to a capitalised key. */
const SUB_SPORT_LABELS: Record<string, string> = {
  road: 'Road',
  mountain: 'MTB',
  gravel: 'Gravel',
  indoor: 'Indoor',
  trail: 'Trail',
  track: 'Track',
  nordic: 'Nordic',
  alpine: 'Alpine',
  open_water: 'Open Water',
  pool: 'Pool',
};

/**
 * Label for a sub-sport on its own, e.g. "mountain" → "MTB".
 *
 * Returns null when the sub-sport is missing, empty or "generic". Use this
 * instead of slicing the output of `sportLabel`: labels such as "Open Water"
 * contain a space, so splitting the combined label on spaces truncates them.
 */
export function subSportLabel(subSport?: string | null): string | null {
  if (!subSport || subSport === 'generic') return null;
  return SUB_SPORT_LABELS[subSport] ?? (subSport.charAt(0).toUpperCase() + subSport.slice(1));
}

/**
 * Label for a sport, prefixed with its sub-sport label when one applies,
 * e.g. ("cycling", "mountain") → "MTB Cycling", ("running", null) → "Running".
 */
export function sportLabel(sport: Sport, subSport?: string | null): string {
  const base = sport.charAt(0).toUpperCase() + sport.slice(1);
  const sub = subSportLabel(subSport);
  return sub !== null ? `${sub} ${base}` : base;
}