diff --git a/CHEATSHEET.md b/CHEATSHEET.md
index 9a85c34..0dbb861 100644
--- a/CHEATSHEET.md
+++ b/CHEATSHEET.md
@@ -25,11 +25,24 @@ uv run bincio extract --since 2025-01-01 # only files newer than date
uv run bincio extract --file ride.gpx # single file → JSON on stdout
uv run bincio extract --input ~/rides \
--output ~/bincio_data # override config paths
+uv run bincio extract --dev 50 # dev mode: 50 files → /tmp/bincio_dev/
```
Re-extraction is safe — unchanged files are skipped (hash-based dedup).
To force a full re-extract: `rm -rf ~/bincio_data && uv run bincio extract`
+### Dev mode
+
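+`--dev N` sorts the full file list by path and picks N files at even intervals, so the
+sample is spread across the whole collection (and across dates when files are named or
+foldered by date). Output always goes to `/tmp/bincio_dev/`, overriding `--output` and
+the configured output directory, so your real data is never touched. Use it for fast
+iteration on UI or pipeline changes: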
+
+```bash
+uv run bincio extract --dev 50
+uv run bincio import strava --dev 50 # N most recent Strava activities
+uv run bincio render --serve --data-dir /tmp/bincio_dev
+```
+
---
## Import from Strava
@@ -54,6 +67,7 @@ uv run bincio import strava
uv run bincio import strava --since 2025-01-01 # explicit date cutoff
uv run bincio import strava --reauth # force new OAuth flow
uv run bincio import strava --output ~/other_dir # override output dir
+uv run bincio import strava --dev 50 # dev mode: 50 most recent → /tmp/bincio_dev/
```
Credentials resolution order:
diff --git a/CLAUDE.md b/CLAUDE.md
index d34e1f1..b71a631 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -85,19 +85,22 @@ site/ Astro project
## How to run
```bash
-# Extract from local files
+# Fast dev loop (50-file sample → /tmp/bincio_dev/, no real data touched)
+uv run bincio extract --dev 50
+uv run bincio import strava --dev 50 # 50 most recent Strava activities
+uv run bincio render --serve --data-dir /tmp/bincio_dev
+
+# Full extract from local files
cd ~/src/bincio_activity
-uv run bincio extract --input ~/src/cycling_data_davide/activities --output /tmp/bincio_test
+uv run bincio extract # uses extract_config.yaml
# Import from Strava (credentials in extract_config.yaml under import.strava)
uv sync --extra strava
uv run bincio import strava # first run opens browser for OAuth
uv run bincio import strava # subsequent runs are incremental
-# Site dev server
-cd site
-ln -sf /tmp/bincio_test public/data # symlink data
-npm run dev
+# Site dev server (render handles symlink + merge automatically)
+uv run bincio render --serve
# Edit server (enables drawer + file upload in the site)
uv run bincio edit --data-dir ~/bincio_data # port 4041
diff --git a/bincio/extract/cli.py b/bincio/extract/cli.py
index 60655d9..473fe88 100644
--- a/bincio/extract/cli.py
+++ b/bincio/extract/cli.py
@@ -113,6 +113,8 @@ def _process_file(path: Path) -> dict:
help="Only process files modified after this date.")
@click.option("--workers", default=None, type=int,
help="Parallel worker processes (default: CPU count).")
+@click.option("--dev", "dev_sample", default=None, type=int, metavar="N",
+ help="Dev mode: sample N files evenly across the full list, output to /tmp/bincio_dev/.")
def extract(
config_path: Optional[str],
input_dir: Optional[str],
@@ -120,6 +122,7 @@ def extract(
single_file: Optional[str],
since: Optional[str],
workers: Optional[int],
+ dev_sample: Optional[int],
) -> None:
"""Parse GPX/FIT/TCX files and write BAS JSON data store."""
@@ -128,13 +131,25 @@ def extract(
return
cfg = _resolve_config(config_path, input_dir, output_dir)
+
+ if dev_sample is not None:
+ cfg.output_dir = Path("/tmp/bincio_dev")
+ cfg.incremental = False
+ console.print(f"[yellow]Dev mode:[/yellow] sampling {dev_sample} files → [cyan]{cfg.output_dir}[/cyan]")
+
cfg.output_dir.mkdir(parents=True, exist_ok=True)
files = _collect_files(cfg, since)
if not files:
console.print("[yellow]No supported files found.[/yellow]")
return
- console.print(f"Found [bold]{len(files)}[/bold] activity files.")
+
+ if dev_sample is not None:
+ total = len(files)
+ files = _sample_diverse(files, dev_sample)
+ console.print(f"Sampled [bold]{len(files)}[/bold] files from {total} total.")
+ else:
+ console.print(f"Found [bold]{len(files)}[/bold] activity files.")
# Build strava lookup once (serialised dict, sent to workers via initializer)
strava_lookup: dict = {}
@@ -314,6 +329,15 @@ def _load_existing_summaries(output_dir: Path) -> list[dict]:
return []
+def _sample_diverse(files: list[Path], n: int) -> list[Path]:
+    """Return up to ``n`` files picked at even intervals from the path-sorted list.
+
+    Sorting by path spreads the picks over the whole collection instead of
+    taking the first ``n`` in discovery order.
+
+    Args:
+        files: Candidate paths, in any order.
+        n: Requested sample size.
+
+    Returns:
+        ``[]`` when ``n <= 0``; ``files`` unchanged when it already holds at
+        most ``n`` entries; otherwise ``n`` distinct paths in sorted order.
+    """
+    if n <= 0:
+        # `len(files) / n` below would raise ZeroDivisionError for `--dev 0`;
+        # negative values already produced an empty sample.
+        return []
+    if len(files) <= n:
+        return files
+    ordered = sorted(files)
+    step = len(ordered) / n  # > 1.0 here, so the truncated indices are distinct
+    return [ordered[int(i * step)] for i in range(n)]
+
+
def _patch_duplicate_of(output_dir: Path, activity_id: str, canonical_id: str) -> None:
p = output_dir / "activities" / f"{activity_id}.json"
if not p.exists():
diff --git a/bincio/extract/parsers/gpx.py b/bincio/extract/parsers/gpx.py
index 665ff04..bdf85df 100644
--- a/bincio/extract/parsers/gpx.py
+++ b/bincio/extract/parsers/gpx.py
@@ -8,7 +8,7 @@ import gpxpy.gpx
from bincio.extract.models import DataPoint, ParsedActivity
from bincio.extract.parsers.base import BaseParser
-from bincio.extract.sport import normalise_sport
+from bincio.extract.sport import normalise_sport, normalise_sub_sport
# Known GPX extension namespaces
_NS_GARMIN = "http://www.garmin.com/xmlschemas/TrackPointExtension/v1"
@@ -41,14 +41,15 @@ class GpxParser(BaseParser):
if not points:
raise ValueError(f"No trackpoints found in {path.name}")
- sport = normalise_sport(
- (gpx.tracks[0].type if gpx.tracks else None) or "cycling"
- )
+ raw_sport = (gpx.tracks[0].type if gpx.tracks else None) or "cycling"
+ sport = normalise_sport(raw_sport)
+ sub_sport = normalise_sub_sport(raw_sport)
started_at = points[0].timestamp
return ParsedActivity(
points=points,
sport=sport,
+ sub_sport=sub_sport,
started_at=started_at,
source_file=path.name,
source_hash="", # set by factory
diff --git a/bincio/extract/parsers/tcx.py b/bincio/extract/parsers/tcx.py
index c80e9d1..1d1387f 100644
--- a/bincio/extract/parsers/tcx.py
+++ b/bincio/extract/parsers/tcx.py
@@ -6,7 +6,7 @@ from pathlib import Path
from lxml import etree
from bincio.extract.models import DataPoint, ParsedActivity
-from bincio.extract.sport import normalise_sport
+from bincio.extract.sport import normalise_sport, normalise_sub_sport
_NS_HTTP = {
"tcx": "http://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2",
@@ -33,7 +33,8 @@ class TcxParser:
# Use the first activity
act = activities[0]
sport_attr = act.get("Sport", "Biking")
- sport = normalise_sport(sport_attr)
+ sport = normalise_sport(sport_attr)
+ sub_sport = normalise_sub_sport(sport_attr)
points: list[DataPoint] = []
for tp in act.findall(".//tcx:Trackpoint", _NS):
@@ -78,6 +79,7 @@ class TcxParser:
return ParsedActivity(
points=points,
sport=sport,
+ sub_sport=sub_sport,
started_at=points[0].timestamp,
source_file=path.name,
source_hash="",
diff --git a/bincio/extract/sport.py b/bincio/extract/sport.py
index b333559..e7b9a2b 100644
--- a/bincio/extract/sport.py
+++ b/bincio/extract/sport.py
@@ -47,10 +47,13 @@ _MAPPING: dict[str, str] = {
"skiing": "skiing",
"cross_country_skiing": "skiing",
"nordic_skiing": "skiing",
+ "nordic_ski": "skiing",
"downhill_skiing": "skiing",
"alpine_skiing": "skiing",
+ "alpine_ski": "skiing",
"skate_skiing": "skiing",
"backcountry_skiing": "skiing",
+ "backcountry_ski": "skiing",
# swimming
"swimming": "swimming",
"swim": "swimming",
@@ -58,13 +61,63 @@ _MAPPING: dict[str, str] = {
"lap_swimming": "swimming",
}
+# Normalised raw sport key (as produced by `_normalise_key`) → the sub_sport it implies.
+# Keys missing from this table imply no sub_sport (the lookup yields None).
+_SUB_SPORT_MAPPING: dict[str, str] = {
+ # cycling
+ "road_biking": "road",
+ "road_cycling": "road",
+ "mountain_biking": "mountain",
+ "mountain_bike_ride": "mountain",
+ "gravel_cycling": "gravel",
+ "gravel_ride": "gravel",
+ "cyclocross": "gravel",
+ "indoor_cycling": "indoor",
+ "indoor_ride": "indoor",
+ "virtual_ride": "indoor",
+ # running
+ "trail_running": "trail",
+ "trail_run": "trail",
+ "treadmill_running": "indoor",
+ "treadmill": "indoor",
+ "indoor_run": "indoor",
+ "virtual_run": "indoor",
+ "track_run": "track",
+ # skiing
+ "cross_country_skiing": "nordic",
+ "nordic_skiing": "nordic",
+ "nordic_ski": "nordic",
+ "skate_skiing": "nordic",
+ "backcountry_skiing": "nordic",
+ "backcountry_ski": "nordic",
+ "downhill_skiing": "alpine",
+ "alpine_skiing": "alpine",
+ "alpine_ski": "alpine",
+ # swimming
+ "open_water_swimming": "open_water",
+ "lap_swimming": "pool",
+}
+
BAS_SPORTS = {"cycling", "running", "hiking", "walking", "swimming", "skiing", "other"}
+def _normalise_key(raw: object) -> str:
+    """Reduce a raw sport label to the lower snake_case key used by the lookup tables.
+
+    Handles CamelCase ("MountainBikeRide" → "mountain_bike_ride"), spaces and
+    hyphens ("Trail Run" → "trail_run"), all-caps ("CYCLING" → "cycling") and
+    leading date-like digit prefixes ("20231117outdoor_run" → "outdoor_run").
+    """
+    key = str(raw).strip()
+    # Insert "_" only at real word boundaries: lower/digit → Upper, or the last
+    # capital of an acronym before a capitalised word ("EBikeRide" → "E_Bike_Ride").
+    # Prefixing every capital would shred "CYCLING" into "c_y_c_l_i_n_g" and
+    # double the separator in "Trail Run".
+    key = re.sub(r"(?<=[a-z0-9])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])", "_", key).lower()
+    # Collapse any run of whitespace, hyphens and underscores into a single "_".
+    key = re.sub(r"[\s_-]+", "_", key)
+    # Strip leading date-like prefixes e.g. "20231117outdoor_run" → "outdoor_run"
+    return re.sub(r"^[\d_]+", "", key).rstrip("_")
+
+
def normalise_sport(raw: object) -> str:
+ """Map a raw sport label to its `_MAPPING` entry; None and unknown labels give "other"."""
if raw is None:
return "other"
- key = str(raw).lower().strip().replace(" ", "_").replace("-", "_")
- # Strip leading date-like prefixes e.g. "20231117outdoor_run" → "outdoor_run"
- key = re.sub(r"^\d+", "", key)
- return _MAPPING.get(key, "other")
+ return _MAPPING.get(_normalise_key(raw), "other")
+
+
+def normalise_sub_sport(raw: object) -> str | None:
+    """Look up the sub_sport implied by a raw sport label.
+
+    For example 'mountain_bike_ride' gives 'mountain'. The result is None when
+    ``raw`` is None or the label implies no sub_sport (e.g. plain 'ride', 'run').
+    """
+    return None if raw is None else _SUB_SPORT_MAPPING.get(_normalise_key(raw))
diff --git a/bincio/import_/cli.py b/bincio/import_/cli.py
index 8070a01..2017e9f 100644
--- a/bincio/import_/cli.py
+++ b/bincio/import_/cli.py
@@ -29,6 +29,8 @@ def import_group() -> None:
help="Only import activities after this date (default: incremental from last sync).")
@click.option("--reauth", is_flag=True, default=False,
help="Force re-authorization even if valid tokens exist.")
+@click.option("--dev", "dev_sample", default=None, type=int, metavar="N",
+ help="Dev mode: import only the N most recent activities, output to /tmp/bincio_dev/.")
def strava_cmd(
client_id: Optional[str],
client_secret: Optional[str],
@@ -36,6 +38,7 @@ def strava_cmd(
config_path: Optional[str],
since: Optional[str],
reauth: bool,
+ dev_sample: Optional[int],
) -> None:
"""Import activities from Strava.
@@ -90,7 +93,11 @@ def strava_cmd(
"Add them to extract_config.yaml under import.strava, or pass --client-id/--client-secret."
)
- out = _resolve_output(output_dir, cfg)
+ if dev_sample is not None:
+ out = Path("/tmp/bincio_dev")
+ console.print(f"[yellow]Dev mode:[/yellow] importing {dev_sample} activities → [cyan]{out}[/cyan]")
+ else:
+ out = _resolve_output(output_dir, cfg)
console.print(f"Output dir: [cyan]{out}[/cyan]")
if reauth and TOKENS_FILE.exists():
@@ -108,7 +115,7 @@ def strava_cmd(
except ValueError:
raise click.BadParameter(f"Expected YYYY-MM-DD, got {since!r}", param_hint="--since")
- strava_sync(client, out, since_dt, console)
+ strava_sync(client, out, since_dt, console, limit=dev_sample)
def _load_config(config_path: Optional[str]):
diff --git a/bincio/import_/strava.py b/bincio/import_/strava.py
index 89a13f0..83cbdce 100644
--- a/bincio/import_/strava.py
+++ b/bincio/import_/strava.py
@@ -26,7 +26,7 @@ from rich.console import Console
from rich.progress import BarColumn, MofNCompleteColumn, Progress, TextColumn, TimeElapsedColumn
from bincio.extract.models import DataPoint, ParsedActivity
-from bincio.extract.sport import normalise_sport
+from bincio.extract.sport import normalise_sport, normalise_sub_sport
STRAVA_AUTH_URL = "https://www.strava.com/oauth/authorize"
STRAVA_TOKEN_URL = "https://www.strava.com/oauth/token"
@@ -214,7 +214,9 @@ def _strava_to_parsed(act: dict, streams: dict[str, list]) -> ParsedActivity:
"""Build a ParsedActivity from a Strava activity dict + its streams."""
started_at = datetime.fromisoformat(act["start_date"].replace("Z", "+00:00"))
- sport = normalise_sport(act.get("sport_type") or act.get("type") or "")
+ raw_sport = act.get("sport_type") or act.get("type") or ""
+ sport = normalise_sport(raw_sport)
+ sub_sport = normalise_sub_sport(raw_sport)
times = streams.get("time", []) # seconds since start
latlngs = streams.get("latlng", []) # [[lat, lon], ...]
@@ -244,6 +246,7 @@ def _strava_to_parsed(act: dict, streams: dict[str, list]) -> ParsedActivity:
return ParsedActivity(
points = points,
sport = sport,
+ sub_sport = sub_sport,
started_at = started_at,
source_file = f"strava_{strava_id}",
source_hash = source_hash,
@@ -287,6 +290,7 @@ def sync(
output_dir: Path,
since: datetime | None,
console: Console,
+ limit: int | None = None,
) -> None:
"""Fetch new Strava activities and write BAS JSON files.
@@ -323,6 +327,9 @@ def sync(
f"Found [bold]{len(new_acts)}[/bold] new activities "
f"([bold]{len(all_acts) - len(new_acts)}[/bold] already imported)."
)
+ if limit is not None and len(new_acts) > limit:
+ new_acts = new_acts[:limit]
+ console.print(f"[yellow]Dev mode:[/yellow] capped to {limit} activities.")
if not new_acts:
console.print("[green]All up to date.[/green]")
return
diff --git a/site/src/components/ActivityDetail.svelte b/site/src/components/ActivityDetail.svelte
index 3bd7389..ac4d9a8 100644
--- a/site/src/components/ActivityDetail.svelte
+++ b/site/src/components/ActivityDetail.svelte
@@ -158,8 +158,16 @@
class="text-xs font-medium px-2 py-0.5 rounded-full"
style="background:{color}22;color:{color}"
>
- {sportIcon(activity.sport)} {sportLabel(activity.sport, activity.sub_sport)}
+ {sportIcon(activity.sport)} {sportLabel(activity.sport)}
+ {#if activity.sub_sport && activity.sub_sport !== 'generic'}
+
+ <!-- Drop the trailing " <Sport>" so multi-word sub-sport labels ("Open Water") stay intact. -->
+ {sportLabel(activity.sport, activity.sub_sport).slice(0, -(sportLabel(activity.sport).length + 1))}
+
+ {/if}
{formatDate(activity.started_at)} · {formatTime(activity.started_at)}
diff --git a/site/src/lib/format.ts b/site/src/lib/format.ts
index 458ce36..7da3cdf 100644
--- a/site/src/lib/format.ts
+++ b/site/src/lib/format.ts
@@ -75,10 +75,24 @@ export function sportColor(sport: Sport): string {
return SPORT_COLORS[sport] ?? '#a78bfa';
}
+// Display labels for known sub_sport values; values not listed here are shown capitalised.
+const SUB_SPORT_LABELS: Record<string, string> = {
+  road: 'Road',
+  mountain: 'MTB',
+  gravel: 'Gravel',
+  indoor: 'Indoor',
+  trail: 'Trail',
+  track: 'Track',
+  nordic: 'Nordic',
+  alpine: 'Alpine',
+  open_water: 'Open Water',
+  pool: 'Pool',
+};
+
export function sportLabel(sport: Sport, subSport?: string | null): string {
const base = sport.charAt(0).toUpperCase() + sport.slice(1);
if (subSport && subSport !== 'generic') {
- return `${subSport.charAt(0).toUpperCase() + subSport.slice(1)} ${base}`;
+ const sub = SUB_SPORT_LABELS[subSport] ?? (subSport.charAt(0).toUpperCase() + subSport.slice(1));
+ return `${sub} ${base}`;
}
return base;
}
diff --git a/site/src/lib/types.ts b/site/src/lib/types.ts
index 03ce82a..ba6763b 100644
--- a/site/src/lib/types.ts
+++ b/site/src/lib/types.ts
@@ -1,7 +1,7 @@
/** TypeScript types mirroring BAS v1.0 schema. */
export type Sport = "cycling" | "running" | "hiking" | "walking" | "swimming" | "skiing" | "other";
-export type SubSport = "road" | "mountain" | "gravel" | "indoor" | "trail" | "track" | "nordic" | null;
+export type SubSport = "road" | "mountain" | "gravel" | "indoor" | "trail" | "track" | "nordic" | "alpine" | "open_water" | "pool" | null;
export type Privacy = "public" | "blur_start" | "no_gps" | "private";
/** [duration_s, avg_watts] pairs, sorted by duration ascending. */