Trying to get the sub-sport label shown properly

This commit is contained in:
Davide Scaini
2026-03-30 20:09:01 +02:00
parent c58bc8f7d5
commit 877472e620
11 changed files with 157 additions and 24 deletions
+14
View File
@@ -25,11 +25,24 @@ uv run bincio extract --since 2025-01-01 # only files newer than date
uv run bincio extract --file ride.gpx # single file → JSON on stdout uv run bincio extract --file ride.gpx # single file → JSON on stdout
uv run bincio extract --input ~/rides \ uv run bincio extract --input ~/rides \
--output ~/bincio_data # override config paths --output ~/bincio_data # override config paths
uv run bincio extract --dev 50 # dev mode: 50 files → /tmp/bincio_dev/
``` ```
Re-extraction is safe — unchanged files are skipped (hash-based dedup). Re-extraction is safe — unchanged files are skipped (hash-based dedup).
To force a full re-extract: `rm -rf ~/bincio_data && uv run bincio extract` To force a full re-extract: `rm -rf ~/bincio_data && uv run bincio extract`
### Dev mode
`--dev N` samples N files at evenly spaced positions across the full, path-sorted file list
(which spreads the sample across dates and formats when file names encode them)
and writes to `/tmp/bincio_dev/` so your real data is never touched. Use it for fast
iteration on UI or pipeline changes:
```bash
uv run bincio extract --dev 50
uv run bincio import strava --dev 50 # N most recent Strava activities
uv run bincio render --serve --data-dir /tmp/bincio_dev
```
--- ---
## Import from Strava ## Import from Strava
@@ -54,6 +67,7 @@ uv run bincio import strava
uv run bincio import strava --since 2025-01-01 # explicit date cutoff uv run bincio import strava --since 2025-01-01 # explicit date cutoff
uv run bincio import strava --reauth # force new OAuth flow uv run bincio import strava --reauth # force new OAuth flow
uv run bincio import strava --output ~/other_dir # override output dir uv run bincio import strava --output ~/other_dir # override output dir
uv run bincio import strava --dev 50 # dev mode: 50 most recent → /tmp/bincio_dev/
``` ```
Credentials resolution order: Credentials resolution order:
+9 -6
View File
@@ -85,19 +85,22 @@ site/ Astro project
## How to run ## How to run
```bash ```bash
# Extract from local files # Fast dev loop (50-file sample → /tmp/bincio_dev/, no real data touched)
uv run bincio extract --dev 50
uv run bincio import strava --dev 50 # 50 most recent Strava activities
uv run bincio render --serve --data-dir /tmp/bincio_dev
# Full extract from local files
cd ~/src/bincio_activity cd ~/src/bincio_activity
uv run bincio extract --input ~/src/cycling_data_davide/activities --output /tmp/bincio_test uv run bincio extract # uses extract_config.yaml
# Import from Strava (credentials in extract_config.yaml under import.strava) # Import from Strava (credentials in extract_config.yaml under import.strava)
uv sync --extra strava uv sync --extra strava
uv run bincio import strava # first run opens browser for OAuth uv run bincio import strava # first run opens browser for OAuth
uv run bincio import strava # subsequent runs are incremental uv run bincio import strava # subsequent runs are incremental
# Site dev server # Site dev server (render handles symlink + merge automatically)
cd site uv run bincio render --serve
ln -sf /tmp/bincio_test public/data # symlink data
npm run dev
# Edit server (enables drawer + file upload in the site) # Edit server (enables drawer + file upload in the site)
uv run bincio edit --data-dir ~/bincio_data # port 4041 uv run bincio edit --data-dir ~/bincio_data # port 4041
+25 -1
View File
@@ -113,6 +113,8 @@ def _process_file(path: Path) -> dict:
help="Only process files modified after this date.") help="Only process files modified after this date.")
@click.option("--workers", default=None, type=int, @click.option("--workers", default=None, type=int,
help="Parallel worker processes (default: CPU count).") help="Parallel worker processes (default: CPU count).")
@click.option("--dev", "dev_sample", default=None, type=int, metavar="N",
help="Dev mode: sample N files evenly across the full list, output to /tmp/bincio_dev/.")
def extract( def extract(
config_path: Optional[str], config_path: Optional[str],
input_dir: Optional[str], input_dir: Optional[str],
@@ -120,6 +122,7 @@ def extract(
single_file: Optional[str], single_file: Optional[str],
since: Optional[str], since: Optional[str],
workers: Optional[int], workers: Optional[int],
dev_sample: Optional[int],
) -> None: ) -> None:
"""Parse GPX/FIT/TCX files and write BAS JSON data store.""" """Parse GPX/FIT/TCX files and write BAS JSON data store."""
@@ -128,13 +131,25 @@ def extract(
return return
cfg = _resolve_config(config_path, input_dir, output_dir) cfg = _resolve_config(config_path, input_dir, output_dir)
if dev_sample is not None:
cfg.output_dir = Path("/tmp/bincio_dev")
cfg.incremental = False
console.print(f"[yellow]Dev mode:[/yellow] sampling {dev_sample} files → [cyan]{cfg.output_dir}[/cyan]")
cfg.output_dir.mkdir(parents=True, exist_ok=True) cfg.output_dir.mkdir(parents=True, exist_ok=True)
files = _collect_files(cfg, since) files = _collect_files(cfg, since)
if not files: if not files:
console.print("[yellow]No supported files found.[/yellow]") console.print("[yellow]No supported files found.[/yellow]")
return return
console.print(f"Found [bold]{len(files)}[/bold] activity files.")
if dev_sample is not None:
total = len(files)
files = _sample_diverse(files, dev_sample)
console.print(f"Sampled [bold]{len(files)}[/bold] files from {total} total.")
else:
console.print(f"Found [bold]{len(files)}[/bold] activity files.")
# Build strava lookup once (serialised dict, sent to workers via initializer) # Build strava lookup once (serialised dict, sent to workers via initializer)
strava_lookup: dict = {} strava_lookup: dict = {}
@@ -314,6 +329,15 @@ def _load_existing_summaries(output_dir: Path) -> list[dict]:
return [] return []
def _sample_diverse(files: list[Path], n: int) -> list[Path]:
"""Return n files sampled evenly across the sorted list for date/format diversity."""
if len(files) <= n:
return files
files = sorted(files)
step = len(files) / n
return [files[int(i * step)] for i in range(n)]
def _patch_duplicate_of(output_dir: Path, activity_id: str, canonical_id: str) -> None: def _patch_duplicate_of(output_dir: Path, activity_id: str, canonical_id: str) -> None:
p = output_dir / "activities" / f"{activity_id}.json" p = output_dir / "activities" / f"{activity_id}.json"
if not p.exists(): if not p.exists():
+5 -4
View File
@@ -8,7 +8,7 @@ import gpxpy.gpx
from bincio.extract.models import DataPoint, ParsedActivity from bincio.extract.models import DataPoint, ParsedActivity
from bincio.extract.parsers.base import BaseParser from bincio.extract.parsers.base import BaseParser
from bincio.extract.sport import normalise_sport from bincio.extract.sport import normalise_sport, normalise_sub_sport
# Known GPX extension namespaces # Known GPX extension namespaces
_NS_GARMIN = "http://www.garmin.com/xmlschemas/TrackPointExtension/v1" _NS_GARMIN = "http://www.garmin.com/xmlschemas/TrackPointExtension/v1"
@@ -41,14 +41,15 @@ class GpxParser(BaseParser):
if not points: if not points:
raise ValueError(f"No trackpoints found in {path.name}") raise ValueError(f"No trackpoints found in {path.name}")
sport = normalise_sport( raw_sport = (gpx.tracks[0].type if gpx.tracks else None) or "cycling"
(gpx.tracks[0].type if gpx.tracks else None) or "cycling" sport = normalise_sport(raw_sport)
) sub_sport = normalise_sub_sport(raw_sport)
started_at = points[0].timestamp started_at = points[0].timestamp
return ParsedActivity( return ParsedActivity(
points=points, points=points,
sport=sport, sport=sport,
sub_sport=sub_sport,
started_at=started_at, started_at=started_at,
source_file=path.name, source_file=path.name,
source_hash="", # set by factory source_hash="", # set by factory
+4 -2
View File
@@ -6,7 +6,7 @@ from pathlib import Path
from lxml import etree from lxml import etree
from bincio.extract.models import DataPoint, ParsedActivity from bincio.extract.models import DataPoint, ParsedActivity
from bincio.extract.sport import normalise_sport from bincio.extract.sport import normalise_sport, normalise_sub_sport
_NS_HTTP = { _NS_HTTP = {
"tcx": "http://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2", "tcx": "http://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2",
@@ -33,7 +33,8 @@ class TcxParser:
# Use the first activity # Use the first activity
act = activities[0] act = activities[0]
sport_attr = act.get("Sport", "Biking") sport_attr = act.get("Sport", "Biking")
sport = normalise_sport(sport_attr) sport = normalise_sport(sport_attr)
sub_sport = normalise_sub_sport(sport_attr)
points: list[DataPoint] = [] points: list[DataPoint] = []
for tp in act.findall(".//tcx:Trackpoint", _NS): for tp in act.findall(".//tcx:Trackpoint", _NS):
@@ -78,6 +79,7 @@ class TcxParser:
return ParsedActivity( return ParsedActivity(
points=points, points=points,
sport=sport, sport=sport,
sub_sport=sub_sport,
started_at=points[0].timestamp, started_at=points[0].timestamp,
source_file=path.name, source_file=path.name,
source_hash="", source_hash="",
+57 -4
View File
@@ -47,10 +47,13 @@ _MAPPING: dict[str, str] = {
"skiing": "skiing", "skiing": "skiing",
"cross_country_skiing": "skiing", "cross_country_skiing": "skiing",
"nordic_skiing": "skiing", "nordic_skiing": "skiing",
"nordic_ski": "skiing",
"downhill_skiing": "skiing", "downhill_skiing": "skiing",
"alpine_skiing": "skiing", "alpine_skiing": "skiing",
"alpine_ski": "skiing",
"skate_skiing": "skiing", "skate_skiing": "skiing",
"backcountry_skiing": "skiing", "backcountry_skiing": "skiing",
"backcountry_ski": "skiing",
# swimming # swimming
"swimming": "swimming", "swimming": "swimming",
"swim": "swimming", "swim": "swimming",
@@ -58,13 +61,63 @@ _MAPPING: dict[str, str] = {
"lap_swimming": "swimming", "lap_swimming": "swimming",
} }
# Lookup table from a normalised raw sport key to a sub-sport label.
# normalise_sub_sport() looks keys up with the output of _normalise_key(), so
# every key here must be lower-case snake_case. Keys that are absent (e.g. a
# plain "ride" or "run") make normalise_sub_sport() return None.
_SUB_SPORT_MAPPING: dict[str, str] = {
# cycling
"road_biking": "road",
"road_cycling": "road",
"mountain_biking": "mountain",
"mountain_bike_ride": "mountain",
"gravel_cycling": "gravel",
"gravel_ride": "gravel",
"cyclocross": "gravel",
"indoor_cycling": "indoor",
"indoor_ride": "indoor",
"virtual_ride": "indoor",
# running
"trail_running": "trail",
"trail_run": "trail",
"treadmill_running": "indoor",
"treadmill": "indoor",
"indoor_run": "indoor",
"virtual_run": "indoor",
"track_run": "track",
# skiing
"cross_country_skiing": "nordic",
"nordic_skiing": "nordic",
"nordic_ski": "nordic",
"skate_skiing": "nordic",
"backcountry_skiing": "nordic",
"backcountry_ski": "nordic",
"downhill_skiing": "alpine",
"alpine_skiing": "alpine",
"alpine_ski": "alpine",
# swimming
"open_water_swimming": "open_water",
"lap_swimming": "pool",
}
BAS_SPORTS = {"cycling", "running", "hiking", "walking", "swimming", "skiing", "other"} BAS_SPORTS = {"cycling", "running", "hiking", "walking", "swimming", "skiing", "other"}
def _normalise_key(raw: object) -> str:
key = str(raw).strip()
# CamelCase → snake_case ("MountainBikeRide" → "mountain_bike_ride")
key = re.sub(r"([A-Z])", r"_\1", key).lower().lstrip("_")
key = key.replace(" ", "_").replace("-", "_")
return re.sub(r"^\d+", "", key)
def normalise_sport(raw: object) -> str: def normalise_sport(raw: object) -> str:
if raw is None: if raw is None:
return "other" return "other"
key = str(raw).lower().strip().replace(" ", "_").replace("-", "_") return _MAPPING.get(_normalise_key(raw), "other")
# Strip leading date-like prefixes e.g. "20231117outdoor_run" → "outdoor_run"
key = re.sub(r"^\d+", "", key)
return _MAPPING.get(key, "other") def normalise_sub_sport(raw: object) -> str | None:
"""Infer sub_sport from a raw sport type string (e.g. 'mountain_bike_ride' → 'mountain').
Returns None when no sub_sport is implied (e.g. plain 'ride', 'run').
"""
if raw is None:
return None
return _SUB_SPORT_MAPPING.get(_normalise_key(raw))
+9 -2
View File
@@ -29,6 +29,8 @@ def import_group() -> None:
help="Only import activities after this date (default: incremental from last sync).") help="Only import activities after this date (default: incremental from last sync).")
@click.option("--reauth", is_flag=True, default=False, @click.option("--reauth", is_flag=True, default=False,
help="Force re-authorization even if valid tokens exist.") help="Force re-authorization even if valid tokens exist.")
@click.option("--dev", "dev_sample", default=None, type=int, metavar="N",
help="Dev mode: import only the N most recent activities, output to /tmp/bincio_dev/.")
def strava_cmd( def strava_cmd(
client_id: Optional[str], client_id: Optional[str],
client_secret: Optional[str], client_secret: Optional[str],
@@ -36,6 +38,7 @@ def strava_cmd(
config_path: Optional[str], config_path: Optional[str],
since: Optional[str], since: Optional[str],
reauth: bool, reauth: bool,
dev_sample: Optional[int],
) -> None: ) -> None:
"""Import activities from Strava. """Import activities from Strava.
@@ -90,7 +93,11 @@ def strava_cmd(
"Add them to extract_config.yaml under import.strava, or pass --client-id/--client-secret." "Add them to extract_config.yaml under import.strava, or pass --client-id/--client-secret."
) )
out = _resolve_output(output_dir, cfg) if dev_sample is not None:
out = Path("/tmp/bincio_dev")
console.print(f"[yellow]Dev mode:[/yellow] importing {dev_sample} activities → [cyan]{out}[/cyan]")
else:
out = _resolve_output(output_dir, cfg)
console.print(f"Output dir: [cyan]{out}[/cyan]") console.print(f"Output dir: [cyan]{out}[/cyan]")
if reauth and TOKENS_FILE.exists(): if reauth and TOKENS_FILE.exists():
@@ -108,7 +115,7 @@ def strava_cmd(
except ValueError: except ValueError:
raise click.BadParameter(f"Expected YYYY-MM-DD, got {since!r}", param_hint="--since") raise click.BadParameter(f"Expected YYYY-MM-DD, got {since!r}", param_hint="--since")
strava_sync(client, out, since_dt, console) strava_sync(client, out, since_dt, console, limit=dev_sample)
def _load_config(config_path: Optional[str]): def _load_config(config_path: Optional[str]):
+9 -2
View File
@@ -26,7 +26,7 @@ from rich.console import Console
from rich.progress import BarColumn, MofNCompleteColumn, Progress, TextColumn, TimeElapsedColumn from rich.progress import BarColumn, MofNCompleteColumn, Progress, TextColumn, TimeElapsedColumn
from bincio.extract.models import DataPoint, ParsedActivity from bincio.extract.models import DataPoint, ParsedActivity
from bincio.extract.sport import normalise_sport from bincio.extract.sport import normalise_sport, normalise_sub_sport
STRAVA_AUTH_URL = "https://www.strava.com/oauth/authorize" STRAVA_AUTH_URL = "https://www.strava.com/oauth/authorize"
STRAVA_TOKEN_URL = "https://www.strava.com/oauth/token" STRAVA_TOKEN_URL = "https://www.strava.com/oauth/token"
@@ -214,7 +214,9 @@ def _strava_to_parsed(act: dict, streams: dict[str, list]) -> ParsedActivity:
"""Build a ParsedActivity from a Strava activity dict + its streams.""" """Build a ParsedActivity from a Strava activity dict + its streams."""
started_at = datetime.fromisoformat(act["start_date"].replace("Z", "+00:00")) started_at = datetime.fromisoformat(act["start_date"].replace("Z", "+00:00"))
sport = normalise_sport(act.get("sport_type") or act.get("type") or "") raw_sport = act.get("sport_type") or act.get("type") or ""
sport = normalise_sport(raw_sport)
sub_sport = normalise_sub_sport(raw_sport)
times = streams.get("time", []) # seconds since start times = streams.get("time", []) # seconds since start
latlngs = streams.get("latlng", []) # [[lat, lon], ...] latlngs = streams.get("latlng", []) # [[lat, lon], ...]
@@ -244,6 +246,7 @@ def _strava_to_parsed(act: dict, streams: dict[str, list]) -> ParsedActivity:
return ParsedActivity( return ParsedActivity(
points = points, points = points,
sport = sport, sport = sport,
sub_sport = sub_sport,
started_at = started_at, started_at = started_at,
source_file = f"strava_{strava_id}", source_file = f"strava_{strava_id}",
source_hash = source_hash, source_hash = source_hash,
@@ -287,6 +290,7 @@ def sync(
output_dir: Path, output_dir: Path,
since: datetime | None, since: datetime | None,
console: Console, console: Console,
limit: int | None = None,
) -> None: ) -> None:
"""Fetch new Strava activities and write BAS JSON files. """Fetch new Strava activities and write BAS JSON files.
@@ -323,6 +327,9 @@ def sync(
f"Found [bold]{len(new_acts)}[/bold] new activities " f"Found [bold]{len(new_acts)}[/bold] new activities "
f"([bold]{len(all_acts) - len(new_acts)}[/bold] already imported)." f"([bold]{len(all_acts) - len(new_acts)}[/bold] already imported)."
) )
if limit is not None and len(new_acts) > limit:
new_acts = new_acts[:limit]
console.print(f"[yellow]Dev mode:[/yellow] capped to {limit} activities.")
if not new_acts: if not new_acts:
console.print("[green]All up to date.[/green]") console.print("[green]All up to date.[/green]")
return return
+9 -1
View File
@@ -158,8 +158,16 @@
class="text-xs font-medium px-2 py-0.5 rounded-full" class="text-xs font-medium px-2 py-0.5 rounded-full"
style="background:{color}22;color:{color}" style="background:{color}22;color:{color}"
> >
{sportIcon(activity.sport)} {sportLabel(activity.sport, activity.sub_sport)} {sportIcon(activity.sport)} {sportLabel(activity.sport)}
</span> </span>
<!-- Secondary pill showing only the sub-sport part of the label.
     sportLabel() returns "<sub label> <Sport>", and the sub label itself may
     contain spaces (e.g. "Open Water"), so drop just the trailing sport word
     instead of keeping only the first word. -->
{#if activity.sub_sport && activity.sub_sport !== 'generic'}
  <span
    class="text-xs font-medium px-2 py-0.5 rounded-full"
    style="background:{color}11;color:{color}cc"
  >
    {sportLabel(activity.sport, activity.sub_sport).replace(/ \S+$/, '')}
  </span>
{/if}
<span class="text-xs text-zinc-500"> <span class="text-xs text-zinc-500">
{formatDate(activity.started_at)} · {formatTime(activity.started_at)} {formatDate(activity.started_at)} · {formatTime(activity.started_at)}
</span> </span>
+15 -1
View File
@@ -75,10 +75,24 @@ export function sportColor(sport: Sport): string {
return SPORT_COLORS[sport] ?? '#a78bfa'; return SPORT_COLORS[sport] ?? '#a78bfa';
} }
/**
 * Human-readable display labels keyed by sub-sport identifier.
 *
 * Consulted by `sportLabel`; an identifier with no entry here falls back to
 * the identifier itself with its first letter capitalised.
 */
const SUB_SPORT_LABELS: Record<string, string> = {
road: 'Road',
mountain: 'MTB',
gravel: 'Gravel',
indoor: 'Indoor',
trail: 'Trail',
track: 'Track',
nordic: 'Nordic',
alpine: 'Alpine',
open_water: 'Open Water',
pool: 'Pool',
};
export function sportLabel(sport: Sport, subSport?: string | null): string { export function sportLabel(sport: Sport, subSport?: string | null): string {
const base = sport.charAt(0).toUpperCase() + sport.slice(1); const base = sport.charAt(0).toUpperCase() + sport.slice(1);
if (subSport && subSport !== 'generic') { if (subSport && subSport !== 'generic') {
return `${subSport.charAt(0).toUpperCase() + subSport.slice(1)} ${base}`; const sub = SUB_SPORT_LABELS[subSport] ?? (subSport.charAt(0).toUpperCase() + subSport.slice(1));
return `${sub} ${base}`;
} }
return base; return base;
} }
+1 -1
View File
@@ -1,7 +1,7 @@
/** TypeScript types mirroring BAS v1.0 schema. */ /** TypeScript types mirroring BAS v1.0 schema. */
export type Sport = "cycling" | "running" | "hiking" | "walking" | "swimming" | "skiing" | "other"; export type Sport = "cycling" | "running" | "hiking" | "walking" | "swimming" | "skiing" | "other";
export type SubSport = "road" | "mountain" | "gravel" | "indoor" | "trail" | "track" | "nordic" | null; export type SubSport = "road" | "mountain" | "gravel" | "indoor" | "trail" | "track" | "nordic" | "alpine" | "open_water" | "pool" | null;
export type Privacy = "public" | "blur_start" | "no_gps" | "private"; export type Privacy = "public" | "blur_start" | "no_gps" | "private";
/** [duration_s, avg_watts] pairs, sorted by duration ascending. */ /** [duration_s, avg_watts] pairs, sorted by duration ascending. */