Trying to get the sub-sport label shown properly

This commit is contained in:
Davide Scaini
2026-03-30 20:09:01 +02:00
parent c58bc8f7d5
commit 877472e620
11 changed files with 157 additions and 24 deletions
+14
View File
@@ -25,11 +25,24 @@ uv run bincio extract --since 2025-01-01 # only files newer than date
uv run bincio extract --file ride.gpx # single file → JSON on stdout
uv run bincio extract --input ~/rides \
--output ~/bincio_data # override config paths
uv run bincio extract --dev 50 # dev mode: 50 files → /tmp/bincio_dev/
```
Re-extraction is safe — unchanged files are skipped (hash-based dedup).
To force a full re-extract: `rm -rf ~/bincio_data && uv run bincio extract`
### Dev mode
`--dev N` samples N files evenly across the full file list (spread by date and format)
and writes to `/tmp/bincio_dev/` so your real data is never touched. Use it for fast
iteration on UI or pipeline changes:
```bash
uv run bincio extract --dev 50
uv run bincio import strava --dev 50 # N most recent Strava activities
uv run bincio render --serve --data-dir /tmp/bincio_dev
```
---
## Import from Strava
@@ -54,6 +67,7 @@ uv run bincio import strava
uv run bincio import strava --since 2025-01-01 # explicit date cutoff
uv run bincio import strava --reauth # force new OAuth flow
uv run bincio import strava --output ~/other_dir # override output dir
uv run bincio import strava --dev 50 # dev mode: 50 most recent → /tmp/bincio_dev/
```
Credentials resolution order:
+9 -6
View File
@@ -85,19 +85,22 @@ site/ Astro project
## How to run
```bash
# Extract from local files
# Fast dev loop (50-file sample → /tmp/bincio_dev/, no real data touched)
uv run bincio extract --dev 50
uv run bincio import strava --dev 50 # 50 most recent Strava activities
uv run bincio render --serve --data-dir /tmp/bincio_dev
# Full extract from local files
cd ~/src/bincio_activity
uv run bincio extract --input ~/src/cycling_data_davide/activities --output /tmp/bincio_test
uv run bincio extract # uses extract_config.yaml
# Import from Strava (credentials in extract_config.yaml under import.strava)
uv sync --extra strava
uv run bincio import strava # first run opens browser for OAuth
uv run bincio import strava # subsequent runs are incremental
# Site dev server
cd site
ln -sf /tmp/bincio_test public/data # symlink data
npm run dev
# Site dev server (render handles symlink + merge automatically)
uv run bincio render --serve
# Edit server (enables drawer + file upload in the site)
uv run bincio edit --data-dir ~/bincio_data # port 4041
+25 -1
View File
@@ -113,6 +113,8 @@ def _process_file(path: Path) -> dict:
help="Only process files modified after this date.")
@click.option("--workers", default=None, type=int,
help="Parallel worker processes (default: CPU count).")
@click.option("--dev", "dev_sample", default=None, type=int, metavar="N",
help="Dev mode: sample N files evenly across the full list, output to /tmp/bincio_dev/.")
def extract(
config_path: Optional[str],
input_dir: Optional[str],
@@ -120,6 +122,7 @@ def extract(
single_file: Optional[str],
since: Optional[str],
workers: Optional[int],
dev_sample: Optional[int],
) -> None:
"""Parse GPX/FIT/TCX files and write BAS JSON data store."""
@@ -128,13 +131,25 @@ def extract(
return
cfg = _resolve_config(config_path, input_dir, output_dir)
if dev_sample is not None:
cfg.output_dir = Path("/tmp/bincio_dev")
cfg.incremental = False
console.print(f"[yellow]Dev mode:[/yellow] sampling {dev_sample} files → [cyan]{cfg.output_dir}[/cyan]")
cfg.output_dir.mkdir(parents=True, exist_ok=True)
files = _collect_files(cfg, since)
if not files:
console.print("[yellow]No supported files found.[/yellow]")
return
console.print(f"Found [bold]{len(files)}[/bold] activity files.")
if dev_sample is not None:
total = len(files)
files = _sample_diverse(files, dev_sample)
console.print(f"Sampled [bold]{len(files)}[/bold] files from {total} total.")
else:
console.print(f"Found [bold]{len(files)}[/bold] activity files.")
# Build strava lookup once (serialised dict, sent to workers via initializer)
strava_lookup: dict = {}
@@ -314,6 +329,15 @@ def _load_existing_summaries(output_dir: Path) -> list[dict]:
return []
def _sample_diverse(files: list[Path], n: int) -> list[Path]:
    """Return up to ``n`` files picked at evenly spaced positions of the sorted list.

    The paths are sorted and every ``len(files) / n``-th entry is taken, so the
    sample is spread over the whole listing rather than being a prefix of it.
    (Presumably file names sort roughly by date/format, which is what gives
    the sample its diversity; verify against the real data layout.)

    Args:
        files: Candidate paths.
        n: Requested sample size.

    Returns:
        An empty list when ``n`` is zero or negative; ``files`` itself
        (unsorted, not copied) when it already holds at most ``n`` entries;
        otherwise exactly ``n`` paths in sorted order.
    """
    if n <= 0:
        # Nothing requested. Also guards the division below, which used to
        # raise ZeroDivisionError for ``--dev 0``.
        return []
    total = len(files)
    if total <= n:
        return files
    ordered = sorted(files)
    # floor(i * total / n) in integer arithmetic: no float rounding surprises,
    # and i < n guarantees the index stays below ``total``.
    return [ordered[i * total // n] for i in range(n)]
def _patch_duplicate_of(output_dir: Path, activity_id: str, canonical_id: str) -> None:
p = output_dir / "activities" / f"{activity_id}.json"
if not p.exists():
+5 -4
View File
@@ -8,7 +8,7 @@ import gpxpy.gpx
from bincio.extract.models import DataPoint, ParsedActivity
from bincio.extract.parsers.base import BaseParser
from bincio.extract.sport import normalise_sport
from bincio.extract.sport import normalise_sport, normalise_sub_sport
# Known GPX extension namespaces
_NS_GARMIN = "http://www.garmin.com/xmlschemas/TrackPointExtension/v1"
@@ -41,14 +41,15 @@ class GpxParser(BaseParser):
if not points:
raise ValueError(f"No trackpoints found in {path.name}")
sport = normalise_sport(
(gpx.tracks[0].type if gpx.tracks else None) or "cycling"
)
raw_sport = (gpx.tracks[0].type if gpx.tracks else None) or "cycling"
sport = normalise_sport(raw_sport)
sub_sport = normalise_sub_sport(raw_sport)
started_at = points[0].timestamp
return ParsedActivity(
points=points,
sport=sport,
sub_sport=sub_sport,
started_at=started_at,
source_file=path.name,
source_hash="", # set by factory
+4 -2
View File
@@ -6,7 +6,7 @@ from pathlib import Path
from lxml import etree
from bincio.extract.models import DataPoint, ParsedActivity
from bincio.extract.sport import normalise_sport
from bincio.extract.sport import normalise_sport, normalise_sub_sport
_NS_HTTP = {
"tcx": "http://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2",
@@ -33,7 +33,8 @@ class TcxParser:
# Use the first activity
act = activities[0]
sport_attr = act.get("Sport", "Biking")
sport = normalise_sport(sport_attr)
sport = normalise_sport(sport_attr)
sub_sport = normalise_sub_sport(sport_attr)
points: list[DataPoint] = []
for tp in act.findall(".//tcx:Trackpoint", _NS):
@@ -78,6 +79,7 @@ class TcxParser:
return ParsedActivity(
points=points,
sport=sport,
sub_sport=sub_sport,
started_at=points[0].timestamp,
source_file=path.name,
source_hash="",
+57 -4
View File
@@ -47,10 +47,13 @@ _MAPPING: dict[str, str] = {
"skiing": "skiing",
"cross_country_skiing": "skiing",
"nordic_skiing": "skiing",
"nordic_ski": "skiing",
"downhill_skiing": "skiing",
"alpine_skiing": "skiing",
"alpine_ski": "skiing",
"skate_skiing": "skiing",
"backcountry_skiing": "skiing",
"backcountry_ski": "skiing",
# swimming
"swimming": "swimming",
"swim": "swimming",
@@ -58,13 +61,63 @@ _MAPPING: dict[str, str] = {
"lap_swimming": "swimming",
}
# Lookup table from a normalised raw sport type to the sub_sport it implies.
# Keys are in the snake_case form produced by _normalise_key(); values are the
# sub_sport strings stored on the activity. normalise_sub_sport() uses .get()
# on this table, so a raw type with no entry here yields None.
_SUB_SPORT_MAPPING: dict[str, str] = {
# cycling
"road_biking": "road",
"road_cycling": "road",
"mountain_biking": "mountain",
"mountain_bike_ride": "mountain",
"gravel_cycling": "gravel",
"gravel_ride": "gravel",
"cyclocross": "gravel",
"indoor_cycling": "indoor",
"indoor_ride": "indoor",
"virtual_ride": "indoor",
# running
"trail_running": "trail",
"trail_run": "trail",
"treadmill_running": "indoor",
"treadmill": "indoor",
"indoor_run": "indoor",
"virtual_run": "indoor",
"track_run": "track",
# skiing
"cross_country_skiing": "nordic",
"nordic_skiing": "nordic",
"nordic_ski": "nordic",
"skate_skiing": "nordic",
"backcountry_skiing": "nordic",
"backcountry_ski": "nordic",
"downhill_skiing": "alpine",
"alpine_skiing": "alpine",
"alpine_ski": "alpine",
# swimming
"open_water_swimming": "open_water",
"lap_swimming": "pool",
}
BAS_SPORTS = {"cycling", "running", "hiking", "walking", "swimming", "skiing", "other"}
def _normalise_key(raw: object) -> str:
    """Turn a raw sport label into the snake_case key used by the lookup tables.

    Steps: trim whitespace, split CamelCase words with underscores, lowercase,
    turn spaces and hyphens into underscores, drop a leading run of digits
    (e.g. "20231117outdoor_run" -> "outdoor_run"), then collapse repeated
    underscores and strip them from both ends.

    Args:
        raw: Any value; it is converted with ``str()`` first.

    Returns:
        The normalised key, e.g. "MountainBikeRide" -> "mountain_bike_ride",
        "Mountain Biking" -> "mountain_biking".
    """
    key = str(raw).strip()
    # Insert "_" only at genuine CamelCase boundaries (lower/digit -> upper, or
    # the end of an acronym such as "EBike"). Prefixing *every* capital would
    # turn "Mountain Biking" into "mountain__biking" and "BIKING" into
    # "b_i_k_i_n_g", neither of which matches a table key.
    key = re.sub(r"(?<=[a-z0-9])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])", "_", key)
    key = key.lower().replace(" ", "_").replace("-", "_")
    # Strip leading date-like digit prefixes.
    key = re.sub(r"^\d+", "", key)
    # Separator runs ("a - b") and the underscore left behind by a stripped
    # digit prefix ("20231117OutdoorRun") must not leak into the key.
    return re.sub(r"_+", "_", key).strip("_")
def normalise_sport(raw: object) -> str:
    """Map a raw sport label onto a BAS sport name.

    Args:
        raw: Sport/type value as found in the source (file attribute, API
            field, ...). ``None`` is accepted.

    Returns:
        The ``_MAPPING`` entry for the normalised key, or "other" when ``raw``
        is ``None`` or the key is not in the table.
    """
    if raw is None:
        return "other"
    # Use the same key normalisation as normalise_sub_sport() so that sport and
    # sub_sport are always derived from an identical key (CamelCase handling
    # and the leading-digit strip live in _normalise_key).
    return _MAPPING.get(_normalise_key(raw), "other")
def normalise_sub_sport(raw: object) -> str | None:
    """Look up the sub_sport implied by a raw sport type string.

    Example: 'mountain_bike_ride' → 'mountain'. The input is normalised with
    ``_normalise_key`` and looked up in ``_SUB_SPORT_MAPPING``.

    Returns None when ``raw`` is None or when the normalised key has no entry,
    i.e. no sub_sport is implied (e.g. plain 'ride', 'run').
    """
    return None if raw is None else _SUB_SPORT_MAPPING.get(_normalise_key(raw))
+9 -2
View File
@@ -29,6 +29,8 @@ def import_group() -> None:
help="Only import activities after this date (default: incremental from last sync).")
@click.option("--reauth", is_flag=True, default=False,
help="Force re-authorization even if valid tokens exist.")
@click.option("--dev", "dev_sample", default=None, type=int, metavar="N",
help="Dev mode: import only the N most recent activities, output to /tmp/bincio_dev/.")
def strava_cmd(
client_id: Optional[str],
client_secret: Optional[str],
@@ -36,6 +38,7 @@ def strava_cmd(
config_path: Optional[str],
since: Optional[str],
reauth: bool,
dev_sample: Optional[int],
) -> None:
"""Import activities from Strava.
@@ -90,7 +93,11 @@ def strava_cmd(
"Add them to extract_config.yaml under import.strava, or pass --client-id/--client-secret."
)
out = _resolve_output(output_dir, cfg)
if dev_sample is not None:
out = Path("/tmp/bincio_dev")
console.print(f"[yellow]Dev mode:[/yellow] importing {dev_sample} activities → [cyan]{out}[/cyan]")
else:
out = _resolve_output(output_dir, cfg)
console.print(f"Output dir: [cyan]{out}[/cyan]")
if reauth and TOKENS_FILE.exists():
@@ -108,7 +115,7 @@ def strava_cmd(
except ValueError:
raise click.BadParameter(f"Expected YYYY-MM-DD, got {since!r}", param_hint="--since")
strava_sync(client, out, since_dt, console)
strava_sync(client, out, since_dt, console, limit=dev_sample)
def _load_config(config_path: Optional[str]):
+9 -2
View File
@@ -26,7 +26,7 @@ from rich.console import Console
from rich.progress import BarColumn, MofNCompleteColumn, Progress, TextColumn, TimeElapsedColumn
from bincio.extract.models import DataPoint, ParsedActivity
from bincio.extract.sport import normalise_sport
from bincio.extract.sport import normalise_sport, normalise_sub_sport
STRAVA_AUTH_URL = "https://www.strava.com/oauth/authorize"
STRAVA_TOKEN_URL = "https://www.strava.com/oauth/token"
@@ -214,7 +214,9 @@ def _strava_to_parsed(act: dict, streams: dict[str, list]) -> ParsedActivity:
"""Build a ParsedActivity from a Strava activity dict + its streams."""
started_at = datetime.fromisoformat(act["start_date"].replace("Z", "+00:00"))
sport = normalise_sport(act.get("sport_type") or act.get("type") or "")
raw_sport = act.get("sport_type") or act.get("type") or ""
sport = normalise_sport(raw_sport)
sub_sport = normalise_sub_sport(raw_sport)
times = streams.get("time", []) # seconds since start
latlngs = streams.get("latlng", []) # [[lat, lon], ...]
@@ -244,6 +246,7 @@ def _strava_to_parsed(act: dict, streams: dict[str, list]) -> ParsedActivity:
return ParsedActivity(
points = points,
sport = sport,
sub_sport = sub_sport,
started_at = started_at,
source_file = f"strava_{strava_id}",
source_hash = source_hash,
@@ -287,6 +290,7 @@ def sync(
output_dir: Path,
since: datetime | None,
console: Console,
limit: int | None = None,
) -> None:
"""Fetch new Strava activities and write BAS JSON files.
@@ -323,6 +327,9 @@ def sync(
f"Found [bold]{len(new_acts)}[/bold] new activities "
f"([bold]{len(all_acts) - len(new_acts)}[/bold] already imported)."
)
if limit is not None and len(new_acts) > limit:
new_acts = new_acts[:limit]
console.print(f"[yellow]Dev mode:[/yellow] capped to {limit} activities.")
if not new_acts:
console.print("[green]All up to date.[/green]")
return
+9 -1
View File
@@ -158,8 +158,16 @@
class="text-xs font-medium px-2 py-0.5 rounded-full"
style="background:{color}22;color:{color}"
>
{sportIcon(activity.sport)} {sportLabel(activity.sport, activity.sub_sport)}
{sportIcon(activity.sport)} {sportLabel(activity.sport)}
</span>
{#if activity.sub_sport && activity.sub_sport !== 'generic'}
<span
class="text-xs font-medium px-2 py-0.5 rounded-full"
style="background:{color}11;color:{color}cc"
>
{sportLabel(activity.sport, activity.sub_sport).split(' ')[0]}
</span>
{/if}
<span class="text-xs text-zinc-500">
{formatDate(activity.started_at)} · {formatTime(activity.started_at)}
</span>
+15 -1
View File
@@ -75,10 +75,24 @@ export function sportColor(sport: Sport): string {
return SPORT_COLORS[sport] ?? '#a78bfa';
}
/**
 * Display labels for sub-sport identifiers, keyed by the raw sub_sport value.
 * sportLabel() falls back to capitalising the raw value when a key is missing.
 */
const SUB_SPORT_LABELS: Record<string, string> = {
road: 'Road',
mountain: 'MTB',
gravel: 'Gravel',
indoor: 'Indoor',
trail: 'Trail',
track: 'Track',
nordic: 'Nordic',
alpine: 'Alpine',
open_water: 'Open Water',
pool: 'Pool',
};
/**
 * Human-readable label for a sport, optionally prefixed by its sub-sport.
 *
 * @param sport    Sport identifier; its first letter is capitalised for display.
 * @param subSport Optional sub-sport; ignored when empty, null or 'generic'.
 * @returns `"<Sub> <Sport>"` when a sub-sport applies (e.g. "MTB Cycling"),
 *          otherwise just the capitalised sport.
 */
export function sportLabel(sport: Sport, subSport?: string | null): string {
  const base = sport.charAt(0).toUpperCase() + sport.slice(1);
  if (!subSport || subSport === 'generic') {
    return base;
  }
  // Prefer the curated label ("MTB", "Open Water"); only fall back to
  // capitalising the raw value for sub-sports missing from SUB_SPORT_LABELS.
  const sub = SUB_SPORT_LABELS[subSport] ?? (subSport.charAt(0).toUpperCase() + subSport.slice(1));
  return `${sub} ${base}`;
}
+1 -1
View File
@@ -1,7 +1,7 @@
/** TypeScript types mirroring BAS v1.0 schema. */
export type Sport = "cycling" | "running" | "hiking" | "walking" | "swimming" | "skiing" | "other";
export type SubSport = "road" | "mountain" | "gravel" | "indoor" | "trail" | "track" | "nordic" | null;
export type SubSport = "road" | "mountain" | "gravel" | "indoor" | "trail" | "track" | "nordic" | "alpine" | "open_water" | "pool" | null;
export type Privacy = "public" | "blur_start" | "no_gps" | "private";
/** [duration_s, avg_watts] pairs, sorted by duration ascending. */