Trying to get the sub-sport label to show properly
This commit is contained in:
@@ -25,11 +25,24 @@ uv run bincio extract --since 2025-01-01 # only files newer than date
|
||||
uv run bincio extract --file ride.gpx # single file → JSON on stdout
|
||||
uv run bincio extract --input ~/rides \
|
||||
--output ~/bincio_data # override config paths
|
||||
uv run bincio extract --dev 50 # dev mode: 50 files → /tmp/bincio_dev/
|
||||
```
|
||||
|
||||
Re-extraction is safe — unchanged files are skipped (hash-based dedup).
|
||||
To force a full re-extract: `rm -rf ~/bincio_data && uv run bincio extract`
|
||||
|
||||
### Dev mode
|
||||
|
||||
`--dev N` samples N files evenly across the full file list (spread by date and format)
|
||||
and writes to `/tmp/bincio_dev/` so your real data is never touched. Use it for fast
|
||||
iteration on UI or pipeline changes:
|
||||
|
||||
```bash
|
||||
uv run bincio extract --dev 50
|
||||
uv run bincio import strava --dev 50 # N most recent Strava activities
|
||||
uv run bincio render --serve --data-dir /tmp/bincio_dev
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Import from Strava
|
||||
@@ -54,6 +67,7 @@ uv run bincio import strava
|
||||
uv run bincio import strava --since 2025-01-01 # explicit date cutoff
|
||||
uv run bincio import strava --reauth # force new OAuth flow
|
||||
uv run bincio import strava --output ~/other_dir # override output dir
|
||||
uv run bincio import strava --dev 50 # dev mode: 50 most recent → /tmp/bincio_dev/
|
||||
```
|
||||
|
||||
Credentials resolution order:
|
||||
|
||||
@@ -85,19 +85,22 @@ site/ Astro project
|
||||
## How to run
|
||||
|
||||
```bash
|
||||
# Extract from local files
|
||||
# Fast dev loop (50-file sample → /tmp/bincio_dev/, no real data touched)
|
||||
uv run bincio extract --dev 50
|
||||
uv run bincio import strava --dev 50 # 50 most recent Strava activities
|
||||
uv run bincio render --serve --data-dir /tmp/bincio_dev
|
||||
|
||||
# Full extract from local files
|
||||
cd ~/src/bincio_activity
|
||||
uv run bincio extract --input ~/src/cycling_data_davide/activities --output /tmp/bincio_test
|
||||
uv run bincio extract # uses extract_config.yaml
|
||||
|
||||
# Import from Strava (credentials in extract_config.yaml under import.strava)
|
||||
uv sync --extra strava
|
||||
uv run bincio import strava # first run opens browser for OAuth
|
||||
uv run bincio import strava # subsequent runs are incremental
|
||||
|
||||
# Site dev server
|
||||
cd site
|
||||
ln -sf /tmp/bincio_test public/data # symlink data
|
||||
npm run dev
|
||||
# Site dev server (render handles symlink + merge automatically)
|
||||
uv run bincio render --serve
|
||||
|
||||
# Edit server (enables drawer + file upload in the site)
|
||||
uv run bincio edit --data-dir ~/bincio_data # port 4041
|
||||
|
||||
+25
-1
@@ -113,6 +113,8 @@ def _process_file(path: Path) -> dict:
|
||||
help="Only process files modified after this date.")
|
||||
@click.option("--workers", default=None, type=int,
|
||||
help="Parallel worker processes (default: CPU count).")
|
||||
@click.option("--dev", "dev_sample", default=None, type=int, metavar="N",
|
||||
help="Dev mode: sample N files evenly across the full list, output to /tmp/bincio_dev/.")
|
||||
def extract(
|
||||
config_path: Optional[str],
|
||||
input_dir: Optional[str],
|
||||
@@ -120,6 +122,7 @@ def extract(
|
||||
single_file: Optional[str],
|
||||
since: Optional[str],
|
||||
workers: Optional[int],
|
||||
dev_sample: Optional[int],
|
||||
) -> None:
|
||||
"""Parse GPX/FIT/TCX files and write BAS JSON data store."""
|
||||
|
||||
@@ -128,13 +131,25 @@ def extract(
|
||||
return
|
||||
|
||||
cfg = _resolve_config(config_path, input_dir, output_dir)
|
||||
|
||||
if dev_sample is not None:
|
||||
cfg.output_dir = Path("/tmp/bincio_dev")
|
||||
cfg.incremental = False
|
||||
console.print(f"[yellow]Dev mode:[/yellow] sampling {dev_sample} files → [cyan]{cfg.output_dir}[/cyan]")
|
||||
|
||||
cfg.output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
files = _collect_files(cfg, since)
|
||||
if not files:
|
||||
console.print("[yellow]No supported files found.[/yellow]")
|
||||
return
|
||||
console.print(f"Found [bold]{len(files)}[/bold] activity files.")
|
||||
|
||||
if dev_sample is not None:
|
||||
total = len(files)
|
||||
files = _sample_diverse(files, dev_sample)
|
||||
console.print(f"Sampled [bold]{len(files)}[/bold] files from {total} total.")
|
||||
else:
|
||||
console.print(f"Found [bold]{len(files)}[/bold] activity files.")
|
||||
|
||||
# Build strava lookup once (serialised dict, sent to workers via initializer)
|
||||
strava_lookup: dict = {}
|
||||
@@ -314,6 +329,15 @@ def _load_existing_summaries(output_dir: Path) -> list[dict]:
|
||||
return []
|
||||
|
||||
|
||||
def _sample_diverse(files: list[Path], n: int) -> list[Path]:
    """Return at most ``n`` files taken at evenly spaced positions of the sorted list.

    The paths are sorted and one entry is picked every ``len(files) / n``
    positions, so the sample spans the whole sorted range instead of only
    its head.  How well that spreads over dates/formats depends on how the
    paths sort (presumably date-prefixed names — not verified here).

    Args:
        files: Candidate paths, in any order.
        n: Number of files wanted.

    Returns:
        ``[]`` when ``n`` is zero or negative; ``files`` itself (unsorted,
        same object) when it already holds ``n`` entries or fewer; otherwise
        a new list of exactly ``n`` paths in sorted order.
    """
    if n <= 0:
        # Nothing requested.  Without this guard n == 0 would divide by zero
        # below; negative n already produced an empty list via range(n).
        return []
    if len(files) <= n:
        return files
    ordered = sorted(files)
    step = len(ordered) / n
    # int() truncates, so indices stay within [0, len(ordered) - 1].
    return [ordered[int(i * step)] for i in range(n)]
|
||||
|
||||
|
||||
def _patch_duplicate_of(output_dir: Path, activity_id: str, canonical_id: str) -> None:
|
||||
p = output_dir / "activities" / f"{activity_id}.json"
|
||||
if not p.exists():
|
||||
|
||||
@@ -8,7 +8,7 @@ import gpxpy.gpx
|
||||
|
||||
from bincio.extract.models import DataPoint, ParsedActivity
|
||||
from bincio.extract.parsers.base import BaseParser
|
||||
from bincio.extract.sport import normalise_sport
|
||||
from bincio.extract.sport import normalise_sport, normalise_sub_sport
|
||||
|
||||
# Known GPX extension namespaces
|
||||
_NS_GARMIN = "http://www.garmin.com/xmlschemas/TrackPointExtension/v1"
|
||||
@@ -41,14 +41,15 @@ class GpxParser(BaseParser):
|
||||
if not points:
|
||||
raise ValueError(f"No trackpoints found in {path.name}")
|
||||
|
||||
sport = normalise_sport(
|
||||
(gpx.tracks[0].type if gpx.tracks else None) or "cycling"
|
||||
)
|
||||
raw_sport = (gpx.tracks[0].type if gpx.tracks else None) or "cycling"
|
||||
sport = normalise_sport(raw_sport)
|
||||
sub_sport = normalise_sub_sport(raw_sport)
|
||||
started_at = points[0].timestamp
|
||||
|
||||
return ParsedActivity(
|
||||
points=points,
|
||||
sport=sport,
|
||||
sub_sport=sub_sport,
|
||||
started_at=started_at,
|
||||
source_file=path.name,
|
||||
source_hash="", # set by factory
|
||||
|
||||
@@ -6,7 +6,7 @@ from pathlib import Path
|
||||
from lxml import etree
|
||||
|
||||
from bincio.extract.models import DataPoint, ParsedActivity
|
||||
from bincio.extract.sport import normalise_sport
|
||||
from bincio.extract.sport import normalise_sport, normalise_sub_sport
|
||||
|
||||
_NS_HTTP = {
|
||||
"tcx": "http://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2",
|
||||
@@ -33,7 +33,8 @@ class TcxParser:
|
||||
# Use the first activity
|
||||
act = activities[0]
|
||||
sport_attr = act.get("Sport", "Biking")
|
||||
sport = normalise_sport(sport_attr)
|
||||
sport = normalise_sport(sport_attr)
|
||||
sub_sport = normalise_sub_sport(sport_attr)
|
||||
|
||||
points: list[DataPoint] = []
|
||||
for tp in act.findall(".//tcx:Trackpoint", _NS):
|
||||
@@ -78,6 +79,7 @@ class TcxParser:
|
||||
return ParsedActivity(
|
||||
points=points,
|
||||
sport=sport,
|
||||
sub_sport=sub_sport,
|
||||
started_at=points[0].timestamp,
|
||||
source_file=path.name,
|
||||
source_hash="",
|
||||
|
||||
+57
-4
@@ -47,10 +47,13 @@ _MAPPING: dict[str, str] = {
|
||||
"skiing": "skiing",
|
||||
"cross_country_skiing": "skiing",
|
||||
"nordic_skiing": "skiing",
|
||||
"nordic_ski": "skiing",
|
||||
"downhill_skiing": "skiing",
|
||||
"alpine_skiing": "skiing",
|
||||
"alpine_ski": "skiing",
|
||||
"skate_skiing": "skiing",
|
||||
"backcountry_skiing": "skiing",
|
||||
"backcountry_ski": "skiing",
|
||||
# swimming
|
||||
"swimming": "swimming",
|
||||
"swim": "swimming",
|
||||
@@ -58,13 +61,63 @@ _MAPPING: dict[str, str] = {
|
||||
"lap_swimming": "swimming",
|
||||
}
|
||||
|
||||
_SUB_SPORT_MAPPING: dict[str, str] = {
|
||||
# cycling
|
||||
"road_biking": "road",
|
||||
"road_cycling": "road",
|
||||
"mountain_biking": "mountain",
|
||||
"mountain_bike_ride": "mountain",
|
||||
"gravel_cycling": "gravel",
|
||||
"gravel_ride": "gravel",
|
||||
"cyclocross": "gravel",
|
||||
"indoor_cycling": "indoor",
|
||||
"indoor_ride": "indoor",
|
||||
"virtual_ride": "indoor",
|
||||
# running
|
||||
"trail_running": "trail",
|
||||
"trail_run": "trail",
|
||||
"treadmill_running": "indoor",
|
||||
"treadmill": "indoor",
|
||||
"indoor_run": "indoor",
|
||||
"virtual_run": "indoor",
|
||||
"track_run": "track",
|
||||
# skiing
|
||||
"cross_country_skiing": "nordic",
|
||||
"nordic_skiing": "nordic",
|
||||
"nordic_ski": "nordic",
|
||||
"skate_skiing": "nordic",
|
||||
"backcountry_skiing": "nordic",
|
||||
"backcountry_ski": "nordic",
|
||||
"downhill_skiing": "alpine",
|
||||
"alpine_skiing": "alpine",
|
||||
"alpine_ski": "alpine",
|
||||
# swimming
|
||||
"open_water_swimming": "open_water",
|
||||
"lap_swimming": "pool",
|
||||
}
|
||||
|
||||
BAS_SPORTS = {"cycling", "running", "hiking", "walking", "swimming", "skiing", "other"}
|
||||
|
||||
|
||||
def _normalise_key(raw: object) -> str:
    """Turn a raw sport/type value into a snake_case lookup key.

    ``raw`` is stringified, trimmed, converted from CamelCase, and has its
    separators (spaces, hyphens, underscores) unified into single
    underscores.  A leading run of digits/underscores (date-like prefixes)
    and any trailing underscore are removed.

    Examples: ``"MountainBikeRide"``, ``"Mountain Bike Ride"`` and
    ``"mountain-bike-ride"`` all give ``"mountain_bike_ride"``;
    ``"20231117outdoor_run"`` gives ``"outdoor_run"``.
    """
    key = str(raw).strip()
    # CamelCase → snake_case ("MountainBikeRide" → "_mountain_bike_ride");
    # the stray leading underscore is removed further down.
    key = re.sub(r"([A-Z])", r"_\1", key).lower()
    # Unify separators and collapse runs, so "Trail Run" (which the step
    # above turned into "_trail _run") becomes "_trail_run", not "_trail__run".
    key = re.sub(r"[ \-_]+", "_", key)
    # Strip leading date-like prefixes together with any underscores around
    # them, e.g. "20231117_outdoor_run" → "outdoor_run".
    key = re.sub(r"^[\d_]+", "", key)
    return key.rstrip("_")
|
||||
|
||||
|
||||
def normalise_sport(raw: object) -> str:
    """Map a raw sport/type value onto the sport name stored in ``_MAPPING``.

    The value is normalised with ``_normalise_key`` (the same helper
    ``normalise_sub_sport`` uses), so CamelCase inputs such as
    ``"MountainBikeRide"`` resolve like their snake_case spelling.

    Returns:
        The mapped sport, or ``"other"`` when ``raw`` is ``None`` or its
        normalised key is not in ``_MAPPING``.
    """
    if raw is None:
        return "other"
    # Single lookup path: the earlier inline lower()/replace() normalisation
    # returned first and left this shared-helper lookup unreachable.
    return _MAPPING.get(_normalise_key(raw), "other")
|
||||
|
||||
|
||||
def normalise_sub_sport(raw: object) -> str | None:
    """Derive the sub_sport implied by a raw sport type string.

    For example ``'mountain_bike_ride'`` yields ``'mountain'``.  The raw
    value is normalised with ``_normalise_key`` and looked up in
    ``_SUB_SPORT_MAPPING``.

    Returns:
        The mapped sub_sport, or ``None`` when ``raw`` is ``None`` or the
        key implies no sub_sport (e.g. plain ``'ride'`` or ``'run'``).
    """
    return None if raw is None else _SUB_SPORT_MAPPING.get(_normalise_key(raw))
|
||||
|
||||
@@ -29,6 +29,8 @@ def import_group() -> None:
|
||||
help="Only import activities after this date (default: incremental from last sync).")
|
||||
@click.option("--reauth", is_flag=True, default=False,
|
||||
help="Force re-authorization even if valid tokens exist.")
|
||||
@click.option("--dev", "dev_sample", default=None, type=int, metavar="N",
|
||||
help="Dev mode: import only the N most recent activities, output to /tmp/bincio_dev/.")
|
||||
def strava_cmd(
|
||||
client_id: Optional[str],
|
||||
client_secret: Optional[str],
|
||||
@@ -36,6 +38,7 @@ def strava_cmd(
|
||||
config_path: Optional[str],
|
||||
since: Optional[str],
|
||||
reauth: bool,
|
||||
dev_sample: Optional[int],
|
||||
) -> None:
|
||||
"""Import activities from Strava.
|
||||
|
||||
@@ -90,7 +93,11 @@ def strava_cmd(
|
||||
"Add them to extract_config.yaml under import.strava, or pass --client-id/--client-secret."
|
||||
)
|
||||
|
||||
out = _resolve_output(output_dir, cfg)
|
||||
if dev_sample is not None:
|
||||
out = Path("/tmp/bincio_dev")
|
||||
console.print(f"[yellow]Dev mode:[/yellow] importing {dev_sample} activities → [cyan]{out}[/cyan]")
|
||||
else:
|
||||
out = _resolve_output(output_dir, cfg)
|
||||
console.print(f"Output dir: [cyan]{out}[/cyan]")
|
||||
|
||||
if reauth and TOKENS_FILE.exists():
|
||||
@@ -108,7 +115,7 @@ def strava_cmd(
|
||||
except ValueError:
|
||||
raise click.BadParameter(f"Expected YYYY-MM-DD, got {since!r}", param_hint="--since")
|
||||
|
||||
strava_sync(client, out, since_dt, console)
|
||||
strava_sync(client, out, since_dt, console, limit=dev_sample)
|
||||
|
||||
|
||||
def _load_config(config_path: Optional[str]):
|
||||
|
||||
@@ -26,7 +26,7 @@ from rich.console import Console
|
||||
from rich.progress import BarColumn, MofNCompleteColumn, Progress, TextColumn, TimeElapsedColumn
|
||||
|
||||
from bincio.extract.models import DataPoint, ParsedActivity
|
||||
from bincio.extract.sport import normalise_sport
|
||||
from bincio.extract.sport import normalise_sport, normalise_sub_sport
|
||||
|
||||
STRAVA_AUTH_URL = "https://www.strava.com/oauth/authorize"
|
||||
STRAVA_TOKEN_URL = "https://www.strava.com/oauth/token"
|
||||
@@ -214,7 +214,9 @@ def _strava_to_parsed(act: dict, streams: dict[str, list]) -> ParsedActivity:
|
||||
"""Build a ParsedActivity from a Strava activity dict + its streams."""
|
||||
started_at = datetime.fromisoformat(act["start_date"].replace("Z", "+00:00"))
|
||||
|
||||
sport = normalise_sport(act.get("sport_type") or act.get("type") or "")
|
||||
raw_sport = act.get("sport_type") or act.get("type") or ""
|
||||
sport = normalise_sport(raw_sport)
|
||||
sub_sport = normalise_sub_sport(raw_sport)
|
||||
|
||||
times = streams.get("time", []) # seconds since start
|
||||
latlngs = streams.get("latlng", []) # [[lat, lon], ...]
|
||||
@@ -244,6 +246,7 @@ def _strava_to_parsed(act: dict, streams: dict[str, list]) -> ParsedActivity:
|
||||
return ParsedActivity(
|
||||
points = points,
|
||||
sport = sport,
|
||||
sub_sport = sub_sport,
|
||||
started_at = started_at,
|
||||
source_file = f"strava_{strava_id}",
|
||||
source_hash = source_hash,
|
||||
@@ -287,6 +290,7 @@ def sync(
|
||||
output_dir: Path,
|
||||
since: datetime | None,
|
||||
console: Console,
|
||||
limit: int | None = None,
|
||||
) -> None:
|
||||
"""Fetch new Strava activities and write BAS JSON files.
|
||||
|
||||
@@ -323,6 +327,9 @@ def sync(
|
||||
f"Found [bold]{len(new_acts)}[/bold] new activities "
|
||||
f"([bold]{len(all_acts) - len(new_acts)}[/bold] already imported)."
|
||||
)
|
||||
if limit is not None and len(new_acts) > limit:
|
||||
new_acts = new_acts[:limit]
|
||||
console.print(f"[yellow]Dev mode:[/yellow] capped to {limit} activities.")
|
||||
if not new_acts:
|
||||
console.print("[green]All up to date.[/green]")
|
||||
return
|
||||
|
||||
@@ -158,8 +158,16 @@
|
||||
class="text-xs font-medium px-2 py-0.5 rounded-full"
|
||||
style="background:{color}22;color:{color}"
|
||||
>
|
||||
{sportIcon(activity.sport)} {sportLabel(activity.sport, activity.sub_sport)}
|
||||
{sportIcon(activity.sport)} {sportLabel(activity.sport)}
|
||||
</span>
|
||||
{#if activity.sub_sport && activity.sub_sport !== 'generic'}
  <!-- Secondary badge showing only the sub-sport part of the label.
       sportLabel() returns "<sub label> <Sport>", and the sub label may
       contain spaces ("Open Water"), so drop just the final word instead
       of keeping only the first one. -->
  <span
    class="text-xs font-medium px-2 py-0.5 rounded-full"
    style="background:{color}11;color:{color}cc"
  >
    {sportLabel(activity.sport, activity.sub_sport).split(' ').slice(0, -1).join(' ')}
  </span>
{/if}
|
||||
<span class="text-xs text-zinc-500">
|
||||
{formatDate(activity.started_at)} · {formatTime(activity.started_at)}
|
||||
</span>
|
||||
|
||||
+15
-1
@@ -75,10 +75,24 @@ export function sportColor(sport: Sport): string {
|
||||
return SPORT_COLORS[sport] ?? '#a78bfa';
|
||||
}
|
||||
|
||||
const SUB_SPORT_LABELS: Record<string, string> = {
|
||||
road: 'Road',
|
||||
mountain: 'MTB',
|
||||
gravel: 'Gravel',
|
||||
indoor: 'Indoor',
|
||||
trail: 'Trail',
|
||||
track: 'Track',
|
||||
nordic: 'Nordic',
|
||||
alpine: 'Alpine',
|
||||
open_water: 'Open Water',
|
||||
pool: 'Pool',
|
||||
};
|
||||
|
||||
/**
 * Human-readable label for a sport, optionally prefixed by its sub-sport.
 *
 * The base label is the sport name with its first letter upper-cased.
 * When `subSport` is set and is not 'generic', the result is
 * "<sub label> <base>", where the sub label comes from SUB_SPORT_LABELS
 * (e.g. mountain → "MTB") and falls back to the capitalised raw value
 * for sub-sports missing from that table.
 */
export function sportLabel(sport: Sport, subSport?: string | null): string {
  const base = sport.charAt(0).toUpperCase() + sport.slice(1);
  if (subSport && subSport !== 'generic') {
    // Look the label up first; the earlier unconditional capitalise-and-return
    // made this table unreachable.
    const sub = SUB_SPORT_LABELS[subSport] ?? (subSport.charAt(0).toUpperCase() + subSport.slice(1));
    return `${sub} ${base}`;
  }
  return base;
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/** TypeScript types mirroring BAS v1.0 schema. */
|
||||
|
||||
export type Sport = "cycling" | "running" | "hiking" | "walking" | "swimming" | "skiing" | "other";
|
||||
export type SubSport = "road" | "mountain" | "gravel" | "indoor" | "trail" | "track" | "nordic" | null;
|
||||
export type SubSport = "road" | "mountain" | "gravel" | "indoor" | "trail" | "track" | "nordic" | "alpine" | "open_water" | "pool" | null;
|
||||
export type Privacy = "public" | "blur_start" | "no_gps" | "private";
|
||||
|
||||
/** [duration_s, avg_watts] pairs, sorted by duration ascending. */
|
||||
|
||||
Reference in New Issue
Block a user