fix activities' types

This commit is contained in:
Davide Scaini
2026-03-29 10:37:08 +02:00
parent 3441079913
commit 643d092acd
5 changed files with 60 additions and 19 deletions
+5 -1
View File
@@ -60,7 +60,7 @@ def _process_file(path: Path) -> dict:
if activity.source_hash in _known_hashes:
return {"status": "duplicate"}
# Enrich from Strava CSV
# Enrich from Strava CSV (CSV is authoritative for sport on Strava activities)
row = _strava_lookup.get(activity.source_file)
if row:
if not activity.title:
@@ -69,6 +69,10 @@ def _process_file(path: Path) -> dict:
activity.description = row.get("Activity Description", "").strip() or None
if not activity.strava_id:
activity.strava_id = row.get("Activity ID", "").strip() or None
csv_type = row.get("Activity Type", "").strip()
if csv_type:
from bincio.extract.sport import normalise_sport
activity.sport = normalise_sport(csv_type)
try:
metrics = compute(activity)
+12 -2
View File
@@ -16,7 +16,7 @@ class FitParser:
points: list[DataPoint] = []
laps: list[LapData] = []
sport: str = "cycling"
sport: str = "other"
sub_sport: str | None = None
device: str | None = None
@@ -26,9 +26,19 @@ class FitParser:
continue
if frame.name == "sport":
sport = normalise_sport(_get(frame, "sport", "cycling"))
sport = normalise_sport(_get(frame, "sport"))
sub_sport = _normalise_sub_sport(_get(frame, "sub_sport"))
elif frame.name == "session":
# Karoo and Strava-generated FIT files store sport here
# instead of (or in addition to) a separate 'sport' message.
# Only use session sport if no 'sport' frame was seen yet.
if sport == "other":
raw_sport = _get(frame, "sport")
if raw_sport is not None:
sport = normalise_sport(raw_sport)
sub_sport = _normalise_sub_sport(_get(frame, "sub_sport"))
elif frame.name == "device_info":
mfr = _get(frame, "manufacturer")
prod = _get(frame, "product_name") or _get(frame, "garmin_product")
+25 -3
View File
@@ -1,33 +1,53 @@
"""Sport name normalisation."""
import re
# Lookup table from a cleaned-up raw sport label to the canonical sport name.
# normalise_sport() lower-cases the label and turns spaces and hyphens into
# underscores before the lookup, so keys must be written in that form; labels
# missing from the table fall back to "other".
_MAPPING: dict[str, str] = {
    # cycling variants (FIT enums, GPX types, Strava API/CSV types)
    "cycling": "cycling",
    "biking": "cycling",
    "bike": "cycling",
    "ride": "cycling",
    "road_biking": "cycling",
    "road_cycling": "cycling",
    "mountain_biking": "cycling",
    "mountain_bike_ride": "cycling",
    "mountainbikeride": "cycling",
    "gravel_cycling": "cycling",
    "gravel_ride": "cycling",
    "gravelride": "cycling",
    "cyclocross": "cycling",
    "indoor_cycling": "cycling",
    "indoor_ride": "cycling",
    "virtual_ride": "cycling",
    "virtualride": "cycling",
    # Hyphenated spelling kept for direct lookups; normalise_sport() rewrites
    # "-" to "_" and therefore reaches "e_biking" instead.
    "e-biking": "cycling",
    "e_biking": "cycling",
    "ebikeride": "cycling",
    "e_bike_ride": "cycling",
    "ebike_ride": "cycling",
    "emountainbikeride": "cycling",
    "e_mountain_bike_ride": "cycling",
    "handcycle": "cycling",
    "velomobile": "cycling",
    # running
    "running": "running",
    "run": "running",
    "trail_running": "running",
    "trail_run": "running",
    "trailrun": "running",
    "treadmill_running": "running",
    "treadmill": "running",
    "virtual_run": "running",
    "virtualrun": "running",
    "outdoor_run": "running",
    "indoor_run": "running",
    "track_run": "running",
    # hiking
    "hiking": "hiking",
    "hike": "hiking",
    "outdoor_hike": "hiking",
    # walking
    "walking": "walking",
    "walk": "walking",
    "outdoor_walk": "walking",
    # swimming
    "swimming": "swimming",
    "swim": "swimming",
    "open_water_swimming": "swimming",
    "lap_swimming": "swimming",
}
# The sport names normalise_sport() can return: every value used in _MAPPING
# plus the "other" fallback.
BAS_SPORTS = {"cycling", "running", "hiking", "walking", "swimming", "other"}
@@ -36,5 +56,7 @@ BAS_SPORTS = {"cycling", "running", "hiking", "walking", "swimming", "other"}
def normalise_sport(raw: object) -> str:
    """Map a raw sport label onto one of the canonical sport names.

    The label is stringified, lower-cased and trimmed, spaces and hyphens
    become underscores, and a leading date-like run of digits (with any
    separators inside or right after it) is dropped. The cleaned label is
    then looked up in ``_MAPPING``.

    Args:
        raw: Sport value as read from the input. Any object is accepted and
            converted with ``str()``.

    Returns:
        The canonical sport name, or ``"other"`` when ``raw`` is ``None`` or
        the cleaned label has no entry in ``_MAPPING``.
    """
    if raw is None:
        return "other"
    key = str(raw).lower().strip().replace(" ", "_").replace("-", "_")
    # Strip a leading date-like prefix, e.g. "20231117outdoor_run" or
    # "2023_11_17_outdoor_run" -> "outdoor_run". Underscores are part of the
    # pattern so a separator between prefix and name cannot defeat the lookup.
    key = re.sub(r"^[\d_]+", "", key)
    return _MAPPING.get(key, "other")