diff --git a/bincio/extract/writer.py b/bincio/extract/writer.py
index a37a011..3a71092 100644
--- a/bincio/extract/writer.py
+++ b/bincio/extract/writer.py
@@ -10,6 +10,18 @@ from bincio.extract.models import LapData, ParsedActivity
 from bincio.extract.simplify import build_geojson, preview_coords
 from bincio.extract.timeseries import build_timeseries
 
+# Titles that reliably identify indoor/virtual activities regardless of sub_sport metadata.
+# Strava imports from Zwift and FTP-builder platforms lose sub_sport on export.
+_INDOOR_TITLE_RE = re.compile(
+    r'\b(zwift|ftp[\s\-]builder|turbo[\s\-]?trainer|rodillo)\b',
+    re.IGNORECASE,
+)
+
+
+def _infer_indoor_title(title: str) -> bool:
+    """Return True if the title reliably identifies an indoor/virtual activity."""
+    return bool(_INDOOR_TITLE_RE.search(title))
+
 
 def make_activity_id(activity: ParsedActivity) -> str:
     """Generate a BAS activity ID from started_at + optional title slug.
@@ -278,14 +290,11 @@ def write_athlete_json(summaries: list[dict], output_dir: Path, athlete_config:
         return [[d, w] for d, w in sorted(best.items())]
 
     _INDOOR_SUB_SPORTS = {"indoor", "treadmill", "virtual"}
-    _INDOOR_TITLE_RE = re.compile(r'\bzwift\b', re.IGNORECASE)
 
     def _is_outdoor(s: dict) -> bool:
         if s.get("sub_sport") in _INDOOR_SUB_SPORTS:
             return False
-        if _INDOOR_TITLE_RE.search(s.get("title") or ""):
-            return False
-        return True
+        return not _infer_indoor_title(s.get("title") or "")
 
     all_mmps = [s["mmp"] for s in summaries if s.get("mmp") and _is_outdoor(s)]
     mmps_365 = [s["mmp"] for s in summaries if s.get("mmp") and _is_outdoor(s) and s["started_at"] >= cutoff_365]
diff --git a/bincio/render/merge.py b/bincio/render/merge.py
index 4bdefda..276de88 100644
--- a/bincio/render/merge.py
+++ b/bincio/render/merge.py
@@ -71,6 +71,7 @@ def parse_sidecar(path: Path) -> tuple[dict, str]:
 def apply_sidecar(detail: dict, fm: dict, body: str) -> dict:
     """Apply sidecar overrides to a detail JSON dict.
     Returns a modified copy."""
+    from bincio.extract.writer import _infer_indoor_title
     d = dict(detail)
     d.setdefault("custom", {})
     d["custom"] = dict(d["custom"])  # don't mutate original
@@ -81,6 +82,9 @@ def apply_sidecar(detail: dict, fm: dict, body: str) -> dict:
         d["sport"] = str(fm["sport"])
     if "sub_sport" in fm:
         d["sub_sport"] = str(fm["sub_sport"]) if fm["sub_sport"] else None
+    # Infer indoor from title when sub_sport is still absent after sidecar
+    if not d.get("sub_sport") and _infer_indoor_title(d.get("title") or ""):
+        d["sub_sport"] = "indoor"
     if "gear" in fm:
         d["gear"] = str(fm["gear"]) if fm["gear"] else d.get("gear")
     if body:
@@ -99,6 +103,7 @@ def apply_sidecar(detail: dict, fm: dict, body: str) -> dict:
 
 def _apply_sidecar_summary(summary: dict, fm: dict) -> dict:
     """Apply sidecar overrides to an index summary entry."""
+    from bincio.extract.writer import _infer_indoor_title
     s = dict(summary)
     s.setdefault("custom", {})
     s["custom"] = dict(s["custom"])
@@ -113,6 +118,9 @@ def _apply_sidecar_summary(summary: dict, fm: dict) -> dict:
         s["custom"]["highlight"] = bool(fm["highlight"])
     if "private" in fm:
         s["privacy"] = "unlisted" if fm["private"] else summary.get("privacy", "public")
+    # Infer indoor from title when sub_sport is still absent
+    if not s.get("sub_sport") and _infer_indoor_title(s.get("title") or ""):
+        s["sub_sport"] = "indoor"
     return s
 
 
@@ -156,6 +164,12 @@ def _merge_one_locked(data_dir: Path, activity_id: str) -> None:
     )
     needs_merge = has_sidecar or bool(image_files)
 
+    # Title inference alone can require a real file (not a symlink); keep the parsed JSON for reuse
+    _peek: dict | None = None
+    if not needs_merge:
+        from bincio.extract.writer import _infer_indoor_title
+        _peek = json.loads(src.read_text(encoding="utf-8"))
+        needs_merge = not _peek.get("sub_sport") and _infer_indoor_title(_peek.get("title") or "")
 
     # Symlink the timeseries file (never merged — always points to the extract output)
     ts_src = acts_dir / f"{activity_id}.timeseries.json"
@@ -170,10 +184,13 @@ def _merge_one_locked(data_dir: Path, activity_id: str) -> None:
         dest.unlink()
 
     if needs_merge:
-        detail = json.loads(src.read_text(encoding="utf-8"))
+        detail = _peek if _peek is not None else json.loads(src.read_text(encoding="utf-8"))
         if has_sidecar:
             fm, body = parse_sidecar(sidecar_path)  # type: ignore[arg-type]
             detail = apply_sidecar(detail, fm, body)
+        else:
+            # No sidecar — still apply title inference
+            detail = apply_sidecar(detail, {}, "")
         if image_files:
             detail["custom"] = dict(detail.get("custom") or {})
             detail["custom"]["images"] = image_files
@@ -195,9 +212,8 @@ def _merge_one_locked(data_dir: Path, activity_id: str) -> None:
         activities = []
         for s in index.get("activities", []):
             aid = s.get("id", "")
-            if aid in all_sidecars:
-                fm, _ = all_sidecars[aid]
-                s = _apply_sidecar_summary(s, fm)
+            fm, _ = all_sidecars[aid] if aid in all_sidecars else ({}, "")
+            s = _apply_sidecar_summary(s, fm)
             activities.append(s)
 
         activities.sort(key=lambda a: a.get("started_at", ""), reverse=True)
@@ -243,6 +259,17 @@ def _merge_all_locked(data_dir: Path) -> int:
 
     to_merge = set(sidecars) | set(image_lists)
 
+    # Also include activities whose title implies indoor (no sidecar required)
+    _index_path = data_dir / "index.json"
+    _cached_index: dict | None = None
+    if _index_path.exists():
+        from bincio.extract.writer import _infer_indoor_title
+        _cached_index = json.loads(_index_path.read_text(encoding="utf-8"))
+        for _s in _cached_index.get("activities", []):
+            _aid = _s.get("id", "")
+            if _aid and not _s.get("sub_sport") and _infer_indoor_title(_s.get("title") or ""):
+                to_merge.add(_aid)
+
     # Wipe and recreate _merged/activities/
     shutil.rmtree(merged_acts, ignore_errors=True)
     merged_acts.mkdir(parents=True, exist_ok=True)
@@ -259,6 +286,8 @@ def _merge_all_locked(data_dir: Path) -> int:
         if activity_id in sidecars:
             fm, body = sidecars[activity_id]
             detail = apply_sidecar(detail, fm, body)
+        else:
+            detail = apply_sidecar(detail, {}, "")
         if activity_id in image_lists:
             detail["custom"] = dict(detail.get("custom") or {})
             detail["custom"]["images"] = image_lists[activity_id]
@@ -303,13 +332,12 @@ def _merge_all_locked(data_dir: Path) -> int:
     # Write merged index.json (private filtered, highlight sorted)
     index_path = data_dir / "index.json"
     if index_path.exists():
-        index = json.loads(index_path.read_text(encoding="utf-8"))
+        index = _cached_index if _cached_index is not None else json.loads(index_path.read_text(encoding="utf-8"))
         activities = []
         for s in index.get("activities", []):
             aid = s.get("id", "")
-            if aid in sidecars:
-                fm, _ = sidecars[aid]
-                s = _apply_sidecar_summary(s, fm)
+            fm, _ = sidecars[aid] if aid in sidecars else ({}, "")
+            s = _apply_sidecar_summary(s, fm)
             activities.append(s)
 
         # "unlisted" (and legacy "private") activities are kept in the index so