Indoor detection: title-based inference in merge layer + fix _merge_all_locked
- Add _INDOOR_TITLE_RE / _infer_indoor_title() to writer.py (matches zwift,
ftp-builder, turbo-trainer, rodillo); replaces the narrower zwift-only regex
that was local to write_athlete_json
- _is_outdoor now delegates to _infer_indoor_title so activities whose titles
match any of the four keywords are excluded from records and MMP aggregation
- apply_sidecar and _apply_sidecar_summary both set sub_sport=indoor when the
title matches and no explicit sub_sport is already present
- _merge_one_locked: treat activities whose title implies indoor (and that have
no sub_sport yet) as needs_merge, and call apply_sidecar(detail, {}, "") so the
_merged copy is a real file with sub_sport=indoor written
- _merge_all_locked: read index.json upfront to add title-inferred IDs to to_merge;
call apply_sidecar(detail, {}, "") for activities in to_merge that have no sidecar;
apply _apply_sidecar_summary to ALL summary entries (not only those with sidecars)
This commit is contained in:
@@ -10,6 +10,18 @@ from bincio.extract.models import LapData, ParsedActivity
|
|||||||
from bincio.extract.simplify import build_geojson, preview_coords
|
from bincio.extract.simplify import build_geojson, preview_coords
|
||||||
from bincio.extract.timeseries import build_timeseries
|
from bincio.extract.timeseries import build_timeseries
|
||||||
|
|
||||||
|
# Titles that reliably identify indoor/virtual activities regardless of sub_sport metadata.
|
||||||
|
# Strava imports from Zwift and FTP-builder platforms lose sub_sport on export.
|
||||||
|
_INDOOR_TITLE_RE = re.compile(
    r'\b(zwift|ftp[\s\-]builder|turbo[\s\-]?trainer|rodillo)\b',
    re.IGNORECASE,
)


def _infer_indoor_title(title: "str | None") -> bool:
    """Return True if the title reliably identifies an indoor/virtual activity.

    The title is searched case-insensitively for any of these whole-word
    keywords: ``zwift``, ``ftp builder`` / ``ftp-builder``,
    ``turbo trainer`` / ``turbo-trainer`` / ``turbotrainer``, ``rodillo``.

    Args:
        title: Activity title. ``None`` or an empty string is treated as
            "no title" and yields ``False``, so callers do not need to
            guard with ``title or ""`` themselves.

    Returns:
        ``True`` when one of the indoor keywords occurs in ``title``,
        ``False`` otherwise.
    """
    # A missing title cannot identify anything; bail out before the regex
    # so a None value does not raise TypeError inside ``search``.
    if not title:
        return False
    return _INDOOR_TITLE_RE.search(title) is not None
|
||||||
|
|
||||||
|
|
||||||
def make_activity_id(activity: ParsedActivity) -> str:
|
def make_activity_id(activity: ParsedActivity) -> str:
|
||||||
"""Generate a BAS activity ID from started_at + optional title slug.
|
"""Generate a BAS activity ID from started_at + optional title slug.
|
||||||
@@ -278,14 +290,11 @@ def write_athlete_json(summaries: list[dict], output_dir: Path, athlete_config:
|
|||||||
return [[d, w] for d, w in sorted(best.items())]
|
return [[d, w] for d, w in sorted(best.items())]
|
||||||
|
|
||||||
_INDOOR_SUB_SPORTS = {"indoor", "treadmill", "virtual"}
|
_INDOOR_SUB_SPORTS = {"indoor", "treadmill", "virtual"}
|
||||||
_INDOOR_TITLE_RE = re.compile(r'\bzwift\b', re.IGNORECASE)
|
|
||||||
|
|
||||||
def _is_outdoor(s: dict) -> bool:
|
def _is_outdoor(s: dict) -> bool:
|
||||||
if s.get("sub_sport") in _INDOOR_SUB_SPORTS:
|
if s.get("sub_sport") in _INDOOR_SUB_SPORTS:
|
||||||
return False
|
return False
|
||||||
if _INDOOR_TITLE_RE.search(s.get("title") or ""):
|
return not _infer_indoor_title(s.get("title") or "")
|
||||||
return False
|
|
||||||
return True
|
|
||||||
|
|
||||||
all_mmps = [s["mmp"] for s in summaries if s.get("mmp") and _is_outdoor(s)]
|
all_mmps = [s["mmp"] for s in summaries if s.get("mmp") and _is_outdoor(s)]
|
||||||
mmps_365 = [s["mmp"] for s in summaries if s.get("mmp") and _is_outdoor(s) and s["started_at"] >= cutoff_365]
|
mmps_365 = [s["mmp"] for s in summaries if s.get("mmp") and _is_outdoor(s) and s["started_at"] >= cutoff_365]
|
||||||
|
|||||||
+36
-8
@@ -71,6 +71,7 @@ def parse_sidecar(path: Path) -> tuple[dict, str]:
|
|||||||
|
|
||||||
def apply_sidecar(detail: dict, fm: dict, body: str) -> dict:
|
def apply_sidecar(detail: dict, fm: dict, body: str) -> dict:
|
||||||
"""Apply sidecar overrides to a detail JSON dict. Returns a modified copy."""
|
"""Apply sidecar overrides to a detail JSON dict. Returns a modified copy."""
|
||||||
|
from bincio.extract.writer import _infer_indoor_title
|
||||||
d = dict(detail)
|
d = dict(detail)
|
||||||
d.setdefault("custom", {})
|
d.setdefault("custom", {})
|
||||||
d["custom"] = dict(d["custom"]) # don't mutate original
|
d["custom"] = dict(d["custom"]) # don't mutate original
|
||||||
@@ -81,6 +82,9 @@ def apply_sidecar(detail: dict, fm: dict, body: str) -> dict:
|
|||||||
d["sport"] = str(fm["sport"])
|
d["sport"] = str(fm["sport"])
|
||||||
if "sub_sport" in fm:
|
if "sub_sport" in fm:
|
||||||
d["sub_sport"] = str(fm["sub_sport"]) if fm["sub_sport"] else None
|
d["sub_sport"] = str(fm["sub_sport"]) if fm["sub_sport"] else None
|
||||||
|
# Infer indoor from title when sub_sport is still absent after sidecar
|
||||||
|
if not d.get("sub_sport") and _infer_indoor_title(d.get("title") or ""):
|
||||||
|
d["sub_sport"] = "indoor"
|
||||||
if "gear" in fm:
|
if "gear" in fm:
|
||||||
d["gear"] = str(fm["gear"]) if fm["gear"] else d.get("gear")
|
d["gear"] = str(fm["gear"]) if fm["gear"] else d.get("gear")
|
||||||
if body:
|
if body:
|
||||||
@@ -99,6 +103,7 @@ def apply_sidecar(detail: dict, fm: dict, body: str) -> dict:
|
|||||||
|
|
||||||
def _apply_sidecar_summary(summary: dict, fm: dict) -> dict:
|
def _apply_sidecar_summary(summary: dict, fm: dict) -> dict:
|
||||||
"""Apply sidecar overrides to an index summary entry."""
|
"""Apply sidecar overrides to an index summary entry."""
|
||||||
|
from bincio.extract.writer import _infer_indoor_title
|
||||||
s = dict(summary)
|
s = dict(summary)
|
||||||
s.setdefault("custom", {})
|
s.setdefault("custom", {})
|
||||||
s["custom"] = dict(s["custom"])
|
s["custom"] = dict(s["custom"])
|
||||||
@@ -113,6 +118,9 @@ def _apply_sidecar_summary(summary: dict, fm: dict) -> dict:
|
|||||||
s["custom"]["highlight"] = bool(fm["highlight"])
|
s["custom"]["highlight"] = bool(fm["highlight"])
|
||||||
if "private" in fm:
|
if "private" in fm:
|
||||||
s["privacy"] = "unlisted" if fm["private"] else summary.get("privacy", "public")
|
s["privacy"] = "unlisted" if fm["private"] else summary.get("privacy", "public")
|
||||||
|
# Infer indoor from title when sub_sport is still absent
|
||||||
|
if not s.get("sub_sport") and _infer_indoor_title(s.get("title") or ""):
|
||||||
|
s["sub_sport"] = "indoor"
|
||||||
|
|
||||||
return s
|
return s
|
||||||
|
|
||||||
@@ -156,6 +164,12 @@ def _merge_one_locked(data_dir: Path, activity_id: str) -> None:
|
|||||||
)
|
)
|
||||||
|
|
||||||
needs_merge = has_sidecar or bool(image_files)
|
needs_merge = has_sidecar or bool(image_files)
|
||||||
|
# Also need a real file (not symlink) when title inference would change sub_sport
|
||||||
|
if not needs_merge and not has_sidecar:
|
||||||
|
from bincio.extract.writer import _infer_indoor_title
|
||||||
|
_peek = json.loads(src.read_text(encoding="utf-8"))
|
||||||
|
if not _peek.get("sub_sport") and _infer_indoor_title(_peek.get("title") or ""):
|
||||||
|
needs_merge = True
|
||||||
|
|
||||||
# Symlink the timeseries file (never merged — always points to the extract output)
|
# Symlink the timeseries file (never merged — always points to the extract output)
|
||||||
ts_src = acts_dir / f"{activity_id}.timeseries.json"
|
ts_src = acts_dir / f"{activity_id}.timeseries.json"
|
||||||
@@ -170,10 +184,13 @@ def _merge_one_locked(data_dir: Path, activity_id: str) -> None:
|
|||||||
dest.unlink()
|
dest.unlink()
|
||||||
|
|
||||||
if needs_merge:
|
if needs_merge:
|
||||||
detail = json.loads(src.read_text(encoding="utf-8"))
|
detail = locals().get("_peek") or json.loads(src.read_text(encoding="utf-8"))
|
||||||
if has_sidecar:
|
if has_sidecar:
|
||||||
fm, body = parse_sidecar(sidecar_path) # type: ignore[arg-type]
|
fm, body = parse_sidecar(sidecar_path) # type: ignore[arg-type]
|
||||||
detail = apply_sidecar(detail, fm, body)
|
detail = apply_sidecar(detail, fm, body)
|
||||||
|
else:
|
||||||
|
# No sidecar — still apply title inference
|
||||||
|
detail = apply_sidecar(detail, {}, "")
|
||||||
if image_files:
|
if image_files:
|
||||||
detail["custom"] = dict(detail.get("custom") or {})
|
detail["custom"] = dict(detail.get("custom") or {})
|
||||||
detail["custom"]["images"] = image_files
|
detail["custom"]["images"] = image_files
|
||||||
@@ -195,9 +212,8 @@ def _merge_one_locked(data_dir: Path, activity_id: str) -> None:
|
|||||||
activities = []
|
activities = []
|
||||||
for s in index.get("activities", []):
|
for s in index.get("activities", []):
|
||||||
aid = s.get("id", "")
|
aid = s.get("id", "")
|
||||||
if aid in all_sidecars:
|
fm, _ = all_sidecars[aid] if aid in all_sidecars else ({}, "")
|
||||||
fm, _ = all_sidecars[aid]
|
s = _apply_sidecar_summary(s, fm)
|
||||||
s = _apply_sidecar_summary(s, fm)
|
|
||||||
activities.append(s)
|
activities.append(s)
|
||||||
|
|
||||||
activities.sort(key=lambda a: a.get("started_at", ""), reverse=True)
|
activities.sort(key=lambda a: a.get("started_at", ""), reverse=True)
|
||||||
@@ -243,6 +259,17 @@ def _merge_all_locked(data_dir: Path) -> int:
|
|||||||
|
|
||||||
to_merge = set(sidecars) | set(image_lists)
|
to_merge = set(sidecars) | set(image_lists)
|
||||||
|
|
||||||
|
# Also include activities whose title implies indoor (no sidecar required)
|
||||||
|
_index_path = data_dir / "index.json"
|
||||||
|
_cached_index: dict | None = None
|
||||||
|
if _index_path.exists():
|
||||||
|
from bincio.extract.writer import _infer_indoor_title
|
||||||
|
_cached_index = json.loads(_index_path.read_text(encoding="utf-8"))
|
||||||
|
for _s in _cached_index.get("activities", []):
|
||||||
|
_aid = _s.get("id", "")
|
||||||
|
if _aid and not _s.get("sub_sport") and _infer_indoor_title(_s.get("title") or ""):
|
||||||
|
to_merge.add(_aid)
|
||||||
|
|
||||||
# Wipe and recreate _merged/activities/
|
# Wipe and recreate _merged/activities/
|
||||||
shutil.rmtree(merged_acts, ignore_errors=True)
|
shutil.rmtree(merged_acts, ignore_errors=True)
|
||||||
merged_acts.mkdir(parents=True, exist_ok=True)
|
merged_acts.mkdir(parents=True, exist_ok=True)
|
||||||
@@ -259,6 +286,8 @@ def _merge_all_locked(data_dir: Path) -> int:
|
|||||||
if activity_id in sidecars:
|
if activity_id in sidecars:
|
||||||
fm, body = sidecars[activity_id]
|
fm, body = sidecars[activity_id]
|
||||||
detail = apply_sidecar(detail, fm, body)
|
detail = apply_sidecar(detail, fm, body)
|
||||||
|
else:
|
||||||
|
detail = apply_sidecar(detail, {}, "")
|
||||||
if activity_id in image_lists:
|
if activity_id in image_lists:
|
||||||
detail["custom"] = dict(detail.get("custom") or {})
|
detail["custom"] = dict(detail.get("custom") or {})
|
||||||
detail["custom"]["images"] = image_lists[activity_id]
|
detail["custom"]["images"] = image_lists[activity_id]
|
||||||
@@ -303,13 +332,12 @@ def _merge_all_locked(data_dir: Path) -> int:
|
|||||||
# Write merged index.json (private filtered, highlight sorted)
|
# Write merged index.json (private filtered, highlight sorted)
|
||||||
index_path = data_dir / "index.json"
|
index_path = data_dir / "index.json"
|
||||||
if index_path.exists():
|
if index_path.exists():
|
||||||
index = json.loads(index_path.read_text(encoding="utf-8"))
|
index = _cached_index or json.loads(index_path.read_text(encoding="utf-8"))
|
||||||
activities = []
|
activities = []
|
||||||
for s in index.get("activities", []):
|
for s in index.get("activities", []):
|
||||||
aid = s.get("id", "")
|
aid = s.get("id", "")
|
||||||
if aid in sidecars:
|
fm, _ = sidecars[aid] if aid in sidecars else ({}, "")
|
||||||
fm, _ = sidecars[aid]
|
s = _apply_sidecar_summary(s, fm)
|
||||||
s = _apply_sidecar_summary(s, fm)
|
|
||||||
activities.append(s)
|
activities.append(s)
|
||||||
|
|
||||||
# "unlisted" (and legacy "private") activities are kept in the index so
|
# "unlisted" (and legacy "private") activities are kept in the index so
|
||||||
|
|||||||
Reference in New Issue
Block a user