refactor: extract import_garmin_gear() + add backfill script

Move gear backfill logic from the route handler into
import_garmin_gear(data_dir, user_dir) in garmin_sync.py so it can be
called both from the API and from the CLI script.

scripts/backfill_garmin_gear.py finds all users with Garmin credentials
and runs the backfill for each, printing a per-user summary.
This commit is contained in:
Davide Scaini
2026-05-24 13:13:47 +02:00
parent 801140ac51
commit 7db7bf91e0
3 changed files with 211 additions and 140 deletions
+142
View File
@@ -244,3 +244,145 @@ def run_garmin_sync(data_dir: Path, user_dir: Path) -> dict:
elif event["type"] == "error": elif event["type"] == "error":
raise RuntimeError(event["message"]) raise RuntimeError(event["message"])
return result return result
def import_garmin_gear(data_dir: Path, user_dir: Path) -> dict:
    """Backfill gear for all existing activities by querying Garmin's gear-activities API.

    For each gear item, fetches the list of activities from Garmin and matches them
    to local activities by UTC start timestamp (±60 s). Writes a sidecar and calls
    merge_one for each match that doesn't already have gear set.

    Existing sidecars are never clobbered: a sidecar without front matter keeps
    its text as the body, and a sidecar that cannot be read or whose front
    matter is not a valid YAML mapping is left untouched (the activity is
    skipped and a warning is logged).

    Args:
        data_dir: Root data directory, passed through to ``get_client``.
        user_dir: User directory containing the index shards (``index*.json``,
            or ``_merged/index*.json`` when ``_merged`` exists), ``activities/``
            and ``edits/``.

    Returns:
        {"gear_added": int, "activities_updated": int}.

    Raises:
        GarminError: If the Garmin profile ID cannot be read. Whatever
            ``get_client`` and the profile / gear-list requests raise
            propagates unchanged.
    """
    import logging
    import uuid

    import yaml

    from bincio.extract.garmin_api import GarminError, get_client
    from bincio.render.merge import merge_one
    from bincio.serve.routers.gear import _load as _gear_load
    from bincio.serve.routers.gear import _save as _gear_save

    log = logging.getLogger(__name__)
    client = get_client(data_dir, user_dir)

    # Fetch gear list from Garmin
    prof = client.connectapi("/userprofile-service/socialProfile")
    profile_id = prof.get("profileId") if isinstance(prof, dict) else None
    if not profile_id:
        raise GarminError("Could not read Garmin profile ID")
    garmin_gear = client.get_gear(profile_id)
    if not isinstance(garmin_gear, list) or not garmin_gear:
        return {"gear_added": 0, "activities_updated": 0}

    # Build / update local gear registry
    registry = _gear_load(user_dir)
    known = {g.get("garmin_id") for g in registry if g.get("garmin_id")}
    uuid_to_name: dict[str, str] = {}
    gear_added = 0
    for g in garmin_gear:
        if not isinstance(g, dict):
            continue
        guuid = g.get("uuid") or ""
        # ``or ""`` keeps an explicit null make/model from rendering as "None".
        make = g.get("gearMakeName") or ""
        model = g.get("gearModelName") or ""
        name = (
            g.get("customMakeModel")
            or g.get("displayName")
            or f"{make} {model}".strip()
        )
        if not name or not guuid:
            continue
        uuid_to_name[guuid] = name
        if guuid in known:
            # Already registered: only refresh the display name.
            for item in registry:
                if item.get("garmin_id") == guuid:
                    item["name"] = name
            continue
        gear_type = str(g.get("gearTypeName") or "").lower()
        if gear_type not in ("bike", "shoes", "skis"):
            gear_type = "other"
        retired = g.get("gearStatusName") == "retired"
        registry.append({"id": str(uuid.uuid4()), "name": name,
                         "type": gear_type, "retired": retired, "garmin_id": guuid})
        known.add(guuid)
        gear_added += 1
    _gear_save(user_dir, registry)

    # Build timestamp → activity_id map from index shards
    ts_to_id = _gear_backfill_index_timestamps(user_dir)

    edits_dir = user_dir / "edits"
    edits_dir.mkdir(exist_ok=True)
    activities_updated = 0
    for guuid, gear_name in uuid_to_name.items():
        try:
            gear_acts = client.get_gear_activities(guuid, limit=10000)
        except Exception as exc:
            # Best effort: one failing gear item must not abort the others.
            log.warning("Could not fetch activities for Garmin gear %s (%s): %s",
                        guuid, gear_name, exc)
            continue
        if not isinstance(gear_acts, list):
            continue
        for ga in gear_acts:
            if not isinstance(ga, dict):
                continue
            gmt = ga.get("startTimeGMT") or ""
            if not gmt:
                continue
            try:
                dt = datetime.strptime(gmt, "%Y-%m-%d %H:%M:%S").replace(tzinfo=UTC)
            except (TypeError, ValueError):
                continue
            act_id = _gear_backfill_match(ts_to_id, int(dt.timestamp()))
            if not act_id:
                continue

            # Skip if activity already has gear set
            act_json = user_dir / "activities" / f"{act_id}.json"
            if act_json.exists():
                try:
                    activity = json.loads(act_json.read_text(encoding="utf-8"))
                except (OSError, ValueError):
                    activity = None
                if isinstance(activity, dict) and activity.get("gear"):
                    continue

            sidecar = edits_dir / f"{act_id}.md"
            parsed = _gear_backfill_read_sidecar(sidecar)
            if parsed is None:
                # Rewriting would destroy content we could not parse.
                log.warning("Leaving unparseable sidecar untouched: %s", sidecar)
                continue
            fm, body = parsed
            if fm.get("gear"):
                continue
            fm["gear"] = gear_name
            fm_text = yaml.safe_dump(fm, default_flow_style=False, allow_unicode=True).strip()
            content = f"---\n{fm_text}\n---\n"
            if body:
                content += f"\n{body}\n"
            sidecar.write_text(content, encoding="utf-8")
            try:
                merge_one(user_dir, act_id)
            except Exception as exc:
                # Best effort: the sidecar is already written, so the activity
                # still counts as updated even if the merge step failed.
                log.warning("merge_one failed for activity %s: %s", act_id, exc)
            activities_updated += 1

    return {"gear_added": gear_added, "activities_updated": activities_updated}


def _gear_backfill_index_timestamps(user_dir: Path) -> dict[int, str]:
    """Map each indexed activity's UTC start time (epoch seconds) to its id.

    Reads ``index*.json`` from ``user_dir / "_merged"`` when that directory
    exists, otherwise from ``user_dir``. Unreadable shards and malformed
    entries are skipped individually so one bad record cannot hide the rest.
    """
    ts_to_id: dict[int, str] = {}
    merged_dir = user_dir / "_merged"
    shard_dir = merged_dir if merged_dir.exists() else user_dir
    for shard_path in sorted(shard_dir.glob("index*.json")):
        try:
            idx = json.loads(shard_path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            continue
        activities = idx.get("activities") if isinstance(idx, dict) else None
        if not isinstance(activities, list):
            continue
        for a in activities:
            if not isinstance(a, dict):
                continue
            started = a.get("started_at") or ""
            act_id = a.get("id")
            if not started or not act_id:
                continue
            try:
                dt = datetime.fromisoformat(started.replace("Z", "+00:00"))
                ts = int(dt.astimezone(UTC).timestamp())
            except (AttributeError, TypeError, ValueError, OverflowError, OSError):
                continue
            ts_to_id[ts] = act_id
    return ts_to_id


def _gear_backfill_match(ts_to_id: dict[int, str], ts: int, tolerance: int = 60) -> str | None:
    """Return the id of the activity starting closest to ``ts`` within ±``tolerance`` s.

    Offsets are tried in increasing order, later before earlier at equal
    distance; returns ``None`` when nothing falls inside the window.
    """
    for delta in range(tolerance + 1):
        act_id = ts_to_id.get(ts + delta) or ts_to_id.get(ts - delta)
        if act_id:
            return act_id
    return None


def _gear_backfill_read_sidecar(sidecar: Path) -> tuple[dict, str] | None:
    """Split a sidecar into ``(front_matter, body)`` without losing content.

    Returns ``({}, "")`` when the file does not exist, ``({}, text)`` when the
    file has no leading ``---`` front-matter section, and ``None`` when the
    file cannot be read or its front matter is not a valid YAML mapping (the
    caller must then leave the file alone).
    """
    import re

    import yaml

    if not sidecar.exists():
        return {}, ""
    try:
        text = sidecar.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        return None
    parts = re.split(r"^---[ \t]*$", text, maxsplit=2, flags=re.MULTILINE)
    if len(parts) < 3 or parts[0].strip():
        # No front matter at the top of the file: keep the whole text as body.
        return {}, text.strip()
    try:
        fm = yaml.safe_load(parts[1]) or {}
    except yaml.YAMLError:
        return None
    if not isinstance(fm, dict):
        return None
    return fm, parts[2].strip()
+6 -140
View File
@@ -2,7 +2,6 @@
from __future__ import annotations from __future__ import annotations
import json import json
from datetime import UTC
from fastapi import APIRouter, Cookie, HTTPException, Request from fastapi import APIRouter, Cookie, HTTPException, Request
from fastapi.responses import JSONResponse, StreamingResponse from fastapi.responses import JSONResponse, StreamingResponse
@@ -122,16 +121,8 @@ async def garmin_sync_stream(bincio_session: str | None = Cookie(default=None))
@router.post("/api/garmin/import-gear") @router.post("/api/garmin/import-gear")
async def garmin_import_gear(bincio_session: str | None = Cookie(default=None)) -> JSONResponse: async def garmin_import_gear(bincio_session: str | None = Cookie(default=None)) -> JSONResponse:
"""One-time backfill: fetch gear registry from Garmin and match to existing activities by timestamp.""" """One-time backfill: fetch gear registry from Garmin and match to existing activities by timestamp."""
import contextlib from bincio.extract.garmin_api import GarminError, has_credentials
import re from bincio.extract.garmin_sync import import_garmin_gear
import uuid
import yaml
from bincio.extract.garmin_api import GarminError, get_client, has_credentials
from bincio.render.merge import merge_one
from bincio.serve.routers.gear import _load as _gear_load
from bincio.serve.routers.gear import _save as _gear_save
user = deps._require_user(bincio_session) user = deps._require_user(bincio_session)
data_dir = deps._get_data_dir() data_dir = deps._get_data_dir()
@@ -141,134 +132,9 @@ async def garmin_import_gear(bincio_session: str | None = Cookie(default=None))
raise HTTPException(400, "No Garmin credentials stored — connect first") raise HTTPException(400, "No Garmin credentials stored — connect first")
try: try:
client = get_client(data_dir, user_dir) result = import_garmin_gear(data_dir, user_dir)
except GarminError as e: except GarminError as exc:
raise HTTPException(502, str(e)) raise HTTPException(502, _garmin_user_message(exc))
# Fetch gear list
try:
prof = client.connectapi("/userprofile-service/socialProfile")
profile_id = prof.get("profileId") if isinstance(prof, dict) else None
if not profile_id:
raise HTTPException(502, "Could not read Garmin profile ID")
garmin_gear = client.get_gear(profile_id)
except GarminError as e:
raise HTTPException(502, str(e))
if not isinstance(garmin_gear, list) or not garmin_gear:
return JSONResponse({"ok": True, "gear_added": 0, "activities_updated": 0})
# Build / update gear registry
registry = _gear_load(user_dir)
known = {g.get("garmin_id") for g in registry if g.get("garmin_id")}
uuid_to_name: dict[str, str] = {}
gear_added = 0
for g in garmin_gear:
guuid = g.get("uuid") or ""
name = (g.get("customMakeModel") or g.get("displayName") or
f"{g.get('gearMakeName','')} {g.get('gearModelName','')}".strip())
if not name or not guuid:
continue
uuid_to_name[guuid] = name
if guuid not in known:
gear_type = g.get("gearTypeName", "").lower()
if gear_type not in ("bike", "shoes", "skis"):
gear_type = "other"
retired = g.get("gearStatusName") == "retired"
registry.append({"id": str(uuid.uuid4()), "name": name,
"type": gear_type, "retired": retired, "garmin_id": guuid})
known.add(guuid)
gear_added += 1
else:
for item in registry:
if item.get("garmin_id") == guuid:
item["name"] = name
_gear_save(user_dir, registry)
# Build timestamp → activity_id map from the user's index shards
from datetime import datetime
ts_to_id: dict[int, str] = {}
merged_dir = user_dir / "_merged"
shard_dirs = [merged_dir] if merged_dir.exists() else [user_dir]
for shard_dir in shard_dirs:
for shard_path in sorted(shard_dir.glob("index*.json")):
try:
idx = json.loads(shard_path.read_text(encoding="utf-8"))
for a in idx.get("activities", []):
started = a.get("started_at") or ""
if started and a.get("id"):
dt = datetime.fromisoformat(started.replace("Z", "+00:00"))
ts_to_id[int(dt.astimezone(UTC).timestamp())] = a["id"]
except (OSError, json.JSONDecodeError, KeyError):
continue
# For each gear, fetch its activities and match by timestamp
edits_dir = user_dir / "edits"
edits_dir.mkdir(exist_ok=True)
activities_updated = 0
for guuid, gear_name in uuid_to_name.items():
try:
gear_acts = client.get_gear_activities(guuid, limit=10000)
except Exception:
continue
if not isinstance(gear_acts, list):
continue
for ga in gear_acts:
gmt = ga.get("startTimeGMT") or ""
if not gmt:
continue
try:
from datetime import datetime
dt = datetime.strptime(gmt, "%Y-%m-%d %H:%M:%S").replace(tzinfo=UTC)
ts = int(dt.timestamp())
except ValueError:
continue
# Match within ±60 s
act_id = None
for delta in range(0, 61):
act_id = ts_to_id.get(ts + delta) or ts_to_id.get(ts - delta)
if act_id:
break
if not act_id:
continue
# Skip if activity already has gear set
act_json = user_dir / "activities" / f"{act_id}.json"
if act_json.exists():
try:
if json.loads(act_json.read_text(encoding="utf-8")).get("gear"):
continue
except (OSError, json.JSONDecodeError):
pass
sidecar = edits_dir / f"{act_id}.md"
fm, body = {}, ""
if sidecar.exists():
try:
text = sidecar.read_text(encoding="utf-8")
parts = re.split(r"^---[ \t]*$", text, maxsplit=2, flags=re.MULTILINE)
if len(parts) >= 3:
fm = yaml.safe_load(parts[1]) or {}
body = parts[2].strip()
except Exception:
pass
if fm.get("gear"):
continue
fm["gear"] = gear_name
fm_text = yaml.safe_dump(fm, default_flow_style=False, allow_unicode=True).strip()
content = f"---\n{fm_text}\n---\n"
if body:
content += f"\n{body}\n"
sidecar.write_text(content, encoding="utf-8")
with contextlib.suppress(Exception):
merge_one(user_dir, act_id)
activities_updated += 1
tasks._trigger_rebuild(user.handle) tasks._trigger_rebuild(user.handle)
return JSONResponse({"ok": True, "gear_added": gear_added, "activities_updated": activities_updated}) return JSONResponse({"ok": True, **result})
+63
View File
@@ -0,0 +1,63 @@
#!/usr/bin/env python3
"""Backfill Garmin gear for all users who have stored Garmin credentials.
Usage (on VPS):
cd /opt/bincio
uv run python3 scripts/backfill_garmin_gear.py --data-dir /var/bincio/data
# Limit to specific users:
uv run python3 scripts/backfill_garmin_gear.py --data-dir /var/bincio/data --users plagzo12
"""
from __future__ import annotations
import argparse
import sys
from pathlib import Path
def main() -> None:
    """Run the Garmin gear backfill for every user that has Garmin credentials.

    Parses ``--data-dir`` (required) and ``--users`` (optional list of handles),
    selects the user directories for which ``has_credentials`` is true, and
    calls ``import_garmin_gear`` for each one, printing a per-user summary.
    A failure for one user is reported and does not stop the remaining users.

    Exits with a non-zero status when the data directory does not exist or
    when the backfill failed for at least one user, so that shell scripts and
    cron jobs can detect the failure.
    """
    parser = argparse.ArgumentParser(description="Backfill Garmin gear for all users")
    parser.add_argument("--data-dir", required=True, type=Path, help="Root data directory")
    parser.add_argument("--users", nargs="*", help="Limit to these user handles (default: all)")
    args = parser.parse_args()

    data_dir: Path = args.data_dir.resolve()
    if not data_dir.is_dir():
        sys.exit(f"data-dir not found: {data_dir}")

    # Imported after argument validation so --help and a bad --data-dir
    # are handled before the project packages are loaded.
    from bincio.extract.garmin_api import GarminError, has_credentials
    from bincio.extract.garmin_sync import import_garmin_gear

    candidates = (
        [data_dir / h for h in args.users]
        if args.users
        else sorted(p for p in data_dir.iterdir() if p.is_dir())
    )
    garmin_users = [p for p in candidates if has_credentials(p)]

    if args.users:
        # Explicitly requested handles should never disappear silently.
        skipped = [p.name for p in candidates if p not in garmin_users]
        if skipped:
            print(f"Skipping (no Garmin credentials found): {skipped}")

    if not garmin_users:
        print("No users with Garmin credentials found.")
        return

    print(f"Found {len(garmin_users)} Garmin user(s): {[p.name for p in garmin_users]}\n")

    failed: list[str] = []
    for user_dir in garmin_users:
        handle = user_dir.name
        print(f"[{handle}] importing gear...", flush=True)
        try:
            result = import_garmin_gear(data_dir, user_dir)
        except GarminError as exc:
            print(f"[{handle}] Garmin error: {exc}")
            failed.append(handle)
        except Exception as exc:
            # Top-level boundary: report and carry on with the next user.
            print(f"[{handle}] unexpected error: {type(exc).__name__}: {exc}")
            failed.append(handle)
        else:
            print(
                f"[{handle}] done — "
                f"gear_added={result['gear_added']}, "
                f"activities_updated={result['activities_updated']}"
            )

    print("\nAll done. Run merge_all or trigger a rebuild to refresh the index shards.")
    if failed:
        sys.exit(f"Backfill failed for {len(failed)} user(s): {failed}")


if __name__ == "__main__":
    main()