refactor: extract import_garmin_gear() + add backfill script

Move gear backfill logic from the route handler into
import_garmin_gear(data_dir, user_dir) in garmin_sync.py so it can be
called both from the API and from the CLI script.

scripts/backfill_garmin_gear.py finds all users with Garmin credentials
and runs the backfill for each, printing a per-user summary.
This commit is contained in:
Davide Scaini
2026-05-24 13:13:47 +02:00
parent 801140ac51
commit 7db7bf91e0
3 changed files with 211 additions and 140 deletions
+142
View File
@@ -244,3 +244,145 @@ def run_garmin_sync(data_dir: Path, user_dir: Path) -> dict:
elif event["type"] == "error":
raise RuntimeError(event["message"])
return result
def import_garmin_gear(data_dir: Path, user_dir: Path) -> dict:
    """Backfill gear for existing activities via Garmin's gear-activities API.

    Steps:

    1. Fetch the user's gear list from Garmin and add unknown items to the
       local gear registry (known items get their name refreshed).
    2. Build a ``UTC epoch second -> activity id`` map from the ``index*.json``
       shards (``user_dir/_merged`` if it exists, otherwise ``user_dir``).
    3. For each gear item, fetch its Garmin activities and match them to local
       activities by UTC start timestamp (±60 s, closest match wins).
    4. For each match that has no gear yet, write ``gear`` into the activity's
       sidecar (``user_dir/edits/<id>.md``) and call ``merge_one``.

    Malformed index entries, Garmin records and sidecars are skipped instead
    of aborting the run. An existing sidecar that cannot be parsed is left
    untouched rather than overwritten.

    Args:
        data_dir: Root data directory, passed through to ``get_client``.
        user_dir: The user's data directory (registry, shards, edits).

    Returns:
        ``{"gear_added": int, "activities_updated": int}``.

    Raises:
        GarminError: If the Garmin profile ID cannot be read. Errors raised by
            ``get_client`` or the initial Garmin calls also propagate.
    """
    import contextlib
    import re
    import uuid

    import yaml

    from bincio.extract.garmin_api import GarminError, get_client
    from bincio.render.merge import merge_one
    from bincio.serve.routers.gear import _load as _gear_load
    from bincio.serve.routers.gear import _save as _gear_save

    client = get_client(data_dir, user_dir)

    # Fetch gear list from Garmin
    prof = client.connectapi("/userprofile-service/socialProfile")
    profile_id = prof.get("profileId") if isinstance(prof, dict) else None
    if not profile_id:
        raise GarminError("Could not read Garmin profile ID")
    garmin_gear = client.get_gear(profile_id)
    if not isinstance(garmin_gear, list) or not garmin_gear:
        return {"gear_added": 0, "activities_updated": 0}

    # Build / update local gear registry
    registry = _gear_load(user_dir)
    known = {g.get("garmin_id") for g in registry if g.get("garmin_id")}
    uuid_to_name: dict[str, str] = {}
    gear_added = 0
    for g in garmin_gear:
        if not isinstance(g, dict):
            continue
        guuid = g.get("uuid") or ""
        # `or ""` (not a .get default) so an explicit null make/model does not
        # end up as the literal text "None" in the gear name.
        make = g.get("gearMakeName") or ""
        model = g.get("gearModelName") or ""
        name = (g.get("customMakeModel") or g.get("displayName")
                or f"{make} {model}".strip())
        if not name or not guuid:
            continue
        uuid_to_name[guuid] = name
        if guuid not in known:
            # The key may be present with a null value; guard before .lower().
            gear_type = (g.get("gearTypeName") or "").lower()
            if gear_type not in ("bike", "shoes", "skis"):
                gear_type = "other"
            retired = g.get("gearStatusName") == "retired"
            registry.append({"id": str(uuid.uuid4()), "name": name,
                             "type": gear_type, "retired": retired, "garmin_id": guuid})
            known.add(guuid)
            gear_added += 1
        else:
            # Already registered: keep the local name in sync with Garmin.
            for item in registry:
                if item.get("garmin_id") == guuid:
                    item["name"] = name
    _gear_save(user_dir, registry)

    # Build timestamp → activity_id map from index shards
    ts_to_id: dict[int, str] = {}
    merged_dir = user_dir / "_merged"
    shard_dir = merged_dir if merged_dir.exists() else user_dir
    for shard_path in sorted(shard_dir.glob("index*.json")):
        try:
            idx = json.loads(shard_path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            continue
        activities = idx.get("activities") if isinstance(idx, dict) else None
        if not isinstance(activities, list):
            continue
        for a in activities:
            if not isinstance(a, dict):
                continue
            started = a.get("started_at") or ""
            local_id = a.get("id")
            if not started or not local_id:
                continue
            try:
                dt = datetime.fromisoformat(started.replace("Z", "+00:00"))
                ts_to_id[int(dt.astimezone(UTC).timestamp())] = local_id
            except (ValueError, TypeError, AttributeError, OverflowError, OSError):
                # One malformed timestamp must not abort the backfill or hide
                # the remaining activities of this shard.
                continue

    edits_dir = user_dir / "edits"
    edits_dir.mkdir(exist_ok=True)
    activities_updated = 0

    for guuid, gear_name in uuid_to_name.items():
        try:
            gear_acts = client.get_gear_activities(guuid, limit=10000)
        except Exception:
            # Best effort: a failure for one gear item should not prevent the
            # remaining gear from being processed.
            continue
        if not isinstance(gear_acts, list):
            continue
        for ga in gear_acts:
            if not isinstance(ga, dict):
                continue
            gmt = ga.get("startTimeGMT") or ""
            if not gmt:
                continue
            try:
                dt = datetime.strptime(gmt, "%Y-%m-%d %H:%M:%S").replace(tzinfo=UTC)
                ts = int(dt.timestamp())
            except (ValueError, TypeError):
                continue

            # Match within ±60 s, trying the smallest offset first.
            act_id = None
            for delta in range(0, 61):
                act_id = ts_to_id.get(ts + delta) or ts_to_id.get(ts - delta)
                if act_id:
                    break
            if not act_id:
                continue

            # Skip if activity already has gear set
            act_json = user_dir / "activities" / f"{act_id}.json"
            if act_json.exists():
                try:
                    existing = json.loads(act_json.read_text(encoding="utf-8"))
                except (OSError, ValueError):
                    existing = None
                if isinstance(existing, dict) and existing.get("gear"):
                    continue

            sidecar = edits_dir / f"{act_id}.md"
            fm, body = {}, ""
            if sidecar.exists():
                try:
                    text = sidecar.read_text(encoding="utf-8")
                except (OSError, ValueError):
                    # Unreadable sidecar: leave it alone instead of clobbering it.
                    continue
                if text.strip():
                    parts = re.split(r"^---[ \t]*$", text, maxsplit=2, flags=re.MULTILINE)
                    if len(parts) < 3:
                        # Content without recognisable front matter: rewriting
                        # would discard it, so skip this activity.
                        continue
                    try:
                        fm = yaml.safe_load(parts[1]) or {}
                    except yaml.YAMLError:
                        continue
                    if not isinstance(fm, dict):
                        # Front matter is not a mapping; do not overwrite it.
                        continue
                    body = parts[2].strip()
            if fm.get("gear"):
                continue

            fm["gear"] = gear_name
            fm_text = yaml.safe_dump(fm, default_flow_style=False, allow_unicode=True).strip()
            content = f"---\n{fm_text}\n---\n"
            if body:
                content += f"\n{body}\n"
            sidecar.write_text(content, encoding="utf-8")
            # The sidecar is the source of truth; a failed merge is picked up
            # by a later full merge/rebuild, so it is deliberately non-fatal.
            with contextlib.suppress(Exception):
                merge_one(user_dir, act_id)
            activities_updated += 1

    return {"gear_added": gear_added, "activities_updated": activities_updated}
+6 -140
View File
@@ -2,7 +2,6 @@
from __future__ import annotations
import json
from datetime import UTC
from fastapi import APIRouter, Cookie, HTTPException, Request
from fastapi.responses import JSONResponse, StreamingResponse
@@ -122,16 +121,8 @@ async def garmin_sync_stream(bincio_session: str | None = Cookie(default=None))
@router.post("/api/garmin/import-gear")
async def garmin_import_gear(bincio_session: str | None = Cookie(default=None)) -> JSONResponse:
"""One-time backfill: fetch gear registry from Garmin and match to existing activities by timestamp."""
import contextlib
import re
import uuid
import yaml
from bincio.extract.garmin_api import GarminError, get_client, has_credentials
from bincio.render.merge import merge_one
from bincio.serve.routers.gear import _load as _gear_load
from bincio.serve.routers.gear import _save as _gear_save
from bincio.extract.garmin_api import GarminError, has_credentials
from bincio.extract.garmin_sync import import_garmin_gear
user = deps._require_user(bincio_session)
data_dir = deps._get_data_dir()
@@ -141,134 +132,9 @@ async def garmin_import_gear(bincio_session: str | None = Cookie(default=None))
raise HTTPException(400, "No Garmin credentials stored — connect first")
try:
client = get_client(data_dir, user_dir)
except GarminError as e:
raise HTTPException(502, str(e))
# Fetch gear list
try:
prof = client.connectapi("/userprofile-service/socialProfile")
profile_id = prof.get("profileId") if isinstance(prof, dict) else None
if not profile_id:
raise HTTPException(502, "Could not read Garmin profile ID")
garmin_gear = client.get_gear(profile_id)
except GarminError as e:
raise HTTPException(502, str(e))
if not isinstance(garmin_gear, list) or not garmin_gear:
return JSONResponse({"ok": True, "gear_added": 0, "activities_updated": 0})
# Build / update gear registry
registry = _gear_load(user_dir)
known = {g.get("garmin_id") for g in registry if g.get("garmin_id")}
uuid_to_name: dict[str, str] = {}
gear_added = 0
for g in garmin_gear:
guuid = g.get("uuid") or ""
name = (g.get("customMakeModel") or g.get("displayName") or
f"{g.get('gearMakeName','')} {g.get('gearModelName','')}".strip())
if not name or not guuid:
continue
uuid_to_name[guuid] = name
if guuid not in known:
gear_type = g.get("gearTypeName", "").lower()
if gear_type not in ("bike", "shoes", "skis"):
gear_type = "other"
retired = g.get("gearStatusName") == "retired"
registry.append({"id": str(uuid.uuid4()), "name": name,
"type": gear_type, "retired": retired, "garmin_id": guuid})
known.add(guuid)
gear_added += 1
else:
for item in registry:
if item.get("garmin_id") == guuid:
item["name"] = name
_gear_save(user_dir, registry)
# Build timestamp → activity_id map from the user's index shards
from datetime import datetime
ts_to_id: dict[int, str] = {}
merged_dir = user_dir / "_merged"
shard_dirs = [merged_dir] if merged_dir.exists() else [user_dir]
for shard_dir in shard_dirs:
for shard_path in sorted(shard_dir.glob("index*.json")):
try:
idx = json.loads(shard_path.read_text(encoding="utf-8"))
for a in idx.get("activities", []):
started = a.get("started_at") or ""
if started and a.get("id"):
dt = datetime.fromisoformat(started.replace("Z", "+00:00"))
ts_to_id[int(dt.astimezone(UTC).timestamp())] = a["id"]
except (OSError, json.JSONDecodeError, KeyError):
continue
# For each gear, fetch its activities and match by timestamp
edits_dir = user_dir / "edits"
edits_dir.mkdir(exist_ok=True)
activities_updated = 0
for guuid, gear_name in uuid_to_name.items():
try:
gear_acts = client.get_gear_activities(guuid, limit=10000)
except Exception:
continue
if not isinstance(gear_acts, list):
continue
for ga in gear_acts:
gmt = ga.get("startTimeGMT") or ""
if not gmt:
continue
try:
from datetime import datetime
dt = datetime.strptime(gmt, "%Y-%m-%d %H:%M:%S").replace(tzinfo=UTC)
ts = int(dt.timestamp())
except ValueError:
continue
# Match within ±60 s
act_id = None
for delta in range(0, 61):
act_id = ts_to_id.get(ts + delta) or ts_to_id.get(ts - delta)
if act_id:
break
if not act_id:
continue
# Skip if activity already has gear set
act_json = user_dir / "activities" / f"{act_id}.json"
if act_json.exists():
try:
if json.loads(act_json.read_text(encoding="utf-8")).get("gear"):
continue
except (OSError, json.JSONDecodeError):
pass
sidecar = edits_dir / f"{act_id}.md"
fm, body = {}, ""
if sidecar.exists():
try:
text = sidecar.read_text(encoding="utf-8")
parts = re.split(r"^---[ \t]*$", text, maxsplit=2, flags=re.MULTILINE)
if len(parts) >= 3:
fm = yaml.safe_load(parts[1]) or {}
body = parts[2].strip()
except Exception:
pass
if fm.get("gear"):
continue
fm["gear"] = gear_name
fm_text = yaml.safe_dump(fm, default_flow_style=False, allow_unicode=True).strip()
content = f"---\n{fm_text}\n---\n"
if body:
content += f"\n{body}\n"
sidecar.write_text(content, encoding="utf-8")
with contextlib.suppress(Exception):
merge_one(user_dir, act_id)
activities_updated += 1
result = import_garmin_gear(data_dir, user_dir)
except GarminError as exc:
raise HTTPException(502, _garmin_user_message(exc))
tasks._trigger_rebuild(user.handle)
return JSONResponse({"ok": True, "gear_added": gear_added, "activities_updated": activities_updated})
return JSONResponse({"ok": True, **result})
+63
View File
@@ -0,0 +1,63 @@
#!/usr/bin/env python3
"""Backfill Garmin gear for all users who have stored Garmin credentials.
Usage (on VPS):
cd /opt/bincio
uv run python3 scripts/backfill_garmin_gear.py --data-dir /var/bincio/data
# Limit to specific users:
uv run python3 scripts/backfill_garmin_gear.py --data-dir /var/bincio/data --users plagzo12
"""
from __future__ import annotations
import argparse
import sys
from pathlib import Path
def main() -> None:
    """Run the Garmin gear backfill for every user with stored credentials.

    Command-line arguments:
        --data-dir: Root data directory containing one sub-directory per user.
        --users: Optional list of user handles to restrict the run to.

    Prints a per-user summary. Exits with a message if ``--data-dir`` is not a
    directory, and exits with status 1 if the backfill failed for any user so
    that callers (cron, CI, shell scripts) can detect the failure.
    """
    parser = argparse.ArgumentParser(description="Backfill Garmin gear for all users")
    parser.add_argument("--data-dir", required=True, type=Path, help="Root data directory")
    parser.add_argument("--users", nargs="*", help="Limit to these user handles (default: all)")
    args = parser.parse_args()

    data_dir: Path = args.data_dir.resolve()
    if not data_dir.is_dir():
        sys.exit(f"data-dir not found: {data_dir}")

    # Imported lazily so argument errors are reported without loading bincio.
    from bincio.extract.garmin_api import GarminError, has_credentials
    from bincio.extract.garmin_sync import import_garmin_gear

    candidates = (
        [data_dir / h for h in args.users]
        if args.users
        else sorted(p for p in data_dir.iterdir() if p.is_dir())
    )
    # is_dir() guards explicitly requested handles that do not exist on disk.
    garmin_users = [p for p in candidates if p.is_dir() and has_credentials(p)]

    if args.users:
        # Tell the operator which explicitly requested handles were dropped,
        # instead of silently ignoring a typo or an unconnected account.
        selected = set(garmin_users)
        for p in candidates:
            if p not in selected:
                print(f"[{p.name}] skipped — no such user directory or no Garmin credentials")

    if not garmin_users:
        print("No users with Garmin credentials found.")
        return

    print(f"Found {len(garmin_users)} Garmin user(s): {[p.name for p in garmin_users]}\n")

    failed: list[str] = []
    for user_dir in garmin_users:
        handle = user_dir.name
        print(f"[{handle}] importing gear...", flush=True)
        try:
            result = import_garmin_gear(data_dir, user_dir)
        except GarminError as exc:
            print(f"[{handle}] Garmin error: {exc}")
            failed.append(handle)
        except Exception as exc:
            # Top-level boundary: report and continue with the next user.
            print(f"[{handle}] unexpected error: {type(exc).__name__}: {exc}")
            failed.append(handle)
        else:
            print(
                f"[{handle}] done — "
                f"gear_added={result['gear_added']}, "
                f"activities_updated={result['activities_updated']}"
            )

    print("\nAll done. Run merge_all or trigger a rebuild to refresh the index shards.")
    if failed:
        print(f"Failed for {len(failed)} user(s): {failed}")
        sys.exit(1)


if __name__ == "__main__":
    main()