156 lines
5.3 KiB
Python
156 lines
5.3 KiB
Python
"""Import metadata from Strava's activities.csv bulk export.

Strava export columns we care about:

    Activity ID, Activity Date, Activity Name, Activity Description, Filename
"""
|
|
|
|
import csv
|
|
import json
|
|
import re
|
|
from pathlib import Path
|
|
from typing import Iterator, Optional
|
|
|
|
|
|
# Date/time layouts (strptime-style directives) associated with Strava exports.
# NOTE(review): nothing in the visible part of this module reads this tuple —
# confirm whether another module uses it before changing or removing it.
_STRAVA_DATE_FMTS = (
    # Month abbreviation with a 12-hour clock, e.g. "Jun 1, 2024, 7:30:12 AM"
    "%b %d, %Y, %I:%M:%S %p",
    # Numeric date with a 24-hour clock, e.g. "2024-06-01 07:30:12"
    "%Y-%m-%d %H:%M:%S",
)
|
|
|
|
|
|
class StravaMetadata:
    """Maps original filename → Strava metadata, with secondary strava_id index.

    Rows of the export CSV are indexed twice:

    * by the basename of their ``Filename`` column, and
    * by their ``Activity ID`` column (as a stripped string).

    When several rows share a key, the last one read wins.
    """

    def __init__(self, csv_path: Path) -> None:
        """Read *csv_path* and build both indexes.

        Errors raised while opening or decoding the file propagate to the
        caller.
        """
        self._by_filename: dict[str, dict] = {}
        self._by_strava_id: dict[str, dict] = {}
        self._load(csv_path)

    @staticmethod
    def _cell(row: dict, column: str) -> str:
        """Return the stripped text of *column* in *row*, or ``""``.

        ``csv.DictReader`` fills columns that are missing from a short row
        with ``None``, so ``row.get(column, "")`` can still return ``None``;
        anything that is not a string is treated as an empty cell.
        """
        value = row.get(column)
        return value.strip() if isinstance(value, str) else ""

    def _load(self, path: Path) -> None:
        """Populate the filename and activity-ID indexes from the CSV at *path*."""
        # "utf-8-sig" drops a leading BOM so it cannot end up glued to the
        # first header name.
        with path.open(newline="", encoding="utf-8-sig") as f:
            for row in csv.DictReader(f):
                filename = self._cell(row, "Filename")
                if filename:
                    # Only the basename is used as key; any directory part
                    # of the CSV value is discarded.
                    self._by_filename[Path(filename).name] = row
                strava_id = self._cell(row, "Activity ID")
                if strava_id:
                    self._by_strava_id[strava_id] = row

    def lookup(self, source_file: str) -> Optional[dict]:
        """Return the Strava CSV row for a given source filename, or None.

        *source_file* is compared as-is against the stored basenames.
        """
        return self._by_filename.get(source_file)

    def lookup_by_strava_id(self, strava_id: str) -> Optional[dict]:
        """Return the Strava CSV row for a given Strava activity ID, or None.

        *strava_id* is converted with ``str()`` first, so integer IDs work too.
        """
        return self._by_strava_id.get(str(strava_id))

    def enrich(self, source_file: str, activity: object) -> None:
        """Mutate a ParsedActivity with Strava metadata if found.

        Fills ``title``, ``description`` and ``strava_id`` on *activity* from
        the matching CSV row. An attribute is only written when its current
        value is falsy and the CSV cell is non-empty after stripping; existing
        values are never overwritten. Does nothing when *source_file* has no
        row.
        """
        row = self.lookup(source_file)
        if row is None:
            return

        fields = (
            ("title", "Activity Name"),
            ("description", "Activity Description"),
            ("strava_id", "Activity ID"),
        )
        for attr, column in fields:
            value = self._cell(row, column)
            if not getattr(activity, attr) and value:
                setattr(activity, attr, value)
|
|
|
|
|
|
# ── Retroactive sidecar update ────────────────────────────────────────────────
|
|
|
|
def _parse_sidecar(path: Path) -> tuple[dict, str]:
    """Return (frontmatter_dict, body) from a sidecar .md file.

    A sidecar has front matter only when its very first line is a ``---``
    fence (optionally followed by spaces/tabs) and a second such fence closes
    it. In every other case the whole file, stripped, is returned as the body
    together with an empty dict, so no text is ever dropped.

    Front matter that does not parse to a YAML mapping is treated the same
    way as missing front matter. YAML parse errors and file read errors
    propagate to the caller.
    """
    text = path.read_text(encoding="utf-8")
    if text.startswith("---"):
        parts = re.split(r"^---[ \t]*$", text, maxsplit=2, flags=re.MULTILINE)
        # parts[0] is the text before the first fence. It must be empty:
        # a first line like "---draft" passes startswith() but is not a
        # fence, and later "---" rules in the body must not be mistaken
        # for front-matter delimiters.
        if len(parts) >= 3 and not parts[0]:
            # Imported here so that sidecars without front matter can be
            # read even when PyYAML is not installed.
            import yaml

            fm = yaml.safe_load(parts[1]) or {}
            # Callers use the result as a mapping; a scalar or list is not
            # usable front matter.
            if isinstance(fm, dict):
                return fm, parts[2].strip()
    return {}, text.strip()
|
|
|
|
|
|
def _write_sidecar(path: Path, fm: dict, body: str) -> None:
    """Write a sidecar file: YAML front matter, then the optional body.

    The parent directory of *path* is created if needed. *fm* is dumped in
    block style between two ``---`` fence lines; when *body* is non-empty it
    follows after one blank line and is terminated by a newline. An existing
    file at *path* is overwritten.
    """
    import yaml

    path.parent.mkdir(parents=True, exist_ok=True)
    front_matter = yaml.safe_dump(
        fm, default_flow_style=False, allow_unicode=True
    ).strip()
    # The body section is omitted entirely when there is nothing to write.
    tail = f"\n{body}\n" if body else ""
    path.write_text(f"---\n{front_matter}\n---\n" + tail, encoding="utf-8")
|
|
|
|
|
|
def _update_sidecar_from_row(sidecar_path: Path, row: dict) -> bool:
    """Create or update a sidecar with CSV title/description.

    Only fills fields that are not already set in the sidecar: the title is
    written when the front matter has no ``title`` key, and the description
    becomes the body when the body is empty.

    Cells that are missing or ``None`` (as ``csv.DictReader`` produces for
    short rows) count as empty.

    Returns True if anything changed, i.e. the sidecar was written.
    """

    def _cell(column: str) -> str:
        # row.get(column, "") is not enough: the key can exist with a None
        # value, which has no .strip().
        value = row.get(column)
        return value.strip() if isinstance(value, str) else ""

    title = _cell("Activity Name")
    description = _cell("Activity Description")
    if not title and not description:
        return False

    fm, body = _parse_sidecar(sidecar_path) if sidecar_path.exists() else ({}, "")

    changed = False
    if title and "title" not in fm:
        fm["title"] = title
        changed = True
    if description and not body:
        body = description
        changed = True

    if not changed:
        return False

    _write_sidecar(sidecar_path, fm, body)
    return True
|
|
|
|
|
|
def apply_csv_to_data_dir(data_dir: Path, metadata: "StravaMetadata") -> int:
    """Retroactively apply CSV metadata to existing activities via sidecars.

    Scans all activity JSONs in data_dir/activities/. For each activity that
    has a strava_id, looks up the corresponding CSV row and creates/updates
    the sidecar in data_dir/edits/ with any missing title or description.

    Only writes fields not already present in the sidecar — manual edits are
    never overwritten.

    Files that cannot be read, are not valid JSON, or whose top-level value
    is not a JSON object are skipped. Errors raised while reading or writing
    a sidecar are not caught here.

    Returns the count of activities whose sidecars were created or updated
    (0 when data_dir/activities/ does not exist).
    """
    activities_dir = data_dir / "activities"
    edits_dir = data_dir / "edits"

    if not activities_dir.exists():
        return 0

    updated = 0
    # Sorted so files are processed in a stable order.
    for json_path in sorted(activities_dir.glob("*.json")):
        try:
            detail = json.loads(json_path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # Unreadable file, bad encoding or malformed JSON
            # (JSONDecodeError and UnicodeDecodeError are ValueErrors).
            continue

        # Valid JSON can still be a list/scalar, which has no .get().
        if not isinstance(detail, dict):
            continue

        strava_id = detail.get("strava_id")
        if not strava_id:
            continue

        row = metadata.lookup_by_strava_id(str(strava_id))
        if row is None:
            continue

        # The sidecar shares the activity JSON's stem: <stem>.md in edits/.
        sidecar_path = edits_dir / f"{json_path.stem}.md"
        if _update_sidecar_from_row(sidecar_path, row):
            updated += 1

    return updated
|