"""Apply sidecar .md edits to BAS JSON files.

Produces data_dir/_merged/ — a mirror of data_dir where:
  - Files without sidecars are symlinked to the originals (cheap, preserves
    extracted data)
  - Files with sidecars are written as merged copies
  - index.json is rewritten with private filtering + highlight sort

This keeps data_dir/activities/*.json pristine (re-running extract never
clobbers user edits, and removing a sidecar always reverts fully on the next
render).
"""

from __future__ import annotations

import json
import re
import shutil
from pathlib import Path

import yaml

# A frontmatter delimiter is a line holding only "---" plus optional blanks.
_FRONTMATTER_DELIM = re.compile(r"^---[ \t]*$", flags=re.MULTILINE)


def parse_sidecar(path: Path) -> tuple[dict, str]:
    """Return (frontmatter_dict, markdown_body) from a sidecar .md file.

    Frontmatter is recognised only when the very first line is a ``---``
    delimiter and a second delimiter line follows. Anything else is treated
    as a body-only file and yields an empty frontmatter dict.

    Args:
        path: Location of the sidecar markdown file (read as UTF-8).

    Returns:
        A tuple of the parsed frontmatter mapping and the stripped body text.
        Frontmatter that is empty or does not parse to a mapping yields ``{}``.

    Raises:
        yaml.YAMLError: If the frontmatter is not valid YAML.
    """
    # "utf-8-sig" also reads plain UTF-8; it only drops a leading BOM, which
    # would otherwise hide the opening "---" from the check below.
    text = path.read_text(encoding="utf-8-sig")
    if text.startswith("---"):
        parts = _FRONTMATTER_DELIM.split(text, maxsplit=2)
        # parts[0] is the text before the first delimiter line. It must be
        # empty, otherwise the file merely starts with "---something" and a
        # later horizontal rule would be mistaken for frontmatter.
        if len(parts) >= 3 and not parts[0]:
            fm = yaml.safe_load(parts[1]) or {}
            if not isinstance(fm, dict):
                # A YAML list/scalar carries no named overrides; callers
                # index the result by key, so hand back an empty mapping.
                fm = {}
            return fm, parts[2].strip()
    return {}, text.strip()


def _as_str_list(value: object) -> list[str]:
    """Normalise a frontmatter value to a list of strings."""
    if not value:
        return []
    if isinstance(value, (str, int, float)):
        # A lone scalar ("hide_stats: pace") means one entry, not one entry
        # per character.
        return [str(value)]
    return [str(item) for item in value]


def apply_sidecar(detail: dict, fm: dict, body: str) -> dict:
    """Apply sidecar overrides to a detail JSON dict.

    Args:
        detail: Parsed activity detail document. It is not mutated.
        fm: Frontmatter mapping from the sidecar.
        body: Markdown body of the sidecar; when non-empty it becomes the
            description and takes precedence over ``fm["description"]``.

    Returns:
        A shallow copy of ``detail`` with the overrides applied and a fresh
        ``custom`` dict.
    """
    d = dict(detail)
    # Copy "custom" so the caller's dict is untouched; tolerate a missing
    # key as well as an explicit null.
    d["custom"] = dict(detail.get("custom") or {})

    if "title" in fm:
        d["title"] = str(fm["title"])
    if "sport" in fm:
        d["sport"] = str(fm["sport"])
    if "gear" in fm:
        # An empty gear override keeps whatever the detail already had.
        d["gear"] = str(fm["gear"]) if fm["gear"] else d.get("gear")

    if body:
        d["description"] = body
    elif "description" in fm:
        d["description"] = str(fm["description"])

    if "highlight" in fm:
        d["custom"]["highlight"] = bool(fm["highlight"])
    if "private" in fm:
        # A falsy "private" leaves the extracted privacy value in place.
        d["privacy"] = (
            "private" if fm["private"] else detail.get("privacy", "public")
        )
    if "hide_stats" in fm:
        d["custom"]["hide_stats"] = _as_str_list(fm["hide_stats"])
    return d


def _apply_sidecar_summary(summary: dict, fm: dict) -> dict:
    """Apply sidecar overrides to an index summary entry.

    Only title, sport, highlight and privacy are carried into the summary.
    Returns a shallow copy; ``summary`` itself is not mutated.
    """
    s = dict(summary)
    s["custom"] = dict(summary.get("custom") or {})

    if "title" in fm:
        s["title"] = str(fm["title"])
    if "sport" in fm:
        s["sport"] = str(fm["sport"])
    if "highlight" in fm:
        s["custom"]["highlight"] = bool(fm["highlight"])
    if "private" in fm:
        s["privacy"] = (
            "private" if fm["private"] else summary.get("privacy", "public")
        )
    return s


def _collect_sidecars(edits_dir: Path) -> dict[str, tuple[dict, str]]:
    """Parse every edits/*.md file, keyed by file stem (the activity id)."""
    if not edits_dir.exists():
        return {}
    return {
        md_path.stem: parse_sidecar(md_path)
        for md_path in sorted(edits_dir.glob("*.md"))
    }


def _collect_image_lists(images_root: Path) -> dict[str, list[str]]:
    """Map activity id -> sorted image file names under edits/images/<id>/."""
    image_lists: dict[str, list[str]] = {}
    if not images_root.is_dir():
        return image_lists
    for img_dir in sorted(images_root.iterdir()):
        if not img_dir.is_dir():
            continue
        files = sorted(
            p.name
            for p in img_dir.iterdir()
            if p.is_file() and not p.name.startswith(".")
        )
        if files:
            image_lists[img_dir.name] = files
    return image_lists


def _mirror_activities(
    acts_dir: Path,
    merged_acts: Path,
    sidecars: dict[str, tuple[dict, str]],
    image_lists: dict[str, list[str]],
) -> None:
    """Rebuild _merged/activities/: symlink untouched files, write merged ones."""
    # Start from an empty directory so removed sidecars revert cleanly.
    if merged_acts.exists():
        shutil.rmtree(merged_acts)
    merged_acts.mkdir(parents=True)

    if not acts_dir.exists():
        return

    for src in sorted(acts_dir.iterdir()):
        if not src.is_file():
            continue
        dest = merged_acts / src.name
        activity_id = src.stem
        has_sidecar = activity_id in sidecars
        has_images = activity_id in image_lists

        if src.suffix != ".json" or not (has_sidecar or has_images):
            dest.symlink_to(src.resolve())
            continue

        detail = json.loads(src.read_text(encoding="utf-8"))
        if has_sidecar:
            fm, body = sidecars[activity_id]
            detail = apply_sidecar(detail, fm, body)
        if has_images:
            # Activities with uploaded images get custom.images even when
            # they have no sidecar text yet.
            detail["custom"] = dict(detail.get("custom") or {})
            detail["custom"]["images"] = image_lists[activity_id]
        # ensure_ascii=False emits raw non-ASCII characters, so the encoding
        # must be pinned rather than left to the locale default.
        dest.write_text(
            json.dumps(detail, indent=2, ensure_ascii=False),
            encoding="utf-8",
        )


def _mirror_images(images_root: Path, merged_acts: Path) -> None:
    """Symlink each edits/images/<id>/ into _merged/activities/images/."""
    if not images_root.is_dir():
        return
    merged_images = merged_acts / "images"
    merged_images.mkdir(exist_ok=True)
    for img_dir in images_root.iterdir():
        if not img_dir.is_dir():
            continue
        dest_img = merged_images / img_dir.name
        if not dest_img.exists():
            dest_img.symlink_to(img_dir.resolve())


def _link_athlete(data_dir: Path, merged_dir: Path) -> None:
    """Refresh the _merged/athlete.json symlink, or drop it if the source is gone."""
    athlete_dest = merged_dir / "athlete.json"
    # is_symlink() catches a dangling link, for which exists() is False.
    if athlete_dest.exists() or athlete_dest.is_symlink():
        athlete_dest.unlink()
    athlete_src = data_dir / "athlete.json"
    if athlete_src.exists():
        athlete_dest.symlink_to(athlete_src.resolve())


def _write_index(
    data_dir: Path,
    merged_dir: Path,
    sidecars: dict[str, tuple[dict, str]],
) -> None:
    """Write _merged/index.json: overrides applied, private dropped, sorted."""
    merged_index = merged_dir / "index.json"
    index_path = data_dir / "index.json"
    if not index_path.exists():
        # No source index: make sure a stale merged copy does not linger.
        if merged_index.exists():
            merged_index.unlink()
        return

    index = json.loads(index_path.read_text(encoding="utf-8"))
    activities = []
    for summary in index.get("activities") or []:
        # Sidecar keys are file stems (always strings); coerce the index id
        # so a numeric id in the JSON still finds its sidecar.
        aid = str(summary.get("id", ""))
        if aid in sidecars:
            fm, _ = sidecars[aid]
            summary = _apply_sidecar_summary(summary, fm)
        # Drop private activities from the published feed.
        if summary.get("privacy") != "private":
            activities.append(summary)

    # Newest first, then a second stable sort lifts highlighted activities
    # to the top while keeping the date order inside each group.
    activities.sort(key=lambda a: a.get("started_at") or "", reverse=True)
    activities.sort(
        key=lambda a: 0 if (a.get("custom") or {}).get("highlight") else 1
    )

    index["activities"] = activities
    merged_index.write_text(
        json.dumps(index, indent=2, ensure_ascii=False),
        encoding="utf-8",
    )


def merge_all(data_dir: Path) -> int:
    """Build data_dir/_merged/ with all sidecar overrides applied.

    Reads sidecars from ``data_dir/edits/*.md`` and image folders from
    ``data_dir/edits/images/<id>/``, then recreates
    ``data_dir/_merged/activities/``, the ``athlete.json`` symlink and the
    filtered, sorted ``index.json``.

    Args:
        data_dir: Root directory holding activities/, edits/, index.json
            and athlete.json.

    Returns:
        The number of sidecar files parsed from edits/. A sidecar is counted
        even if no activity file with the same stem exists.
    """
    edits_dir = data_dir / "edits"
    images_root = edits_dir / "images"
    merged_dir = data_dir / "_merged"
    merged_acts = merged_dir / "activities"

    # Collect all inputs upfront, before anything under _merged/ is wiped.
    sidecars = _collect_sidecars(edits_dir)
    image_lists = _collect_image_lists(images_root)

    _mirror_activities(
        data_dir / "activities", merged_acts, sidecars, image_lists
    )
    _mirror_images(images_root, merged_acts)
    _link_athlete(data_dir, merged_dir)
    _write_index(data_dir, merged_dir, sidecars)

    return len(sidecars)