feat: OG link previews — track image + meta tags for Telegram/WhatsApp

- bincio/render/ogimage.py: generate 400x400 elevation-coloured PNG with Pillow
- bincio/serve/routers/ogimage.py: /activity/{id}/ OG HTML stub for bot UAs; /og-image/{user}/{id}.png serves pre-generated images with on-demand fallback
- scripts/generate_og_images.py: batch pre-generation, incremental (mtime skip)
- scripts/strava_elevation_audit.py: add source/threshold/MA columns and pct stats
- pyproject.toml: add Pillow>=10 to serve extras
2026-05-23 21:44:19 +02:00
parent 56932f7f25
commit 693f720cbd
6 changed files with 574 additions and 0 deletions
@@ -0,0 +1,91 @@
+"""Pre-generate OG track images for all activities.
+
+Writes 400×400 PNGs to {www_root}/og-image/{user}/{activity_id}.png.
+Skips activities that already have an up-to-date image (mtime check).
+Safe to run repeatedly — only processes new/changed activities.
+
+Usage:
+    uv run scripts/generate_og_images.py [--data-dir /var/bincio/data] [--www-root /var/www/activity]
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from pathlib import Path
+
+
+def generate_all(data_dir: Path, www_root: Path) -> None:
+    """Render an OG track image for every activity that lacks a current one.
+
+    Each sub-directory of ``data_dir`` (except names starting with ``_`` and
+    the name ``segments``) is treated as a user handle.  For every
+    ``activities/*.timeseries.json`` file, the ``lat``, ``lon`` and
+    ``elevation_m`` arrays are passed to ``bincio.render.ogimage.generate``
+    and the returned bytes are stored at
+    ``{www_root}/og-image/{handle}/{activity_id}.png``.
+
+    An image whose mtime is not older than its timeseries file is left
+    untouched.  A failure on a single activity is reported on stderr and
+    counted; it does not stop the run.  Per-user and overall counts are
+    printed to stdout.
+
+    Args:
+        data_dir: Root directory holding one sub-directory per user.
+        www_root: Web root; images are written below its ``og-image`` folder.
+    """
+    ts_suffix = ".timeseries.json"
+    out_root = www_root / "og-image"
+    out_root.mkdir(parents=True, exist_ok=True)
+
+    # Imported here so that the renderer is only loaded once the output
+    # directory is known to be creatable.
+    from bincio.render.ogimage import generate
+
+    total = generated = skipped = errors = 0
+
+    users = sorted(
+        d.name for d in data_dir.iterdir()
+        if d.is_dir() and not d.name.startswith("_") and d.name != "segments"
+    )
+
+    for handle in users:
+        acts_dir = data_dir / handle / "activities"
+        if not acts_dir.exists():
+            continue
+        img_dir = out_root / handle
+        img_dir.mkdir(exist_ok=True)
+        u_gen = u_skip = u_err = 0
+
+        for ts_path in sorted(acts_dir.glob("*" + ts_suffix)):
+            # Strip only the trailing suffix; str.replace() would also remove
+            # the same text from the middle of an unusual file name.
+            activity_id = ts_path.name[: -len(ts_suffix)]
+            out_path = img_dir / f"{activity_id}.png"
+            total += 1
+
+            # Skip if image is newer than timeseries
+            if out_path.exists() and out_path.stat().st_mtime >= ts_path.stat().st_mtime:
+                skipped += 1
+                u_skip += 1
+                continue
+
+            tmp_path = out_path.with_name(out_path.name + ".tmp")
+            try:
+                ts = json.loads(ts_path.read_text(encoding="utf-8"))
+                png = generate(
+                    ts.get("lat") or [],
+                    ts.get("lon") or [],
+                    ts.get("elevation_m") or [],
+                )
+                # Write to a sibling temp file and rename it into place.  An
+                # interrupted run must never leave a truncated PNG with a
+                # fresh mtime, because the mtime check above would then skip
+                # it on every later run.
+                tmp_path.write_bytes(png)
+                tmp_path.replace(out_path)
+                generated += 1
+                u_gen += 1
+            except Exception as exc:
+                # Batch boundary: report the failure and carry on with the
+                # remaining activities.
+                errors += 1
+                u_err += 1
+                print(f"  ERROR {handle}/{activity_id}: {exc}", file=sys.stderr)
+                try:
+                    tmp_path.unlink()
+                except OSError:
+                    pass  # no temp file was created, or it cannot be removed
+
+        if u_gen or u_err:
+            print(f"{handle:<25} generated={u_gen:4d}  skipped={u_skip:4d}  errors={u_err}")
+        else:
+            print(f"{handle:<25} skipped={u_skip:4d} (all up to date)")
+
+    print(f"\nDone — {generated} generated, {skipped} skipped, {errors} errors  (total {total})")
+
+
+def main() -> None:
+    """Command-line entry point for the batch OG image generator.
+
+    Reads ``--data-dir`` and ``--www-root`` from the command line, exits with
+    status 1 when the data directory does not exist, prints the resolved
+    locations and then hands over to ``generate_all``.
+    """
+    parser = argparse.ArgumentParser(description="Pre-generate OG track images")
+    parser.add_argument("--data-dir", type=Path, default="/var/bincio/data")
+    parser.add_argument("--www-root", type=Path, default="/var/www/activity")
+    opts = parser.parse_args()
+    data_dir, www_root = opts.data_dir, opts.www_root
+
+    if not data_dir.exists():
+        print(f"ERROR: data dir not found: {data_dir}", file=sys.stderr)
+        sys.exit(1)
+
+    # One line per location; the last one ends with a blank separator line.
+    banner = (
+        f"data-dir : {data_dir}",
+        f"www-root : {www_root}",
+        f"output   : {www_root}/og-image/\n",
+    )
+    for line in banner:
+        print(line)
+
+    generate_all(data_dir, www_root)
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,198 @@
+"""Audit elevation accuracy vs Strava.
+
+Friends add a note with the Strava elevation to their activity descriptions.
+Supported formats (case-insensitive):
+  - "strava 1323md+"            most common
+  - "strava 1323 m d+"
+  - "Strava 1625 m d+"
+  - "Strava Elevation 1173m"
+  - "1038 m d+ Strava"          number before the word strava
+  - "Strava 207 metri di dislivello"
+
+Descriptions live in _merged/activities/ (sidecar merge).
+Computed elevation_gain_m is read from activities/ (main file).
+
+Usage:
+    uv run scripts/strava_elevation_audit.py [--data-dir /var/bincio/data] [--out elevation_audit.csv]
+"""
+
+from __future__ import annotations
+
+import argparse
+import csv
+import json
+import re
+import sys
+from pathlib import Path
+
+from bincio.extract.metrics import elevation_params
+
+# Patterns tried in order; first match wins.
+# Each pattern must have exactly one capturing group for the numeric value.
+_PATTERNS: list[re.Pattern] = [
+    # "strava NNN m ..."  or  "strava NNNmd+"
+    re.compile(r'\bstrava\b\s*([0-9][0-9.,]*)\s*m', re.IGNORECASE),
+    # "Strava Elevation NNNm"  or  "Strava ... NNNm"  (one word between)
+    re.compile(r'\bstrava\b\s+\w+\s+([0-9][0-9.,]*)\s*m', re.IGNORECASE),
+    # "NNN m ... strava"  (number comes first, up to 20 chars before strava)
+    re.compile(r'([0-9][0-9.,]*)\s*m\b.{0,20}?\bstrava\b', re.IGNORECASE),
+    # "Strava NNN metri di dislivello"  (Italian)
+    re.compile(r'\bstrava\b.*?([0-9][0-9.,]*)\s+metr', re.IGNORECASE),
+]
+
+# A number written with thousands grouping, e.g. "1,323" or "1.323".
+_GROUPED_THOUSANDS = re.compile(r'[1-9][0-9]{0,2}(?:[.,][0-9]{3})+')
+
+
+def _parse_number(raw: str) -> float:
+    """Convert a captured numeric token to a float.
+
+    Trailing ``.``/``,`` (sentence punctuation swallowed by the capture
+    group) is dropped.  A token made of groups of exactly three digits, such
+    as ``1,323`` or ``1.323``, is read as a thousands-grouped integer;
+    otherwise a comma is taken to be the decimal separator.
+
+    Raises:
+        ValueError: if the token is still not a valid number.
+    """
+    text = raw.rstrip(".,")
+    if _GROUPED_THOUSANDS.fullmatch(text):
+        # "1,323" / "1.323" is 1323 m of climbing, not 1.323 m.
+        return float(text.replace(",", "").replace(".", ""))
+    return float(text.replace(",", "."))
+
+
+def _find_strava_elevation(description: str) -> float | None:
+    """Return the Strava elevation (metres) quoted in *description*.
+
+    The patterns in ``_PATTERNS`` are tried in order.  The first one whose
+    captured token parses as a number wins; a match that does not parse is
+    ignored and the next pattern is tried.  Returns ``None`` when nothing
+    usable is found.
+    """
+    for pat in _PATTERNS:
+        m = pat.search(description)
+        if not m:
+            continue
+        try:
+            return _parse_number(m.group(1))
+        except ValueError:
+            continue
+    return None
+
+
+def audit(data_dir: Path, out_path: Path) -> list[dict]:
+    """Compare computed elevation gain with the Strava figure noted by users.
+
+    Scans ``{data_dir}/*/_merged/activities/*.json`` for descriptions that
+    mention Strava, extracts the quoted elevation, and pairs it with
+    ``elevation_gain_m`` from the matching file under ``activities/`` (the
+    merged file itself is used when the main file cannot be read).
+
+    Rows are ordered by absolute difference, largest first, and written to
+    *out_path* as CSV when there is at least one row.  Descriptions that
+    mention Strava but yield no number are listed on stdout.
+
+    Args:
+        data_dir: Root directory holding one sub-directory per user.
+        out_path: Destination CSV file.
+
+    Returns:
+        The rows that were written, in the same order.
+    """
+    records: list[dict] = []
+    unparsed: list[tuple[str, str]] = []  # (file name, description)
+
+    for sidecar in sorted(data_dir.glob("*/_merged/activities/*.json")):
+        fname = sidecar.name
+        if sidecar.suffix != ".json" or ".timeseries." in fname or ".geojson" in fname:
+            continue
+
+        try:
+            merged = json.loads(sidecar.read_text(encoding="utf-8"))
+        except (json.JSONDecodeError, OSError):
+            continue
+
+        description = merged.get("description") or ""
+        if "strava" not in description.lower():
+            continue
+
+        # strava:// links are athlete mentions, not elevation notes.
+        if re.search(r'strava://', description, re.IGNORECASE):
+            continue
+
+        strava_elev = _find_strava_elevation(description)
+        if strava_elev is None:
+            unparsed.append((fname, description))
+            continue
+
+        # Layout: {data_dir}/{user}/_merged/activities/{file}; the computed
+        # values live in {data_dir}/{user}/activities/{file}.
+        user = sidecar.parents[2].name
+        main_path = sidecar.parents[3] / user / "activities" / fname
+        try:
+            main = json.loads(main_path.read_text(encoding="utf-8"))
+        except (json.JSONDecodeError, OSError):
+            main = merged  # fall back to merged values
+
+        our_elev = main.get("elevation_gain_m")
+        title = main.get("title") or merged.get("title") or sidecar.stem
+        altitude_source = main.get("altitude_source") or "unknown"
+        source = main.get("source") or ""
+        device = main.get("device") or "unknown"
+        ma_window, threshold = elevation_params(altitude_source, source)
+
+        if our_elev is None:
+            delta = None
+            pct = None
+        else:
+            delta = round(our_elev - strava_elev, 1)
+            if strava_elev != 0:
+                pct = round((our_elev - strava_elev) / strava_elev * 100, 1)
+            else:
+                pct = None
+
+        short_desc = description[:120].replace("\n", " ").replace("\r", "")
+        records.append({
+            "file": fname,
+            "user": user,
+            "title": title,
+            "device": device,
+            "altitude_source": altitude_source,
+            "source": source,
+            "ma_window_s": ma_window,
+            "threshold_m": threshold,
+            "our_elevation_m": our_elev,
+            "strava_elevation_m": strava_elev,
+            "delta_m": delta,
+            "delta_pct": pct,
+            "description": short_desc,
+        })
+
+    def _abs_delta(row: dict) -> float:
+        # Rows without a computed elevation sort as if the delta were zero.
+        return abs(row["delta_m"] or 0)
+
+    records = sorted(records, key=_abs_delta, reverse=True)
+
+    if records:
+        with out_path.open("w", newline="", encoding="utf-8") as fh:
+            writer = csv.DictWriter(fh, fieldnames=list(records[0]))
+            writer.writeheader()
+            writer.writerows(records)
+
+    if unparsed:
+        print(f"\nCould not parse elevation from {len(unparsed)} description(s):")
+        for name, desc in unparsed:
+            print(f"  {name}  {desc[:80]!r}")
+
+    return records
+
+
+def main() -> None:
+    """Command-line entry point for the Strava elevation audit.
+
+    Reads ``--data-dir`` and ``--out`` from the command line, exits with
+    status 1 when the data directory does not exist, runs ``audit`` and
+    prints one table line per audited activity followed by summary
+    statistics.
+
+    The summary covers only rows that have a percentage difference (rows
+    without a computed elevation, or with a Strava value of 0, have none), so
+    ``n`` and the ``within ±N%`` denominators count those rows only.
+    """
+    # Local import: only the summary line needs it.
+    import statistics
+
+    ap = argparse.ArgumentParser(description="Audit elevation accuracy vs Strava notes")
+    ap.add_argument("--data-dir", default="/var/bincio/data", type=Path)
+    ap.add_argument("--out", default="elevation_audit.csv", type=Path)
+    args = ap.parse_args()
+
+    if not args.data_dir.exists():
+        print(f"ERROR: data dir not found: {args.data_dir}", file=sys.stderr)
+        sys.exit(1)
+
+    print(f"Scanning {args.data_dir} …")
+    rows = audit(args.data_dir, args.out)
+
+    if not rows:
+        # audit() writes no CSV in this case, so nothing is reported as saved.
+        print("No activities found with a parseable Strava elevation note.")
+        return
+
+    print(f"\nFound {len(rows)} activit{'y' if len(rows)==1 else 'ies'}:\n")
+    header = (
+        f"{'File':<50} {'User':<15} {'Source':<16} {'AltSrc':<12}"
+        f" {'MA':>4} {'Thr':>5} {'Ours':>8} {'Strava':>8} {'Delta':>8} {'Delta%':>7}"
+    )
+    print(header)
+    print("-" * len(header))
+    for r in rows:
+        delta_str = f"{r['delta_m']:+.0f}" if r['delta_m'] is not None else "n/a"
+        pct_str   = f"{r['delta_pct']:+.1f}%" if r['delta_pct'] is not None else "n/a"
+        our_str   = f"{r['our_elevation_m']:.0f}" if r['our_elevation_m'] is not None else "n/a"
+        print(
+            f"{r['file']:<50} {r['user']:<15} {r['source']:<16} {r['altitude_source']:<12}"
+            f" {r['ma_window_s']:>4} {r['threshold_m']:>5.1f}"
+            f" {our_str:>8} {r['strava_elevation_m']:>8.0f}"
+            f" {delta_str:>8} {pct_str:>7}"
+        )
+
+    pcts = [r["delta_pct"] for r in rows if r["delta_pct"] is not None]
+    deltas = [r["delta_m"] for r in rows if r["delta_m"] is not None]
+    if pcts:
+        # Count only the rows that contribute to the statistics; using
+        # len(rows) would understate the "within" ratios whenever some rows
+        # have no percentage.
+        n = len(pcts)
+        avg_pct = sum(pcts) / n
+        # True median: averages the two middle values for an even count.
+        median_pct = statistics.median(pcts)
+        within_10 = sum(1 for p in pcts if abs(p) <= 10)
+        within_15 = sum(1 for p in pcts if abs(p) <= 15)
+        avg_d = sum(deltas) / len(deltas) if deltas else 0
+        print(
+            f"\n  n={n}  avg={avg_pct:+.1f}%  median={median_pct:+.1f}%"
+            f"  avg delta={avg_d:+.0f} m"
+            f"  within ±10%: {within_10}/{n}  within ±15%: {within_15}/{n}"
+        )
+
+    print(f"\nCSV saved to: {args.out}")
+
+
+if __name__ == "__main__":
+    main()