feat: OG link previews — track image + meta tags for Telegram/WhatsApp
- bincio/render/ogimage.py: generate 400x400 elevation-coloured PNG with Pillow
- bincio/serve/routers/ogimage.py: /activity/{id}/ OG HTML stub for bot UAs;
/og-image/{user}/{id}.png serves pre-generated images with on-demand fallback
- scripts/generate_og_images.py: batch pre-generation, incremental (mtime skip)
- scripts/strava_elevation_audit.py: add source/threshold/MA columns and pct stats
- pyproject.toml: add Pillow>=10 to serve extras
This commit is contained in:
@@ -0,0 +1,198 @@
"""Audit elevation accuracy vs Strava.

Friends add a note with the Strava elevation to their activity descriptions.
Supported formats (case-insensitive):
  - "strava 1323md+"                  most common
  - "strava 1323 m d+"
  - "Strava 1625 m d+"
  - "Strava Elevation 1173m"
  - "1038 m d+ Strava"                number before the word strava
  - "Strava 207 metri di dislivello"

Descriptions live in _merged/activities/ (sidecar merge).
Computed elevation_gain_m is read from activities/ (main file).

Usage:
    uv run scripts/strava_elevation_audit.py [--data-dir /var/bincio/data] [--out elevation_audit.csv]
"""
|
||||
|
||||
from __future__ import annotations

import argparse
import csv
import json
import re
import statistics
import sys
from pathlib import Path

from bincio.extract.metrics import elevation_params
|
||||
|
||||
# Patterns tried in order; first match wins.
# Each pattern must have exactly one capturing group for the numeric value.
_PATTERNS: list[re.Pattern] = [
    # "strava NNN m ..." or "strava NNNmd+"
    re.compile(r'\bstrava\b\s*([0-9][0-9.,]*)\s*m', re.IGNORECASE),
    # "Strava Elevation NNNm" or "Strava ... NNNm" (one word between)
    re.compile(r'\bstrava\b\s+\w+\s+([0-9][0-9.,]*)\s*m', re.IGNORECASE),
    # "NNN m ... strava" (number comes first, up to 20 chars before strava)
    re.compile(r'([0-9][0-9.,]*)\s*m\b.{0,20}?\bstrava\b', re.IGNORECASE),
    # "Strava NNN metri di dislivello" (Italian)
    re.compile(r'\bstrava\b.*?([0-9][0-9.,]*)\s+metr', re.IGNORECASE),
]

# A whole number written with thousands grouping, e.g. "1,323" or "1.323".
# A leading zero is excluded so that "0.500" is still read as a decimal.
_GROUPED_INT = re.compile(r'[1-9][0-9]{0,2}(?:[.,][0-9]{3})+')


def _parse_elevation_number(raw: str) -> float:
    """Convert a captured number such as "1323", "12,5" or "1.323" to a float.

    Both "." and "," are accepted as either decimal or thousands separator:

    * a value made only of 3-digit groups ("1,323", "1.323") is treated as a
      thousands-grouped whole number;
    * otherwise the last separator is the decimal point and any earlier ones
      are grouping ("1.323,5" -> 1323.5, "12,5" -> 12.5).

    Trailing punctuation picked up by the capture group ("1323.") is ignored.

    Raises:
        ValueError: if the cleaned-up text is still not a valid number.
    """
    text = raw.rstrip(".,")
    if _GROUPED_INT.fullmatch(text):
        return float(text.replace(".", "").replace(",", ""))

    last_sep = max(text.rfind("."), text.rfind(","))
    if last_sep != -1:
        whole = text[:last_sep].replace(".", "").replace(",", "")
        text = f"{whole}.{text[last_sep + 1:]}"
    return float(text)


def _find_strava_elevation(description: str) -> float | None:
    """Extract the Strava elevation figure mentioned in *description*.

    The patterns in ``_PATTERNS`` are tried in order and the first one whose
    captured number can be parsed wins.

    Args:
        description: Free-text activity description.

    Returns:
        The elevation as a float, or None when no pattern yields a usable
        number.
    """
    for pat in _PATTERNS:
        m = pat.search(description)
        if m is None:
            continue
        try:
            return _parse_elevation_number(m.group(1))
        except ValueError:
            # Unparseable capture: give the remaining patterns a chance.
            continue
    return None
|
||||
|
||||
|
||||
def _load_json_object(path: Path) -> dict | None:
    """Return the JSON object stored at *path*, or None if it is unusable.

    None is returned when the file cannot be read, is not valid UTF-8 / JSON,
    or its top-level value is not an object (dict).
    """
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return None
    return data if isinstance(data, dict) else None


def audit(data_dir: Path, out_path: Path) -> list[dict]:
    """Compare computed elevation gain with the figure noted from Strava.

    Scans ``<data_dir>/<user>/_merged/activities/*.json`` for descriptions
    that mention Strava, extracts the noted elevation, and pairs it with the
    ``elevation_gain_m`` stored in ``<data_dir>/<user>/activities/<file>``
    (falling back to the merged file when the main file cannot be loaded).

    Side effects:
        * Writes the rows as CSV to *out_path* (only when at least one row
          was produced).
        * Prints the descriptions that mention Strava but from which no
          elevation could be parsed.

    Args:
        data_dir: Root directory containing one sub-directory per user.
        out_path: Destination of the CSV report.

    Returns:
        One dict per audited activity, sorted by absolute ``delta_m``
        (largest first; rows without a delta sort last).
    """
    rows: list[dict] = []
    unmatched: list[tuple[str, str]] = []  # (path, desc) couldn't parse elevation

    for merged_path in sorted(data_dir.glob("*/_merged/activities/*.json")):
        # Skip files whose names mark them as timeseries or geojson data.
        if ".timeseries." in merged_path.name or ".geojson" in merged_path.name:
            continue

        merged = _load_json_object(merged_path)
        if merged is None:
            continue

        description = merged.get("description")
        if not isinstance(description, str) or "strava" not in description.lower():
            continue

        # Skip strava:// athlete-mention links (not elevation notes)
        if re.search(r'strava://', description, re.IGNORECASE):
            continue

        strava_elev = _find_strava_elevation(description)
        if strava_elev is None:
            unmatched.append((str(merged_path), description))
            continue

        # merged_path is <data_dir>/<user>/_merged/activities/<file>, so the
        # user directory is parents[2]; the main file sits in its activities/.
        user_dir = merged_path.parents[2]
        computed = _load_json_object(user_dir / "activities" / merged_path.name)
        if computed is None:
            computed = merged  # fall back to merged values

        our_elev = computed.get("elevation_gain_m")
        title = computed.get("title") or merged.get("title") or merged_path.stem
        altitude_source = computed.get("altitude_source") or "unknown"
        source = computed.get("source") or ""
        device = computed.get("device") or "unknown"
        ma_window, threshold = elevation_params(altitude_source, source)

        if our_elev is None:
            delta = None
            pct = None
        else:
            delta = round(our_elev - strava_elev, 1)
            # A zero Strava figure has no meaningful percentage difference.
            pct = (
                round((our_elev - strava_elev) / strava_elev * 100, 1)
                if strava_elev != 0
                else None
            )

        rows.append({
            "file": merged_path.name,
            "user": user_dir.name,
            "title": title,
            "device": device,
            "altitude_source": altitude_source,
            "source": source,
            "ma_window_s": ma_window,
            "threshold_m": threshold,
            "our_elevation_m": our_elev,
            "strava_elevation_m": strava_elev,
            "delta_m": delta,
            "delta_pct": pct,
            "description": description[:120].replace("\n", " ").replace("\r", ""),
        })

    rows.sort(key=lambda r: abs(r["delta_m"] or 0), reverse=True)

    if rows:
        with out_path.open("w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=list(rows[0].keys()))
            writer.writeheader()
            writer.writerows(rows)

    if unmatched:
        print(f"\nCould not parse elevation from {len(unmatched)} description(s):")
        for path, desc in unmatched:
            print(f" {Path(path).name} {desc[:80]!r}")

    return rows
|
||||
|
||||
|
||||
def main() -> None:
    """Command-line entry point: run the audit and print a report.

    Parses ``--data-dir`` and ``--out``, runs ``audit``, prints one table
    line per audited activity followed by summary statistics, and reports
    where the CSV was written. Exits with status 1 when the data directory
    does not exist.

    Summary statistics are computed only over activities for which a
    percentage difference is available, and the reported ``n`` and the
    "within" ratios use that same count.
    """
    ap = argparse.ArgumentParser(description="Audit elevation accuracy vs Strava notes")
    ap.add_argument("--data-dir", default="/var/bincio/data", type=Path)
    ap.add_argument("--out", default="elevation_audit.csv", type=Path)
    args = ap.parse_args()

    if not args.data_dir.exists():
        print(f"ERROR: data dir not found: {args.data_dir}", file=sys.stderr)
        sys.exit(1)

    print(f"Scanning {args.data_dir} …")
    rows = audit(args.data_dir, args.out)

    if not rows:
        print("No activities found with a parseable Strava elevation note.")
        return

    print(f"\nFound {len(rows)} activit{'y' if len(rows)==1 else 'ies'}:\n")
    header = (
        f"{'File':<50} {'User':<15} {'Source':<16} {'AltSrc':<12}"
        f" {'MA':>4} {'Thr':>5} {'Ours':>8} {'Strava':>8} {'Delta':>8} {'Delta%':>7}"
    )
    print(header)
    print("-" * len(header))
    for r in rows:
        # Rows without a computed elevation have no delta / percentage.
        delta_str = f"{r['delta_m']:+.0f}" if r['delta_m'] is not None else "n/a"
        pct_str = f"{r['delta_pct']:+.1f}%" if r['delta_pct'] is not None else "n/a"
        our_str = f"{r['our_elevation_m']:.0f}" if r['our_elevation_m'] is not None else "n/a"
        print(
            f"{r['file']:<50} {r['user']:<15} {r['source']:<16} {r['altitude_source']:<12}"
            f" {r['ma_window_s']:>4} {r['threshold_m']:>5.1f}"
            f" {our_str:>8} {r['strava_elevation_m']:>8.0f}"
            f" {delta_str:>8} {pct_str:>7}"
        )

    pcts = [r["delta_pct"] for r in rows if r["delta_pct"] is not None]
    deltas = [r["delta_m"] for r in rows if r["delta_m"] is not None]
    if pcts:
        # Use the number of comparable rows as the denominator everywhere so
        # rows lacking a percentage do not dilute the ratios.
        n = len(pcts)
        avg_pct = sum(pcts) / n
        # True median: averages the two middle values for even counts.
        median_pct = statistics.median(pcts)
        within_10 = sum(1 for p in pcts if abs(p) <= 10)
        within_15 = sum(1 for p in pcts if abs(p) <= 15)
        avg_d = sum(deltas) / len(deltas) if deltas else 0
        print(
            f"\n n={n} avg={avg_pct:+.1f}% median={median_pct:+.1f}%"
            f" avg delta={avg_d:+.0f} m"
            f" within ±10%: {within_10}/{n} within ±15%: {within_15}/{n}"
        )

    print(f"\nCSV saved to: {args.out}")
|
||||
|
||||
|
||||
# Run the audit only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user