perf: combined feed index for multi-user global feed

Instead of the browser resolving 20+ user shards recursively (~27 MB),
generate a pre-sorted feed.json at merge time with 50 activities per
page. The global feed loads one ~30 KB file on first paint; "Load more"
fetches subsequent pages (feed-2.json, feed-3.json, etc.).

Per-user profile pages still use year-sharded loadIndexPaged as before.
This commit is contained in:
Davide Scaini
2026-04-20 15:31:35 +02:00
parent e8a5fbbaba
commit db7047f210
4 changed files with 160 additions and 8 deletions
+5
View File
@@ -133,6 +133,11 @@ def _write_root_manifest(data: Path) -> None:
root.write_text(json.dumps(manifest, indent=2))
console.print(f"Root manifest updated: [cyan]{len(users)}[/cyan] user shard(s)")
if len(users) > 1:
from bincio.render.merge import write_combined_feed
n = write_combined_feed(data)
console.print(f"Combined feed: [cyan]{n}[/cyan] activities across all users")
def _link_data(site: Path, data: Path) -> None:
"""Symlink site/public/data → data root (each user has their own _merged/)."""
+82
View File
@@ -320,3 +320,85 @@ def _write_year_shards(merged_dir: Path, activities: list[dict], index_meta: dic
"activities": [],
}
(merged_dir / "index.json").write_text(json.dumps(root_doc, indent=2, ensure_ascii=False))
# Number of activities written to each combined-feed page file.
FEED_PAGE_SIZE: int = 50
# Fields dropped from every activity written to the combined feed: everything
# in _FEED_STRIP plus "mmp", which is never displayed in feed cards.
# preview_coords is the biggest contributor (~24% of shard size), but the feed
# cards need it for thumbnails, so it is kept (it is not added to the set here).
_COMBINED_FEED_STRIP = _FEED_STRIP | {"mmp"}
# How many of a user's newest year shards are scanned for the combined feed.
_FEED_RECENT_YEAR_SHARDS = 2
# Year-shard file names referenced from a user's index ("index-2025.json").
_YEAR_SHARD_RE = re.compile(r"index-\d{4}\.json$")
# Exactly the page files this module writes: feed.json, feed-2.json, ...
_FEED_PAGE_RE = re.compile(r"feed(-\d+)?\.json")


def _tag_feed_activity(activity: dict, handle: str, url_prefix: str) -> dict:
    """Return a copy of *activity* tagged with *handle* and rebased URLs.

    ``detail_url`` and ``track_url`` are stored relative to the user's index
    directory. The combined feed lives one or two levels above that, so
    relative values get *url_prefix* prepended. Values starting with "http"
    or "/" are left untouched.
    """
    tagged = {**activity, "handle": handle}
    for key in ("detail_url", "track_url"):
        url = tagged.get(key, "")
        if url and isinstance(url, str) and not url.startswith(("http", "/")):
            tagged[key] = url_prefix + url
    return tagged


def _read_user_feed_activities(user_dir: Path) -> list[dict]:
    """Load the feed candidates of one user directory.

    Reads ``_merged/index.json`` when ``_merged`` exists, otherwise
    ``index.json``. If the index lists shards, only the newest
    ``_FEED_RECENT_YEAR_SHARDS`` year shards are read; otherwise the
    activities stored inline in the index are used. Returns an empty list
    when the user has no index.
    """
    merged = user_dir / "_merged"
    use_merged = merged.exists()
    base = merged if use_merged else user_dir
    index_path = base / "index.json"
    if not index_path.exists():
        return []

    handle = user_dir.name
    # Path of the index directory relative to the data root, where the
    # combined feed is written.
    url_prefix = f"{handle}/_merged/" if use_merged else f"{handle}/"

    index = json.loads(index_path.read_text(encoding="utf-8"))
    shards = index.get("shards", [])

    batches: list[list[dict]] = []
    if shards:
        # The file names embed the year, so a reverse lexical sort puts the
        # newest shards first.
        year_shards = sorted(
            (s for s in shards if _YEAR_SHARD_RE.match(s.get("url") or "")),
            key=lambda s: s["url"],
            reverse=True,
        )
        for shard in year_shards[:_FEED_RECENT_YEAR_SHARDS]:
            shard_path = base / shard["url"]
            if shard_path.exists():
                shard_doc = json.loads(shard_path.read_text(encoding="utf-8"))
                batches.append(shard_doc.get("activities", []))
    else:
        batches.append(index.get("activities", []))

    tagged: list[dict] = []
    for batch in batches:
        # Both sources get the same URL rebasing; inline activities are
        # relative to the index directory just like sharded ones.
        tagged.extend(_tag_feed_activity(a, handle, url_prefix) for a in batch)
    return tagged


def write_combined_feed(data_dir: Path) -> int:
    """Write the paginated cross-user feed into *data_dir*.

    Every sub-directory of *data_dir* that contains an ``activities`` entry
    is treated as a user. Each user's recent activities are tagged with the
    directory name as ``handle``, merged, and sorted newest first by
    ``started_at``. The result is written in pages of ``FEED_PAGE_SIZE``
    activities: ``feed.json`` for page 1, then ``feed-2.json``,
    ``feed-3.json`` and so on. Fields listed in ``_COMBINED_FEED_STRIP`` are
    removed from each activity.

    Previously written feed pages are deleted first, so a shrinking feed
    leaves no stale pages behind. When there are no activities, no page is
    written.

    Args:
        data_dir: Data root containing one directory per user.

    Returns:
        The total number of activities written across all pages.
    """
    user_dirs = sorted(
        p for p in data_dir.iterdir()
        if p.is_dir() and (p / "activities").exists()
    )

    all_activities: list[dict] = []
    for user_dir in user_dirs:
        all_activities.extend(_read_user_feed_activities(user_dir))

    # "started_at" may be missing or null; `or ""` keeps the sort key a string.
    all_activities.sort(key=lambda a: a.get("started_at") or "", reverse=True)

    # Remove stale feed pages, but only files this function itself produces.
    for stale in data_dir.glob("feed*.json"):
        if _FEED_PAGE_RE.fullmatch(stale.name) and not stale.is_dir():
            stale.unlink()

    if not all_activities:
        return 0

    total = len(all_activities)
    total_pages = -(-total // FEED_PAGE_SIZE)  # ceiling division
    for page_no, start in enumerate(range(0, total, FEED_PAGE_SIZE), start=1):
        page = all_activities[start:start + FEED_PAGE_SIZE]
        slim = [
            {k: v for k, v in a.items() if k not in _COMBINED_FEED_STRIP}
            for a in page
        ]
        fname = "feed.json" if page_no == 1 else f"feed-{page_no}.json"
        doc = {
            "bas_version": "1.0",
            "page": page_no,
            "total_pages": total_pages,
            "total_activities": total,
            "activities": slim,
        }
        # ensure_ascii=False emits raw non-ASCII characters, so the encoding
        # is pinned instead of depending on the platform default.
        (data_dir / fname).write_text(
            json.dumps(doc, indent=2, ensure_ascii=False),
            encoding="utf-8",
        )
    return total