Global feed: switch from sequential pages to month-based BAS shards

feed.json is now a BAS shard index pointing to feed-YYYY-MM.json files
(~150 activities / ~25 KB gzip each) instead of 400+ sequential feed-N.json
pages. The frontend can now jump directly to a specific month when filtering
by year or date range, without loading all newer data first.

- merge.py: write_combined_feed groups by YYYY-MM and emits a shard index
- dataloader.ts: isYearShardUrl matches feed-YYYY-MM.json; loadCombinedFeed
  returns pendingShards; FeedPage interface and loadCombinedFeedPage removed
- ActivityFeed.svelte: _yearFromShard handles both index-YYYY and feed-YYYY-MM
  shard names; feedNextPage/feedTotalPages/loadingAllFeedPages removed; fixed an
  infinite loop by checking toLoad.length before setting loadingAllShards;
  onMount uses pendingShards from loadCombinedFeed
This commit is contained in:
Davide Scaini
2026-05-15 10:25:01 +02:00
parent d3bce49445
commit fe437626e6
3 changed files with 69 additions and 122 deletions
+28 -16
View File
@@ -412,10 +412,11 @@ _COMBINED_FEED_STRIP = _FEED_STRIP | {"mmp"}
def write_combined_feed(data_dir: Path) -> int:
"""Build data_dir/feed.json — the N most recent activities across all users.
"""Build data_dir/feed.json and per-month data_dir/feed-YYYY-MM.json shards.
The global feed page loads this single file instead of resolving 20+ user
shards recursively. Returns the number of activities written.
feed.json is a BAS shard index (same format as per-user index.json).
Each feed-YYYY-MM.json contains all activities for that month across all users,
sorted newest-first. Returns the total number of activities collected; entries
whose started_at lacks a YYYY-MM prefix are counted but not written to any shard.
"""
user_dirs = sorted(
p for p in data_dir.iterdir()
@@ -458,24 +459,35 @@ def write_combined_feed(data_dir: Path) -> int:
all_activities.sort(key=lambda a: a.get("started_at", ""), reverse=True)
# Remove stale feed pages
# Remove stale feed files (sequential pages and old year shards)
for f in data_dir.glob("feed*.json"):
f.unlink(missing_ok=True)
if not all_activities:
return 0
pages = [all_activities[i:i + FEED_PAGE_SIZE] for i in range(0, len(all_activities), FEED_PAGE_SIZE)]
for page_num, page in enumerate(pages):
slim = [{k: v for k, v in a.items() if k not in _COMBINED_FEED_STRIP} for a in page]
fname = "feed.json" if page_num == 0 else f"feed-{page_num + 1}.json"
doc = {
"bas_version": "1.0",
"page": page_num + 1,
"total_pages": len(pages),
"total_activities": len(all_activities),
"activities": slim,
}
(data_dir / fname).write_text(_dumps(doc))
# Group by YYYY-MM (month), preserving newest-first order within each bucket
by_month: dict[str, list[dict]] = {}
for a in all_activities:
ym = (a.get("started_at") or "")[:7] # "YYYY-MM"
if len(ym) == 7 and ym[4] == "-":
by_month.setdefault(ym, []).append(a)
months_desc = sorted(by_month.keys(), reverse=True)
# Write per-month shard files (~150-200 acts each → ~25 KB gzip)
for ym, acts in by_month.items():
slim = [{k: v for k, v in a.items() if k not in _COMBINED_FEED_STRIP} for a in acts]
doc: dict = {"bas_version": "1.0", "activities": slim}
(data_dir / f"feed-{ym}.json").write_text(_dumps(doc))
# Write feed.json as a BAS shard index (same pattern as per-user index.json)
index_doc: dict = {
"bas_version": "1.0",
"total_activities": len(all_activities),
"shards": [{"url": f"feed-{ym}.json"} for ym in months_desc],
"activities": [],
}
(data_dir / "feed.json").write_text(_dumps(index_doc))
return len(all_activities)