Global feed: switch from sequential pages to month-based BAS shards

feed.json is now a BAS shard index pointing to feed-YYYY-MM.json files (~150 activities / ~25 KB gzip each) instead of 400+ sequential feed-N.json pages. The frontend can now jump directly to a specific month when filtering by year or date range, without loading all newer data first.

- merge.py: write_combined_feed groups by YYYY-MM and emits a shard index
- dataloader.ts: isYearShardUrl matches feed-YYYY-MM.json; loadCombinedFeed returns pendingShards; FeedPage interface and loadCombinedFeedPage removed
- ActivityFeed.svelte: _yearFromShard handles both index-YYYY and feed-YYYY-MM; feedNextPage/feedTotalPages/loadingAllFeedPages removed; infinite-loop bug fixed (toLoad.length guard before setting loadingAllShards); onMount uses pendingShards from loadCombinedFeed
2026-05-15 10:25:01 +02:00
parent d3bce49445
commit fe437626e6
3 changed files with 69 additions and 122 deletions
@@ -412,10 +412,11 @@ _COMBINED_FEED_STRIP = _FEED_STRIP | {"mmp"}


 def write_combined_feed(data_dir: Path) -> int:
-    """Build data_dir/feed.json — the N most recent activities across all users.
+    """Build data_dir/feed.json and per-month data_dir/feed-YYYY-MM.json shards.

-    The global feed page loads this single file instead of resolving 20+ user
-    shards recursively.  Returns the number of activities written.
+    feed.json is a BAS shard index (same format as per-user index.json).
+    Each feed-YYYY-MM.json contains all activities for that month across all users,
+    sorted newest-first.  Returns the number of activities written.
    """
    user_dirs = sorted(
        p for p in data_dir.iterdir()
@@ -458,24 +459,35 @@ def write_combined_feed(data_dir: Path) -> int:

    all_activities.sort(key=lambda a: a.get("started_at", ""), reverse=True)

-    # Remove stale feed pages
+    # Remove stale feed files (sequential pages and old year shards)
    for f in data_dir.glob("feed*.json"):
        f.unlink(missing_ok=True)

    if not all_activities:
        return 0

-    pages = [all_activities[i:i + FEED_PAGE_SIZE] for i in range(0, len(all_activities), FEED_PAGE_SIZE)]
-    for page_num, page in enumerate(pages):
-        slim = [{k: v for k, v in a.items() if k not in _COMBINED_FEED_STRIP} for a in page]
-        fname = "feed.json" if page_num == 0 else f"feed-{page_num + 1}.json"
-        doc = {
-            "bas_version": "1.0",
-            "page": page_num + 1,
-            "total_pages": len(pages),
-            "total_activities": len(all_activities),
-            "activities": slim,
-        }
-        (data_dir / fname).write_text(_dumps(doc))
+    # Group by YYYY-MM (month), preserving newest-first order within each bucket
+    by_month: dict[str, list[dict]] = {}
+    for a in all_activities:
+        ym = (a.get("started_at") or "")[:7]  # "YYYY-MM"
+        if len(ym) == 7 and ym[4] == "-":
+            by_month.setdefault(ym, []).append(a)
+
+    months_desc = sorted(by_month.keys(), reverse=True)
+
+    # Write per-month shard files (~150-200 acts each → ~25 KB gzip)
+    for ym, acts in by_month.items():
+        slim = [{k: v for k, v in a.items() if k not in _COMBINED_FEED_STRIP} for a in acts]
+        doc: dict = {"bas_version": "1.0", "activities": slim}
+        (data_dir / f"feed-{ym}.json").write_text(_dumps(doc))
+
+    # Write feed.json as a BAS shard index (same pattern as per-user index.json)
+    index_doc: dict = {
+        "bas_version": "1.0",
+        "total_activities": len(all_activities),
+        "shards": [{"url": f"feed-{ym}.json"} for ym in months_desc],
+        "activities": [],
+    }
+    (data_dir / "feed.json").write_text(_dumps(index_doc))

    return len(all_activities)