# Fields only needed for athlete.json aggregation at extract time — they add
# bulk to every summary entry but are never read by the feed UI.
_FEED_STRIP = {"best_efforts", "best_climb_m", "source"}


def _write_year_shards(merged_dir: Path, activities: list[dict], index_meta: dict) -> None:
    """Split activities by year and write index-{year}.json shards.

    Replaces merged_dir/index.json with a shard manifest so the feed can
    load only the most-recent year on first paint and fetch older years lazily.

    Args:
        merged_dir: Directory that receives ``index.json`` and the shard files.
        activities: Activity summaries in display order; the relative order
            is preserved inside every shard.
        index_meta: Source index document. Every key except ``activities``
            and ``shards`` is copied into the manifest and into each shard.

    Activities whose ``started_at`` does not start with four ASCII digits are
    grouped into ``index-unknown.json`` (manifest ``year`` 0), which is listed
    after all dated shards.
    """
    from collections import defaultdict

    unknown = "unknown"

    def year_of(activity: dict) -> str:
        started = activity.get("started_at")
        prefix = started[:4] if isinstance(started, str) else ""
        # The bucket name becomes part of a file name and is passed to int(),
        # so accept nothing but a genuine 4-digit ASCII year.
        if len(prefix) == 4 and prefix.isascii() and prefix.isdigit():
            return prefix
        return unknown

    by_year: dict[str, list[dict]] = defaultdict(list)
    for a in activities:
        # Strip aggregation-only fields to keep shard files small
        by_year[year_of(a)].append({k: v for k, v in a.items() if k not in _FEED_STRIP})

    # Newest year first; the undated bucket always goes last (a plain reverse
    # string sort would put "unknown" ahead of every real year).
    years = sorted((y for y in by_year if y != unknown), reverse=True)
    if unknown in by_year:
        years.append(unknown)

    # Metadata shared by the manifest and every shard document.
    meta = {k: v for k, v in index_meta.items() if k not in ("activities", "shards")}

    shards = []
    written = set()
    for year in years:
        fname = f"index-{year}.json"
        shard_doc = {**meta, "shards": [], "activities": by_year[year]}
        (merged_dir / fname).write_text(json.dumps(shard_doc, indent=2, ensure_ascii=False))
        written.add(fname)
        shards.append({
            "url": fname,
            "year": 0 if year == unknown else int(year),
            "count": len(by_year[year]),
        })

    root_doc = {**meta, "shards": shards, "activities": []}
    (merged_dir / "index.json").write_text(json.dumps(root_doc, indent=2, ensure_ascii=False))

    # Remove stale shard files from previous runs only now that the manifest
    # no longer references them, so a failed write above cannot leave
    # index.json pointing at files that were already deleted.
    for stale in merged_dir.glob("index-*.json"):
        if stale.name not in written:
            stale.unlink()
*/ function trackPath(coords: [number, number][] | null, w: number, h: number): string { @@ -41,8 +41,10 @@ let sport: Sport | 'all' = 'all'; let shown = PAGE_SIZE; let loading = true; + let loadingMore = false; let error = ''; let mounted = false; + let pendingShards: string[] = []; /** Logged-in handle — resolved async via bincio:me event. */ let me: string = ''; @@ -58,7 +60,33 @@ }); $: filtered = sport === 'all' ? withPrivacy : withPrivacy.filter(a => a.sport === sport); $: visible = filtered.slice(0, shown); - $: hasMore = shown < filtered.length; + $: canShowMore = shown < filtered.length; + $: hasMore = canShowMore || pendingShards.length > 0; + + async function loadMore() { + if (canShowMore) { + shown += PAGE_SIZE; + return; + } + if (!pendingShards.length) return; + loadingMore = true; + const url = pendingShards[0]; + pendingShards = pendingShards.slice(1); + try { + const fresh = await loadShardActivities(url); + // Merge avoiding duplicates (IDB activities may already be present) + const existing = new Map(all.map(a => [a.id, a])); + for (const a of fresh) if (!existing.has(a.id)) existing.set(a.id, a); + all = [...existing.values()].sort((a, b) => + (b.started_at ?? '').localeCompare(a.started_at ?? ''), + ); + shown += PAGE_SIZE; + } catch { + // shard load failed — don't block the user + } finally { + loadingMore = false; + } + } $: if (sport) shown = PAGE_SIZE; // reset pagination on filter change @@ -84,12 +112,9 @@ const indexUrl = profileIndexUrl ? `${base}data/${profileIndexUrl}` : `${base}data/index.json`; - const index = await loadIndex(base, indexUrl); + const { index, pendingShards: pending } = await loadIndexPaged(base, indexUrl); + pendingShards = pending; let activities = index.activities; - // filterHandle only applies when loading the root manifest (multi-user feed). 
- // When profileIndexUrl is set we already loaded the right user's shard directly — - // activities from a direct shard fetch have no handle tag, so the filter would - // remove everything. if (filterHandle && !profileIndexUrl) { activities = activities.filter(a => (a as any).handle === filterHandle); } @@ -230,10 +255,17 @@ {#if hasMore}
{/if} diff --git a/site/src/lib/dataloader.ts b/site/src/lib/dataloader.ts index a19dc87..032c8b8 100644 --- a/site/src/lib/dataloader.ts +++ b/site/src/lib/dataloader.ts @@ -55,6 +55,22 @@ function emptyIndex(): BASIndex { }; } +// ── Helpers ─────────────────────────────────────────────────────────────────── + +function isYearShardUrl(url: string): boolean { + return /(?:^|\/)index-\d{4}\.json$/.test(url); +} + +function rewriteActivityUrls(a: ActivitySummary, shardBase: string): ActivitySummary { + return { + ...a, + detail_url: a.detail_url && !a.detail_url.startsWith('http') + ? `${shardBase}${a.detail_url}` : a.detail_url, + track_url: a.track_url && !a.track_url.startsWith('http') + ? `${shardBase}${a.track_url}` : a.track_url, + }; +} + // ── Public API ──────────────────────────────────────────────────────────────── /** @@ -87,14 +103,8 @@ async function resolveShards( // Rewrite relative detail_url / track_url to be absolute so they can be // fetched correctly regardless of where the root index lives. return activities.map(a => ({ - ...a, + ...rewriteActivityUrls(a, shardBase), ...(shard.handle ? { handle: shard.handle } : {}), - detail_url: a.detail_url && !a.detail_url.startsWith('http') - ? `${shardBase}${a.detail_url}` - : a.detail_url, - track_url: a.track_url && !a.track_url.startsWith('http') - ? `${shardBase}${a.track_url}` - : a.track_url, })); }), ); @@ -150,6 +160,97 @@ export async function loadIndex(baseUrl: string, indexUrl?: string): Promise { + indexUrl = indexUrl ?? `${baseUrl}data/index.json`; + + const [serverResult, localResult] = await Promise.allSettled([ + fetchJSON(indexUrl), + listLocalActivities(), + ]); + + const server = serverResult.status === 'fulfilled' ? serverResult.value : null; + const local = localResult.status === 'fulfilled' ? 
localResult.value : []; + + if (!server && local.length === 0) return { index: emptyIndex(), pendingShards: [] }; + + const base = indexUrl.substring(0, indexUrl.lastIndexOf('/') + 1); + const allShards = server?.shards ?? []; + + const yearShards = allShards.filter(s => isYearShardUrl(s.url)); + const otherShards = allShards.filter(s => !isYearShardUrl(s.url)); + + // ── Year-sharded index (single-user or profile page) ─────────────────────── + // Load only the first (most-recent) year shard; return the rest as pending. + let yearFirstActivities: ActivitySummary[] = []; + let pendingShards: string[] = []; + + if (yearShards.length > 0) { + const sorted = [...yearShards].sort((a, b) => b.url.localeCompare(a.url)); + const firstUrl = sorted[0].url.startsWith('http') ? sorted[0].url : `${base}${sorted[0].url}`; + const shardBase = firstUrl.substring(0, firstUrl.lastIndexOf('/') + 1); + try { + const first = await fetchJSON(firstUrl); + yearFirstActivities = (first.activities ?? []).map(a => rewriteActivityUrls(a, shardBase)); + } catch (e) { + console.error('[bincio] first year shard failed:', sorted[0].url, e); + } + pendingShards = sorted.slice(1).map(s => + s.url.startsWith('http') ? s.url : `${base}${s.url}`, + ); + } + + // ── Non-year shards (multi-user manifest) — loaded eagerly as before ─────── + let otherActivities: ActivitySummary[] = []; + if (otherShards.length > 0) { + const otherIndex: BASIndex = { ...(server ?? emptyIndex()), shards: otherShards }; + otherActivities = await resolveShards(otherIndex, indexUrl); + } + + // ── Own activities (legacy flat index with no shards) ────────────────────── + const ownActivities = allShards.length === 0 ? (server?.activities ?? 
def _load_merged_activities(merged_dir: Path) -> dict[str, dict]:
    """Load all activities from year-sharded merged index. Returns id→dict map.

    Reads ``merged_dir/index.json``, takes any activities stored inline in it,
    then appends the ``activities`` of every shard listed under ``shards``
    (each ``url`` is resolved relative to ``merged_dir``).

    Raises:
        FileNotFoundError: If the manifest lists a shard that is not on disk.
            A dangling manifest entry is a writer bug, so it must fail the
            test instead of being skipped silently.
    """
    root = json.loads((merged_dir / "index.json").read_text())
    all_acts = list(root.get("activities", []))
    for shard in root.get("shards", []):
        shard_path = merged_dir / shard["url"]
        if not shard_path.is_file():
            raise FileNotFoundError(
                f"index.json lists shard {shard['url']!r} but {shard_path} does not exist"
            )
        sub = json.loads(shard_path.read_text())
        all_acts.extend(sub.get("activities", []))
    return {a["id"]: a for a in all_acts}
"index.json").read_text()) - assert len(merged["activities"]) >= 8, ( - f"Expected ≥8 merged activities for {handle}" - ) + merged_dir = data_root / handle / "_merged" + root = json.loads((merged_dir / "index.json").read_text()) + # Root index now has year shards; collect all activities across them + all_acts: list = list(root.get("activities", [])) + for shard in root.get("shards", []): + sp = merged_dir / shard["url"] + if sp.exists(): + all_acts.extend(json.loads(sp.read_text()).get("activities", [])) + assert len(all_acts) >= 8, f"Expected ≥8 merged activities for {handle}" def test_root_manifest(self, data_root): from bincio.render.cli import _user_dirs, _write_root_manifest