perf: year-shard index.json to cut initial load from the full history (MBs) to ~1 year of activities
merge_all/_merged/index.json is now a shard manifest; activities are
split into index-{year}.json files. The feed loads only the most-recent
year on first paint (~200 activities instead of all of them). Older
years are fetched lazily when the user clicks "Load older activities".
Also strips best_efforts / best_climb_m / source from shard files —
these fields are aggregation inputs only, never read by the feed UI.
This commit is contained in:
+52
-7
@@ -155,8 +155,7 @@ def merge_one(data_dir: Path, activity_id: str) -> None:
|
|||||||
activities.sort(key=lambda a: a.get("started_at", ""), reverse=True)
|
activities.sort(key=lambda a: a.get("started_at", ""), reverse=True)
|
||||||
activities.sort(key=lambda a: 0 if a.get("custom", {}).get("highlight") else 1)
|
activities.sort(key=lambda a: 0 if a.get("custom", {}).get("highlight") else 1)
|
||||||
|
|
||||||
index["activities"] = activities
|
_write_year_shards(merged_dir, activities, index)
|
||||||
(merged_dir / "index.json").write_text(json.dumps(index, indent=2, ensure_ascii=False))
|
|
||||||
|
|
||||||
|
|
||||||
def merge_all(data_dir: Path) -> int:
|
def merge_all(data_dir: Path) -> int:
|
||||||
@@ -267,11 +266,57 @@ def merge_all(data_dir: Path) -> int:
|
|||||||
activities.sort(key=lambda a: a.get("started_at", ""), reverse=True)
|
activities.sort(key=lambda a: a.get("started_at", ""), reverse=True)
|
||||||
activities.sort(key=lambda a: 0 if a.get("custom", {}).get("highlight") else 1)
|
activities.sort(key=lambda a: 0 if a.get("custom", {}).get("highlight") else 1)
|
||||||
|
|
||||||
index["activities"] = activities
|
_write_year_shards(merged_dir, activities, index)
|
||||||
(merged_dir / "index.json").write_text(
|
else:
|
||||||
json.dumps(index, indent=2, ensure_ascii=False)
|
# Remove any stale year shard files if the source index disappeared
|
||||||
)
|
for f in merged_dir.glob("index-*.json"):
|
||||||
elif (merged_dir / "index.json").exists():
|
f.unlink()
|
||||||
|
if (merged_dir / "index.json").exists():
|
||||||
(merged_dir / "index.json").unlink()
|
(merged_dir / "index.json").unlink()
|
||||||
|
|
||||||
return len(sidecars)
|
return len(sidecars)
|
||||||
|
|
||||||
|
|
||||||
|
# Fields only needed for athlete.json aggregation at extract time — they add
# bulk to every summary entry but are never read by the feed UI.
_FEED_STRIP = {"best_efforts", "best_climb_m", "source"}

# Bucket used for activities whose started_at does not begin with a 4-digit year.
_UNKNOWN_YEAR = "unknown"


def _write_year_shards(merged_dir: Path, activities: list[dict], index_meta: dict) -> None:
    """Split activities by year and write index-{year}.json shards.

    Replaces merged_dir/index.json with a shard manifest so the feed can
    load only the most-recent year on first paint and fetch older years lazily.

    Args:
        merged_dir: Directory that receives index.json and the index-*.json shards.
        activities: Activity summaries, already in their final display order;
            that order is preserved inside every shard.
        index_meta: Source index document. Every key except "activities" and
            "shards" is copied into the manifest and into each shard.

    Each manifest entry is ``{"url", "year", "count"}``. Real years are listed
    newest first; the "unknown" bucket (year 0) always comes last.
    """
    from collections import defaultdict

    by_year: dict[str, list[dict]] = defaultdict(list)
    for a in activities:
        prefix = str(a.get("started_at") or "")[:4]
        # Only a genuine 4-digit year may reach the file name; anything else
        # (missing, truncated, or arbitrary text) goes into the unknown bucket.
        is_year = len(prefix) == 4 and prefix.isascii() and prefix.isdigit()
        year = prefix if is_year else _UNKNOWN_YEAR
        # Strip aggregation-only fields to keep shard files small
        by_year[year].append({k: v for k, v in a.items() if k not in _FEED_STRIP})

    # Newest real year first; the unknown bucket must never be mistaken for
    # the most recent shard, so it sorts after every real year.
    years = sorted(by_year, key=lambda y: (y != _UNKNOWN_YEAR, y), reverse=True)

    meta = {k: v for k, v in index_meta.items() if k not in ("activities", "shards")}

    shards = []
    for year in years:
        fname = f"index-{year}.json"
        shard_doc = {**meta, "shards": [], "activities": by_year[year]}
        (merged_dir / fname).write_text(json.dumps(shard_doc, indent=2, ensure_ascii=False))
        shards.append({
            "url": fname,
            "year": 0 if year == _UNKNOWN_YEAR else int(year),
            "count": len(by_year[year]),
        })

    root_doc = {**meta, "shards": shards, "activities": []}
    (merged_dir / "index.json").write_text(json.dumps(root_doc, indent=2, ensure_ascii=False))

    # Remove stale shard files from previous runs only after the new shards
    # and manifest are in place, so shards are never missing mid-write.
    written = {entry["url"] for entry in shards}
    for f in merged_dir.glob("index-*.json"):
        if f.name not in written:
            f.unlink()
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
import { onMount } from 'svelte';
|
import { onMount } from 'svelte';
|
||||||
import type { ActivitySummary, BASIndex, Sport } from '../lib/types';
|
import type { ActivitySummary, BASIndex, Sport } from '../lib/types';
|
||||||
import { formatDistance, formatDuration, formatElevation, formatDate, isUnlisted, sportIcon, sportColor, sportLabel } from '../lib/format';
|
import { formatDistance, formatDuration, formatElevation, formatDate, isUnlisted, sportIcon, sportColor, sportLabel } from '../lib/format';
|
||||||
import { loadIndex } from '../lib/dataloader';
|
import { loadIndexPaged, loadShardActivities } from '../lib/dataloader';
|
||||||
|
|
||||||
/** Render preview_coords as an SVG polyline path string. */
|
/** Render preview_coords as an SVG polyline path string. */
|
||||||
function trackPath(coords: [number, number][] | null, w: number, h: number): string {
|
function trackPath(coords: [number, number][] | null, w: number, h: number): string {
|
||||||
@@ -41,8 +41,10 @@
|
|||||||
let sport: Sport | 'all' = 'all';
|
let sport: Sport | 'all' = 'all';
|
||||||
let shown = PAGE_SIZE;
|
let shown = PAGE_SIZE;
|
||||||
let loading = true;
|
let loading = true;
|
||||||
|
let loadingMore = false;
|
||||||
let error = '';
|
let error = '';
|
||||||
let mounted = false;
|
let mounted = false;
|
||||||
|
let pendingShards: string[] = [];
|
||||||
/** Logged-in handle — resolved async via bincio:me event. */
|
/** Logged-in handle — resolved async via bincio:me event. */
|
||||||
let me: string = '';
|
let me: string = '';
|
||||||
|
|
||||||
@@ -58,7 +60,33 @@
|
|||||||
});
|
});
|
||||||
$: filtered = sport === 'all' ? withPrivacy : withPrivacy.filter(a => a.sport === sport);
|
$: filtered = sport === 'all' ? withPrivacy : withPrivacy.filter(a => a.sport === sport);
|
||||||
$: visible = filtered.slice(0, shown);
|
$: visible = filtered.slice(0, shown);
|
||||||
$: hasMore = shown < filtered.length;
|
$: canShowMore = shown < filtered.length;
|
||||||
|
$: hasMore = canShowMore || pendingShards.length > 0;
|
||||||
|
|
||||||
|
/**
 * Handler for the "Load more" button.
 *
 * First reveals another page of already-loaded activities. Once those are
 * exhausted it fetches the next pending year shard, merges it into `all`
 * (activities already present win over the shard's copy) and reveals
 * another page.
 */
async function loadMore() {
  // Ignore clicks while a shard fetch is already in flight.
  if (loadingMore) return;
  if (canShowMore) {
    shown += PAGE_SIZE;
    return;
  }
  const url = pendingShards[0];
  if (url === undefined) return;
  loadingMore = true;
  try {
    const fresh = await loadShardActivities(url);
    // Dequeue only after the fetch has resolved: if it rejects, the shard
    // stays pending and the user can retry instead of losing that year.
    pendingShards = pendingShards.filter(u => u !== url);
    // Merge avoiding duplicates (IDB activities may already be present)
    const existing = new Map(all.map(a => [a.id, a]));
    for (const a of fresh) if (!existing.has(a.id)) existing.set(a.id, a);
    all = [...existing.values()].sort((a, b) =>
      (b.started_at ?? '').localeCompare(a.started_at ?? ''),
    );
    shown += PAGE_SIZE;
  } catch (e) {
    // shard load failed — don't block the user, but leave a trace
    console.error('[bincio] loading older activities failed:', url, e);
  } finally {
    loadingMore = false;
  }
}
|
||||||
|
|
||||||
$: if (sport) shown = PAGE_SIZE; // reset pagination on filter change
|
$: if (sport) shown = PAGE_SIZE; // reset pagination on filter change
|
||||||
|
|
||||||
@@ -84,12 +112,9 @@
|
|||||||
const indexUrl = profileIndexUrl
|
const indexUrl = profileIndexUrl
|
||||||
? `${base}data/${profileIndexUrl}`
|
? `${base}data/${profileIndexUrl}`
|
||||||
: `${base}data/index.json`;
|
: `${base}data/index.json`;
|
||||||
const index = await loadIndex(base, indexUrl);
|
const { index, pendingShards: pending } = await loadIndexPaged(base, indexUrl);
|
||||||
|
pendingShards = pending;
|
||||||
let activities = index.activities;
|
let activities = index.activities;
|
||||||
// filterHandle only applies when loading the root manifest (multi-user feed).
|
|
||||||
// When profileIndexUrl is set we already loaded the right user's shard directly —
|
|
||||||
// activities from a direct shard fetch have no handle tag, so the filter would
|
|
||||||
// remove everything.
|
|
||||||
if (filterHandle && !profileIndexUrl) {
|
if (filterHandle && !profileIndexUrl) {
|
||||||
activities = activities.filter(a => (a as any).handle === filterHandle);
|
activities = activities.filter(a => (a as any).handle === filterHandle);
|
||||||
}
|
}
|
||||||
@@ -230,10 +255,17 @@
|
|||||||
{#if hasMore}
|
{#if hasMore}
|
||||||
<div class="text-center mt-8">
|
<div class="text-center mt-8">
|
||||||
<button
|
<button
|
||||||
class="px-6 py-2 rounded-full border border-zinc-700 text-zinc-300 hover:border-zinc-500 hover:text-white transition-colors text-sm"
|
class="px-6 py-2 rounded-full border border-zinc-700 text-zinc-300 hover:border-zinc-500 hover:text-white disabled:opacity-40 transition-colors text-sm"
|
||||||
on:click={() => shown += PAGE_SIZE}
|
disabled={loadingMore}
|
||||||
|
on:click={loadMore}
|
||||||
>
|
>
|
||||||
|
{#if loadingMore}
|
||||||
|
Loading…
|
||||||
|
{:else if canShowMore}
|
||||||
Load more ({filtered.length - shown} remaining)
|
Load more ({filtered.length - shown} remaining)
|
||||||
|
{:else}
|
||||||
|
Load older activities ({pendingShards.length} more {pendingShards.length === 1 ? 'year' : 'years'})
|
||||||
|
{/if}
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
{/if}
|
{/if}
|
||||||
|
|||||||
+108
-7
@@ -55,6 +55,22 @@ function emptyIndex(): BASIndex {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Helpers ───────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * True when `url` points at a per-year shard file named `index-YYYY.json`
 * (exactly four digits), either bare or as the last path segment.
 *
 * A query string or fragment (e.g. a cache-busting `?v=…`) is ignored so
 * such URLs are still recognised as year shards.
 */
function isYearShardUrl(url: string): boolean {
  const path = url.split(/[?#]/, 1)[0] ?? '';
  return /(?:^|\/)index-\d{4}\.json$/.test(path);
}
|
||||||
|
|
||||||
|
/** Matches URLs that carry their own origin: `scheme://…` or protocol-relative `//…`. */
const ABSOLUTE_URL_RE = /^(?:[a-z][a-z\d+.-]*:)?\/\//i;

/**
 * Return a copy of `a` whose relative `detail_url` / `track_url` are prefixed
 * with `shardBase`, so they can be fetched regardless of where the index that
 * listed them lives.
 *
 * Empty or missing URLs and URLs that are already absolute are left untouched.
 * A relative path that merely begins with the letters "http" (for example
 * `http-archive/x.json`) is treated as relative.
 *
 * @param a - Activity summary as read from a shard file.
 * @param shardBase - Directory URL of that shard, including the trailing slash.
 * @returns A new object; `a` itself is not mutated.
 */
function rewriteActivityUrls(a: ActivitySummary, shardBase: string): ActivitySummary {
  const resolve = <T extends string | null | undefined>(url: T): T | string =>
    url && !ABSOLUTE_URL_RE.test(url) ? `${shardBase}${url}` : url;

  return {
    ...a,
    detail_url: resolve(a.detail_url),
    track_url: resolve(a.track_url),
  };
}
|
||||||
|
|
||||||
// ── Public API ────────────────────────────────────────────────────────────────
|
// ── Public API ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -87,14 +103,8 @@ async function resolveShards(
|
|||||||
// Rewrite relative detail_url / track_url to be absolute so they can be
|
// Rewrite relative detail_url / track_url to be absolute so they can be
|
||||||
// fetched correctly regardless of where the root index lives.
|
// fetched correctly regardless of where the root index lives.
|
||||||
return activities.map(a => ({
|
return activities.map(a => ({
|
||||||
...a,
|
...rewriteActivityUrls(a, shardBase),
|
||||||
...(shard.handle ? { handle: shard.handle } : {}),
|
...(shard.handle ? { handle: shard.handle } : {}),
|
||||||
detail_url: a.detail_url && !a.detail_url.startsWith('http')
|
|
||||||
? `${shardBase}${a.detail_url}`
|
|
||||||
: a.detail_url,
|
|
||||||
track_url: a.track_url && !a.track_url.startsWith('http')
|
|
||||||
? `${shardBase}${a.track_url}`
|
|
||||||
: a.track_url,
|
|
||||||
}));
|
}));
|
||||||
}),
|
}),
|
||||||
);
|
);
|
||||||
@@ -150,6 +160,97 @@ export async function loadIndex(baseUrl: string, indexUrl?: string): Promise<BAS
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Like loadIndex but only fetches the most-recent year shard immediately.
 * Returns the first-page activities plus a list of remaining shard URLs that
 * can be fetched on demand (e.g. when the user clicks "Load more").
 *
 * Falls back to full eager loading for non-year shard manifests (multi-user
 * combined feed) so the behaviour is identical to loadIndex in those cases.
 *
 * If the most-recent year shard cannot be fetched, its URL is kept at the
 * front of `pendingShards` so the caller can retry it instead of silently
 * losing that year.
 *
 * @param baseUrl - Site base URL; used only to build the default index URL.
 * @param indexUrl - Index/manifest URL; defaults to `${baseUrl}data/index.json`.
 * @returns The merged index (server + local activities, newest first) and
 *   the absolute URLs of year shards not loaded yet, newest first.
 */
export async function loadIndexPaged(
  baseUrl: string,
  indexUrl?: string,
): Promise<{ index: BASIndex; pendingShards: string[] }> {
  indexUrl = indexUrl ?? `${baseUrl}data/index.json`;

  // Server index and locally stored activities are independent; a failure of
  // one must not discard the other.
  const [serverResult, localResult] = await Promise.allSettled([
    fetchJSON<BASIndex>(indexUrl),
    listLocalActivities(),
  ]);

  const server = serverResult.status === 'fulfilled' ? serverResult.value : null;
  const local = localResult.status === 'fulfilled' ? localResult.value : [];

  if (!server && local.length === 0) return { index: emptyIndex(), pendingShards: [] };

  const base = indexUrl.substring(0, indexUrl.lastIndexOf('/') + 1);
  const toAbsolute = (url: string): string => (url.startsWith('http') ? url : `${base}${url}`);
  const allShards = server?.shards ?? [];

  const yearShards = allShards.filter(s => isYearShardUrl(s.url));
  const otherShards = allShards.filter(s => !isYearShardUrl(s.url));

  // ── Year-sharded index (single-user or profile page) ───────────────────────
  // Load only the first (most-recent) year shard; return the rest as pending.
  let yearFirstActivities: ActivitySummary[] = [];
  let pendingShards: string[] = [];

  // Descending URL order puts the highest year first.
  const yearUrls = [...yearShards]
    .sort((a, b) => b.url.localeCompare(a.url))
    .map(s => toAbsolute(s.url));
  const [firstUrl, ...olderUrls] = yearUrls;

  if (firstUrl !== undefined) {
    const shardBase = firstUrl.substring(0, firstUrl.lastIndexOf('/') + 1);
    try {
      const first = await fetchJSON<BASIndex>(firstUrl);
      yearFirstActivities = (first.activities ?? []).map(a => rewriteActivityUrls(a, shardBase));
      pendingShards = olderUrls;
    } catch (e: unknown) {
      console.error('[bincio] first year shard failed:', firstUrl, e);
      // Keep the failed shard queued so it can be retried on demand.
      pendingShards = yearUrls;
    }
  }

  // ── Non-year shards (multi-user manifest) — loaded eagerly as before ───────
  let otherActivities: ActivitySummary[] = [];
  if (otherShards.length > 0) {
    const otherIndex: BASIndex = { ...(server ?? emptyIndex()), shards: otherShards };
    otherActivities = await resolveShards(otherIndex, indexUrl);
  }

  // ── Own activities (legacy flat index with no shards) ──────────────────────
  const ownActivities = allShards.length === 0 ? (server?.activities ?? []) : [];

  // Merge: server + local (local overrides server for same id)
  const serverActivities = [...ownActivities, ...otherActivities, ...yearFirstActivities];
  const merged = new Map<string, ActivitySummary>();
  for (const a of serverActivities) merged.set(a.id, a);
  for (const a of local as ActivitySummary[]) merged.set(a.id, a);

  return {
    index: {
      ...(server ?? emptyIndex()),
      activities: [...merged.values()].sort(
        (a, b) => (b.started_at ?? '').localeCompare(a.started_at ?? ''),
      ),
    },
    pendingShards,
  };
}
|
||||||
|
|
||||||
|
/**
 * Fetch activities from a single year shard URL (absolute).
 * Used by ActivityFeed to lazily load older years when "Load more" is clicked.
 *
 * Relative `detail_url` / `track_url` values are rewritten against the
 * shard's own directory.
 *
 * @param shardUrl - Absolute URL of the shard file.
 * @returns The shard's activities, or an empty array if the shard could not
 *   be loaded. The failure is logged; this function never rejects.
 */
export async function loadShardActivities(shardUrl: string): Promise<ActivitySummary[]> {
  try {
    const data = await fetchJSON<BASIndex>(shardUrl);
    const base = shardUrl.substring(0, shardUrl.lastIndexOf('/') + 1);
    return (data.activities ?? []).map(a => rewriteActivityUrls(a, base));
  } catch (e: unknown) {
    // Best-effort by design, but leave a trace so a broken shard is diagnosable.
    console.error('[bincio] year shard failed:', shardUrl, e);
    return [];
  }
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Load a single activity detail, checking IndexedDB first so locally-converted
|
* Load a single activity detail, checking IndexedDB first so locally-converted
|
||||||
* activities are available offline.
|
* activities are available offline.
|
||||||
|
|||||||
+18
-4
@@ -9,6 +9,18 @@ import pytest
|
|||||||
from bincio.render.merge import apply_sidecar, merge_all, merge_one, parse_sidecar
|
from bincio.render.merge import apply_sidecar, merge_all, merge_one, parse_sidecar
|
||||||
|
|
||||||
|
|
||||||
|
def _load_merged_activities(merged_dir: Path) -> dict[str, dict]:
    """Load all activities from year-sharded merged index. Returns id→dict map.

    Reads merged_dir/index.json, then every shard file listed under its
    "shards" key, and collects the "activities" of all of them.

    Raises:
        AssertionError: if the manifest lists a shard file that does not
            exist — a dangling manifest entry is a merge bug and must fail
            the test rather than be skipped silently.
    """
    root = json.loads((merged_dir / "index.json").read_text())
    all_acts = list(root.get("activities", []))
    for shard in root.get("shards", []):
        shard_path = merged_dir / shard["url"]
        assert shard_path.exists(), f"manifest references missing shard file: {shard['url']}"
        sub = json.loads(shard_path.read_text())
        all_acts.extend(sub.get("activities", []))
    return {a["id"]: a for a in all_acts}
|
||||||
|
|
||||||
|
|
||||||
# ── parse_sidecar ─────────────────────────────────────────────────────────────
|
# ── parse_sidecar ─────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
@@ -176,8 +188,7 @@ def test_merge_all_private_filtered_from_index(data_dir):
|
|||||||
(edits / "2024-01-01T080000Z-morning-ride.md").write_text("---\nprivate: true\n---\n")
|
(edits / "2024-01-01T080000Z-morning-ride.md").write_text("---\nprivate: true\n---\n")
|
||||||
merge_all(data_dir)
|
merge_all(data_dir)
|
||||||
|
|
||||||
index = json.loads((data_dir / "_merged" / "index.json").read_text())
|
activities = _load_merged_activities(data_dir / "_merged")
|
||||||
activities = {a["id"]: a for a in index["activities"]}
|
|
||||||
# unlisted activities are kept in the index; filtering is client-side
|
# unlisted activities are kept in the index; filtering is client-side
|
||||||
assert "2024-01-01T080000Z-morning-ride" in activities
|
assert "2024-01-01T080000Z-morning-ride" in activities
|
||||||
assert activities["2024-01-01T080000Z-morning-ride"]["privacy"] == "unlisted"
|
assert activities["2024-01-01T080000Z-morning-ride"]["privacy"] == "unlisted"
|
||||||
@@ -191,8 +202,11 @@ def test_merge_all_highlight_sorts_first(data_dir):
|
|||||||
(edits / "2024-01-01T080000Z-morning-ride.md").write_text("---\nhighlight: true\n---\n")
|
(edits / "2024-01-01T080000Z-morning-ride.md").write_text("---\nhighlight: true\n---\n")
|
||||||
merge_all(data_dir)
|
merge_all(data_dir)
|
||||||
|
|
||||||
index = json.loads((data_dir / "_merged" / "index.json").read_text())
|
# Highlighted activity must be first within its year shard
|
||||||
ids = [a["id"] for a in index["activities"]]
|
merged_dir = data_dir / "_merged"
|
||||||
|
root = json.loads((merged_dir / "index.json").read_text())
|
||||||
|
shard_path = merged_dir / root["shards"][0]["url"]
|
||||||
|
ids = [a["id"] for a in json.loads(shard_path.read_text())["activities"]]
|
||||||
assert ids[0] == "2024-01-01T080000Z-morning-ride"
|
assert ids[0] == "2024-01-01T080000Z-morning-ride"
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -83,10 +83,15 @@ class TestPipeline:
|
|||||||
merge_all(data_root / "brut")
|
merge_all(data_root / "brut")
|
||||||
|
|
||||||
for handle in ("dave", "brut"):
|
for handle in ("dave", "brut"):
|
||||||
merged = json.loads((data_root / handle / "_merged" / "index.json").read_text())
|
merged_dir = data_root / handle / "_merged"
|
||||||
assert len(merged["activities"]) >= 8, (
|
root = json.loads((merged_dir / "index.json").read_text())
|
||||||
f"Expected ≥8 merged activities for {handle}"
|
# Root index now has year shards; collect all activities across them
|
||||||
)
|
all_acts: list = list(root.get("activities", []))
|
||||||
|
for shard in root.get("shards", []):
|
||||||
|
sp = merged_dir / shard["url"]
|
||||||
|
if sp.exists():
|
||||||
|
all_acts.extend(json.loads(sp.read_text()).get("activities", []))
|
||||||
|
assert len(all_acts) >= 8, f"Expected ≥8 merged activities for {handle}"
|
||||||
|
|
||||||
def test_root_manifest(self, data_root):
|
def test_root_manifest(self, data_root):
|
||||||
from bincio.render.cli import _user_dirs, _write_root_manifest
|
from bincio.render.cli import _user_dirs, _write_root_manifest
|
||||||
|
|||||||
Reference in New Issue
Block a user