perf: shard index.json by year so the initial load fetches ~1 year of activities instead of MBs of full history
merge_all/_merged/index.json is now a shard manifest; activities are
split into index-{year}.json files. The feed loads only the most-recent
year on first paint (~200 activities instead of all of them). Older
years are fetched lazily when the user clicks "Load older activities".
Also strips best_efforts / best_climb_m / source from shard files —
these fields are aggregation inputs only, never read by the feed UI.
This commit is contained in:
+53
-8
@@ -155,8 +155,7 @@ def merge_one(data_dir: Path, activity_id: str) -> None:
|
||||
activities.sort(key=lambda a: a.get("started_at", ""), reverse=True)
|
||||
activities.sort(key=lambda a: 0 if a.get("custom", {}).get("highlight") else 1)
|
||||
|
||||
index["activities"] = activities
|
||||
(merged_dir / "index.json").write_text(json.dumps(index, indent=2, ensure_ascii=False))
|
||||
_write_year_shards(merged_dir, activities, index)
|
||||
|
||||
|
||||
def merge_all(data_dir: Path) -> int:
|
||||
@@ -267,11 +266,57 @@ def merge_all(data_dir: Path) -> int:
|
||||
activities.sort(key=lambda a: a.get("started_at", ""), reverse=True)
|
||||
activities.sort(key=lambda a: 0 if a.get("custom", {}).get("highlight") else 1)
|
||||
|
||||
index["activities"] = activities
|
||||
(merged_dir / "index.json").write_text(
|
||||
json.dumps(index, indent=2, ensure_ascii=False)
|
||||
)
|
||||
elif (merged_dir / "index.json").exists():
|
||||
(merged_dir / "index.json").unlink()
|
||||
_write_year_shards(merged_dir, activities, index)
|
||||
else:
|
||||
# Remove any stale year shard files if the source index disappeared
|
||||
for f in merged_dir.glob("index-*.json"):
|
||||
f.unlink()
|
||||
if (merged_dir / "index.json").exists():
|
||||
(merged_dir / "index.json").unlink()
|
||||
|
||||
return len(sidecars)
|
||||
|
||||
|
||||
# Fields only needed for athlete.json aggregation at extract time — they add
# bulk to every summary entry but are never read by the feed UI.
_FEED_STRIP = {"best_efforts", "best_climb_m", "source"}


def _write_year_shards(merged_dir: Path, activities: list[dict], index_meta: dict) -> None:
    """Split activities by year and write index-{year}.json shards.

    Replaces merged_dir/index.json with a shard manifest so the feed can
    load only the most-recent year on first paint and fetch older years lazily.

    Args:
        merged_dir: Directory that receives ``index.json`` and the
            ``index-{year}.json`` shard files. Any existing
            ``index-*.json`` files in it are deleted first.
        activities: Activity summaries, already in display order. The
            relative order of activities is preserved inside each shard.
        index_meta: Index document whose keys (other than ``activities``
            and ``shards``) are copied into the manifest and every shard.

    The year is the first four characters of ``started_at`` when they are
    four ASCII digits; anything else (missing, empty, non-string, malformed)
    is grouped into the ``unknown`` shard, which is reported with
    ``"year": 0`` in the manifest.
    """
    from collections import defaultdict

    unknown = "unknown"

    # Remove stale year shard files from previous runs
    for stale in merged_dir.glob("index-*.json"):
        stale.unlink()

    by_year: dict[str, list[dict]] = defaultdict(list)
    for a in activities:
        started = a.get("started_at")
        prefix = started[:4] if isinstance(started, str) else ""
        # Require exactly four ASCII digits: str.isdigit() alone also accepts
        # characters such as superscripts that int() rejects, and the value
        # ends up in a filename.
        if len(prefix) == 4 and prefix.isascii() and prefix.isdigit():
            year = prefix
        else:
            year = unknown
        # Strip aggregation-only fields to keep shard files small
        slim = {k: v for k, v in a.items() if k not in _FEED_STRIP}
        by_year[year].append(slim)

    # Newest first. A plain reverse string sort would put "unknown" ahead of
    # every numeric year ('u' > '2'), making the undated shard the first
    # (i.e. "most recent") manifest entry, so it is appended last instead.
    years = sorted((y for y in by_year if y != unknown), reverse=True)
    if unknown in by_year:
        years.append(unknown)

    # Metadata shared by the manifest and every shard document.
    meta = {k: v for k, v in index_meta.items() if k not in ("activities", "shards")}

    shards = []
    for year in years:
        shard_doc = {**meta, "shards": [], "activities": by_year[year]}
        fname = f"index-{year}.json"
        # ensure_ascii=False emits raw non-ASCII characters, so the encoding
        # must be explicit rather than left to the platform locale.
        (merged_dir / fname).write_text(
            json.dumps(shard_doc, indent=2, ensure_ascii=False), encoding="utf-8"
        )
        shards.append(
            {
                "url": fname,
                "year": 0 if year == unknown else int(year),
                "count": len(by_year[year]),
            }
        )

    root_doc = {**meta, "shards": shards, "activities": []}
    (merged_dir / "index.json").write_text(
        json.dumps(root_doc, indent=2, ensure_ascii=False), encoding="utf-8"
    )
|
||||
|
||||
Reference in New Issue
Block a user