feat(serve): debounced site rebuild — burst uploads trigger one build, not N

Replace per-upload Astro build threads with a single background worker
(_site_rebuild_worker) that waits on an event, sleeps 60 s to let upload
bursts settle, then runs one full build + rsync. 271 concurrent uploads now
produce one build instead of 271 serialised builds, eliminating the OOM kill.
--webroot is re-enabled; merge-only path still runs immediately per upload.

Also: date filter row added to ActivityFeed.svelte (sport + date presets
with dynamic year pills); deploy/vps gitignored for VPS config backups.
This commit is contained in:
Davide Scaini
2026-04-30 21:23:29 +02:00
parent 5e36806392
commit f6e9fe8198
3 changed files with 128 additions and 73 deletions
+3
View File
@@ -32,6 +32,9 @@ bincio_data/
.env .env
extract_config.yaml extract_config.yaml
# VPS config backup (contains secrets, keep local only)
deploy/vps/
# Local working / scratch files # Local working / scratch files
advice.md advice.md
issues.md issues.md
+77 -68
View File
@@ -204,8 +204,8 @@ app = FastAPI(title="BincioActivity Serve")
@app.on_event("startup") @app.on_event("startup")
async def _cleanup_orphaned_tmp_zips() -> None: async def _on_startup() -> None:
"""Remove tmp*.zip files left in user data dirs by the pre-fix upload handler.""" """Startup tasks: clean orphaned tmp zips; launch site-rebuild worker if --webroot set."""
import glob as _glob import glob as _glob
data_dir = _get_data_dir() data_dir = _get_data_dir()
for p in _glob.glob(str(data_dir / "*" / "tmp*.zip")): for p in _glob.glob(str(data_dir / "*" / "tmp*.zip")):
@@ -213,6 +213,8 @@ async def _cleanup_orphaned_tmp_zips() -> None:
Path(p).unlink() Path(p).unlink()
except Exception: except Exception:
pass pass
if webroot is not None:
threading.Thread(target=_site_rebuild_worker, daemon=True, name="site-rebuild").start()
app.add_middleware(GZipMiddleware, minimum_size=1024) app.add_middleware(GZipMiddleware, minimum_size=1024)
@@ -329,20 +331,65 @@ def _unique_image_name(directory: Path, filename: str) -> str:
# ── Post-write rebuild ──────────────────────────────────────────────────────── # ── Post-write rebuild ────────────────────────────────────────────────────────
# Serialises concurrent rebuilds — only one full build runs at a time. # Serialises per-user merge subprocesses — concurrent merge_all runs on the
# A second upload that arrives while a build is in progress will queue and # same user dir would corrupt _merged/activities/.
# run after the first finishes, picking up all data written in between.
_rebuild_lock = threading.Lock() _rebuild_lock = threading.Lock()
# Signals the site-rebuild worker that at least one merge has completed.
# Using an Event as a boolean flag: set() by any merge, cleared by the worker.
_site_rebuild_event = threading.Event()
def _site_rebuild_worker() -> None:
    """Single background thread: debounced Astro build + rsync after uploads.

    Loops forever (intended to run as a daemon thread):

    1. Block until ``_site_rebuild_event`` is set by a completed merge.
    2. Sleep 60 s so a burst of uploads collapses into one build.
    3. Clear the event, then run ``bincio render`` (full build) followed by
       an rsync of ``<site_dir>/dist/`` into the webroot.

    Uploads whose merge finishes while the build is running set the event
    again, so a follow-up build starts after the current one finishes.

    The build subprocess runs while holding ``_rebuild_lock``: the per-upload
    merge wipes and recreates ``_merged/activities/``, so a build that reads
    the data directory at the same time could see a half-written tree.
    Failures are logged and never propagate; the loop keeps running.
    """
    _webroot = str(webroot)
    _data_dir = str(data_dir)
    _site_dir = str(site_dir)
    uv = shutil.which("uv") or str(Path.home() / ".local" / "bin" / "uv")

    while True:
        _site_rebuild_event.wait()
        time.sleep(60)  # collect burst uploads
        # Clear only now: every merge that signalled before this point is
        # already on disk and will be picked up by the build below, while a
        # signal arriving after this line triggers another iteration.
        _site_rebuild_event.clear()

        log.info("site-rebuild: starting full build + rsync to %s", _webroot)
        try:
            # Serialise with the per-user merges so the build never reads
            # the data directory while a merge is rewriting it.
            with _rebuild_lock:
                result = subprocess.run(
                    [uv, "run", "bincio", "render",
                     "--data-dir", _data_dir,
                     "--site-dir", _site_dir],
                    capture_output=True,
                    text=True,
                )
            if result.returncode != 0:
                log.error("site-rebuild: build failed (rc=%d):\n%s\n%s",
                          result.returncode, result.stdout, result.stderr)
                continue

            # Drop dist/data/ before syncing; the rsync below excludes
            # data/ as well, so the webroot's data/ is left alone.
            dist_data = Path(_site_dir) / "dist" / "data"
            if dist_data.exists():
                shutil.rmtree(dist_data)

            rsync = subprocess.run(
                ["rsync", "-a", "--delete", "--exclude=data/",
                 f"{_site_dir}/dist/", _webroot + "/"],
                capture_output=True,
                text=True,
            )
            if rsync.returncode != 0:
                log.error("site-rebuild: rsync failed (rc=%d):\n%s\n%s",
                          rsync.returncode, rsync.stdout, rsync.stderr)
            else:
                log.info("site-rebuild: done")
        except Exception:
            # Top-level boundary of the worker thread: log and keep looping
            # so one bad build does not kill all future rebuilds.
            log.exception("site-rebuild: unexpected error")
def _trigger_rebuild(handle: str) -> None: def _trigger_rebuild(handle: str) -> None:
"""Asynchronously re-merge and optionally rebuild + rsync the site. """Merge sidecars for handle asynchronously; signal the site-rebuild worker."""
- Without --webroot: fast path — merges sidecars + rewrites root manifest
(~1 s). New activity pages require the nginx try_files fallback to work.
- With --webroot: full Astro build + rsync to the nginx webroot (~30–60 s,
serialised). New activity pages are immediately accessible.
"""
if site_dir is None: if site_dir is None:
return return
if not _VALID_HANDLE.match(handle): if not _VALID_HANDLE.match(handle):
@@ -351,66 +398,28 @@ def _trigger_rebuild(handle: str) -> None:
uv = shutil.which("uv") or str(Path.home() / ".local" / "bin" / "uv") uv = shutil.which("uv") or str(Path.home() / ".local" / "bin" / "uv")
_data_dir = str(data_dir) _data_dir = str(data_dir)
_site_dir = str(site_dir) _site_dir = str(site_dir)
_webroot = str(webroot) if webroot else None
_handle = handle _handle = handle
def _run() -> None: def _run() -> None:
try: try:
if _webroot is None: log.info("rebuild[%s]: merge-only", _handle)
# Fast: only update data, skip Astro build. with _rebuild_lock:
# Serialised with the same lock: merge_all wipes and recreates result = subprocess.run(
# _merged/activities/ — concurrent runs would corrupt each other. [uv, "run", "bincio", "render",
log.info("rebuild[%s]: merge-only (no webroot)", _handle) "--data-dir", _data_dir,
with _rebuild_lock: "--site-dir", _site_dir,
result = subprocess.run( "--handle", _handle,
[uv, "run", "bincio", "render", "--no-build"],
"--data-dir", _data_dir, capture_output=True,
"--site-dir", _site_dir, text=True,
"--handle", _handle, )
"--no-build"], if result.returncode != 0:
capture_output=True, log.error("rebuild[%s]: merge failed (rc=%d):\n%s\n%s",
text=True, _handle, result.returncode, result.stdout, result.stderr)
) else:
if result.returncode != 0: log.info("rebuild[%s]: merge done", _handle)
log.error("rebuild[%s]: merge failed (rc=%d):\n%s\n%s", if webroot is not None:
_handle, result.returncode, result.stdout, result.stderr) _site_rebuild_event.set()
else:
log.info("rebuild[%s]: merge done", _handle)
else:
# Full build + rsync — serialised so concurrent uploads don't race
log.info("rebuild[%s]: full build + rsync to %s", _handle, _webroot)
with _rebuild_lock:
result = subprocess.run(
[uv, "run", "bincio", "render",
"--data-dir", _data_dir,
"--site-dir", _site_dir,
"--handle", _handle],
capture_output=True,
text=True,
)
if result.returncode != 0:
log.error("rebuild[%s]: build failed (rc=%d):\n%s\n%s",
_handle, result.returncode, result.stdout, result.stderr)
else:
log.info("rebuild[%s]: build done, rsyncing", _handle)
# Prune dist/data/ before rsync: Astro resolves the
# public/data symlink and copies all activity JSON into
# dist/, but nginx already serves /data/ directly from
# the live data dir — rsyncing it would duplicate GBs.
dist_data = Path(_site_dir) / "dist" / "data"
if dist_data.exists():
shutil.rmtree(dist_data)
rsync = subprocess.run(
["rsync", "-a", "--delete", "--exclude=data/",
f"{_site_dir}/dist/", _webroot + "/"],
capture_output=True,
text=True,
)
if rsync.returncode != 0:
log.error("rebuild[%s]: rsync failed (rc=%d):\n%s\n%s",
_handle, rsync.returncode, rsync.stdout, rsync.stderr)
else:
log.info("rebuild[%s]: rsync done", _handle)
except Exception: except Exception:
log.exception("rebuild[%s]: unexpected error", _handle) log.exception("rebuild[%s]: unexpected error", _handle)
+48 -5
View File
@@ -39,6 +39,9 @@
let all: ActivitySummary[] = []; let all: ActivitySummary[] = [];
let sport: Sport | 'all' = 'all'; let sport: Sport | 'all' = 'all';
let datePre = 'all';
let dateFrom = '';
let dateTo = '';
let shown = PAGE_SIZE; let shown = PAGE_SIZE;
let loading = true; let loading = true;
let loadingMore = false; let loadingMore = false;
@@ -53,6 +56,21 @@
/** Logged-in handle — resolved async via bincio:me event. */ /** Logged-in handle — resolved async via bincio:me event. */
let me: string = ''; let me: string = '';
/**
 * Translate a date-filter preset into string bounds for `started_at`.
 *
 * Bounds are ISO-style date prefixes ending in `T` and are meant to be
 * compared lexicographically: `dateFrom` is inclusive, `dateTo` exclusive.
 * An empty string means "no bound on this side".
 *
 * Presets:
 *  - `'all'`                → no bounds
 *  - four-digit year `YYYY` → that calendar year
 *  - `'7d'` / `'30d'`       → from N days before now (local time), open-ended
 *  - `'6mo'`                → from six calendar months before now, open-ended
 *  - anything else          → no bounds (e.g. a hand-edited `?date=` value),
 *                             rather than silently acting like `'6mo'`
 */
function computeDateRange(preset: string): { dateFrom: string; dateTo: string } {
  const unbounded = { dateFrom: '', dateTo: '' };
  if (preset === 'all') return unbounded;

  if (/^\d{4}$/.test(preset)) {
    const y = parseInt(preset, 10);
    return { dateFrom: `${y}-01-01T`, dateTo: `${y + 1}-01-01T` };
  }

  const d = new Date();
  if (preset === '7d') {
    d.setTime(d.getTime() - 7 * 86_400_000);
  } else if (preset === '30d') {
    d.setTime(d.getTime() - 30 * 86_400_000);
  } else if (preset === '6mo') {
    // Step back six months without day overflow: move to the 1st first,
    // then clamp the day to the target month's length (Aug 31 → Feb 28/29,
    // not Mar 3).
    const day = d.getDate();
    d.setDate(1);
    d.setMonth(d.getMonth() - 6);
    const lastDay = new Date(d.getFullYear(), d.getMonth() + 1, 0).getDate();
    d.setDate(Math.min(day, lastDay));
  } else {
    return unbounded;
  }

  const pad = (n: number) => String(n).padStart(2, '0');
  return {
    dateFrom: `${d.getFullYear()}-${pad(d.getMonth() + 1)}-${pad(d.getDate())}T`,
    dateTo: '',
  };
}
// Show private activities only to their owner. // Show private activities only to their owner.
// On a profile page (filterHandle set): show unlisted if me === filterHandle. // On a profile page (filterHandle set): show unlisted if me === filterHandle.
// On the global feed: show unlisted only for the logged-in user's own activities. // On the global feed: show unlisted only for the logged-in user's own activities.
@@ -63,7 +81,12 @@
} }
return true; return true;
}); });
$: filtered = sport === 'all' ? withPrivacy : withPrivacy.filter(a => a.sport === sport); $: allYears = [...new Set(all.map(a => a.started_at?.slice(0, 4)).filter(Boolean) as string[])].sort().reverse();
$: ({ dateFrom, dateTo } = computeDateRange(datePre));
$: withDate = !dateFrom && !dateTo ? withPrivacy : withPrivacy.filter(a =>
(!dateFrom || a.started_at >= dateFrom) && (!dateTo || a.started_at < dateTo)
);
$: filtered = sport === 'all' ? withDate : withDate.filter(a => a.sport === sport);
$: visible = filtered.slice(0, shown); $: visible = filtered.slice(0, shown);
$: canShowMore = shown < filtered.length; $: canShowMore = shown < filtered.length;
$: hasMore = canShowMore || pendingShards.length > 0 || feedNextPage > 0; $: hasMore = canShowMore || pendingShards.length > 0 || feedNextPage > 0;
@@ -99,17 +122,20 @@
} }
} }
$: if (sport) shown = PAGE_SIZE; // reset pagination on filter change $: if (sport || datePre) shown = PAGE_SIZE; // reset pagination on filter change
$: if (mounted) { $: if (mounted) {
const params = new URLSearchParams(window.location.search); const params = new URLSearchParams(window.location.search);
if (sport === 'all') params.delete('sport'); else params.set('sport', sport); if (sport === 'all') params.delete('sport'); else params.set('sport', sport);
if (datePre === 'all') params.delete('date'); else params.set('date', datePre);
const qs = params.toString(); const qs = params.toString();
history.replaceState(null, '', qs ? `?${qs}` : window.location.pathname); history.replaceState(null, '', qs ? `?${qs}` : window.location.pathname);
} }
onMount(async () => { onMount(async () => {
sport = (new URLSearchParams(window.location.search).get('sport') as Sport | 'all') ?? 'all'; const params = new URLSearchParams(window.location.search);
sport = (params.get('sport') as Sport | 'all') ?? 'all';
datePre = params.get('date') ?? 'all';
mounted = true; mounted = true;
// Resolve the logged-in handle so we can show the owner their private activities. // Resolve the logged-in handle so we can show the owner their private activities.
@@ -161,8 +187,8 @@
]; ];
</script> </script>
<!-- Filter bar --> <!-- Sport filter bar -->
<div class="flex gap-2 mb-6 flex-wrap"> <div class="flex gap-2 mb-3 flex-wrap">
{#each sports as s} {#each sports as s}
<button <button
class="px-3 py-1 rounded-full text-sm font-medium border transition-colors" class="px-3 py-1 rounded-full text-sm font-medium border transition-colors"
@@ -187,6 +213,23 @@
{/if} {/if}
</div> </div>
<!-- Date filter bar -->
<div class="flex gap-2 mb-6 flex-wrap">
{#each [{ value: 'all', label: 'All time' }, { value: '7d', label: '7 days' }, { value: '30d', label: '30 days' }, { value: '6mo', label: '6 months' }, ...allYears.map(y => ({ value: y, label: y }))] as d}
<button
class="px-3 py-1 rounded-full text-sm font-medium border transition-colors"
class:border-zinc-700={datePre !== d.value}
class:text-zinc-400={datePre !== d.value}
class:border-[--accent]={datePre === d.value}
class:text-white={datePre === d.value}
style={datePre === d.value ? 'background:var(--accent-dim)' : ''}
on:click={() => datePre = d.value}
>
{d.label}
</button>
{/each}
</div>
{#if loading} {#if loading}
<div class="grid grid-cols-1 sm:grid-cols-2 lg:grid-cols-3 gap-4"> <div class="grid grid-cols-1 sm:grid-cols-2 lg:grid-cols-3 gap-4">
{#each Array(12) as _} {#each Array(12) as _}