From cea1dbc2fb783c0f9000c153a17edc1c95b76ba2 Mon Sep 17 00:00:00 2001 From: Davide Scaini Date: Sun, 19 Apr 2026 23:34:55 +0200 Subject: [PATCH] ops: fix data/ triple-duplication costing ~24 GB on VPS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit astro build resolves the public/data symlink and copies all activity JSON into dist/; rsync then copied that to the webroot — but nginx already serves /data/ directly from /var/bincio/data/ via alias, so both copies were dead weight. Freed 36 GB → 14 GB on the live server. - post-receive hook: prune dist/data/ before rsync, add --exclude=data/ - docs: update manual rebuild command and nginx comment to match - serve/server.py: _mb() now uses lstat() to count symlinks at face value rather than following them to targets, so admin storage panel no longer double-counts _merged/ (which is mostly symlinks into activities/) --- bincio/serve/server.py | 6 ++++-- docs/deployment/vps.md | 13 +++++++++++-- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/bincio/serve/server.py b/bincio/serve/server.py index 78cffdc..51e5104 100644 --- a/bincio/serve/server.py +++ b/bincio/serve/server.py @@ -594,7 +594,9 @@ async def admin_disk(bincio_session: Optional[str] = Cookie(default=None)) -> JS def _mb(path: Path) -> float: if not path.exists(): return 0.0 - total = sum(f.stat().st_size for f in path.rglob("*") if f.is_file()) + # Use lstat to count symlink entries (few bytes each) rather than following + # the link to the target — prevents _merged/ from double-counting activities/. 
+ total = sum(f.lstat().st_size for f in path.rglob("*") if f.is_file() or f.is_symlink()) return round(total / 1_048_576, 1) def _count(path: Path, pattern: str = "*") -> int: @@ -991,7 +993,7 @@ async def me_storage(bincio_session: Optional[str] = Cookie(default=None)) -> JS def _mb(path: Path) -> float: if not path.exists(): return 0.0 - total = sum(f.stat().st_size for f in path.rglob("*") if f.is_file()) + total = sum(f.lstat().st_size for f in path.rglob("*") if f.is_file() or f.is_symlink()) return round(total / 1_048_576, 2) def _count(path: Path, pattern: str = "*") -> int: diff --git a/docs/deployment/vps.md b/docs/deployment/vps.md index 87b6d5b..02655f2 100644 --- a/docs/deployment/vps.md +++ b/docs/deployment/vps.md @@ -68,8 +68,11 @@ while read oldrev newrev refname; do cd $DEPLOY ~/.local/bin/uv run bincio render --data-dir $DATA --site-dir $DEPLOY/site + echo "--- Pruning dist/data (nginx serves /data/ directly from $DATA) ---" + rm -rf $DEPLOY/site/dist/data + echo "--- Copying dist to webroot ---" - rsync -a --delete $DEPLOY/site/dist/ /var/www/bincio/ + rsync -a --delete --exclude=data/ $DEPLOY/site/dist/ /var/www/bincio/ echo "--- Restarting API ---" systemctl restart bincio || echo "WARNING: bincio service restart failed — check journalctl -u bincio" @@ -223,7 +226,8 @@ ssh root@ "cd /opt/bincio && uv run bincio extract" # rebuild site ssh root@ "cd /opt/bincio && \ uv run bincio render --data-dir /var/bincio/data --site-dir site && \ - rsync -a --delete site/dist/ /var/www/bincio/" + rm -rf site/dist/data && \ + rsync -a --delete --exclude=data/ site/dist/ /var/www/bincio/" ``` --- @@ -253,6 +257,11 @@ server { # Data files served live from disk — bypasses the build/rsync cycle # so uploads and merges are visible immediately without a site rebuild. + # + # IMPORTANT: because nginx owns /data/ here, the post-receive hook must + # delete dist/data/ before rsyncing to the webroot. 
Otherwise the activity JSON + # (GBs) that astro build always copies into dist/ is duplicated again by rsync. + # The hook already does this; manual rebuilds must do the same. location /data/ { alias /var/bincio/data/; add_header Cache-Control "no-cache, must-revalidate";