From 3b8bc159c55d58703c294d37ebe11dcb55c2fbfc Mon Sep 17 00:00:00 2001 From: Davide Scaini Date: Fri, 10 Apr 2026 22:01:44 +0200 Subject: [PATCH] upload strava zip --- bincio/extract/ingest.py | 5 +- bincio/extract/models.py | 1 + bincio/extract/strava_api.py | 5 ++ bincio/extract/strava_zip.py | 147 +++++++++++++++++++++++++++++++++++ bincio/serve/server.py | 49 ++++++++++++ docs/deployment/vps.md | 3 +- site/src/layouts/Base.astro | 105 +++++++++++++++++++++++++ tests/test_server_imports.py | 1 + 8 files changed, 313 insertions(+), 3 deletions(-) create mode 100644 bincio/extract/strava_zip.py diff --git a/bincio/extract/ingest.py b/bincio/extract/ingest.py index 24daf4e..de44618 100644 --- a/bincio/extract/ingest.py +++ b/bincio/extract/ingest.py @@ -46,8 +46,9 @@ def ingest_parsed( raise FileExistsError(f"Activity already exists: {activity_id}") metrics = compute(parsed) - write_activity(parsed, metrics, data_dir, privacy=privacy, rdp_epsilon=rdp_epsilon) - summary = build_summary(parsed, metrics, activity_id, privacy) + effective_privacy = parsed.privacy if parsed.privacy is not None else privacy + write_activity(parsed, metrics, data_dir, privacy=effective_privacy, rdp_epsilon=rdp_epsilon) + summary = build_summary(parsed, metrics, activity_id, effective_privacy) index_path = data_dir / "index.json" if index_path.exists(): diff --git a/bincio/extract/models.py b/bincio/extract/models.py index 253bb38..b5821b0 100644 --- a/bincio/extract/models.py +++ b/bincio/extract/models.py @@ -55,4 +55,5 @@ class ParsedActivity: description: Optional[str] = None gear: Optional[str] = None strava_id: Optional[str] = None + privacy: Optional[str] = None # "public", "private", or None (caller decides) laps: list[LapData] = field(default_factory=list) diff --git a/bincio/extract/strava_api.py b/bincio/extract/strava_api.py index 348fbd7..c60ecd4 100644 --- a/bincio/extract/strava_api.py +++ b/bincio/extract/strava_api.py @@ -201,6 +201,10 @@ def strava_to_parsed(meta: dict, streams: dict) -> ParsedActivity: source = f"strava:{meta['id']}" source_hash = "sha256:" + hashlib.sha256(source.encode()).hexdigest() + # Map Strava visibility to BAS privacy: only_me → private, everything else → public + visibility = meta.get("visibility") or "" + is_private = meta.get("private", False) or visibility == "only_me" + return ParsedActivity( points=points, sport=normalise_sport(meta.get("sport_type") or meta.get("type") or ""), @@ -210,4 +214,5 @@ def strava_to_parsed(meta: dict, streams: dict) -> ParsedActivity: title=meta.get("name") or None, description=meta.get("description") or None, strava_id=str(meta["id"]), + privacy="private" if is_private else "public", ) diff --git a/bincio/extract/strava_zip.py b/bincio/extract/strava_zip.py new file mode 100644 index 0000000..65777e7 --- /dev/null +++ b/bincio/extract/strava_zip.py @@ -0,0 +1,147 @@ +"""Process a Strava bulk export ZIP file into a BAS data store. + +The ZIP (downloaded from strava.com/athlete/delete_your_account or the data export +page) contains: + activities/ ← GPX, FIT, TCX files (plain or .gz variants) + activities.csv ← metadata (title, description, gear, strava ID) + bikes.csv / shoes.csv / … (ignored here) + +Processing strategy: stream one activity at a time to keep disk usage low. +The ZIP is never fully extracted; each activity file is extracted to a temp path, +parsed, ingested, then immediately deleted. The ZIP itself is deleted once done. +""" + +from __future__ import annotations + +import io +import json +import tempfile +import zipfile +from pathlib import Path +from typing import Generator, Optional + + +# File extensions recognised as activity files inside the ZIP. +_ACTIVITY_SUFFIXES = {".gpx", ".fit", ".tcx", ".gpx.gz", ".fit.gz", ".tcx.gz"} + + +def _is_activity_file(name: str) -> bool: + n = name.lower() + return any(n.endswith(s) for s in _ACTIVITY_SUFFIXES) + + +def strava_zip_iter( + zip_path: Path, + data_dir: Path, + originals_dir: Optional[Path] = None, +) -> Generator[dict, None, None]: + """Process a Strava export ZIP, yielding SSE-style progress dicts. + + Event types: + {"type": "validating"} + {"type": "error", "message": str} + {"type": "extracting_csv"} + {"type": "progress", "n": int, "total": int, "name": str, "status": "imported"|"skipped"|"error"} + {"type": "done", "imported": int, "skipped": int, "error_count": int, "errors": list[str]} + + The zip_path file is deleted after processing regardless of success/failure. + """ + from bincio.extract.ingest import ingest_parsed + from bincio.extract.parsers.factory import parse_file + from bincio.extract.strava_csv import StravaMetadata + + yield {"type": "validating"} + + try: + zf = zipfile.ZipFile(zip_path, "r") + except zipfile.BadZipFile as e: + zip_path.unlink(missing_ok=True) + yield {"type": "error", "message": f"Not a valid ZIP file: {e}"} + return + + try: + names = zf.namelist() + + # Validate structure + has_csv = "activities.csv" in names + activity_files = [n for n in names if n.startswith("activities/") and _is_activity_file(n)] + + if not has_csv: + yield {"type": "error", "message": "This doesn't look like a Strava export: activities.csv not found"} + return + if not activity_files: + yield {"type": "error", "message": "No activity files found in activities/ folder"} + return + + # Load activities.csv into memory (it's small — ~700 KB) + yield {"type": "extracting_csv"} + csv_bytes = zf.read("activities.csv") + with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp_csv: + tmp_csv.write(csv_bytes) + tmp_csv_path = Path(tmp_csv.name) + try: + metadata = StravaMetadata(tmp_csv_path) + finally: + tmp_csv_path.unlink(missing_ok=True) + + total = len(activity_files) + imported = 0 + skipped = 0 + errors: list[str] = [] + + for n, zip_entry in enumerate(activity_files, 1): + entry_name = Path(zip_entry).name # e.g. "12345678.fit.gz" + # Title from metadata if available; fall back to filename stem + meta_row = metadata.lookup(entry_name) + display_name = (meta_row or {}).get("Activity Name", "").strip() or entry_name + + # Determine activity ID from entry to check for duplicates before extracting + # (can't do this without parsing, so we extract to a small temp file) + suffix = "".join(Path(entry_name).suffixes) # ".fit.gz" or ".gpx" etc. + tmp_path: Optional[Path] = None + try: + with tempfile.NamedTemporaryFile(suffix=suffix, delete=False, dir=data_dir) as tmp: + tmp.write(zf.read(zip_entry)) + tmp_path = Path(tmp.name) + + parsed = parse_file(tmp_path) + + # Enrich with CSV metadata + if meta_row: + if not parsed.title and meta_row.get("Activity Name"): + parsed.title = meta_row["Activity Name"].strip() + if not parsed.description and meta_row.get("Activity Description"): + parsed.description = meta_row["Activity Description"].strip() + if not parsed.strava_id and meta_row.get("Activity ID"): + parsed.strava_id = meta_row["Activity ID"].strip() + + if originals_dir is not None: + import shutil + orig_dest = originals_dir / entry_name + shutil.copy2(tmp_path, orig_dest) + + ingest_parsed(parsed, data_dir, privacy="public") + imported += 1 + yield {"type": "progress", "n": n, "total": total, "name": display_name, "status": "imported"} + + except FileExistsError: + skipped += 1 + yield {"type": "progress", "n": n, "total": total, "name": display_name, "status": "skipped"} + except Exception as exc: + errors.append(f"{entry_name}: {type(exc).__name__}") + yield {"type": "progress", "n": n, "total": total, "name": display_name, "status": "error"} + finally: + if tmp_path is not None: + tmp_path.unlink(missing_ok=True) + + finally: + zf.close() + zip_path.unlink(missing_ok=True) + + yield { + "type": "done", + "imported": imported, + "skipped": skipped, + "error_count": len(errors), + "errors": errors[:5], + } diff --git a/bincio/serve/server.py b/bincio/serve/server.py index 83051fa..7c1f297 100644 --- a/bincio/serve/server.py +++ b/bincio/serve/server.py @@ -587,6 +587,55 @@ async def upload_activity( return JSONResponse({"ok": True, "added": len(added), "results": results}) +@app.post("/api/upload/strava-zip") +async def upload_strava_zip( + file: UploadFile = File(...), + bincio_session: Optional[str] = Cookie(default=None), +) -> StreamingResponse: + """Accept a Strava bulk export ZIP and stream SSE progress while processing. + + The ZIP is written to a temp file, processed activity-by-activity, then deleted. + Originals are never kept — the UI informs the user of this upfront. + """ + user = _require_user(bincio_session) + if not file.filename or not file.filename.lower().endswith(".zip"): + raise HTTPException(400, "Please upload a .zip file") + + dd = _get_data_dir() / user.handle + import tempfile + tmp = tempfile.NamedTemporaryFile(suffix=".zip", delete=False, dir=dd) + zip_path = Path(tmp.name) + try: + while chunk := await file.read(1024 * 1024): # 1 MB chunks + tmp.write(chunk) + finally: + tmp.close() + + from bincio.extract.strava_zip import strava_zip_iter + from bincio.render.merge import merge_all + + def event_stream(): + any_imported = False + try: + for event in strava_zip_iter(zip_path, dd): + yield f"data: {json.dumps(event)}\n\n" + if event.get("type") == "progress" and event.get("status") == "imported": + any_imported = True + if event.get("type") == "done": + if any_imported: + merge_all(dd) + _trigger_rebuild(user.handle) + except Exception as exc: + zip_path.unlink(missing_ok=True) + yield f"data: {json.dumps({'type': 'error', 'message': str(exc)})}\n\n" + + return StreamingResponse( + event_stream(), + media_type="text/event-stream", + headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"}, + ) + + # ── Feedback ────────────────────────────────────────────────────────────────── _FEEDBACK_IMAGE_SUFFIXES = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".heic"} diff --git a/docs/deployment/vps.md b/docs/deployment/vps.md index 1a5f9b5..8842d08 100644 --- a/docs/deployment/vps.md +++ b/docs/deployment/vps.md @@ -239,7 +239,8 @@ server { root /var/www/bincio; index index.html; - client_max_body_size 512M; # bulk activity uploads + client_max_body_size 2G; # Strava export ZIPs can exceed 1 GB + client_body_timeout 300s; # allow slow uploads without nginx dropping the connection # API → bincio serve location /api/ { diff --git a/site/src/layouts/Base.astro b/site/src/layouts/Base.astro index da29b81..85494f7 100644 --- a/site/src/layouts/Base.astro +++ b/site/src/layouts/Base.astro @@ -248,6 +248,16 @@ try {

Checking…

+ @@ -307,6 +317,22 @@ try {

+ + + )} @@ -403,10 +429,17 @@ try { const viewChoose = document.getElementById('upload-view-choose'); const viewFile = document.getElementById('upload-view-file'); const viewStrava = document.getElementById('upload-view-strava'); + const viewZip = document.getElementById('upload-view-zip'); const chooseFile = document.getElementById('upload-choose-file'); const chooseStrava = document.getElementById('upload-choose-strava'); + const chooseZip = document.getElementById('upload-choose-zip'); const backFile = document.getElementById('upload-back-file'); const backStrava = document.getElementById('upload-back-strava'); + const backZip = document.getElementById('upload-back-zip'); + const zipDrop = document.getElementById('zip-drop'); + const zipInput = document.getElementById('zip-input'); + const zipLabel = document.getElementById('zip-label'); + const zipStatus = document.getElementById('zip-status'); const drop = document.getElementById('upload-drop'); const input = document.getElementById('upload-input'); const label = document.getElementById('upload-label'); @@ -427,6 +460,7 @@ try { viewChoose.style.display = name === 'choose' ? '' : 'none'; viewFile.style.display = name === 'file' ? '' : 'none'; viewStrava.style.display = name === 'strava' ? '' : 'none'; + viewZip.style.display = name === 'zip' ? '' : 'none'; } function openModal() { @@ -446,8 +480,10 @@ try { document.addEventListener('keydown', e => { if (e.key === 'Escape' && modal.style.display !== 'none') closeModal(); }); chooseFile.addEventListener('click', () => showView('file')); + chooseZip.addEventListener('click', () => showView('zip')); backFile.addEventListener('click', () => showView('choose')); backStrava.addEventListener('click', () => showView('choose')); + backZip.addEventListener('click', () => showView('choose')); // ── file upload ─────────────────────────────────────────────────────── drop.addEventListener('click', () => input.click()); @@ -638,6 +674,75 @@ try { stravaResetSoftBtn.addEventListener('click', () => stravaReset('soft')); stravaResetHardBtn.addEventListener('click', () => stravaReset('hard')); + // ── Strava ZIP upload ───────────────────────────────────────────────── + function doZipUpload(file) { + if (!file) return; + zipLabel.textContent = file.name; + zipStatus.textContent = 'Uploading…'; + zipStatus.style.color = ''; + + const fd = new FormData(); + fd.append('file', file); + + // POST the file; server responds with SSE stream immediately after receiving body + const xhr = new XMLHttpRequest(); + xhr.open('POST', `${editUrl}/api/upload/strava-zip`); + xhr.withCredentials = true; + xhr.setRequestHeader('Accept', 'text/event-stream'); + + let buf = ''; + let imported = 0; + + xhr.onprogress = () => { + // Parse SSE lines from the incrementally received response text + const newText = xhr.responseText.slice(buf.length); + buf = xhr.responseText; + for (const line of newText.split('\n')) { + if (!line.startsWith('data: ')) continue; + try { + const ev = JSON.parse(line.slice(6)); + if (ev.type === 'validating') { + zipStatus.textContent = 'Validating ZIP structure…'; + } else if (ev.type === 'extracting_csv') { + zipStatus.textContent = 'Reading activities.csv…'; + } else if (ev.type === 'progress') { + const pct = Math.round((ev.n / ev.total) * 100); + const icon = ev.status === 'imported' ? '↓' : ev.status === 'error' ? '✗' : '·'; + zipStatus.textContent = `${icon} ${ev.n}/${ev.total} (${pct}%) — ${ev.name}`; + if (ev.status === 'imported') imported++; + } else if (ev.type === 'done') { + const errNote = ev.error_count ? `, ${ev.error_count} errors` : ''; + zipStatus.textContent = `Done — ${ev.imported} imported, ${ev.skipped} already up to date${errNote}.`; + zipStatus.style.color = '#4ade80'; + zipInput.value = ''; + if (ev.imported > 0) setTimeout(() => window.location.reload(), 1500); + } else if (ev.type === 'error') { + zipStatus.textContent = 'Error: ' + ev.message; + zipStatus.style.color = '#f87171'; + zipInput.value = ''; + } + } catch (_) {} + } + }; + + xhr.onerror = () => { + zipStatus.textContent = 'Upload failed — check your connection.'; + zipStatus.style.color = '#f87171'; + }; + + xhr.send(fd); + } + + zipDrop.addEventListener('click', () => zipInput.click()); + zipInput.addEventListener('change', () => doZipUpload(zipInput.files?.[0])); + zipDrop.addEventListener('dragover', e => { e.preventDefault(); zipDrop.classList.add('border-zinc-400'); }); + zipDrop.addEventListener('dragleave', () => zipDrop.classList.remove('border-zinc-400')); + zipDrop.addEventListener('drop', e => { + e.preventDefault(); + zipDrop.classList.remove('border-zinc-400'); + doZipUpload(e.dataTransfer?.files?.[0]); + }); + // Handle ?strava= param set by the callback redirect (popup scenario) const sp = new URLSearchParams(window.location.search); if (sp.has('strava')) { diff --git a/tests/test_server_imports.py b/tests/test_server_imports.py index 8ac4c7d..63f11c4 100644 --- a/tests/test_server_imports.py +++ b/tests/test_server_imports.py @@ -14,6 +14,7 @@ def test_serve_app_has_routes(): paths = {r.path for r in app.routes} assert "/api/me" in paths assert "/api/upload" in paths + assert "/api/upload/strava-zip" in paths assert "/api/strava/status" in paths assert "/api/strava/auth-url" in paths assert "/api/strava/callback" in paths