upload strava zip

This commit is contained in:
Davide Scaini
2026-04-10 22:01:44 +02:00
parent e5eadc69f2
commit 3b8bc159c5
8 changed files with 313 additions and 3 deletions
+3 -2
View File
@@ -46,8 +46,9 @@ def ingest_parsed(
raise FileExistsError(f"Activity already exists: {activity_id}") raise FileExistsError(f"Activity already exists: {activity_id}")
metrics = compute(parsed) metrics = compute(parsed)
write_activity(parsed, metrics, data_dir, privacy=privacy, rdp_epsilon=rdp_epsilon) effective_privacy = parsed.privacy if parsed.privacy is not None else privacy
summary = build_summary(parsed, metrics, activity_id, privacy) write_activity(parsed, metrics, data_dir, privacy=effective_privacy, rdp_epsilon=rdp_epsilon)
summary = build_summary(parsed, metrics, activity_id, effective_privacy)
index_path = data_dir / "index.json" index_path = data_dir / "index.json"
if index_path.exists(): if index_path.exists():
+1
View File
@@ -55,4 +55,5 @@ class ParsedActivity:
description: Optional[str] = None description: Optional[str] = None
gear: Optional[str] = None gear: Optional[str] = None
strava_id: Optional[str] = None strava_id: Optional[str] = None
privacy: Optional[str] = None # "public", "private", or None (caller decides)
laps: list[LapData] = field(default_factory=list) laps: list[LapData] = field(default_factory=list)
+5
View File
@@ -201,6 +201,10 @@ def strava_to_parsed(meta: dict, streams: dict) -> ParsedActivity:
source = f"strava:{meta['id']}" source = f"strava:{meta['id']}"
source_hash = "sha256:" + hashlib.sha256(source.encode()).hexdigest() source_hash = "sha256:" + hashlib.sha256(source.encode()).hexdigest()
# Map Strava visibility to BAS privacy: `private` flag set or visibility == "only_me" → private; everything else → public
visibility = meta.get("visibility") or ""
is_private = meta.get("private", False) or visibility == "only_me"
return ParsedActivity( return ParsedActivity(
points=points, points=points,
sport=normalise_sport(meta.get("sport_type") or meta.get("type") or ""), sport=normalise_sport(meta.get("sport_type") or meta.get("type") or ""),
@@ -210,4 +214,5 @@ def strava_to_parsed(meta: dict, streams: dict) -> ParsedActivity:
title=meta.get("name") or None, title=meta.get("name") or None,
description=meta.get("description") or None, description=meta.get("description") or None,
strava_id=str(meta["id"]), strava_id=str(meta["id"]),
privacy="private" if is_private else "public",
) )
+147
View File
@@ -0,0 +1,147 @@
"""Process a Strava bulk export ZIP file into a BAS data store.
The ZIP (downloaded from strava.com/athlete/delete_your_account or the data export
page) contains:
activities/ ← GPX, FIT, TCX files (plain or .gz variants)
activities.csv ← metadata (title, description, gear, strava ID)
bikes.csv / shoes.csv / … (ignored here)
Processing strategy: stream one activity at a time to keep disk usage low.
The ZIP is never fully extracted; each activity file is extracted to a temp path,
parsed, ingested, then immediately deleted. The ZIP itself is deleted once done.
"""
from __future__ import annotations
import io
import json
import tempfile
import zipfile
from pathlib import Path
from typing import Generator, Optional
# File extensions recognised as activity files inside the ZIP.
_ACTIVITY_SUFFIXES = {".gpx", ".fit", ".tcx", ".gpx.gz", ".fit.gz", ".tcx.gz"}
def _is_activity_file(name: str) -> bool:
n = name.lower()
return any(n.endswith(s) for s in _ACTIVITY_SUFFIXES)
def strava_zip_iter(
    zip_path: Path,
    data_dir: Path,
    originals_dir: Optional[Path] = None,
) -> Generator[dict, None, None]:
    """Process a Strava export ZIP, yielding SSE-style progress dicts.

    Args:
        zip_path: Uploaded export archive. It is deleted once processing ends.
        data_dir: Data store the activities are ingested into; per-activity
            temp files are also created here.
        originals_dir: If given, each extracted activity file is copied here
            under its archive file name before ingestion.

    Event types:
        {"type": "validating"}
        {"type": "error", "message": str}
        {"type": "extracting_csv"}
        {"type": "progress", "n": int, "total": int, "name": str, "status": "imported"|"skipped"|"error"}
        {"type": "done", "imported": int, "skipped": int, "error_count": int, "errors": list[str]}

    A structural problem (not a ZIP, no activities.csv, no activity files)
    yields a single "error" event and ends the stream without a "done" event.
    Per-activity failures are reported as "progress" events with status
    "error" and do not stop the import.

    The zip_path file is deleted after processing regardless of success/failure.
    """
    import shutil

    yield {"type": "validating"}

    try:
        zf = zipfile.ZipFile(zip_path, "r")
    except zipfile.BadZipFile as e:
        zip_path.unlink(missing_ok=True)
        yield {"type": "error", "message": f"Not a valid ZIP file: {e}"}
        return

    try:
        names = zf.namelist()

        # Validate structure before doing any real work.
        if "activities.csv" not in names:
            yield {"type": "error", "message": "This doesn't look like a Strava export: activities.csv not found"}
            return

        activity_files = [n for n in names if n.startswith("activities/") and _is_activity_file(n)]
        if not activity_files:
            yield {"type": "error", "message": "No activity files found in activities/ folder"}
            return

        # Imported lazily and inside the try block: invalid archives are
        # rejected without loading the parsing stack, and the ZIP is still
        # removed by the outer finally if an import fails.
        from bincio.extract.ingest import ingest_parsed
        from bincio.extract.parsers.factory import parse_file
        from bincio.extract.strava_csv import StravaMetadata

        # StravaMetadata takes a path, so activities.csv goes through a
        # short-lived temp file.
        yield {"type": "extracting_csv"}
        tmp_csv_path: Optional[Path] = None
        try:
            with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp_csv:
                # Record the path before writing so a failed write cannot
                # leave an orphaned temp file behind.
                tmp_csv_path = Path(tmp_csv.name)
                tmp_csv.write(zf.read("activities.csv"))
            metadata = StravaMetadata(tmp_csv_path)
        finally:
            if tmp_csv_path is not None:
                tmp_csv_path.unlink(missing_ok=True)

        total = len(activity_files)
        imported = 0
        skipped = 0
        errors: list[str] = []

        for n, zip_entry in enumerate(activity_files, 1):
            entry_name = Path(zip_entry).name  # e.g. "12345678.fit.gz"

            # Title from metadata if available; fall back to the file name.
            # `or ""` guards against a None cell so one bad row cannot
            # abort the whole import.
            meta_row = metadata.lookup(entry_name)
            display_name = ((meta_row or {}).get("Activity Name") or "").strip() or entry_name

            # Duplicates can only be detected after parsing, so every entry is
            # extracted to a temp file that keeps the original suffix chain
            # (".fit.gz", ".gpx", ...).
            suffix = "".join(Path(entry_name).suffixes)
            tmp_path: Optional[Path] = None
            try:
                with tempfile.NamedTemporaryFile(suffix=suffix, delete=False, dir=data_dir) as tmp:
                    # Record the path first: if extraction fails (bad CRC,
                    # disk full) the finally below still removes the file.
                    tmp_path = Path(tmp.name)
                    # Stream the entry instead of decompressing it fully
                    # into memory.
                    with zf.open(zip_entry) as src:
                        shutil.copyfileobj(src, tmp)

                parsed = parse_file(tmp_path)

                # Fill in fields the activity file did not provide from the CSV.
                if meta_row:
                    if not parsed.title and meta_row.get("Activity Name"):
                        parsed.title = meta_row["Activity Name"].strip()
                    if not parsed.description and meta_row.get("Activity Description"):
                        parsed.description = meta_row["Activity Description"].strip()
                    if not parsed.strava_id and meta_row.get("Activity ID"):
                        parsed.strava_id = meta_row["Activity ID"].strip()

                if originals_dir is not None:
                    shutil.copy2(tmp_path, originals_dir / entry_name)

                ingest_parsed(parsed, data_dir, privacy="public")
                imported += 1
                yield {"type": "progress", "n": n, "total": total, "name": display_name, "status": "imported"}

            except FileExistsError:
                # Raised for an activity that is already in the store.
                skipped += 1
                yield {"type": "progress", "n": n, "total": total, "name": display_name, "status": "skipped"}
            except Exception as exc:
                # Best effort: one unreadable activity must not abort the
                # import. Only the exception type is reported.
                errors.append(f"{entry_name}: {type(exc).__name__}")
                yield {"type": "progress", "n": n, "total": total, "name": display_name, "status": "error"}
            finally:
                if tmp_path is not None:
                    tmp_path.unlink(missing_ok=True)

    finally:
        zf.close()
        zip_path.unlink(missing_ok=True)

    yield {
        "type": "done",
        "imported": imported,
        "skipped": skipped,
        "error_count": len(errors),
        "errors": errors[:5],  # cap the list sent to the client
    }
+49
View File
@@ -587,6 +587,55 @@ async def upload_activity(
return JSONResponse({"ok": True, "added": len(added), "results": results}) return JSONResponse({"ok": True, "added": len(added), "results": results})
@app.post("/api/upload/strava-zip")
async def upload_strava_zip(
    file: UploadFile = File(...),
    bincio_session: Optional[str] = Cookie(default=None),
) -> StreamingResponse:
    """Accept a Strava bulk export ZIP and stream SSE progress while processing.

    The ZIP is written to a temp file, processed activity-by-activity, then deleted.
    Originals are never kept — the UI informs the user of this upfront.

    Raises:
        HTTPException: 400 when the upload does not have a ``.zip`` file name.
    """
    user = _require_user(bincio_session)
    if not file.filename or not file.filename.lower().endswith(".zip"):
        raise HTTPException(400, "Please upload a .zip file")

    dd = _get_data_dir() / user.handle

    import tempfile

    tmp = tempfile.NamedTemporaryFile(suffix=".zip", delete=False, dir=dd)
    zip_path = Path(tmp.name)
    try:
        with tmp:
            while chunk := await file.read(1024 * 1024):  # 1 MB chunks
                tmp.write(chunk)
    except BaseException:
        # The upload failed or was cancelled (client disconnect surfaces as a
        # BaseException): do not leave a partial archive on disk.
        zip_path.unlink(missing_ok=True)
        raise

    from bincio.extract.strava_zip import strava_zip_iter
    from bincio.render.merge import merge_all

    def event_stream():
        """Relay importer events as SSE frames; merge before announcing "done"."""
        any_imported = False
        events = strava_zip_iter(zip_path, dd)
        try:
            for event in events:
                kind = event.get("type")
                if kind == "progress" and event.get("status") == "imported":
                    any_imported = True
                elif kind == "done" and any_imported:
                    # Merge BEFORE the client sees "done": the UI reloads the
                    # page shortly after that event and must find fresh data.
                    merge_all(dd)
                    _trigger_rebuild(user.handle)
                yield f"data: {json.dumps(event)}\n\n"
        except Exception as exc:
            zip_path.unlink(missing_ok=True)
            yield f"data: {json.dumps({'type': 'error', 'message': str(exc)})}\n\n"
        finally:
            # Close the importer deterministically so its cleanup (temp files,
            # the ZIP itself) runs now, even if the stream was abandoned.
            events.close()

    return StreamingResponse(
        event_stream(),
        media_type="text/event-stream",
        headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
    )
# ── Feedback ────────────────────────────────────────────────────────────────── # ── Feedback ──────────────────────────────────────────────────────────────────
_FEEDBACK_IMAGE_SUFFIXES = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".heic"} _FEEDBACK_IMAGE_SUFFIXES = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".heic"}
+2 -1
View File
@@ -239,7 +239,8 @@ server {
root /var/www/bincio; root /var/www/bincio;
index index.html; index index.html;
client_max_body_size 512M; # bulk activity uploads client_max_body_size 2G; # Strava export ZIPs can exceed 1 GB
client_body_timeout 300s; # allow slow uploads without nginx dropping the connection
# API → bincio serve # API → bincio serve
location /api/ { location /api/ {
+105
View File
@@ -248,6 +248,16 @@ try {
<p id="strava-choose-sub" class="text-xs text-zinc-500">Checking…</p> <p id="strava-choose-sub" class="text-xs text-zinc-500">Checking…</p>
</div> </div>
</button> </button>
<button
id="upload-choose-zip"
class="flex items-center gap-3 p-4 rounded-lg border border-zinc-700 hover:border-zinc-500 hover:bg-zinc-800 transition-colors text-left"
>
<span class="text-2xl">📦</span>
<div>
<p class="text-sm font-medium text-white">Strava export ZIP</p>
<p class="text-xs text-zinc-500">Import your full Strava archive</p>
</div>
</button>
</div> </div>
</div> </div>
@@ -307,6 +317,22 @@ try {
</div> </div>
<p id="strava-status" class="mt-3 text-xs text-center" style="min-height: 1.25rem"></p> <p id="strava-status" class="mt-3 text-xs text-center" style="min-height: 1.25rem"></p>
</div> </div>
<!-- View: Strava ZIP upload -->
<div id="upload-view-zip" style="display:none">
<button id="upload-back-zip" class="text-xs text-zinc-500 hover:text-white mb-3 transition-colors">← Back</button>
<div class="rounded-lg border border-amber-800/50 bg-amber-950/30 p-3 mb-4 text-xs text-amber-300 leading-relaxed">
⚠ The ZIP will be processed and <strong>immediately deleted</strong> from the server — originals are not kept. Make sure you keep your own copy.
</div>
<div
id="zip-drop"
class="border-2 border-dashed border-zinc-700 rounded-lg p-6 text-center text-zinc-500 text-sm cursor-pointer hover:border-zinc-500 hover:text-zinc-300 transition-colors"
>
<div id="zip-label">Drop your Strava export .zip<br/>or click to browse</div>
<input id="zip-input" type="file" accept=".zip" class="hidden" />
</div>
<p id="zip-status" class="mt-3 text-xs text-center leading-relaxed" style="min-height: 1.25rem"></p>
</div>
</div> </div>
</div> </div>
)} )}
@@ -403,10 +429,17 @@ try {
const viewChoose = document.getElementById('upload-view-choose'); const viewChoose = document.getElementById('upload-view-choose');
const viewFile = document.getElementById('upload-view-file'); const viewFile = document.getElementById('upload-view-file');
const viewStrava = document.getElementById('upload-view-strava'); const viewStrava = document.getElementById('upload-view-strava');
const viewZip = document.getElementById('upload-view-zip');
const chooseFile = document.getElementById('upload-choose-file'); const chooseFile = document.getElementById('upload-choose-file');
const chooseStrava = document.getElementById('upload-choose-strava'); const chooseStrava = document.getElementById('upload-choose-strava');
const chooseZip = document.getElementById('upload-choose-zip');
const backFile = document.getElementById('upload-back-file'); const backFile = document.getElementById('upload-back-file');
const backStrava = document.getElementById('upload-back-strava'); const backStrava = document.getElementById('upload-back-strava');
const backZip = document.getElementById('upload-back-zip');
const zipDrop = document.getElementById('zip-drop');
const zipInput = document.getElementById('zip-input');
const zipLabel = document.getElementById('zip-label');
const zipStatus = document.getElementById('zip-status');
const drop = document.getElementById('upload-drop'); const drop = document.getElementById('upload-drop');
const input = document.getElementById('upload-input'); const input = document.getElementById('upload-input');
const label = document.getElementById('upload-label'); const label = document.getElementById('upload-label');
@@ -427,6 +460,7 @@ try {
viewChoose.style.display = name === 'choose' ? '' : 'none'; viewChoose.style.display = name === 'choose' ? '' : 'none';
viewFile.style.display = name === 'file' ? '' : 'none'; viewFile.style.display = name === 'file' ? '' : 'none';
viewStrava.style.display = name === 'strava' ? '' : 'none'; viewStrava.style.display = name === 'strava' ? '' : 'none';
viewZip.style.display = name === 'zip' ? '' : 'none';
} }
function openModal() { function openModal() {
@@ -446,8 +480,10 @@ try {
document.addEventListener('keydown', e => { if (e.key === 'Escape' && modal.style.display !== 'none') closeModal(); }); document.addEventListener('keydown', e => { if (e.key === 'Escape' && modal.style.display !== 'none') closeModal(); });
chooseFile.addEventListener('click', () => showView('file')); chooseFile.addEventListener('click', () => showView('file'));
chooseZip.addEventListener('click', () => showView('zip'));
backFile.addEventListener('click', () => showView('choose')); backFile.addEventListener('click', () => showView('choose'));
backStrava.addEventListener('click', () => showView('choose')); backStrava.addEventListener('click', () => showView('choose'));
backZip.addEventListener('click', () => showView('choose'));
// ── file upload ─────────────────────────────────────────────────────── // ── file upload ───────────────────────────────────────────────────────
drop.addEventListener('click', () => input.click()); drop.addEventListener('click', () => input.click());
@@ -638,6 +674,75 @@ try {
stravaResetSoftBtn.addEventListener('click', () => stravaReset('soft')); stravaResetSoftBtn.addEventListener('click', () => stravaReset('soft'));
stravaResetHardBtn.addEventListener('click', () => stravaReset('hard')); stravaResetHardBtn.addEventListener('click', () => stravaReset('hard'));
// ── Strava ZIP upload ─────────────────────────────────────────────────
/**
 * Upload a Strava export ZIP and mirror the server's SSE progress stream
 * into the #zip-status line.
 *
 * @param {File|undefined} file - The chosen/dropped file; no-op when missing.
 */
function doZipUpload(file) {
  if (!file) return;
  zipLabel.textContent = file.name;
  zipStatus.textContent = 'Uploading…';
  zipStatus.style.color = '';

  const fd = new FormData();
  fd.append('file', file);

  // POST the file; the server answers with an SSE stream once the body is received.
  const xhr = new XMLHttpRequest();
  xhr.open('POST', `${editUrl}/api/upload/strava-zip`);
  xhr.withCredentials = true;
  xhr.setRequestHeader('Accept', 'text/event-stream');

  let consumed = 0;     // chars of xhr.responseText already parsed
  let finished = false; // true once a terminal "done"/"error" event was shown

  const showError = (text) => {
    finished = true;
    zipStatus.textContent = text;
    zipStatus.style.color = '#f87171';
    zipInput.value = '';
  };

  const handleEvent = (ev) => {
    if (ev.type === 'validating') {
      zipStatus.textContent = 'Validating ZIP structure…';
    } else if (ev.type === 'extracting_csv') {
      zipStatus.textContent = 'Reading activities.csv…';
    } else if (ev.type === 'progress') {
      const pct = Math.round((ev.n / ev.total) * 100);
      const icon = ev.status === 'imported' ? '↓' : ev.status === 'error' ? '✗' : '·';
      zipStatus.textContent = `${icon} ${ev.n}/${ev.total} (${pct}%) — ${ev.name}`;
    } else if (ev.type === 'done') {
      finished = true;
      const errNote = ev.error_count ? `, ${ev.error_count} errors` : '';
      zipStatus.textContent = `Done — ${ev.imported} imported, ${ev.skipped} already up to date${errNote}.`;
      zipStatus.style.color = '#4ade80';
      zipInput.value = '';
      if (ev.imported > 0) setTimeout(() => window.location.reload(), 1500);
    } else if (ev.type === 'error') {
      showError('Error: ' + ev.message);
    }
  };

  // Parse only COMPLETE lines. A network chunk may end in the middle of a
  // "data: {...}" line; the partial tail stays unconsumed until the rest arrives.
  const drain = () => {
    const text = xhr.responseText;
    const end = text.lastIndexOf('\n');
    if (end < consumed) return;
    const lines = text.slice(consumed, end).split('\n');
    consumed = end + 1;
    for (const line of lines) {
      if (!line.startsWith('data: ')) continue;
      let ev;
      try {
        ev = JSON.parse(line.slice(6));
      } catch (_) {
        continue; // ignore a malformed frame, keep the stream going
      }
      handleEvent(ev);
    }
  };

  // Show upload progress while the request body is being sent.
  xhr.upload.onprogress = (e) => {
    if (e.lengthComputable && e.total > 0) {
      zipStatus.textContent = `Uploading… ${Math.round((e.loaded / e.total) * 100)}%`;
    }
  };

  xhr.onprogress = drain;

  xhr.onload = () => {
    if (xhr.status < 200 || xhr.status >= 300) {
      // Rejected before streaming started (e.g. 400/401/413): the body is
      // not SSE, so show the JSON "detail" when present, else the status code.
      let detail = `HTTP ${xhr.status}`;
      try {
        const body = JSON.parse(xhr.responseText);
        if (body && typeof body.detail === 'string') detail = body.detail;
      } catch (_) {}
      showError('Upload failed — ' + detail);
      return;
    }
    drain(); // flush anything that arrived after the last progress event
    if (!finished) showError('Error: connection closed before the import finished.');
  };

  xhr.onerror = () => {
    zipStatus.textContent = 'Upload failed — check your connection.';
    zipStatus.style.color = '#f87171';
  };

  xhr.send(fd);
}
// Clicking the drop zone opens the hidden file picker.
zipDrop.addEventListener('click', function () {
  zipInput.click();
});

// A file chosen through the picker starts the upload.
zipInput.addEventListener('change', function () {
  const picked = zipInput.files ? zipInput.files[0] : undefined;
  doZipUpload(picked);
});

// Highlight the drop zone while a file is dragged over it.
zipDrop.addEventListener('dragover', function (evt) {
  evt.preventDefault();
  zipDrop.classList.add('border-zinc-400');
});
zipDrop.addEventListener('dragleave', function () {
  zipDrop.classList.remove('border-zinc-400');
});

// A dropped file starts the upload; preventDefault stops the browser from opening it.
zipDrop.addEventListener('drop', function (evt) {
  evt.preventDefault();
  zipDrop.classList.remove('border-zinc-400');
  const transfer = evt.dataTransfer;
  const dropped = transfer && transfer.files ? transfer.files[0] : undefined;
  doZipUpload(dropped);
});
// Handle ?strava= param set by the callback redirect (popup scenario) // Handle ?strava= param set by the callback redirect (popup scenario)
const sp = new URLSearchParams(window.location.search); const sp = new URLSearchParams(window.location.search);
if (sp.has('strava')) { if (sp.has('strava')) {
+1
View File
@@ -14,6 +14,7 @@ def test_serve_app_has_routes():
paths = {r.path for r in app.routes} paths = {r.path for r in app.routes}
assert "/api/me" in paths assert "/api/me" in paths
assert "/api/upload" in paths assert "/api/upload" in paths
assert "/api/upload/strava-zip" in paths
assert "/api/strava/status" in paths assert "/api/strava/status" in paths
assert "/api/strava/auth-url" in paths assert "/api/strava/auth-url" in paths
assert "/api/strava/callback" in paths assert "/api/strava/callback" in paths