upload strava zip
This commit is contained in:
@@ -46,8 +46,9 @@ def ingest_parsed(
|
||||
raise FileExistsError(f"Activity already exists: {activity_id}")
|
||||
|
||||
metrics = compute(parsed)
|
||||
write_activity(parsed, metrics, data_dir, privacy=privacy, rdp_epsilon=rdp_epsilon)
|
||||
summary = build_summary(parsed, metrics, activity_id, privacy)
|
||||
effective_privacy = parsed.privacy if parsed.privacy is not None else privacy
|
||||
write_activity(parsed, metrics, data_dir, privacy=effective_privacy, rdp_epsilon=rdp_epsilon)
|
||||
summary = build_summary(parsed, metrics, activity_id, effective_privacy)
|
||||
|
||||
index_path = data_dir / "index.json"
|
||||
if index_path.exists():
|
||||
|
||||
@@ -55,4 +55,5 @@ class ParsedActivity:
|
||||
description: Optional[str] = None
|
||||
gear: Optional[str] = None
|
||||
strava_id: Optional[str] = None
|
||||
privacy: Optional[str] = None # "public", "private", or None (caller decides)
|
||||
laps: list[LapData] = field(default_factory=list)
|
||||
|
||||
@@ -201,6 +201,10 @@ def strava_to_parsed(meta: dict, streams: dict) -> ParsedActivity:
|
||||
source = f"strava:{meta['id']}"
|
||||
source_hash = "sha256:" + hashlib.sha256(source.encode()).hexdigest()
|
||||
|
||||
# Map Strava visibility to BAS privacy: only_me → private, everything else → public
|
||||
visibility = meta.get("visibility") or ""
|
||||
is_private = meta.get("private", False) or visibility == "only_me"
|
||||
|
||||
return ParsedActivity(
|
||||
points=points,
|
||||
sport=normalise_sport(meta.get("sport_type") or meta.get("type") or ""),
|
||||
@@ -210,4 +214,5 @@ def strava_to_parsed(meta: dict, streams: dict) -> ParsedActivity:
|
||||
title=meta.get("name") or None,
|
||||
description=meta.get("description") or None,
|
||||
strava_id=str(meta["id"]),
|
||||
privacy="private" if is_private else "public",
|
||||
)
|
||||
|
||||
@@ -0,0 +1,147 @@
|
||||
"""Process a Strava bulk export ZIP file into a BAS data store.
|
||||
|
||||
The ZIP (downloaded from strava.com/athlete/delete_your_account or the data export
|
||||
page) contains:
|
||||
activities/ ← GPX, FIT, TCX files (plain or .gz variants)
|
||||
activities.csv ← metadata (title, description, gear, strava ID)
|
||||
bikes.csv / shoes.csv / … (ignored here)
|
||||
|
||||
Processing strategy: stream one activity at a time to keep disk usage low.
|
||||
The ZIP is never fully extracted; each activity file is extracted to a temp path,
|
||||
parsed, ingested, then immediately deleted. The ZIP itself is deleted once done.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import json
|
||||
import tempfile
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
from typing import Generator, Optional
|
||||
|
||||
|
||||
# File extensions recognised as activity files inside the ZIP.
_ACTIVITY_SUFFIXES = {".gpx", ".fit", ".tcx", ".gpx.gz", ".fit.gz", ".tcx.gz"}


def _is_activity_file(name: str) -> bool:
    """Return True if *name* ends with a recognised activity-file extension.

    The comparison is case-insensitive, so ``1.FIT.GZ`` matches as well.
    """
    # str.endswith accepts a tuple of candidates: one call instead of a loop.
    return name.lower().endswith(tuple(_ACTIVITY_SUFFIXES))
|
||||
|
||||
|
||||
def strava_zip_iter(
    zip_path: Path,
    data_dir: Path,
    originals_dir: Optional[Path] = None,
) -> Generator[dict, None, None]:
    """Process a Strava export ZIP, yielding SSE-style progress dicts.

    Event types:
        {"type": "validating"}
        {"type": "error", "message": str}
        {"type": "extracting_csv"}
        {"type": "progress", "n": int, "total": int, "name": str, "status": "imported"|"skipped"|"error"}
        {"type": "done", "imported": int, "skipped": int, "error_count": int, "errors": list[str]}

    Args:
        zip_path: Path to the export archive. The file is deleted after
            processing regardless of success/failure.
        data_dir: Directory passed to ``ingest_parsed``; per-activity temp
            files are also created here.
        originals_dir: When given, each extracted activity file is copied
            here (under its archive file name) before it is ingested.

    Yields:
        Progress dicts as listed above. A fatal validation problem yields a
        single ``error`` event and ends the stream; otherwise the stream ends
        with a ``done`` event whose ``errors`` list holds at most five entries.
    """
    import shutil

    yield {"type": "validating"}

    try:
        zf = zipfile.ZipFile(zip_path, "r")
    except zipfile.BadZipFile as e:
        zip_path.unlink(missing_ok=True)
        yield {"type": "error", "message": f"Not a valid ZIP file: {e}"}
        return

    try:
        names = zf.namelist()

        # Validate structure
        has_csv = "activities.csv" in names
        activity_files = [n for n in names if n.startswith("activities/") and _is_activity_file(n)]

        if not has_csv:
            yield {"type": "error", "message": "This doesn't look like a Strava export: activities.csv not found"}
            return
        if not activity_files:
            yield {"type": "error", "message": "No activity files found in activities/ folder"}
            return

        # Project imports are deferred until the archive is known to be a
        # Strava export, so invalid uploads are rejected without loading them.
        from bincio.extract.ingest import ingest_parsed
        from bincio.extract.parsers.factory import parse_file
        from bincio.extract.strava_csv import StravaMetadata

        # Load activities.csv (it's small — ~700 KB) via a short-lived temp file,
        # because StravaMetadata is constructed from a path.
        yield {"type": "extracting_csv"}
        tmp_csv_path: Optional[Path] = None
        try:
            with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp_csv:
                # Record the path before writing so a failed write cannot leak the file.
                tmp_csv_path = Path(tmp_csv.name)
                tmp_csv.write(zf.read("activities.csv"))
            metadata = StravaMetadata(tmp_csv_path)
        finally:
            if tmp_csv_path is not None:
                tmp_csv_path.unlink(missing_ok=True)

        total = len(activity_files)
        imported = 0
        skipped = 0
        errors: list[str] = []

        for n, zip_entry in enumerate(activity_files, 1):
            entry_name = Path(zip_entry).name  # e.g. "12345678.fit.gz"
            # Fall back to the file name until (and unless) metadata gives a title.
            display_name = entry_name
            suffix = "".join(Path(entry_name).suffixes)  # ".fit.gz" or ".gpx" etc.
            tmp_path: Optional[Path] = None
            try:
                # Looked up inside the try so one bad CSV row is reported as a
                # per-activity error instead of aborting the whole import.
                meta_row = metadata.lookup(entry_name)
                # `or ""` guards against a None value (assumes rows are dict-like
                # and may carry None for missing columns — TODO confirm).
                display_name = ((meta_row or {}).get("Activity Name") or "").strip() or entry_name

                # Duplicates can only be detected after parsing, so the entry is
                # extracted to a temp file first. It is streamed rather than read
                # whole so a large entry is never held in memory.
                with tempfile.NamedTemporaryFile(suffix=suffix, delete=False, dir=data_dir) as tmp:
                    # Record the path before writing so the finally clause can
                    # remove a partially written file.
                    tmp_path = Path(tmp.name)
                    with zf.open(zip_entry) as src:
                        shutil.copyfileobj(src, tmp)

                parsed = parse_file(tmp_path)

                # Enrich with CSV metadata (only fills fields the parser left empty)
                if meta_row:
                    if not parsed.title and meta_row.get("Activity Name"):
                        parsed.title = meta_row["Activity Name"].strip()
                    if not parsed.description and meta_row.get("Activity Description"):
                        parsed.description = meta_row["Activity Description"].strip()
                    if not parsed.strava_id and meta_row.get("Activity ID"):
                        parsed.strava_id = meta_row["Activity ID"].strip()

                if originals_dir is not None:
                    orig_dest = originals_dir / entry_name
                    shutil.copy2(tmp_path, orig_dest)

                ingest_parsed(parsed, data_dir, privacy="public")
                imported += 1
                yield {"type": "progress", "n": n, "total": total, "name": display_name, "status": "imported"}

            except FileExistsError:
                # Treated as "activity already imported".
                skipped += 1
                yield {"type": "progress", "n": n, "total": total, "name": display_name, "status": "skipped"}
            except Exception as exc:
                # Best effort: record the failure and carry on with the next entry.
                errors.append(f"{entry_name}: {type(exc).__name__}")
                yield {"type": "progress", "n": n, "total": total, "name": display_name, "status": "error"}
            finally:
                if tmp_path is not None:
                    tmp_path.unlink(missing_ok=True)

    finally:
        zf.close()
        zip_path.unlink(missing_ok=True)

    yield {
        "type": "done",
        "imported": imported,
        "skipped": skipped,
        "error_count": len(errors),
        "errors": errors[:5],
    }
|
||||
@@ -587,6 +587,55 @@ async def upload_activity(
|
||||
return JSONResponse({"ok": True, "added": len(added), "results": results})
|
||||
|
||||
|
||||
@app.post("/api/upload/strava-zip")
async def upload_strava_zip(
    file: UploadFile = File(...),
    bincio_session: Optional[str] = Cookie(default=None),
) -> StreamingResponse:
    """Accept a Strava bulk export ZIP and stream SSE progress while processing.

    The ZIP is written to a temp file, processed activity-by-activity, then deleted.
    Originals are never kept — the UI informs the user of this upfront.

    Raises:
        HTTPException: 400 when the upload has no file name or it does not
            end in ``.zip``.
    """
    user = _require_user(bincio_session)
    if not file.filename or not file.filename.lower().endswith(".zip"):
        raise HTTPException(400, "Please upload a .zip file")

    # Imported before the temp file is created so an import failure cannot
    # leave an orphaned upload on disk.
    import tempfile

    from bincio.extract.strava_zip import strava_zip_iter
    from bincio.render.merge import merge_all

    dd = _get_data_dir() / user.handle
    tmp = tempfile.NamedTemporaryFile(suffix=".zip", delete=False, dir=dd)
    zip_path = Path(tmp.name)
    try:
        with tmp:
            while chunk := await file.read(1024 * 1024):  # 1 MB chunks
                tmp.write(chunk)
    except BaseException:
        # Covers task cancellation (client aborts the upload) as well as I/O
        # errors: never leave a partially written ZIP behind.
        zip_path.unlink(missing_ok=True)
        raise

    def event_stream():
        """Yield each import event as an SSE ``data:`` frame."""
        any_imported = False
        events = strava_zip_iter(zip_path, dd)
        try:
            for event in events:
                yield f"data: {json.dumps(event)}\n\n"
                if event.get("type") == "progress" and event.get("status") == "imported":
                    any_imported = True
                if event.get("type") == "done":
                    # Only merge and rebuild when something was actually imported.
                    if any_imported:
                        merge_all(dd)
                        _trigger_rebuild(user.handle)
        except Exception as exc:
            zip_path.unlink(missing_ok=True)
            yield f"data: {json.dumps({'type': 'error', 'message': str(exc)})}\n\n"
        finally:
            # Close the inner generator explicitly so its cleanup (temp files,
            # the ZIP itself) runs promptly if the client disconnects mid-stream.
            events.close()

    return StreamingResponse(
        event_stream(),
        media_type="text/event-stream",
        headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
    )
|
||||
|
||||
|
||||
# ── Feedback ──────────────────────────────────────────────────────────────────
|
||||
|
||||
_FEEDBACK_IMAGE_SUFFIXES = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".heic"}
|
||||
|
||||
Reference in New Issue
Block a user