ingest activities.csv

2026-04-11 08:13:27 +02:00
parent cbd5a98cd3
commit 01db4eb9ae
5 changed files with 367 additions and 79 deletions
@@ -538,73 +538,108 @@ def _file_suffix(name: str) -> str:

@app.post("/api/upload")
 async def upload_activity(
-    file: UploadFile = File(...),
+    files: list[UploadFile] = File(...),
    store_original: bool = Form(False),
 ) -> JSONResponse:
-    """Accept a FIT/GPX/TCX file, extract it, update index.json, and re-merge."""
+    """Accept FIT/GPX/TCX files and/or activities.csv, extract, update index, re-merge.
+
+    activities.csv (Strava export format) can be included in the batch to:
+      - Enrich activity files being uploaded in the same batch (matched by filename)
+      - Retroactively update sidecars for existing activities (matched by strava_id)
+    """
+    from bincio.extract.ingest import ingest_parsed
+    from bincio.extract.parsers.factory import parse_file
+    from bincio.extract.writer import make_activity_id
+    from bincio.render.merge import merge_all
+
    dd = _get_data_dir()
-
-    name = Path(file.filename or "upload.fit").name  # strip any path components
-    suffix = _file_suffix(name)
-    if suffix not in _SUPPORTED_SUFFIXES:
-        raise HTTPException(400, f"Unsupported file type '{Path(name).suffix}'. Expected FIT, GPX, or TCX.")
-
-    _MAX_UPLOAD_BYTES = 50 * 1024 * 1024  # 50 MB
-    contents = await file.read()
-    if len(contents) > _MAX_UPLOAD_BYTES:
-        raise HTTPException(413, f"File too large ({len(contents)} bytes). Maximum is 50 MB.")
-
    staging = dd / "_uploads"
    staging.mkdir(exist_ok=True)
-    staged = staging / name
-    staged.write_bytes(contents)

-    kept = False
-    try:
-        from bincio.extract.metrics import compute
-        from bincio.extract.parsers.factory import parse_file
-        from bincio.extract.writer import build_summary, make_activity_id, write_activity, write_index
+    _MAX_UPLOAD_BYTES = 50 * 1024 * 1024  # 50 MB

-        activity = parse_file(staged)
-        metrics = compute(activity)
-        activity_id = make_activity_id(activity)
-
-        existing_json = dd / "activities" / f"{activity_id}.json"
-        if existing_json.exists():
-            raise HTTPException(409, f"Activity already exists: {activity_id}")
-
-        write_activity(activity, metrics, dd, privacy="public", rdp_epsilon=0.0001)
-        summary = build_summary(activity, metrics, activity_id, "public")
-
-        # Read current index to preserve owner + existing summaries
-        index_path = dd / "index.json"
-        if index_path.exists():
-            index_data = json.loads(index_path.read_text(encoding="utf-8"))
+    # Separate CSV files from activity files
+    csv_files: list[UploadFile] = []
+    activity_files: list[UploadFile] = []
+    for f in files:
+        name = Path(f.filename or "").name.lower()
+        if name.endswith(".csv"):
+            csv_files.append(f)
        else:
-            index_data = {"owner": {"handle": "unknown"}, "activities": []}
-        owner = index_data.get("owner", {})
-        existing = {s["id"]: s for s in index_data.get("activities", [])}
-        existing[activity_id] = summary
-        write_index(list(existing.values()), dd, owner)
+            activity_files.append(f)

-        if store_original:
-            originals_dir = dd / "originals"
-            originals_dir.mkdir(exist_ok=True)
-            staged.rename(originals_dir / name)
-            kept = True
+    # Build metadata from the first CSV found (activities.csv from Strava export)
+    metadata = None
+    if csv_files:
+        from bincio.extract.strava_csv import StravaMetadata
+        import tempfile
+        csv_upload = csv_files[0]
+        csv_bytes = await csv_upload.read()
+        with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp:
+            tmp.write(csv_bytes)
+            tmp_path = Path(tmp.name)
+        try:
+            metadata = StravaMetadata(tmp_path)
+        finally:
+            tmp_path.unlink(missing_ok=True)

-        from bincio.render.merge import merge_all
+    results = []
+    any_added = False
+
+    for file in activity_files:
+        name = Path(file.filename or "upload.fit").name
+        suffix = _file_suffix(name)
+        if suffix not in _SUPPORTED_SUFFIXES:
+            results.append({"name": name, "ok": False, "error": f"Unsupported file type '{Path(name).suffix}'"})
+            continue
+
+        contents = await file.read()
+        if len(contents) > _MAX_UPLOAD_BYTES:
+            results.append({"name": name, "ok": False, "error": "File too large (max 50 MB)"})
+            continue
+
+        staged = staging / name
+        staged.write_bytes(contents)
+        kept = False
+        try:
+            activity = parse_file(staged)
+
+            # Enrich with CSV metadata when available (matched by filename)
+            if metadata is not None:
+                metadata.enrich(name, activity)
+
+            activity_id = make_activity_id(activity)
+            if (dd / "activities" / f"{activity_id}.json").exists():
+                results.append({"name": name, "ok": False, "error": "duplicate"})
+                continue
+
+            ingest_parsed(activity, dd, privacy="public")
+
+            if store_original:
+                originals_dir = dd / "originals"
+                originals_dir.mkdir(exist_ok=True)
+                staged.rename(originals_dir / name)
+                kept = True
+
+            results.append({"name": name, "ok": True, "id": activity_id})
+            any_added = True
+        except Exception:
+            results.append({"name": name, "ok": False, "error": "Processing failed"})
+        finally:
+            if not kept:
+                staged.unlink(missing_ok=True)
+
+    # Retroactively update sidecars for existing activities matched by strava_id
+    csv_updates = 0
+    if metadata is not None:
+        from bincio.extract.strava_csv import apply_csv_to_data_dir
+        csv_updates = apply_csv_to_data_dir(dd, metadata)
+
+    if any_added or csv_updates:
        merge_all(dd)

-    except HTTPException:
-        raise
-    except Exception as exc:
-        raise HTTPException(422, f"Failed to process activity file: {type(exc).__name__}")
-    finally:
-        if not kept:
-            staged.unlink(missing_ok=True)
-
-    return JSONResponse({"ok": True, "id": activity_id})
+    added = [r for r in results if r["ok"]]
+    return JSONResponse({"ok": True, "added": len(added), "csv_updates": csv_updates, "results": results})


@app.post("/api/import-bas")