ingest activities.csv

This commit is contained in:
Davide Scaini
2026-04-11 08:13:27 +02:00
parent cbd5a98cd3
commit 01db4eb9ae
5 changed files with 367 additions and 79 deletions
+90 -55
View File
@@ -538,73 +538,108 @@ def _file_suffix(name: str) -> str:
@app.post("/api/upload")
async def upload_activity(
    files: list[UploadFile] = File(...),
    store_original: bool = Form(False),
) -> JSONResponse:
    """Accept FIT/GPX/TCX files and/or activities.csv, extract, update index, re-merge.

    activities.csv (Strava export format) can be included in the batch to:
    - Enrich activity files being uploaded in the same batch (matched by filename)
    - Retroactively update sidecars for existing activities (matched by strava_id)

    Failures are reported per file in the response rather than aborting the
    whole batch.

    Args:
        files: Uploaded files. Names ending in ``.csv`` (case-insensitive) are
            treated as metadata; everything else is treated as an activity file.
        store_original: When true, each successfully ingested activity file is
            moved into ``<data_dir>/originals`` instead of being deleted.

    Returns:
        JSON with ``ok``, ``added`` (number of ingested activities),
        ``csv_updates`` and ``results`` (one entry per reported file, each
        with ``name`` and ``ok``, plus ``id`` on success or ``error`` on
        failure).
    """
    import logging
    import tempfile

    from bincio.extract.ingest import ingest_parsed
    from bincio.extract.parsers.factory import parse_file
    from bincio.extract.writer import make_activity_id
    from bincio.render.merge import merge_all

    logger = logging.getLogger(__name__)

    dd = _get_data_dir()
    staging = dd / "_uploads"
    staging.mkdir(exist_ok=True)

    _MAX_UPLOAD_BYTES = 50 * 1024 * 1024  # 50 MB

    # Separate CSV files from activity files
    csv_files: list[UploadFile] = []
    activity_files: list[UploadFile] = []
    for f in files:
        if Path(f.filename or "").name.lower().endswith(".csv"):
            csv_files.append(f)
        else:
            activity_files.append(f)

    results: list[dict] = []
    any_added = False

    # Build metadata from the first CSV found (activities.csv from Strava export)
    metadata = None
    if csv_files:
        from bincio.extract.strava_csv import StravaMetadata

        csv_upload = csv_files[0]
        csv_name = Path(csv_upload.filename or "activities.csv").name
        csv_bytes = await csv_upload.read()
        if len(csv_bytes) > _MAX_UPLOAD_BYTES:
            results.append({"name": csv_name, "ok": False, "error": "File too large (max 50 MB)"})
        else:
            try:
                # The temporary directory removes the file even when writing
                # or parsing fails part-way through.
                with tempfile.TemporaryDirectory() as tmp_dir:
                    tmp_path = Path(tmp_dir) / "activities.csv"
                    tmp_path.write_bytes(csv_bytes)
                    metadata = StravaMetadata(tmp_path)
            except Exception:
                # A broken CSV must not prevent the activity files from being ingested.
                logger.exception("Failed to read uploaded CSV %s", csv_name)
                metadata = None
                results.append({"name": csv_name, "ok": False, "error": "Processing failed"})

        # Only one CSV is honoured per batch; say so instead of dropping the rest silently.
        for extra in csv_files[1:]:
            extra_name = Path(extra.filename or "activities.csv").name
            results.append(
                {"name": extra_name, "ok": False, "error": "Ignored: only the first CSV file is used"}
            )

    for file in activity_files:
        name = Path(file.filename or "upload.fit").name  # strip any path components
        suffix = _file_suffix(name)
        if suffix not in _SUPPORTED_SUFFIXES:
            results.append({"name": name, "ok": False, "error": f"Unsupported file type '{Path(name).suffix}'"})
            continue

        contents = await file.read()
        if len(contents) > _MAX_UPLOAD_BYTES:
            results.append({"name": name, "ok": False, "error": "File too large (max 50 MB)"})
            continue

        staged = staging / name
        kept = False
        try:
            staged.write_bytes(contents)
            activity = parse_file(staged)

            # Enrich with CSV metadata when available (matched by filename)
            if metadata is not None:
                metadata.enrich(name, activity)

            activity_id = make_activity_id(activity)
            if (dd / "activities" / f"{activity_id}.json").exists():
                results.append({"name": name, "ok": False, "error": "duplicate"})
                continue

            ingest_parsed(activity, dd, privacy="public")
            # Flag immediately: once ingested, the data must be merged even if
            # storing the original below fails.
            any_added = True

            if store_original:
                originals_dir = dd / "originals"
                originals_dir.mkdir(exist_ok=True)
                # replace() overwrites an existing target on every platform,
                # whereas rename() raises on Windows.
                staged.replace(originals_dir / name)
                kept = True

            results.append({"name": name, "ok": True, "id": activity_id})
        except Exception:
            logger.exception("Failed to process uploaded file %s", name)
            results.append({"name": name, "ok": False, "error": "Processing failed"})
        finally:
            if not kept:
                staged.unlink(missing_ok=True)

    # Retroactively update sidecars for existing activities matched by strava_id
    csv_updates = 0
    try:
        if metadata is not None:
            from bincio.extract.strava_csv import apply_csv_to_data_dir

            csv_updates = apply_csv_to_data_dir(dd, metadata)
    finally:
        # Merge even when the CSV pass raised, so activities ingested above
        # are not left out of the merged output.
        if any_added or csv_updates:
            merge_all(dd)

    added = [r for r in results if r["ok"]]
    return JSONResponse({"ok": True, "added": len(added), "csv_updates": csv_updates, "results": results})
@app.post("/api/import-bas")