upload: add overwrite option to replace existing activities

When 'Overwrite existing activities' is checked, duplicate activities are re-extracted and replaced instead of silently skipped: - Deletes {id}.json, .geojson, .timeseries.json from activities/ and _merged/ - Removes the stale index summary and dedup cache entry - Ingests the new file fresh via ingest_parsed - Reports 'overwritten' (↺) status in the SSE stream vs 'imported' (↓) - done event includes 'overwritten' count; UI shows it alongside 'added'
2026-04-15 20:17:32 +02:00
parent a33fea91cf
commit 764da09130
2 changed files with 61 additions and 12 deletions
@@ -1129,6 +1129,7 @@ def _file_suffix(name: str) -> str:
 async def upload_activity(
    files: list[UploadFile] = File(...),
    store_original: bool = Form(False),
+    overwrite: bool = Form(False),
    bincio_session: Optional[str] = Cookie(default=None),
 ) -> StreamingResponse:
    """Accept FIT/GPX/TCX files and/or activities.csv; stream SSE progress while processing.
@@ -1138,9 +1139,9 @@ async def upload_activity(
      - Retroactively update sidecars for existing activities (matched by strava_id)

    SSE events:
-      {"type": "progress", "n": N, "total": T, "name": "...", "status": "imported"|"duplicate"|"error"}
+      {"type": "progress", "n": N, "total": T, "name": "...", "status": "imported"|"overwritten"|"duplicate"|"error"}
      {"type": "csv", "updates": N}   -- only when CSV was included
-      {"type": "done", "added": N, "csv_updates": N, "duplicates": N, "errors": N}
+      {"type": "done", "added": N, "csv_updates": N, "duplicates": N, "overwritten": N, "errors": N}
    """
    from bincio.extract.ingest import ingest_parsed
    from bincio.extract.parsers.factory import parse_file
@@ -1182,6 +1183,7 @@ async def upload_activity(

    def event_stream():
        added = 0
+        overwritten = 0
        duplicates = 0
        errors = 0
        any_added = False
@@ -1209,19 +1211,51 @@ async def upload_activity(
                if metadata is not None:
                    metadata.enrich(name, activity)
                activity_id = make_activity_id(activity)
+                was_overwrite = False
                if (dd / "activities" / f"{activity_id}.json").exists():
-                    duplicates += 1
-                    yield f"data: {json.dumps({'type': 'progress', 'n': n, 'total': total_files, 'name': name, 'status': 'duplicate'})}\n\n"
-                    continue
+                    if not overwrite:
+                        duplicates += 1
+                        yield f"data: {json.dumps({'type': 'progress', 'n': n, 'total': total_files, 'name': name, 'status': 'duplicate'})}\n\n"
+                        continue
+                    # Overwrite: delete existing files before re-ingesting.
+                    for ext in (".json", ".geojson", ".timeseries.json"):
+                        (dd / "activities" / f"{activity_id}{ext}").unlink(missing_ok=True)
+                    # Remove stale summary from index so ingest_parsed writes a clean one
+                    index_path = dd / "index.json"
+                    if index_path.exists():
+                        idx = json.loads(index_path.read_text(encoding="utf-8"))
+                        idx["activities"] = [a for a in idx.get("activities", []) if a.get("id") != activity_id]
+                        index_path.write_text(json.dumps(idx, indent=2, ensure_ascii=False))
+                    # Remove from dedup hash cache so the new file isn't blocked
+                    cache_path = dd / ".bincio_cache.json"
+                    if cache_path.exists():
+                        try:
+                            cache = json.loads(cache_path.read_text(encoding="utf-8"))
+                            cache.pop(activity_id, None)
+                            cache_path.write_text(json.dumps(cache, ensure_ascii=False))
+                        except Exception:
+                            pass
+                    # Remove merged copies (merge_all will regenerate them after ingest)
+                    merged_acts = dd / "_merged" / "activities"
+                    if merged_acts.exists():
+                        for ext in (".json", ".geojson", ".timeseries.json"):
+                            p = merged_acts / f"{activity_id}{ext}"
+                            if p.exists() or p.is_symlink():
+                                p.unlink(missing_ok=True)
+                    was_overwrite = True
                ingest_parsed(activity, dd, privacy="public")
                if store_original:
                    originals_dir = dd / "originals"
                    originals_dir.mkdir(exist_ok=True)
                    staged.rename(originals_dir / name)
                    kept = True
-                added += 1
+                if was_overwrite:
+                    overwritten += 1
+                else:
+                    added += 1
                any_added = True
-                yield f"data: {json.dumps({'type': 'progress', 'n': n, 'total': total_files, 'name': name, 'status': 'imported'})}\n\n"
+                status = 'overwritten' if was_overwrite else 'imported'
+                yield f"data: {json.dumps({'type': 'progress', 'n': n, 'total': total_files, 'name': name, 'status': status})}\n\n"
            except Exception as exc:
                errors += 1
                log.error("upload[%s]: failed to process %s: %s", user.handle, name, exc, exc_info=True)
@@ -1243,7 +1277,7 @@ async def upload_activity(
            if any_added:
                _trigger_rebuild(user.handle)

-        yield f"data: {json.dumps({'type': 'done', 'added': added, 'csv_updates': csv_updates, 'duplicates': duplicates, 'errors': errors})}\n\n"
+        yield f"data: {json.dumps({'type': 'done', 'added': added, 'overwritten': overwritten, 'csv_updates': csv_updates, 'duplicates': duplicates, 'errors': errors})}\n\n"

        if job_id:
            _job_finish(job_id)