upload: add overwrite option to replace existing activities

When 'Overwrite existing activities' is checked, duplicate activities are
re-extracted and replaced instead of silently skipped:
- Deletes {id}.json, .geojson, .timeseries.json from activities/ and _merged/
- Removes the stale index summary and dedup cache entry
- Ingests the new file fresh via ingest_parsed
- Reports 'overwritten' (↺) status in the SSE stream vs 'imported' (↓)
- done event includes 'overwritten' count; UI shows it alongside 'added'
This commit is contained in:
Davide Scaini
2026-04-15 20:17:32 +02:00
parent a33fea91cf
commit 764da09130
2 changed files with 61 additions and 12 deletions
+42 -8
View File
@@ -1129,6 +1129,7 @@ def _file_suffix(name: str) -> str:
async def upload_activity(
files: list[UploadFile] = File(...),
store_original: bool = Form(False),
overwrite: bool = Form(False),
bincio_session: Optional[str] = Cookie(default=None),
) -> StreamingResponse:
"""Accept FIT/GPX/TCX files and/or activities.csv; stream SSE progress while processing.
@@ -1138,9 +1139,9 @@ async def upload_activity(
- Retroactively update sidecars for existing activities (matched by strava_id)
SSE events:
{"type": "progress", "n": N, "total": T, "name": "...", "status": "imported"|"duplicate"|"error"}
{"type": "progress", "n": N, "total": T, "name": "...", "status": "imported"|"overwritten"|"duplicate"|"error"}
{"type": "csv", "updates": N} -- only when CSV was included
{"type": "done", "added": N, "csv_updates": N, "duplicates": N, "errors": N}
{"type": "done", "added": N, "csv_updates": N, "duplicates": N, "overwritten": N, "errors": N}
"""
from bincio.extract.ingest import ingest_parsed
from bincio.extract.parsers.factory import parse_file
@@ -1182,6 +1183,7 @@ async def upload_activity(
def event_stream():
added = 0
overwritten = 0
duplicates = 0
errors = 0
any_added = False
@@ -1209,19 +1211,51 @@ async def upload_activity(
if metadata is not None:
metadata.enrich(name, activity)
activity_id = make_activity_id(activity)
was_overwrite = False
if (dd / "activities" / f"{activity_id}.json").exists():
duplicates += 1
yield f"data: {json.dumps({'type': 'progress', 'n': n, 'total': total_files, 'name': name, 'status': 'duplicate'})}\n\n"
continue
if not overwrite:
duplicates += 1
yield f"data: {json.dumps({'type': 'progress', 'n': n, 'total': total_files, 'name': name, 'status': 'duplicate'})}\n\n"
continue
# Overwrite: delete existing files before re-ingesting.
for ext in (".json", ".geojson", ".timeseries.json"):
(dd / "activities" / f"{activity_id}{ext}").unlink(missing_ok=True)
# Remove stale summary from index so ingest_parsed writes a clean one
index_path = dd / "index.json"
if index_path.exists():
idx = json.loads(index_path.read_text(encoding="utf-8"))
idx["activities"] = [a for a in idx.get("activities", []) if a.get("id") != activity_id]
index_path.write_text(json.dumps(idx, indent=2, ensure_ascii=False))
# Remove from dedup hash cache so the new file isn't blocked
cache_path = dd / ".bincio_cache.json"
if cache_path.exists():
try:
cache = json.loads(cache_path.read_text(encoding="utf-8"))
cache.pop(activity_id, None)
cache_path.write_text(json.dumps(cache, ensure_ascii=False))
except Exception:
pass
# Remove merged copies (merge_all will regenerate them after ingest)
merged_acts = dd / "_merged" / "activities"
if merged_acts.exists():
for ext in (".json", ".geojson", ".timeseries.json"):
p = merged_acts / f"{activity_id}{ext}"
if p.exists() or p.is_symlink():
p.unlink(missing_ok=True)
was_overwrite = True
ingest_parsed(activity, dd, privacy="public")
if store_original:
originals_dir = dd / "originals"
originals_dir.mkdir(exist_ok=True)
staged.rename(originals_dir / name)
kept = True
added += 1
if was_overwrite:
overwritten += 1
else:
added += 1
any_added = True
yield f"data: {json.dumps({'type': 'progress', 'n': n, 'total': total_files, 'name': name, 'status': 'imported'})}\n\n"
status = 'overwritten' if was_overwrite else 'imported'
yield f"data: {json.dumps({'type': 'progress', 'n': n, 'total': total_files, 'name': name, 'status': status})}\n\n"
except Exception as exc:
errors += 1
log.error("upload[%s]: failed to process %s: %s", user.handle, name, exc, exc_info=True)
@@ -1243,7 +1277,7 @@ async def upload_activity(
if any_added:
_trigger_rebuild(user.handle)
yield f"data: {json.dumps({'type': 'done', 'added': added, 'csv_updates': csv_updates, 'duplicates': duplicates, 'errors': errors})}\n\n"
yield f"data: {json.dumps({'type': 'done', 'added': added, 'overwritten': overwritten, 'csv_updates': csv_updates, 'duplicates': duplicates, 'errors': errors})}\n\n"
if job_id:
_job_finish(job_id)