upload: add overwrite option to replace existing activities
When 'Overwrite existing activities' is checked, duplicate activities are
re-extracted and replaced instead of silently skipped:
- Deletes {id}.json, .geojson, .timeseries.json from activities/ and _merged/
- Removes the stale index summary and dedup cache entry
- Ingests the new file fresh via ingest_parsed
- Reports 'overwritten' (↺) status in the SSE stream vs 'imported' (↓)
- done event includes 'overwritten' count; UI shows it alongside 'added'
This commit is contained in:
+42
-8
@@ -1129,6 +1129,7 @@ def _file_suffix(name: str) -> str:
|
||||
async def upload_activity(
|
||||
files: list[UploadFile] = File(...),
|
||||
store_original: bool = Form(False),
|
||||
overwrite: bool = Form(False),
|
||||
bincio_session: Optional[str] = Cookie(default=None),
|
||||
) -> StreamingResponse:
|
||||
"""Accept FIT/GPX/TCX files and/or activities.csv; stream SSE progress while processing.
|
||||
@@ -1138,9 +1139,9 @@ async def upload_activity(
|
||||
- Retroactively update sidecars for existing activities (matched by strava_id)
|
||||
|
||||
SSE events:
|
||||
{"type": "progress", "n": N, "total": T, "name": "...", "status": "imported"|"duplicate"|"error"}
|
||||
{"type": "progress", "n": N, "total": T, "name": "...", "status": "imported"|"overwritten"|"duplicate"|"error"}
|
||||
{"type": "csv", "updates": N} -- only when CSV was included
|
||||
{"type": "done", "added": N, "csv_updates": N, "duplicates": N, "errors": N}
|
||||
{"type": "done", "added": N, "csv_updates": N, "duplicates": N, "overwritten": N, "errors": N}
|
||||
"""
|
||||
from bincio.extract.ingest import ingest_parsed
|
||||
from bincio.extract.parsers.factory import parse_file
|
||||
@@ -1182,6 +1183,7 @@ async def upload_activity(
|
||||
|
||||
def event_stream():
|
||||
added = 0
|
||||
overwritten = 0
|
||||
duplicates = 0
|
||||
errors = 0
|
||||
any_added = False
|
||||
@@ -1209,19 +1211,51 @@ async def upload_activity(
|
||||
if metadata is not None:
|
||||
metadata.enrich(name, activity)
|
||||
activity_id = make_activity_id(activity)
|
||||
was_overwrite = False
|
||||
if (dd / "activities" / f"{activity_id}.json").exists():
|
||||
duplicates += 1
|
||||
yield f"data: {json.dumps({'type': 'progress', 'n': n, 'total': total_files, 'name': name, 'status': 'duplicate'})}\n\n"
|
||||
continue
|
||||
if not overwrite:
|
||||
duplicates += 1
|
||||
yield f"data: {json.dumps({'type': 'progress', 'n': n, 'total': total_files, 'name': name, 'status': 'duplicate'})}\n\n"
|
||||
continue
|
||||
# Overwrite: delete existing files before re-ingesting.
|
||||
for ext in (".json", ".geojson", ".timeseries.json"):
|
||||
(dd / "activities" / f"{activity_id}{ext}").unlink(missing_ok=True)
|
||||
# Remove stale summary from index so ingest_parsed writes a clean one
|
||||
index_path = dd / "index.json"
|
||||
if index_path.exists():
|
||||
idx = json.loads(index_path.read_text(encoding="utf-8"))
|
||||
idx["activities"] = [a for a in idx.get("activities", []) if a.get("id") != activity_id]
|
||||
index_path.write_text(json.dumps(idx, indent=2, ensure_ascii=False))
|
||||
# Remove from dedup hash cache so the new file isn't blocked
|
||||
cache_path = dd / ".bincio_cache.json"
|
||||
if cache_path.exists():
|
||||
try:
|
||||
cache = json.loads(cache_path.read_text(encoding="utf-8"))
|
||||
cache.pop(activity_id, None)
|
||||
cache_path.write_text(json.dumps(cache, ensure_ascii=False))
|
||||
except Exception:
|
||||
pass
|
||||
# Remove merged copies (merge_all will regenerate them after ingest)
|
||||
merged_acts = dd / "_merged" / "activities"
|
||||
if merged_acts.exists():
|
||||
for ext in (".json", ".geojson", ".timeseries.json"):
|
||||
p = merged_acts / f"{activity_id}{ext}"
|
||||
if p.exists() or p.is_symlink():
|
||||
p.unlink(missing_ok=True)
|
||||
was_overwrite = True
|
||||
ingest_parsed(activity, dd, privacy="public")
|
||||
if store_original:
|
||||
originals_dir = dd / "originals"
|
||||
originals_dir.mkdir(exist_ok=True)
|
||||
staged.rename(originals_dir / name)
|
||||
kept = True
|
||||
added += 1
|
||||
if was_overwrite:
|
||||
overwritten += 1
|
||||
else:
|
||||
added += 1
|
||||
any_added = True
|
||||
yield f"data: {json.dumps({'type': 'progress', 'n': n, 'total': total_files, 'name': name, 'status': 'imported'})}\n\n"
|
||||
status = 'overwritten' if was_overwrite else 'imported'
|
||||
yield f"data: {json.dumps({'type': 'progress', 'n': n, 'total': total_files, 'name': name, 'status': status})}\n\n"
|
||||
except Exception as exc:
|
||||
errors += 1
|
||||
log.error("upload[%s]: failed to process %s: %s", user.handle, name, exc, exc_info=True)
|
||||
@@ -1243,7 +1277,7 @@ async def upload_activity(
|
||||
if any_added:
|
||||
_trigger_rebuild(user.handle)
|
||||
|
||||
yield f"data: {json.dumps({'type': 'done', 'added': added, 'csv_updates': csv_updates, 'duplicates': duplicates, 'errors': errors})}\n\n"
|
||||
yield f"data: {json.dumps({'type': 'done', 'added': added, 'overwritten': overwritten, 'csv_updates': csv_updates, 'duplicates': duplicates, 'errors': errors})}\n\n"
|
||||
|
||||
if job_id:
|
||||
_job_finish(job_id)
|
||||
|
||||
Reference in New Issue
Block a user