diff --git a/bincio/serve/server.py b/bincio/serve/server.py index 4e862bb..2c19563 100644 --- a/bincio/serve/server.py +++ b/bincio/serve/server.py @@ -619,6 +619,82 @@ async def admin_rebuild_sync( return JSONResponse(resp) +@app.post("/api/admin/users/{handle}/reextract-originals") +async def admin_reextract_originals( + handle: str, + bincio_session: Optional[str] = Cookie(default=None), +) -> StreamingResponse: + """Re-extract activities from stored Strava originals without hitting the API. + + Reads each file in originals/strava/{id}.json (containing {"meta", "streams"}), + calls strava_to_parsed, and writes the activity JSON + GeoJSON. + + Skips activities that already have a valid JSON file in activities/. + Streams SSE progress; call /rebuild after completion if no --webroot. + + SSE events: + {"type": "progress", "n": N, "total": T, "name": "...", "status": "imported"|"skipped"|"error", "detail": "..."} + {"type": "done", "imported": N, "skipped": N, "errors": N} + """ + _require_admin(bincio_session) + user_dir = _get_data_dir() / handle + originals_dir = user_dir / "originals" / "strava" + if not originals_dir.exists(): + raise HTTPException(404, f"No Strava originals directory for '{handle}'") + + original_files = sorted(originals_dir.glob("*.json")) + total = len(original_files) + log.info("reextract[%s]: starting, %d originals found", handle, total) + + def event_stream(): + from bincio.extract.strava_api import strava_to_parsed + from bincio.extract.ingest import ingest_parsed + from bincio.render.merge import merge_all + + imported = 0 + skipped = 0 + errors = 0 + any_imported = False + + for n, orig_path in enumerate(original_files, 1): + try: + raw = json.loads(orig_path.read_text(encoding="utf-8")) + meta = raw.get("meta", {}) + streams = raw.get("streams", {}) + name = meta.get("name", orig_path.stem) + + parsed = strava_to_parsed(meta, streams) + + from bincio.extract.writer import make_activity_id + activity_id = make_activity_id(parsed) + if (user_dir / "activities" / f"{activity_id}.json").exists(): + skipped += 1 + yield f"data: {json.dumps({'type': 'progress', 'n': n, 'total': total, 'name': name, 'status': 'skipped'})}\n\n" + continue + + ingest_parsed(parsed, user_dir, privacy="public", rdp_epsilon=0.0001) + imported += 1 + any_imported = True + yield f"data: {json.dumps({'type': 'progress', 'n': n, 'total': total, 'name': name, 'status': 'imported'})}\n\n" + except Exception as exc: + errors += 1 + log.error("reextract[%s]: failed on %s: %s", handle, orig_path.name, exc, exc_info=True) + yield f"data: {json.dumps({'type': 'progress', 'n': n, 'total': total, 'name': orig_path.stem, 'status': 'error', 'detail': str(exc)})}\n\n" + + log.info("reextract[%s]: done — imported=%d skipped=%d errors=%d", handle, imported, skipped, errors) + if any_imported: + merge_all(user_dir) + _trigger_rebuild(handle) + + yield f"data: {json.dumps({'type': 'done', 'imported': imported, 'skipped': skipped, 'errors': errors})}\n\n" + + return StreamingResponse( + event_stream(), + media_type="text/event-stream", + headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"}, + ) + + @app.get("/api/admin/users/{handle}/diag") async def admin_diag( handle: str, @@ -662,6 +738,25 @@ async def admin_diag( except Exception: root_activity_count = -1 + # Peek at a few filenames in activities/ to understand the actual state + acts_sample: list[str] = [] + acts_symlinks = 0 + if activities_dir.exists(): + for f in sorted(activities_dir.iterdir())[:10]: + acts_sample.append(f.name + (" → symlink" if f.is_symlink() else "")) + if f.is_symlink(): + acts_symlinks += 1 + + # Check _merged/activities/ separately + merged_acts_dir = merged_dir / "activities" + merged_acts_json = _count(merged_acts_dir, "*.json") + merged_acts_geojson = _count(merged_acts_dir, "*.geojson") + + # List pending files + pending_files: list[str] = [] + if uploads_dir.exists(): + pending_files = [f.name for f in uploads_dir.iterdir() if f.is_file()] + return JSONResponse({ "handle": handle, "user_dir": str(user_dir), @@ -669,6 +764,8 @@ async def admin_diag( "json_files": _count(activities_dir, "*.json"), "geojson_files": _count(activities_dir, "*.geojson"), "size_mb": round(_size_mb(activities_dir), 2), + "sample": acts_sample, + "symlink_count": acts_symlinks, }, "originals": { "exists": originals_dir.exists(), @@ -679,12 +776,15 @@ async def admin_diag( "exists": merged_dir.exists(), "activity_count_in_index": merged_activity_count, "size_mb": round(_size_mb(merged_dir), 2), + "activities_json": merged_acts_json, + "activities_geojson": merged_acts_geojson, }, "root_index": { "exists": root_index.exists(), "activity_count": root_activity_count, }, - "pending_uploads": _count(uploads_dir), + "pending_uploads": len(pending_files), + "pending_files": pending_files, "dedup_cache_exists": (user_dir / ".bincio_cache.json").exists(), "athlete_json_exists": (user_dir / "athlete.json").exists(), }) diff --git a/site/src/pages/admin/index.astro b/site/src/pages/admin/index.astro index 8f6502c..827c6ac 100644 --- a/site/src/pages/admin/index.astro +++ b/site/src/pages/admin/index.astro @@ -31,6 +31,16 @@ import Base from '../../layouts/Base.astro'; + + + + Re-extract from Strava originals — + Close + + + + + @@ -62,6 +72,13 @@ import Base from '../../layouts/Base.astro'; const diagOutput = document.getElementById('diag-output')!; document.getElementById('diag-close')!.addEventListener('click', () => diagDialog.close()); diagDialog.addEventListener('click', e => { if (e.target === diagDialog) diagDialog.close(); }); + + const reextractDialog = document.getElementById('reextract-dialog') as HTMLDialogElement; + const reextractHandle = document.getElementById('reextract-handle')!; + const reextractSummary = document.getElementById('reextract-summary')!; + const reextractLog = document.getElementById('reextract-log')!; + const reextractClose = document.getElementById('reextract-close') as HTMLButtonElement; + reextractClose.addEventListener('click', () => { reextractDialog.close(); load(); }); const confirmOk = document.getElementById('confirm-ok')!; const confirmCancel = document.getElementById('confirm-cancel')!; @@ -136,6 +153,11 @@ import Base from '../../layouts/Base.astro'; data-handle="${u.handle}" title="Show diagnostic snapshot of this user's data directory" >Diag + Re-extract ('.reextract-btn').forEach(btn => { + btn.addEventListener('click', async () => { + const h = btn.dataset.handle!; + reextractHandle.textContent = h; + reextractLog.innerHTML = ''; + reextractSummary.textContent = 'Starting…'; + reextractClose.disabled = true; + reextractDialog.showModal(); + + let imported = 0, skipped = 0, errors = 0; + try { + const es = new EventSource(`/api/admin/users/${h}/reextract-originals`); + // EventSource only does GET; use fetch + ReadableStream instead + es.close(); + + const r = await fetch(`/api/admin/users/${h}/reextract-originals`, { + method: 'POST', credentials: 'include', + }); + const reader = r.body!.getReader(); + const decoder = new TextDecoder(); + let buf = ''; + while (true) { + const { done, value } = await reader.read(); + if (done) break; + buf += decoder.decode(value, { stream: true }); + const lines = buf.split('\n\n'); + buf = lines.pop() ?? ''; + for (const chunk of lines) { + const dataLine = chunk.split('\n').find(l => l.startsWith('data: ')); + if (!dataLine) continue; + const ev = JSON.parse(dataLine.slice(6)); + if (ev.type === 'progress') { + const color = ev.status === 'imported' ? 'text-green-400' + : ev.status === 'error' ? 'text-red-400' + : 'text-zinc-500'; + const line = document.createElement('div'); + line.className = `text-xs font-mono ${color}`; + line.textContent = `[${ev.n}/${ev.total}] ${ev.status.padEnd(8)} ${ev.name}${ev.detail ? ' — ' + ev.detail : ''}`; + reextractLog.appendChild(line); + reextractLog.scrollTop = reextractLog.scrollHeight; + if (ev.status === 'imported') imported++; + else if (ev.status === 'error') errors++; + else skipped++; + reextractSummary.textContent = `Processing… ${ev.n}/${ev.total} — imported: ${imported}, skipped: ${skipped}, errors: ${errors}`; + } else if (ev.type === 'done') { + reextractSummary.textContent = `Done — imported: ${ev.imported}, skipped: ${ev.skipped}, errors: ${ev.errors}`; + } + } + } + } catch (err) { + reextractSummary.textContent = 'Error: ' + String(err); + } finally { + reextractClose.disabled = false; + } + }); + }); + tbodyEl.querySelectorAll('.diag-btn').forEach(btn => { btn.addEventListener('click', async () => { const h = btn.dataset.handle!;