add re-extract from Strava originals endpoint and improve diag

- POST /api/admin/users/{handle}/reextract-originals: reads stored
  originals/strava/*.json and re-runs strava_to_parsed + ingest_parsed
  without hitting the Strava API; streams SSE progress; calls merge_all
  and rebuild on completion
- GET /api/admin/users/{handle}/diag: now shows _merged/activities/
  file counts, a sample of filenames in activities/ (with symlink flag),
  and lists pending_files by name
- Admin page: Re-extract button per user with live SSE progress modal
This commit is contained in:
Davide Scaini
2026-04-15 08:08:57 +02:00
parent 1e30f85bdc
commit 89b92397cf
2 changed files with 180 additions and 1 deletions
+101 -1
View File
@@ -619,6 +619,82 @@ async def admin_rebuild_sync(
return JSONResponse(resp) return JSONResponse(resp)
@app.post("/api/admin/users/{handle}/reextract-originals")
async def admin_reextract_originals(
handle: str,
bincio_session: Optional[str] = Cookie(default=None),
) -> StreamingResponse:
"""Re-extract activities from stored Strava originals without hitting the API.
Reads each file in originals/strava/{id}.json (containing {"meta", "streams"}),
calls strava_to_parsed, and writes the activity JSON + GeoJSON.
Skips activities that already have a valid JSON file in activities/.
Streams SSE progress; call /rebuild after completion if no --webroot.
SSE events:
{"type": "progress", "n": N, "total": T, "name": "...", "status": "imported"|"skipped"|"error", "detail": "..."}
{"type": "done", "imported": N, "skipped": N, "errors": N}
"""
_require_admin(bincio_session)
user_dir = _get_data_dir() / handle
originals_dir = user_dir / "originals" / "strava"
if not originals_dir.exists():
raise HTTPException(404, f"No Strava originals directory for '{handle}'")
original_files = sorted(originals_dir.glob("*.json"))
total = len(original_files)
log.info("reextract[%s]: starting, %d originals found", handle, total)
def event_stream():
from bincio.extract.strava_api import strava_to_parsed
from bincio.extract.ingest import ingest_parsed
from bincio.render.merge import merge_all
imported = 0
skipped = 0
errors = 0
any_imported = False
for n, orig_path in enumerate(original_files, 1):
try:
raw = json.loads(orig_path.read_text(encoding="utf-8"))
meta = raw.get("meta", {})
streams = raw.get("streams", {})
name = meta.get("name", orig_path.stem)
parsed = strava_to_parsed(meta, streams)
from bincio.extract.writer import make_activity_id
activity_id = make_activity_id(parsed)
if (user_dir / "activities" / f"{activity_id}.json").exists():
skipped += 1
yield f"data: {json.dumps({'type': 'progress', 'n': n, 'total': total, 'name': name, 'status': 'skipped'})}\n\n"
continue
ingest_parsed(parsed, user_dir, privacy="public", rdp_epsilon=0.0001)
imported += 1
any_imported = True
yield f"data: {json.dumps({'type': 'progress', 'n': n, 'total': total, 'name': name, 'status': 'imported'})}\n\n"
except Exception as exc:
errors += 1
log.error("reextract[%s]: failed on %s: %s", handle, orig_path.name, exc, exc_info=True)
yield f"data: {json.dumps({'type': 'progress', 'n': n, 'total': total, 'name': orig_path.stem, 'status': 'error', 'detail': str(exc)})}\n\n"
log.info("reextract[%s]: done — imported=%d skipped=%d errors=%d", handle, imported, skipped, errors)
if any_imported:
merge_all(user_dir)
_trigger_rebuild(handle)
yield f"data: {json.dumps({'type': 'done', 'imported': imported, 'skipped': skipped, 'errors': errors})}\n\n"
return StreamingResponse(
event_stream(),
media_type="text/event-stream",
headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
)
@app.get("/api/admin/users/{handle}/diag") @app.get("/api/admin/users/{handle}/diag")
async def admin_diag( async def admin_diag(
handle: str, handle: str,
@@ -662,6 +738,25 @@ async def admin_diag(
except Exception: except Exception:
root_activity_count = -1 root_activity_count = -1
# Peek at a few filenames in activities/ to understand the actual state
acts_sample: list[str] = []
acts_symlinks = 0
if activities_dir.exists():
for f in sorted(activities_dir.iterdir())[:10]:
acts_sample.append(f.name + (" → symlink" if f.is_symlink() else ""))
if f.is_symlink():
acts_symlinks += 1
# Check _merged/activities/ separately
merged_acts_dir = merged_dir / "activities"
merged_acts_json = _count(merged_acts_dir, "*.json")
merged_acts_geojson = _count(merged_acts_dir, "*.geojson")
# List pending files
pending_files: list[str] = []
if uploads_dir.exists():
pending_files = [f.name for f in uploads_dir.iterdir() if f.is_file()]
return JSONResponse({ return JSONResponse({
"handle": handle, "handle": handle,
"user_dir": str(user_dir), "user_dir": str(user_dir),
@@ -669,6 +764,8 @@ async def admin_diag(
"json_files": _count(activities_dir, "*.json"), "json_files": _count(activities_dir, "*.json"),
"geojson_files": _count(activities_dir, "*.geojson"), "geojson_files": _count(activities_dir, "*.geojson"),
"size_mb": round(_size_mb(activities_dir), 2), "size_mb": round(_size_mb(activities_dir), 2),
"sample": acts_sample,
"symlink_count": acts_symlinks,
}, },
"originals": { "originals": {
"exists": originals_dir.exists(), "exists": originals_dir.exists(),
@@ -679,12 +776,15 @@ async def admin_diag(
"exists": merged_dir.exists(), "exists": merged_dir.exists(),
"activity_count_in_index": merged_activity_count, "activity_count_in_index": merged_activity_count,
"size_mb": round(_size_mb(merged_dir), 2), "size_mb": round(_size_mb(merged_dir), 2),
"activities_json": merged_acts_json,
"activities_geojson": merged_acts_geojson,
}, },
"root_index": { "root_index": {
"exists": root_index.exists(), "exists": root_index.exists(),
"activity_count": root_activity_count, "activity_count": root_activity_count,
}, },
"pending_uploads": _count(uploads_dir), "pending_uploads": len(pending_files),
"pending_files": pending_files,
"dedup_cache_exists": (user_dir / ".bincio_cache.json").exists(), "dedup_cache_exists": (user_dir / ".bincio_cache.json").exists(),
"athlete_json_exists": (user_dir / "athlete.json").exists(), "athlete_json_exists": (user_dir / "athlete.json").exists(),
}) })
+79
View File
@@ -31,6 +31,16 @@ import Base from '../../layouts/Base.astro';
</table> </table>
</div> </div>
<!-- Re-extract progress modal -->
<dialog id="reextract-dialog" class="rounded-xl bg-zinc-900 border border-zinc-700 p-6 text-white max-w-2xl w-full backdrop:bg-black/60">
<div class="flex items-center justify-between mb-4">
<h3 class="font-semibold text-sm">Re-extract from Strava originals — <span id="reextract-handle" class="text-zinc-400 font-mono"></span></h3>
<button id="reextract-close" class="text-zinc-500 hover:text-zinc-200 text-xs px-2 py-1 rounded bg-zinc-800" disabled>Close</button>
</div>
<div id="reextract-summary" class="text-xs text-zinc-400 mb-2"></div>
<div class="bg-zinc-950 rounded p-3 h-64 overflow-y-auto" id="reextract-log"></div>
</dialog>
<!-- Diag modal --> <!-- Diag modal -->
<dialog id="diag-dialog" class="rounded-xl bg-zinc-900 border border-zinc-700 p-6 text-white max-w-2xl w-full backdrop:bg-black/60"> <dialog id="diag-dialog" class="rounded-xl bg-zinc-900 border border-zinc-700 p-6 text-white max-w-2xl w-full backdrop:bg-black/60">
<div class="flex items-center justify-between mb-4"> <div class="flex items-center justify-between mb-4">
@@ -62,6 +72,13 @@ import Base from '../../layouts/Base.astro';
const diagOutput = document.getElementById('diag-output')!; const diagOutput = document.getElementById('diag-output')!;
document.getElementById('diag-close')!.addEventListener('click', () => diagDialog.close()); document.getElementById('diag-close')!.addEventListener('click', () => diagDialog.close());
diagDialog.addEventListener('click', e => { if (e.target === diagDialog) diagDialog.close(); }); diagDialog.addEventListener('click', e => { if (e.target === diagDialog) diagDialog.close(); });
const reextractDialog = document.getElementById('reextract-dialog') as HTMLDialogElement;
const reextractHandle = document.getElementById('reextract-handle')!;
const reextractSummary = document.getElementById('reextract-summary')!;
const reextractLog = document.getElementById('reextract-log')!;
const reextractClose = document.getElementById('reextract-close') as HTMLButtonElement;
reextractClose.addEventListener('click', () => { reextractDialog.close(); load(); });
const confirmOk = document.getElementById('confirm-ok')!; const confirmOk = document.getElementById('confirm-ok')!;
const confirmCancel = document.getElementById('confirm-cancel')!; const confirmCancel = document.getElementById('confirm-cancel')!;
@@ -136,6 +153,11 @@ import Base from '../../layouts/Base.astro';
data-handle="${u.handle}" data-handle="${u.handle}"
title="Show diagnostic snapshot of this user's data directory" title="Show diagnostic snapshot of this user's data directory"
>Diag</button> >Diag</button>
<button
class="reextract-btn text-xs px-3 py-1.5 rounded-lg bg-zinc-800 hover:bg-amber-900 hover:text-amber-300 text-zinc-400 transition-colors"
data-handle="${u.handle}"
title="Re-extract activities from stored Strava originals (no API call)"
>Re-extract</button>
<button <button
class="rebuild-btn text-xs px-3 py-1.5 rounded-lg bg-zinc-800 hover:bg-zinc-700 text-zinc-400 hover:text-zinc-200 transition-colors" class="rebuild-btn text-xs px-3 py-1.5 rounded-lg bg-zinc-800 hover:bg-zinc-700 text-zinc-400 hover:text-zinc-200 transition-colors"
data-handle="${u.handle}" data-handle="${u.handle}"
@@ -157,6 +179,63 @@ import Base from '../../layouts/Base.astro';
`; `;
}).join(''); }).join('');
tbodyEl.querySelectorAll<HTMLButtonElement>('.reextract-btn').forEach(btn => {
btn.addEventListener('click', async () => {
const h = btn.dataset.handle!;
reextractHandle.textContent = h;
reextractLog.innerHTML = '';
reextractSummary.textContent = 'Starting…';
reextractClose.disabled = true;
reextractDialog.showModal();
let imported = 0, skipped = 0, errors = 0;
try {
const es = new EventSource(`/api/admin/users/${h}/reextract-originals`);
// EventSource only does GET; use fetch + ReadableStream instead
es.close();
const r = await fetch(`/api/admin/users/${h}/reextract-originals`, {
method: 'POST', credentials: 'include',
});
const reader = r.body!.getReader();
const decoder = new TextDecoder();
let buf = '';
while (true) {
const { done, value } = await reader.read();
if (done) break;
buf += decoder.decode(value, { stream: true });
const lines = buf.split('\n\n');
buf = lines.pop() ?? '';
for (const chunk of lines) {
const dataLine = chunk.split('\n').find(l => l.startsWith('data: '));
if (!dataLine) continue;
const ev = JSON.parse(dataLine.slice(6));
if (ev.type === 'progress') {
const color = ev.status === 'imported' ? 'text-green-400'
: ev.status === 'error' ? 'text-red-400'
: 'text-zinc-500';
const line = document.createElement('div');
line.className = `text-xs font-mono ${color}`;
line.textContent = `[${ev.n}/${ev.total}] ${ev.status.padEnd(8)} ${ev.name}${ev.detail ? ' — ' + ev.detail : ''}`;
reextractLog.appendChild(line);
reextractLog.scrollTop = reextractLog.scrollHeight;
if (ev.status === 'imported') imported++;
else if (ev.status === 'error') errors++;
else skipped++;
reextractSummary.textContent = `Processing… ${ev.n}/${ev.total} — imported: ${imported}, skipped: ${skipped}, errors: ${errors}`;
} else if (ev.type === 'done') {
reextractSummary.textContent = `Done — imported: ${ev.imported}, skipped: ${ev.skipped}, errors: ${ev.errors}`;
}
}
}
} catch (err) {
reextractSummary.textContent = 'Error: ' + String(err);
} finally {
reextractClose.disabled = false;
}
});
});
tbodyEl.querySelectorAll<HTMLButtonElement>('.diag-btn').forEach(btn => { tbodyEl.querySelectorAll<HTMLButtonElement>('.diag-btn').forEach(btn => {
btn.addEventListener('click', async () => { btn.addEventListener('click', async () => {
const h = btn.dataset.handle!; const h = btn.dataset.handle!;