add re-extract from Strava originals endpoint and improve diag
- POST /api/admin/users/{handle}/reextract-originals: reads stored
originals/strava/*.json and re-runs strava_to_parsed + ingest_parsed
without hitting the Strava API; streams SSE progress; calls merge_all
and rebuild on completion
- GET /api/admin/users/{handle}/diag: now shows _merged/activities/
file counts, a sample of filenames in activities/ (with symlink flag),
and lists pending_files by name
- Admin page: Re-extract button per user with live SSE progress modal
This commit is contained in:
+101
-1
@@ -619,6 +619,82 @@ async def admin_rebuild_sync(
|
||||
return JSONResponse(resp)
|
||||
|
||||
|
||||
@app.post("/api/admin/users/{handle}/reextract-originals")
|
||||
async def admin_reextract_originals(
|
||||
handle: str,
|
||||
bincio_session: Optional[str] = Cookie(default=None),
|
||||
) -> StreamingResponse:
|
||||
"""Re-extract activities from stored Strava originals without hitting the API.
|
||||
|
||||
Reads each file in originals/strava/{id}.json (containing {"meta", "streams"}),
|
||||
calls strava_to_parsed, and writes the activity JSON + GeoJSON.
|
||||
|
||||
Skips activities that already have a valid JSON file in activities/.
|
||||
Streams SSE progress; call /rebuild after completion if no --webroot.
|
||||
|
||||
SSE events:
|
||||
{"type": "progress", "n": N, "total": T, "name": "...", "status": "imported"|"skipped"|"error", "detail": "..."}
|
||||
{"type": "done", "imported": N, "skipped": N, "errors": N}
|
||||
"""
|
||||
_require_admin(bincio_session)
|
||||
user_dir = _get_data_dir() / handle
|
||||
originals_dir = user_dir / "originals" / "strava"
|
||||
if not originals_dir.exists():
|
||||
raise HTTPException(404, f"No Strava originals directory for '{handle}'")
|
||||
|
||||
original_files = sorted(originals_dir.glob("*.json"))
|
||||
total = len(original_files)
|
||||
log.info("reextract[%s]: starting, %d originals found", handle, total)
|
||||
|
||||
def event_stream():
|
||||
from bincio.extract.strava_api import strava_to_parsed
|
||||
from bincio.extract.ingest import ingest_parsed
|
||||
from bincio.render.merge import merge_all
|
||||
|
||||
imported = 0
|
||||
skipped = 0
|
||||
errors = 0
|
||||
any_imported = False
|
||||
|
||||
for n, orig_path in enumerate(original_files, 1):
|
||||
try:
|
||||
raw = json.loads(orig_path.read_text(encoding="utf-8"))
|
||||
meta = raw.get("meta", {})
|
||||
streams = raw.get("streams", {})
|
||||
name = meta.get("name", orig_path.stem)
|
||||
|
||||
parsed = strava_to_parsed(meta, streams)
|
||||
|
||||
from bincio.extract.writer import make_activity_id
|
||||
activity_id = make_activity_id(parsed)
|
||||
if (user_dir / "activities" / f"{activity_id}.json").exists():
|
||||
skipped += 1
|
||||
yield f"data: {json.dumps({'type': 'progress', 'n': n, 'total': total, 'name': name, 'status': 'skipped'})}\n\n"
|
||||
continue
|
||||
|
||||
ingest_parsed(parsed, user_dir, privacy="public", rdp_epsilon=0.0001)
|
||||
imported += 1
|
||||
any_imported = True
|
||||
yield f"data: {json.dumps({'type': 'progress', 'n': n, 'total': total, 'name': name, 'status': 'imported'})}\n\n"
|
||||
except Exception as exc:
|
||||
errors += 1
|
||||
log.error("reextract[%s]: failed on %s: %s", handle, orig_path.name, exc, exc_info=True)
|
||||
yield f"data: {json.dumps({'type': 'progress', 'n': n, 'total': total, 'name': orig_path.stem, 'status': 'error', 'detail': str(exc)})}\n\n"
|
||||
|
||||
log.info("reextract[%s]: done — imported=%d skipped=%d errors=%d", handle, imported, skipped, errors)
|
||||
if any_imported:
|
||||
merge_all(user_dir)
|
||||
_trigger_rebuild(handle)
|
||||
|
||||
yield f"data: {json.dumps({'type': 'done', 'imported': imported, 'skipped': skipped, 'errors': errors})}\n\n"
|
||||
|
||||
return StreamingResponse(
|
||||
event_stream(),
|
||||
media_type="text/event-stream",
|
||||
headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
|
||||
)
|
||||
|
||||
|
||||
@app.get("/api/admin/users/{handle}/diag")
|
||||
async def admin_diag(
|
||||
handle: str,
|
||||
@@ -662,6 +738,25 @@ async def admin_diag(
|
||||
except Exception:
|
||||
root_activity_count = -1
|
||||
|
||||
# Peek at a few filenames in activities/ to understand the actual state
|
||||
acts_sample: list[str] = []
|
||||
acts_symlinks = 0
|
||||
if activities_dir.exists():
|
||||
for f in sorted(activities_dir.iterdir())[:10]:
|
||||
acts_sample.append(f.name + (" → symlink" if f.is_symlink() else ""))
|
||||
if f.is_symlink():
|
||||
acts_symlinks += 1
|
||||
|
||||
# Check _merged/activities/ separately
|
||||
merged_acts_dir = merged_dir / "activities"
|
||||
merged_acts_json = _count(merged_acts_dir, "*.json")
|
||||
merged_acts_geojson = _count(merged_acts_dir, "*.geojson")
|
||||
|
||||
# List pending files
|
||||
pending_files: list[str] = []
|
||||
if uploads_dir.exists():
|
||||
pending_files = [f.name for f in uploads_dir.iterdir() if f.is_file()]
|
||||
|
||||
return JSONResponse({
|
||||
"handle": handle,
|
||||
"user_dir": str(user_dir),
|
||||
@@ -669,6 +764,8 @@ async def admin_diag(
|
||||
"json_files": _count(activities_dir, "*.json"),
|
||||
"geojson_files": _count(activities_dir, "*.geojson"),
|
||||
"size_mb": round(_size_mb(activities_dir), 2),
|
||||
"sample": acts_sample,
|
||||
"symlink_count": acts_symlinks,
|
||||
},
|
||||
"originals": {
|
||||
"exists": originals_dir.exists(),
|
||||
@@ -679,12 +776,15 @@ async def admin_diag(
|
||||
"exists": merged_dir.exists(),
|
||||
"activity_count_in_index": merged_activity_count,
|
||||
"size_mb": round(_size_mb(merged_dir), 2),
|
||||
"activities_json": merged_acts_json,
|
||||
"activities_geojson": merged_acts_geojson,
|
||||
},
|
||||
"root_index": {
|
||||
"exists": root_index.exists(),
|
||||
"activity_count": root_activity_count,
|
||||
},
|
||||
"pending_uploads": _count(uploads_dir),
|
||||
"pending_uploads": len(pending_files),
|
||||
"pending_files": pending_files,
|
||||
"dedup_cache_exists": (user_dir / ".bincio_cache.json").exists(),
|
||||
"athlete_json_exists": (user_dir / "athlete.json").exists(),
|
||||
})
|
||||
|
||||
@@ -31,6 +31,16 @@ import Base from '../../layouts/Base.astro';
|
||||
</table>
|
||||
</div>
|
||||
|
||||
<!-- Re-extract progress modal -->
|
||||
<dialog id="reextract-dialog" class="rounded-xl bg-zinc-900 border border-zinc-700 p-6 text-white max-w-2xl w-full backdrop:bg-black/60">
|
||||
<div class="flex items-center justify-between mb-4">
|
||||
<h3 class="font-semibold text-sm">Re-extract from Strava originals — <span id="reextract-handle" class="text-zinc-400 font-mono"></span></h3>
|
||||
<button id="reextract-close" class="text-zinc-500 hover:text-zinc-200 text-xs px-2 py-1 rounded bg-zinc-800" disabled>Close</button>
|
||||
</div>
|
||||
<div id="reextract-summary" class="text-xs text-zinc-400 mb-2"></div>
|
||||
<div class="bg-zinc-950 rounded p-3 h-64 overflow-y-auto" id="reextract-log"></div>
|
||||
</dialog>
|
||||
|
||||
<!-- Diag modal -->
|
||||
<dialog id="diag-dialog" class="rounded-xl bg-zinc-900 border border-zinc-700 p-6 text-white max-w-2xl w-full backdrop:bg-black/60">
|
||||
<div class="flex items-center justify-between mb-4">
|
||||
@@ -62,6 +72,13 @@ import Base from '../../layouts/Base.astro';
|
||||
const diagOutput = document.getElementById('diag-output')!;
|
||||
document.getElementById('diag-close')!.addEventListener('click', () => diagDialog.close());
|
||||
diagDialog.addEventListener('click', e => { if (e.target === diagDialog) diagDialog.close(); });
|
||||
|
||||
const reextractDialog = document.getElementById('reextract-dialog') as HTMLDialogElement;
|
||||
const reextractHandle = document.getElementById('reextract-handle')!;
|
||||
const reextractSummary = document.getElementById('reextract-summary')!;
|
||||
const reextractLog = document.getElementById('reextract-log')!;
|
||||
const reextractClose = document.getElementById('reextract-close') as HTMLButtonElement;
|
||||
reextractClose.addEventListener('click', () => { reextractDialog.close(); load(); });
|
||||
const confirmOk = document.getElementById('confirm-ok')!;
|
||||
const confirmCancel = document.getElementById('confirm-cancel')!;
|
||||
|
||||
@@ -136,6 +153,11 @@ import Base from '../../layouts/Base.astro';
|
||||
data-handle="${u.handle}"
|
||||
title="Show diagnostic snapshot of this user's data directory"
|
||||
>Diag</button>
|
||||
<button
|
||||
class="reextract-btn text-xs px-3 py-1.5 rounded-lg bg-zinc-800 hover:bg-amber-900 hover:text-amber-300 text-zinc-400 transition-colors"
|
||||
data-handle="${u.handle}"
|
||||
title="Re-extract activities from stored Strava originals (no API call)"
|
||||
>Re-extract</button>
|
||||
<button
|
||||
class="rebuild-btn text-xs px-3 py-1.5 rounded-lg bg-zinc-800 hover:bg-zinc-700 text-zinc-400 hover:text-zinc-200 transition-colors"
|
||||
data-handle="${u.handle}"
|
||||
@@ -157,6 +179,63 @@ import Base from '../../layouts/Base.astro';
|
||||
`;
|
||||
}).join('');
|
||||
|
||||
tbodyEl.querySelectorAll<HTMLButtonElement>('.reextract-btn').forEach(btn => {
|
||||
btn.addEventListener('click', async () => {
|
||||
const h = btn.dataset.handle!;
|
||||
reextractHandle.textContent = h;
|
||||
reextractLog.innerHTML = '';
|
||||
reextractSummary.textContent = 'Starting…';
|
||||
reextractClose.disabled = true;
|
||||
reextractDialog.showModal();
|
||||
|
||||
let imported = 0, skipped = 0, errors = 0;
|
||||
try {
|
||||
const es = new EventSource(`/api/admin/users/${h}/reextract-originals`);
|
||||
// EventSource only does GET; use fetch + ReadableStream instead
|
||||
es.close();
|
||||
|
||||
const r = await fetch(`/api/admin/users/${h}/reextract-originals`, {
|
||||
method: 'POST', credentials: 'include',
|
||||
});
|
||||
const reader = r.body!.getReader();
|
||||
const decoder = new TextDecoder();
|
||||
let buf = '';
|
||||
while (true) {
|
||||
const { done, value } = await reader.read();
|
||||
if (done) break;
|
||||
buf += decoder.decode(value, { stream: true });
|
||||
const lines = buf.split('\n\n');
|
||||
buf = lines.pop() ?? '';
|
||||
for (const chunk of lines) {
|
||||
const dataLine = chunk.split('\n').find(l => l.startsWith('data: '));
|
||||
if (!dataLine) continue;
|
||||
const ev = JSON.parse(dataLine.slice(6));
|
||||
if (ev.type === 'progress') {
|
||||
const color = ev.status === 'imported' ? 'text-green-400'
|
||||
: ev.status === 'error' ? 'text-red-400'
|
||||
: 'text-zinc-500';
|
||||
const line = document.createElement('div');
|
||||
line.className = `text-xs font-mono ${color}`;
|
||||
line.textContent = `[${ev.n}/${ev.total}] ${ev.status.padEnd(8)} ${ev.name}${ev.detail ? ' — ' + ev.detail : ''}`;
|
||||
reextractLog.appendChild(line);
|
||||
reextractLog.scrollTop = reextractLog.scrollHeight;
|
||||
if (ev.status === 'imported') imported++;
|
||||
else if (ev.status === 'error') errors++;
|
||||
else skipped++;
|
||||
reextractSummary.textContent = `Processing… ${ev.n}/${ev.total} — imported: ${imported}, skipped: ${skipped}, errors: ${errors}`;
|
||||
} else if (ev.type === 'done') {
|
||||
reextractSummary.textContent = `Done — imported: ${ev.imported}, skipped: ${ev.skipped}, errors: ${ev.errors}`;
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
reextractSummary.textContent = 'Error: ' + String(err);
|
||||
} finally {
|
||||
reextractClose.disabled = false;
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
tbodyEl.querySelectorAll<HTMLButtonElement>('.diag-btn').forEach(btn => {
|
||||
btn.addEventListener('click', async () => {
|
||||
const h = btn.dataset.handle!;
|
||||
|
||||
Reference in New Issue
Block a user