add re-extract from Strava originals endpoint and improve diag
- POST /api/admin/users/{handle}/reextract-originals: reads stored
originals/strava/*.json and re-runs strava_to_parsed + ingest_parsed
without hitting the Strava API; streams SSE progress; calls merge_all
and rebuild on completion
- GET /api/admin/users/{handle}/diag: now shows _merged/activities/
file counts, a sample of filenames in activities/ (with symlink flag),
and lists pending_files by name
- Admin page: Re-extract button per user with live SSE progress modal
This commit is contained in:
+101
-1
@@ -619,6 +619,82 @@ async def admin_rebuild_sync(
|
|||||||
return JSONResponse(resp)
|
return JSONResponse(resp)
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/api/admin/users/{handle}/reextract-originals")
|
||||||
|
async def admin_reextract_originals(
|
||||||
|
handle: str,
|
||||||
|
bincio_session: Optional[str] = Cookie(default=None),
|
||||||
|
) -> StreamingResponse:
|
||||||
|
"""Re-extract activities from stored Strava originals without hitting the API.
|
||||||
|
|
||||||
|
Reads each file in originals/strava/{id}.json (containing {"meta", "streams"}),
|
||||||
|
calls strava_to_parsed, and writes the activity JSON + GeoJSON.
|
||||||
|
|
||||||
|
Skips activities that already have a valid JSON file in activities/.
|
||||||
|
Streams SSE progress; call /rebuild after completion if no --webroot.
|
||||||
|
|
||||||
|
SSE events:
|
||||||
|
{"type": "progress", "n": N, "total": T, "name": "...", "status": "imported"|"skipped"|"error", "detail": "..."}
|
||||||
|
{"type": "done", "imported": N, "skipped": N, "errors": N}
|
||||||
|
"""
|
||||||
|
_require_admin(bincio_session)
|
||||||
|
user_dir = _get_data_dir() / handle
|
||||||
|
originals_dir = user_dir / "originals" / "strava"
|
||||||
|
if not originals_dir.exists():
|
||||||
|
raise HTTPException(404, f"No Strava originals directory for '{handle}'")
|
||||||
|
|
||||||
|
original_files = sorted(originals_dir.glob("*.json"))
|
||||||
|
total = len(original_files)
|
||||||
|
log.info("reextract[%s]: starting, %d originals found", handle, total)
|
||||||
|
|
||||||
|
def event_stream():
|
||||||
|
from bincio.extract.strava_api import strava_to_parsed
|
||||||
|
from bincio.extract.ingest import ingest_parsed
|
||||||
|
from bincio.render.merge import merge_all
|
||||||
|
|
||||||
|
imported = 0
|
||||||
|
skipped = 0
|
||||||
|
errors = 0
|
||||||
|
any_imported = False
|
||||||
|
|
||||||
|
for n, orig_path in enumerate(original_files, 1):
|
||||||
|
try:
|
||||||
|
raw = json.loads(orig_path.read_text(encoding="utf-8"))
|
||||||
|
meta = raw.get("meta", {})
|
||||||
|
streams = raw.get("streams", {})
|
||||||
|
name = meta.get("name", orig_path.stem)
|
||||||
|
|
||||||
|
parsed = strava_to_parsed(meta, streams)
|
||||||
|
|
||||||
|
from bincio.extract.writer import make_activity_id
|
||||||
|
activity_id = make_activity_id(parsed)
|
||||||
|
if (user_dir / "activities" / f"{activity_id}.json").exists():
|
||||||
|
skipped += 1
|
||||||
|
yield f"data: {json.dumps({'type': 'progress', 'n': n, 'total': total, 'name': name, 'status': 'skipped'})}\n\n"
|
||||||
|
continue
|
||||||
|
|
||||||
|
ingest_parsed(parsed, user_dir, privacy="public", rdp_epsilon=0.0001)
|
||||||
|
imported += 1
|
||||||
|
any_imported = True
|
||||||
|
yield f"data: {json.dumps({'type': 'progress', 'n': n, 'total': total, 'name': name, 'status': 'imported'})}\n\n"
|
||||||
|
except Exception as exc:
|
||||||
|
errors += 1
|
||||||
|
log.error("reextract[%s]: failed on %s: %s", handle, orig_path.name, exc, exc_info=True)
|
||||||
|
yield f"data: {json.dumps({'type': 'progress', 'n': n, 'total': total, 'name': orig_path.stem, 'status': 'error', 'detail': str(exc)})}\n\n"
|
||||||
|
|
||||||
|
log.info("reextract[%s]: done — imported=%d skipped=%d errors=%d", handle, imported, skipped, errors)
|
||||||
|
if any_imported:
|
||||||
|
merge_all(user_dir)
|
||||||
|
_trigger_rebuild(handle)
|
||||||
|
|
||||||
|
yield f"data: {json.dumps({'type': 'done', 'imported': imported, 'skipped': skipped, 'errors': errors})}\n\n"
|
||||||
|
|
||||||
|
return StreamingResponse(
|
||||||
|
event_stream(),
|
||||||
|
media_type="text/event-stream",
|
||||||
|
headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@app.get("/api/admin/users/{handle}/diag")
|
@app.get("/api/admin/users/{handle}/diag")
|
||||||
async def admin_diag(
|
async def admin_diag(
|
||||||
handle: str,
|
handle: str,
|
||||||
@@ -662,6 +738,25 @@ async def admin_diag(
|
|||||||
except Exception:
|
except Exception:
|
||||||
root_activity_count = -1
|
root_activity_count = -1
|
||||||
|
|
||||||
|
# Peek at a few filenames in activities/ to understand the actual state
|
||||||
|
acts_sample: list[str] = []
|
||||||
|
acts_symlinks = 0
|
||||||
|
if activities_dir.exists():
|
||||||
|
for f in sorted(activities_dir.iterdir())[:10]:
|
||||||
|
acts_sample.append(f.name + (" → symlink" if f.is_symlink() else ""))
|
||||||
|
if f.is_symlink():
|
||||||
|
acts_symlinks += 1
|
||||||
|
|
||||||
|
# Check _merged/activities/ separately
|
||||||
|
merged_acts_dir = merged_dir / "activities"
|
||||||
|
merged_acts_json = _count(merged_acts_dir, "*.json")
|
||||||
|
merged_acts_geojson = _count(merged_acts_dir, "*.geojson")
|
||||||
|
|
||||||
|
# List pending files
|
||||||
|
pending_files: list[str] = []
|
||||||
|
if uploads_dir.exists():
|
||||||
|
pending_files = [f.name for f in uploads_dir.iterdir() if f.is_file()]
|
||||||
|
|
||||||
return JSONResponse({
|
return JSONResponse({
|
||||||
"handle": handle,
|
"handle": handle,
|
||||||
"user_dir": str(user_dir),
|
"user_dir": str(user_dir),
|
||||||
@@ -669,6 +764,8 @@ async def admin_diag(
|
|||||||
"json_files": _count(activities_dir, "*.json"),
|
"json_files": _count(activities_dir, "*.json"),
|
||||||
"geojson_files": _count(activities_dir, "*.geojson"),
|
"geojson_files": _count(activities_dir, "*.geojson"),
|
||||||
"size_mb": round(_size_mb(activities_dir), 2),
|
"size_mb": round(_size_mb(activities_dir), 2),
|
||||||
|
"sample": acts_sample,
|
||||||
|
"symlink_count": acts_symlinks,
|
||||||
},
|
},
|
||||||
"originals": {
|
"originals": {
|
||||||
"exists": originals_dir.exists(),
|
"exists": originals_dir.exists(),
|
||||||
@@ -679,12 +776,15 @@ async def admin_diag(
|
|||||||
"exists": merged_dir.exists(),
|
"exists": merged_dir.exists(),
|
||||||
"activity_count_in_index": merged_activity_count,
|
"activity_count_in_index": merged_activity_count,
|
||||||
"size_mb": round(_size_mb(merged_dir), 2),
|
"size_mb": round(_size_mb(merged_dir), 2),
|
||||||
|
"activities_json": merged_acts_json,
|
||||||
|
"activities_geojson": merged_acts_geojson,
|
||||||
},
|
},
|
||||||
"root_index": {
|
"root_index": {
|
||||||
"exists": root_index.exists(),
|
"exists": root_index.exists(),
|
||||||
"activity_count": root_activity_count,
|
"activity_count": root_activity_count,
|
||||||
},
|
},
|
||||||
"pending_uploads": _count(uploads_dir),
|
"pending_uploads": len(pending_files),
|
||||||
|
"pending_files": pending_files,
|
||||||
"dedup_cache_exists": (user_dir / ".bincio_cache.json").exists(),
|
"dedup_cache_exists": (user_dir / ".bincio_cache.json").exists(),
|
||||||
"athlete_json_exists": (user_dir / "athlete.json").exists(),
|
"athlete_json_exists": (user_dir / "athlete.json").exists(),
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -31,6 +31,16 @@ import Base from '../../layouts/Base.astro';
|
|||||||
</table>
|
</table>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<!-- Re-extract progress modal -->
|
||||||
|
<dialog id="reextract-dialog" class="rounded-xl bg-zinc-900 border border-zinc-700 p-6 text-white max-w-2xl w-full backdrop:bg-black/60">
|
||||||
|
<div class="flex items-center justify-between mb-4">
|
||||||
|
<h3 class="font-semibold text-sm">Re-extract from Strava originals — <span id="reextract-handle" class="text-zinc-400 font-mono"></span></h3>
|
||||||
|
<button id="reextract-close" class="text-zinc-500 hover:text-zinc-200 text-xs px-2 py-1 rounded bg-zinc-800" disabled>Close</button>
|
||||||
|
</div>
|
||||||
|
<div id="reextract-summary" class="text-xs text-zinc-400 mb-2"></div>
|
||||||
|
<div class="bg-zinc-950 rounded p-3 h-64 overflow-y-auto" id="reextract-log"></div>
|
||||||
|
</dialog>
|
||||||
|
|
||||||
<!-- Diag modal -->
|
<!-- Diag modal -->
|
||||||
<dialog id="diag-dialog" class="rounded-xl bg-zinc-900 border border-zinc-700 p-6 text-white max-w-2xl w-full backdrop:bg-black/60">
|
<dialog id="diag-dialog" class="rounded-xl bg-zinc-900 border border-zinc-700 p-6 text-white max-w-2xl w-full backdrop:bg-black/60">
|
||||||
<div class="flex items-center justify-between mb-4">
|
<div class="flex items-center justify-between mb-4">
|
||||||
@@ -62,6 +72,13 @@ import Base from '../../layouts/Base.astro';
|
|||||||
const diagOutput = document.getElementById('diag-output')!;
|
const diagOutput = document.getElementById('diag-output')!;
|
||||||
document.getElementById('diag-close')!.addEventListener('click', () => diagDialog.close());
|
document.getElementById('diag-close')!.addEventListener('click', () => diagDialog.close());
|
||||||
diagDialog.addEventListener('click', e => { if (e.target === diagDialog) diagDialog.close(); });
|
diagDialog.addEventListener('click', e => { if (e.target === diagDialog) diagDialog.close(); });
|
||||||
|
|
||||||
|
const reextractDialog = document.getElementById('reextract-dialog') as HTMLDialogElement;
|
||||||
|
const reextractHandle = document.getElementById('reextract-handle')!;
|
||||||
|
const reextractSummary = document.getElementById('reextract-summary')!;
|
||||||
|
const reextractLog = document.getElementById('reextract-log')!;
|
||||||
|
const reextractClose = document.getElementById('reextract-close') as HTMLButtonElement;
|
||||||
|
reextractClose.addEventListener('click', () => { reextractDialog.close(); load(); });
|
||||||
const confirmOk = document.getElementById('confirm-ok')!;
|
const confirmOk = document.getElementById('confirm-ok')!;
|
||||||
const confirmCancel = document.getElementById('confirm-cancel')!;
|
const confirmCancel = document.getElementById('confirm-cancel')!;
|
||||||
|
|
||||||
@@ -136,6 +153,11 @@ import Base from '../../layouts/Base.astro';
|
|||||||
data-handle="${u.handle}"
|
data-handle="${u.handle}"
|
||||||
title="Show diagnostic snapshot of this user's data directory"
|
title="Show diagnostic snapshot of this user's data directory"
|
||||||
>Diag</button>
|
>Diag</button>
|
||||||
|
<button
|
||||||
|
class="reextract-btn text-xs px-3 py-1.5 rounded-lg bg-zinc-800 hover:bg-amber-900 hover:text-amber-300 text-zinc-400 transition-colors"
|
||||||
|
data-handle="${u.handle}"
|
||||||
|
title="Re-extract activities from stored Strava originals (no API call)"
|
||||||
|
>Re-extract</button>
|
||||||
<button
|
<button
|
||||||
class="rebuild-btn text-xs px-3 py-1.5 rounded-lg bg-zinc-800 hover:bg-zinc-700 text-zinc-400 hover:text-zinc-200 transition-colors"
|
class="rebuild-btn text-xs px-3 py-1.5 rounded-lg bg-zinc-800 hover:bg-zinc-700 text-zinc-400 hover:text-zinc-200 transition-colors"
|
||||||
data-handle="${u.handle}"
|
data-handle="${u.handle}"
|
||||||
@@ -157,6 +179,63 @@ import Base from '../../layouts/Base.astro';
|
|||||||
`;
|
`;
|
||||||
}).join('');
|
}).join('');
|
||||||
|
|
||||||
|
tbodyEl.querySelectorAll<HTMLButtonElement>('.reextract-btn').forEach(btn => {
|
||||||
|
btn.addEventListener('click', async () => {
|
||||||
|
const h = btn.dataset.handle!;
|
||||||
|
reextractHandle.textContent = h;
|
||||||
|
reextractLog.innerHTML = '';
|
||||||
|
reextractSummary.textContent = 'Starting…';
|
||||||
|
reextractClose.disabled = true;
|
||||||
|
reextractDialog.showModal();
|
||||||
|
|
||||||
|
let imported = 0, skipped = 0, errors = 0;
|
||||||
|
try {
|
||||||
|
const es = new EventSource(`/api/admin/users/${h}/reextract-originals`);
|
||||||
|
// EventSource only does GET; use fetch + ReadableStream instead
|
||||||
|
es.close();
|
||||||
|
|
||||||
|
const r = await fetch(`/api/admin/users/${h}/reextract-originals`, {
|
||||||
|
method: 'POST', credentials: 'include',
|
||||||
|
});
|
||||||
|
const reader = r.body!.getReader();
|
||||||
|
const decoder = new TextDecoder();
|
||||||
|
let buf = '';
|
||||||
|
while (true) {
|
||||||
|
const { done, value } = await reader.read();
|
||||||
|
if (done) break;
|
||||||
|
buf += decoder.decode(value, { stream: true });
|
||||||
|
const lines = buf.split('\n\n');
|
||||||
|
buf = lines.pop() ?? '';
|
||||||
|
for (const chunk of lines) {
|
||||||
|
const dataLine = chunk.split('\n').find(l => l.startsWith('data: '));
|
||||||
|
if (!dataLine) continue;
|
||||||
|
const ev = JSON.parse(dataLine.slice(6));
|
||||||
|
if (ev.type === 'progress') {
|
||||||
|
const color = ev.status === 'imported' ? 'text-green-400'
|
||||||
|
: ev.status === 'error' ? 'text-red-400'
|
||||||
|
: 'text-zinc-500';
|
||||||
|
const line = document.createElement('div');
|
||||||
|
line.className = `text-xs font-mono ${color}`;
|
||||||
|
line.textContent = `[${ev.n}/${ev.total}] ${ev.status.padEnd(8)} ${ev.name}${ev.detail ? ' — ' + ev.detail : ''}`;
|
||||||
|
reextractLog.appendChild(line);
|
||||||
|
reextractLog.scrollTop = reextractLog.scrollHeight;
|
||||||
|
if (ev.status === 'imported') imported++;
|
||||||
|
else if (ev.status === 'error') errors++;
|
||||||
|
else skipped++;
|
||||||
|
reextractSummary.textContent = `Processing… ${ev.n}/${ev.total} — imported: ${imported}, skipped: ${skipped}, errors: ${errors}`;
|
||||||
|
} else if (ev.type === 'done') {
|
||||||
|
reextractSummary.textContent = `Done — imported: ${ev.imported}, skipped: ${ev.skipped}, errors: ${ev.errors}`;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (err) {
|
||||||
|
reextractSummary.textContent = 'Error: ' + String(err);
|
||||||
|
} finally {
|
||||||
|
reextractClose.disabled = false;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
tbodyEl.querySelectorAll<HTMLButtonElement>('.diag-btn').forEach(btn => {
|
tbodyEl.querySelectorAll<HTMLButtonElement>('.diag-btn').forEach(btn => {
|
||||||
btn.addEventListener('click', async () => {
|
btn.addEventListener('click', async () => {
|
||||||
const h = btn.dataset.handle!;
|
const h = btn.dataset.handle!;
|
||||||
|
|||||||
Reference in New Issue
Block a user