From 5ad3aee8f65e628242fa36bd9b08fb521a6708fb Mon Sep 17 00:00:00 2001 From: Davide Scaini Date: Mon, 13 Apr 2026 18:49:20 +0200 Subject: [PATCH] =?UTF-8?q?rename=20privacy=20"private"=20=E2=86=92=20"unl?= =?UTF-8?q?isted";=20enable=20GPS=20for=20unlisted?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - "unlisted" = not shown in the public feed, but GPS track, timeseries and detail JSON are all accessible by direct URL (security by obscurity) - "private" accepted as legacy alias everywhere (backward compat with existing data on disk) - New writes from Strava sync / ZIP upload / sidecar use "unlisted" - Only "no_gps" now suppresses the GPS track - isUnlisted() helper in format.ts used by all Svelte/Astro components - SCHEMA.md and CLAUDE.md document the privacy model and the distinction between "unlisted" and "no_gps" --- .claude/settings.local.json | 31 +++ CLAUDE.md | 5 + SCHEMA.md | 21 +- advice.md | 79 +++++++ bincio/edit/server.py | 2 +- bincio/extract/strava_api.py | 4 +- bincio/extract/timeseries.py | 5 +- bincio/extract/writer.py | 7 +- bincio/render/merge.py | 12 +- bincio/serve/server.py | 2 +- issues.md | 99 ++++++++ site/public/bincio-0.1.0-py3-none-any.whl | Bin 0 -> 60977 bytes site/public/data | 1 + .../components/ActivityDetailLoader.svelte | 3 +- site/src/components/ActivityFeed.svelte | 12 +- site/src/components/AthleteView.svelte | 3 +- site/src/components/CommunityView.svelte | 4 +- site/src/components/StatsView.svelte | 4 +- site/src/layouts/Base.astro | 4 +- site/src/lib/format.ts | 8 +- site/src/lib/types.ts | 4 +- site/src/pages/activity/[id].astro | 4 +- todo.md | 213 ++++++++++++++++++ 23 files changed, 489 insertions(+), 38 deletions(-) create mode 100644 .claude/settings.local.json create mode 100644 advice.md create mode 100644 issues.md create mode 100644 site/public/bincio-0.1.0-py3-none-any.whl create mode 120000 site/public/data create mode 100644 todo.md diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 0000000..58f3bd8 --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,31 @@ +{ + "permissions": { + "allow": [ + "Read(//Users/brutsalvadi/src/bincio_data/_merged/**)", + "Read(//Users/brutsalvadi/src/bincio_data/**)", + "Bash(python3 -c \"import json,sys; d=json.load\\(sys.stdin\\); print\\(list\\(d.get\\(''''dependencies'''',{}\\).keys\\(\\)\\)\\)\")", + "Bash(uv run:*)", + "Bash(python3 -c \"import pyodide\")", + "Bash(uv build:*)", + "Bash(npm install:*)", + "Bash(npm run:*)", + "Bash(mkdir -p /tmp/bincio_ci/activities)", + "Bash(BINCIO_DATA_DIR=/tmp/bincio_ci npm run build)", + "Bash(ls /Users/brutsalvadi/src/bincio_activity/*.md)", + "Bash(python3:*)", + "Bash(wc -l /Users/brutsalvadi/src/bincio_activity/bincio/extract/*.py)", + "Bash(grep -E \"\\\\.py$\")", + "Bash([ -f ~/src/cycling_data_davide/activities.csv ])", + "Read(//Users/brutsalvadi/src/cycling_data_davide/**)", + "Bash(ls -la /tmp/bincio_dev_test/dave/_merged/activities/*.json)", + "Bash(ls -la /tmp/bincio_dev_test/dave/_merged/activities/*.geojson)", + "Bash(unzip -l ~/src/cycling_data_davide/export_18885842.zip)", + "Bash(unzip -p export_18885842.zip profile.csv)", + "Bash(unzip -p export_18885842.zip bikes.csv)", + "Bash(unzip -p export_18885842.zip routes.csv)", + "Bash(unzip -p export_18885842.zip privacy_zones.csv)", + "Bash(unzip -p export_18885842.zip activities.csv)", + "Bash(xargs -I{} python3 -c \"import json,sys; d=json.load\\(open\\('{}'\\)\\);print\\({k:d.get\\(k\\) for k in ['title','strava_id','started_at','privacy']}\\)\")" + ] + } +} diff --git a/CLAUDE.md b/CLAUDE.md index f721bea..414a8f9 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -205,6 +205,11 @@ gear: "Trek Domane" Rode with friends. Legs felt great after the rest week... ``` +The sidecar `private: true` flag maps to `privacy: "unlisted"` in the merged JSON. +**`unlisted`** means: not shown in the public feed, but the activity detail, GPS track, +and timeseries are all accessible by direct URL (security by obscurity, same model +as the detail JSON itself). Use `no_gps` if the GPS track must not be published. + ### Editing UX: drawer in Astro + `bincio edit` write API - `bincio edit --data-dir ~/bincio_data` starts a FastAPI server on port 4041 diff --git a/SCHEMA.md b/SCHEMA.md index 0960341..3802199 100644 --- a/SCHEMA.md +++ b/SCHEMA.md @@ -124,7 +124,7 @@ needed to render an activity card in a feed — no timeseries, no full track. | `avg_cadence_rpm` | integer\|null | no | Average cadence (rpm for cycling, spm for running). | | `avg_power_w` | integer\|null | no | Average power in watts. | | `source` | string\|null | no | Origin of data. See **Source values**. | -| `privacy` | string | yes | One of: `public`, `blur_start`, `no_gps`, `private`. | +| `privacy` | string | yes | One of: `public`, `blur_start`, `no_gps`, `unlisted`. (`private` is a deprecated alias for `unlisted`.) | | `mmp` | array\|null | no | Mean Maximal Power curve — `[[duration_s, avg_watts], ...]`. | | `best_efforts` | array\|null | no | Best efforts by distance — `[[distance_km, time_s], ...]`. | | `best_climb_m` | number\|null | no | Best single climb in metres (Kadane's algorithm). | @@ -165,12 +165,21 @@ timestamp alone is sufficient: `2024-06-01T073012Z`. ### Privacy levels -| Level | GPS track published | Timeseries lat/lon | Stats in index | +| Level | GPS track published | Timeseries lat/lon | Shown in feed | |---|---|---|---| -| `public` | Full track | Included | Yes | -| `blur_start` | First/last 200 m removed | Trimmed | Yes | -| `no_gps` | Not published | Not included | Yes | -| `private` | Not published | Not included | No (not in index at all) | +| `public` | Full track | Included | Yes — everyone | +| `blur_start` | First/last 200 m removed | Trimmed | Yes — everyone | +| `no_gps` | Not published | Not included | Yes — everyone | +| `unlisted` | Full track | Included | No — owner only (via direct URL) | +| `private` | *(deprecated alias for `unlisted`)* | Included | No — owner only | + +**`unlisted`** activities are not shown in the public feed but are fully accessible +by direct URL — the GPS track, timeseries, and detail JSON are all served as normal +static files. This is "security by obscurity": knowing the URL is sufficient to +access the activity. If you need true data exclusion, use `no_gps` for GPS removal +while keeping stats public, or delete the activity entirely. + +The legacy `private` value is accepted everywhere `unlisted` is valid. --- diff --git a/advice.md b/advice.md new file mode 100644 index 0000000..6454668 --- /dev/null +++ b/advice.md @@ -0,0 +1,79 @@ +# Advice on ARCHITECTURE.md + +Review of the mobile / offline plan in `ARCHITECTURE.md`. The two-stage extract/render pipeline, edit flow, and federation sections are accurate and clear — this document focuses on where the architectural risk lives: the mobile app and the path to fully-offline operation. + +## What's strong + +- **Workflow framing (1–4) is the right way to think about it.** Each workflow is a concrete user story, not an abstract capability list. Workflow 1 (record → convert → cloud) is achievable with what's already there; that's a good wedge to ship first. +- **Honest status table** at the bottom of "Fully offline — missing pieces". An architecture doc that admits what isn't done is more useful than one that hand-waves. +- **Incremental validation plan (§5).** Testing the service worker approach in the browser before touching native builds is the right call — cheap to run, and the failure mode is informative rather than expensive. + +## Decision: drop `capacitor-nodejs` + +The doc previously listed `capacitor-nodejs` as a fallback if service workers fail on iOS. We are dropping this option entirely. Reasons: + +- It embeds a full Node runtime, meaningfully increasing app size. +- iOS support relies on a fork of Node-mobile and has historically been spotty. +- It introduces a long-term maintenance burden (tracking upstream Node, plugin updates, two runtimes to debug). +- The "Hard" effort label in the original status table understated both the integration risk and the ongoing cost. + +If service workers turn out to be blocked on iOS, the fallback should be one of: + +1. **A data access abstraction** (see next section) that lets the app read from IndexedDB directly via a JS loader, with no `fetch('/data/*')` in the hot path at all. This sidesteps the WKWebView question entirely. +2. **A native Swift/Kotlin micro-server** if a local HTTP origin is genuinely required. +3. **Bundling data as static assets** and re-running `cap sync` on import — crude but boring and reliable. + +`capacitor-nodejs` should not appear in the doc, the test plan, or the status table. + +## Things to fix or add + +### 1. Add a data access abstraction step *before* the service worker work + +The doc frames the offline problem as "serve local data to the WebView at `/data/*`". That skips a prior question: does the Astro site actually need to fetch `/data/*` at runtime, or can the data layer be abstracted behind a thin loader (`loadIndex()`, `loadActivity(id)`) with two implementations? + +- **Cloud build:** loader uses `fetch('/data/...')` as today. +- **App build:** loader reads from IndexedDB directly. + +This is a much smaller change than service worker interception, avoids the iOS WKWebView question entirely, and is useful even if you stay cloud-only (testability, mocking, future federation transports). It should land *before* any service worker work — at which point the SW work may turn out to be unnecessary. + +### 2. Commit to JS for sidecar merge — do not use Pyodide + +The "missing pieces" section lists two options for re-running `merge_all` on save: reimplement in JS (~150 lines) or call into Pyodide. Pick JS. Reasons: + +- Pyodide is lazy-loaded by `/convert/`. It is **not** warm just because the app is open. +- A user tapping Edit → change title → Save would trigger a multi-second Pyodide cold start (~10MB) for a one-line edit. Terrible UX, and it repeats on every cold app launch. +- Porting `merge_all` to JS keeps the edit drawer decoupled from the convert page's machinery. The two subsystems stay independent. + +Pyodide should remain convert-page-only. + +### 3. The Workflow 4 diagram contradicts the test plan + +The "Fully offline on phone" workflow shows a "Local Node server" arrow as if it were the chosen path. The §5 test plan picks service workers first. The diagram should reflect that — show the SW path as primary, and drop the Node server box entirely (per the decision above). + +### 4. Missing: data lifecycle on device + +Nothing in the doc covers: + +- How much storage the app is allowed to use on iOS before the OS evicts it. +- What happens on uninstall / reinstall. +- Sync / conflict resolution if the same activity exists locally and on a cloud instance. + +These don't need solutions today, but they should be acknowledged as open questions. Otherwise Workflow 4 will hit all of them at once during implementation. + +### 5. Reconcile Pyodide payload size + +Line 162 says "~8MB", line 335 says "~10MB". Pick one and use it consistently. + +## Small stuff + +- The federation diagram uses a `Note1` node that won't render as a Mermaid note — it'll appear as a regular box. Use `%%` comments or restructure. +- The "iOS: App Store / TestFlight" cell in the PWA-vs-Capacitor table sits in the Capacitor column but reads like a downside. Clarify it's the distribution path, not a limitation relative to PWA. + +## Bottom line + +The plan is sound and the incremental validation approach is right. The two highest-leverage changes: + +1. **Add a data access abstraction layer** before the service worker work. It's small, useful regardless, and may make the SW work moot. +2. **Port `merge_all` to JS** so the edit drawer doesn't depend on Pyodide warm-up. + +With `capacitor-nodejs` removed, the offline path is: data access abstraction → IndexedDB-backed loader → JS merge → (optionally) service worker for any remaining `fetch('/data/*')` callsites that can't be migrated to the loader. diff --git a/bincio/edit/server.py b/bincio/edit/server.py index cc3acd6..64ea731 100644 --- a/bincio/edit/server.py +++ b/bincio/edit/server.py @@ -183,7 +183,7 @@ textarea { resize: vertical; min-height: 140px; } Highlight in feed diff --git a/bincio/extract/strava_api.py b/bincio/extract/strava_api.py index c60ecd4..4675ae9 100644 --- a/bincio/extract/strava_api.py +++ b/bincio/extract/strava_api.py @@ -201,7 +201,7 @@ def strava_to_parsed(meta: dict, streams: dict) -> ParsedActivity: source = f"strava:{meta['id']}" source_hash = "sha256:" + hashlib.sha256(source.encode()).hexdigest() - # Map Strava visibility to BAS privacy: only_me → private, everything else → public + # Map Strava visibility to BAS privacy: only_me → unlisted, everything else → public visibility = meta.get("visibility") or "" is_private = meta.get("private", False) or visibility == "only_me" @@ -214,5 +214,5 @@ def strava_to_parsed(meta: dict, streams: dict) -> ParsedActivity: title=meta.get("name") or None, description=meta.get("description") or None, strava_id=str(meta["id"]), - privacy="private" if is_private else "public", + privacy="unlisted" if is_private else "public", ) diff --git a/bincio/extract/timeseries.py b/bincio/extract/timeseries.py index 6b6a0b7..dc164b8 100644 --- a/bincio/extract/timeseries.py +++ b/bincio/extract/timeseries.py @@ -14,13 +14,14 @@ def build_timeseries( ) -> dict: """Return the BAS `timeseries` object. - privacy='no_gps' or 'private' → lat/lon set to null. + privacy='no_gps' → lat/lon set to null. All other privacy levels + (including 'unlisted') retain GPS in the timeseries. Downsamples so at most one point per second is emitted. """ if not points: return {"t": []} - include_gps = privacy not in ("no_gps", "private") + include_gps = privacy not in ("no_gps", "private") # "private" = legacy alias for "unlisted" # Downsample: keep at most one point per second sampled: list[DataPoint] = [] diff --git a/bincio/extract/writer.py b/bincio/extract/writer.py index 8648bc6..089c83b 100644 --- a/bincio/extract/writer.py +++ b/bincio/extract/writer.py @@ -48,7 +48,10 @@ def write_activity( acts_dir.mkdir(parents=True, exist_ok=True) source = _infer_source(activity) - has_gps = metrics.bbox is not None and privacy not in ("no_gps", "private") + # "unlisted" activities keep their GPS track (not in the public feed, but the + # URL is not secret — same model as the detail JSON). Only "no_gps" suppresses + # the track. "private" is the legacy alias for "unlisted". + has_gps = metrics.bbox is not None and privacy not in ("no_gps",) # Build timeseries once — written to a separate file to keep detail JSON small. # Treat an empty timeseries (no points) as None so no file is created. @@ -220,7 +223,7 @@ def build_summary( privacy: str = "public", ) -> dict: """Build the Activity Summary object for index.json.""" - has_gps = metrics.bbox is not None and privacy not in ("no_gps", "private") + has_gps = metrics.bbox is not None and privacy not in ("no_gps",) return { "id": activity_id, "title": activity.title or _auto_title(activity), diff --git a/bincio/render/merge.py b/bincio/render/merge.py index c90cb35..7c82308 100644 --- a/bincio/render/merge.py +++ b/bincio/render/merge.py @@ -49,7 +49,7 @@ def apply_sidecar(detail: dict, fm: dict, body: str) -> dict: if "highlight" in fm: d["custom"]["highlight"] = bool(fm["highlight"]) if "private" in fm: - d["privacy"] = "private" if fm["private"] else detail.get("privacy", "public") + d["privacy"] = "unlisted" if fm["private"] else detail.get("privacy", "public") if "hide_stats" in fm: d["custom"]["hide_stats"] = [str(s) for s in (fm["hide_stats"] or [])] @@ -69,7 +69,7 @@ def _apply_sidecar_summary(summary: dict, fm: dict) -> dict: if "highlight" in fm: s["custom"]["highlight"] = bool(fm["highlight"]) if "private" in fm: - s["privacy"] = "private" if fm["private"] else summary.get("privacy", "public") + s["privacy"] = "unlisted" if fm["private"] else summary.get("privacy", "public") return s @@ -260,10 +260,10 @@ def merge_all(data_dir: Path) -> int: s = _apply_sidecar_summary(s, fm) activities.append(s) - # Drop private activities from the published feed - # Sort: newest first, then bring highlighted activities to the top - # Private activities are kept in the index so the owner can see them; - # the feed UI filters them out for non-owners client-side. + # "unlisted" (and legacy "private") activities are kept in the index so + # the owner can reach them by direct URL; the feed UI filters them out + # for non-owners client-side. + # Sort: newest first, then bring highlighted activities to the top. activities.sort(key=lambda a: a.get("started_at", ""), reverse=True) activities.sort(key=lambda a: 0 if a.get("custom", {}).get("highlight") else 1) diff --git a/bincio/serve/server.py b/bincio/serve/server.py index 9a94e70..c8bb828 100644 --- a/bincio/serve/server.py +++ b/bincio/serve/server.py @@ -879,7 +879,7 @@ async def upload_strava_zip( if not file.filename or not file.filename.lower().endswith(".zip"): raise HTTPException(400, "Please upload a .zip file") - privacy = "private" if private.lower() in ("true", "1", "yes") else "public" + privacy = "unlisted" if private.lower() in ("true", "1", "yes") else "public" dd = _get_data_dir() / user.handle import tempfile diff --git a/issues.md b/issues.md new file mode 100644 index 0000000..1a7bb6b --- /dev/null +++ b/issues.md @@ -0,0 +1,99 @@ +# Repository Audit — 2026-03-30 + +## CRITICAL + +| # | Area | File | Issue | Status | +|---|------|------|-------|--------| +| 1 | Security | `edit/server.py:351,386,431,588` | **Path traversal** — `activity_id` from URL is interpolated into filesystem paths without sanitization. Attacker can read/write/delete arbitrary files via `../../` | ✅ Fixed — `_check_id()` validates against `[a-zA-Z0-9\-]+` | +| 2 | Security | `edit/server.py:588` | **Path traversal in `delete_image`** — `filename` param is not `.name`-stripped (unlike `upload_image`), enabling deletion of arbitrary files | ✅ Fixed — `Path(filename).name` strip added | +| 3 | Security | `edit/server.py:542` | **Path traversal in `upload_activity`** — uploaded `file.filename` used directly for staged path | ✅ Fixed — `Path(file.filename).name` strip added | +| 4 | XSS | `ActivityDetail.svelte:67,189` | `marked()` output rendered with `{@html}` without sanitization. Sidecar markdown can inject `