backend: initial commit
This commit is contained in:
+28
@@ -0,0 +1,28 @@
|
|||||||
|
# Python
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*.egg-info/
|
||||||
|
.venv/
|
||||||
|
dist/
|
||||||
|
build/
|
||||||
|
.mypy_cache/
|
||||||
|
.ruff_cache/
|
||||||
|
.pytest_cache/
|
||||||
|
htmlcov/
|
||||||
|
.coverage
|
||||||
|
.idea*
|
||||||
|
|
||||||
|
# uv
|
||||||
|
uv.lock
|
||||||
|
|
||||||
|
# Node / Astro
|
||||||
|
site/node_modules/
|
||||||
|
site/dist/
|
||||||
|
site/.astro/
|
||||||
|
|
||||||
|
# BAS data stores (user data, not committed to the tool repo)
|
||||||
|
bincio_data/
|
||||||
|
*.bincio_cache.json
|
||||||
|
|
||||||
|
# OS
|
||||||
|
.DS_Store
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
3.12
|
||||||
@@ -0,0 +1,369 @@
|
|||||||
|
# BincioActivity Schema (BAS) — v1.0
|
||||||
|
|
||||||
|
The BincioActivity Schema defines how activity data is stored and shared as
|
||||||
|
plain JSON files. It is the **federation protocol**: if you publish a
|
||||||
|
BAS-compliant data store, any BincioActivity instance can read it.
|
||||||
|
|
||||||
|
Any tool — in any language — can produce BAS-compliant JSON without using the
|
||||||
|
`bincio` Python package. The schema is the contract; the package is one
|
||||||
|
implementation.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Files
|
||||||
|
|
||||||
|
A BAS data store is a directory (or URL prefix) with this structure:
|
||||||
|
|
||||||
|
```
|
||||||
|
{store_root}/
|
||||||
|
index.json ← user manifest and activity feed
|
||||||
|
index_{year}.json ← optional yearly shards (large datasets)
|
||||||
|
activities/
|
||||||
|
{id}.json ← full activity detail
|
||||||
|
{id}.geojson ← simplified GPS track (optional)
|
||||||
|
```
|
||||||
|
|
||||||
|
All files are UTF-8 JSON. All timestamps are ISO 8601 with a timezone offset (`Z` is accepted for UTC).
|
||||||
|
All distances are in metres. All speeds are in km/h. All durations are in
|
||||||
|
seconds. `null` means "not recorded / not available".
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## `index.json`
|
||||||
|
|
||||||
|
The entry point for a data store.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"bas_version": "1.0",
|
||||||
|
"owner": {
|
||||||
|
"handle": "brutsalvadi",
|
||||||
|
"display_name": "Bru",
|
||||||
|
"avatar_url": null
|
||||||
|
},
|
||||||
|
"generated_at": "2026-03-28T10:00:00Z",
|
||||||
|
"shards": [
|
||||||
|
{ "year": 2024, "url": "index_2024.json", "count": 312 }
|
||||||
|
],
|
||||||
|
"activities": [ ... ]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Fields
|
||||||
|
|
||||||
|
| Field | Type | Required | Description |
|
||||||
|
|---|---|---|---|
|
||||||
|
| `bas_version` | string | yes | Schema version. Currently `"1.0"`. |
|
||||||
|
| `owner.handle` | string | yes | URL-safe identifier, e.g. `"brutsalvadi"`. |
|
||||||
|
| `owner.display_name` | string | yes | Human-readable name. |
|
||||||
|
| `owner.avatar_url` | string\|null | no | Absolute URL to an avatar image. |
|
||||||
|
| `generated_at` | string | yes | ISO 8601 timestamp of when this file was generated. |
|
||||||
|
| `shards` | array | no | Pointers to yearly shard files. See below. |
|
||||||
|
| `activities` | array | yes | Array of **Activity Summary** objects. May be empty. |
|
||||||
|
|
||||||
|
`index.json` should contain all activities when the total count is under ~5,000.
|
||||||
|
Above that, use yearly shards and keep only the most recent 200 activities
|
||||||
|
inline in `index.json` for fast feed rendering.
|
||||||
|
|
||||||
|
### Shard object
|
||||||
|
|
||||||
|
| Field | Type | Description |
|
||||||
|
|---|---|---|
|
||||||
|
| `year` | integer | Calendar year covered by this shard. |
|
||||||
|
| `url` | string | Relative or absolute URL to the shard file. |
|
||||||
|
| `count` | integer | Number of activities in the shard. |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Activity Summary object
|
||||||
|
|
||||||
|
Appears in `index.json` (and yearly shard files). Contains only the fields
|
||||||
|
needed to render an activity card in a feed — no timeseries, no full track.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"id": "2024-06-01T073012+0200-morning-ride",
|
||||||
|
"title": "Morning Ride",
|
||||||
|
"sport": "cycling",
|
||||||
|
"sub_sport": "road",
|
||||||
|
"started_at": "2024-06-01T07:30:12+02:00",
|
||||||
|
"distance_m": 42300.0,
|
||||||
|
"duration_s": 5400,
|
||||||
|
"moving_time_s": 5100,
|
||||||
|
"elevation_gain_m": 620.0,
|
||||||
|
"avg_speed_kmh": 28.2,
|
||||||
|
"max_speed_kmh": 52.1,
|
||||||
|
"avg_hr_bpm": 148,
|
||||||
|
"max_hr_bpm": 178,
|
||||||
|
"avg_cadence_rpm": 88,
|
||||||
|
"avg_power_w": null,
|
||||||
|
"source": "strava_export",
|
||||||
|
"privacy": "public",
|
||||||
|
"detail_url": "activities/2024-06-01T073012+0200-morning-ride.json",
|
||||||
|
"track_url": "activities/2024-06-01T073012+0200-morning-ride.geojson"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Fields
|
||||||
|
|
||||||
|
| Field | Type | Required | Description |
|
||||||
|
|---|---|---|---|
|
||||||
|
| `id` | string | yes | Unique identifier. See **Activity ID** section. |
|
||||||
|
| `title` | string | yes | Human-readable name. May be auto-generated if not in source. |
|
||||||
|
| `sport` | string | yes | One of: `cycling`, `running`, `hiking`, `walking`, `swimming`, `other`. |
|
||||||
|
| `sub_sport` | string\|null | no | e.g. `road`, `mountain`, `gravel`, `indoor`, `trail`. |
|
||||||
|
| `started_at` | string | yes | ISO 8601 timestamp with timezone. |
|
||||||
|
| `distance_m` | number\|null | no | Total distance in metres. |
|
||||||
|
| `duration_s` | integer\|null | no | Total elapsed time in seconds. |
|
||||||
|
| `moving_time_s` | integer\|null | no | Time in motion (stopped periods excluded). |
|
||||||
|
| `elevation_gain_m` | number\|null | no | Cumulative positive elevation in metres. |
|
||||||
|
| `avg_speed_kmh` | number\|null | no | Average speed over moving time. |
|
||||||
|
| `max_speed_kmh` | number\|null | no | Maximum instantaneous speed. |
|
||||||
|
| `avg_hr_bpm` | integer\|null | no | Average heart rate. |
|
||||||
|
| `max_hr_bpm` | integer\|null | no | Maximum heart rate. |
|
||||||
|
| `avg_cadence_rpm` | integer\|null | no | Average cadence (rpm for cycling, spm for running). |
|
||||||
|
| `avg_power_w` | integer\|null | no | Average power in watts. |
|
||||||
|
| `source` | string\|null | no | Origin of data. See **Source values**. |
|
||||||
|
| `privacy` | string | yes | One of: `public`, `blur_start`, `no_gps`, `private`. |
|
||||||
|
| `detail_url` | string\|null | no | Relative or absolute URL to the full activity JSON. |
|
||||||
|
| `track_url` | string\|null | no | Relative or absolute URL to the GeoJSON track. `null` if `privacy` is `no_gps`. |
|
||||||
|
|
||||||
|
### Activity ID
|
||||||
|
|
||||||
|
The canonical ID format is:
|
||||||
|
|
||||||
|
```
|
||||||
|
{started_at_compact}[-{slug}]
|
||||||
|
```
|
||||||
|
|
||||||
|
Where `started_at_compact` is the start timestamp with its colons
|
||||||
|
removed: `2024-06-01T073012+0200`, and `slug` is an optional URL-safe
|
||||||
|
lowercase title (spaces → hyphens, non-ASCII stripped).
|
||||||
|
|
||||||
|
Example: `2024-06-01T073012+0200-morning-ride`
|
||||||
|
|
||||||
|
IDs must be unique within a data store. When a title is unavailable, the
|
||||||
|
timestamp alone is sufficient: `2024-06-01T073012+0200`.
|
||||||
|
|
||||||
|
### Source values
|
||||||
|
|
||||||
|
| Value | Description |
|
||||||
|
|---|---|
|
||||||
|
| `strava_export` | Strava bulk data export |
|
||||||
|
| `garmin_connect` | Garmin Connect bulk export |
|
||||||
|
| `wahoo` | Wahoo ELEMNT / SYSTM export |
|
||||||
|
| `komoot` | Komoot GPX export |
|
||||||
|
| `gpx_file` | Generic GPX file |
|
||||||
|
| `fit_file` | Generic FIT file |
|
||||||
|
| `tcx_file` | Generic TCX file |
|
||||||
|
| `karoo` | Hammerhead Karoo device export |
|
||||||
|
| `manual` | Manually created |
|
||||||
|
|
||||||
|
### Privacy levels
|
||||||
|
|
||||||
|
| Level | GPS track published | Timeseries lat/lon | Stats in index |
|
||||||
|
|---|---|---|---|
|
||||||
|
| `public` | Full track | Included | Yes |
|
||||||
|
| `blur_start` | First/last 200 m removed | Trimmed | Yes |
|
||||||
|
| `no_gps` | Not published | Not included | Yes |
|
||||||
|
| `private` | Not published | Not included | No (not in index at all) |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## `activities/{id}.json`
|
||||||
|
|
||||||
|
Full activity record. Extends the Summary with timeseries and metadata.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"bas_version": "1.0",
|
||||||
|
"id": "2024-06-01T073012+0200-morning-ride",
|
||||||
|
"title": "Morning Ride",
|
||||||
|
"description": "Easy morning spin before work.",
|
||||||
|
"sport": "cycling",
|
||||||
|
"sub_sport": "road",
|
||||||
|
"started_at": "2024-06-01T07:30:12+02:00",
|
||||||
|
"distance_m": 42300.0,
|
||||||
|
"duration_s": 5400,
|
||||||
|
"moving_time_s": 5100,
|
||||||
|
"elevation_gain_m": 620.0,
|
||||||
|
"elevation_loss_m": 615.0,
|
||||||
|
"avg_speed_kmh": 28.2,
|
||||||
|
"max_speed_kmh": 52.1,
|
||||||
|
"avg_hr_bpm": 148,
|
||||||
|
"max_hr_bpm": 178,
|
||||||
|
"avg_cadence_rpm": 88,
|
||||||
|
"avg_power_w": null,
|
||||||
|
"max_power_w": null,
|
||||||
|
"gear": "Canyon Ultimate CF SL",
|
||||||
|
"device": "Hammerhead Karoo 2",
|
||||||
|
"bbox": [9.1234, 45.4321, 9.5678, 45.8765],
|
||||||
|
"start_latlng": [45.4321, 9.1234],
|
||||||
|
"end_latlng": [45.4321, 9.1235],
|
||||||
|
"laps": [],
|
||||||
|
"timeseries": {
|
||||||
|
"t": [0, 1, 2],
|
||||||
|
"lat": [45.4321, 45.4322, 45.4323],
|
||||||
|
"lon": [9.1234, 9.1235, 9.1236],
|
||||||
|
"elevation_m": [120.0, 120.5, 121.0],
|
||||||
|
"speed_kmh": [0.0, 15.2, 22.4],
|
||||||
|
"hr_bpm": [null, 142, 145],
|
||||||
|
"cadence_rpm": [null, 85, 88],
|
||||||
|
"power_w": [null, null, null],
|
||||||
|
"temperature_c": [null, null, null]
|
||||||
|
},
|
||||||
|
"source": "karoo",
|
||||||
|
"source_file": "13957.activity.abc123.fit",
|
||||||
|
"source_hash": "sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
|
||||||
|
"strava_id": null,
|
||||||
|
"privacy": "public",
|
||||||
|
"custom": {}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Additional fields (beyond Summary)
|
||||||
|
|
||||||
|
| Field | Type | Required | Description |
|
||||||
|
|---|---|---|---|
|
||||||
|
| `description` | string\|null | no | Free-text description. |
|
||||||
|
| `elevation_loss_m` | number\|null | no | Cumulative negative elevation. |
|
||||||
|
| `max_power_w` | integer\|null | no | Maximum power in watts. |
|
||||||
|
| `gear` | string\|null | no | Equipment used (bike name, shoe model…). |
|
||||||
|
| `device` | string\|null | no | Recording device (e.g. `"Garmin Edge 530"`). |
|
||||||
|
| `bbox` | array\|null | no | `[min_lon, min_lat, max_lon, max_lat]`. Null if no GPS. |
|
||||||
|
| `start_latlng` | array\|null | no | `[lat, lon]` of activity start. |
|
||||||
|
| `end_latlng` | array\|null | no | `[lat, lon]` of activity end. |
|
||||||
|
| `laps` | array | yes | Array of **Lap** objects. Empty array if no laps. |
|
||||||
|
| `timeseries` | object | yes | Parallel arrays of sensor data. See below. |
|
||||||
|
| `source_file` | string\|null | no | Original filename (basename only, no path). |
|
||||||
|
| `source_hash` | string\|null | no | `sha256:{hex}` of the original raw file bytes. Used for deduplication. |
|
||||||
|
| `strava_id` | string\|null | no | Strava activity ID if origin is a Strava export. |
|
||||||
|
| `custom` | object | yes | Free dict for plugin-computed fields. Must be present, may be `{}`. |
|
||||||
|
|
||||||
|
### Timeseries object
|
||||||
|
|
||||||
|
Parallel arrays, all the same length. Index `i` corresponds to `t[i]` seconds
|
||||||
|
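after the activity start. Elements of the sensor arrays (e.g. `hr_bpm`) may be `null` where a sample is missing, even though the Type column lists only the element type.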
|
||||||
|
|
||||||
|
| Key | Type | Unit | Description |
|
||||||
|
|---|---|---|---|
|
||||||
|
| `t` | int[] | seconds | Seconds since `started_at`. Always present. |
|
||||||
|
| `lat` | float[]\|null | degrees | Latitude. `null` if no GPS or privacy=`no_gps`. |
|
||||||
|
| `lon` | float[]\|null | degrees | Longitude. `null` if no GPS or privacy=`no_gps`. |
|
||||||
|
| `elevation_m` | float[] | metres | Elevation. Array of nulls if unavailable. |
|
||||||
|
| `speed_kmh` | float[] | km/h | Speed. Array of nulls if unavailable. |
|
||||||
|
| `hr_bpm` | int[] | bpm | Heart rate. Array of nulls if no HR sensor. |
|
||||||
|
| `cadence_rpm` | int[] | rpm/spm | Cadence. Array of nulls if unavailable. |
|
||||||
|
| `power_w` | int[] | watts | Power. Array of nulls if no power meter. |
|
||||||
|
| `temperature_c` | float[] | °C | Temperature. Array of nulls if unavailable. |
|
||||||
|
|
||||||
|
Timeseries are downsampled to at most 1 sample per second. The exact
|
||||||
|
downsampling strategy is implementation-defined; linear interpolation or
|
||||||
|
nearest-neighbour are both acceptable.
|
||||||
|
|
||||||
|
`lat` and `lon` arrays are either both present (both non-null arrays) or both
|
||||||
|
`null`. Treat `null` the same as an array of nulls.
|
||||||
|
|
||||||
|
### Lap object
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"index": 0,
|
||||||
|
"started_at": "2024-06-01T07:30:12+02:00",
|
||||||
|
"duration_s": 1800,
|
||||||
|
"distance_m": 21150.0,
|
||||||
|
"elevation_gain_m": 310.0,
|
||||||
|
"avg_speed_kmh": 28.2,
|
||||||
|
"avg_hr_bpm": 145,
|
||||||
|
"avg_power_w": null
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## `activities/{id}.geojson`
|
||||||
|
|
||||||
|
Simplified GPS track for map rendering. Omitted entirely when
|
||||||
|
`privacy` is `no_gps` or `private`.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"type": "Feature",
|
||||||
|
"geometry": {
|
||||||
|
"type": "LineString",
|
||||||
|
"coordinates": [
|
||||||
|
[9.1234, 45.4321, 120.0],
|
||||||
|
[9.1235, 45.4322, 120.5]
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"properties": {
|
||||||
|
"id": "2024-06-01T073012+0200-morning-ride",
|
||||||
|
"speeds": [0.0, 15.2],
|
||||||
|
"simplification": "rdp",
|
||||||
|
"rdp_epsilon": 0.0001,
|
||||||
|
"point_count_original": 7200,
|
||||||
|
"point_count_simplified": 843
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Coordinates are `[longitude, latitude, elevation_metres]` per GeoJSON spec.
|
||||||
|
The `speeds` property is a parallel array to `coordinates` — one speed value
|
||||||
|
per point — used for gradient coloring on the map.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Deduplication
|
||||||
|
|
||||||
|
Activities from different sources (e.g. a Strava export and a Karoo export)
|
||||||
|
may represent the same real-world ride. Producers should detect and handle
|
||||||
|
duplicates before writing the data store.
|
||||||
|
|
||||||
|
### Exact duplicate
|
||||||
|
Two files with the same `source_hash` are byte-for-byte identical. Only one
|
||||||
|
should be processed; the other is silently skipped.
|
||||||
|
|
||||||
|
### Near-duplicate (same ride, different source)
|
||||||
|
Two activities are considered near-duplicates if:
|
||||||
|
- `|started_at difference|` < 5 minutes, **and**
|
||||||
|
- `|distance_m_a − distance_m_b| / max(distance_m_a, distance_m_b)` < 5%
|
||||||
|
|
||||||
|
When a near-duplicate is detected:
|
||||||
|
1. One is kept as the **canonical** record (priority: FIT > GPX > TCX,
|
||||||
|
then prefer the source with more sensor channels).
|
||||||
|
2. The duplicate is written with `"duplicate_of": "{canonical_id}"` and
|
||||||
|
`"privacy": "private"` so it is excluded from feeds but remains auditable.
|
||||||
|
|
||||||
|
### Deduplication metadata in detail record
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"source_hash": "sha256:e3b0c...",
|
||||||
|
"duplicate_of": null
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
| Field | Type | Description |
|
||||||
|
|---|---|---|
|
||||||
|
| `source_hash` | string\|null | `sha256:{hex}` of original file bytes. |
|
||||||
|
| `duplicate_of` | string\|null | ID of the canonical activity, if this is a duplicate. |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Versioning
|
||||||
|
|
||||||
|
The `bas_version` field allows consumers to handle schema evolution. Consumers
|
||||||
|
should:
|
||||||
|
- Reject files with a major version higher than they support.
|
||||||
|
- Accept and ignore unknown fields (forward compatibility).
|
||||||
|
- Treat missing optional fields as `null` (backward compatibility).
|
||||||
|
|
||||||
|
Current version: **1.0**
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Changelog
|
||||||
|
|
||||||
|
| Version | Date | Changes |
|
||||||
|
|---|---|---|
|
||||||
|
| 1.0 | 2026-03-28 | Initial release. |
|
||||||
@@ -0,0 +1,3 @@
|
|||||||
|
"""BincioActivity — federated, open-source activity stats."""
|
||||||
|
|
||||||
|
__version__ = "0.1.0"
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
"""Top-level CLI entry point: `bincio extract` and `bincio render`."""
|
||||||
|
|
||||||
|
import click
|
||||||
|
|
||||||
|
from bincio import __version__
|
||||||
|
|
||||||
|
|
||||||
|
@click.group()
@click.version_option(__version__)
def main() -> None:
    """BincioActivity — federated, open-source activity stats."""
    # The docstring above doubles as the text click prints for `--help`;
    # the group itself has no behaviour beyond dispatching to subcommands.


# The subcommand modules are imported only after `main` is defined, which is
# why the E402 (import-not-at-top) lint is suppressed.
# NOTE(review): presumably this ordering avoids a circular import — confirm.
from bincio.extract.cli import extract  # noqa: E402
from bincio.render.cli import render  # noqa: E402

# Expose the subcommands as `bincio extract` and `bincio render`.
main.add_command(extract)
main.add_command(render)
|
||||||
@@ -0,0 +1,271 @@
|
|||||||
|
"""bincio extract — CLI command."""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
from concurrent.futures import ProcessPoolExecutor, as_completed
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import click
|
||||||
|
from rich.console import Console
|
||||||
|
from rich.progress import BarColumn, MofNCompleteColumn, Progress, TextColumn, TimeElapsedColumn
|
||||||
|
|
||||||
|
from bincio.extract.config import ExtractConfig, default_config, load_config
|
||||||
|
from bincio.extract.dedup import ActivityRecord, DedupIndex
|
||||||
|
from bincio.extract.metrics import compute
|
||||||
|
from bincio.extract.models import ParsedActivity
|
||||||
|
from bincio.extract.parsers.factory import is_supported, parse_file
|
||||||
|
from bincio.extract.strava_csv import StravaMetadata
|
||||||
|
from bincio.extract.writer import build_summary, make_activity_id, write_activity, write_index
|
||||||
|
|
||||||
|
console = Console()
|
||||||
|
|
||||||
|
|
||||||
|
# Developer notes (kept out of the docstring, which click renders as --help):
#   * --file short-circuits everything else and prints a single summary.
#   * Otherwise files are parsed in worker processes, then enriched,
#     measured, de-duplicated and written one by one in the parent process.
#   * index.json is rebuilt from the previous index plus this run's summaries,
#     minus any record that was superseded by a better near-duplicate.
@click.command()
@click.option("--config", "config_path", type=click.Path(exists=True), default=None,
              help="Path to extract_config.yaml (default: ./extract_config.yaml).")
@click.option("--input", "input_dir", type=click.Path(exists=True), default=None,
              help="Input directory (overrides config).")
@click.option("--output", "output_dir", type=click.Path(), default=None,
              help="Output directory (overrides config).")
@click.option("--file", "single_file", type=click.Path(exists=True), default=None,
              help="Process a single file and print JSON to stdout.")
@click.option("--since", default=None, metavar="YYYY-MM-DD",
              help="Only process files modified after this date.")
@click.option("--workers", default=4, show_default=True, type=click.IntRange(min=1),
              help="Number of parallel worker processes.")
def extract(
    config_path: Optional[str],
    input_dir: Optional[str],
    output_dir: Optional[str],
    single_file: Optional[str],
    since: Optional[str],
    workers: int,
) -> None:
    """Parse GPX/FIT/TCX files and write BAS JSON data store."""

    # ── single file mode ─────────────────────────────────────────────────────
    if single_file:
        _process_single(Path(single_file))
        return

    # ── load config ──────────────────────────────────────────────────────────
    cfg = _resolve_config(config_path, input_dir, output_dir)
    cfg.output_dir.mkdir(parents=True, exist_ok=True)

    # ── gather files ─────────────────────────────────────────────────────────
    files = _collect_files(cfg, since)
    if not files:
        console.print("[yellow]No supported files found.[/yellow]")
        return
    console.print(f"Found [bold]{len(files)}[/bold] activity files.")

    # ── Strava metadata ──────────────────────────────────────────────────────
    strava_meta: Optional[StravaMetadata] = None
    if cfg.metadata_csv and cfg.metadata_csv.exists():
        strava_meta = StravaMetadata(cfg.metadata_csv)
        console.print(f"Loaded Strava metadata from [cyan]{cfg.metadata_csv.name}[/cyan].")

    # ── dedup index ──────────────────────────────────────────────────────────
    dedup = DedupIndex(output_dir=cfg.output_dir)

    # ── process ──────────────────────────────────────────────────────────────
    summaries: list[dict] = []
    errors: list[tuple[Path, str]] = []
    skipped = 0
    # IDs of already-known records that lost to a better near-duplicate in
    # this run; they must not be listed in index.json any more.
    superseded_ids: set[str] = set()

    owner = {"handle": cfg.owner_handle, "display_name": cfg.owner_display_name}

    with Progress(
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        MofNCompleteColumn(),
        TimeElapsedColumn(),
        console=console,
    ) as progress:
        task = progress.add_task("Processing...", total=len(files))

        with ProcessPoolExecutor(max_workers=workers) as pool:
            futures = {pool.submit(_parse_worker, f): f for f in files}
            for future in as_completed(futures):
                path = futures[future]
                progress.advance(task)
                try:
                    activity = future.result()
                except Exception as exc:
                    # A file that fails to parse is reported at the end and
                    # does not abort the rest of the batch.
                    errors.append((path, str(exc)))
                    continue

                # ── incremental skip ──────────────────────────────────────
                if cfg.incremental:
                    existing_id = dedup.is_exact_duplicate(activity.source_hash)
                    if existing_id:
                        skipped += 1
                        continue

                # ── enrich from Strava CSV ────────────────────────────────
                if strava_meta:
                    strava_meta.enrich(activity.source_file, activity)

                # ── compute metrics ───────────────────────────────────────
                metrics = compute(activity)

                # ── deduplication ─────────────────────────────────────────
                activity_id = make_activity_id(activity)
                # Inferred once and reused for both the canonical pick and
                # the dedup registration below.
                source = _infer_source(activity)
                duplicate_of: Optional[str] = None

                near_dup_id = dedup.find_near_duplicate(
                    activity.started_at, metrics.distance_m
                )
                if near_dup_id:
                    canonical = dedup.pick_canonical(near_dup_id, source)
                    if canonical == "__new__":
                        # New one is better — mark existing as duplicate and
                        # remember to drop it from the published index.
                        # TODO(review): the superseded record's detail file on
                        # disk is not rewritten here; only the dedup index and
                        # index.json reflect the change.
                        existing = dedup._records[near_dup_id]
                        existing.duplicate_of = activity_id
                        superseded_ids.add(near_dup_id)
                    else:
                        duplicate_of = near_dup_id

                # ── write files ───────────────────────────────────────────
                written_id = write_activity(
                    activity, metrics, cfg.output_dir,
                    privacy=cfg.default_privacy,
                    duplicate_of=duplicate_of,
                    rdp_epsilon=cfg.track.rdp_epsilon,
                )

                # Register in dedup index
                dedup.register(ActivityRecord(
                    id=written_id,
                    source_hash=activity.source_hash,
                    started_at=activity.started_at,
                    distance_m=metrics.distance_m,
                    source=source,
                ))

                if duplicate_of is None:
                    # A canonical record now lives under this ID, so the ID
                    # must stay in the index even if an earlier record that
                    # shared it was superseded.
                    superseded_ids.discard(written_id)
                    summaries.append(
                        build_summary(activity, metrics, written_id, cfg.default_privacy)
                    )

    # ── write index.json ──────────────────────────────────────────────────────
    # Merge with any existing summaries from previous incremental runs; entries
    # from this run win on ID clashes, and superseded records are removed.
    existing_index = _load_existing_summaries(cfg.output_dir)
    all_summaries = {s["id"]: s for s in existing_index}
    for s in summaries:
        all_summaries[s["id"]] = s
    for stale_id in superseded_ids:
        all_summaries.pop(stale_id, None)
    write_index(list(all_summaries.values()), cfg.output_dir, owner)
    dedup.save()

    # ── summary ───────────────────────────────────────────────────────────────
    console.print(
        f"\n[green]Done.[/green] "
        f"Processed [bold]{len(summaries)}[/bold] activities, "
        f"skipped [bold]{skipped}[/bold] (already up to date), "
        f"errors [bold]{len(errors)}[/bold]."
    )
    if errors:
        console.print("\n[red]Errors:[/red]")
        # Cap the listing so a badly broken input directory stays readable.
        for path, msg in errors[:20]:
            console.print(f"  {path.name}: {msg}")
        if len(errors) > 20:
            console.print(f"  ... and {len(errors) - 20} more.")
|
||||||
|
|
||||||
|
|
||||||
|
# ── helpers ───────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _parse_worker(path: Path) -> ParsedActivity:
    """Parse one activity file; this is the callable submitted to the process pool."""
    # The parser factory is imported inside the function body, so the import
    # is executed in whichever process runs the call.
    from bincio.extract.parsers.factory import parse_file as _parse

    parsed = _parse(path)
    return parsed
|
||||||
|
|
||||||
|
|
||||||
|
def _process_single(path: Path) -> None:
    """Parse one file and print its Activity Summary as JSON on stdout.

    Used by ``--file``. Unlike the batch path, this does not call
    ``write_activity`` / ``write_index`` and does not consult the dedup index.

    Args:
        path: The activity file to parse.

    Raises:
        SystemExit: With status 1 when parsing or summarising fails. The
            error text is written to stderr so that stdout only ever carries
            the JSON payload (safe for piping into other tools).
    """
    try:
        activity = parse_file(path)
        metrics = compute(activity)
        summary = build_summary(activity, metrics, make_activity_id(activity))
        payload = json.dumps(summary, indent=2)
    except Exception as exc:
        # Top-level CLI boundary: report any failure and exit non-zero.
        Console(stderr=True).print(f"[red]Error:[/red] {exc}")
        sys.exit(1)
    click.echo(payload)
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_config(
    config_path: Optional[str],
    input_dir: Optional[str],
    output_dir: Optional[str],
) -> ExtractConfig:
    """Build the effective configuration from a YAML file and/or CLI flags.

    Lookup order for the base configuration: an explicit ``--config`` path,
    then ``extract_config.yaml`` in the current directory, then defaults
    built from ``--input`` (output falling back to ``./bincio_data``).
    ``--input`` and ``--output`` then override the corresponding values.

    Raises:
        click.UsageError: If none of the three sources is available.
    """
    local_yaml = Path("extract_config.yaml")

    # Pick the YAML file to load, if any; the working-directory file is only
    # probed when no explicit path was given.
    if config_path:
        yaml_file: Optional[Path] = Path(config_path)
    elif local_yaml.exists():
        yaml_file = local_yaml
    else:
        yaml_file = None

    if yaml_file is not None:
        cfg = load_config(yaml_file)
    elif input_dir:
        fallback_output = output_dir or "./bincio_data"
        cfg = default_config(
            Path(input_dir).expanduser(),
            Path(fallback_output).expanduser(),
        )
    else:
        raise click.UsageError(
            "Provide --config, --input, or an extract_config.yaml in the current directory."
        )

    # Command-line flags take precedence over values from the config file.
    if input_dir:
        cfg.input_dirs = [Path(input_dir).expanduser()]
    if output_dir:
        cfg.output_dir = Path(output_dir).expanduser()
    return cfg
|
||||||
|
|
||||||
|
|
||||||
|
def _collect_files(cfg: ExtractConfig, since: Optional[str]) -> list[Path]:
    """Recursively list the supported activity files under the input dirs.

    Args:
        cfg: Configuration whose ``input_dirs`` are walked. Missing
            directories produce a warning and are skipped.
        since: Optional ``YYYY-MM-DD`` date; files whose modification time is
            earlier than midnight (local time) of that date are excluded.

    Returns:
        Paths of regular files accepted by ``is_supported``.

    Raises:
        click.BadParameter: If ``since`` is not a valid ``YYYY-MM-DD`` date.
    """
    from datetime import datetime

    since_ts: Optional[float] = None
    if since:
        try:
            since_ts = datetime.strptime(since, "%Y-%m-%d").timestamp()
        except ValueError as exc:
            # Surface a normal usage error instead of a traceback.
            raise click.BadParameter(
                f"expected YYYY-MM-DD, got {since!r}", param_hint="--since"
            ) from exc

    files: list[Path] = []
    for d in cfg.input_dirs:
        if not d.exists():
            console.print(f"[yellow]Warning:[/yellow] input dir not found: {d}")
            continue
        for path in d.rglob("*"):
            if not path.is_file() or not is_supported(path):
                continue
            # Compare against None explicitly: a cutoff of 0.0 is falsy but
            # still a real timestamp.
            if since_ts is not None and path.stat().st_mtime < since_ts:
                continue
            files.append(path)
    return files
|
||||||
|
|
||||||
|
|
||||||
|
def _load_existing_summaries(output_dir: Path) -> list[dict]:
    """Return the activity summaries stored in ``output_dir/index.json``.

    This is a best-effort read used to merge a previous run's index with the
    current one, so every failure mode yields an empty list rather than an
    exception: missing or unreadable file, invalid JSON, a top-level value
    that is not an object, or an ``activities`` value that is not a list.
    Entries that are not objects or carry no ``id`` are dropped, because the
    caller keys the merged index by ``id``.

    Args:
        output_dir: Root directory of the data store.

    Returns:
        A list of summary dicts, each guaranteed to contain an ``"id"`` key.
    """
    index_path = output_dir / "index.json"
    try:
        # BAS files are UTF-8 by specification; do not rely on the locale.
        data = json.loads(index_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError covers a missing/unreadable file; ValueError covers both
        # malformed JSON and undecodable bytes.
        return []

    if not isinstance(data, dict):
        return []
    activities = data.get("activities")
    if not isinstance(activities, list):
        return []
    return [entry for entry in activities if isinstance(entry, dict) and "id" in entry]
|
||||||
|
|
||||||
|
|
||||||
|
def _infer_source(activity: "ParsedActivity") -> Optional[str]:
    """Guess the BAS ``source`` value for an activity.

    Checks, in order: a truthy ``strava_id`` (``"strava_export"``), a
    Karoo-looking file name (``"karoo"``), then the file extension
    (``"fit_file"``, ``"gpx_file"``, ``"tcx_file"``, optionally gzipped).
    Returns ``None`` when nothing matches.
    """
    if activity.strava_id:
        return "strava_export"

    filename = activity.source_file.lower()

    # Karoo heuristic: the name contains "activity" and has at least two dots
    # (e.g. "13957.activity.abc123.fit").
    # NOTE(review): this also matches names such as "my_activity.gpx.gz" —
    # confirm that is intended.
    if "activity" in filename and filename.count(".") >= 2:
        return "karoo"

    by_extension = (
        ("fit_file", (".fit", ".fit.gz")),
        ("gpx_file", (".gpx", ".gpx.gz")),
        ("tcx_file", (".tcx", ".tcx.gz")),
    )
    for label, suffixes in by_extension:
        if filename.endswith(suffixes):
            return label
    return None
|
||||||
@@ -0,0 +1,88 @@
|
|||||||
|
"""Extract stage configuration — loaded from extract_config.yaml."""
|
||||||
|
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class TrackConfig:
    """Settings from the ``track`` section of the extract configuration."""

    # Name of the track simplification method; "rdp" is the default.
    simplify: str = "rdp"
    # Epsilon handed to the activity writer as `rdp_epsilon`.
    rdp_epsilon: float = 0.0001
    # NOTE(review): presumably the timeseries sampling rate in Hz — confirm
    # where this is consumed.
    timeseries_hz: int = 1
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class SensorsConfig:
    """Per-sensor switches from the ``sensors`` config section (all on by default)."""

    # NOTE(review): presumably each flag controls whether that channel is
    # exported — the consumer is not visible here; confirm.
    heart_rate: bool = True
    cadence: bool = True
    temperature: bool = True
    power: bool = True
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ClassifierConfig:
    """Settings from the ``classifier`` config section."""

    # Opt-in feature: stays disabled unless the configuration enables it.
    enabled: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ExtractConfig:
    """Complete configuration for one run of the extract stage.

    Only ``input_dirs`` and ``output_dir`` are required; every other field
    has a default.
    """

    # Directories searched for activity files.
    input_dirs: list[Path]
    # Root of the data store that is written.
    output_dir: Path
    # Optional Strava metadata CSV used to enrich parsed activities.
    metadata_csv: Optional[Path] = None
    # Privacy level applied to written activities.
    default_privacy: str = "public"
    sensors: SensorsConfig = field(default_factory=SensorsConfig)
    track: TrackConfig = field(default_factory=TrackConfig)
    classifier: ClassifierConfig = field(default_factory=ClassifierConfig)
    # When true, files whose hash is already in the dedup index are skipped.
    incremental: bool = True
    # Owner identity written into the index.
    owner_handle: str = "me"
    owner_display_name: str = "Me"
|
||||||
|
|
||||||
|
|
||||||
|
def load_config(path: Path) -> ExtractConfig:
    """Load an ``ExtractConfig`` from a YAML file.

    Every section and key is optional; anything absent falls back to the
    same default the dataclasses use. An empty file, or a section key written
    without a value (which YAML loads as ``None``), is treated as an empty
    mapping instead of crashing. ``input.dirs`` may be a list or a single
    path string.

    Args:
        path: Location of the YAML configuration file (read as UTF-8).

    Returns:
        The populated configuration, with ``~`` expanded in all paths.

    Raises:
        ValueError: If the document, or one of its sections, is present but
            is not a mapping.
    """
    raw = yaml.safe_load(path.read_text(encoding="utf-8")) or {}
    if not isinstance(raw, dict):
        raise ValueError(f"{path}: expected a YAML mapping at the top level")

    def _section(key: str) -> dict:
        """Return the mapping stored under *key*, or {} if absent or null."""
        value = raw.get(key) or {}
        if not isinstance(value, dict):
            raise ValueError(f"{path}: section '{key}' must be a mapping")
        return value

    inp = _section("input")
    dir_entries = inp.get("dirs") or []
    if isinstance(dir_entries, str):
        # A bare string would otherwise be iterated character by character.
        dir_entries = [dir_entries]
    dirs = [Path(d).expanduser() for d in dir_entries]
    csv_path = inp.get("metadata_csv")

    out = Path(_section("output").get("dir", "./bincio_data")).expanduser()

    owner = _section("owner")

    sensors_raw = _section("sensors")
    sensors = SensorsConfig(
        heart_rate=sensors_raw.get("heart_rate", True),
        cadence=sensors_raw.get("cadence", True),
        temperature=sensors_raw.get("temperature", True),
        power=sensors_raw.get("power", True),
    )

    track_raw = _section("track")
    track = TrackConfig(
        simplify=track_raw.get("simplify", "rdp"),
        rdp_epsilon=track_raw.get("rdp_epsilon", 0.0001),
        timeseries_hz=track_raw.get("timeseries_hz", 1),
    )

    classifier = ClassifierConfig(enabled=_section("classifier").get("enabled", False))

    return ExtractConfig(
        input_dirs=dirs,
        output_dir=out,
        metadata_csv=Path(csv_path).expanduser() if csv_path else None,
        default_privacy=raw.get("default_privacy", "public"),
        sensors=sensors,
        track=track,
        classifier=classifier,
        incremental=raw.get("incremental", True),
        owner_handle=owner.get("handle", "me"),
        owner_display_name=owner.get("display_name", "Me"),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def default_config(input_dir: Path, output_dir: Path) -> ExtractConfig:
    """Build a config for one input directory, leaving all other settings at their defaults."""
    config = ExtractConfig(
        input_dirs=[input_dir],
        output_dir=output_dir,
    )
    return config
|
||||||
@@ -0,0 +1,127 @@
|
|||||||
|
"""Duplicate activity detection.
|
||||||
|
|
||||||
|
Two kinds of duplicates:
|
||||||
|
|
||||||
|
1. Exact duplicate — same source_hash. Skip entirely.
|
||||||
|
2. Near-duplicate — same ride recorded by two devices / exported from two
|
||||||
|
platforms. Detected by (started_at ± 5 min) AND (distance ± 5%).
|
||||||
|
The "better" source wins; the other gets duplicate_of set.
|
||||||
|
|
||||||
|
The deduplication index is a JSON file persisted in the output directory so
|
||||||
|
that incremental runs don't re-evaluate already-resolved pairs.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
_INDEX_FILE = ".bincio_cache.json"
|
||||||
|
|
||||||
|
# Source quality ranking (higher = preferred when deduplicating)
|
||||||
|
_SOURCE_QUALITY: dict[str, int] = {
|
||||||
|
"karoo": 5,
|
||||||
|
"fit_file": 4,
|
||||||
|
"garmin_connect": 4,
|
||||||
|
"strava_export": 3,
|
||||||
|
"gpx_file": 2,
|
||||||
|
"tcx_file": 1,
|
||||||
|
"wahoo": 3,
|
||||||
|
"komoot": 2,
|
||||||
|
"manual": 0,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ActivityRecord:
    """The subset of an activity that the dedup index persists and compares."""

    # Activity identifier; the key under which the index stores the record.
    id: str
    # Hash of the source file, used for exact-duplicate lookup.
    source_hash: str
    # Start time, compared within a time window for near-duplicates.
    started_at: datetime
    # Total distance in metres; None disables near-duplicate matching.
    distance_m: Optional[float]
    # Name of the originating source, ranked when choosing a canonical record.
    source: Optional[str]
    # ID of the canonical activity when this record is a duplicate.
    duplicate_of: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class DedupIndex:
    """Index of known activities, persisted as JSON in ``output_dir``.

    The index is loaded from disk on construction and written back only
    when :meth:`save` is called.
    """

    output_dir: Path
    # id → record
    _records: dict[str, ActivityRecord] = field(default_factory=dict)
    # source_hash → id, for exact-duplicate lookup
    _by_hash: dict[str, str] = field(default_factory=dict)

    def __post_init__(self) -> None:
        self._load()

    def _load(self) -> None:
        """Populate the index from the cache file; a missing file is not an error."""
        p = self.output_dir / _INDEX_FILE
        if not p.exists():
            return
        data = json.loads(p.read_text())
        for item in data.get("activities", []):
            self.register(
                ActivityRecord(
                    id=item["id"],
                    source_hash=item["source_hash"],
                    started_at=datetime.fromisoformat(item["started_at"]),
                    distance_m=item.get("distance_m"),
                    source=item.get("source"),
                    duplicate_of=item.get("duplicate_of"),
                )
            )

    def save(self) -> None:
        """Write every record to the cache file, creating ``output_dir`` if needed."""
        # A first run may not have created the output directory yet.
        self.output_dir.mkdir(parents=True, exist_ok=True)
        p = self.output_dir / _INDEX_FILE
        data = {
            "activities": [
                {
                    "id": r.id,
                    "source_hash": r.source_hash,
                    "started_at": r.started_at.isoformat(),
                    "distance_m": r.distance_m,
                    "source": r.source,
                    "duplicate_of": r.duplicate_of,
                }
                for r in self._records.values()
            ]
        }
        p.write_text(json.dumps(data, indent=2))

    def is_exact_duplicate(self, source_hash: str) -> Optional[str]:
        """Return existing activity ID if hash is already in the index."""
        return self._by_hash.get(source_hash)

    def find_near_duplicate(
        self,
        started_at: datetime,
        distance_m: Optional[float],
        max_start_delta_s: float = 5 * 60,
        distance_tolerance: float = 0.05,
    ) -> Optional[str]:
        """Return ID of a near-duplicate if one exists.

        A record matches when its start time is within ``max_start_delta_s``
        seconds of ``started_at`` and the two distances differ by less than
        ``distance_tolerance``, measured as a fraction of the larger one.
        Records already marked as duplicates, and records without a
        distance, are never matched.

        Returns:
            The ID of the first matching record, or None.
        """
        if distance_m is None:
            return None  # nothing to compare distances against
        for r in self._records.values():
            if r.duplicate_of is not None or r.distance_m is None:
                continue
            if abs((r.started_at - started_at).total_seconds()) > max_start_delta_s:
                continue
            ref = max(distance_m, r.distance_m)
            # Two zero-length activities are identical in distance; dividing
            # by ref would raise ZeroDivisionError.
            if ref == 0 or abs(distance_m - r.distance_m) / ref < distance_tolerance:
                return r.id
        return None

    def register(self, record: ActivityRecord) -> None:
        """Add ``record`` to the index, replacing any entry with the same ID."""
        self._records[record.id] = record
        self._by_hash[record.source_hash] = record.id

    def pick_canonical(self, existing_id: str, new_source: Optional[str]) -> str:
        """Return the ID of whichever record should be canonical.

        Returns the sentinel ``"__new__"`` when the incoming source ranks
        strictly higher than the existing record's source; otherwise the
        existing ID.  Unknown or missing sources rank 0.

        Raises:
            KeyError: if ``existing_id`` is not in the index.
        """
        existing = self._records[existing_id]
        existing_q = _SOURCE_QUALITY.get(existing.source or "", 0)
        new_q = _SOURCE_QUALITY.get(new_source or "", 0)
        # New record is strictly better → existing becomes the duplicate
        if new_q > existing_q:
            return "__new__"
        return existing_id
|
||||||
@@ -0,0 +1,210 @@
|
|||||||
|
"""Compute aggregated metrics from a ParsedActivity.
|
||||||
|
|
||||||
|
All calculations are self-contained — no external state needed.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import math
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from geopy.distance import geodesic
|
||||||
|
|
||||||
|
from bincio.extract.models import DataPoint, ParsedActivity
|
||||||
|
|
||||||
|
# Speed below which we consider the athlete stopped (km/h)
|
||||||
|
_STOPPED_THRESHOLD_KMH = 1.0
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ComputedMetrics:
    """Aggregated metrics for one activity; every field is None when it could not be computed."""

    # Totals
    distance_m: Optional[float]
    duration_s: Optional[int]
    moving_time_s: Optional[int]
    elevation_gain_m: Optional[float]
    elevation_loss_m: Optional[float]

    # Speed (km/h)
    avg_speed_kmh: Optional[float]
    max_speed_kmh: Optional[float]

    # Sensors
    avg_hr_bpm: Optional[int]
    max_hr_bpm: Optional[int]
    avg_cadence_rpm: Optional[int]
    avg_power_w: Optional[int]
    max_power_w: Optional[int]

    # Geography
    bbox: Optional[tuple[float, float, float, float]]  # min_lon, min_lat, max_lon, max_lat
    start_latlng: Optional[tuple[float, float]]
    end_latlng: Optional[tuple[float, float]]
|
||||||
|
|
||||||
|
|
||||||
|
def compute(activity: ParsedActivity) -> ComputedMetrics:
    """Aggregate an activity's samples into a ``ComputedMetrics``.

    An activity without points yields a record whose fields are all None.
    ``avg_speed_kmh`` is the average over moving time, not elapsed time.
    """
    samples = activity.points
    if not samples:
        return _empty()

    moving_time_s, moving_kmh = _moving_stats(samples)
    climbed, descended = _elevation(samples)
    top_speed = _max_speed(samples)
    mean_hr, peak_hr = _hr_stats(samples)
    first_ll, last_ll = _endpoints(samples)

    return ComputedMetrics(
        distance_m=_distance(samples),
        duration_s=_duration(samples),
        moving_time_s=moving_time_s,
        elevation_gain_m=None if climbed is None else round(climbed, 1),
        # loss is accumulated as a negative number; report its magnitude
        elevation_loss_m=None if descended is None else round(abs(descended), 1),
        avg_speed_kmh=round(moving_kmh, 2) if moving_kmh else None,
        max_speed_kmh=round(top_speed, 2) if top_speed else None,
        avg_hr_bpm=mean_hr,
        max_hr_bpm=peak_hr,
        avg_cadence_rpm=_avg_nonnull([p.cadence_rpm for p in samples]),
        avg_power_w=_avg_nonnull([p.power_w for p in samples]),
        max_power_w=_max_nonnull([p.power_w for p in samples]),
        bbox=_bbox(samples),
        start_latlng=first_ll,
        end_latlng=last_ll,
    )
|
||||||
|
|
||||||
|
|
||||||
|
# ── helpers ──────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _duration(pts: list[DataPoint]) -> Optional[int]:
    """Whole seconds between the first and last sample; None with fewer than two samples."""
    if len(pts) >= 2:
        elapsed = pts[-1].timestamp - pts[0].timestamp
        return int(elapsed.total_seconds())
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def _distance(pts: list[DataPoint]) -> Optional[float]:
    """Total distance in metres, rounded to one decimal.

    The last non-null device distance wins; otherwise the geodesic length
    of consecutive GPS fixes is summed.  Returns None when neither exists.
    """
    for sample in reversed(pts):
        if sample.distance_m is not None:
            return round(sample.distance_m, 1)

    # GPS fallback: only consecutive pairs that both carry a full fix count.
    length_m = 0.0
    saw_fix_pair = False
    for prev, cur in zip(pts, pts[1:]):
        coords = (prev.lat, prev.lon, cur.lat, cur.lon)
        if any(c is None for c in coords):
            continue
        saw_fix_pair = True
        length_m += geodesic((prev.lat, prev.lon), (cur.lat, cur.lon)).meters
    if not saw_fix_pair:
        return None
    return round(length_m, 1)
|
||||||
|
|
||||||
|
|
||||||
|
def _moving_stats(pts: list[DataPoint]) -> tuple[Optional[int], Optional[float]]:
    """Return (moving_time_s, avg_speed_kmh_over_moving_time).

    Each interval between consecutive samples gets a speed: from the GPS
    distance when both ends have a fix, otherwise from the first sample's
    device speed.  Intervals at or above the stopped threshold count as
    moving.  Returns ``(None, None)`` when no interval has usable data or
    none counts as moving.

    Moving time is accumulated exactly and truncated once at the end.
    Truncating each interval would drop sub-second sampling to zero and
    overstate the average speed for fractional intervals.
    """
    from datetime import timedelta  # module scope only imports ``datetime``

    moving = timedelta()
    moving_dist_m = 0.0
    has_motion_data = False

    for a, b in zip(pts, pts[1:]):
        step = b.timestamp - a.timestamp
        dt = step.total_seconds()
        if dt <= 0:
            continue  # duplicate or out-of-order timestamps

        if a.lat is not None and a.lon is not None and b.lat is not None and b.lon is not None:
            seg_m = geodesic((a.lat, a.lon), (b.lat, b.lon)).meters
            seg_kmh = (seg_m / dt) * 3.6
        elif a.speed_kmh is not None:
            seg_kmh = a.speed_kmh
            seg_m = (seg_kmh / 3.6) * dt
        else:
            continue
        has_motion_data = True

        if seg_kmh >= _STOPPED_THRESHOLD_KMH:
            moving += step
            moving_dist_m += seg_m

    moving_total_s = moving.total_seconds()
    if not has_motion_data or moving_total_s <= 0:
        return None, None

    avg_kmh = (moving_dist_m / moving_total_s) * 3.6
    return int(moving_total_s), avg_kmh
|
||||||
|
|
||||||
|
|
||||||
|
def _elevation(pts: list[DataPoint]) -> tuple[Optional[float], Optional[float]]:
    """Return (gain, loss) summed over consecutive elevation samples.

    Gain is positive and loss is negative (or zero).  Samples without an
    elevation are ignored; fewer than two readings yields ``(None, None)``.
    """
    profile = [p.elevation_m for p in pts if p.elevation_m is not None]
    if len(profile) < 2:
        return None, None

    climbed = 0.0
    dropped = 0.0
    previous = profile[0]
    for current in profile[1:]:
        step = current - previous
        if step > 0:
            climbed += step
        else:
            dropped += step
        previous = current
    return climbed, dropped
|
||||||
|
|
||||||
|
|
||||||
|
def _max_speed(pts: list[DataPoint]) -> Optional[float]:
    """Return the maximum speed in km/h, or None when it cannot be determined.

    Device-recorded speeds are preferred.  Without any, the speed of each
    interval between consecutive GPS fixes is derived from the geodesic
    distance.  Both ends of an interval must have a latitude and a
    longitude, matching the checks in the other GPS helpers.
    """
    # Prefer device speed; fall back to GPS-derived
    device_speeds = [p.speed_kmh for p in pts if p.speed_kmh is not None]
    if device_speeds:
        return max(device_speeds)

    gps_speeds = []
    for a, b in zip(pts, pts[1:]):
        if a.lat is None or a.lon is None or b.lat is None or b.lon is None:
            continue
        dt = (b.timestamp - a.timestamp).total_seconds()
        if dt <= 0:
            continue  # duplicate or out-of-order timestamps
        m = geodesic((a.lat, a.lon), (b.lat, b.lon)).meters
        gps_speeds.append((m / dt) * 3.6)
    return max(gps_speeds) if gps_speeds else None
|
||||||
|
|
||||||
|
|
||||||
|
def _hr_stats(pts: list[DataPoint]) -> tuple[Optional[int], Optional[int]]:
    """Return (average, maximum) heart rate; the average is truncated to an int."""
    readings = [p.hr_bpm for p in pts if p.hr_bpm is not None]
    if readings:
        mean = sum(readings) / len(readings)
        return int(mean), max(readings)
    return None, None
|
||||||
|
|
||||||
|
|
||||||
|
def _avg_nonnull(values: list) -> Optional[int]:
    """Mean of the non-None entries, truncated to an int; None when there are none."""
    present = [x for x in values if x is not None]
    if not present:
        return None
    return int(sum(present) / len(present))
|
||||||
|
|
||||||
|
|
||||||
|
def _max_nonnull(values: list) -> Optional[int]:
    """Largest of the non-None entries; None when there are none."""
    present = [x for x in values if x is not None]
    if not present:
        return None
    return max(present)
|
||||||
|
|
||||||
|
|
||||||
|
def _bbox(pts: list[DataPoint]) -> Optional[tuple[float, float, float, float]]:
    """Return ``(min_lon, min_lat, max_lon, max_lat)`` over all complete GPS fixes.

    Only points carrying both a latitude and a longitude contribute.  A
    half-populated point therefore cannot leave one axis empty, which
    would make ``min()`` raise.  Returns None when there is no complete fix.
    """
    fixes = [(p.lon, p.lat) for p in pts if p.lat is not None and p.lon is not None]
    if not fixes:
        return None
    lons, lats = zip(*fixes)
    return (min(lons), min(lats), max(lons), max(lats))
|
||||||
|
|
||||||
|
|
||||||
|
def _endpoints(
    pts: list[DataPoint],
) -> tuple[Optional[tuple[float, float]], Optional[tuple[float, float]]]:
    """Return the first and last ``(lat, lon)`` fix, or ``(None, None)`` without any fix."""

    def first_fix(samples):
        # First sample in iteration order that has both coordinates.
        for p in samples:
            if p.lat is not None and p.lon is not None:
                return (p.lat, p.lon)
        return None

    start = first_fix(pts)
    if start is None:
        return None, None
    return start, first_fix(reversed(pts))
|
||||||
|
|
||||||
|
|
||||||
|
def _empty() -> ComputedMetrics:
    """Return a ``ComputedMetrics`` with every field set to None."""
    field_names = (
        "distance_m",
        "duration_s",
        "moving_time_s",
        "elevation_gain_m",
        "elevation_loss_m",
        "avg_speed_kmh",
        "max_speed_kmh",
        "avg_hr_bpm",
        "max_hr_bpm",
        "avg_cadence_rpm",
        "avg_power_w",
        "max_power_w",
        "bbox",
        "start_latlng",
        "end_latlng",
    )
    # dict.fromkeys maps every name to None.
    return ComputedMetrics(**dict.fromkeys(field_names))
|
||||||
@@ -0,0 +1,58 @@
|
|||||||
|
"""Core data models for the extract stage.
|
||||||
|
|
||||||
|
ParsedActivity is the internal representation produced by parsers.
|
||||||
|
It gets fed into metrics computation and the BAS JSON writer.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class DataPoint:
    """A single timestamped sample; every measurement other than the timestamp is optional."""

    timestamp: datetime

    # Position
    lat: float | None = None
    lon: float | None = None
    elevation_m: float | None = None

    # Sensors
    hr_bpm: int | None = None
    cadence_rpm: int | None = None
    # Device-reported speed in km/h; derived from GPS downstream when absent.
    speed_kmh: float | None = None
    power_w: int | None = None
    temperature_c: float | None = None

    # Device-reported cumulative distance in metres, when recorded.
    distance_m: float | None = None
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class LapData:
    """Summary of one lap; only its index and start time are required."""

    index: int
    started_at: datetime

    # Per-lap totals and averages; None when not available.
    duration_s: int | None = None
    distance_m: float | None = None
    elevation_gain_m: float | None = None
    avg_speed_kmh: float | None = None
    avg_hr_bpm: int | None = None
    avg_power_w: int | None = None
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ParsedActivity:
    """What a parser hands to the later stages, before any metric is computed."""

    # Required
    points: list[DataPoint]
    sport: str  # normalised to BAS sport enum
    started_at: datetime
    source_file: str  # basename of original file
    source_hash: str  # "sha256:{hex}"

    # Optional descriptive metadata
    sub_sport: str | None = None
    device: str | None = None
    title: str | None = None
    description: str | None = None
    gear: str | None = None
    strava_id: str | None = None
    laps: list[LapData] = field(default_factory=list)
|
||||||
@@ -0,0 +1,34 @@
|
|||||||
|
"""Abstract base class for all activity parsers."""
|
||||||
|
|
||||||
|
import gzip
|
||||||
|
import hashlib
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from bincio.extract.models import ParsedActivity
|
||||||
|
|
||||||
|
|
||||||
|
class BaseParser(ABC):
    """Interface for activity parsers, plus shared file-reading and hashing helpers."""

    @abstractmethod
    def parse(self, path: Path, raw_bytes: bytes) -> ParsedActivity:
        """Turn the (already decompressed) bytes of an activity file into a ParsedActivity.

        The bytes are read ahead of time so that the factory hashes the
        file once and deals with decompression before dispatching here.
        """

    @staticmethod
    def _sha256(data: bytes) -> str:
        """Return the SHA-256 of ``data`` as ``"sha256:{hex}"``."""
        digest = hashlib.sha256(data).hexdigest()
        return "sha256:" + digest

    @staticmethod
    def _read_file(path: Path) -> tuple[bytes, bytes]:
        """Read ``path`` and return ``(raw_bytes, decompressed_bytes)``.

        ``raw_bytes`` is the file exactly as stored (the input for hashing);
        ``decompressed_bytes`` is what a parser should consume.  The two are
        the same object unless the file name ends in ``.gz``.
        """
        raw = path.read_bytes()
        payload = gzip.decompress(raw) if path.suffix == ".gz" else raw
        return raw, payload
|
||||||
@@ -0,0 +1,46 @@
|
|||||||
|
"""Parser factory — selects the right parser based on file extension."""
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from bincio.extract.models import ParsedActivity
|
||||||
|
from bincio.extract.parsers.base import BaseParser
|
||||||
|
from bincio.extract.parsers.fit import FitParser
|
||||||
|
from bincio.extract.parsers.gpx import GpxParser
|
||||||
|
from bincio.extract.parsers.tcx import TcxParser
|
||||||
|
|
||||||
|
# Supported extensions (including .gz variants)
|
||||||
|
SUPPORTED = {".fit", ".gpx", ".tcx", ".fit.gz", ".gpx.gz", ".tcx.gz"}
|
||||||
|
|
||||||
|
_PARSERS: dict[str, type[BaseParser]] = {
|
||||||
|
".fit": FitParser,
|
||||||
|
".gpx": GpxParser,
|
||||||
|
".tcx": TcxParser,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _base_ext(path: Path) -> str:
    """Return the extension that identifies the format, looking through a trailing ``.gz``."""
    if path.suffix != ".gz":
        return path.suffix
    # "ride.fit.gz" → stem "ride.fit" → ".fit"
    inner = Path(path.stem)
    return inner.suffix
|
||||||
|
|
||||||
|
|
||||||
|
def is_supported(path: Path) -> bool:
    """Tell whether ``path`` has one of the extensions listed in ``SUPPORTED``."""
    if path.suffix == ".gz":
        # Compare the last two suffixes together, e.g. ".fit.gz".
        extension = "".join(path.suffixes[-2:])
    else:
        extension = path.suffix
    return extension in SUPPORTED
|
||||||
|
|
||||||
|
|
||||||
|
def parse_file(path: Path) -> ParsedActivity:
    """Parse one activity file, decompressing ``.gz`` input on the way.

    The parser is chosen by extension.  The returned activity's
    ``source_hash`` is computed over the file as stored on disk (still
    compressed for ``.gz``), and ``source_file`` is the file's basename.

    Raises:
        ValueError: if no parser is registered for the file's extension.
    """
    parser_cls = _PARSERS.get(_base_ext(path))
    if parser_cls is None:
        raise ValueError(f"Unsupported file type: {path.name!r}")

    stored_bytes, payload = BaseParser._read_file(path)
    activity = parser_cls().parse(path, payload)

    # Dedup keys on the *original* bytes, not the decompressed payload.
    activity.source_hash = BaseParser._sha256(stored_bytes)
    activity.source_file = path.name
    return activity
|
||||||
@@ -0,0 +1,133 @@
|
|||||||
|
"""FIT file parser (Garmin binary format)."""
|
||||||
|
|
||||||
|
from datetime import timezone
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import fitdecode
|
||||||
|
|
||||||
|
from bincio.extract.models import DataPoint, LapData, ParsedActivity
|
||||||
|
from bincio.extract.sport import normalise_sport
|
||||||
|
|
||||||
|
|
||||||
|
class FitParser:
    """Parser for FIT files, decoded with ``fitdecode``."""

    def parse(self, path: Path, raw_bytes: bytes) -> ParsedActivity:
        """Decode FIT bytes into a ``ParsedActivity``.

        Only ``sport``, ``device_info``, ``record`` and ``lap`` data
        messages are read; every other frame is ignored.  The activity
        starts at the first record's timestamp and ``source_hash`` is left
        empty.

        Raises:
            ValueError: if no ``record`` message with a timestamp is found.
        """
        import io

        points: list[DataPoint] = []
        laps: list[LapData] = []
        sport: str = "cycling"
        sub_sport: str | None = None
        device: str | None = None

        with fitdecode.FitReader(io.BytesIO(raw_bytes)) as fit:
            for frame in fit:
                if not isinstance(frame, fitdecode.FitDataMessage):
                    continue

                kind = frame.name
                if kind == "sport":
                    sport = normalise_sport(_get(frame, "sport", "cycling"))
                    sub_sport = _normalise_sub_sport(_get(frame, "sub_sport"))
                elif kind == "device_info":
                    label = self._device_label(frame)
                    if label is not None:
                        device = label
                elif kind == "record":
                    point = self._point(frame)
                    if point is not None:
                        points.append(point)
                elif kind == "lap":
                    lap = self._lap(frame, len(laps))
                    if lap is not None:
                        laps.append(lap)

        if not points:
            raise ValueError(f"No record messages found in {path.name}")

        return ParsedActivity(
            points=points,
            sport=sport,
            sub_sport=sub_sport,
            started_at=points[0].timestamp,
            device=device,
            laps=laps,
            source_file=path.name,
            source_hash="",
        )

    @staticmethod
    def _as_utc(ts: Any) -> Any:
        """Attach UTC to a naive datetime; return anything else unchanged."""
        if hasattr(ts, "tzinfo") and ts.tzinfo is None:
            return ts.replace(tzinfo=timezone.utc)
        return ts

    @staticmethod
    def _device_label(frame: Any) -> str | None:
        """Describe the device from a ``device_info`` message; None without a product."""
        mfr = _get(frame, "manufacturer")
        prod = _get(frame, "product_name") or _get(frame, "garmin_product")
        if not prod:
            return None
        return f"{mfr} {prod}" if mfr else str(prod)

    @staticmethod
    def _point(frame: Any) -> DataPoint | None:
        """Build a DataPoint from a ``record`` message; None when it has no timestamp."""
        ts = _get(frame, "timestamp")
        if ts is None:
            return None
        ts = FitParser._as_utc(ts)

        lat = _semicircles_to_deg(_get(frame, "position_lat"))
        lon = _semicircles_to_deg(_get(frame, "position_long"))
        speed_raw = _get(frame, "speed")  # m/s

        return DataPoint(
            timestamp=ts,
            lat=lat,
            lon=lon,
            elevation_m=_get(frame, "altitude"),
            hr_bpm=_get(frame, "heart_rate"),
            cadence_rpm=_get(frame, "cadence"),
            speed_kmh=speed_raw * 3.6 if speed_raw is not None else None,
            power_w=_get(frame, "power"),
            temperature_c=_get(frame, "temperature"),
            distance_m=_get(frame, "distance"),
        )

    @staticmethod
    def _lap(frame: Any, index: int) -> LapData | None:
        """Build a LapData from a ``lap`` message; None when it has no start time."""
        ts = _get(frame, "start_time")
        if ts is None:
            return None
        ts = FitParser._as_utc(ts)

        elapsed = _get(frame, "total_elapsed_time")
        speed_raw = _get(frame, "avg_speed")
        return LapData(
            index=index,
            started_at=ts,
            duration_s=int(elapsed) if elapsed else None,
            distance_m=_get(frame, "total_distance"),
            elevation_gain_m=_get(frame, "total_ascent"),
            avg_speed_kmh=speed_raw * 3.6 if speed_raw else None,
            avg_hr_bpm=_get(frame, "avg_heart_rate"),
            avg_power_w=_get(frame, "avg_power"),
        )
|
||||||
|
|
||||||
|
|
||||||
|
def _get(frame: fitdecode.FitDataMessage, field: str, default: Any = None) -> Any:
    """Read ``field`` from ``frame``, returning ``default`` when the lookup raises KeyError."""
    try:
        value = frame.get_value(field)
    except KeyError:
        return default
    return value
|
||||||
|
|
||||||
|
|
||||||
|
def _semicircles_to_deg(value: Any) -> float | None:
    """Convert a semicircle value to degrees (2**31 semicircles = 180 degrees).

    Returns None for None, for values that cannot be converted to float,
    and for results whose magnitude exceeds 180 degrees.
    """
    if value is None:
        return None
    try:
        degrees = float(value) * (180.0 / 2**31)
    except (TypeError, ValueError):
        return None
    # Sanity check: invalid semicircle values often come out as ±180+
    return None if abs(degrees) > 180 else degrees
|
||||||
|
|
||||||
|
|
||||||
|
def _normalise_sub_sport(value: Any) -> str | None:
    """Map a sub-sport value to a short label.

    The value is lower-cased and spaces become underscores.  Known names
    are shortened via the alias table; unknown names pass through as-is.
    None and empty results yield None.
    """
    if value is None:
        return None
    key = str(value).lower().replace(" ", "_")
    aliases = {
        "road": "road",
        "mountain": "mountain",
        "gravel_cycling": "gravel",
        "cyclocross": "gravel",
        "indoor_cycling": "indoor",
        "trail": "trail",
        "track": "track",
    }
    label = aliases.get(key, key)
    return label if label else None
|
||||||
@@ -0,0 +1,82 @@
|
|||||||
|
"""GPX file parser."""
|
||||||
|
|
||||||
|
from datetime import timezone
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import gpxpy
|
||||||
|
import gpxpy.gpx
|
||||||
|
|
||||||
|
from bincio.extract.models import DataPoint, ParsedActivity
|
||||||
|
from bincio.extract.parsers.base import BaseParser
|
||||||
|
from bincio.extract.sport import normalise_sport
|
||||||
|
|
||||||
|
# Known GPX extension namespaces
|
||||||
|
_NS_GARMIN = "http://www.garmin.com/xmlschemas/TrackPointExtension/v1"
|
||||||
|
_NS_GARMIN_V2 = "http://www.garmin.com/xmlschemas/TrackPointExtension/v2"
|
||||||
|
|
||||||
|
|
||||||
|
class GpxParser(BaseParser):
    """Parser for GPX tracks, read with ``gpxpy``."""

    def parse(self, path: Path, raw_bytes: bytes) -> ParsedActivity:
        """Decode GPX bytes into a ``ParsedActivity``.

        Track points from every segment of every track are collected in
        document order; points without a time are skipped and naive times
        are taken as UTC.  The sport comes from the first track's ``type``,
        falling back to ``"cycling"``.

        Raises:
            ValueError: if no timestamped track point is found.
        """
        document = gpxpy.parse(raw_bytes.decode("utf-8", errors="replace"))

        track_points = (
            pt
            for track in document.tracks
            for segment in track.segments
            for pt in segment.points
        )

        points: list[DataPoint] = []
        for pt in track_points:
            stamp = pt.time
            if stamp is None:
                continue
            if stamp.tzinfo is None:
                stamp = stamp.replace(tzinfo=timezone.utc)

            sample = DataPoint(
                timestamp=stamp,
                lat=pt.latitude,
                lon=pt.longitude,
                elevation_m=pt.elevation,
            )
            _apply_extensions(pt, sample)
            points.append(sample)

        if not points:
            raise ValueError(f"No trackpoints found in {path.name}")

        track_type = document.tracks[0].type if document.tracks else None
        return ParsedActivity(
            points=points,
            sport=normalise_sport(track_type or "cycling"),
            started_at=points[0].timestamp,
            source_file=path.name,
            source_hash="",  # set by factory
        )
|
||||||
|
|
||||||
|
|
||||||
|
def _apply_extensions(pt: gpxpy.gpx.GPXTrackPoint, dp: DataPoint) -> None:
    """Copy sensor values from a point's ``TrackPointExtension`` elements onto ``dp``.

    Recognised children are ``hr``, ``cad``, ``atemp`` and ``speed``
    (m/s, stored as km/h).  Namespaces are ignored when matching tag names.
    """
    if pt.extensions is None:
        return

    # tag → (DataPoint attribute, text converter)
    readers = {
        "hr": ("hr_bpm", lambda text: int(float(text))),
        "cad": ("cadence_rpm", lambda text: int(float(text))),
        "atemp": ("temperature_c", float),
        "speed": ("speed_kmh", lambda text: float(text) * 3.6),  # m/s → km/h
    }

    for ext in pt.extensions:
        if _strip_ns(ext.tag) != "TrackPointExtension":
            continue
        for child in ext:
            tag = _strip_ns(child.tag)
            text = child.text
            if text is None:
                continue
            reader = readers.get(tag)
            if reader is None:
                continue
            attribute, convert = reader
            setattr(dp, attribute, convert(text))
|
||||||
|
|
||||||
|
|
||||||
|
def _strip_ns(tag: str) -> str:
    """Drop a leading ``{namespace}`` from an XML tag: '{namespace}localname' → 'localname'."""
    if "}" not in tag:
        return tag
    return tag.rsplit("}", 1)[1]
|
||||||
@@ -0,0 +1,89 @@
|
|||||||
|
"""TCX (Training Center XML) file parser."""
|
||||||
|
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
|
from bincio.extract.models import DataPoint, ParsedActivity
|
||||||
|
from bincio.extract.sport import normalise_sport
|
||||||
|
|
||||||
|
_NS = {
|
||||||
|
"tcx": "http://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2",
|
||||||
|
"ext": "http://www.garmin.com/xmlschemas/ActivityExtension/v2",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class TcxParser:
    """Parser for TCX (Training Center XML) files, read with ``lxml``."""

    def parse(self, path: Path, raw_bytes: bytes) -> ParsedActivity:
        """Decode TCX bytes into a ``ParsedActivity``.

        Only the first ``Activity`` element is read.  Trackpoints without a
        ``Time`` are skipped.  ``source_hash`` is left empty.

        Raises:
            ValueError: if the document has no ``Activity`` element or the
                first activity has no timestamped trackpoint.
        """

        def number(node, convert=float):
            # Element text converted with ``convert``; None when the
            # element is missing or its text is empty.
            if node is None or not node.text:
                return None
            return convert(node.text)

        def whole(text):
            # Integer fields may be written as "142.0".
            return int(float(text))

        # Some exporters (e.g. Garmin) prepend whitespace before the XML
        # declaration, which is technically invalid. Strip it.
        root = etree.fromstring(raw_bytes.lstrip())

        activities = root.findall(".//tcx:Activity", _NS)
        if not activities:
            raise ValueError(f"No Activity elements found in {path.name}")

        # Use the first activity
        act = activities[0]
        sport = normalise_sport(act.get("Sport", "Biking"))

        points: list[DataPoint] = []
        for tp in act.findall(".//tcx:Trackpoint", _NS):
            time_el = tp.find("tcx:Time", _NS)
            if time_el is None or not time_el.text:
                continue
            ts = _parse_ts(time_el.text)

            lat = lon = None
            pos = tp.find("tcx:Position", _NS)
            if pos is not None:
                lat = number(pos.find("tcx:LatitudeDegrees", _NS))
                lon = number(pos.find("tcx:LongitudeDegrees", _NS))

            ele_el = tp.find("tcx:AltitudeMeters", _NS)
            hr_el = tp.find(".//tcx:HeartRateBpm/tcx:Value", _NS)
            cad_el = tp.find("tcx:Cadence", _NS)
            dist_el = tp.find("tcx:DistanceMeters", _NS)

            # Extensions (speed, watts)
            speed_el = tp.find(".//ext:Speed", _NS)
            power_el = tp.find(".//ext:Watts", _NS)

            elevation_m = number(ele_el)
            hr_bpm = number(hr_el, whole)
            cadence_rpm = number(cad_el, whole)
            distance_m = number(dist_el)
            speed_mps = number(speed_el)
            power_w = number(power_el, whole)

            points.append(
                DataPoint(
                    timestamp=ts,
                    lat=lat,
                    lon=lon,
                    elevation_m=elevation_m,
                    hr_bpm=hr_bpm,
                    cadence_rpm=cadence_rpm,
                    distance_m=distance_m,
                    speed_kmh=speed_mps * 3.6 if speed_mps is not None else None,
                    power_w=power_w,
                )
            )

        if not points:
            raise ValueError(f"No trackpoints found in {path.name}")

        return ParsedActivity(
            points=points,
            sport=sport,
            started_at=points[0].timestamp,
            source_file=path.name,
            source_hash="",
        )
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_ts(s: str) -> datetime:
    """Parse an ISO 8601 timestamp into a timezone-aware UTC datetime.

    Accepts a trailing ``Z``, an explicit UTC offset such as ``+02:00``,
    or no zone at all, with or without fractional seconds.  Timestamps
    without a zone are taken as UTC; offset timestamps are converted to
    UTC.

    Raises:
        ValueError: if the text is not a recognisable timestamp.
    """
    text = s.strip()
    # Older fromisoformat implementations reject the "Z" suffix.
    iso = text[:-1] + "+00:00" if text.endswith("Z") else text

    parsed = None
    try:
        parsed = datetime.fromisoformat(iso)
    except ValueError:
        # Fall back to the explicit formats for anything fromisoformat
        # cannot read (e.g. unusual fraction lengths on older Pythons).
        bare = text.rstrip("Z")
        for fmt in ("%Y-%m-%dT%H:%M:%S.%f", "%Y-%m-%dT%H:%M:%S"):
            try:
                parsed = datetime.strptime(bare, fmt)
                break
            except ValueError:
                continue

    if parsed is None:
        raise ValueError(f"Cannot parse timestamp: {s!r}")
    if parsed.tzinfo is None:
        return parsed.replace(tzinfo=timezone.utc)
    return parsed.astimezone(timezone.utc)
|
||||||
@@ -0,0 +1,60 @@
|
|||||||
|
"""GPS track simplification using the Ramer-Douglas-Peucker algorithm."""
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from rdp import rdp
|
||||||
|
|
||||||
|
from bincio.extract.models import DataPoint
|
||||||
|
|
||||||
|
|
||||||
|
def simplify_track(
    points: list[DataPoint],
    epsilon: float = 0.0001,
) -> list[DataPoint]:
    """Thin a track with Ramer-Douglas-Peucker and return the points that survive.

    ``epsilon`` is expressed in degrees (0.0001 is roughly 11 m at the
    equator).  Points lacking a latitude or longitude are discarded first;
    with fewer than two located points nothing is simplified.
    """
    located = [p for p in points if p.lat is not None and p.lon is not None]
    if len(located) < 2:
        return located

    lon_lat = [[p.lon, p.lat] for p in located]
    keep_flags = rdp(lon_lat, epsilon=epsilon, return_mask=True)
    return [p for p, keep in zip(located, keep_flags) if keep]
|
||||||
|
|
||||||
|
|
||||||
|
def build_geojson(
    points: list[DataPoint],
    activity_id: str,
    epsilon: float = 0.0001,
    original_count: Optional[int] = None,
) -> dict:
    """Build a GeoJSON ``Feature`` holding the simplified track as a ``LineString``.

    Coordinates are ``[lon, lat]`` or ``[lon, lat, elevation]``.  The
    ``speeds`` property lists the rounded speed of each simplified point.
    ``point_count_original`` is ``original_count`` when that is truthy,
    else ``len(points)``.
    """
    track = simplify_track(points, epsilon=epsilon)

    coordinates = []
    for p in track:
        if p.lon is None or p.lat is None:
            continue
        position = [p.lon, p.lat]
        if p.elevation_m is not None:
            position.append(p.elevation_m)
        coordinates.append(position)

    # Parallel speed array for gradient coloring
    speeds = [None if p.speed_kmh is None else round(p.speed_kmh, 2) for p in track]

    geometry = {
        "type": "LineString",
        "coordinates": coordinates,
    }
    properties = {
        "id": activity_id,
        "speeds": speeds,
        "simplification": "rdp",
        "rdp_epsilon": epsilon,
        "point_count_original": original_count or len(points),
        "point_count_simplified": len(coordinates),
    }
    return {
        "type": "Feature",
        "geometry": geometry,
        "properties": properties,
    }
|
||||||
@@ -0,0 +1,40 @@
|
|||||||
|
"""Sport name normalisation."""
|
||||||
|
|
||||||
|
# Lookup table from a cleaned-up raw label (lower-case, spaces replaced by
# underscores) to the BAS sport it belongs to. Labels missing from the table
# are reported as "other" by normalise_sport.
_MAPPING: dict[str, str] = {
    # cycling variants
    "cycling": "cycling",
    "biking": "cycling",
    "bike": "cycling",
    "ride": "cycling",
    "road_biking": "cycling",
    "mountain_biking": "cycling",
    "gravel_cycling": "cycling",
    "cyclocross": "cycling",
    "indoor_cycling": "cycling",
    "virtual_ride": "cycling",
    "e-biking": "cycling",
    "e-bike_ride": "cycling",
    # running
    "running": "running",
    "run": "running",
    "trail_running": "running",
    "treadmill_running": "running",
    "virtual_run": "running",
    # hiking
    "hiking": "hiking",
    "hike": "hiking",
    # walking
    "walking": "walking",
    "walk": "walking",
    # swimming
    "swimming": "swimming",
    "swim": "swimming",
    "open_water_swimming": "swimming",
}

# The closed set of sport values a BAS document may carry.
BAS_SPORTS = {"cycling", "running", "hiking", "walking", "swimming", "other"}
|
||||||
|
|
||||||
|
|
||||||
|
def normalise_sport(raw: object) -> str:
    """Translate a raw sport label into its BAS sport name.

    The label is stringified, lower-cased, stripped of surrounding
    whitespace and has its spaces turned into underscores before being
    looked up in ``_MAPPING``. ``None`` and unknown labels give "other".
    """
    if raw is not None:
        cleaned = str(raw).lower().strip()
        return _MAPPING.get(cleaned.replace(" ", "_"), "other")
    return "other"
|
||||||
@@ -0,0 +1,55 @@
|
|||||||
|
"""Import metadata from Strava's activities.csv bulk export.
|
||||||
|
|
||||||
|
Strava export columns we care about:
|
||||||
|
Activity ID, Activity Date, Activity Name, Activity Type,
|
||||||
|
Activity Description, Filename
|
||||||
|
"""
|
||||||
|
|
||||||
|
import csv
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
|
||||||
|
_STRAVA_DATE_FMTS = (
|
||||||
|
"%b %d, %Y, %I:%M:%S %p", # "Jun 1, 2024, 7:30:12 AM"
|
||||||
|
"%Y-%m-%d %H:%M:%S",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class StravaMetadata:
    """Maps original filename → Strava metadata.

    Rows of the export CSV are indexed by the basename of their ``Filename``
    column; rows that name no file are skipped.
    """

    def __init__(self, csv_path: Path) -> None:
        """Read ``csv_path`` and index its rows by activity file basename."""
        self._by_filename: dict[str, dict] = {}
        self._load(csv_path)

    def _load(self, path: Path) -> None:
        """Populate the filename index from the CSV at ``path``."""
        # utf-8-sig drops a leading byte-order mark if the file has one.
        with path.open(newline="", encoding="utf-8-sig") as f:
            for row in csv.DictReader(f):
                # DictReader fills the cells missing from a short row with
                # None, so fall back to "" before calling str methods.
                filename = (row.get("Filename") or "").strip()
                if not filename:
                    continue
                # Strava stores paths like "activities/12345.fit.gz"
                self._by_filename[Path(filename).name] = row

    def lookup(self, source_file: str) -> Optional[dict]:
        """Return the Strava CSV row for a given source filename, or None.

        ``source_file`` is reduced to its basename first, mirroring how the
        index keys are built, so a bare name and a full path both match.
        """
        return self._by_filename.get(Path(source_file).name)

    def enrich(self, source_file: str, activity: object) -> None:
        """Mutate a ParsedActivity with Strava metadata if found.

        Only attributes that are currently falsy on ``activity`` are filled,
        and only from CSV cells that are non-empty after stripping.
        """
        row = self.lookup(source_file)
        if row is None:
            return

        fill_from = (
            ("title", "Activity Name"),
            ("description", "Activity Description"),
            ("strava_id", "Activity ID"),
        )
        for attr, column in fill_from:
            value = (row.get(column) or "").strip()
            if value and not getattr(activity, attr):
                setattr(activity, attr, value)
|
||||||
@@ -0,0 +1,58 @@
|
|||||||
|
"""Downsample a list of DataPoints to at most 1 sample/second and build
|
||||||
|
the BAS timeseries object (parallel arrays)."""
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from bincio.extract.models import DataPoint
|
||||||
|
|
||||||
|
|
||||||
|
def build_timeseries(
    points: list[DataPoint],
    started_at: datetime,
    privacy: str = "public",
) -> dict:
    """Return the BAS `timeseries` object (parallel arrays).

    privacy='no_gps' or 'private' → lat/lon set to null.
    Downsamples so at most one point per second is emitted: a point is
    skipped when it falls in the same whole second as the previously kept
    point. Points timestamped before ``started_at`` are dropped.

    Args:
        points: Recorded samples, expected in chronological order.
        started_at: Reference instant; ``t`` values are whole seconds since it.
        privacy: Privacy level of the activity.

    Returns:
        ``{"t": []}`` when ``points`` is empty, otherwise a dict of
        equal-length arrays keyed ``t``, ``lat``, ``lon``, ``elevation_m``,
        ``speed_kmh``, ``hr_bpm``, ``cadence_rpm``, ``power_w`` and
        ``temperature_c`` (``lat``/``lon`` are ``None`` when GPS is hidden).
    """
    if not points:
        return {"t": []}

    include_gps = privacy not in ("no_gps", "private")

    def _rounded(value: Optional[float], digits: int) -> Optional[float]:
        # Round a reading, passing missing readings through untouched.
        return None if value is None else round(value, digits)

    # Downsample: keep at most one point per second
    offsets: list[int] = []
    sampled: list[DataPoint] = []
    for p in points:
        elapsed = (p.timestamp - started_at).total_seconds()
        # Check the sign BEFORE truncating: int() rounds toward zero, so a
        # sample up to one second early would otherwise be filed under t=0
        # and displace the real first sample.
        if elapsed < 0:
            continue
        t = int(elapsed)
        if offsets and t == offsets[-1]:
            continue  # skip sub-second duplicates
        offsets.append(t)
        sampled.append(p)

    return {
        "t": offsets,
        "lat": [_rounded(p.lat, 7) for p in sampled] if include_gps else None,
        "lon": [_rounded(p.lon, 7) for p in sampled] if include_gps else None,
        "elevation_m": [_rounded(p.elevation_m, 1) for p in sampled],
        "speed_kmh": [_rounded(p.speed_kmh, 2) for p in sampled],
        "hr_bpm": [p.hr_bpm for p in sampled],
        "cadence_rpm": [p.cadence_rpm for p in sampled],
        "power_w": [p.power_w for p in sampled],
        "temperature_c": [_rounded(p.temperature_c, 1) for p in sampled],
    }
|
||||||
@@ -0,0 +1,198 @@
|
|||||||
|
"""Write a processed activity to BAS JSON files."""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
import unicodedata
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from bincio.extract.metrics import ComputedMetrics
|
||||||
|
from bincio.extract.models import LapData, ParsedActivity
|
||||||
|
from bincio.extract.simplify import build_geojson
|
||||||
|
from bincio.extract.timeseries import build_timeseries
|
||||||
|
|
||||||
|
|
||||||
|
def make_activity_id(activity: ParsedActivity) -> str:
    """Derive the BAS activity ID from the start time and, if any, the title.

    The ID begins with a compact timestamp such as ``2024-06-01T073012+0200``;
    when ``strftime("%z")`` yields nothing the suffix is ``Z`` instead of an
    offset. A non-empty title slug is appended after a dash.
    """
    start = activity.started_at
    offset = start.strftime("%z")  # e.g. "+0200" or ""
    stamp = f"{start:%Y-%m-%dT%H%M%S}{offset or 'Z'}"

    # A title made only of characters the slugifier drops gives an empty slug.
    slug = _slugify(activity.title) if activity.title else ""
    if slug:
        return f"{stamp}-{slug}"
    return stamp
|
||||||
|
|
||||||
|
|
||||||
|
def write_activity(
    activity: ParsedActivity,
    metrics: ComputedMetrics,
    output_dir: Path,
    privacy: str = "public",
    duplicate_of: str | None = None,
    rdp_epsilon: float = 0.0001,
) -> str:
    """Write {id}.json and (if GPS available) {id}.geojson. Returns the ID.

    Both files go to ``output_dir / "activities"``, which is created when
    missing, and are always encoded as UTF-8.

    With ``privacy`` set to ``"no_gps"`` or ``"private"`` no location is
    written at all: the GeoJSON track is skipped and ``bbox``,
    ``start_latlng`` and ``end_latlng`` are emitted as ``null``.

    Args:
        activity: Parsed source activity.
        metrics: Aggregates computed for ``activity``.
        output_dir: Root of the BAS data store.
        privacy: Privacy level recorded in the detail file.
        duplicate_of: ID of the activity this one duplicates, if any.
        rdp_epsilon: Tolerance handed to ``build_geojson``.
    """
    activity_id = make_activity_id(activity)
    acts_dir = output_dir / "activities"
    acts_dir.mkdir(parents=True, exist_ok=True)

    source = _infer_source(activity)
    location_visible = privacy not in ("no_gps", "private")
    has_gps = metrics.bbox is not None and location_visible

    # ── detail JSON ──────────────────────────────────────────────────────────
    detail: dict = {
        "bas_version": "1.0",
        "id": activity_id,
        "title": activity.title or _auto_title(activity),
        "description": activity.description,
        "sport": activity.sport,
        "sub_sport": activity.sub_sport,
        "started_at": activity.started_at.isoformat(),
        "distance_m": metrics.distance_m,
        "duration_s": metrics.duration_s,
        "moving_time_s": metrics.moving_time_s,
        "elevation_gain_m": metrics.elevation_gain_m,
        "elevation_loss_m": metrics.elevation_loss_m,
        "avg_speed_kmh": metrics.avg_speed_kmh,
        "max_speed_kmh": metrics.max_speed_kmh,
        "avg_hr_bpm": metrics.avg_hr_bpm,
        "max_hr_bpm": metrics.max_hr_bpm,
        "avg_cadence_rpm": metrics.avg_cadence_rpm,
        "avg_power_w": metrics.avg_power_w,
        "max_power_w": metrics.max_power_w,
        "gear": activity.gear,
        "device": activity.device,
        # The bounding box and end points reveal where the activity took
        # place just as the track does, so they follow the privacy level.
        "bbox": list(metrics.bbox) if location_visible and metrics.bbox else None,
        "start_latlng": (
            list(metrics.start_latlng)
            if location_visible and metrics.start_latlng
            else None
        ),
        "end_latlng": (
            list(metrics.end_latlng)
            if location_visible and metrics.end_latlng
            else None
        ),
        "laps": [_serialise_lap(lap) for lap in activity.laps],
        "timeseries": build_timeseries(activity.points, activity.started_at, privacy),
        "source": source,
        "source_file": activity.source_file,
        "source_hash": activity.source_hash,
        "strava_id": activity.strava_id,
        "duplicate_of": duplicate_of,
        "privacy": privacy,
        "custom": {},
    }

    # ensure_ascii=False emits non-ASCII text verbatim, so the encoding must
    # be pinned rather than left to the platform default.
    json_path = acts_dir / f"{activity_id}.json"
    json_path.write_text(
        json.dumps(detail, indent=2, ensure_ascii=False), encoding="utf-8"
    )

    # ── GeoJSON track ────────────────────────────────────────────────────────
    if has_gps:
        geojson = build_geojson(activity.points, activity_id, epsilon=rdp_epsilon)
        geojson_path = acts_dir / f"{activity_id}.geojson"
        geojson_path.write_text(
            json.dumps(geojson, indent=2, ensure_ascii=False), encoding="utf-8"
        )

    return activity_id
|
||||||
|
|
||||||
|
|
||||||
|
def build_summary(
    activity: ParsedActivity,
    metrics: ComputedMetrics,
    activity_id: str,
    privacy: str = "public",
) -> dict:
    """Assemble the Activity Summary entry that index.json lists.

    ``track_url`` is only set when the metrics carry a bounding box and the
    privacy level is neither "no_gps" nor "private"; otherwise it is None.
    """
    gps_hidden = privacy in ("no_gps", "private")
    has_track = metrics.bbox is not None and not gps_hidden

    summary: dict = {
        "id": activity_id,
        "title": activity.title or _auto_title(activity),
        "sport": activity.sport,
        "sub_sport": activity.sub_sport,
        "started_at": activity.started_at.isoformat(),
    }

    # These entries are copied from the metrics object under the same name.
    copied_metrics = (
        "distance_m",
        "duration_s",
        "moving_time_s",
        "elevation_gain_m",
        "avg_speed_kmh",
        "max_speed_kmh",
        "avg_hr_bpm",
        "max_hr_bpm",
        "avg_cadence_rpm",
        "avg_power_w",
    )
    for field in copied_metrics:
        summary[field] = getattr(metrics, field)

    summary["source"] = _infer_source(activity)
    summary["privacy"] = privacy
    summary["detail_url"] = f"activities/{activity_id}.json"
    summary["track_url"] = f"activities/{activity_id}.geojson" if has_track else None
    return summary
|
||||||
|
|
||||||
|
|
||||||
|
def write_index(summaries: list[dict], output_dir: Path, owner: dict) -> None:
    """Write index.json (sorted newest first).

    Activities are ordered by the instant their ``started_at`` denotes, not
    by the text of the timestamp, so entries recorded with different UTC
    offsets still come out in true chronological order.

    Args:
        summaries: Activity Summary dicts, each with an ISO 8601 ``started_at``.
        output_dir: Root of the BAS data store; created when missing.
        owner: Owner object embedded verbatim in the index.
    """
    from datetime import datetime, timezone

    def _chronological(summary: dict) -> tuple[datetime, str]:
        # Sort key: the parsed start instant, with the raw text as tie-breaker.
        raw = summary["started_at"]
        try:
            moment = datetime.fromisoformat(raw)
        except (TypeError, ValueError):
            # An unreadable stamp sorts as the oldest entry instead of
            # aborting the whole index.
            moment = datetime.min
        if moment.tzinfo is None:
            # Naive and aware datetimes cannot be compared; treat naive as UTC.
            moment = moment.replace(tzinfo=timezone.utc)
        return moment, str(raw)

    sorted_summaries = sorted(summaries, key=_chronological, reverse=True)
    index = {
        "bas_version": "1.0",
        "owner": owner,
        "generated_at": _now_iso(),
        "shards": [],
        "activities": sorted_summaries,
    }

    # With no activities written yet the store directory may not exist.
    output_dir.mkdir(parents=True, exist_ok=True)
    # ensure_ascii=False emits non-ASCII text verbatim, so pin the encoding
    # instead of relying on the platform default.
    (output_dir / "index.json").write_text(
        json.dumps(index, indent=2, ensure_ascii=False),
        encoding="utf-8",
    )
|
||||||
|
|
||||||
|
|
||||||
|
# ── helpers ──────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _now_iso() -> str:
    """Return the current UTC time as an ISO 8601 string with offset."""
    from datetime import datetime, timezone

    utc_now = datetime.now(tz=timezone.utc)
    return utc_now.isoformat()
|
||||||
|
|
||||||
|
|
||||||
|
def _auto_title(activity: ParsedActivity) -> str:
    """Make up a title such as "Morning Cycling" from start hour and sport.

    Hours 5-11 are "Morning", 12-16 "Afternoon", 17-20 "Evening" and
    everything else "Night"; the hour is read from ``started_at`` as-is.
    """
    hour = activity.started_at.hour

    # Each pair is (first hour NOT covered, label); the first match wins.
    bands = ((5, "Night"), (12, "Morning"), (17, "Afternoon"), (21, "Evening"))
    for upper_bound, label in bands:
        if hour < upper_bound:
            break
    else:
        label = "Night"

    return f"{label} {activity.sport.capitalize()}"
|
||||||
|
|
||||||
|
|
||||||
|
def _infer_source(activity: ParsedActivity) -> str | None:
|
||||||
|
if activity.strava_id:
|
||||||
|
return "strava_export"
|
||||||
|
name = activity.source_file.lower()
|
||||||
|
# Karoo uses UUID-style names
|
||||||
|
if "activity" in name and len(name.split(".")) >= 3:
|
||||||
|
return "karoo"
|
||||||
|
if name.endswith(".fit") or name.endswith(".fit.gz"):
|
||||||
|
return "fit_file"
|
||||||
|
if name.endswith(".gpx") or name.endswith(".gpx.gz"):
|
||||||
|
return "gpx_file"
|
||||||
|
if name.endswith(".tcx") or name.endswith(".tcx.gz"):
|
||||||
|
return "tcx_file"
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _slugify(text: str) -> str:
    """Turn free text into a lower-case ASCII slug of at most 60 characters.

    Accented letters are reduced to their base letter, every run of other
    characters becomes a single dash, and the result never starts or ends
    with a dash. Text with no usable characters gives an empty string.
    """
    # NFKD splits accented letters into base letter + combining mark; the
    # ASCII round-trip then drops the marks (é → e).
    text = unicodedata.normalize("NFKD", text)
    text = text.encode("ascii", "ignore").decode("ascii")
    text = re.sub(r"[^a-z0-9]+", "-", text.lower())
    # Trim again after truncating: the 60-character cut can land right
    # after a separator and would otherwise leave a trailing dash.
    return text.strip("-")[:60].rstrip("-")
|
||||||
|
|
||||||
|
|
||||||
|
def _serialise_lap(lap: LapData) -> dict:
    """Convert one lap into its JSON-ready dict.

    ``started_at`` is written as an ISO string; the other values are copied
    unchanged from the lap's attributes of the same name.
    """
    serialised: dict = {
        "index": lap.index,
        "started_at": lap.started_at.isoformat(),
    }
    copied = (
        "duration_s",
        "distance_m",
        "elevation_gain_m",
        "avg_speed_kmh",
        "avg_hr_bpm",
        "avg_power_w",
    )
    for field in copied:
        serialised[field] = getattr(lap, field)
    return serialised
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
"""bincio render — CLI command (stub, Astro stage TBD)."""
|
||||||
|
|
||||||
|
import click
|
||||||
|
from rich.console import Console
|
||||||
|
|
||||||
|
console = Console()
|
||||||
|
|
||||||
|
|
||||||
|
@click.command()
@click.option("--config", "config_path", default="site_config.yaml")
@click.option("--out", "out_dir", default="./site/dist")
@click.option("--serve", is_flag=True, help="Start dev server with hot reload.")
@click.option(
    "--deploy",
    default=None,
    metavar="TARGET",
    help="Deploy target: 'github'.",
)
def render(config_path: str, out_dir: str, serve: bool, deploy: str | None) -> None:
    """Generate static site from BAS data store (Astro stage — coming soon)."""
    # Stub: the options are accepted but not used; only two notices are shown.
    notices = (
        "[yellow]bincio render is not yet implemented.[/yellow]",
        "The web renderer (Astro + MapLibre + Observable Plot) is next.",
    )
    for notice in notices:
        console.print(notice)
|
||||||
@@ -0,0 +1,32 @@
|
|||||||
|
owner:
|
||||||
|
handle: brutsalvadi
|
||||||
|
display_name: Bru
|
||||||
|
|
||||||
|
input:
|
||||||
|
dirs:
|
||||||
|
- ~/src/cycling_data_davide/activities
|
||||||
|
- ~/src/cycling_data_davide/Karoo_2026
|
||||||
|
- ~/src/cycling_data_davide/Karoo
|
||||||
|
# Strava bulk export metadata — provides names, descriptions, gear
|
||||||
|
metadata_csv: ~/src/cycling_data_davide/activities.csv
|
||||||
|
|
||||||
|
output:
|
||||||
|
dir: ~/bincio_data
|
||||||
|
|
||||||
|
default_privacy: public
|
||||||
|
|
||||||
|
sensors:
|
||||||
|
heart_rate: true
|
||||||
|
cadence: true
|
||||||
|
temperature: true
|
||||||
|
power: true
|
||||||
|
|
||||||
|
track:
|
||||||
|
simplify: rdp
|
||||||
|
rdp_epsilon: 0.0001 # ~11m at equator
|
||||||
|
timeseries_hz: 1 # 1 sample/second max
|
||||||
|
|
||||||
|
classifier:
|
||||||
|
enabled: false # ML activity type classifier (requires scikit-learn extra)
|
||||||
|
|
||||||
|
incremental: true # skip files whose hash hasn't changed since last run
|
||||||
@@ -0,0 +1,76 @@
|
|||||||
|
[build-system]
|
||||||
|
requires = ["hatchling"]
|
||||||
|
build-backend = "hatchling.build"
|
||||||
|
|
||||||
|
[project]
|
||||||
|
name = "bincio"
|
||||||
|
version = "0.1.0"
|
||||||
|
description = "Federated, open-source, self-hosted activity stats platform"
|
||||||
|
readme = "README.md"
|
||||||
|
requires-python = ">=3.12"
|
||||||
|
license = { text = "MIT" }
|
||||||
|
authors = [{ name = "Davide Brugali" }]
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
# Parsing
|
||||||
|
"gpxpy>=1.6",
|
||||||
|
"fitdecode>=0.11",
|
||||||
|
"lxml>=5.0", # TCX (XML)
|
||||||
|
# Data
|
||||||
|
"pandas>=2.2",
|
||||||
|
# Geo
|
||||||
|
"geopy>=2.4",
|
||||||
|
"rdp>=0.8",
|
||||||
|
# Config & CLI
|
||||||
|
"pyyaml>=6.0",
|
||||||
|
"click>=8.1",
|
||||||
|
"rich>=13.0", # pretty console output
|
||||||
|
# Schema validation
|
||||||
|
"jsonschema>=4.23",
|
||||||
|
]
|
||||||
|
|
||||||
|
[project.optional-dependencies]
|
||||||
|
classifier = [
|
||||||
|
"scikit-learn>=1.5",
|
||||||
|
]
|
||||||
|
dev = [
|
||||||
|
"pytest>=9.0",
|
||||||
|
"pytest-cov>=5.0",
|
||||||
|
"ruff>=0.9",
|
||||||
|
"mypy>=1.11",
|
||||||
|
"types-pyyaml",
|
||||||
|
"types-jsonschema",
|
||||||
|
]
|
||||||
|
|
||||||
|
[project.scripts]
|
||||||
|
bincio = "bincio.cli:main"
|
||||||
|
|
||||||
|
[dependency-groups]
|
||||||
|
dev = [
|
||||||
|
"pytest>=9.0",
|
||||||
|
"pytest-cov>=5.0",
|
||||||
|
"ruff>=0.9",
|
||||||
|
"mypy>=1.11",
|
||||||
|
"types-pyyaml",
|
||||||
|
"types-jsonschema",
|
||||||
|
]
|
||||||
|
|
||||||
|
[tool.ruff]
|
||||||
|
line-length = 100
|
||||||
|
target-version = "py312"
|
||||||
|
|
||||||
|
[tool.ruff.lint]
|
||||||
|
select = ["E", "F", "I", "UP", "B", "SIM"]
|
||||||
|
ignore = ["E501"]
|
||||||
|
|
||||||
|
[tool.mypy]
|
||||||
|
python_version = "3.12"
|
||||||
|
strict = true
|
||||||
|
ignore_missing_imports = true
|
||||||
|
|
||||||
|
[tool.pytest.ini_options]
|
||||||
|
testpaths = ["tests"]
|
||||||
|
markers = [
|
||||||
|
"slow: marks tests as slow (deselect with '-m \"not slow\"')",
|
||||||
|
"integration: marks tests requiring real activity files",
|
||||||
|
]
|
||||||
@@ -0,0 +1,180 @@
|
|||||||
|
{
|
||||||
|
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
||||||
|
"$id": "https://github.com/bincio-activity/bincio/blob/main/schema/bas-v1.schema.json",
|
||||||
|
"title": "BincioActivity Schema v1.0",
|
||||||
|
"description": "Schema for BincioActivity (BAS) data files.",
|
||||||
|
"$defs": {
|
||||||
|
"sport": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["cycling", "running", "hiking", "walking", "swimming", "other"]
|
||||||
|
},
|
||||||
|
"sub_sport": {
|
||||||
|
"type": ["string", "null"],
|
||||||
|
"enum": ["road", "mountain", "gravel", "indoor", "trail", "track", null]
|
||||||
|
},
|
||||||
|
"privacy": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["public", "blur_start", "no_gps", "private"]
|
||||||
|
},
|
||||||
|
"source": {
|
||||||
|
"type": ["string", "null"],
|
||||||
|
"enum": [
|
||||||
|
"strava_export", "garmin_connect", "wahoo", "komoot",
|
||||||
|
"gpx_file", "fit_file", "tcx_file", "karoo", "manual", null
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"latlng": {
|
||||||
|
"type": ["array", "null"],
|
||||||
|
"items": { "type": "number" },
|
||||||
|
"minItems": 2,
|
||||||
|
"maxItems": 2
|
||||||
|
},
|
||||||
|
"bbox": {
|
||||||
|
"type": ["array", "null"],
|
||||||
|
"items": { "type": "number" },
|
||||||
|
"minItems": 4,
|
||||||
|
"maxItems": 4,
|
||||||
|
"description": "[min_lon, min_lat, max_lon, max_lat]"
|
||||||
|
},
|
||||||
|
"lap": {
|
||||||
|
"type": "object",
|
||||||
|
"required": ["index", "started_at", "duration_s"],
|
||||||
|
"properties": {
|
||||||
|
"index": { "type": "integer", "minimum": 0 },
|
||||||
|
"started_at": { "type": "string", "format": "date-time" },
|
||||||
|
"duration_s": { "type": ["integer", "null"] },
|
||||||
|
"distance_m": { "type": ["number", "null"] },
|
||||||
|
"elevation_gain_m": { "type": ["number", "null"] },
|
||||||
|
"avg_speed_kmh": { "type": ["number", "null"] },
|
||||||
|
"avg_hr_bpm": { "type": ["integer", "null"] },
|
||||||
|
"avg_power_w": { "type": ["integer", "null"] }
|
||||||
|
},
|
||||||
|
"additionalProperties": false
|
||||||
|
},
|
||||||
|
"timeseries": {
|
||||||
|
"type": "object",
|
||||||
|
"required": ["t"],
|
||||||
|
"properties": {
|
||||||
|
"t": { "type": "array", "items": { "type": "integer" } },
|
||||||
|
"lat": { "type": ["array", "null"], "items": { "type": ["number", "null"] } },
|
||||||
|
"lon": { "type": ["array", "null"], "items": { "type": ["number", "null"] } },
|
||||||
|
"elevation_m": { "type": "array", "items": { "type": ["number", "null"] } },
|
||||||
|
"speed_kmh": { "type": "array", "items": { "type": ["number", "null"] } },
|
||||||
|
"hr_bpm": { "type": "array", "items": { "type": ["integer", "null"] } },
|
||||||
|
"cadence_rpm": { "type": "array", "items": { "type": ["integer", "null"] } },
|
||||||
|
"power_w": { "type": "array", "items": { "type": ["integer", "null"] } },
|
||||||
|
"temperature_c": { "type": "array", "items": { "type": ["number", "null"] } }
|
||||||
|
},
|
||||||
|
"additionalProperties": false
|
||||||
|
},
|
||||||
|
"activity_summary": {
|
||||||
|
"type": "object",
|
||||||
|
"required": ["id", "title", "sport", "started_at", "privacy"],
|
||||||
|
"properties": {
|
||||||
|
"id": { "type": "string", "minLength": 1 },
|
||||||
|
"title": { "type": "string" },
|
||||||
|
"sport": { "$ref": "#/$defs/sport" },
|
||||||
|
"sub_sport": { "$ref": "#/$defs/sub_sport" },
|
||||||
|
"started_at": { "type": "string", "format": "date-time" },
|
||||||
|
"distance_m": { "type": ["number", "null"] },
|
||||||
|
"duration_s": { "type": ["integer", "null"] },
|
||||||
|
"moving_time_s": { "type": ["integer", "null"] },
|
||||||
|
"elevation_gain_m": { "type": ["number", "null"] },
|
||||||
|
"avg_speed_kmh": { "type": ["number", "null"] },
|
||||||
|
"max_speed_kmh": { "type": ["number", "null"] },
|
||||||
|
"avg_hr_bpm": { "type": ["integer", "null"] },
|
||||||
|
"max_hr_bpm": { "type": ["integer", "null"] },
|
||||||
|
"avg_cadence_rpm": { "type": ["integer", "null"] },
|
||||||
|
"avg_power_w": { "type": ["integer", "null"] },
|
||||||
|
"source": { "$ref": "#/$defs/source" },
|
||||||
|
"privacy": { "$ref": "#/$defs/privacy" },
|
||||||
|
"detail_url": { "type": ["string", "null"] },
|
||||||
|
"track_url": { "type": ["string", "null"] }
|
||||||
|
},
|
||||||
|
"additionalProperties": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"oneOf": [
|
||||||
|
{
|
||||||
|
"title": "index.json",
|
||||||
|
"type": "object",
|
||||||
|
"required": ["bas_version", "owner", "generated_at", "activities"],
|
||||||
|
"properties": {
|
||||||
|
"bas_version": { "type": "string", "const": "1.0" },
|
||||||
|
"owner": {
|
||||||
|
"type": "object",
|
||||||
|
"required": ["handle", "display_name"],
|
||||||
|
"properties": {
|
||||||
|
"handle": { "type": "string", "minLength": 1 },
|
||||||
|
"display_name": { "type": "string" },
|
||||||
|
"avatar_url": { "type": ["string", "null"] }
|
||||||
|
},
|
||||||
|
"additionalProperties": false
|
||||||
|
},
|
||||||
|
"generated_at": { "type": "string", "format": "date-time" },
|
||||||
|
"shards": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"required": ["year", "url", "count"],
|
||||||
|
"properties": {
|
||||||
|
"year": { "type": "integer" },
|
||||||
|
"url": { "type": "string" },
|
||||||
|
"count": { "type": "integer" }
|
||||||
|
},
|
||||||
|
"additionalProperties": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"activities": {
|
||||||
|
"type": "array",
|
||||||
|
"items": { "$ref": "#/$defs/activity_summary" }
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "activities/{id}.json",
|
||||||
|
"type": "object",
|
||||||
|
"required": [
|
||||||
|
"bas_version", "id", "title", "sport", "started_at",
|
||||||
|
"privacy", "laps", "timeseries", "custom"
|
||||||
|
],
|
||||||
|
"properties": {
|
||||||
|
"bas_version": { "type": "string", "const": "1.0" },
|
||||||
|
"id": { "type": "string", "minLength": 1 },
|
||||||
|
"title": { "type": "string" },
|
||||||
|
"description": { "type": ["string", "null"] },
|
||||||
|
"sport": { "$ref": "#/$defs/sport" },
|
||||||
|
"sub_sport": { "$ref": "#/$defs/sub_sport" },
|
||||||
|
"started_at": { "type": "string", "format": "date-time" },
|
||||||
|
"distance_m": { "type": ["number", "null"] },
|
||||||
|
"duration_s": { "type": ["integer", "null"] },
|
||||||
|
"moving_time_s": { "type": ["integer", "null"] },
|
||||||
|
"elevation_gain_m": { "type": ["number", "null"] },
|
||||||
|
"elevation_loss_m": { "type": ["number", "null"] },
|
||||||
|
"avg_speed_kmh": { "type": ["number", "null"] },
|
||||||
|
"max_speed_kmh": { "type": ["number", "null"] },
|
||||||
|
"avg_hr_bpm": { "type": ["integer", "null"] },
|
||||||
|
"max_hr_bpm": { "type": ["integer", "null"] },
|
||||||
|
"avg_cadence_rpm": { "type": ["integer", "null"] },
|
||||||
|
"avg_power_w": { "type": ["integer", "null"] },
|
||||||
|
"max_power_w": { "type": ["integer", "null"] },
|
||||||
|
"gear": { "type": ["string", "null"] },
|
||||||
|
"device": { "type": ["string", "null"] },
|
||||||
|
"bbox": { "$ref": "#/$defs/bbox" },
|
||||||
|
"start_latlng": { "$ref": "#/$defs/latlng" },
|
||||||
|
"end_latlng": { "$ref": "#/$defs/latlng" },
|
||||||
|
"laps": { "type": "array", "items": { "$ref": "#/$defs/lap" } },
|
||||||
|
"timeseries": { "$ref": "#/$defs/timeseries" },
|
||||||
|
"source": { "$ref": "#/$defs/source" },
|
||||||
|
"source_file": { "type": ["string", "null"] },
|
||||||
|
"source_hash": { "type": ["string", "null"] },
|
||||||
|
"strava_id": { "type": ["string", "null"] },
|
||||||
|
"duplicate_of": { "type": ["string", "null"] },
|
||||||
|
"privacy": { "$ref": "#/$defs/privacy" },
|
||||||
|
"custom": { "type": "object" }
|
||||||
|
},
|
||||||
|
"additionalProperties": false
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
import { defineConfig } from "astro/config";
|
||||||
|
import svelte from "@astrojs/svelte";
|
||||||
|
import tailwind from "@astrojs/tailwind";
|
||||||
|
|
||||||
|
export default defineConfig({
|
||||||
|
integrations: [svelte(), tailwind()],
|
||||||
|
output: "static",
|
||||||
|
// When hosting at a subdirectory (e.g. GitHub Pages project site), set:
|
||||||
|
// base: "/repo-name",
|
||||||
|
});
|
||||||
@@ -0,0 +1,25 @@
|
|||||||
|
{
|
||||||
|
"name": "bincio-site",
|
||||||
|
"type": "module",
|
||||||
|
"version": "0.1.0",
|
||||||
|
"private": true,
|
||||||
|
"scripts": {
|
||||||
|
"dev": "astro dev",
|
||||||
|
"build": "astro build",
|
||||||
|
"preview": "astro preview",
|
||||||
|
"astro": "astro"
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"@astrojs/svelte": "^7.0.0",
|
||||||
|
"@astrojs/tailwind": "^5.1.0",
|
||||||
|
"astro": "^5.0.0",
|
||||||
|
"maplibre-gl": "^5.0.0",
|
||||||
|
"@observablehq/plot": "^0.6.0",
|
||||||
|
"svelte": "^5.0.0",
|
||||||
|
"tailwindcss": "^3.4.0"
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"@types/node": "^22.0.0",
|
||||||
|
"typescript": "^5.7.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,74 @@
|
|||||||
|
/** TypeScript types mirroring BAS v1.0 schema. */
|
||||||
|
|
||||||
|
export type Sport = "cycling" | "running" | "hiking" | "walking" | "swimming" | "other";
|
||||||
|
export type SubSport = "road" | "mountain" | "gravel" | "indoor" | "trail" | "track" | null;
|
||||||
|
export type Privacy = "public" | "blur_start" | "no_gps" | "private";
|
||||||
|
|
||||||
|
/**
 * One entry of the `activities` array in index.json.
 *
 * Units follow the BAS conventions: distances in metres, speeds in km/h,
 * durations in seconds. `null` means "not recorded / not available".
 */
export interface ActivitySummary {
  id: string;
  title: string;
  sport: Sport;
  sub_sport: SubSport;
  started_at: string;  // ISO 8601
  distance_m: number | null;
  duration_s: number | null;
  moving_time_s: number | null;
  elevation_gain_m: number | null;
  avg_speed_kmh: number | null;
  max_speed_kmh: number | null;
  avg_hr_bpm: number | null;
  max_hr_bpm: number | null;
  avg_cadence_rpm: number | null;
  avg_power_w: number | null;
  source: string | null;
  privacy: Privacy;
  // Path of the full activity detail file, e.g. "activities/{id}.json".
  detail_url: string | null;
  // Path of the simplified GPS track; null when no track is published.
  track_url: string | null;
}
|
||||||
|
|
||||||
|
/**
 * Shape of index.json: the owner manifest plus the activity feed.
 *
 * `owner.avatar_url` and `shards` are optional because the BAS schema does
 * not list them as required, so a compliant store may leave them out.
 */
export interface BASIndex {
  bas_version: string;
  owner: { handle: string; display_name: string; avatar_url?: string | null };
  generated_at: string;  // ISO 8601
  shards?: Array<{ year: number; url: string; count: number }>;
  activities: ActivitySummary[];
}
|
||||||
|
|
||||||
|
/**
 * Parallel per-sample arrays of an activity; index i of every array refers
 * to the sample taken `t[i]` seconds after the activity start.
 */
export interface Timeseries {
  t: number[];
  // null as a whole when GPS is withheld for privacy; individual entries
  // are null for samples recorded without a position.
  lat: (number | null)[] | null;
  lon: (number | null)[] | null;
  elevation_m: (number | null)[];
  speed_kmh: (number | null)[];
  hr_bpm: (number | null)[];
  cadence_rpm: (number | null)[];
  power_w: (number | null)[];
  temperature_c: (number | null)[];
}
|
||||||
|
|
||||||
|
/**
 * Shape of activities/{id}.json: the summary fields plus everything that is
 * only stored in the detail file.
 */
export interface ActivityDetail extends ActivitySummary {
  description: string | null;
  elevation_loss_m: number | null;
  max_power_w: number | null;
  gear: string | null;
  device: string | null;
  bbox: [number, number, number, number] | null;  // [min_lon, min_lat, max_lon, max_lat]
  start_latlng: [number, number] | null;
  end_latlng: [number, number] | null;
  laps: Lap[];
  timeseries: Timeseries;
  // Provenance of the activity; both fields are part of the BAS schema.
  source_file: string | null;
  source_hash: string | null;
  strava_id: string | null;
  duplicate_of: string | null;
  custom: Record<string, unknown>;
}
|
||||||
|
|
||||||
|
/**
 * One lap of an activity, as stored in the `laps` array of the detail file.
 * `null` means "not recorded / not available".
 */
export interface Lap {
  index: number;
  started_at: string;  // ISO 8601
  duration_s: number | null;
  distance_m: number | null;
  elevation_gain_m: number | null;
  avg_speed_kmh: number | null;
  avg_hr_bpm: number | null;
  avg_power_w: number | null;
}
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
/** @type {import('tailwindcss').Config} */
|
||||||
|
export default {
|
||||||
|
content: ["./src/**/*.{astro,html,js,jsx,md,mdx,svelte,ts,tsx,vue}"],
|
||||||
|
darkMode: "class",
|
||||||
|
theme: {
|
||||||
|
extend: {
|
||||||
|
colors: {
|
||||||
|
// BincioActivity accent — override via CSS variable in site_config
|
||||||
|
accent: "var(--color-accent, #00c8ff)",
|
||||||
|
},
|
||||||
|
fontFamily: {
|
||||||
|
sans: ["Inter", "system-ui", "sans-serif"],
|
||||||
|
mono: ["JetBrains Mono", "monospace"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
plugins: [],
|
||||||
|
};
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
{
|
||||||
|
"extends": "astro/tsconfigs/strict",
|
||||||
|
"compilerOptions": {
|
||||||
|
"baseUrl": ".",
|
||||||
|
"paths": {
|
||||||
|
"@lib/*": ["src/lib/*"],
|
||||||
|
"@components/*": ["src/components/*"]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,16 @@
|
|||||||
|
from bincio.extract.sport import normalise_sport
|
||||||
|
|
||||||
|
|
||||||
|
def test_cycling_variants():
    """Every listed cycling alias normalises to "cycling"."""
    aliases = ("cycling", "Biking", "road_biking", "virtual_ride", "e-biking")
    for alias in aliases:
        assert normalise_sport(alias) == "cycling", alias
|
||||||
|
|
||||||
|
|
||||||
|
def test_running_variants():
    """Every listed running alias normalises to "running"."""
    aliases = ("running", "Run", "trail_running", "virtual_run")
    for alias in aliases:
        assert normalise_sport(alias) == "running", alias
|
||||||
|
|
||||||
|
|
||||||
|
def test_unknown_falls_back_to_other():
    """An unmapped label and a missing label both come back as "other"."""
    unmapped = normalise_sport("yoga")
    assert unmapped == "other"
    missing = normalise_sport(None)
    assert missing == "other"
|
||||||
@@ -0,0 +1,36 @@
|
|||||||
|
from bincio.extract.writer import make_activity_id, _slugify
|
||||||
|
from bincio.extract.models import ParsedActivity, DataPoint
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
|
||||||
|
def _dummy_activity(title=None):
    """Build a one-point cycling activity that starts 2024-06-01 07:30:12 UTC."""
    start = datetime(2024, 6, 1, 7, 30, 12, tzinfo=timezone.utc)
    fields = {
        "points": [DataPoint(timestamp=start)],
        "sport": "cycling",
        "started_at": start,
        "source_file": "test.fit",
        "source_hash": "sha256:abc",
        "title": title,
    }
    return ParsedActivity(**fields)
|
||||||
|
|
||||||
|
|
||||||
|
def test_id_with_title():
    """A titled activity gets a date-prefixed ID that contains the title slug."""
    activity_id = make_activity_id(_dummy_activity("Morning Ride"))
    assert activity_id.startswith("2024-06-01T")
    assert "morning-ride" in activity_id
|
||||||
|
|
||||||
|
|
||||||
|
def test_id_without_title():
    """An untitled activity gets a timestamp-only ID."""
    activity_id = make_activity_id(_dummy_activity())
    assert "2024-06-01T" in activity_id
    # No trailing dash
    assert not activity_id.endswith("-")
|
||||||
|
|
||||||
|
|
||||||
|
def test_slugify():
    """Slugs are lower-case ASCII with dashes; empty input stays empty."""
    punctuated = _slugify("Morning Ride!")
    assert punctuated == "morning-ride"
    accented = _slugify(" Vélo ")  # é → e via NFKD + ASCII
    assert accented == "velo"
    assert _slugify("") == ""
|
||||||
Reference in New Issue
Block a user