parallelizing extraction, fix tcx files

This commit is contained in:
Davide Scaini
2026-03-28 14:24:16 +01:00
parent 38c5423aeb
commit 5d58126d2f
6 changed files with 226 additions and 192 deletions
+59 -61
View File
@@ -1,19 +1,28 @@
"""Compute aggregated metrics from a ParsedActivity.
All calculations are self-contained — no external state needed.
Uses inline haversine rather than geopy.geodesic to keep the hot path fast.
"""
import math
from dataclasses import dataclass
from datetime import datetime
from typing import Optional
from geopy.distance import geodesic
from bincio.extract.models import DataPoint, ParsedActivity
# Speed below which we consider the athlete stopped (km/h)
_STOPPED_THRESHOLD_KMH = 1.0
_EARTH_R = 6_371_000.0 # metres
def _haversine_m(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
"""Great-circle distance in metres. ~10x faster than geopy.geodesic."""
phi1 = math.radians(lat1)
phi2 = math.radians(lat2)
dphi = phi2 - phi1
dlam = math.radians(lon2 - lon1)
a = math.sin(dphi * 0.5) ** 2 + math.cos(phi1) * math.cos(phi2) * math.sin(dlam * 0.5) ** 2
return 2.0 * _EARTH_R * math.asin(math.sqrt(min(a, 1.0)))
@dataclass
@@ -30,7 +39,7 @@ class ComputedMetrics:
avg_cadence_rpm: Optional[int]
avg_power_w: Optional[int]
max_power_w: Optional[int]
bbox: Optional[tuple[float, float, float, float]] # min_lon, min_lat, max_lon, max_lat
bbox: Optional[tuple[float, float, float, float]] # min_lon, min_lat, max_lon, max_lat
start_latlng: Optional[tuple[float, float]]
end_latlng: Optional[tuple[float, float]]
@@ -41,10 +50,8 @@ def compute(activity: ParsedActivity) -> ComputedMetrics:
return _empty()
duration_s = _duration(pts)
distance_m = _distance(pts)
moving_time_s, moving_speed_kmh = _moving_stats(pts)
distance_m, moving_time_s, avg_speed_kmh, max_speed_kmh = _gps_stats(pts)
gain, loss = _elevation(pts)
max_speed = _max_speed(pts)
avg_hr, max_hr = _hr_stats(pts)
avg_cad = _avg_nonnull([p.cadence_rpm for p in pts])
avg_pow = _avg_nonnull([p.power_w for p in pts])
@@ -58,8 +65,8 @@ def compute(activity: ParsedActivity) -> ComputedMetrics:
moving_time_s=moving_time_s,
elevation_gain_m=round(gain, 1) if gain is not None else None,
elevation_loss_m=round(abs(loss), 1) if loss is not None else None,
avg_speed_kmh=round(moving_speed_kmh, 2) if moving_speed_kmh else None,
max_speed_kmh=round(max_speed, 2) if max_speed else None,
avg_speed_kmh=round(avg_speed_kmh, 2) if avg_speed_kmh else None,
max_speed_kmh=round(max_speed_kmh, 2) if max_speed_kmh else None,
avg_hr_bpm=avg_hr,
max_hr_bpm=max_hr,
avg_cadence_rpm=avg_cad,
@@ -71,66 +78,75 @@ def compute(activity: ParsedActivity) -> ComputedMetrics:
)
# ── helpers ──────────────────────────────────────────────────────────────────
# ── single-pass GPS stats ──────────────────────────────────────────────────────
# distance, moving time, avg speed, and max speed are all derived from the same
# per-segment loop, so we compute them in one pass instead of four.
def _duration(pts: list[DataPoint]) -> Optional[int]:
if len(pts) < 2:
return None
return int((pts[-1].timestamp - pts[0].timestamp).total_seconds())
def _gps_stats(
pts: list[DataPoint],
) -> tuple[Optional[float], Optional[int], Optional[float], Optional[float]]:
"""Return (distance_m, moving_time_s, avg_speed_kmh, max_speed_kmh)."""
def _distance(pts: list[DataPoint]) -> Optional[float]:
"""Prefer device-recorded cumulative distance; fall back to GPS geodesic."""
# If the last point has a device distance, use it
last_dist = next(
# Prefer device-recorded cumulative distance (FIT files always have this)
device_dist = next(
(p.distance_m for p in reversed(pts) if p.distance_m is not None), None
)
if last_dist is not None:
return round(last_dist, 1)
# GPS fallback
total = 0.0
has_gps = False
for a, b in zip(pts, pts[1:]):
if a.lat is None or a.lon is None or b.lat is None or b.lon is None:
continue
has_gps = True
total += geodesic((a.lat, a.lon), (b.lat, b.lon)).meters
return round(total, 1) if has_gps else None
def _moving_stats(pts: list[DataPoint]) -> tuple[Optional[int], Optional[float]]:
"""Return (moving_time_s, avg_speed_kmh_over_moving_time)."""
moving_s = 0
moving_dist_m = 0.0
has_gps = False
total_dist_m = 0.0
max_seg_kmh = 0.0
has_data = False
# Device speed values (used for max if present)
device_max_kmh: Optional[float] = None
if any(p.speed_kmh is not None for p in pts):
device_max_kmh = max(p.speed_kmh for p in pts if p.speed_kmh is not None)
for a, b in zip(pts, pts[1:]):
dt = (b.timestamp - a.timestamp).total_seconds()
if dt <= 0:
continue
# Compute speed for this interval from GPS
if a.lat is not None and a.lon is not None and b.lat is not None and b.lon is not None:
has_gps = True
seg_m = geodesic((a.lat, a.lon), (b.lat, b.lon)).meters
seg_m = _haversine_m(a.lat, a.lon, b.lat, b.lon)
seg_kmh = (seg_m / dt) * 3.6
has_data = True
elif a.speed_kmh is not None:
seg_kmh = a.speed_kmh
seg_m = (seg_kmh / 3.6) * dt
has_gps = True # speed data present
has_data = True
else:
continue
total_dist_m += seg_m
if seg_kmh > max_seg_kmh:
max_seg_kmh = seg_kmh
if seg_kmh >= _STOPPED_THRESHOLD_KMH:
moving_s += int(dt)
moving_dist_m += seg_m
if not has_gps or moving_s == 0:
return None, None
if not has_data:
return device_dist, None, None, None
avg_kmh = (moving_dist_m / moving_s) * 3.6
return moving_s, avg_kmh
distance_m = device_dist if device_dist is not None else round(total_dist_m, 1)
moving_time_s = moving_s if moving_s > 0 else None
avg_speed_kmh = (moving_dist_m / moving_s) * 3.6 if moving_s > 0 else None
# Prefer device speed for max (more stable than GPS-derived per-second spikes)
max_speed_kmh = device_max_kmh if device_max_kmh is not None else (
max_seg_kmh if max_seg_kmh > 0 else None
)
return distance_m, moving_time_s, avg_speed_kmh, max_speed_kmh
# ── remaining helpers ──────────────────────────────────────────────────────────
def _duration(pts: list[DataPoint]) -> Optional[int]:
if len(pts) < 2:
return None
return int((pts[-1].timestamp - pts[0].timestamp).total_seconds())
def _elevation(pts: list[DataPoint]) -> tuple[Optional[float], Optional[float]]:
@@ -147,24 +163,6 @@ def _elevation(pts: list[DataPoint]) -> tuple[Optional[float], Optional[float]]:
return gain, loss
def _max_speed(pts: list[DataPoint]) -> Optional[float]:
# Prefer device speed; fall back to GPS-derived
device_speeds = [p.speed_kmh for p in pts if p.speed_kmh is not None]
if device_speeds:
return max(device_speeds)
# GPS-derived max
gps_speeds = []
for a, b in zip(pts, pts[1:]):
if a.lat is None or b.lat is None:
continue
dt = (b.timestamp - a.timestamp).total_seconds()
if dt <= 0:
continue
m = geodesic((a.lat, a.lon), (b.lat, b.lon)).meters
gps_speeds.append((m / dt) * 3.6)
return max(gps_speeds) if gps_speeds else None
def _hr_stats(pts: list[DataPoint]) -> tuple[Optional[int], Optional[int]]:
hrs = [p.hr_bpm for p in pts if p.hr_bpm is not None]
if not hrs: