parallelizing extraction, fix tcx files
This commit is contained in:
+59
-61
@@ -1,19 +1,28 @@
|
||||
"""Compute aggregated metrics from a ParsedActivity.
|
||||
|
||||
All calculations are self-contained — no external state needed.
|
||||
Uses inline haversine rather than geopy.geodesic to keep the hot path fast.
|
||||
"""
|
||||
|
||||
import math
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
from geopy.distance import geodesic
|
||||
|
||||
from bincio.extract.models import DataPoint, ParsedActivity
|
||||
|
||||
# Speed below which we consider the athlete stopped (km/h)
|
||||
_STOPPED_THRESHOLD_KMH = 1.0
|
||||
_EARTH_R = 6_371_000.0 # metres
|
||||
|
||||
|
||||
def _haversine_m(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
    """Return the haversine (great-circle) distance between two points, in metres.

    All four arguments are in degrees (they are converted with
    ``math.radians``). The sphere radius is the module constant ``_EARTH_R``.
    """
    lat1_rad = math.radians(lat1)
    lat2_rad = math.radians(lat2)
    half_dlat = (lat2_rad - lat1_rad) * 0.5
    half_dlon = math.radians(lon2 - lon1) * 0.5

    sin_half_dlat = math.sin(half_dlat)
    sin_half_dlon = math.sin(half_dlon)
    h = sin_half_dlat ** 2 + math.cos(lat1_rad) * math.cos(lat2_rad) * sin_half_dlon ** 2

    # Floating-point rounding can push h marginally above 1, which would put
    # sqrt(h) outside asin()'s domain; clamp before taking the arc sine.
    clamped = min(h, 1.0)
    return 2.0 * _EARTH_R * math.asin(math.sqrt(clamped))
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -30,7 +39,7 @@ class ComputedMetrics:
|
||||
avg_cadence_rpm: Optional[int]
|
||||
avg_power_w: Optional[int]
|
||||
max_power_w: Optional[int]
|
||||
bbox: Optional[tuple[float, float, float, float]] # min_lon, min_lat, max_lon, max_lat
|
||||
bbox: Optional[tuple[float, float, float, float]] # min_lon, min_lat, max_lon, max_lat
|
||||
start_latlng: Optional[tuple[float, float]]
|
||||
end_latlng: Optional[tuple[float, float]]
|
||||
|
||||
@@ -41,10 +50,8 @@ def compute(activity: ParsedActivity) -> ComputedMetrics:
|
||||
return _empty()
|
||||
|
||||
duration_s = _duration(pts)
|
||||
distance_m = _distance(pts)
|
||||
moving_time_s, moving_speed_kmh = _moving_stats(pts)
|
||||
distance_m, moving_time_s, avg_speed_kmh, max_speed_kmh = _gps_stats(pts)
|
||||
gain, loss = _elevation(pts)
|
||||
max_speed = _max_speed(pts)
|
||||
avg_hr, max_hr = _hr_stats(pts)
|
||||
avg_cad = _avg_nonnull([p.cadence_rpm for p in pts])
|
||||
avg_pow = _avg_nonnull([p.power_w for p in pts])
|
||||
@@ -58,8 +65,8 @@ def compute(activity: ParsedActivity) -> ComputedMetrics:
|
||||
moving_time_s=moving_time_s,
|
||||
elevation_gain_m=round(gain, 1) if gain is not None else None,
|
||||
elevation_loss_m=round(abs(loss), 1) if loss is not None else None,
|
||||
avg_speed_kmh=round(moving_speed_kmh, 2) if moving_speed_kmh else None,
|
||||
max_speed_kmh=round(max_speed, 2) if max_speed else None,
|
||||
avg_speed_kmh=round(avg_speed_kmh, 2) if avg_speed_kmh else None,
|
||||
max_speed_kmh=round(max_speed_kmh, 2) if max_speed_kmh else None,
|
||||
avg_hr_bpm=avg_hr,
|
||||
max_hr_bpm=max_hr,
|
||||
avg_cadence_rpm=avg_cad,
|
||||
@@ -71,66 +78,75 @@ def compute(activity: ParsedActivity) -> ComputedMetrics:
|
||||
)
|
||||
|
||||
|
||||
# ── helpers ──────────────────────────────────────────────────────────────────
|
||||
# ── single-pass GPS stats ──────────────────────────────────────────────────────
|
||||
# distance, moving time, avg speed, and max speed are all derived from the same
|
||||
# per-segment loop, so we compute them in one pass instead of four.
|
||||
|
||||
def _duration(pts: list[DataPoint]) -> Optional[int]:
    """Return the elapsed time between the first and last point, in seconds.

    The difference of the two ``timestamp`` values is truncated to an int.
    Returns ``None`` when fewer than two points are supplied.
    """
    if len(pts) >= 2:
        first, last = pts[0], pts[-1]
        elapsed = last.timestamp - first.timestamp
        return int(elapsed.total_seconds())
    return None
|
||||
def _gps_stats(
    pts: list[DataPoint],
) -> tuple[Optional[float], Optional[int], Optional[float], Optional[float]]:
    """Return (distance_m, moving_time_s, avg_speed_kmh, max_speed_kmh).

    Walks consecutive point pairs once and derives all four values from the
    same per-segment data:

    * distance_m: the last non-null device ``distance_m`` (rounded to 0.1 m)
      when one exists, otherwise the sum of all segment lengths.
    * moving_time_s: total time of segments whose speed is at least
      ``_STOPPED_THRESHOLD_KMH``, rounded to whole seconds; ``None`` if zero.
    * avg_speed_kmh: distance covered while moving divided by moving time.
    * max_speed_kmh: the largest device ``speed_kmh`` when any point has one,
      otherwise the fastest segment speed.

    A segment uses the haversine distance between its endpoints when both have
    lat/lon; failing that it uses the first endpoint's device speed. Segments
    with neither, or with a non-positive time delta, are ignored. If no
    segment is usable, only the device distance (possibly ``None``) is
    returned and the other three values are ``None``.
    """
    # Prefer the device-recorded cumulative distance: last non-null value wins.
    device_dist = next(
        (p.distance_m for p in reversed(pts) if p.distance_m is not None), None
    )
    if device_dist is not None:
        # Same 0.1 m precision as the GPS-derived fallback below.
        device_dist = round(device_dist, 1)

    # Device speed values (used for max if present).
    device_max_kmh: Optional[float] = max(
        (p.speed_kmh for p in pts if p.speed_kmh is not None), default=None
    )

    # Moving time is accumulated as a float and rounded once at the end.
    # Truncating every segment with int(dt) drops sub-second intervals
    # entirely and inflates the average speed for fractional ones.
    moving_s = 0.0
    moving_dist_m = 0.0
    total_dist_m = 0.0
    max_seg_kmh = 0.0
    has_data = False

    for a, b in zip(pts, pts[1:]):
        dt = (b.timestamp - a.timestamp).total_seconds()
        if dt <= 0:
            continue

        if a.lat is not None and a.lon is not None and b.lat is not None and b.lon is not None:
            seg_m = _haversine_m(a.lat, a.lon, b.lat, b.lon)
            seg_kmh = (seg_m / dt) * 3.6
        elif a.speed_kmh is not None:
            # No usable fix on this segment: integrate the device speed.
            seg_kmh = a.speed_kmh
            seg_m = (seg_kmh / 3.6) * dt
        else:
            continue
        has_data = True

        total_dist_m += seg_m
        if seg_kmh > max_seg_kmh:
            max_seg_kmh = seg_kmh

        if seg_kmh >= _STOPPED_THRESHOLD_KMH:
            moving_s += dt
            moving_dist_m += seg_m

    if not has_data:
        return device_dist, None, None, None

    distance_m = device_dist if device_dist is not None else round(total_dist_m, 1)
    whole_moving_s = round(moving_s)
    moving_time_s = whole_moving_s if whole_moving_s > 0 else None
    avg_speed_kmh = (moving_dist_m / moving_s) * 3.6 if moving_s > 0 else None
    # Prefer device speed for max (more stable than GPS-derived per-second spikes)
    if device_max_kmh is not None:
        max_speed_kmh = device_max_kmh
    else:
        max_speed_kmh = max_seg_kmh if max_seg_kmh > 0 else None

    return distance_m, moving_time_s, avg_speed_kmh, max_speed_kmh
|
||||
|
||||
|
||||
# ── remaining helpers ──────────────────────────────────────────────────────────
|
||||
|
||||
def _duration(pts: list[DataPoint]) -> Optional[int]:
    """Return the elapsed time between the first and last point, in seconds.

    The difference of the two ``timestamp`` values is truncated to an int.
    Returns ``None`` when fewer than two points are supplied.
    """
    if len(pts) >= 2:
        first, last = pts[0], pts[-1]
        elapsed = last.timestamp - first.timestamp
        return int(elapsed.total_seconds())
    return None
|
||||
|
||||
|
||||
def _elevation(pts: list[DataPoint]) -> tuple[Optional[float], Optional[float]]:
|
||||
@@ -147,24 +163,6 @@ def _elevation(pts: list[DataPoint]) -> tuple[Optional[float], Optional[float]]:
|
||||
return gain, loss
|
||||
|
||||
|
||||
def _max_speed(pts: list[DataPoint]) -> Optional[float]:
    """Return the maximum speed in km/h, or ``None`` if none can be determined.

    If any point carries a device ``speed_kmh``, the largest such value is
    returned. Otherwise the speed of each consecutive pair of points is
    derived from the geodesic distance between them divided by their
    timestamp difference, and the largest of those is returned.
    """
    # Prefer device speed; fall back to GPS-derived.
    device_max = max(
        (p.speed_kmh for p in pts if p.speed_kmh is not None), default=None
    )
    if device_max is not None:
        return device_max

    fastest: Optional[float] = None
    for a, b in zip(pts, pts[1:]):
        # Both endpoints need a complete fix. Checking only lat would let a
        # point with a latitude but no longitude reach geodesic() with None.
        if a.lat is None or a.lon is None or b.lat is None or b.lon is None:
            continue
        dt = (b.timestamp - a.timestamp).total_seconds()
        if dt <= 0:
            # Duplicate or out-of-order timestamps give no meaningful speed.
            continue
        m = geodesic((a.lat, a.lon), (b.lat, b.lon)).meters
        kmh = (m / dt) * 3.6
        if fastest is None or kmh > fastest:
            fastest = kmh
    return fastest
|
||||
|
||||
|
||||
def _hr_stats(pts: list[DataPoint]) -> tuple[Optional[int], Optional[int]]:
|
||||
hrs = [p.hr_bpm for p in pts if p.hr_bpm is not None]
|
||||
if not hrs:
|
||||
|
||||
Reference in New Issue
Block a user