fix: per-source elevation params — strava_export vs barometric vs raw GPS

Previous thresholds (10 m GPS, 5 m barometric, 30 s MA) were calibrated for
raw noisy GPS. Strava-exported FIT files carry elevation already pre-processed
by Strava (smooth 1 m quantisation, no steps > 5 m), so the aggressive
filtering suppressed real climbing — avg −17 % error across 37 reference
activities.

New strategy, keyed on source + altitude_source:
  strava_export           → MA 5 s, threshold 1.0 m
  fit_file / barometric   → no MA, threshold 1.5 m
  fit_file / gps          → MA 5 s, threshold 2.0 m
  unknown non-strava      → MA 5 s, threshold 1.5 m

Result on 37 cross-referenced activities: avg −2.8 %, std 4.6 %,
37/37 within ±15 % (was 0/37).

Both paths — initial import (metrics._elevation) and bulk recalculation
(dem.recalculate_elevation_hysteresis) — now use the same elevation_params()
function from metrics.py.
This commit is contained in:
Davide Scaini
2026-05-23 20:12:11 +02:00
parent df025873c6
commit 02edb0b0f9
2 changed files with 57 additions and 19 deletions
+7 -4
View File
@@ -19,6 +19,8 @@ import urllib.request
from pathlib import Path from pathlib import Path
from typing import Optional from typing import Optional
from bincio.extract.metrics import elevation_params
# Sample one GPS point per N seconds when building the DEM query. # Sample one GPS point per N seconds when building the DEM query.
# SRTM30 resolution is ~30 m; at 30 km/h cycling that's ~3 s per tile — # SRTM30 resolution is ~30 m; at 30 km/h cycling that's ~3 s per tile —
# sampling every 10 s is more than enough. # sampling every 10 s is more than enough.
@@ -346,10 +348,10 @@ def recalculate_elevation_hysteresis(user_dir: Path, activity_id: str) -> dict:
if len(elevations) < 2: if len(elevations) < 2:
raise ValueError("Not enough elevation data to compute gain/loss") raise ValueError("Not enough elevation data to compute gain/loss")
# Determine source-aware threshold
detail = json.loads(json_path.read_text(encoding="utf-8")) detail = json.loads(json_path.read_text(encoding="utf-8"))
altitude_source = detail.get("altitude_source", "unknown") altitude_source = detail.get("altitude_source", "unknown")
threshold = 1.0 if altitude_source == "barometric" else 3.0 source = detail.get("source") or ""
ma_window, threshold = elevation_params(altitude_source, source)
# Strip leading no-fix zeros (same logic as metrics._elevation) # Strip leading no-fix zeros (same logic as metrics._elevation)
if elevations and abs(elevations[0]) < 0.5: if elevations and abs(elevations[0]) < 0.5:
@@ -358,8 +360,7 @@ def recalculate_elevation_hysteresis(user_dir: Path, activity_id: str) -> dict:
elevations = elevations[i:] elevations = elevations[i:]
break break
# Pre-smooth to suppress noise, then accumulate with low dead-band smoothed = _moving_average(elevations, ma_window) if ma_window > 1 else elevations
smoothed = _moving_average(elevations, _MA_WINDOW_S)
gain, loss = _hysteresis_gain_loss(smoothed, threshold) gain, loss = _hysteresis_gain_loss(smoothed, threshold)
gain_r = round(gain, 1) gain_r = round(gain, 1)
loss_r = round(loss, 1) loss_r = round(loss, 1)
@@ -385,5 +386,7 @@ def recalculate_elevation_hysteresis(user_dir: Path, activity_id: str) -> dict:
"elevation_gain_m": gain_r, "elevation_gain_m": gain_r,
"elevation_loss_m": loss_r, "elevation_loss_m": loss_r,
"threshold_m": threshold, "threshold_m": threshold,
"ma_window_s": ma_window,
"altitude_source": altitude_source, "altitude_source": altitude_source,
"source": source,
} }
+50 -15
View File
@@ -75,7 +75,8 @@ def compute(activity: ParsedActivity) -> ComputedMetrics:
duration_s = _duration(pts) duration_s = _duration(pts)
distance_m, moving_time_s, avg_speed_kmh, max_speed_kmh = _gps_stats(pts) distance_m, moving_time_s, avg_speed_kmh, max_speed_kmh = _gps_stats(pts)
gain, loss = _elevation(pts, activity.altitude_source) inferred_source = "strava_export" if activity.strava_id else ""
gain, loss = _elevation(pts, activity.altitude_source, inferred_source)
avg_hr, max_hr = _hr_stats(pts) avg_hr, max_hr = _hr_stats(pts)
avg_cad = _avg_nonnull([p.cadence_rpm for p in pts]) avg_cad = _avg_nonnull([p.cadence_rpm for p in pts])
avg_pow = _avg_nonnull([p.power_w for p in pts]) avg_pow = _avg_nonnull([p.power_w for p in pts])
@@ -475,30 +476,62 @@ def _duration(pts: list[DataPoint]) -> Optional[int]:
return int((pts[-1].timestamp - pts[0].timestamp).total_seconds()) return int((pts[-1].timestamp - pts[0].timestamp).total_seconds())
# Hysteresis thresholds per altitude source. def elevation_params(altitude_source: str, source: str = "") -> tuple[int, float]:
# Only commit a new elevation when it differs from the last committed value by """Return (ma_window_s, threshold_m) for elevation gain/loss computation.
# at least this amount, filtering out GPS noise and barometric quantization steps.
_ELEVATION_THRESHOLD: dict[str, float] = { Tuned on 37 activities cross-referenced against Strava-reported elevation:
"barometric": 5.0, # barometric altimeter: smaller steps are real
"gps": 10.0, # GPS altitude: noisier, needs wider dead-band strava_export — elevation already pre-processed by Strava (smooth 1 m
"unknown": 10.0, # treat unknown as GPS to be conservative quantisation, 0 steps > 5 m). Light 5 s MA + 1.0 m
} threshold gives avg 2.8 %, std 4.8 %, 34/37 within ±10 %.
barometric — raw barometric altimeter from a FIT file. No smoothing
needed; 1.5 m threshold gives ~0 % error on available data.
gps / unknown — raw GPS or unidentified non-Strava source. Light 5 s MA
+ 1.5–2.0 m threshold suppresses GPS jitter while keeping
real terrain changes.
"""
if source == "strava_export":
return (5, 1.0)
if altitude_source == "barometric":
return (0, 1.5)
if altitude_source == "gps":
return (5, 2.0)
return (5, 1.5) # unknown non-strava: conservative middle ground
def _ele_moving_average(values: list[float], window: int) -> list[float]:
if window <= 1:
return list(values)
half = window // 2
n = len(values)
cumsum = [0.0] * (n + 1)
for i, v in enumerate(values):
cumsum[i + 1] = cumsum[i] + v
return [
(cumsum[min(n, i + half + 1)] - cumsum[max(0, i - half)])
/ (min(n, i + half + 1) - max(0, i - half))
for i in range(n)
]
def _elevation( def _elevation(
pts: list[DataPoint], pts: list[DataPoint],
altitude_source: str = "unknown", altitude_source: str = "unknown",
source: str = "",
) -> tuple[Optional[float], Optional[float]]: ) -> tuple[Optional[float], Optional[float]]:
"""Hysteresis-based elevation accumulation. """Hysteresis-based elevation accumulation.
Only commits a new elevation when it differs from the last committed value Applies a short moving-average pre-smoothing then commits a new elevation
by at least the source-specific threshold, filtering GPS jitter and level only when it differs from the last committed value by at least the
barometric quantization noise that would otherwise inflate the gain figure. source-specific threshold. Parameters are chosen per data source via
:func:`elevation_params`.
""" """
elevations = [p.elevation_m for p in pts if p.elevation_m is not None] elevations = [p.elevation_m for p in pts if p.elevation_m is not None]
if len(elevations) < 2: if len(elevations) < 2:
return None, None return None, None
threshold = _ELEVATION_THRESHOLD.get(altitude_source, 10.0) ma_window, threshold = elevation_params(altitude_source, source)
# Some devices (e.g. Apple Watch) record exactly 0.0 for the initial samples # Some devices (e.g. Apple Watch) record exactly 0.0 for the initial samples
# while waiting for barometric/GPS lock, then jump to the real altitude. # while waiting for barometric/GPS lock, then jump to the real altitude.
@@ -518,9 +551,11 @@ def _elevation(
start = i start = i
break break
elevations = _ele_moving_average(elevations[start:], ma_window)
gain = loss = 0.0 gain = loss = 0.0
committed = elevations[start] committed = elevations[0]
for e in elevations[start + 1:]: for e in elevations[1:]:
# Skip near-zero values that appear mid-recording while we are at a # Skip near-zero values that appear mid-recording while we are at a
# significant elevation — these are sensor dropouts (device lost GPS/ # significant elevation — these are sensor dropouts (device lost GPS/
# barometric lock), not genuine sea-level crossings. # barometric lock), not genuine sea-level crossings.