fix: refine hysteresis recalculation with MA pre-smoothing and lower thresholds

- dem.py: pre-smooth elevation with 30s moving average before hysteresis
  in recalculate_elevation_hysteresis(); thresholds drop from 5m/10m to
  1m (barometric) / 3m (GPS) — accurate after noise is smoothed out
- dem.py: widen DEM median-filter window 45s → 60s
- dem.py: rename response key source → altitude_source for consistency
- writer.py: write altitude_source into detail JSON at extract time
- tests/test_dem.py: 21 unit tests for pure functions and file-level hysteresis
- tests/test_edit_server.py: 11 TestClient API tests for both recalculate endpoints
- add httpx as dev dependency (required by FastAPI TestClient)
This commit is contained in:
Davide Scaini
2026-04-22 10:57:28 +02:00
parent 88b24a6274
commit df496a017f
6 changed files with 481 additions and 13 deletions
+54 -13
View File
@@ -35,7 +35,32 @@ _DEM_HYSTERESIS_M = 10.0
# Median filter window (seconds / samples at 1 Hz) applied to DEM-interpolated
# series before hysteresis. 60 s smooths SRTM tile steps while keeping real
# climbs (typical cycling ramp > 100 m over > 2 min).
_MEDIAN_WINDOW_S = 45
_MEDIAN_WINDOW_S = 60
# Moving-average window (seconds) applied to the 1 Hz elevation series before
# hysteresis in the on-demand recalculation. Pre-smoothing lets us use a
# much lower dead-band (capturing real small climbs) while still suppressing
# GPS jitter and barometric quantization noise.
_MA_WINDOW_S = 30
def _moving_average(values: list[float], window: int) -> list[float]:
"""Apply a centred sliding-window moving average to *values*.
Edge handling: window shrinks symmetrically at both ends (same effective
behaviour as scipy's 'nearest' / numpy's 'reflect' mode).
"""
half = window // 2
n = len(values)
out: list[float] = []
cumsum = [0.0] * (n + 1)
for i, v in enumerate(values):
cumsum[i + 1] = cumsum[i] + v
for i in range(n):
lo = max(0, i - half)
hi = min(n, i + half + 1)
out.append((cumsum[hi] - cumsum[lo]) / (hi - lo))
return out
def _median_filter(values: list[float], window: int) -> list[float]:
@@ -275,18 +300,32 @@ def recalculate_elevation(
def recalculate_elevation_hysteresis(user_dir: Path, activity_id: str) -> dict:
"""Recompute elevation gain/loss from the original recorded elevation data.
Uses the same source-aware hysteresis thresholds as the extract pipeline:
Algorithm
---------
1. Read ``elevation_m_original`` (backup from a prior DEM run) if present,
otherwise read ``elevation_m`` from the timeseries.
2. Apply a :data:`_MA_WINDOW_S` (30 s) moving average to smooth out
barometric quantization steps and GPS jitter.
3. Apply a low dead-band threshold to the smoothed series:
- **1 m** for barometric altimeters (FIT files with ``enhanced_altitude``)
- **3 m** for GPS-derived altitude (GPX, TCX, FIT without enhanced_altitude)
- 5 m for barometric altimeters (FIT files with ``enhanced_altitude``)
- 10 m for GPS-derived altitude (GPX, TCX, FIT without barometric)
The 30 s pre-smoothing makes the low thresholds safe: after averaging,
0.2 m barometric quantization noise and short-period GPS jitter are
suppressed below the threshold, while real terrain changes (which persist
across the window) are preserved.
The elevation array in the timeseries is **not** modified. If a DEM
correction was previously applied, the backup in ``elevation_m_original``
is used as the source so the original sensor data is recovered.
The elevation array in the timeseries is **not** modified — only the
summary stats in the detail JSON and ``index.json`` are patched.
``altitude_source`` is read from the detail JSON (written by the extractor
for activities recorded after this field was added). For older activities
it falls back to ``"unknown"`` → 3 m GPS threshold.
Returns
-------
dict with keys ``elevation_gain_m``, ``elevation_loss_m``.
dict with keys ``elevation_gain_m``, ``elevation_loss_m``,
``threshold_m``, ``altitude_source``.
"""
acts_dir = user_dir / "activities"
json_path = acts_dir / f"{activity_id}.json"
@@ -299,7 +338,7 @@ def recalculate_elevation_hysteresis(user_dir: Path, activity_id: str) -> dict:
ts = json.loads(ts_path.read_text(encoding="utf-8"))
# Use original elevation if a DEM backup exists, otherwise use current
# Prefer the pre-DEM backup; fall back to the current elevation array
ele_arr: list[Optional[float]] = (
ts.get("elevation_m_original") or ts.get("elevation_m") or []
)
@@ -307,12 +346,14 @@ def recalculate_elevation_hysteresis(user_dir: Path, activity_id: str) -> dict:
if len(elevations) < 2:
raise ValueError("Not enough elevation data to compute gain/loss")
# Determine threshold from altitude_source stored in detail JSON
# Determine source-aware threshold
detail = json.loads(json_path.read_text(encoding="utf-8"))
altitude_source = detail.get("altitude_source", "unknown")
threshold = 5.0 if altitude_source == "barometric" else 10.0
threshold = 1.0 if altitude_source == "barometric" else 3.0
gain, loss = _hysteresis_gain_loss(elevations, threshold)
# Pre-smooth to suppress noise, then accumulate with low dead-band
smoothed = _moving_average(elevations, _MA_WINDOW_S)
gain, loss = _hysteresis_gain_loss(smoothed, threshold)
gain_r = round(gain, 1)
loss_r = round(loss, 1)
@@ -337,5 +378,5 @@ def recalculate_elevation_hysteresis(user_dir: Path, activity_id: str) -> dict:
"elevation_gain_m": gain_r,
"elevation_loss_m": loss_r,
"threshold_m": threshold,
"source": altitude_source,
"altitude_source": altitude_source,
}
+1
View File
@@ -93,6 +93,7 @@ def write_activity(
"source": source,
"source_file": activity.source_file,
"source_hash": activity.source_hash,
"altitude_source": activity.altitude_source,
"strava_id": activity.strava_id,
"duplicate_of": duplicate_of,
"privacy": privacy,