From 290eef6c728370a39d13689f9beef2e7365d8743 Mon Sep 17 00:00:00 2001 From: Davide Scaini Date: Wed, 15 Apr 2026 14:06:20 +0200 Subject: [PATCH] metrics: guard against corrupted time streams causing OOM Strava originals with absolute Unix timestamps stored as elapsed-second offsets produce a t_max of ~1.6 billion. compute_mmp and compute_best_efforts both create dense 1Hz arrays via range(t_min, t_max+1), which for a 1.6B span allocates 44+ GB and OOM-kills the process. Add a >1-week sanity check and return None early for corrupt streams. Root cause: old Strava activities (seen from 1970-epoch start_date) where the time stream contains absolute Unix timestamps instead of elapsed seconds. --- bincio/extract/metrics.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/bincio/extract/metrics.py b/bincio/extract/metrics.py index 216db4e..d5bb97a 100644 --- a/bincio/extract/metrics.py +++ b/bincio/extract/metrics.py @@ -131,6 +131,10 @@ def compute_mmp(pts: list[DataPoint], started_at: datetime) -> Optional[list[lis t_min = min(sparse) t_max = max(sparse) + # Guard against corrupted time data (e.g. absolute Unix timestamps stored as + # elapsed offsets, which can make t_max astronomically large and OOM the process). + if t_max - t_min > 7 * 24 * 3600: # > 1 week → corrupted stream + return None power_1hz: list[int] = [sparse.get(t, 0) for t in range(t_min, t_max + 1)] n = len(power_1hz) @@ -190,6 +194,10 @@ def compute_best_efforts( t_min = min(sparse_speed) t_max = max(sparse_speed) + # Guard against corrupted time data (e.g. absolute Unix timestamps stored as + # elapsed offsets, which can make t_max astronomically large and OOM the process). + if t_max - t_min > 7 * 24 * 3600: # > 1 week → corrupted stream + return None, None speed_1hz: list[float] = [sparse_speed.get(t, 0.0) for t in range(t_min, t_max + 1)] ele_1hz: list[Optional[float]] = [sparse_ele.get(t) for t in range(t_min, t_max + 1)]