extract: auto-detect gzip by magic bytes, not just .gz extension

Files compressed with gzip but named without .gz (e.g. activity.gpx
containing gzip data) now decompress transparently.
This commit is contained in:
Davide Scaini
2026-04-16 18:49:01 +02:00
parent b22b5deb9e
commit cd1cdca33b
+7 -1
View File
@@ -27,8 +27,14 @@ class BaseParser(ABC):
raw_bytes is the original file content (used for hashing).
decompressed_bytes is what parsers should actually parse.
Gzip is handled both by extension (.gz) and by magic bytes (0x1f 0x8b),
so files that are gzip-compressed but named without .gz still parse correctly.
"""
raw = path.read_bytes()
if path.suffix == ".gz":
if path.suffix == ".gz" or raw[:2] == b'\x1f\x8b':
try:
return raw, gzip.decompress(raw)
except Exception:
pass # not valid gzip despite the .gz extension or magic bytes — fall through
return raw, raw