extract: auto-detect gzip by magic bytes, not just .gz extension

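Files that are gzip-compressed but named without a .gz extension (e.g. an activity.gpx that actually contains gzip data) are now decompressed transparently. If decompression fails, the original bytes are returned unchanged.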
2026-04-16 18:49:01 +02:00
parent b22b5deb9e
commit cd1cdca33b
1 changed files with 8 additions and 2 deletions
@@ -27,8 +27,14 @@ class BaseParser(ABC):
        raw_bytes is the original file content (used for hashing).
        decompressed_bytes is what parsers should actually parse.
        Gzip is handled both by extension (.gz) and by magic bytes (0x1f 0x8b),
        so files that are gzip-compressed but named without .gz still parse correctly.
        """
        raw = path.read_bytes()
-        if path.suffix == ".gz":
+        if path.suffix == ".gz" or raw[:2] == b'\x1f\x8b':
-            return raw, gzip.decompress(raw)
+            try:
                return raw, gzip.decompress(raw)
            except Exception:
                pass  # not actually gzip despite the magic bytes — fall through
        return raw, raw