From cd1cdca33b036146c7399d4e480a9dca15ff9bf9 Mon Sep 17 00:00:00 2001 From: Davide Scaini Date: Thu, 16 Apr 2026 18:49:01 +0200 Subject: [PATCH] extract: auto-detect gzip by magic bytes, not just .gz extension Files compressed with gzip but named without .gz (e.g. activity.gpx containing gzip data) now decompress transparently. --- bincio/extract/parsers/base.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/bincio/extract/parsers/base.py b/bincio/extract/parsers/base.py index c68f424..543b95e 100644 --- a/bincio/extract/parsers/base.py +++ b/bincio/extract/parsers/base.py @@ -27,8 +27,14 @@ class BaseParser(ABC): raw_bytes is the original file content (used for hashing). decompressed_bytes is what parsers should actually parse. + + Gzip is handled both by extension (.gz) and by magic bytes (0x1f 0x8b), + so files that are gzip-compressed but named without .gz still parse correctly. """ raw = path.read_bytes() - if path.suffix == ".gz": - return raw, gzip.decompress(raw) + if path.suffix == ".gz" or raw[:2] == b'\x1f\x8b': + try: + return raw, gzip.decompress(raw) + except Exception: + pass # not actually gzip despite the magic bytes — fall through return raw, raw