diff --git a/bincio/extract/parsers/base.py b/bincio/extract/parsers/base.py index c68f424..543b95e 100644 --- a/bincio/extract/parsers/base.py +++ b/bincio/extract/parsers/base.py @@ -27,8 +27,14 @@ class BaseParser(ABC): raw_bytes is the original file content (used for hashing). decompressed_bytes is what parsers should actually parse. + + Gzip is handled both by extension (.gz) and by magic bytes (0x1f 0x8b), + so files that are gzip-compressed but named without .gz still parse correctly. """ raw = path.read_bytes() - if path.suffix == ".gz": - return raw, gzip.decompress(raw) + if path.suffix == ".gz" or raw[:2] == b'\x1f\x8b': + try: + return raw, gzip.decompress(raw) + except Exception: + pass # not actually gzip despite the magic bytes — fall through return raw, raw