extract: auto-detect gzip by magic bytes, not just .gz extension
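Files that are gzip-compressed but named without .gz (e.g. activity.gpx containing gzip data) are now decompressed transparently. If decompression fails — whether it was triggered by the .gz extension or by the magic bytes — the raw bytes are returned unchanged instead of raising.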
This commit is contained in:
@@ -27,8 +27,14 @@ class BaseParser(ABC):
|
||||
|
||||
raw_bytes is the original file content (used for hashing).
|
||||
decompressed_bytes is what parsers should actually parse.
|
||||
|
||||
Gzip is handled both by extension (.gz) and by magic bytes (0x1f 0x8b),
|
||||
so files that are gzip-compressed but named without .gz still parse correctly.
|
||||
"""
|
||||
raw = path.read_bytes()
|
||||
if path.suffix == ".gz":
|
||||
return raw, gzip.decompress(raw)
|
||||
if path.suffix == ".gz" or raw[:2] == b'\x1f\x8b':
|
||||
try:
|
||||
return raw, gzip.decompress(raw)
|
||||
except Exception:
|
||||
pass # not actually gzip despite the magic bytes — fall through
|
||||
return raw, raw
|
||||
|
||||
Reference in New Issue
Block a user