extract: auto-detect gzip by magic bytes, not just .gz extension
Files compressed with gzip but named without .gz (e.g. activity.gpx containing gzip data) now decompress transparently.
This commit is contained in:
@@ -27,8 +27,14 @@ class BaseParser(ABC):
|
|||||||
|
|
||||||
raw_bytes is the original file content (used for hashing).
|
raw_bytes is the original file content (used for hashing).
|
||||||
decompressed_bytes is what parsers should actually parse.
|
decompressed_bytes is what parsers should actually parse.
|
||||||
|
|
||||||
|
Gzip is handled both by extension (.gz) and by magic bytes (0x1f 0x8b),
|
||||||
|
so files that are gzip-compressed but named without .gz still parse correctly.
|
||||||
"""
|
"""
|
||||||
raw = path.read_bytes()
|
raw = path.read_bytes()
|
||||||
if path.suffix == ".gz":
|
if path.suffix == ".gz" or raw[:2] == b'\x1f\x8b':
|
||||||
return raw, gzip.decompress(raw)
|
try:
|
||||||
|
return raw, gzip.decompress(raw)
|
||||||
|
except Exception:
|
||||||
|
pass # not actually gzip despite the .gz extension or magic bytes — fall through
|
||||||
return raw, raw
|
return raw, raw
|
||||||
|
|||||||
Reference in New Issue
Block a user