diff --git a/.gitattributes b/.gitattributes
index b547d5ebf..be7e7a8ba 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -13,7 +13,9 @@ tests/_data/volumes/bde/enc-volume.bin filter=lfs diff=lfs merge=lfs -text
 tests/_data/volumes/md/md-nested.bin.gz filter=lfs diff=lfs merge=lfs -text
 tests/_data/loaders/tar/test-anon-filesystems.tar filter=lfs diff=lfs merge=lfs -text
 tests/_data/plugins/apps/browser/firefox/cookies.sqlite filter=lfs diff=lfs merge=lfs -text
+tests/_data/plugins/apps/container/docker/docker.tgz filter=lfs diff=lfs merge=lfs -text
+tests/_data/loaders/cpio/initrd.img-6.1.0-15-amd64 filter=lfs diff=lfs merge=lfs -text
+tests/_data/loaders/cpio/initrd.img-6.1.0-17-amd64 filter=lfs diff=lfs merge=lfs -text
 tests/_data/plugins/os/unix/locate/locatedb filter=lfs diff=lfs merge=lfs -text
 tests/_data/plugins/os/unix/locate/mlocate.db filter=lfs diff=lfs merge=lfs -text
 tests/_data/plugins/os/unix/locate/plocate.db filter=lfs diff=lfs merge=lfs -text
-tests/_data/plugins/apps/container/docker/docker.tgz filter=lfs diff=lfs merge=lfs -text
\ No newline at end of file
diff --git a/dissect/target/filesystem.py b/dissect/target/filesystem.py
index 8d26a8c5c..3d409ad1f 100644
--- a/dissect/target/filesystem.py
+++ b/dissect/target/filesystem.py
@@ -1588,5 +1588,7 @@ def open_multi_volume(fhs: list[BinaryIO], *args, **kwargs) -> Filesystem:
 register("exfat", "ExfatFilesystem")
 register("squashfs", "SquashFSFilesystem")
 register("zip", "ZipFilesystem")
+register("tar", "TarFilesystem")
+register("cpio", "CpioFilesystem")
 register("ad1", "AD1Filesystem")
 register("jffs", "JFFSFilesystem")
diff --git a/dissect/target/filesystems/cpio.py b/dissect/target/filesystems/cpio.py
new file mode 100644
index 000000000..5e8d2aee2
--- /dev/null
+++ b/dissect/target/filesystems/cpio.py
@@ -0,0 +1,25 @@
+from typing import BinaryIO, Optional
+
+from dissect.util import cpio
+
+from dissect.target.filesystems.tar import TarFilesystem
+from dissect.target.helpers.fsutil import open_decompress
+
+
+class CpioFilesystem(TarFilesystem):
+    """Filesystem implementation for (optionally compressed) cpio archives.
+
+    The file-like object is first passed through ``open_decompress`` and then handed to
+    ``TarFilesystem`` with ``cpio.CpioInfo`` as the ``tarinfo`` class.
+    """
+
+    __type__ = "cpio"
+
+    def __init__(self, fh: BinaryIO, base: Optional[str] = None, *args, **kwargs):
+        # Unpack positional arguments before the keyword argument so the call reads in binding order.
+        super().__init__(open_decompress(fileobj=fh), base, *args, tarinfo=cpio.CpioInfo, **kwargs)
+
+    @staticmethod
+    def _detect(fh: BinaryIO) -> bool:
+        """Detect a cpio file on a given file-like object."""
+        return cpio.detect_header(open_decompress(fileobj=fh)) != cpio.FORMAT_CPIO_UNKNOWN
diff --git a/dissect/target/helpers/fsutil.py b/dissect/target/helpers/fsutil.py
index 7866eead1..cab58d1cc 100644
--- a/dissect/target/helpers/fsutil.py
+++ b/dissect/target/helpers/fsutil.py
@@ -20,6 +20,13 @@
 except ImportError:
     HAVE_BZ2 = False
 
+try:
+    import zstandard
+
+    HAVE_ZSTD = True
+except ImportError:
+    HAVE_ZSTD = False
+
 import dissect.target.filesystem as filesystem
 from dissect.target.exceptions import FileNotFoundError, SymlinkRecursionError
 from dissect.target.helpers.polypath import (
@@ -445,17 +452,22 @@ def resolve_link(
 
 
 def open_decompress(
-    path: TargetPath,
+    path: Optional[TargetPath] = None,
     mode: str = "rb",
+    *,
+    fileobj: Optional[BinaryIO] = None,
     encoding: Optional[str] = "UTF-8",
     errors: Optional[str] = "backslashreplace",
     newline: Optional[str] = None,
 ) -> Union[BinaryIO, TextIO]:
-    """Open and decompress a file. Handles gz and bz2 files. Uncompressed files are opened as-is.
+    """Open and decompress a file. Handles gz, bz2 and zstd files. Uncompressed files are opened as-is.
+
+    When passing in an already opened ``fileobj``, the mode, encoding, errors and newline arguments are ignored.
 
     Args:
         path: The path to the file to open and decompress. It is assumed this path exists.
         mode: The mode in which to open the file.
+        fileobj: The file-like object to open and decompress. This is mutually exclusive with path.
         encoding: The decoding for text streams. By default UTF-8 encoding is used.
         errors: The error handling for text streams. By default we're more lenient and use ``backslashreplace``.
         newline: How newlines are handled for text streams.
@@ -469,7 +481,17 @@ def open_decompress(
         for line in open_decompress(Path("/dir/file.gz"), "rt"):
             print(line)
     """
-    file = path.open()
+    if path is not None and fileobj is not None:
+        raise ValueError("path and fileobj are mutually exclusive")
+
+    if path is None and fileobj is None:
+        raise ValueError("path or fileobj is required")
+
+    if path is not None:
+        file = path.open("rb")
+    else:
+        file = fileobj
+
     magic = file.read(4)
     file.seek(0)
 
@@ -480,13 +502,26 @@ def open_decompress(
 
     if magic[:2] == b"\x1f\x8b":
         return gzip.open(file, mode, encoding=encoding, errors=errors, newline=newline)
-    # In a valid bz2 header the 4th byte is in the range b'1' ... b'9'.
-    elif HAVE_BZ2 and magic[:3] == b"BZh" and 0x31 <= magic[3] <= 0x39:
+
+    if HAVE_BZ2 and magic[:3] == b"BZh" and b"1" <= magic[3:4] <= b"9":
+        # In a valid bz2 header the 4th byte is in the range b'1' ... b'9'. Slicing keeps a 3-byte input from raising.
         return bz2.open(file, mode, encoding=encoding, errors=errors, newline=newline)
-    else:
+
+    if HAVE_ZSTD and magic[:4] in [b"\xfd\x2f\xb5\x28", b"\x28\xb5\x2f\xfd"]:
+        # stream_reader is not seekable, so we have to resort to the less
+        # efficient decompressor which returns bytes.
+        compressed = file.read()
+        if path is not None:
+            # This handle was opened here and is fully consumed; close it instead of leaking it.
+            file.close()
+        return io.BytesIO(zstandard.decompress(compressed))
+
+    if path is not None:
         file.close()
         return path.open(mode, encoding=encoding, errors=errors, newline=newline)
 
+    return file
+
 
 def reverse_readlines(fh: TextIO, chunk_size: int = 1024 * 1024 * 8) -> Iterator[str]:
     """Like iterating over a ``TextIO`` file-like object, but starting from the end of the file.
diff --git a/tests/_data/loaders/cpio/initrd.img-6.1.0-15-amd64 b/tests/_data/loaders/cpio/initrd.img-6.1.0-15-amd64
new file mode 100644
index 000000000..5d150b16e
--- /dev/null
+++ b/tests/_data/loaders/cpio/initrd.img-6.1.0-15-amd64
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a12f63a0c0783d9f647e46d058cfd1cd973ae0fc47332741d5ada70c91838426
+size 30818636
diff --git a/tests/_data/loaders/cpio/initrd.img-6.1.0-17-amd64 b/tests/_data/loaders/cpio/initrd.img-6.1.0-17-amd64
new file mode 100644
index 000000000..fb46ce16b
--- /dev/null
+++ b/tests/_data/loaders/cpio/initrd.img-6.1.0-17-amd64
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da629b5cf297525debef2388df1259f6c272f07a42c22162070e601dcfbe79a4
+size 38204386
diff --git a/tests/filesystems/test_cpio.py b/tests/filesystems/test_cpio.py
new file mode 100644
index 000000000..0944d50bb
--- /dev/null
+++ b/tests/filesystems/test_cpio.py
@@ -0,0 +1,38 @@
+from pathlib import Path
+
+from dissect.target.filesystems.cpio import CpioFilesystem
+from tests._utils import absolute_path
+
+
+def test_cpio_uncompressed() -> None:
+    """The initrd fixture is detected as cpio and exposes a single ``kernel`` entry at its root."""
+    cpio_path = Path(absolute_path("_data/loaders/cpio/initrd.img-6.1.0-17-amd64"))
+
+    with cpio_path.open("rb") as fh:
+        assert CpioFilesystem.detect(fh)
+
+        fs = CpioFilesystem(fh)
+        assert [f.name for f in fs.path("/").iterdir()] == ["kernel"]
+
+
+def test_cpio_compressed_zstd() -> None:
+    """The compressed initrd fixture is detected as cpio and its root directory can be listed."""
+    cpio_path = Path(absolute_path("_data/loaders/cpio/initrd.img-6.1.0-15-amd64"))
+
+    with cpio_path.open("rb") as fh:
+        assert CpioFilesystem.detect(fh)
+
+        fs = CpioFilesystem(fh)
+        # Compare sorted names so the check does not depend on the order of members in the archive.
+        assert sorted(f.name for f in fs.path("/").iterdir()) == [
+            "bin",
+            "conf",
+            "etc",
+            "init",
+            "lib",
+            "lib64",
+            "run",
+            "sbin",
+            "scripts",
+            "usr",
+        ]