diff --git a/dissect/target/helpers/hashutil.py b/dissect/target/helpers/hashutil.py index d69cb60ba..b68bee018 100644 --- a/dissect/target/helpers/hashutil.py +++ b/dissect/target/helpers/hashutil.py @@ -15,13 +15,9 @@ BUFFER_SIZE = 32768 -HashRecord = RecordDescriptor( - "filesystem/file/digest", - [ - ("path[]", "paths"), - ("digest[]", "digests"), - ], -) +RECORD_NAME = "filesystem/file/digest" +NAME_SUFFIXES = ["_resolved", "_digest"] +RECORD_TYPES = ["path", "digest"] def _hash(fh: BinaryIO, ctx: Union[HASH, list[HASH]]) -> tuple[str]: @@ -76,13 +72,13 @@ def hash_uri_records(target: Target, record: Record) -> Record: def hash_path_records(target: Target, record: Record) -> Record: """Hash files from path fields inside the record.""" - hashed_paths = [] - if target.os == "windows": path_type = fieldtypes.windows_path else: path_type = fieldtypes.posix_path + hash_records = [] + for field_name, field_type in record._field_types.items(): if not issubclass(field_type, fieldtypes.path): continue @@ -98,15 +94,24 @@ def hash_path_records(target: Target, record: Record) -> Record: pass else: resolved_path = path_type(resolved_path) - hashed_paths.append((resolved_path, path_hash)) + record_kwargs = dict() + record_def = list() - if not hashed_paths: - return record + fields = [resolved_path, path_hash] + + for type, name, field in zip(RECORD_TYPES, NAME_SUFFIXES, fields): + hashed_field_name = f"{field_name}{name}" + record_kwargs.update({hashed_field_name: field}) + record_def.append((type, hashed_field_name)) - paths, digests = zip(*hashed_paths) - hash_record = HashRecord(paths=paths, digests=digests) + _record = RecordDescriptor(RECORD_NAME, record_def) + + hash_records.append(_record(**record_kwargs)) + + if not hash_records: + return record - return GroupedRecord(record._desc.name, [record, hash_record]) + return GroupedRecord(record._desc.name, [record] + hash_records) def hash_uri(target: Target, path: str) -> tuple[str, str]: diff --git a/tests/helpers/test_hashutil.py b/tests/helpers/test_hashutil.py index 4d14dd63c..97bab7992 100644 --- a/tests/helpers/test_hashutil.py +++ b/tests/helpers/test_hashutil.py @@ -3,7 +3,7 @@ import pytest from flow.record import Record -from flow.record.fieldtypes import path +from flow.record.fieldtypes import digest, path import dissect.target.helpers.hashutil as hashutil from dissect.target.exceptions import FileNotFoundError, IsADirectoryError @@ -43,11 +43,11 @@ def test_hash_uri_records() -> None: @pytest.mark.parametrize( - "test_input,expected", + "test_input, expected_records", [ - ({"name": path}, 1), - ({"name": path, "test": path}, 2), - ({"name": path, "test": str}, 1), + ({"name": path}, 2), + ({"name": path, "test": path}, 3), + ({"name": path, "test": str}, 2), ], ) @patch("flow.record.Record") @@ -55,17 +55,23 @@ def test_hash_path_records_with_paths( record: Record, mock_target: Mock, test_input: dict[str, Union[type[path], type[str]]], - expected: int, + expected_records: int, ) -> None: record._desc.name = "test" record._field_types = test_input + record_names = [key for key, value in test_input.items() if value is path] + hashed_record = hashutil.hash_path_records(mock_target, record) assert hashed_record.name == "test" - assert len(hashed_record.records) == 2 + assert len(hashed_record.records) == expected_records assert hashed_record.records[0] == record - assert len(hashed_record.records[1].paths) == expected - assert len(hashed_record.records[1].digests) == expected + + _record = hashed_record.records[1] + + for name, _record in zip(record_names, hashed_record.records[1:]): + assert getattr(_record, f"{name}_resolved") is not None + assert getattr(_record, f"{name}_digest").__dict__ == digest(HASHES).__dict__ @pytest.mark.parametrize( @@ -108,10 +114,14 @@ def test_hash_path_records_with_exception( ) -> None: record._desc.name = "test" field_types = {} - for ii in range(len(side_effects)): - field_name = f"path_{ii}" + found_type_names = [] + for idx, data in enumerate(side_effects): + field_name = f"path_{idx}" field_types[field_name] = path setattr(record, field_name, "test") + if data is HASHES: + found_type_names.append(field_name) + record._field_types = field_types with ( @@ -123,9 +133,9 @@ def test_hash_path_records_with_exception( if not expected: assert hashed_record == record else: - assert len(hashed_record.records[1].paths) == expected - assert hashed_record.records[1].paths == [resolve_func("test")] * expected - assert len(hashed_record.records[1].digests) == expected + for _record, key in zip(hashed_record.records[1:], found_type_names): + assert getattr(_record, f"{key}_resolved") == resolve_func("test") + assert getattr(_record, f"{key}_digest").__dict__ == digest(HASHES).__dict__ def test_hash_uri(mock_target: Mock) -> None: