From 156bc5275b9be27598cf036c9b26e45cb4d7b817 Mon Sep 17 00:00:00 2001 From: Miauwkeru Date: Tue, 14 Nov 2023 08:15:42 +0000 Subject: [PATCH 1/5] Add specific field names --- dissect/target/helpers/hashutil.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/dissect/target/helpers/hashutil.py b/dissect/target/helpers/hashutil.py index d69cb60ba..4cdf5d611 100644 --- a/dissect/target/helpers/hashutil.py +++ b/dissect/target/helpers/hashutil.py @@ -76,13 +76,13 @@ def hash_uri_records(target: Target, record: Record) -> Record: def hash_path_records(target: Target, record: Record) -> Record: """Hash files from path fields inside the record.""" - hashed_paths = [] - if target.os == "windows": path_type = fieldtypes.windows_path else: path_type = fieldtypes.posix_path + hash_records = [] + for field_name, field_type in record._field_types.items(): if not issubclass(field_type, fieldtypes.path): continue @@ -98,15 +98,24 @@ def hash_path_records(target: Target, record: Record) -> Record: pass else: resolved_path = path_type(resolved_path) - hashed_paths.append((resolved_path, path_hash)) - - if not hashed_paths: + _record = RecordDescriptor( + "filesystem/file/digest", [ + ("path", f"{field_name}_path"), + ("path", f"{field_name}_resolved_path"), + ("digest", f"{field_name}_digest") + ] + ) + + hash_records.append(_record(**{ + f"{field_name}_path": path, + f"{field_name}_resolved_path": resolved_path, + f"{field_name}_digest": path_hash + })) + + if not hash_records: return record - paths, digests = zip(*hashed_paths) - hash_record = HashRecord(paths=paths, digests=digests) - - return GroupedRecord(record._desc.name, [record, hash_record]) + return GroupedRecord(record._desc.name, [record] + hash_records) def hash_uri(target: Target, path: str) -> tuple[str, str]: From 5a29f2b47b5deb6ddb76733a44a011942fe7e7d2 Mon Sep 17 00:00:00 2001 From: Miauwkeru Date: Fri, 24 Nov 2023 13:59:25 +0000 Subject: [PATCH 2/5] Cleaning up changes --- dissect/target/helpers/hashutil.py | 36 ++++++++++++--------------- tests/helpers/test_hashutil.py | 40 +++++++++++++++++++----------- 2 files changed, 42 insertions(+), 34 deletions(-) diff --git a/dissect/target/helpers/hashutil.py b/dissect/target/helpers/hashutil.py index 4cdf5d611..e29e626de 100644 --- a/dissect/target/helpers/hashutil.py +++ b/dissect/target/helpers/hashutil.py @@ -15,13 +15,9 @@ BUFFER_SIZE = 32768 -HashRecord = RecordDescriptor( - "filesystem/file/digest", - [ - ("path[]", "paths"), - ("digest[]", "digests"), - ], -) +RECORD_NAME = "filesystem/file/digest" +NAME_SUFFIXES = ["_path", "_resolved_path", "_digest"] +RECORD_TYPES = ["path", "path", "digest"] def _hash(fh: BinaryIO, ctx: Union[HASH, list[HASH]]) -> tuple[str]: @@ -98,19 +94,19 @@ def hash_path_records(target: Target, record: Record) -> Record: pass else: resolved_path = path_type(resolved_path) - _record = RecordDescriptor( - "filesystem/file/digest", [ - ("path", f"{field_name}_path"), - ("path", f"{field_name}_resolved_path"), - ("digest", f"{field_name}_digest") - ] - ) - - hash_records.append(_record(**{ - f"{field_name}_path": path, - f"{field_name}_resolved_path": resolved_path, - f"{field_name}_digest": path_hash - })) + record_kwargs = dict() + record_def = list() + + fields = [path, resolved_path, path_hash] + + for type, name, field in zip(RECORD_TYPES, NAME_SUFFIXES, fields): + hashed_field_name = f"{field_name}{name}" + record_kwargs.update({hashed_field_name: field}) + record_def.append((type, hashed_field_name)) + + _record = RecordDescriptor(RECORD_NAME, record_def) + + hash_records.append(_record(**record_kwargs)) if not hash_records: return record diff --git a/tests/helpers/test_hashutil.py b/tests/helpers/test_hashutil.py index 4d14dd63c..c6068932e 100644 --- a/tests/helpers/test_hashutil.py +++ b/tests/helpers/test_hashutil.py @@ -3,7 +3,7 @@ import pytest from flow.record import Record -from flow.record.fieldtypes import path +from flow.record.fieldtypes import path, digest import dissect.target.helpers.hashutil as hashutil from dissect.target.exceptions import FileNotFoundError, IsADirectoryError @@ -43,11 +43,11 @@ def test_hash_uri_records() -> None: @pytest.mark.parametrize( - "test_input,expected", + "test_input, expected_records", [ - ({"name": path}, 1), - ({"name": path, "test": path}, 2), - ({"name": path, "test": str}, 1), + ({"name": path}, 2), + ({"name": path, "test": path}, 3), + ({"name": path, "test": str}, 2), ], ) @patch("flow.record.Record") @@ -55,17 +55,24 @@ def test_hash_path_records_with_paths( record: Record, mock_target: Mock, test_input: dict[str, Union[type[path], type[str]]], - expected: int, + expected_records: int, ) -> None: record._desc.name = "test" record._field_types = test_input + record_names = [key for key, value in test_input.items() if value is path] + hashed_record = hashutil.hash_path_records(mock_target, record) assert hashed_record.name == "test" - assert len(hashed_record.records) == 2 + assert len(hashed_record.records) == expected_records assert hashed_record.records[0] == record - assert len(hashed_record.records[1].paths) == expected - assert len(hashed_record.records[1].digests) == expected + + _record = hashed_record.records[1] + + for name, _record in zip(record_names, hashed_record.records[1:]): + assert getattr(_record, f"{name}_path") is not None + assert getattr(_record, f"{name}_resolved_path") is not None + assert getattr(_record, f"{name}_digest").__dict__ == digest(HASHES).__dict__ @pytest.mark.parametrize( @@ -108,10 +115,14 @@ def test_hash_path_records_with_exception( ) -> None: record._desc.name = "test" field_types = {} - for ii in range(len(side_effects)): - field_name = f"path_{ii}" + found_type_names = [] + for idx, data in enumerate(side_effects): + field_name = f"path_{idx}" field_types[field_name] = path setattr(record, field_name, "test") + if data is HASHES: + found_type_names.append(field_name) + record._field_types = field_types with ( @@ -123,9 +134,10 @@ def test_hash_path_records_with_exception( if not expected: assert hashed_record == record else: - assert len(hashed_record.records[1].paths) == expected - assert hashed_record.records[1].paths == [resolve_func("test")] * expected - assert len(hashed_record.records[1].digests) == expected + for _record, key in zip(hashed_record.records[1:], found_type_names): + assert getattr(_record, f"{key}_path") == "test" + assert getattr(_record, f"{key}_resolved_path") == resolve_func("test") + assert getattr(_record, f"{key}_digest").__dict__ == digest(HASHES).__dict__ def test_hash_uri(mock_target: Mock) -> None: From a79f4bc1c26db9005d49a891d860c90ed6571576 Mon Sep 17 00:00:00 2001 From: Miauwkeru Date: Fri, 24 Nov 2023 14:12:29 +0000 Subject: [PATCH 3/5] Fix linting --- tests/helpers/test_hashutil.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/helpers/test_hashutil.py b/tests/helpers/test_hashutil.py index c6068932e..22e41cc52 100644 --- a/tests/helpers/test_hashutil.py +++ b/tests/helpers/test_hashutil.py @@ -3,7 +3,7 @@ import pytest from flow.record import Record -from flow.record.fieldtypes import path, digest +from flow.record.fieldtypes import digest, path import dissect.target.helpers.hashutil as hashutil from dissect.target.exceptions import FileNotFoundError, IsADirectoryError From 32cba045fde2d9c298979a975024e11bd3f2cf6a Mon Sep 17 00:00:00 2001 From: Miauwkeru Date: Mon, 27 Nov 2023 08:48:24 +0000 Subject: [PATCH 4/5] Add suggestions --- dissect/target/helpers/hashutil.py | 6 +++--- tests/helpers/test_hashutil.py | 2 -- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/dissect/target/helpers/hashutil.py b/dissect/target/helpers/hashutil.py index e29e626de..180f218b9 100644 --- a/dissect/target/helpers/hashutil.py +++ b/dissect/target/helpers/hashutil.py @@ -16,8 +16,8 @@ BUFFER_SIZE = 32768 RECORD_NAME = "filesystem/file/digest" -NAME_SUFFIXES = ["_path", "_resolved_path", "_digest"] -RECORD_TYPES = ["path", "path", "digest"] +NAME_SUFFIXES = ["_resolved_path", "_digest"] +RECORD_TYPES = ["path", "digest"] def _hash(fh: BinaryIO, ctx: Union[HASH, list[HASH]]) -> tuple[str]: @@ -97,7 +97,7 @@ def hash_path_records(target: Target, record: Record) -> Record: record_kwargs = dict() record_def = list() - fields = [path, resolved_path, path_hash] + fields = [resolved_path, path_hash] for type, name, field in zip(RECORD_TYPES, NAME_SUFFIXES, fields): hashed_field_name = f"{field_name}{name}" diff --git a/tests/helpers/test_hashutil.py b/tests/helpers/test_hashutil.py index 22e41cc52..ea2417053 100644 --- a/tests/helpers/test_hashutil.py +++ b/tests/helpers/test_hashutil.py @@ -70,7 +70,6 @@ def test_hash_path_records_with_paths( _record = hashed_record.records[1] for name, _record in zip(record_names, hashed_record.records[1:]): - assert getattr(_record, f"{name}_path") is not None assert getattr(_record, f"{name}_resolved_path") is not None assert getattr(_record, f"{name}_digest").__dict__ == digest(HASHES).__dict__ @@ -135,7 +134,6 @@ def test_hash_path_records_with_exception( assert hashed_record == record else: for _record, key in zip(hashed_record.records[1:], found_type_names): - assert getattr(_record, f"{key}_path") == "test" assert getattr(_record, f"{key}_resolved_path") == resolve_func("test") assert getattr(_record, f"{key}_digest").__dict__ == digest(HASHES).__dict__ From c440e738ca7f8bd56d92e9f884572e526bcb5008 Mon Sep 17 00:00:00 2001 From: Miauwkeru Date: Mon, 27 Nov 2023 12:47:40 +0000 Subject: [PATCH 5/5] Add suggestion --- dissect/target/helpers/hashutil.py | 2 +- tests/helpers/test_hashutil.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dissect/target/helpers/hashutil.py b/dissect/target/helpers/hashutil.py index 180f218b9..b68bee018 100644 --- a/dissect/target/helpers/hashutil.py +++ b/dissect/target/helpers/hashutil.py @@ -16,7 +16,7 @@ BUFFER_SIZE = 32768 RECORD_NAME = "filesystem/file/digest" -NAME_SUFFIXES = ["_resolved_path", "_digest"] +NAME_SUFFIXES = ["_resolved", "_digest"] RECORD_TYPES = ["path", "digest"] diff --git a/tests/helpers/test_hashutil.py b/tests/helpers/test_hashutil.py index ea2417053..97bab7992 100644 --- a/tests/helpers/test_hashutil.py +++ b/tests/helpers/test_hashutil.py @@ -70,7 +70,7 @@ def test_hash_path_records_with_paths( _record = hashed_record.records[1] for name, _record in zip(record_names, hashed_record.records[1:]): - assert getattr(_record, f"{name}_resolved_path") is not None + assert getattr(_record, f"{name}_resolved") is not None assert getattr(_record, f"{name}_digest").__dict__ == digest(HASHES).__dict__ @@ -134,7 +134,7 @@ def test_hash_path_records_with_exception( assert hashed_record == record else: for _record, key in zip(hashed_record.records[1:], found_type_names): - assert getattr(_record, f"{key}_resolved_path") == resolve_func("test") + assert getattr(_record, f"{key}_resolved") == resolve_func("test") assert getattr(_record, f"{key}_digest").__dict__ == digest(HASHES).__dict__