Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve hash functionality #457

Merged
Merged 5 commits on Nov 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 20 additions & 15 deletions dissect/target/helpers/hashutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,9 @@

BUFFER_SIZE = 32768

HashRecord = RecordDescriptor(
"filesystem/file/digest",
[
("path[]", "paths"),
("digest[]", "digests"),
],
)
RECORD_NAME = "filesystem/file/digest"
NAME_SUFFIXES = ["_resolved", "_digest"]
RECORD_TYPES = ["path", "digest"]


def _hash(fh: BinaryIO, ctx: Union[HASH, list[HASH]]) -> tuple[str]:
Expand Down Expand Up @@ -76,13 +72,13 @@ def hash_uri_records(target: Target, record: Record) -> Record:

def hash_path_records(target: Target, record: Record) -> Record:
"""Hash files from path fields inside the record."""
hashed_paths = []

if target.os == "windows":
path_type = fieldtypes.windows_path
else:
path_type = fieldtypes.posix_path

hash_records = []

for field_name, field_type in record._field_types.items():
if not issubclass(field_type, fieldtypes.path):
continue
Expand All @@ -98,15 +94,24 @@ def hash_path_records(target: Target, record: Record) -> Record:
pass
else:
resolved_path = path_type(resolved_path)
hashed_paths.append((resolved_path, path_hash))
record_kwargs = dict()
record_def = list()

if not hashed_paths:
return record
fields = [resolved_path, path_hash]

for type, name, field in zip(RECORD_TYPES, NAME_SUFFIXES, fields):
hashed_field_name = f"{field_name}{name}"
record_kwargs.update({hashed_field_name: field})
record_def.append((type, hashed_field_name))

paths, digests = zip(*hashed_paths)
hash_record = HashRecord(paths=paths, digests=digests)
_record = RecordDescriptor(RECORD_NAME, record_def)

hash_records.append(_record(**record_kwargs))

if not hash_records:
return record

return GroupedRecord(record._desc.name, [record, hash_record])
return GroupedRecord(record._desc.name, [record] + hash_records)


def hash_uri(target: Target, path: str) -> tuple[str, str]:
Expand Down
38 changes: 24 additions & 14 deletions tests/helpers/test_hashutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import pytest
from flow.record import Record
from flow.record.fieldtypes import path
from flow.record.fieldtypes import digest, path

import dissect.target.helpers.hashutil as hashutil
from dissect.target.exceptions import FileNotFoundError, IsADirectoryError
Expand Down Expand Up @@ -43,29 +43,35 @@ def test_hash_uri_records() -> None:


@pytest.mark.parametrize(
"test_input,expected",
"test_input, expected_records",
[
({"name": path}, 1),
({"name": path, "test": path}, 2),
({"name": path, "test": str}, 1),
({"name": path}, 2),
({"name": path, "test": path}, 3),
({"name": path, "test": str}, 2),
],
)
@patch("flow.record.Record")
def test_hash_path_records_with_paths(
record: Record,
mock_target: Mock,
test_input: dict[str, Union[type[path], type[str]]],
expected: int,
expected_records: int,
) -> None:
record._desc.name = "test"
record._field_types = test_input

record_names = [key for key, value in test_input.items() if value is path]

hashed_record = hashutil.hash_path_records(mock_target, record)
assert hashed_record.name == "test"
assert len(hashed_record.records) == 2
assert len(hashed_record.records) == expected_records
assert hashed_record.records[0] == record
assert len(hashed_record.records[1].paths) == expected
assert len(hashed_record.records[1].digests) == expected

_record = hashed_record.records[1]

for name, _record in zip(record_names, hashed_record.records[1:]):
assert getattr(_record, f"{name}_resolved") is not None
assert getattr(_record, f"{name}_digest").__dict__ == digest(HASHES).__dict__


@pytest.mark.parametrize(
Expand Down Expand Up @@ -108,10 +114,14 @@ def test_hash_path_records_with_exception(
) -> None:
record._desc.name = "test"
field_types = {}
for ii in range(len(side_effects)):
field_name = f"path_{ii}"
found_type_names = []
for idx, data in enumerate(side_effects):
field_name = f"path_{idx}"
field_types[field_name] = path
setattr(record, field_name, "test")
if data is HASHES:
found_type_names.append(field_name)

record._field_types = field_types

with (
Expand All @@ -123,9 +133,9 @@ def test_hash_path_records_with_exception(
if not expected:
assert hashed_record == record
else:
assert len(hashed_record.records[1].paths) == expected
assert hashed_record.records[1].paths == [resolve_func("test")] * expected
assert len(hashed_record.records[1].digests) == expected
for _record, key in zip(hashed_record.records[1:], found_type_names):
assert getattr(_record, f"{key}_resolved") == resolve_func("test")
assert getattr(_record, f"{key}_digest").__dict__ == digest(HASHES).__dict__


def test_hash_uri(mock_target: Mock) -> None:
Expand Down