From bb810f4aa03ae1b9097a77574ac06db95f931742 Mon Sep 17 00:00:00 2001 From: JSCU-CNI <121175071+JSCU-CNI@users.noreply.github.com> Date: Thu, 4 Apr 2024 18:03:33 +0200 Subject: [PATCH 1/8] Add target-diff --- dissect/target/tools/diff.py | 907 +++++++++++++++++++++++++++++++++ dissect/target/tools/shell.py | 30 ++ pyproject.toml | 1 + tests/_data/tools/diff/dst.tar | 3 + tests/_data/tools/diff/src.tar | 3 + tests/tools/test_diff.py | 358 +++++++++++++ 6 files changed, 1302 insertions(+) create mode 100644 dissect/target/tools/diff.py create mode 100644 tests/_data/tools/diff/dst.tar create mode 100644 tests/_data/tools/diff/src.tar create mode 100644 tests/tools/test_diff.py diff --git a/dissect/target/tools/diff.py b/dissect/target/tools/diff.py new file mode 100644 index 000000000..8d5ca4d9b --- /dev/null +++ b/dissect/target/tools/diff.py @@ -0,0 +1,907 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +from __future__ import annotations + +import argparse +import dataclasses +import logging +import re +import shutil +import sys +from difflib import diff_bytes, unified_diff +from fnmatch import fnmatch, translate +from io import BytesIO +from typing import Iterable, Iterator, TextIO + +from flow.record import ( + IGNORE_FIELDS_FOR_COMPARISON, + Record, + RecordOutput, + set_ignored_fields_for_comparison, +) + +from dissect.target import Target +from dissect.target.exceptions import FileNotFoundError +from dissect.target.filesystem import FilesystemEntry +from dissect.target.helpers import fsutil +from dissect.target.helpers.record import TargetRecordDescriptor +from dissect.target.plugin import arg +from dissect.target.tools.query import record_output +from dissect.target.tools.shell import ( + ExtendedCmd, + TargetCli, + arg_str_to_arg_list, + build_pipe_stdout, + fmt_ls_colors, + print_extensive_file_stat, + python_shell, + run_cli, +) +from dissect.target.tools.utils import ( + catch_sigpipe, + configure_generic_arguments, + 
generate_argparse_for_bound_method, + process_generic_arguments, +) + +log = logging.getLogger(__name__) +logging.lastResort = None +logging.raiseExceptions = False + +BLOCK_SIZE = 2048 +FILE_LIMIT = BLOCK_SIZE * 16 + +FILE_DIFF_RECORD_FIELDS = [ + ("string", "src_target"), + ("string", "dst_target"), + ("string", "path"), +] +RECORD_DIFF_RECORD_FIELDS = [ + ("string", "src_target"), + ("string", "dst_target"), + ("record", "record"), +] + +FileDeletedRecord = TargetRecordDescriptor("differential/file/deleted", FILE_DIFF_RECORD_FIELDS) +FileCreatedRecord = TargetRecordDescriptor("differential/file/created", FILE_DIFF_RECORD_FIELDS) +FileModifiedRecord = TargetRecordDescriptor( + "differential/file/modified", + FILE_DIFF_RECORD_FIELDS + + [ + ("bytes[]", "diff"), + ], +) + +RecordCreatedRecord = TargetRecordDescriptor("differential/record/created", RECORD_DIFF_RECORD_FIELDS) +RecordDeletedRecord = TargetRecordDescriptor("differential/record/deleted", RECORD_DIFF_RECORD_FIELDS) +RecordUnchangedRecord = TargetRecordDescriptor("differential/record/unchanged", RECORD_DIFF_RECORD_FIELDS) + + +@dataclasses.dataclass +class DifferentialEntry: + """Signifies a change for a FilesystemEntry between two versions of a target.""" + + path: str + name: str + src_target_entry: FilesystemEntry + dst_target_entry: FilesystemEntry + diff: list[bytes] + + +@dataclasses.dataclass +class DirectoryDifferential: + """For a given directory, contains the unchanged, created, modified and deleted entries, as well as a list of + subdirectories.""" + + directory: str + unchanged: list[FilesystemEntry] = dataclasses.field(default_factory=list) + created: list[FilesystemEntry] = dataclasses.field(default_factory=list) + modified: list[DifferentialEntry] = dataclasses.field(default_factory=list) + deleted: list[FilesystemEntry] = dataclasses.field(default_factory=list) + + +def likely_unchanged(src: fsutil.stat_result, dst: fsutil.stat_result) -> bool: + """Determine whether or not, based on the 
file stats, we can assume a file hasn't been changed.""" + if src.st_size != dst.st_size or src.st_mtime != dst.st_mtime or src.st_ctime != dst.st_ctime: + return False + return True + + +def get_plugin_output_records(plugin_name: str, plugin_arg_parts: list[str], target: Target) -> Iterable[Record]: + """Command execution helper for target plugins. Highly similar to target-shell's _exec_target, however this function + only accepts plugins that output records, and returns an iterable of records rather than a function that outputs + to stdout.""" + attr = target + for part in plugin_name.split("."): + attr = getattr(attr, part) + + if getattr(attr, "__output__", "default") != "record": + raise ValueError("Comparing plugin output is only supported for plugins outputting records.") + + if callable(attr): + argparser = generate_argparse_for_bound_method(attr) + try: + args = argparser.parse_args(plugin_arg_parts) + except SystemExit: + return [] + + return attr(**vars(args)) + else: + return attr + + +class TargetComparison: + """This class wraps functionality that for two given targets can identify similarities and differences between them. + Currently supports differentiating between the target filesystems, and between plugin outputs.""" + + def __init__( + self, + src_target: Target, + dst_target: Target, + deep: bool = False, + file_limit: int = FILE_LIMIT, + ): + self.src_target = src_target + self.dst_target = dst_target + self.deep = deep + self.file_limit = file_limit + + def scandir(self, path: str) -> DirectoryDifferential: + """Scan a given directory for files that have been unchanged, modified, created or deleted from one target to + the next. 
Add these results (as well as subdirectories) to a DirectoryDifferential object.""" + unchanged = [] + modified = [] + exists_as_directory_src = self.src_target.fs.exists(path) and self.src_target.fs.get(path).is_dir() + exists_as_directory_dst = self.dst_target.fs.exists(path) and self.dst_target.fs.get(path).is_dir() + + if not (exists_as_directory_src and exists_as_directory_dst): + if exists_as_directory_src: + # Path only exists on src target, hence all entries can be considered 'deleted' + entries = list(self.src_target.fs.scandir(path)) + return DirectoryDifferential(path, deleted=entries) + elif exists_as_directory_dst: + # Path only exists on dst target, hence all entries can be considered 'created' + entries = list(self.dst_target.fs.scandir(path)) + return DirectoryDifferential(path, created=entries) + raise ValueError(f"{path} is not a directory on either the source or destination target!") + + src_target_entries = list(self.src_target.fs.scandir(path)) + src_target_children_paths = set([entry.path for entry in src_target_entries]) + + dst_target_entries = list(self.dst_target.fs.scandir(path)) + dst_target_children_paths = set([entry.path for entry in dst_target_entries]) + + paths_only_on_src_target = src_target_children_paths - dst_target_children_paths + paths_only_on_dst_target = dst_target_children_paths - src_target_children_paths + + deleted = [entry for entry in src_target_entries if entry.path in paths_only_on_src_target] + created = [entry for entry in dst_target_entries if entry.path in paths_only_on_dst_target] + + paths_on_both = src_target_children_paths.intersection(dst_target_children_paths) + entry_pairs = [] + + for dst_entry in dst_target_entries: + if dst_entry.path not in paths_on_both: + continue + src_entry = next((entry for entry in src_target_entries if entry.path == dst_entry.path), None) + entry_pairs.append((src_entry, dst_entry)) + + for entry_pair in entry_pairs: + src_entry, dst_entry = entry_pair + entry_path = 
src_entry.path + + # It's possible that there is an entry, but upon trying to retrieve its stats / content, we get a + # FileNotFoundError. We account for this by wrapping both stat retrievals in a try except + src_target_notfound = False + dst_target_notfound = False + src_target_isdir = None + dst_target_isdir = None + + try: + src_target_stat = src_entry.stat() + src_target_isdir = src_entry.is_dir() + except FileNotFoundError: + src_target_notfound = True + + try: + dst_target_stat = dst_entry.stat() + dst_target_isdir = dst_entry.is_dir() + except FileNotFoundError: + dst_target_notfound = True + + if src_target_notfound or dst_target_notfound: + if src_target_notfound and not dst_target_notfound: + created.append(dst_entry) + elif dst_target_notfound and not src_target_notfound: + deleted.append(src_entry) + else: + # Not found on both + unchanged.append(src_entry) + # We can't continue as we cannot access the stats (or buffer) + continue + + if src_target_isdir or dst_target_isdir: + if src_target_isdir == dst_target_isdir: + unchanged.append(src_entry) + else: + # Went from a file to a dir, or from a dir to a file. 
Either way, we consider the source entry + # 'deleted' and the dst entry 'Created' + deleted.append(src_entry) + created.append(dst_entry) + continue + + if self.deep is False and likely_unchanged(src_target_stat, dst_target_stat): + unchanged.append(src_entry) + continue + + # If we get here, we have two files that we need to compare contents of + src_fh = src_entry.open() + dst_fh = dst_entry.open() + + while True: + chunk_a = src_fh.read(BLOCK_SIZE) + chunk_b = dst_fh.read(BLOCK_SIZE) + if chunk_a != chunk_b: + # We immediately break after discovering a difference in file contents + # This means that we won't return a full diff of the file, merely the first block where a difference + # is observed + content_difference = list(diff_bytes(unified_diff, [chunk_a], [chunk_b])) + differential_entry = DifferentialEntry( + entry_path, + src_entry.name, + src_entry, + dst_entry, + content_difference, + ) + modified.append(differential_entry) + break + + if self.file_limit and src_fh.tell() > self.file_limit: + unchanged.append(src_entry) + break + + if len(chunk_a) == 0: + # End of file + unchanged.append(src_entry) + break + + return DirectoryDifferential(path, unchanged, created, modified, deleted) + + def walkdir( + self, + path: str, + exclude: list[str] | str | None = None, + already_iterated: list[str] = None, + ) -> Iterator[DirectoryDifferential]: + """Recursively iterate directories and yield DirectoryDifferentials.""" + if already_iterated is None: + already_iterated = [] + + if path in already_iterated: + return + + if exclude is not None and not isinstance(exclude, list): + exclude = [exclude] + + already_iterated.append(path) + + diff = self.scandir(path) + yield diff + + subentries = diff.created + diff.unchanged + diff.deleted + subdirectories = [entry for entry in subentries if entry.is_dir()] + # Check if the scandir led to the discovery of new directories that we have to scan for differentials + # Directories are always in 'unchanged' + for subdirectory in subdirectories: 
+ if subdirectory in already_iterated: + continue + + # Right-pad with a '/' + subdirectory_path = subdirectory.path if subdirectory.path.endswith("/") else subdirectory.path + "/" + if exclude: + match = next((pattern for pattern in exclude if fnmatch(subdirectory_path, pattern)), None) + if match: + continue + yield from self.walkdir(subdirectory.path, exclude, already_iterated) + + def differentiate_plugin_outputs(self, plugin_name: str, plugin_arg_parts: list[str]) -> Iterator[Record]: + """Run a plugin on the source and destination targets and yield RecordUnchanged, RecordCreated and RecordDeleted + records. There is no equivalent for the FileModifiedRecord. For files and directories, we can use the path to + reliably track changes from one target to the next. There is no equivalent for plugin outputs, so we just assume + that all records are either deleted (only on src), created (only on dst) or unchanged (on both).""" + old_ignored_values = IGNORE_FIELDS_FOR_COMPARISON + set_ignored_fields_for_comparison(["_generated", "_source", "hostname", "domain"]) + + src_records = set(get_plugin_output_records(plugin_name, plugin_arg_parts, self.src_target)) + src_records_seen = set() + + for dst_record in get_plugin_output_records(plugin_name, plugin_arg_parts, self.dst_target): + if dst_record in src_records: + src_records_seen.add(dst_record) + yield RecordUnchangedRecord( + src_target=self.src_target.path, dst_target=self.dst_target.path, record=dst_record + ) + else: + yield RecordCreatedRecord( + src_target=self.src_target.path, dst_target=self.dst_target.path, record=dst_record + ) + for record in src_records - src_records_seen: + yield RecordDeletedRecord(src_target=self.src_target.path, dst_target=self.dst_target.path, record=record) + + set_ignored_fields_for_comparison(old_ignored_values) + + +class DifferentialCli(ExtendedCmd): + """CLI for browsing the differential between two or more targets.""" + + doc_header_prefix = "Target Diff\n" "==========\n" + 
doc_header_suffix = "\n\nDocumented commands (type help ):" + doc_header_multiple_targets = "Use 'list', 'prev' and 'next' to list and select targets to differentiate between." + + def __init__(self, *targets: tuple[Target], deep: bool = False, limit: int = FILE_LIMIT): + self.targets = targets + self.deep = deep + self.limit = limit + + self.src_index = 0 + self.dst_index = 0 + self.comparison: TargetComparison = None + + self.cwd = "/" + self.alt_separator = "/" + + doc_header_middle = self.doc_header_multiple_targets if len(targets) > 2 else "" + self.doc_header = self.doc_header_prefix + doc_header_middle + self.doc_header_suffix + + self._select_source_and_dest(0, 1) + if len(self.targets) > 2: + # Some help may be nice if you are diffing more than 2 targets at once + self.do_help(arg=None) + + start_in_cyber = any(target.props.get("cyber") for target in self.targets) + super().__init__(start_in_cyber) + + @property + def src_target(self) -> Target: + return self.targets[self.src_index] + + @property + def dst_target(self) -> Target: + return self.targets[self.dst_index] + + @property + def prompt(self) -> str: + if self.comparison.src_target.name != self.comparison.dst_target.name: + prompt_base = f"({self.comparison.src_target.name}/{self.comparison.dst_target.name})" + else: + prompt_base = self.comparison.src_target.name + + suffix = f"{prompt_base}/diff {self.cwd}>" + + if len(self.targets) <= 2: + return suffix + + chain_prefix = "[ " + for i in range(len(self.targets)): + char = "O " if i == self.src_index or i == self.dst_index else ". 
" + chain_prefix += char + chain_prefix += "] " + + return f"{chain_prefix}{suffix}" + + def _select_source_and_dest(self, src_index: int, dst_index: int) -> None: + """Set local variables according to newly selected source and destination index, and re-instatiate + TargetComparison.""" + self.src_index = src_index + self.dst_index = dst_index + if not self.src_target.fs.exists(self.cwd) and not self.dst_target.fs.exists(self.cwd): + logging.warning("The current directory exists on neither of the selected targets.") + if self.src_target.fs.alt_separator != self.dst_target.fs.alt_separator: + raise NotImplementedError("No support for handling targets with different path separators") + + self.alt_separator = self.src_target.fs.alt_separator + self.comparison = TargetComparison(self.src_target, self.dst_target, self.deep, self.limit) + + def _annotate_differential( + self, + diff: DirectoryDifferential, + unchanged: bool = True, + created: bool = True, + modified: bool = True, + deleted: bool = True, + absolute: bool = False, + ) -> list[tuple[fsutil.TargetPath | DifferentialEntry], str]: + """Given a DirectoryDifferential instance, construct a list of tuples where the first element is a Filesystem / + DifferentialEntry entries and the second a color-formatted string.""" + r = [] + + attr = "path" if absolute else "name" + if unchanged: + for entry in diff.unchanged: + color = "di" if entry.is_dir() else "fi" + r.append((entry, fmt_ls_colors(color, getattr(entry, attr)))) + + if created: + for entry in diff.created: + color = "tw" if entry.is_dir() else "ex" + r.append((entry, fmt_ls_colors(color, f"{getattr(entry, attr)} (created)"))) + + if modified: + for entry in diff.modified: + # Modified entries are always files + r.append((entry, fmt_ls_colors("ln", f"{getattr(entry, attr)} (modified)"))) + if deleted: + for entry in diff.deleted: + color = "su" if entry.is_dir() else "or" + r.append((entry, fmt_ls_colors(color, f"{getattr(entry, attr)} (deleted)"))) + + 
r.sort(key=lambda e: e[0].name) + return r + + def _targets_with_path(self, path: str, warn_when_incomplete: bool = False) -> list[Target]: + """Return targets where a given path exists, checking the src and dst target of this class. Optionally log a + warning if the path only exists on one of the two targets.""" + targets_with_path = [] + if self.comparison.src_target.fs.exists(path): + targets_with_path.append(self.comparison.src_target) + if self.comparison.dst_target.fs.exists(path): + targets_with_path.append(self.comparison.dst_target) + if warn_when_incomplete and len(targets_with_path) == 1: + log.warning("'%s' is only present on '%s'.", path, targets_with_path[0]) + return targets_with_path + + def _write_entry_contents_to_stdout(self, entry: FilesystemEntry, stdout: TextIO): + """Copy the contents of a FilesystemEntry to stdout.""" + stdout = stdout.buffer + fh = entry.open() + shutil.copyfileobj(fh, stdout) + stdout.flush() + print("") + + def completedefault(self, text: str, line: str, begidx: int, endidx: int): + """Autocomplete based on files / directories found in the current path.""" + path = line[:begidx].rsplit(" ")[-1] + textlower = text.lower() + + path = fsutil.abspath(path, cwd=str(self.cwd), alt_separator=self.alt_separator) + + diff = self.comparison.scandir(path) + names = [item.name for group in [diff.created, diff.modified, diff.unchanged, diff.deleted] for item in group] + + r = [name for name in names if name.lower().startswith(textlower)] + return r + + def do_list(self, line): + """Prints a list of targets to differentiate between. Useful when differentiating between three or more + targets. 
Looks quite bad on small terminal screens.""" + columns = ["#", "Name", "Path", "From", "To"] + + rows = [] + + for i, target in enumerate(self.targets): + rows.append( + [ + f"{i:2d}", + target.name, + str(target.path), + "**" if i == self.src_index else "", + "**" if i == self.dst_index else "", + ] + ) + + longest_name = max(len(row[1]) + 4 for row in rows) + longest_path = max(len(row[2]) + 4 for row in rows) + name_len = max(10, longest_name) + path_len = max(15, longest_path) + + fmt = "{:^5} | {:<" + str(name_len) + "} | {:<" + str(path_len) + "} | {:^6} | {:^6} |" + print(fmt.format(*columns)) + print("") + for row in rows: + print(fmt.format(*row)) + print("") + + @arg("-a", "--absolute", action="store_true", help="Only move the destination target one position back.") + def cmd_previous(self, args: argparse.Namespace, line: str) -> bool: + """When three or more targets are available, move the 'comparison window' one position back.""" + src_index = self.src_index - 1 if not args.absolute else 0 + if src_index < 0: + src_index = len(self.targets) - 1 + dst_index = self.dst_index - 1 + if dst_index < 0: + dst_index = len(self.targets) - 1 + if dst_index <= src_index: + src_index, dst_index = dst_index, src_index + self._select_source_and_dest(src_index, dst_index) + + @arg("-a", "--absolute", action="store_true", help="Only move the destination target one position back.") + def cmd_prev(self, args: argparse.Namespace, line: str) -> bool: + """Alias for previous.""" + self.cmd_previous(args, line) + + @arg("-a", "--absolute", action="store_true", help="Only move the destination target one position forward.") + def cmd_next(self, args: argparse.Namespace, line: str) -> bool: + """When three or more targets are available, move the 'comparison window' one position forward.""" + dst_index = (self.dst_index + 1) % len(self.targets) + src_index = (self.src_index + 1) % len(self.targets) if not args.absolute else 0 + + if dst_index <= src_index: + src_index, dst_index 
= dst_index, src_index + self._select_source_and_dest(src_index, dst_index) + + def do_cd(self, path: str) -> None: + """Change directory to the given path.""" + path = fsutil.abspath(path, cwd=str(self.cwd), alt_separator=self.alt_separator) + targets_with_path = self._targets_with_path(path, warn_when_incomplete=True) + if len(targets_with_path) == 0: + return + self.cwd = path + + @arg("path", nargs="?") + @arg("-l", action="store_true") + @arg("-a", "--all", action="store_true") # ignored but included for proper argument parsing + @arg("-h", "--human-readable", action="store_true") + def cmd_ls(self, args: argparse.Namespace, stdout: TextIO): + """List contents of a directory for two targets.""" + path = args.path if args.path is not None else self.cwd + diff = self.comparison.scandir(path) + results = self._annotate_differential(diff) + if not args.l: + print("\n".join([name for _, name in results]), file=stdout) + else: + for entry, name in results: + if not isinstance(entry, DifferentialEntry): + print_extensive_file_stat(stdout, name, entry) + else: + # We have to choose for which version of this file we are going to print detailed info. 
The + # destination target seems to make the most sense: it is likely newer + print_extensive_file_stat(stdout, name, entry.dst_target_entry) + + @arg("path", nargs="?") + def cmd_cat(self, args: argparse.Namespace, stdout: TextIO): + """Output the contents of a file.""" + base_dir, _, name = args.path.rpartition("/") + if not base_dir: + base_dir = self.cwd + + directory_differential = self.comparison.scandir(base_dir) + entry = None + for entry in directory_differential.unchanged: + if entry.name == name: + return self._write_entry_contents_to_stdout(entry, stdout) + for entry in directory_differential.created: + if entry.name == name: + log.warning("'%s' is only present on '%s'.", entry.name, self.comparison.dst_target.path) + return self._write_entry_contents_to_stdout(entry, stdout) + for entry in directory_differential.deleted: + if entry.name == name: + log.warning("'%s' is only present on '%s'.", entry.name, self.comparison.src_target.path) + return self._write_entry_contents_to_stdout(entry, stdout) + for entry in directory_differential.modified: + if entry.name == name: + log.warning( + "Concatinating latest version of '%s'. 
Use 'diff' to differentiate between target versions.", + entry.name, + ) + return self._write_entry_contents_to_stdout(entry.dst_target_entry, stdout) + print(f"File {name} not found.") + + @arg("path", nargs="?") + def cmd_diff(self, args: argparse.Namespace, stdout: TextIO): + """Output the difference in file contents between two targets.""" + stdout = stdout.buffer + base_dir, _, name = args.path.rpartition("/") + if not base_dir: + base_dir = self.cwd + directory_differential = self.comparison.scandir(base_dir) + for entry in directory_differential.modified: + if entry.name == name: + primary_fh_lines = entry.src_target_entry.open().readlines() + secondary_fh_lines = entry.dst_target_entry.open().readlines() + for chunk in diff_bytes(unified_diff, primary_fh_lines, secondary_fh_lines): + if chunk.startswith(b"@@"): + chunk = fmt_ls_colors("ln", chunk.decode()).encode() + elif chunk.startswith(b"+"): + chunk = fmt_ls_colors("ex", chunk.decode()).encode() + elif chunk.startswith(b"-"): + chunk = fmt_ls_colors("or", chunk.decode()).encode() + + shutil.copyfileobj(BytesIO(chunk), stdout) + stdout.flush() + print("") + return + + # Check if this file is even present on one of the targets + files = directory_differential.unchanged + directory_differential.created + directory_differential.deleted + match = next((entry for entry in files if entry.name == name), None) + if match is None: + print(f"File {name} not found.") + else: + print(f"No two versions available for {name} to differentiate between.") + + @arg("index", type=str) + @arg("type", choices=["src", "dst"]) + def cmd_set(self, args: argparse.Namespace, stdout: TextIO): + """Change either the source or destination target for differentiation. Index can be given relative (when + prefixed with '+' or '-', e.g. "set dst +1") or absolute (e.g. 
set src 0).""" + index = args.index.strip() + pos = self.src_index if args.type == "src" else self.dst_index + + if index.startswith(("+", "-")): + multiplier = 1 if index[0] == "+" else -1 + index = index[1:].strip() + if not index.isdigit(): + return + pos += int(index) * multiplier + elif index.isdigit(): + pos = int(index) + else: + raise ValueError(f"Could not set {args.type} to {index}.") + if args.type == "src": + self._select_source_and_dest(pos, self.dst_index) + else: + self._select_source_and_dest(self.src_index, pos) + + @arg("target", choices=["src", "dst"]) + def cmd_enter(self, args: argparse.Namespace, stdout: TextIO): + """Open a subshell for the source or destination target.""" + target = self.src_target if args.target == "src" else self.dst_target + cli = TargetCli(target) + if target.fs.exists(self.cwd): + cli.chdir(self.cwd) + + # Cyber doesn't work well with subshells + cli.cyber = False + run_cli(cli) + + @arg("path", nargs="?") + @arg("-name", default="*") + @arg("-iname") + @arg("-c", "--created", action="store_true") + @arg("-m", "--modified", action="store_true") + @arg("-d", "--deleted", action="store_true") + @arg("-u", "--unchanged", action="store_true") + def cmd_find(self, args: argparse.Namespace, stdout: TextIO) -> bool | None: + """Search for files in a directory hierarchy.""" + path = fsutil.abspath(args.path, cwd=str(self.cwd), alt_separator=self.comparison.src_target.fs.alt_separator) + if not path: + return + targets_with_path = self._targets_with_path(path, warn_when_incomplete=True) + if len(targets_with_path) == 0: + return + + if args.iname: + pattern = re.compile(translate(args.iname), re.IGNORECASE) + else: + pattern = re.compile(translate(args.name)) + + include_all_changes = not (args.created or args.modified or args.deleted or args.unchanged) + + include_unchanged = args.unchanged + include_modified = include_all_changes or args.modified + include_created = include_all_changes or args.created + include_deleted = 
include_all_changes or args.deleted + + for differential in self.comparison.walkdir(path): + for entry, line in self._annotate_differential( + differential, include_unchanged, include_created, include_modified, include_deleted, absolute=True + ): + if not pattern.match(entry.name): + continue + + print(line, file=stdout) + + def do_plugin(self, line: str): + """Yield RecordCreated, RecordUnchanged and RecordDeleted Records by comparing plugin outputs for two + targets.""" + argparts = arg_str_to_arg_list(line) + pipeparts = [] + if "|" in argparts: + pipeidx = argparts.index("|") + argparts, pipeparts = argparts[:pipeidx], argparts[pipeidx + 1 :] + + if len(argparts) < 1: + raise ValueError("Provide a plugin name, and optionally parameters to pass to the plugin.") + + plugin = argparts.pop(0) + + iterator = self.comparison.differentiate_plugin_outputs(plugin, argparts) + if pipeparts: + try: + with build_pipe_stdout(pipeparts) as pipe_stdin: + rs = RecordOutput(pipe_stdin.buffer) + for record in iterator: + rs.write(record) + except OSError as e: + # in case of a failure in a subprocess + print(e) + else: + for record in iterator: + print(record, file=sys.stdout) + + def do_python(self, line: str) -> bool | None: + """drop into a Python shell.""" + python_shell(list(self.targets)) + + +def make_target_pairs(targets: tuple[Target], absolute: bool = False) -> list[tuple[Target, Target]]: + """Make 'pairs' of targets that we are going to compare against one another. 
A list of targets can be treated in two + ways: compare every target with the one that came before it, or compare all targets against a 'base' target (which + has to be supplied as initial target in the list).""" + target_pairs = [] + + previous_target = targets[0] + for target in targets[1:]: + target_pairs.append((previous_target, target)) + if not absolute: + # The next target should be compared against the one we just opened + previous_target = target + return target_pairs + + +def differentiate_target_filesystems( + *targets: tuple[Target], + deep: bool = False, + limit: int = FILE_LIMIT, + absolute: bool = False, + include: list[str] = None, + exclude: list[str] = None, +) -> Iterator[Record]: + """Given a list of targets, compare targets against one another and yield File[Created|Modified|Deleted]Records + indicating the differences between them.""" + if len(targets) < 2: + raise ValueError("Provide two or more targets to differentiate between.") + + for target_pair in make_target_pairs(targets, absolute): + # Unpack the tuple and initialize the comparison class + src_target, dst_target = target_pair + comparison = TargetComparison(src_target, dst_target, deep, limit) + + paths = ["/"] if include is None else include + + for path in paths: + for directory_diff in comparison.walkdir(path, exclude=exclude): + for creation_entry in directory_diff.created: + yield FileCreatedRecord( + path=creation_entry.path, + src_target=src_target.path, + dst_target=dst_target.path, + ) + + for deletion_entry in directory_diff.deleted: + yield FileDeletedRecord( + path=deletion_entry.path, + src_target=src_target.path, + dst_target=dst_target.path, + ) + + for entry_difference in directory_diff.modified: + yield FileModifiedRecord( + path=entry_difference.path, + diff=entry_difference.diff, + src_target=src_target.path, + dst_target=dst_target.path, + ) + + +def differentiate_target_plugin_outputs( + *targets: tuple[Target], absolute: bool = False, plugin: str, plugin_args: 
str = "" +) -> Iterator[Record]: + """Given a list of targets, yielding records indicating which records from this plugin are new, unmodified or + deleted.""" + for target_pair in make_target_pairs(targets, absolute): + src_target, dst_target = target_pair + comparison = TargetComparison(src_target, dst_target) + yield from comparison.differentiate_plugin_outputs(plugin, plugin_args) + + +@catch_sigpipe +def main() -> None: + help_formatter = argparse.ArgumentDefaultsHelpFormatter + parser = argparse.ArgumentParser( + description="target-diff", + fromfile_prefix_chars="@", + formatter_class=help_formatter, + ) + + parser.add_argument( + "-d", + "--deep", + action="store_true", + help="Compare file contents even if metadata suggests they have been left unchanged", + ) + parser.add_argument( + "-l", + "--limit", + default=FILE_LIMIT, + type=int, + help="How many bytes to compare before assuming a file is left unchanged (0 for no limit)", + ) + subparsers = parser.add_subparsers(help="Mode for differentiating targets", dest="mode") + + shell_mode = subparsers.add_parser("shell", help="Open an interactive shell to compare two or more targets.") + shell_mode.add_argument("targets", metavar="TARGETS", nargs="*", help="Targets to differentiate between") + + fs_mode = subparsers.add_parser("fs", help="Yield records about differences between target filesystems.") + fs_mode.add_argument("targets", metavar="TARGETS", nargs="*", help="Targets to differentiate between") + fs_mode.add_argument("-s", "--strings", action="store_true", help="print records as strings") + fs_mode.add_argument("-e", "--exclude", action="append", help="Path(s) on targets not to check for differences") + fs_mode.add_argument( + "-i", + "--include", + action="append", + help="Path(s) on targets to check for differences (all will be checked if left omitted)", + ) + fs_mode.add_argument( + "-a", + "--absolute", + action="store_true", + help=( + "Treat every target as an absolute. 
The first given target is treated as the 'base' target to compare " + "subsequent targets against. If omitted, every target is treated as a 'delta' and compared against the " + "target that came before it." + ), + ) + + query_mode = subparsers.add_parser("query", help="Differentiate plugin outputs between two or more targets.") + query_mode.add_argument("targets", metavar="TARGETS", nargs="*", help="Targets to differentiate between") + query_mode.add_argument("-s", "--strings", action="store_true", help="print records as strings") + query_mode.add_argument( + "-p", + "--parameters", + type=str, + required=False, + default="", + help="Parameters for the plugin", + ) + query_mode.add_argument( + "-f", + "--plugin", + type=str, + required=True, + help="Function to execute", + ) + query_mode.add_argument( + "-a", + "--absolute", + action="store_true", + help=( + "Treat every target as an absolute. The first given target is treated as the 'base' target to compare " + "subsequent targets against. If omitted, every target is treated as a 'delta' and compared against the " + "target that came before it." 
+ ), + ) + + configure_generic_arguments(parser) + + args = parser.parse_args() + process_generic_arguments(args) + + target_list = [Target.open(path) for path in args.targets] + if args.mode == "shell": + cli = DifferentialCli(*target_list, deep=args.deep, limit=args.limit) + run_cli(cli) + else: + writer = record_output(args.strings) + if args.mode == "fs": + iterator = differentiate_target_filesystems( + *target_list, + deep=args.deep, + limit=args.limit, + absolute=args.absolute, + include=args.include, + exclude=args.exclude, + ) + elif args.mode == "query": + iterator = differentiate_target_plugin_outputs( + *target_list, + absolute=args.absolute, + plugin=args.plugin, + plugin_args=arg_str_to_arg_list(args.parameters), + ) + for record in iterator: + writer.write(record) + + +if __name__ == "__main__": + main() diff --git a/dissect/target/tools/shell.py b/dissect/target/tools/shell.py index 175f960fc..32c111b49 100644 --- a/dissect/target/tools/shell.py +++ b/dissect/target/tools/shell.py @@ -1265,6 +1265,36 @@ def _target_name(target: Target) -> str: return target.name +def arg_str_to_arg_list(args: str) -> list[str]: + """Convert a commandline string to a list of command line arguments.""" + lexer = shlex.shlex(args, posix=True, punctuation_chars=True) + lexer.wordchars += "$" + lexer.whitespace_split = True + return list(lexer) + + +def print_extensive_file_stat( + stdout: TextIO, name: str, entry: Optional[FilesystemEntry] = None, timestamp: Optional[datetime.datetime] = None +) -> None: + """Print the file status.""" + if entry is not None: + try: + stat = entry.lstat() + if timestamp is None: + timestamp = stat.st_mtime + symlink = f" -> {entry.readlink()}" if entry.is_symlink() else "" + utc_time = datetime.datetime.utcfromtimestamp(timestamp).isoformat() + + print( + f"{stat_modestr(stat)} {stat.st_uid:4d} {stat.st_gid:4d} {stat.st_size:6d} {utc_time} {name}{symlink}", + file=stdout, + ) + return + except FileNotFoundError: + pass + 
print(f"?????????? ? ? ? ????-??-??T??:??:??.?????? {name}", file=stdout) + + @contextmanager def build_pipe(pipe_parts: list[str], pipe_stdout: int = subprocess.PIPE) -> Iterator[tuple[TextIO, BinaryIO]]: """ diff --git a/pyproject.toml b/pyproject.toml index 931a2e4d2..2396d6fd6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -128,6 +128,7 @@ mqtt = [ target-build-pluginlist = "dissect.target.tools.build_pluginlist:main" target-dump = "dissect.target.tools.dump.run:main" target-dd = "dissect.target.tools.dd:main" +target-diff = "dissect.target.tools.diff:main" target-fs = "dissect.target.tools.fs:main" target-info = "dissect.target.tools.info:main" target-mount = "dissect.target.tools.mount:main" diff --git a/tests/_data/tools/diff/dst.tar b/tests/_data/tools/diff/dst.tar new file mode 100644 index 000000000..0b80bb5bd --- /dev/null +++ b/tests/_data/tools/diff/dst.tar @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d132b85ef4ce2c9527ec89d768569a4f16827e1462a4eebdc99b06629485aba9 +size 9728 diff --git a/tests/_data/tools/diff/src.tar b/tests/_data/tools/diff/src.tar new file mode 100644 index 000000000..c03bd039e --- /dev/null +++ b/tests/_data/tools/diff/src.tar @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55810bf488ef39a9422da56ea020a806cc0c58e694331549c2c09623a998e9b8 +size 9728 diff --git a/tests/tools/test_diff.py b/tests/tools/test_diff.py new file mode 100644 index 000000000..0ae65522d --- /dev/null +++ b/tests/tools/test_diff.py @@ -0,0 +1,358 @@ +from __future__ import annotations + +import textwrap +from io import BytesIO, StringIO +from pathlib import Path +from typing import Iterator + +import pytest + +import dissect.target.tools.shell as shell +from dissect.target.filesystem import VirtualFilesystem +from dissect.target.helpers.fsutil import stat_result +from dissect.target.plugins.os.unix._os import UnixPlugin +from dissect.target.target import Target +from dissect.target.tools.diff import ( + 
DifferentialCli, + TargetComparison, + differentiate_target_filesystems, + differentiate_target_plugin_outputs, + likely_unchanged, +) +from dissect.target.tools.diff import main as target_diff +from tests._utils import absolute_path +from tests.conftest import make_os_target + +PASSWD_CONTENTS = """ + root:x:0:0:root:/root:/bin/bash + user:x:1000:1000:user:/home/user:/bin/bash + """ + + +class TargetUnixFactory: + def __init__(self, tmp_path: Path): + self.tmp_path = tmp_path + + def new(self, hostname: str) -> tuple[Target, VirtualFilesystem]: + """Initialize a virtual unix target.""" + fs = VirtualFilesystem() + + fs.makedirs("var") + fs.makedirs("etc") + fs.map_file_fh("/etc/hostname", BytesIO(hostname.encode())) + + return make_os_target(self.tmp_path, UnixPlugin, root_fs=fs), fs + + +@pytest.fixture +def target_unix_factory(tmp_path: Path) -> TargetUnixFactory: + """This fixture returns a class that can instantiate a virtual unix targets from a blueprint. This can then be used + to create a fixture for the source target and the desination target, without them 'bleeding' into each other.""" + return TargetUnixFactory(tmp_path) + + +@pytest.fixture +def src_target(target_unix_factory) -> Iterator[Target]: + target, fs_unix = target_unix_factory.new("src_target") + + passwd_contents = PASSWD_CONTENTS + "\nsrc_user:x:1001:1001:src_user:/home/src_user:/bin/bash" + + fs_unix.map_file_fh("/etc/passwd", BytesIO(textwrap.dedent(passwd_contents).encode())) + + fs_unix.map_file_fh("changes/unchanged", BytesIO(b"Unchanged")) + fs_unix.map_file_fh("changes/changed", BytesIO(b"Hello From Source Target")) + fs_unix.map_file_fh("changes/only_on_src", BytesIO(b"FooBarBaz")) + + fs_unix.map_file_fh("changes/subdirectory_both/on_both", BytesIO(b"On Both")) + fs_unix.map_file_fh("changes/subdirectory_src/only_on_src", BytesIO(b"Hello From Source Target")) + + fs_unix.map_file_fh("changes/file_on_src", BytesIO(b"Hello From Source Target")) + 
fs_unix.map_file_fh("changes/dir_on_src/file", BytesIO(b"Hello From Source Target")) + yield target + + +@pytest.fixture +def dst_target(target_unix_factory) -> Iterator[Target]: + target, fs_unix = target_unix_factory.new("dst_target") + + passwd_contents = PASSWD_CONTENTS + "\ndst_user:x:1002:1002:dst_user:/home/dst_user:/bin/bash" + + fs_unix.map_file_fh("/etc/passwd", BytesIO(textwrap.dedent(passwd_contents).encode())) + + fs_unix.map_file_fh("changes/unchanged", BytesIO(b"Unchanged")) + fs_unix.map_file_fh("changes/changed", BytesIO(b"Hello From Destination Target")) + fs_unix.map_file_fh("changes/only_on_dst", BytesIO(b"BazBarFoo")) + + fs_unix.map_file_fh("changes/subdirectory_both/on_both", BytesIO(b"On Both")) + fs_unix.map_file_fh("changes/subdirectory_dst/only_on_dst", BytesIO(b"Hello From Destination Target")) + + fs_unix.map_file_fh("changes/dir_on_src", BytesIO(b"Hello From Destination Target")) + fs_unix.map_file_fh("changes/file_on_src/file", BytesIO(b"Hello From Destination Target")) + yield target + + +def test_scandir(src_target: Target, dst_target: Target) -> None: + comparison = TargetComparison(src_target, dst_target, deep=True) + diff = comparison.scandir("changes") + + assert len(diff.deleted) == 4 + assert diff.deleted[0].name == "only_on_src" + assert diff.deleted[0].open().read() == b"FooBarBaz" + assert diff.deleted[1].name == "subdirectory_src" + assert diff.deleted[2].name == "dir_on_src" + assert diff.deleted[3].open().read() == b"Hello From Source Target" + + assert len(diff.created) == 4 + assert diff.created[0].open().read() == b"BazBarFoo" + assert diff.created[0].name == "only_on_dst" + assert diff.created[1].name == "subdirectory_dst" + + assert diff.created[2].name == "dir_on_src" + assert diff.created[2].open().read() == b"Hello From Destination Target" + assert diff.created[3].name == "file_on_src" + assert diff.created[3].is_dir() + + assert len(diff.unchanged) == 2 + assert diff.unchanged[0].open().read() == b"Unchanged" + 
assert diff.unchanged[0].name == "unchanged" + + assert diff.unchanged[1].name == "subdirectory_both" + + assert len(diff.modified) == 1 + differential_entry = diff.modified[0] + assert differential_entry.src_target_entry.open().read() == b"Hello From Source Target" + assert differential_entry.dst_target_entry.open().read() == b"Hello From Destination Target" + assert differential_entry.diff == [ + b"--- \n", + b"+++ \n", + b"@@ -1 +1 @@\n", + b"-Hello From Source Target", + b"+Hello From Destination Target", + ] + + +def test_walkdir(src_target: Target, dst_target: Target) -> None: + comparison = TargetComparison(src_target, dst_target, deep=True) + differentials = list(comparison.walkdir("changes")) + + assert len(differentials) == 6 + assert sorted(differential.directory for differential in differentials) == [ + "/changes/dir_on_src", + "/changes/file_on_src", + "/changes/subdirectory_both", + "/changes/subdirectory_dst", + "/changes/subdirectory_src", + "changes", + ] + + assert differentials[0].directory == "changes" + + subdirectories_only_on_dst = ["/changes/subdirectory_dst", "/changes/file_on_src"] + for subdirectory in subdirectories_only_on_dst: + differential = next((differential for differential in differentials if differential.directory == subdirectory)) + + # All entries should be 'created' as this directory doesn't exist on the source target + assert len(differential.modified) == 0 + assert len(differential.deleted) == 0 + assert len(differential.unchanged) == 0 + assert len(differential.created) == 1 + assert differential.created[0].open().read() == b"Hello From Destination Target" + + subdirectories_only_on_src = ["/changes/subdirectory_src", "/changes/dir_on_src"] + + for subdirectory in subdirectories_only_on_src: + differential = next((differential for differential in differentials if differential.directory == subdirectory)) + + # All entries should be 'created' as this directory doesn't exist on the destination target + assert 
len(differential.modified) == 0 + assert len(differential.deleted) == 1 + assert len(differential.unchanged) == 0 + assert len(differential.created) == 0 + assert differential.deleted[0].open().read() == b"Hello From Source Target" + + # All entries should be 'unchanged' as this folder is identical on both + assert len(differentials[3].modified) == 0 + assert len(differentials[3].deleted) == 0 + assert len(differentials[3].unchanged) == 1 + assert len(differentials[3].created) == 0 + assert differentials[3].unchanged[0].open().read() == b"On Both" + + +def test_likely_unchanged() -> None: + # ['mode', 'addr', 'dev', 'nlink', 'uid', 'gid', 'size', 'atime', 'mtime', 'ctime'] + mock_stat = stat_result([0o1777, 1, 2, 3, 1337, 7331, 999, 0, 0, 0]) + mock_stat_accessed = stat_result([0o1777, 1, 2, 3, 1337, 7331, 999, 999, 0, 0]) + mock_stat_changed = stat_result([0o1777, 1, 2, 3, 1337, 7331, 999, 999, 999, 0]) + + assert likely_unchanged(mock_stat, mock_stat_accessed) + assert not likely_unchanged(mock_stat, mock_stat_changed) + + +def test_differentiate_filesystems(src_target: Target, dst_target: Target) -> None: + records = list(differentiate_target_filesystems(src_target, dst_target, deep=True, exclude="/etc/*")) + + created = [record for record in records if "created" in record._desc.name] + modified = [record for record in records if "modified" in record._desc.name] + deleted = [record for record in records if "deleted" in record._desc.name] + + assert len(created) == 6 + assert all(record._desc.name == "differential/file/created" for record in created) + + assert len(modified) == 1 + assert all(record._desc.name == "differential/file/modified" for record in modified) + + assert len(deleted) == 6 + assert all(record._desc.name == "differential/file/deleted" for record in deleted) + + +def test_differentiate_plugins(src_target: Target, dst_target: Target) -> None: + records = list(differentiate_target_plugin_outputs(src_target, dst_target, plugin="users")) + assert 
len(records) == 4 + + created = [record for record in records if "created" in record._desc.name] + unchanged = [record for record in records if "unchanged" in record._desc.name] + deleted = [record for record in records if "deleted" in record._desc.name] + + assert len(unchanged) == 2 + assert len(created) == 1 + assert len(deleted) == 1 + + assert created[0].record.name == "dst_user" + assert created[0].record.hostname == "dst_target" + assert deleted[0].record.name == "src_user" + assert deleted[0].record.hostname == "src_target" + + +def test_shell_ls(src_target: Target, dst_target: Target, capsys, monkeypatch) -> None: + monkeypatch.setattr(shell, "LS_COLORS", {}) + + cli = DifferentialCli(src_target, dst_target, deep=True) + cli.onecmd("ls changes") + + captured = capsys.readouterr() + + expected = [ + "changed (modified)", + "dir_on_src (created)", + "dir_on_src (deleted)", + "file_on_src (created)", + "file_on_src (deleted)", + "only_on_dst (created)", + "only_on_src (deleted)", + "subdirectory_both", + "subdirectory_dst (created)", + "subdirectory_src (deleted)", + "unchanged", + ] + + assert captured.out == "\n".join(expected) + "\n" + + +def test_shell_find(src_target: Target, dst_target: Target, capsys, monkeypatch) -> None: + monkeypatch.setattr(shell, "LS_COLORS", {}) + + cli = DifferentialCli(src_target, dst_target, deep=True) + cli.onecmd("find /changes -cmd") + + captured = capsys.readouterr() + + expected = [ + "/changes/changed (modified)", + "/changes/dir_on_src (created)", + "/changes/dir_on_src (deleted)", + "/changes/file_on_src (created)", + "/changes/file_on_src (deleted)", + "/changes/only_on_dst (created)", + "/changes/only_on_src (deleted)", + "/changes/subdirectory_dst (created)", + "/changes/subdirectory_src (deleted)", + "/changes/subdirectory_dst/only_on_dst (created)", + "/changes/file_on_src/file (created)", + "/changes/subdirectory_src/only_on_src (deleted)", + "/changes/dir_on_src/file (deleted)", + ] + + assert captured.out == 
"\n".join(expected) + "\n" + + +def test_shell_cat(src_target: Target, dst_target: Target, capsys) -> None: + cli = DifferentialCli(src_target, dst_target, deep=True) + + cli.onecmd("cat /changes/unchanged") + captured = capsys.readouterr() + assert captured.out == "Unchanged\n" + + cli.onecmd("cat /changes/subdirectory_dst/only_on_dst") + captured = capsys.readouterr() + assert captured.out == "Hello From Destination Target\n" + + cli.onecmd("cat /changes/subdirectory_src/only_on_src") + captured = capsys.readouterr() + assert captured.out == "Hello From Source Target\n" + + # When a file is present on both, we want the last version of the file to be outputted. + cli.onecmd("cat /changes/changed") + captured = capsys.readouterr() + assert captured.out == "Hello From Destination Target\n" + + +def test_shell_plugin(src_target: Target, dst_target: Target, capsys) -> None: + cli = DifferentialCli(src_target, dst_target, deep=True) + + cli.onecmd("plugin users") + captured = capsys.readouterr() + + assert "differential/record/created" in captured.out + assert "differential/record/unchanged" in captured.out + assert "differential/record/deleted" in captured.out + + +def test_target_diff_shell(capsys, monkeypatch) -> None: + with monkeypatch.context() as m: + m.setattr(shell, "LS_COLORS", {}) + src_target_path = absolute_path("_data/tools/diff/src.tar") + dst_target_path = absolute_path("_data/tools/diff/dst.tar") + m.setattr("sys.argv", ["target-diff", "--deep", "shell", src_target_path, dst_target_path]) + m.setattr("sys.stdin", StringIO("ls changes")) + target_diff() + out, err = capsys.readouterr() + out = out.replace("(src_target/dst_target)/diff />", "").strip() + + expected = [ + "changed (modified)", + "only_on_dst (created)", + "only_on_src (deleted)", + "subdirectory_both", + "subdirectory_dst (created)", + "subdirectory_src (deleted)", + "unchanged", + ] + + assert out == "\n".join(expected) + assert "unrecognized arguments" not in err + + +def 
test_target_diff_fs(capsys, monkeypatch) -> None: + with monkeypatch.context() as m: + src_target_path = absolute_path("_data/tools/diff/src.tar") + dst_target_path = absolute_path("_data/tools/diff/dst.tar") + m.setattr("sys.argv", ["target-diff", "--deep", "fs", "--strings", src_target_path, dst_target_path]) + target_diff() + out, _ = capsys.readouterr() + + assert "differential/file/created" in out + assert "differential/file/modified" in out + assert "differential/file/deleted" in out + + +def test_target_diff_query(capsys, monkeypatch) -> None: + with monkeypatch.context() as m: + src_target_path = absolute_path("_data/tools/diff/src.tar") + dst_target_path = absolute_path("_data/tools/diff/dst.tar") + m.setattr("sys.argv", ["target-diff", "query", "--strings", "-f", "users", src_target_path, dst_target_path]) + target_diff() + out, _ = capsys.readouterr() + + assert "differential/record/created" in out + assert "differential/record/unchanged" in out + assert "differential/record/deleted" in out From 283490e299f078c7856ddce95f6f898239ec8c3b Mon Sep 17 00:00:00 2001 From: JSCU-CNI <121175071+JSCU-CNI@users.noreply.github.com> Date: Wed, 12 Jun 2024 12:27:04 +0200 Subject: [PATCH 2/8] Use ignore_fields_for_comparison context manager --- dissect/target/tools/diff.py | 43 +++++++++++++++--------------------- 1 file changed, 18 insertions(+), 25 deletions(-) diff --git a/dissect/target/tools/diff.py b/dissect/target/tools/diff.py index 8d5ca4d9b..d6cc3b0e2 100644 --- a/dissect/target/tools/diff.py +++ b/dissect/target/tools/diff.py @@ -13,12 +13,7 @@ from io import BytesIO from typing import Iterable, Iterator, TextIO -from flow.record import ( - IGNORE_FIELDS_FOR_COMPARISON, - Record, - RecordOutput, - set_ignored_fields_for_comparison, -) +from flow.record import Record, RecordOutput, ignore_fields_for_comparison from dissect.target import Target from dissect.target.exceptions import FileNotFoundError @@ -309,26 +304,24 @@ def differentiate_plugin_outputs(self, 
plugin_name: str, plugin_arg_parts: list[ records. There is no equivalent for the FileModifiedRecord. For files and directories, we can use the path to reliably track changes from one target to the next. There is no equivalent for plugin outputs, so we just assume that all records are either deleted (only on src), created (only on dst) or unchanged (on both).""" - old_ignored_values = IGNORE_FIELDS_FOR_COMPARISON - set_ignored_fields_for_comparison(["_generated", "_source", "hostname", "domain"]) - - src_records = set(get_plugin_output_records(plugin_name, plugin_arg_parts, self.src_target)) - src_records_seen = set() - - for dst_record in get_plugin_output_records(plugin_name, plugin_arg_parts, self.dst_target): - if dst_record in src_records: - src_records_seen.add(dst_record) - yield RecordUnchangedRecord( - src_target=self.src_target.path, dst_target=self.dst_target.path, record=dst_record - ) - else: - yield RecordCreatedRecord( - src_target=self.src_target.path, dst_target=self.dst_target.path, record=dst_record + with ignore_fields_for_comparison(["_generated", "_source", "hostname", "domain"]): + src_records = set(get_plugin_output_records(plugin_name, plugin_arg_parts, self.src_target)) + src_records_seen = set() + + for dst_record in get_plugin_output_records(plugin_name, plugin_arg_parts, self.dst_target): + if dst_record in src_records: + src_records_seen.add(dst_record) + yield RecordUnchangedRecord( + src_target=self.src_target.path, dst_target=self.dst_target.path, record=dst_record + ) + else: + yield RecordCreatedRecord( + src_target=self.src_target.path, dst_target=self.dst_target.path, record=dst_record + ) + for record in src_records - src_records_seen: + yield RecordDeletedRecord( + src_target=self.src_target.path, dst_target=self.dst_target.path, record=record ) - for record in src_records - src_records_seen: - yield RecordDeletedRecord(src_target=self.src_target.path, dst_target=self.dst_target.path, record=record) - - 
set_ignored_fields_for_comparison(old_ignored_values) class DifferentialCli(ExtendedCmd): From a06b16e5fb8699d17970e57978394f8f0e66fae1 Mon Sep 17 00:00:00 2001 From: JSCU-CNI <121175071+JSCU-CNI@users.noreply.github.com> Date: Wed, 12 Jun 2024 12:31:14 +0200 Subject: [PATCH 3/8] Add --hex and --only-changed * `--hex` can be used to diff binary files in a readable way. * `--only-changed` can be used to omit unchanged records when comparing plugin outputs --- dissect/target/tools/diff.py | 44 +++++++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/dissect/target/tools/diff.py b/dissect/target/tools/diff.py index d6cc3b0e2..87a89d833 100644 --- a/dissect/target/tools/diff.py +++ b/dissect/target/tools/diff.py @@ -13,6 +13,7 @@ from io import BytesIO from typing import Iterable, Iterator, TextIO +from dissect.cstruct import hexdump from flow.record import Record, RecordOutput, ignore_fields_for_comparison from dissect.target import Target @@ -299,7 +300,9 @@ def walkdir( continue yield from self.walkdir(subdirectory.path, exclude, already_iterated) - def differentiate_plugin_outputs(self, plugin_name: str, plugin_arg_parts: list[str]) -> Iterator[Record]: + def differentiate_plugin_outputs( + self, plugin_name: str, plugin_arg_parts: list[str], only_changed: bool = False + ) -> Iterator[Record]: """Run a plugin on the source and destination targets and yield RecordUnchanged, RecordCreated and RecordDeleted records. There is no equivalent for the FileModifiedRecord. For files and directories, we can use the path to reliably track changes from one target to the next. 
There is no equivalent for plugin outputs, so we just assume @@ -311,9 +314,10 @@ def differentiate_plugin_outputs(self, plugin_name: str, plugin_arg_parts: list[ for dst_record in get_plugin_output_records(plugin_name, plugin_arg_parts, self.dst_target): if dst_record in src_records: src_records_seen.add(dst_record) - yield RecordUnchangedRecord( - src_target=self.src_target.path, dst_target=self.dst_target.path, record=dst_record - ) + if not only_changed: + yield RecordUnchangedRecord( + src_target=self.src_target.path, dst_target=self.dst_target.path, record=dst_record + ) else: yield RecordCreatedRecord( src_target=self.src_target.path, dst_target=self.dst_target.path, record=dst_record @@ -580,6 +584,7 @@ def cmd_cat(self, args: argparse.Namespace, stdout: TextIO): print(f"File {name} not found.") @arg("path", nargs="?") + @arg("--hex", action="store_true", default=False) def cmd_diff(self, args: argparse.Namespace, stdout: TextIO): """Output the difference in file contents between two targets.""" stdout = stdout.buffer @@ -589,8 +594,19 @@ def cmd_diff(self, args: argparse.Namespace, stdout: TextIO): directory_differential = self.comparison.scandir(base_dir) for entry in directory_differential.modified: if entry.name == name: - primary_fh_lines = entry.src_target_entry.open().readlines() - secondary_fh_lines = entry.dst_target_entry.open().readlines() + if args.hex: + primary_fh_lines = [ + line.encode() + for line in hexdump(entry.src_target_entry.open().read(), output="string").split("\n") + ] + secondary_fh_lines = [ + line.encode() + for line in hexdump(entry.dst_target_entry.open().read(), output="string").split("\n") + ] + else: + primary_fh_lines = entry.src_target_entry.open().readlines() + secondary_fh_lines = entry.dst_target_entry.open().readlines() + for chunk in diff_bytes(unified_diff, primary_fh_lines, secondary_fh_lines): if chunk.startswith(b"@@"): chunk = fmt_ls_colors("ln", chunk.decode()).encode() @@ -600,7 +616,12 @@ def cmd_diff(self, 
args: argparse.Namespace, stdout: TextIO): chunk = fmt_ls_colors("or", chunk.decode()).encode() shutil.copyfileobj(BytesIO(chunk), stdout) + + if args.hex: + stdout.write(b"\n") + stdout.flush() + print("") return @@ -778,14 +799,14 @@ def differentiate_target_filesystems( def differentiate_target_plugin_outputs( - *targets: tuple[Target], absolute: bool = False, plugin: str, plugin_args: str = "" + *targets: tuple[Target], absolute: bool = False, only_changed: bool = False, plugin: str, plugin_args: str = "" ) -> Iterator[Record]: """Given a list of targets, yielding records indicating which records from this plugin are new, unmodified or deleted.""" for target_pair in make_target_pairs(targets, absolute): src_target, dst_target = target_pair comparison = TargetComparison(src_target, dst_target) - yield from comparison.differentiate_plugin_outputs(plugin, plugin_args) + yield from comparison.differentiate_plugin_outputs(plugin, plugin_args, only_changed) @catch_sigpipe @@ -864,6 +885,12 @@ def main() -> None: "target that came before it." 
), ) + query_mode.add_argument( + "--only-changed", + action="store_true", + help="Do not output unchanged records", + default=False, + ) configure_generic_arguments(parser) @@ -889,6 +916,7 @@ def main() -> None: iterator = differentiate_target_plugin_outputs( *target_list, absolute=args.absolute, + only_changed=args.only_changed, plugin=args.plugin, plugin_args=arg_str_to_arg_list(args.parameters), ) From 8a0c6af13967237e91727664928ff0472aab2db4 Mon Sep 17 00:00:00 2001 From: JSCU-CNI <121175071+JSCU-CNI@users.noreply.github.com> Date: Tue, 20 Aug 2024 12:16:59 +0200 Subject: [PATCH 4/8] fix target-diff with changes from main --- dissect/target/tools/diff.py | 73 ++++++++++++++++++++--------------- dissect/target/tools/shell.py | 30 -------------- pyproject.toml | 2 +- tests/tools/test_diff.py | 8 ++-- 4 files changed, 47 insertions(+), 66 deletions(-) diff --git a/dissect/target/tools/diff.py b/dissect/target/tools/diff.py index 87a89d833..1142d5721 100644 --- a/dissect/target/tools/diff.py +++ b/dissect/target/tools/diff.py @@ -21,7 +21,8 @@ from dissect.target.filesystem import FilesystemEntry from dissect.target.helpers import fsutil from dissect.target.helpers.record import TargetRecordDescriptor -from dissect.target.plugin import arg +from dissect.target.plugin import alias, arg +from dissect.target.tools.fsutils import print_extensive_file_stat_listing from dissect.target.tools.query import record_output from dissect.target.tools.shell import ( ExtendedCmd, @@ -29,7 +30,6 @@ arg_str_to_arg_list, build_pipe_stdout, fmt_ls_colors, - print_extensive_file_stat, python_shell, run_cli, ) @@ -368,15 +368,16 @@ def dst_target(self) -> Target: @property def prompt(self) -> str: + if self.comparison.src_target.name != self.comparison.dst_target.name: prompt_base = f"({self.comparison.src_target.name}/{self.comparison.dst_target.name})" else: prompt_base = self.comparison.src_target.name - suffix = f"{prompt_base}/diff {self.cwd}>" + suffix = 
f"{prompt_base}:{self.cwd}$ " if len(self.targets) <= 2: - return suffix + return f"(diff) {suffix}" chain_prefix = "[ " for i in range(len(self.targets)): @@ -384,7 +385,7 @@ def prompt(self) -> str: chain_prefix += char chain_prefix += "] " - return f"{chain_prefix}{suffix}" + return f"(diff) {chain_prefix}{suffix}" def _select_source_and_dest(self, src_index: int, dst_index: int) -> None: """Set local variables according to newly selected source and destination index, and re-instatiate @@ -447,15 +448,16 @@ def _targets_with_path(self, path: str, warn_when_incomplete: bool = False) -> l log.warning("'%s' is only present on '%s'.", path, targets_with_path[0]) return targets_with_path - def _write_entry_contents_to_stdout(self, entry: FilesystemEntry, stdout: TextIO): + def _write_entry_contents_to_stdout(self, entry: FilesystemEntry, stdout: TextIO) -> bool: """Copy the contents of a Filesystementry to stdout.""" stdout = stdout.buffer fh = entry.open() shutil.copyfileobj(fh, stdout) stdout.flush() print("") + return False - def completedefault(self, text: str, line: str, begidx: int, endidx: int): + def completedefault(self, text: str, line: str, begidx: int, endidx: int) -> list[str]: """Autocomplete based on files / directories found in the current path.""" path = line[:begidx].rsplit(" ")[-1] textlower = text.lower() @@ -468,7 +470,7 @@ def completedefault(self, text: str, line: str, begidx: int, endidx: int): r = [name for name in names if name.lower().startswith(textlower)] return r - def do_list(self, line): + def do_list(self, line: str) -> bool: """Prints a list of targets to differentiate between. Useful when differentiating between three or more targets. 
Looks quite bad on small terminal screens.""" columns = ["#", "Name", "Path", "From", "To"] @@ -497,7 +499,9 @@ def do_list(self, line): for row in rows: print(fmt.format(*row)) print("") + return False + @alias("prev") @arg("-a", "--absolute", action="store_true", help="Only move the destination target one position back.") def cmd_previous(self, args: argparse.Namespace, line: str) -> bool: """When three or more targets are available, move the 'comparison window' one position back.""" @@ -510,11 +514,7 @@ def cmd_previous(self, args: argparse.Namespace, line: str) -> bool: if dst_index <= src_index: src_index, dst_index = dst_index, src_index self._select_source_and_dest(src_index, dst_index) - - @arg("-a", "--absolute", action="store_true", help="Only move the destination target one position back.") - def cmd_prev(self, args: argparse.Namespace, line: str) -> bool: - """Alias for previous.""" - self.cmd_previous(args, line) + return False @arg("-a", "--absolute", action="store_true", help="Only move the destination target one position forward.") def cmd_next(self, args: argparse.Namespace, line: str) -> bool: @@ -525,20 +525,21 @@ def cmd_next(self, args: argparse.Namespace, line: str) -> bool: if dst_index <= src_index: src_index, dst_index = dst_index, src_index self._select_source_and_dest(src_index, dst_index) + return False - def do_cd(self, path: str) -> None: + def do_cd(self, path: str) -> bool: """Change directory to the given path.""" path = fsutil.abspath(path, cwd=str(self.cwd), alt_separator=self.alt_separator) targets_with_path = self._targets_with_path(path, warn_when_incomplete=True) - if len(targets_with_path) == 0: - return - self.cwd = path + if len(targets_with_path) != 0: + self.cwd = path + return False @arg("path", nargs="?") @arg("-l", action="store_true") @arg("-a", "--all", action="store_true") # ignored but included for proper argument parsing @arg("-h", "--human-readable", action="store_true") - def cmd_ls(self, args: 
argparse.Namespace, stdout: TextIO): + def cmd_ls(self, args: argparse.Namespace, stdout: TextIO) -> bool: """List contents of a directory for two targets.""" path = args.path if args.path is not None else self.cwd diff = self.comparison.scandir(path) @@ -548,14 +549,15 @@ def cmd_ls(self, args: argparse.Namespace, stdout: TextIO): else: for entry, name in results: if not isinstance(entry, DifferentialEntry): - print_extensive_file_stat(stdout, name, entry) + print_extensive_file_stat_listing(stdout, name, entry) else: # We have to choose for which version of this file we are going to print detailed info. The # destination target seems to make the most sense: it is likely newer - print_extensive_file_stat(stdout, name, entry.dst_target_entry) + print_extensive_file_stat_listing(stdout, name, entry.dst_target_entry) + return False @arg("path", nargs="?") - def cmd_cat(self, args: argparse.Namespace, stdout: TextIO): + def cmd_cat(self, args: argparse.Namespace, stdout: TextIO) -> bool: """Output the contents of a file.""" base_dir, _, name = args.path.rpartition("/") if not base_dir: @@ -582,10 +584,11 @@ def cmd_cat(self, args: argparse.Namespace, stdout: TextIO): ) return self._write_entry_contents_to_stdout(entry.dst_target_entry, stdout) print(f"File {name} not found.") + return False @arg("path", nargs="?") @arg("--hex", action="store_true", default=False) - def cmd_diff(self, args: argparse.Namespace, stdout: TextIO): + def cmd_diff(self, args: argparse.Namespace, stdout: TextIO) -> bool: """Output the difference in file contents between two targets.""" stdout = stdout.buffer base_dir, _, name = args.path.rpartition("/") @@ -623,7 +626,7 @@ def cmd_diff(self, args: argparse.Namespace, stdout: TextIO): stdout.flush() print("") - return + return False # Check if this file is even present on one of the targets files = directory_differential.unchanged + directory_differential.created + directory_differential.deleted @@ -635,7 +638,7 @@ def cmd_diff(self, args: 
argparse.Namespace, stdout: TextIO): @arg("index", type=str) @arg("type", choices=["src", "dst"]) - def cmd_set(self, args: argparse.Namespace, stdout: TextIO): + def cmd_set(self, args: argparse.Namespace, stdout: TextIO) -> bool: """Change either the source or destination target for differentiation. Index can be given relative (when prefixed with '+' or '-', e.g. "set dst +1") or absolute (e.g. set src 0).""" index = args.index.strip() @@ -645,7 +648,7 @@ def cmd_set(self, args: argparse.Namespace, stdout: TextIO): multiplier = 1 if index[0] == "+" else -1 index = index[1:].strip() if not index.isdigit(): - return + return False pos += int(index) * multiplier elif index.isdigit(): pos = int(index) @@ -655,9 +658,10 @@ def cmd_set(self, args: argparse.Namespace, stdout: TextIO): self._select_source_and_dest(pos, self.dst_index) else: self._select_source_and_dest(self.src_index, pos) + return False @arg("target", choices=["src", "dst"]) - def cmd_enter(self, args: argparse.Namespace, stdout: TextIO): + def cmd_enter(self, args: argparse.Namespace, stdout: TextIO) -> bool: """Open a subshell for the source or destination target.""" target = self.src_target if args.target == "src" else self.dst_target cli = TargetCli(target) @@ -667,6 +671,7 @@ def cmd_enter(self, args: argparse.Namespace, stdout: TextIO): # Cyber doesn't work well with subshells cli.cyber = False run_cli(cli) + return False @arg("path", nargs="?") @arg("-name", default="*") @@ -675,14 +680,15 @@ def cmd_enter(self, args: argparse.Namespace, stdout: TextIO): @arg("-m", "--modified", action="store_true") @arg("-d", "--deleted", action="store_true") @arg("-u", "--unchanged", action="store_true") - def cmd_find(self, args: argparse.Namespace, stdout: TextIO) -> bool | None: + def cmd_find(self, args: argparse.Namespace, stdout: TextIO) -> bool: """Search for files in a directory hierarchy.""" path = fsutil.abspath(args.path, cwd=str(self.cwd), alt_separator=self.comparison.src_target.fs.alt_separator) 
if not path: - return + return False + targets_with_path = self._targets_with_path(path, warn_when_incomplete=True) if len(targets_with_path) < 0: - return + return False if args.iname: pattern = re.compile(translate(args.iname), re.IGNORECASE) @@ -705,7 +711,9 @@ def cmd_find(self, args: argparse.Namespace, stdout: TextIO) -> bool | None: print(line, file=stdout) - def do_plugin(self, line: str): + return False + + def do_plugin(self, line: str) -> bool: """Yield RecordCreated, RecordUnchanged and RecordDeleted Records by comparing plugin outputs for two targets.""" argparts = arg_str_to_arg_list(line) @@ -733,9 +741,12 @@ def do_plugin(self, line: str): for record in iterator: print(record, file=sys.stdout) - def do_python(self, line: str) -> bool | None: + return False + + def do_python(self, line: str) -> bool: """drop into a Python shell.""" python_shell(list(self.targets)) + return False def make_target_pairs(targets: tuple[Target], absolute: bool = False) -> list[tuple[Target, Target]]: diff --git a/dissect/target/tools/shell.py b/dissect/target/tools/shell.py index 32c111b49..175f960fc 100644 --- a/dissect/target/tools/shell.py +++ b/dissect/target/tools/shell.py @@ -1265,36 +1265,6 @@ def _target_name(target: Target) -> str: return target.name -def arg_str_to_arg_list(args: str) -> list[str]: - """Convert a commandline string to a list of command line arguments.""" - lexer = shlex.shlex(args, posix=True, punctuation_chars=True) - lexer.wordchars += "$" - lexer.whitespace_split = True - return list(lexer) - - -def print_extensive_file_stat( - stdout: TextIO, name: str, entry: Optional[FilesystemEntry] = None, timestamp: Optional[datetime.datetime] = None -) -> None: - """Print the file status.""" - if entry is not None: - try: - stat = entry.lstat() - if timestamp is None: - timestamp = stat.st_mtime - symlink = f" -> {entry.readlink()}" if entry.is_symlink() else "" - utc_time = datetime.datetime.utcfromtimestamp(timestamp).isoformat() - - print( - 
f"{stat_modestr(stat)} {stat.st_uid:4d} {stat.st_gid:4d} {stat.st_size:6d} {utc_time} {name}{symlink}", - file=stdout, - ) - return - except FileNotFoundError: - pass - print(f"?????????? ? ? ? ????-??-??T??:??:??.?????? {name}", file=stdout) - - @contextmanager def build_pipe(pipe_parts: list[str], pipe_stdout: int = subprocess.PIPE) -> Iterator[tuple[TextIO, BinaryIO]]: """ diff --git a/pyproject.toml b/pyproject.toml index 2396d6fd6..c3da8830c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,7 @@ dependencies = [ "dissect.regf>=3.3,<4", "dissect.util>=3,<4", "dissect.volume>=2,<4", - "flow.record~=3.15.0", + "flow.record>=3.16.dev5", # Temporary bump as long as 3.16 is not released yet. "structlog", ] dynamic = ["version"] diff --git a/tests/tools/test_diff.py b/tests/tools/test_diff.py index 0ae65522d..77d934b62 100644 --- a/tests/tools/test_diff.py +++ b/tests/tools/test_diff.py @@ -7,11 +7,11 @@ import pytest -import dissect.target.tools.shell as shell from dissect.target.filesystem import VirtualFilesystem from dissect.target.helpers.fsutil import stat_result from dissect.target.plugins.os.unix._os import UnixPlugin from dissect.target.target import Target +from dissect.target.tools import fsutils from dissect.target.tools.diff import ( DifferentialCli, TargetComparison, @@ -224,7 +224,7 @@ def test_differentiate_plugins(src_target: Target, dst_target: Target) -> None: def test_shell_ls(src_target: Target, dst_target: Target, capsys, monkeypatch) -> None: - monkeypatch.setattr(shell, "LS_COLORS", {}) + monkeypatch.setattr(fsutils, "LS_COLORS", {}) cli = DifferentialCli(src_target, dst_target, deep=True) cli.onecmd("ls changes") @@ -249,7 +249,7 @@ def test_shell_ls(src_target: Target, dst_target: Target, capsys, monkeypatch) - def test_shell_find(src_target: Target, dst_target: Target, capsys, monkeypatch) -> None: - monkeypatch.setattr(shell, "LS_COLORS", {}) + monkeypatch.setattr(fsutils, "LS_COLORS", {}) cli = DifferentialCli(src_target, 
dst_target, deep=True) cli.onecmd("find /changes -cmd") @@ -309,7 +309,7 @@ def test_shell_plugin(src_target: Target, dst_target: Target, capsys) -> None: def test_target_diff_shell(capsys, monkeypatch) -> None: with monkeypatch.context() as m: - m.setattr(shell, "LS_COLORS", {}) + m.setattr(fsutils, "LS_COLORS", {}) src_target_path = absolute_path("_data/tools/diff/src.tar") dst_target_path = absolute_path("_data/tools/diff/dst.tar") m.setattr("sys.argv", ["target-diff", "--deep", "shell", src_target_path, dst_target_path]) From 59c965212152788660e8d813b23d26417f17cc48 Mon Sep 17 00:00:00 2001 From: JSCU-CNI <121175071+JSCU-CNI@users.noreply.github.com> Date: Mon, 16 Sep 2024 11:00:21 +0200 Subject: [PATCH 5/8] Small fixes Fix a test and update ls and completedefault to how target-shell does it. --- dissect/target/tools/diff.py | 66 ++++++++++++++++++++++++------------ pyproject.toml | 2 +- tests/tools/test_diff.py | 2 +- 3 files changed, 46 insertions(+), 24 deletions(-) diff --git a/dissect/target/tools/diff.py b/dissect/target/tools/diff.py index 1142d5721..8590cbfab 100644 --- a/dissect/target/tools/diff.py +++ b/dissect/target/tools/diff.py @@ -368,7 +368,6 @@ def dst_target(self) -> Target: @property def prompt(self) -> str: - if self.comparison.src_target.name != self.comparison.dst_target.name: prompt_base = f"({self.comparison.src_target.name}/{self.comparison.dst_target.name})" else: @@ -410,7 +409,7 @@ def _annotate_differential( absolute: bool = False, ) -> list[tuple[fsutil.TargetPath | DifferentialEntry], str]: """Given a DirectoryDifferential instance, construct a list of tuples where the first element is a Filesystem / - DifferentialEntry entries and the second a color-formatted string.""" + DifferentialEntry and the second a color-formatted string.""" r = [] attr = "path" if absolute else "name" @@ -436,17 +435,29 @@ def _annotate_differential( r.sort(key=lambda e: e[0].name) return r - def _targets_with_path(self, path: str, warn_when_incomplete: 
bool = False) -> list[Target]: - """Return targets where a given path exists, checking the src and dst target of this class. Optionally log a - warning if the path only exists on one of the two targets.""" - targets_with_path = [] - if self.comparison.src_target.fs.exists(path): - targets_with_path.append(self.comparison.src_target) - if self.comparison.dst_target.fs.exists(path): - targets_with_path.append(self.comparison.dst_target) - if warn_when_incomplete and len(targets_with_path) == 1: - log.warning("'%s' is only present on '%s'.", path, targets_with_path[0]) - return targets_with_path + def _targets_with_directory(self, path: str, warn_when_incomplete: bool = False) -> int: + """Return whether a given path is an existing directory for neither, one of, or both of the targets being + compared. Optionally log a warning if the directory only exists on one of the two targets.""" + src_has_dir = False + dst_has_dir = False + try: + entry = self.comparison.src_target.fs.get(path) + src_has_dir = entry.is_dir() + except FileNotFoundError: + pass + try: + entry = self.comparison.dst_target.fs.get(path) + dst_has_dir = entry.is_dir() + except FileNotFoundError: + pass + + if (src_has_dir is False or dst_has_dir is False) and warn_when_incomplete: + if src_has_dir != dst_has_dir: + target_with_dir = self.comparison.src_target if src_has_dir else self.comparison.dst_target + log.warning("'%s' is only a valid path on '%s'.", path, target_with_dir) + else: + log.warning("'%s' is not a valid path on either target.", path) + return int(src_has_dir) + int(dst_has_dir) def _write_entry_contents_to_stdout(self, entry: FilesystemEntry, stdout: TextIO) -> bool: """Copy the contents of a Filesystementry to stdout.""" @@ -465,10 +476,21 @@ def completedefault(self, text: str, line: str, begidx: int, endidx: int) -> lis path = fsutil.abspath(path, cwd=str(self.cwd), alt_separator=self.alt_separator) diff = self.comparison.scandir(path) - names = [item.name for group in 
[diff.created, diff.modified, diff.unchanged, diff.deleted] for item in group] + items = [ + (item.entry.is_dir(), item.name) for group in [diff.created, diff.unchanged, diff.deleted] for item in group + ] + items += [ + (item.src_target_entry.is_dir() and item.dst_target_entry.is_dir(), item.name) for item in diff.modified + ] + suggestions = [] + for is_dir, fname in items: + if not fname.lower().startswith(textlower): + continue - r = [name for name in names if name.lower().startswith(textlower)] - return r + # Add a trailing slash to directories, to allow for easier traversal of the filesystem + suggestion = f"{fname}/" if is_dir else fname + suggestions.append(suggestion) + return suggestions def do_list(self, line: str) -> bool: """Prints a list of targets to differentiate between. Useful when differentiating between three or more @@ -530,8 +552,7 @@ def cmd_next(self, args: argparse.Namespace, line: str) -> bool: def do_cd(self, path: str) -> bool: """Change directory to the given path.""" path = fsutil.abspath(path, cwd=str(self.cwd), alt_separator=self.alt_separator) - targets_with_path = self._targets_with_path(path, warn_when_incomplete=True) - if len(targets_with_path) != 0: + if self._targets_with_directory(path, warn_when_incomplete=True) != 0: self.cwd = path return False @@ -549,11 +570,13 @@ def cmd_ls(self, args: argparse.Namespace, stdout: TextIO) -> bool: else: for entry, name in results: if not isinstance(entry, DifferentialEntry): - print_extensive_file_stat_listing(stdout, name, entry) + print_extensive_file_stat_listing(stdout, name, entry, human_readable=args.human_readable) else: # We have to choose for which version of this file we are going to print detailed info. 
The # destination target seems to make the most sense: it is likely newer - print_extensive_file_stat_listing(stdout, name, entry.dst_target_entry) + print_extensive_file_stat_listing( + stdout, name, entry.dst_target_entry, human_readable=args.human_readable + ) return False @arg("path", nargs="?") @@ -686,8 +709,7 @@ def cmd_find(self, args: argparse.Namespace, stdout: TextIO) -> bool: if not path: return False - targets_with_path = self._targets_with_path(path, warn_when_incomplete=True) - if len(targets_with_path) < 0: + if self._targets_with_directory(path, warn_when_incomplete=True) == 0: return False if args.iname: diff --git a/pyproject.toml b/pyproject.toml index c3da8830c..bf5e4a745 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,7 @@ dependencies = [ "dissect.regf>=3.3,<4", "dissect.util>=3,<4", "dissect.volume>=2,<4", - "flow.record>=3.16.dev5", # Temporary bump as long as 3.16 is not released yet. + "flow.record~=3.16.0", "structlog", ] dynamic = ["version"] diff --git a/tests/tools/test_diff.py b/tests/tools/test_diff.py index 77d934b62..b04065869 100644 --- a/tests/tools/test_diff.py +++ b/tests/tools/test_diff.py @@ -316,7 +316,7 @@ def test_target_diff_shell(capsys, monkeypatch) -> None: m.setattr("sys.stdin", StringIO("ls changes")) target_diff() out, err = capsys.readouterr() - out = out.replace("(src_target/dst_target)/diff />", "").strip() + out = out.replace("(diff) (src_target/dst_target):/$", "").strip() expected = [ "changed (modified)", From 0a522e9d4a276b62c5ae99b1d0d93b4926b28335 Mon Sep 17 00:00:00 2001 From: JSCU-CNI <121175071+JSCU-CNI@users.noreply.github.com> Date: Mon, 11 Nov 2024 12:38:11 +0100 Subject: [PATCH 6/8] implement review feedback --- dissect/target/tools/diff.py | 53 ++++++++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/dissect/target/tools/diff.py b/dissect/target/tools/diff.py index 8590cbfab..00e3d7e7d 100644 --- a/dissect/target/tools/diff.py +++ 
b/dissect/target/tools/diff.py @@ -331,7 +331,7 @@ def differentiate_plugin_outputs( class DifferentialCli(ExtendedCmd): """CLI for browsing the differential between two or more targets.""" - doc_header_prefix = "Target Diff\n" "==========\n" + doc_header_prefix = "target-diff\n" "==========\n" doc_header_suffix = "\n\nDocumented commands (type help ):" doc_header_multiple_targets = "Use 'list', 'prev' and 'next' to list and select targets to differentiate between." @@ -351,13 +351,14 @@ def __init__(self, *targets: tuple[Target], deep: bool = False, limit: int = FIL self.doc_header = self.doc_header_prefix + doc_header_middle + self.doc_header_suffix self._select_source_and_dest(0, 1) - if len(self.targets) > 2: - # Some help may be nice if you are diffing more than 2 targets at once - self.do_help(arg=None) start_in_cyber = any(target.props.get("cyber") for target in self.targets) super().__init__(start_in_cyber) + if len(self.targets) > 2: + # Some help may be nice if you are diffing more than 2 targets at once + self.do_help(arg=None) + @property def src_target(self) -> Target: return self.targets[self.src_index] @@ -369,11 +370,11 @@ def dst_target(self) -> Target: @property def prompt(self) -> str: if self.comparison.src_target.name != self.comparison.dst_target.name: - prompt_base = f"({self.comparison.src_target.name}/{self.comparison.dst_target.name})" + prompt_base = f"{self.comparison.src_target.name}/{self.comparison.dst_target.name}" else: prompt_base = self.comparison.src_target.name - suffix = f"{prompt_base}:{self.cwd}$ " + suffix = f"\x1b[1;32m{prompt_base}\x1b[0m:\x1b[1;34m{self.cwd}\x1b[0m$ " if len(self.targets) <= 2: return f"(diff) {suffix}" @@ -658,6 +659,14 @@ def cmd_diff(self, args: argparse.Namespace, stdout: TextIO) -> bool: print(f"File {name} not found.") else: print(f"No two versions available for {name} to differentiate between.") + return False + + @arg("path", nargs="?") + @alias("xxd") + def cmd_hexdump(self, args: 
argparse.Namespace, stdout: TextIO) -> bool: + """Output difference of the given file between targets in hexdump.""" + setattr(args, "hex", True) + return self.cmd_diff(args, stdout) @arg("index", type=str) @arg("type", choices=["src", "dst"]) @@ -796,8 +805,6 @@ def differentiate_target_filesystems( ) -> Iterator[Record]: """Given a list of targets, compare targets against one another and yield File[Created|Modified|Deleted]Records indicating the differences between them.""" - if len(targets) < 2: - raise ValueError("Provide two or more targets to differentiate between.") for target_pair in make_target_pairs(targets, absolute): # Unpack the tuple and initialize the comparison class @@ -864,13 +871,13 @@ def main() -> None: type=int, help="How many bytes to compare before assuming a file is left unchanged (0 for no limit)", ) - subparsers = parser.add_subparsers(help="Mode for differentiating targets", dest="mode") + subparsers = parser.add_subparsers(help="Mode for differentiating targets", dest="mode", required=True) shell_mode = subparsers.add_parser("shell", help="Open an interactive shell to compare two or more targets.") - shell_mode.add_argument("targets", metavar="TARGETS", nargs="*", help="Targets to differentiate between") + shell_mode.add_argument("targets", metavar="TARGETS", nargs="+", help="Targets to differentiate between") fs_mode = subparsers.add_parser("fs", help="Yield records about differences between target filesystems.") - fs_mode.add_argument("targets", metavar="TARGETS", nargs="*", help="Targets to differentiate between") + fs_mode.add_argument("targets", metavar="TARGETS", nargs="+", help="Targets to differentiate between") fs_mode.add_argument("-s", "--strings", action="store_true", help="print records as strings") fs_mode.add_argument("-e", "--exclude", action="append", help="Path(s) on targets not to check for differences") fs_mode.add_argument( @@ -891,7 +898,7 @@ def main() -> None: ) query_mode = subparsers.add_parser("query", 
help="Differentiate plugin outputs between two or more targets.") - query_mode.add_argument("targets", metavar="TARGETS", nargs="*", help="Targets to differentiate between") + query_mode.add_argument("targets", metavar="TARGETS", nargs="+", help="Targets to differentiate between") query_mode.add_argument("-s", "--strings", action="store_true", help="print records as strings") query_mode.add_argument( "-p", @@ -930,6 +937,10 @@ def main() -> None: args = parser.parse_args() process_generic_arguments(args) + if len(args.targets) < 2: + print("At least two targets are required for target-diff.") + parser.exit(1) + target_list = [Target.open(path) for path in args.targets] if args.mode == "shell": cli = DifferentialCli(*target_list, deep=args.deep, limit=args.limit) @@ -946,6 +957,14 @@ def main() -> None: exclude=args.exclude, ) elif args.mode == "query": + if args.deep: + log.error("argument --deep is not available in target-diff query mode") + parser.exit(1) + + if args.limit != FILE_LIMIT: + log.error("argument --limit is not available in target-diff query mode") + parser.exit(1) + iterator = differentiate_target_plugin_outputs( *target_list, absolute=args.absolute, @@ -953,8 +972,14 @@ def main() -> None: plugin=args.plugin, plugin_args=arg_str_to_arg_list(args.parameters), ) - for record in iterator: - writer.write(record) + + try: + for record in iterator: + writer.write(record) + + except Exception as e: + log.error(e) + parser.exit(1) if __name__ == "__main__": From c3266b9ad2ad921c2303680d9480269682e2e62e Mon Sep 17 00:00:00 2001 From: JSCU-CNI <121175071+JSCU-CNI@users.noreply.github.com> Date: Mon, 11 Nov 2024 17:26:58 +0100 Subject: [PATCH 7/8] fix tests --- dissect/target/tools/diff.py | 6 +++++- tests/tools/test_diff.py | 3 ++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/dissect/target/tools/diff.py b/dissect/target/tools/diff.py index 00e3d7e7d..a4082edb0 100644 --- a/dissect/target/tools/diff.py +++ b/dissect/target/tools/diff.py @@ 
-5,6 +5,7 @@ import argparse import dataclasses import logging +import os import re import shutil import sys @@ -374,7 +375,10 @@ def prompt(self) -> str: else: prompt_base = self.comparison.src_target.name - suffix = f"\x1b[1;32m{prompt_base}\x1b[0m:\x1b[1;34m{self.cwd}\x1b[0m$ " + if os.getenv("NO_COLOR"): + suffix = f"{prompt_base}:{self.cwd}$ " + else: + suffix = f"\x1b[1;32m{prompt_base}\x1b[0m:\x1b[1;34m{self.cwd}\x1b[0m$ " if len(self.targets) <= 2: return f"(diff) {suffix}" diff --git a/tests/tools/test_diff.py b/tests/tools/test_diff.py index b04065869..df6bae6ce 100644 --- a/tests/tools/test_diff.py +++ b/tests/tools/test_diff.py @@ -310,13 +310,14 @@ def test_shell_plugin(src_target: Target, dst_target: Target, capsys) -> None: def test_target_diff_shell(capsys, monkeypatch) -> None: with monkeypatch.context() as m: m.setattr(fsutils, "LS_COLORS", {}) + m.setenv("NO_COLOR", 1) src_target_path = absolute_path("_data/tools/diff/src.tar") dst_target_path = absolute_path("_data/tools/diff/dst.tar") m.setattr("sys.argv", ["target-diff", "--deep", "shell", src_target_path, dst_target_path]) m.setattr("sys.stdin", StringIO("ls changes")) target_diff() out, err = capsys.readouterr() - out = out.replace("(diff) (src_target/dst_target):/$", "").strip() + out = out.replace("(diff) src_target/dst_target:/$", "").strip() expected = [ "changed (modified)", From 99b71777ba995f22bae8fe23500fe719f8a68f5a Mon Sep 17 00:00:00 2001 From: Computer Network Investigation <121175071+JSCU-CNI@users.noreply.github.com> Date: Wed, 4 Dec 2024 12:54:37 +0100 Subject: [PATCH 8/8] Apply suggestions from code review Co-authored-by: Yun Zheng Hu --- dissect/target/tools/diff.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dissect/target/tools/diff.py b/dissect/target/tools/diff.py index a4082edb0..6dd74d2f9 100644 --- a/dissect/target/tools/diff.py +++ b/dissect/target/tools/diff.py @@ -163,10 +163,10 @@ def scandir(self, path: str) -> DirectoryDifferential: 
raise ValueError(f"{path} is not a directory on either the source or destination target!") src_target_entries = list(self.src_target.fs.scandir(path)) - src_target_children_paths = set([entry.path for entry in src_target_entries]) + src_target_children_paths = set(entry.path for entry in src_target_entries) dst_target_entries = list(self.dst_target.fs.scandir(path)) - dst_target_children_paths = set([entry.path for entry in dst_target_entries]) + dst_target_children_paths = set(entry.path for entry in dst_target_entries) paths_only_on_src_target = src_target_children_paths - dst_target_children_paths paths_only_on_dst_target = dst_target_children_paths - src_target_children_paths