From c528241c020f163277e0106572bdfd5b5546daff Mon Sep 17 00:00:00 2001 From: Joost Jansen Date: Tue, 23 Jan 2024 13:18:44 +0100 Subject: [PATCH 01/36] Initial commit Improvements, formatting, added tests Added more tests Move CRCMismatchException to general exceptions.py Refactor of functions, removal of logging Added another test, simplified code --- dissect/target/exceptions.py | 4 + .../plugins/apps/texteditor/__init__.py | 0 .../plugins/apps/texteditor/texteditor.py | 17 ++ .../plugins/apps/texteditor/windowsnotepad.py | 215 ++++++++++++++++++ .../plugins/os/windows/regf/shimcache.py | 11 +- .../3d0cc86e-dfc9-4f16-b74a-918c2c24188c.bin | Bin 0 -> 6263 bytes .../3f915e17-cf6c-462b-9bd1-2f23314cb979.bin | Bin 0 -> 145 bytes .../85167c9d-aac2-4469-ae44-db5dccf8f7f4.bin | Bin 0 -> 250 bytes .../c515e86f-08b3-4d76-844a-cddfcd43fcbb.bin | Bin 0 -> 200 bytes .../cfe38135-9dca-4480-944f-d5ea0e1e589f.bin | Bin 0 -> 230828 bytes .../dae80df8-e1e5-4996-87fe-b453f63fcb19.bin | Bin 0 -> 330 bytes .../windowsnotepad/wrong-checksum.bin | Bin 0 -> 145 bytes tests/plugins/apps/texteditor/__init__.py | 0 .../apps/texteditor/test_texteditor.py | 86 +++++++ 14 files changed, 327 insertions(+), 6 deletions(-) create mode 100644 dissect/target/plugins/apps/texteditor/__init__.py create mode 100644 dissect/target/plugins/apps/texteditor/texteditor.py create mode 100644 dissect/target/plugins/apps/texteditor/windowsnotepad.py create mode 100644 tests/_data/plugins/apps/texteditor/windowsnotepad/3d0cc86e-dfc9-4f16-b74a-918c2c24188c.bin create mode 100755 tests/_data/plugins/apps/texteditor/windowsnotepad/3f915e17-cf6c-462b-9bd1-2f23314cb979.bin create mode 100644 tests/_data/plugins/apps/texteditor/windowsnotepad/85167c9d-aac2-4469-ae44-db5dccf8f7f4.bin create mode 100644 tests/_data/plugins/apps/texteditor/windowsnotepad/c515e86f-08b3-4d76-844a-cddfcd43fcbb.bin create mode 100644 tests/_data/plugins/apps/texteditor/windowsnotepad/cfe38135-9dca-4480-944f-d5ea0e1e589f.bin create mode 100755 tests/_data/plugins/apps/texteditor/windowsnotepad/dae80df8-e1e5-4996-87fe-b453f63fcb19.bin create mode 100755 tests/_data/plugins/apps/texteditor/windowsnotepad/wrong-checksum.bin create mode 100644 tests/plugins/apps/texteditor/__init__.py create mode 100644 tests/plugins/apps/texteditor/test_texteditor.py diff --git a/dissect/target/exceptions.py b/dissect/target/exceptions.py index 22f46a604..1c435bcc7 100644 --- a/dissect/target/exceptions.py +++ b/dissect/target/exceptions.py @@ -114,3 +114,7 @@ class RegistryCorruptError(RegistryError): class ConfigurationParsingError(Error): """An error occurred during configuration parsing.""" + + +class CRCMismatchException(Error): + """A mismatch between CRC checksums has occurred.""" diff --git a/dissect/target/plugins/apps/texteditor/__init__.py b/dissect/target/plugins/apps/texteditor/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/dissect/target/plugins/apps/texteditor/texteditor.py b/dissect/target/plugins/apps/texteditor/texteditor.py new file mode 100644 index 000000000..853b384a9 --- /dev/null +++ b/dissect/target/plugins/apps/texteditor/texteditor.py @@ -0,0 +1,17 @@ +from dissect.target.helpers.descriptor_extensions import UserRecordDescriptorExtension +from dissect.target.helpers.record import create_extended_descriptor +from dissect.target.plugin import NamespacePlugin + +GENERIC_TAB_CONTENTS_RECORD_FIELDS = [ + ("string", "content"), + ("string", "content_length"), + ("string", "filename"), +] + +TexteditorTabContentRecord = create_extended_descriptor([UserRecordDescriptorExtension])( + "texteditor/tab", GENERIC_TAB_CONTENTS_RECORD_FIELDS +) + + +class TexteditorTabPlugin(NamespacePlugin): + __namespace__ = "texteditortab" diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py new file mode 100644 index 000000000..1fd316a57 --- /dev/null +++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py @@ -0,0 +1,215 @@ +import io +import zlib +from typing import BinaryIO, Iterator + +from dissect.target.exceptions import CRCMismatchException, UnsupportedPluginError +from dissect.target.helpers.descriptor_extensions import UserRecordDescriptorExtension +from dissect.target.helpers.fsutil import TargetPath +from dissect.target.helpers.record import create_extended_descriptor +from dissect.target.plugin import export +from dissect.target.plugins.apps.texteditor.texteditor import ( + GENERIC_TAB_CONTENTS_RECORD_FIELDS, + TexteditorTabPlugin, +) + + +def seek_size(fh: BinaryIO) -> int: + """ + Find the size of a file on disk. + + Args: + fh: A file-like object that we want to calculate the size of. + + Returns: + An integer representing the size (in bytes) of the file. + """ + pos = fh.tell() + fh.seek(0, io.SEEK_END) + size = fh.tell() + fh.seek(pos) + return size + + +def parse_large_structure_data_length(fh: BinaryIO) -> (int, bytes): + """ + Read a variable-length representation of a length field. Acts much like a ``varint`` object + from ``dissect.ntfs``, however it introduces some additional bit shifts and masking. + + The position of ``fh`` will be restored before returning. + + Args: + fh: A file-like object where we want to read the length bytes from. + + Returns: + Length of the data as an integer + The original bytes that have been processed to determine the length + """ + offset = fh.tell() + original_bytes = b"" + modified_bytes = b"" + + while True: + # Read the original byte + bt = fh.read(1) + + # Transform into an integer + bt_int = int.from_bytes(bt) + + # Shift this new byte a few places to the right, depending on the number of bytes that have already + # been processed + new_bt = bt_int >> len(original_bytes) + + # Add this byte back to + modified_bytes += new_bt.to_bytes(length=1) + + # Add the processed byte to the list of original by tes + original_bytes += bt + + # If the first bit of the original byte is a zero, this is the final byte + # Otherwise, continue until we find the zero-led byte + if not bt_int & 128: + break + + # Convert it to an integer + f = int.from_bytes(bytes=modified_bytes, byteorder="little") + + # Apply the mask + f = f ^ (2 ** ((len(original_bytes) - 1) * 8) >> 1) + + # Restore to original cursor + fh.seek(offset) + + return f, original_bytes + + +def _calc_crc32(data: bytes) -> bytes: + """Perform a CRC32 checksum on the data and return it as a big-endian uint32""" + return zlib.crc32(data).to_bytes(length=4, byteorder="big") + + +def _parse_large_structure_tab(handle: BinaryIO, header_has_crc: bool, header: bytes) -> str: + # A dictionary where the data will be stored in the correct order + content = dict() + + while True: + offset_bytes = handle.read(2) + + # If we reach the end of the file, break + if offset_bytes == b"": + break + + offset = int.from_bytes(offset_bytes, byteorder="big") + + # Parse the length field based on the first one, two, three or four bytes. + data_length, data_length_bytes = parse_large_structure_data_length(handle) + + # Move the cursor past the length bytes + handle.seek(handle.tell() + len(data_length_bytes)) + + chunk_data = b"" + for i in range(data_length): + r = handle.read(2) + chunk_data += r + + # Insert the chunk data into the correct offset. I have not yet encountered a file + # where the chunks were placed in a non-sequential order, but you never know. + for i in range(len(chunk_data)): + content[offset + i] = chunk_data[i].to_bytes(length=1) + + # CRC32 consists of the following data + crc_data_reconstructed = offset_bytes + data_length_bytes + chunk_data + + # If the header did not have a CRC, this means that it is combined with the only data entry + # in the file. So we need to prepend this extra header data. + if not header_has_crc: + # Furthermore, if the header does not have its own CRC32 it + # places a byte at the end to indicate the start + # of the CRC32. This should be included in the CRC32 calculation + crc_data_reconstructed = header + crc_data_reconstructed + handle.read(1) + + # Finally, read the CRC32 from disk and compare it + crc32_on_disk = handle.read(4) + + crc32_calculated = _calc_crc32(crc_data_reconstructed) + + if not crc32_on_disk == crc32_calculated: + raise CRCMismatchException(message=f"data, calculated={crc32_calculated}, expected={crc32_on_disk}") + + # Reconstruct the text + text_reconstructed = b"".join(content.values()) + text = text_reconstructed.decode("utf-16-le") + return text + + +class WindowsNotepadPlugin(TexteditorTabPlugin): + """Windows notepad tab content plugin.""" + + __namespace__ = "windowsnotepad" + + DIRECTORY = "AppData/Local/Packages/Microsoft.WindowsNotepad_8wekyb3d8bbwe/LocalState/TabState" + TextEditorTabRecord = create_extended_descriptor([UserRecordDescriptorExtension])( + "texteditor/windowsnotepad/tab", GENERIC_TAB_CONTENTS_RECORD_FIELDS + ) + + def __init__(self, target): + super().__init__(target) + self.users_dirs = [] + for user_details in self.target.user_details.all_with_home(): + cur_dir = user_details.home_path.joinpath(self.DIRECTORY) + if not cur_dir.exists(): + continue + self.users_dirs.append((user_details.user, cur_dir)) + + def check_compatible(self) -> None: + if not len(self.users_dirs): + raise UnsupportedPluginError("No tabs directories found") + + def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord: + handle: BinaryIO = file.open(mode="rb") + + # Skip the presumed magic bytes 0x4e5000 (NP\x00) + handle.read(3) + + # Read some of the info in the header. Not entirely sure at this point what info is in there, + # there seems to be an indication of the length of the file. + header = handle.read(6) + + # Whenever the bytes between the two \x01 bytes in the header are zeroed out, it means that the + # header itself has a CRC32 checksum + header_has_crc32 = True if header[2:4] == b"\x00\x00" else False + + if header_has_crc32: + # Header CRC32 is composed of the header, plus four more bytes. + header_crc_data = header + handle.read(4) + # After that, the CRC32 of the header is stored. + header_crc_on_disk = handle.read(4) + + # This should match + header_crc_calculated = _calc_crc32(header_crc_data) + if not header_crc_on_disk == header_crc_calculated: + raise CRCMismatchException( + message=f"header, calculated={header_crc_calculated}, " f"expected={header_crc_on_disk}" + ) + + text = _parse_large_structure_tab(handle, header_has_crc32, header) + + return self.TextEditorTabRecord(content=text, content_length=len(text), filename=file.name) + + @export(record=TextEditorTabRecord) + def tabs(self) -> Iterator[TextEditorTabRecord]: + """Return contents from the notepad tab. + + Yields TextEditorTabRecord with the following fields: + contents (string): The contents of the tab. + title (string): The title of the tab. + """ + for user, directory in self.users_dirs: + for file in self.target.fs.path(directory).iterdir(): + if file.name.endswith(".1.bin") or file.name.endswith(".0.bin"): + continue + + try: + yield self._process_tab_file(file) + except CRCMismatchException as e: + self.target.log.warning("CRC32 checksum mismatch in file: %s", file.name, exc_info=e) + continue diff --git a/dissect/target/plugins/os/windows/regf/shimcache.py b/dissect/target/plugins/os/windows/regf/shimcache.py index af72a068f..3aea545fd 100644 --- a/dissect/target/plugins/os/windows/regf/shimcache.py +++ b/dissect/target/plugins/os/windows/regf/shimcache.py @@ -7,7 +7,11 @@ from dissect.cstruct import Structure, cstruct from dissect.util.ts import wintimestamp -from dissect.target.exceptions import Error, RegistryError, UnsupportedPluginError +from dissect.target.exceptions import ( + CRCMismatchException, + RegistryError, + UnsupportedPluginError, +) from dissect.target.helpers.record import TargetRecordDescriptor from dissect.target.plugin import Plugin, export @@ -179,11 +183,6 @@ def nt61_entry_type(_) -> Structure: }, } - -class CRCMismatchException(Error): - pass - - ShimCacheGeneratorType = Union[CRCMismatchException, Tuple[Optional[datetime], str]] diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/3d0cc86e-dfc9-4f16-b74a-918c2c24188c.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/3d0cc86e-dfc9-4f16-b74a-918c2c24188c.bin new file mode 100644 index 0000000000000000000000000000000000000000..874ab51f54fd4b8f4cdabcf687ab13b26876b2a4 GIT binary patch literal 6263 zcmai&O>P@U5QRGj$UaN@0^Z~Rf)HpI7%_rmPe~)0fG8@WWcV1lNZ?I&S>!~yL+b0t z=c=9=+OQ>&Gt>3+>ecJ&dG||e-}V2kwTr(#|JmB-AAW9!_S~knZ=1GjPwm)V`{&!Z z{?PAj;@Ymg#QkmDnc~yi_~>Gb4N+#_8w#H)c%MoukqO{4(%@f?P7(kZ@Y(2ab=3=_1v!8-nIK! zNesll51WdHOftJ&yABWB!57hO+K=&X6V}M>7=PuDxRuc{Y(2!&Fa7Dq_zimJpHFWf z_gk#ngk|D=?N|x3ZCr5_AcM`Dc!IxV01BI)2l)EQdT+$dIOFrOM@t2XTDFz1J-JF# z$4tI!6>Kg2emtQ7>f9yz^$0d$6I+UPAJIR=5B^!Rw4;c*1Mh?j^;6y-W7WgSmN0n= zG!Ol0ve7j$={{ED%d95K$M`Xy%nUSUXmb%)Hy85=~ zg07%SAT-4la=7Web?o=m|H6FRGyiG8i~QzHlCL=%;FeM=>%;B9zHrptyq z)E0#&3Lpf)UkDs~?Cy2-`BSe^svH`8(>`?6q`Zf)q;8=z@Uq^OV6Lpk3Z^m;DSA$X zL`1(Ur;0cD?7~a(ajcbmROZlYYg)eTc)`HulRfQVLRxm}T_LCnONEIPEYK4vG8wRo z!6aFLF-B#HxcCN!{4W|G#-`FMf*PfT}8L%Zkgy- zRtwL$zSZo-{MuvaGVAaTW*6b*>J-ta2AjA#-+6^jRDycIU}~YRW);4i7uHq5 z+HI^gI^2Ev!p}LX-hZ&m1fy9+`)T&95HNFKCUe0#Dl6@*%vbBUBw?UY|BmbS5^< z0mQam2{419R*u{`HQVJ>Bi&mv8|*=p{Sd9=ujUMVgYUQZFjYi+Q3o>}saLo<$47jl zvWNJw&aLg`>Md0X{%AjzHj|O3s1a)zV88;<7%Bg*JmD8 zPwcNKx2GLID&zgl{g;(Bf7d3v5gDlhd>s+0 z6Q$Wvuc_u!XZ5d<3$sc+F`kJS%svG-`wCSPN( z8VNtMDz)4U0Tp1)_e;(&uqPdFa>{ZoNSAom3KFw27C5S&8W3>S6_!maINnM>eNhpp8H4EQis$xampO?ylEcfWNE*=nV{yVBshvo zR@|j-!E12$`C9mm^9q{64uLJ7X<-oaUR6=nlCNeq$9(=B!#VxQT-XI=bv}vu8Qvt| z6Yke@3bA@pkTQ+l#4x?pLVL}~ACIe-q zK2p?`2{k9JKIdntLpjTh3O88BqRp3onxC~3MLT?)GI-=@wzgfuh zlye*TLw>AA(X>+fpr>n>sZNeve&IcO0Wb4ZY)#~jG-YII?Za^{N8Q&>-il@ldseUI z{OZXNoYZ@~?*M1B8Nctt0{fpdroI+bm;#o@9Ty4#*fB*a6KmP;Xc9zTl literal 0 HcmV?d00001 diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/3f915e17-cf6c-462b-9bd1-2f23314cb979.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/3f915e17-cf6c-462b-9bd1-2f23314cb979.bin new file mode 100755 index 0000000000000000000000000000000000000000..a177bccb2e002a900e7916a6cf1d61b1767a357a GIT binary patch literal 145 zcmXwy(G5UA5JcZ~q5@IEX+V5v;4UO2g18e^=t9(K9DZhJ-%e($8jQ6Dunj!}f$USq z#7q?JSVe=}EEnaYSdixOrE;z2F7~H3#>9ML4zK;WMm1zCIQ4{&ykB$7c@)5W?JnV&bn5P6Mg?yE6H=v?t9vQeE|O*A{77t literal 0 HcmV?d00001 diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/cfe38135-9dca-4480-944f-d5ea0e1e589f.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/cfe38135-9dca-4480-944f-d5ea0e1e589f.bin new file mode 100644 index 0000000000000000000000000000000000000000..bedf49e05d5194ce5b18ceb671f176cb9a142e2e GIT binary patch literal 230828 zcmeI5OKu*?a)p17JnB7A8|X<-JQIWlXBQ0%jlOeIq8@-K(xP;s4S4R27zVrxPrV}N zSBFOtS(Vj2_259QCiOq+5fvGab0Q*t``63mpSJ%lm&?ol{ICDGT>j_Z|M_p1hs&4C z-R1srb@_OCygXgLZNI-AuYcO!yBe>3ynG$+-;8(e#&185->;uPcQ?NM<@s~B;~O`Z z&zHB`=Rc1BKaEe`J%93Xy!U0ib~BFfZTxmMK6f|1ad&yMy~1a|ZRdG2j`w+d_wMp< zZV6@_u}iYvB6t$1!y^+)3Tt$IIf$`jh{Ln*R4Bx8prxHKK*t3^yl$^q<8-LIvbMvV|?#w9GUC= zw#iB|yBV+eDv%7vd^0}5PwoIITy6J(eErG(-YhrEnRC9{u9hdrRqNQgU!TcO>28ym z&fTAak5=dX$FmfWI`4Az>npe#$K+VL*86ewpT=L#&v%Z;(G~L!dMCN?{B+;{7~lHz zd@RZ2aY*xF`!si>*Q81B$G16`wVJE^JpNjrtl7tFKmxPAKaFp(M((#KEgk=}r+zoi z`e#r2c+Oe@?}lvNZ+FjCa0fTzXfMY3z3%SEE7EUP7&?wD^?Z>WGSaF=+Gs0xq-V}I zjw|NoS%W#({qq|CKE6}h%Xv5lYl5}$aY!9KuIonrx7&HOQXa;UvR`uKMUa%`zB_wn=Uf~|zY-{Ujdtd8cGQZjG{MSid z+}~c4bgsQNpl8xQOIlBlfBlt};0eARXDUtCF}y;3M9C8iAPFFUN#JR_cCU4<^T+KO zm6gK=zqx$aq$bPzFpi|v!ph(*>s5)&b=UI~EX%-^V$Zoku88%nd+PE=J|D+fN+0KE zrE_%6VXduf`OPLTH1Nyw@#MiIX&tA(D+%fe%L;R)$O3yJi_9J1#n2=j0gbU#juMx? zAw%woC!ycVlfK%H^)S{9_szRy&F{v)(Joz?pEPIG9lgB#H2z$V)mq-Nq}#Px^4#0E ze)rP+zQ(Z2e2=puvzOy6*UwipTZ60d>fZB8I$dY5Dvv{Y_{*DN2e?rt}MjrxyX0;T>AW%?fg6iu6u1+WO=X>JP$OOXQ5Tiw>X#Y zi=*oa_SMFBEgjzd`a?hWQnml#xI{3DRpd`=%}N5+95j=-;M0&bXF}qvFXX7$?C;}u zG`e3CcgxlKsAU;tVas0}t!?=g`CzW(-Sauou=4NochT(9Wq#Mws-vu}>hUNa#@VzY z^mMr9b+m~^Ct_0^z_qQP1TsTIZ8^$wDz>YrMt1M0*~lJ=;t#QP{8XI5xsmT*`!GEb z>5En{(UH~)ukPg|edEbKjK8*XD_^d)rKg1c$REo#b4QM%W?7>#wuHyn)Y^(Y=1Fp; z#O|_cB~kt@&x!M z;mHxf6A{XP{x-h%dA!=A&vneBrzh?2tGuNAar02ff@q)nb7aREt(%e~pS7Lg$kMBy zpFM!AjQyL}Up-mt?<%sJB_peVzRnWr5v8@Gzout?w`uk7<6ekWdd7Gb6zco39-Cudd-tZdzNqukzkcKm2IL zJ-X-PO6r!`7MUx*9oM1b@^rW^?hD_eNSph$?DdQ!KWkOla%%`uLGP4Fa_m}VmU_>> z75!i#ykfe4p5tAv=6Zb2XVufxszNU3ahoJq&!c2euPIBlCLYlP$yV{2^hs95Rtlf` zH13T8g)Hg*&Yex~@8+n%j=jrf^k?Z+eLK#G zTsub4qh_wpasRBfJcqJx(kbhh<4x;9MV3A<$OO+ED~TMXOSZU2yMHr~2l=VGOnbi0Srin6^{WT3m!I?`2lP3ZTeZ=ap# zXoo76n>8HiVqezwO}JYBhOC5%zHtP@f$G5R) z`ljrIKHXQDo|CU#=SAs{|U;`K@K>#je0 zuS;}GsAIP;j-~XPpSI%GunS=qls`-F(K+Ej^Ga6!1t*05V0Gsx25mg|{9Zq+`0jWd z+2}dH!zN+96wQTQ@H$5#*bV$1Rz|*2afvbyVN-g#es-%IbVj~-2G`H1roBJs;T&NX z3Ky8UrmzcP7x3gAb$PW#$zd1dRq!KNA*_{bRYxy69%!G@?N`_Z+4QgrVHd(KIAS%v z*?A!%m2ORp3xr)z-bfrbOQ=Vb#rp6)>j=a<_ga;j3cW;rXs|xGwbPH zI7$lrJaY81_0rE?*@INmyP(>Lbp?g%HL6NuEptiu%rYEnpd5{z)#=t}E((PRHr*+QP$}{jp*GFsbQXKrWyn>0JQP5U)xcfIg$aXqPILsDQ!lIIcu; zLQHzC2j7+U!I1QNyqUqRN)e8m-UUl-wmo>os;<$~A*N05f{@I0rGVoIEN@k~SM7-L z<5||K5jl%kTvfKNQA_WFy--Qnm1I;J5-Fy40o_XPLiZMxfu?t%_m_lS2)mHp1?ng( z-1sbkC;v6|q>ForR{p8W96maF)TY`ZM*7-aU9kowx>B&x&@r!s==YyMT>I z??NUcd8LG1s7_&IA|IN`NV3iL5{h9}iJ!?x_^!QYQ(tCHGpx@1(yG~GrFWsG#Lkkj z?xA&^!|#0x=AQa%>0NMEplC_aS$Y>_)6tmy9E-CY+?C?1EWBO|yAXCE>_XTD>`Eph ztz#nP-}8JT#ZH-wG|$88*aneLCL?7s604_A*9yC^#~xMxsLIMU>emrR#dRI6GtC@Wlgu{c#XWs?tqxw!hsd&iomkm-LHj*Yi+7Zt;(KcM<_HrWn%xdwjCZb|afV)1hRq`vs9G1*o{9hH zbIbkgRypWAwL_Uhc6D~=;J$lKer3+K$jHJj$bx9UOlPQ>jFf#B)DhQnI_K53Yj&@t zwa@7GEBh|Urf1)UOh(GS3)y#}W?mT|TSa!qNE7rJyrA3L9#N)uK|8$loji(_k>64* zP0a4!l=F0C{%ppWjwRftGSEy$%4DQ{?iY3;yE+#VlgUV$%82L6uFmp2pt_(nuQy!R zJr3zzNbdqjqF8^;@>Q2%c6ELj)g&NO#B1qY==}-lU2x`8JL^z!@G#a9Q*OaFSyhV4 z_LwkHJv~zD8kbzP2G@B+CL_)9KDktOb*>pfVHY}wn8`?)jP$ro0(#O<)ju0}7; zI_!cbNVq;!Y(e&2Fj9Q<#`v7o$wp{II)d7iCn9f-UX_&s5+!8=5b>*u`|{6u8KLl{(g$2xH_UG zGa5zprlW|$o@On@BEEXY zp2&oVS?K*jf7d%6s!H*0tOz}o;~ojNC9A$CojvSA)zzNuwvow5j+4VKgk4Za0bW_P zFj#nORM>^E3ur{o1=G7gZ-;l=aZP7ksp8La!-444Dc0m%zz6*|WL3**L_;&SW zX5R(P^TEEn+hUmXE?iyy*z%&P(WClBlf+;Ohya;`^mdC++_9d?NUSo)Yha1OwlWzB zmf&t+g4#bz@xO9{>u10IecNkK{0h1icH!-oU!hU^>OILX>;iZehz{uUZ-e#zJbv~Z zwC^KSHr`0b8S0;1owKVm)qPP(C;OoHi}AI4wyPU}?NvBnG8L< zI@6Pz-UaDS*ag^JSI9ZO&92U_gO;z7?+Uvh_CM@`Cms;t{(Sjq{J9<;247RiQ`iMK zr}E^LMTcFWI??_W$= zm`%s3Lo)7X3A=#z`epn%&O)gE9@hlE7@w*R0A!h6ojvhruDHNHgTzTTu?ssiiKSumRZeS zv_&eEE4>S07s4*M_v`6-)qv8i@RhI&WJvR@e&iT}A$Wcs{ajLKj?%xi(fM(qPSXJxXGwi~ft(FbxxI4-fY?+MY{yD5P zK27xkLI6E+uDW=z-+k5DVuf9xOS3AktLnm5JM2Q(g|G`@7i48MFA}TKAmXsKtcBgJbpOtH}H<1-m4y$jCN!!A6G3>+jZy$fL%RPjnb zQ`m*8!8lQWFN9C^WcyCAs;XYSb2drBS<~#XV79XF!k3Y$pXnY}s!OQP+B8DlUxQO7 zra-HIA3w=r)k`6K_ADw8gXZ2qvi}d`uO?M;eXNcnzA5_|c7Z1WHjZDJWv^!>`Dv=B ze)qEryMSe&8Y1k1VsvaGjDYtrVAWBlbzFVM>cTF_wz2lAMgog9Tf2^PrguRTvhFvJ zkA+vIUwRi-Qv<>-V3qoQAYm7na^#tgJwlVNtYhO$MgrMlimLb9u~+MJAOGBPA1f%8 z$$uZ#{%)(7gI_12W-j9U@r;OxmC>lTSWoEKjb)F^@x-w{ZXV@n990#YT(_z==a`q3 zfsa1_e4VXoZN#yC+pZHI&I}i_E5+jW``LGaswL#Yn&O#?)1>pa__<6*%4DQM^t6J~ zyAXB(U()jeX@;igrgs6Wn%;$DMY%9x>NN67RX=KaZtYf$Mk%i7tF3HNCL?7s zQrHEtuVELkxW<9WCd@$|Yk;vUuydJ=B>SA+g|G{I^gtOjm6+O#oB2E-RYs@q)nOO* z*j?BK&5~i3%GV#Y_GVK+62KdX`ubi7J&O>66m~&zO<`Nb!!_yh5*>BtO$khc6?0JMO7z)OF5|)u%su6x~thU@PBY5`Zf` zYn6zttbZ#D_f9O}czh!4g5-n-@Qj#!bvyjLkaDUhYJ!43`8eJSyWol6S_NSjR+UxY zJFmvyOhzIDP4B{7l_(s~ee&sDAQr~jf<|cNs1u>%omD@Q-i5FWpEiFTc46gvv_qO^ zEQehnu18NCyVs5hb7d?n%ID2vceaMSp`vYmS4LV#dH?L)6jK{*o$WnG`7l1G+CgzY z_{DX!`7}OBJ&4c&u3Z_xp64M_z1`M0AUb^%UqkKKh`2)iH_j`iby3}uXk z-BRyM+(fPJ$_`a5*KyKJM(X)@*oCkQVHZ>tLG^v_b1kb;)d1A!V{?=>gw0x&h_U!X?NLkSG7b zD*H3(UHCjaw{`^xyFk1%?<|LG=DGILCwLn4sjtJZ3t<<+E`(hWmy4ZH%}6FA;eF4T z0?BEG%$$EG?8322WsU-2XNUxa_PBpOy$iy=dPS`2zKpG>cOktCRf7|Dftu~g3p%$# z6*<#>SWEKxnT&L_!RcK%%A~UI>N-g8LV6c8(I@P}zApwFOcf3FTEZgIyO4bs`utnP zu$s=$`%1$u^!S7o6m~&0IqRisSg`z=jFf#BG8u_D zDEls)d;YKsVHeCw%~4?A536Suc&+QLLUP4pvEJmR!Y+he2)l5%MS7}<47;Ft0CUdZ zP8PB&YHE2lsNxQP|0(n^2g*PL&Vxkg^OC}?wcR~0A+4I+t3%j2??1E4% z=4v?ZgXvE1f~wEMF5pcn-YP+tYQYwzSA9i<-{t4R)yWm~lC5gVu!Y&9o$5T^tWwfq) zsjv%S7pUkx+86T%$|unwYHh+Uz^fGQrYM+Il*veB)MreAvxQ?Ge*PWXtFQ}|VI1Wb zb|LJ7dNSxSbUYn)0cj93GwoXzM4gs5+uVsh{in0&iX!{Fw(xK;e{2j4$NVtg84eaDDkcj*lLE8h^QFjs!A{HQ~C_yMX2BT|E1KXKMpbHoXf^LvO+^WLM`^ z_l_;$vD*^6rJCdHyFjewNSNq0y$gSpFN9sF*uUd8m4Rm8h2GDQ-i7oo(CMRz_GN9) z?9AEgW@s`Q34BznFW#$XM&c$)g4xx%YWV1KcK(-r7cv>?&4_|Eal;4!lcKbrEU#4V zUX`B3nF|xBe|I*Ao_nDNWcg%hbc^3_M`y(I1j#ffkn?4x6K*}Bf& z^jOM_G~Cap?K+8!h?a=ud|cVs<4Op-Fn2EzgDS%5yFuFDq<6s>#M;-aG@I3=l>_RI zX76)@zwW=&6X8ha2k*zRl&kRS`*Boq0W+0+o=gD_lUc#6D6Z|z_&@9=t6taQIO1ko zu{s8y)v;@y`SE_wBbKlWNLRhy#A_XTD z>YfUTnNk1rE@xk1>smQFYK^^eu1u?@3vvCP+sm>33$xw!!D$E zA-xN-&>d$!v*qbsAZ8|7G&V^*7NEmc0rlcI`^zOK8kz7F2GhuVrLOc*oCkQ zciTRWzi$z+d*Zc%{wiMxyAXCE?84k*N$lnt3-i8nWtra1WTfJ9&+N?E>-OyFT>w=U zjswzwJ@za=X<12dW-+LDP*o3A4SbJ`LwHCbY2Zj8MZDW_=|b2v6QqBG>!^SGlP|`r zub$V1@RfJlwTdGU8s2-s*Abrlje60;E<6srD(r%^6Q<;G(`0rGKGyKO8M;T^&Rl!P zzr!wMGE#aMIv>zyvCGqnPvA53(S%)C%?D7Ot@@364Pw{@usiwuUdQKZq`R@_r6#u! zIdqIz`qnEkcqO|!yVu2Cm(*daV{KlcmLFM&s%4KA+kLICBfRXpkbM`j??U!n$Ydn# z&g}f!7{rnDcYi?kT?o4nb|LJ7ErsGSqFn#RPxTWymJ`BttSmZ<&9d)8VU5a5)l~-? zMC7X8@aoGX523TMqldOqM=XeU=nbQji)YpAGS+dio~$Cq>v09@6yvO_F5?xhkSh~A zE9}$N9{ao5N9?>x(%fQcQ$_%w6%1E4%mZs|%=Kp-e zunR~?n)r4+gI$1sYBy>gVE$l+rg|G|sw;Us%Oh&>w%+cpM<|&Vy$w=q1 zzojIko&SH>qHy9Gp|aGkf)eAqgo%*T5nnwGPtR05kXz#>s+t8i5!3O<_DNy9pesD9 z{q$l@Bd)A(?z$k4SY5+$Z73^HZlU`MyPz886~=y+gPWhJ=TTXBy%u&MlaVqRDU*@# z66sx7c|T#6@Y!J(>{AqF964uDKRh;AV%UZ3y8zmTKJ@D5unT+aQTuQR(OmNZ!s-Epgzv(xmiUM23zDQ*1lJ7hT zUXtxyFXvfm*ac(`79)A?+0(EKjwq#7VHb?!gk1=`5Ox7i)aOMIEB`hSZP4ak+Y)vG zJRTf5y$i-MDmL_Q{7mmc)ytg6ZOTTANw8Pyb5g=Cgk1=`5OyK#f^s{MKNxttu(I67%Tn4Bb|JkB;IiJmo$MI9!D^{!R5GFt zjI6MBMO7ZO?)I7YaV5_VyWqU>IGZo*g6*>=d9sIa*aad#>L=AhA=~|GTZMg2Xj${S z@#(65M1Javp--Q0zr6f3{&*ek>pS7y!!BUs_K`O-Ro@#cSo{m03%g((C+tGlg|G`@ z7w(4-QSW4rC>>+>*TOEyI~&KS*wDZ6Gwedxg{t-_{zSU7k5hN`U_HH%B$dzL&+hLExd0&qsWHQp(Q}0hPy$fL%j9#U8p>t7T7o7Q& zh0kQ9unTw$d1^fF5ko5+mQVIt`uk_Uu6RQ;R(~C@Rc$?9KfMdWy@`|7JyKy8h!4Xq zP#N)NtgzcH)^+quw2lT5X|qCmUVZc9mEN)9X?mR!&mZQ$yeI?CFr9*^?j z$5F(3bIrF~_J&>Ox<8ITXW1E_-shg#)mb_+%NmWzWTf;iIK~dUz+9%sVPEiDVHf6J zUD;xJU^HgG`#^dZ!Y+`DsY>$GSV!bu*p%9_@5OlKdaSLZhbCG{??QSP(z_rmBJ9E* zdFaTnIsdQ=>0MAai)&qBWX0TxH=NO{u0xrO6n4RxPuPX`TiqE5PS}O83ruey-+MJ| zDbaez7R|PtRZzHIqh=ExQKzsAYZRL-Nc{?77ru`SGVDUxqCKWP_lK!!K$-R3R^@~p zS0xpAxh96W`wSh^yK}Za*GPq3*zaH`9%}z2!Y+heP?dEiBc*rY^RTVXMr-efbqr)2 zLtHJ4rhgM2N}S{xS79C5)meUx+!c9Qf`z@8udCO@lW}*`wzB2{FeMMhS+1>T1;zxJrOyy&12+9q(v1GdV}!*HGt~HaC-zuq+)Jw#5p&5OyK#!dcuo`~6Htazs_EisQ|& z3t<&;J!!EdD7t9Uh5zN>1@l{*DuOBhC5O#sA_ipS0 zA9jK1OV*t|rX6;Hb(P))*FnoyNnc!{24(}I!>jw=4nB{qp+kzuNR03IjhUjtnz$My z$hnp}Oz8d*f?V@A97WAo6xU@-c*I1_Z7IVi#|2&DC)ZSb4gY3`oq4yx$L&>);cmgb zHI)rPLssvcp)tm~<~zYM_f9C|y$^%4qq7P&FYH2>Q14T)cJ$ZKAoK$y^Y`(SEVfSo zoNK~_XEHBQI&Q5Z@5!#tTK}vF)~i`$`u!{~RzK^DCjs`zCuiB~8L8i_&-w7Po4d=D zN%Aael^Ip;)qRS7IzvMDnZK9b1;sdMzp4O)0q?suJk4a!m8|TVV(+rd{aMu zSh%XPcs#SstQ52=%W7RcnG~FhTQv4AFa+iXLHI+*H=*1Ou419UCz(S zI37n=%scYWe5&r-(KE+V#Y83}rFQ{KEc+_G4!h7}gPy?>_kP$`76^+b`Gj3C`Vn@) z^@V+pccPqk;}0{BJcCB7tw*I{7ZkIWEeg9((RRhRVHaeZ9ZN}H!Y&Z!{aFU}W>_C` z#h%@kR?Tr_*oESnX31Fh_PJ|k5*%aq*TOC+Q!lF)#7hl`AUoO9l z_xd-mp7n2MsV~N>My$dv7@6Cn^2pg9r(^v3T-XJ6zz(|*cA?i!{5dat#5k0h9jmuM z;8oi_cV#MjIa)Nwh#XH=P#JWjm5EH-h_`RD6wSb;yr_l#v7 z<4#4Y#{A8q6_Q>kyFM9q0m;Kb!bKoAF%I&VdsY&!2Og%(OS0o>oaJ9)1bJ%L zxq9c%p1Qfhvkc>SPIODxJ=d?`Dd06|?1s%B9!x$2tw+;Zy-I3W8C z&Gn8QPt-ksAIB{1<&HQ9YeKxTDyr(bk^k+uTIw};wKK0icf~Ng(z(NarY`fNq*!l9 zFRHbz^MqYkM|kO7(9RmN!Z4{YElfA>QOS9J|ipa!ut_J2-5+{BenuVktB)1K;4j8RlPfkC_IjD!THhi@^zX_JWnje z->&NVlqY0YXQZbt17yon5iZW}g}k0)N4Atbix0bCDW-R!Fv@cs3cC8|xi*L0O^ZCUq|bzLLpE#F|u^XEGAZ zzZo6Ra^p2t5$o{h%TMFa-^TB(L*D5TS9$rW@b+{k)hxJj=*##%$G93*%jh57D2mTi zzwD~4V|?IvF1-uLtVa$=h9{EV1xHlHsyN;ZyO4bsvhM=TphJ6QD@3BGE zhOH|oT(40`@fwj(WHz(Esye{_+n!f`?kxKZ8B$(8GeJ5>?|M0}fn&xX-pdhX_j5W< ze^+MR&z$cXxSP+=l-_v3T4 zeXJxK=&EpYcwraFYqW1^dKbbjXciwi%j=;PWG1T2$Yi8UM#8G910=l*Fp@&KX7sAh zPB~UkDw99Xc{xL*ggI55S-6`#A;-&PBrzLcFvOVh^Y&?y(+Zh6_xwZ-pNC}0m+qf) zB9600c!ZI12C>R~D(r$IF|m5?fuwhjD2?Y-Y@!$#q(=-$#-MNc*{yQWxmsBmTP7oU z*Q5Q63fYl4+d{vMLt4`NX`kow)11wfrt27H8hym-*;E`6b|LJ78T@^A*SQr?f_d^l zdKbbjDB|k=;wY2yE45?7yl1HW`}|!Zy08mj7s4)tUGU^VJW_UbKCApl??Pqf>0Kzl zo5@IF7swCi{SZ1nqBzc|Kqe#E-yHY%RpxG%UkFJiBlY=Dj^?|UB6m#h0$59W7nrXG zLhOnb*Szo=C{=nF=-`Jx54)f!&iIr%gTpRp^<>|L8IJ@n%DxNeM%aZT@0i{NkVNVP zdS&FRjpOT`yw!Dew|Qb`PUevck$t)K7haEDQdXTIFj;&DZ4`SLcEMddJcFJyXuWDW zDiu%ZT~LG=c0nu0k@C!sD8FKTeIJ#V^Nu3qTg1|;F_&M2T_$EwFRB>&unS=q!Y&x? z3A=#5?zl~5pf{uY-?0GJh1zgy1VEeY`@?W@m&uR;Jh8-9qIBXH*wF_Db;r zsG8%oK06@23p^VnLL8@x9(B834@+-PklqEMviqK^ef%l;rFTK`=KVI8|F;e;0!Y*W2XLTE>6F?|}a%k}h^8AHpDa#JKz@!&Rh&2Q% z)lq_dwfcE2x@yXuRUK!q9&wE7-s#zbs^7sdUJOfkz2O(?FU@2me2Av7De@=gm0r)& zb~+BrCwpXfKO~CeC39sGzdlP@$Ku`eE)=IS_m*WclA^lo>b$~Wh3c1AL0ec=#K!VT za4CMJ&%<&BP4B(Bdq!J6Zg!bmU)b%jizn>DjA1z!F_y(7Mpks#g|G{BHr8&_=Mf96 zs8C7m@TBH3dI|j^a(X?E&imPSfi>vJ4m*hbTBCH>L$OwxUz^ED zL~VO3f4Aw^ADaxp)P)rwx0&U`7l6lCT|BQmK7UFmeqrr3*YOyktL8{HlV>EP%fIQV z@fr7_3hOw={$UqnM~*dq^^DTHK%OF9OYeg7x{kGLy?Lg>Syo~h?z_l)Bt z>$fBRtYc4KmCc^d*5_*w1@mcYoT+b-e+1nOyYS0og^_c&%8pIJIbJ<$zVMZIn?!{c zhh0zwl_UCnoFy;WaS>v%$DxhVX};&&>h%vF*3oyr+r1S2H@ypJY@`$ht-TNyVAbXR~2Sp(6B1TXY3|Do#MOBUg?uRGqIoLir>~UGo zEIOmJemX}ls<=0LW$lXY(7!^R z)V+a@Iri(>Iumo0 z)n=x5p;md=1v)RB_d8!u-p~KD??SiCw#Za|gk1=`;O-{k0%K*UOk=Wgvkw=>l1ciuS40@8GH*1Ti&)bY3|l@e4pNh z^e$vq=bBIt_Sb9aGa1R9C!OCvqE6Khu(mVlU7-3T`!3AW>0O&c2NN2ESHi1`m`jF(aMHNMRSMx<);1#Lc%GtAbtEY&>8Z z?pihS4b~T^I4kh?@sljJ&lft23c@ZNdAhI**yLgoieahOG8rlC0zF^aE35F1Gdpwk zx^c%$Mxu(vo=!Ccu6j`YS4Z}1`}WHvy$k7GSXJnCy>tzWW*yda aF(zb`hVGGJukx@9|6kaJ|NWO2|NVdP^V?)9*g#S~o1FHugQOm~q|RXAU@)H%EQM@b21EFgP*d`Z<6vM+ zWauvI;pGHb5MOfpCl>=_8N(OT=-=E7jHwJOb{XH~VPGs`keAGk=Vf55WH{J6TZIp# z_-zRLOMa0192Y5^6JTJ>VBk`D%`V8mn9oqb8d4wx(skgZ(;i_4#uA3rpU-rOfOH*a v={O|{5>eS~sU-%o!r)ZGD{%(K3WiU|RNW*%Qg@X9&X#0g)MHpEp7<32c6?II literal 0 HcmV?d00001 diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/wrong-checksum.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/wrong-checksum.bin new file mode 100755 index 0000000000000000000000000000000000000000..9e33f0c67d828f71b7e0cbfe6f4fa8a9059f7a84 GIT binary patch literal 145 zcmXwy(G5UA5JcZ~q5?IX2E>O3?m|K$h&xf07L=j|Ef|NN+1a;~*|Gv-tpRLJM^7O8 z)G;y Date: Wed, 14 Feb 2024 09:25:27 +0100 Subject: [PATCH 02/36] Removed unused 'seek_size' function --- .../plugins/apps/texteditor/windowsnotepad.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py index 1fd316a57..0f26ddfea 100644 --- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py +++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py @@ -1,4 +1,3 @@ -import io import zlib from typing import BinaryIO, Iterator @@ -13,23 +12,6 @@ ) -def seek_size(fh: BinaryIO) -> int: - """ - Find the size of a file on disk. - - Args: - fh: A file-like object that we want to calculate the size of. - - Returns: - An integer representing the size (in bytes) of the file. - """ - pos = fh.tell() - fh.seek(0, io.SEEK_END) - size = fh.tell() - fh.seek(pos) - return size - - def parse_large_structure_data_length(fh: BinaryIO) -> (int, bytes): """ Read a variable-length representation of a length field. Acts much like a ``varint`` object From b1bcd69fd5b86aeb76f8fdd6b34643c755f88445 Mon Sep 17 00:00:00 2001 From: Joost Jansen Date: Thu, 15 Feb 2024 23:22:50 +0100 Subject: [PATCH 03/36] Refactored the code to work with new LEB128 structure, added some more test data & test cases --- .../plugins/apps/texteditor/windowsnotepad.py | 234 +++++++++--------- .../ba291ccd-f1c3-4ca8-949c-c01f6633789d.bin | Bin 0 -> 560 bytes .../e609218e-94f2-45fa-84e2-f29df2190b26.bin | Bin 0 -> 141143 bytes .../apps/texteditor/test_texteditor.py | 41 +-- 4 files changed, 122 insertions(+), 153 deletions(-) create mode 100755 tests/_data/plugins/apps/texteditor/windowsnotepad/ba291ccd-f1c3-4ca8-949c-c01f6633789d.bin create mode 100755 tests/_data/plugins/apps/texteditor/windowsnotepad/e609218e-94f2-45fa-84e2-f29df2190b26.bin diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py index 0f26ddfea..cea3172a2 100644 --- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py +++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py @@ -1,6 +1,9 @@ +import io import zlib from typing import BinaryIO, Iterator +from dissect import cstruct + from dissect.target.exceptions import CRCMismatchException, UnsupportedPluginError from dissect.target.helpers.descriptor_extensions import UserRecordDescriptorExtension from dissect.target.helpers.fsutil import TargetPath @@ -11,57 +14,38 @@ TexteditorTabPlugin, ) - -def parse_large_structure_data_length(fh: BinaryIO) -> (int, bytes): - """ - Read a variable-length representation of a length field. Acts much like a ``varint`` object - from ``dissect.ntfs``, however it introduces some additional bit shifts and masking. - - The position of ``fh`` will be restored before returning. - - Args: - fh: A file-like object where we want to read the length bytes from. - - Returns: - Length of the data as an integer - The original bytes that have been processed to determine the length - """ - offset = fh.tell() - original_bytes = b"" - modified_bytes = b"" - - while True: - # Read the original byte - bt = fh.read(1) - - # Transform into an integer - bt_int = int.from_bytes(bt) - - # Shift this new byte a few places to the right, depending on the number of bytes that have already - # been processed - new_bt = bt_int >> len(original_bytes) - - # Add this byte back to - modified_bytes += new_bt.to_bytes(length=1) - - # Add the processed byte to the list of original by tes - original_bytes += bt - - # If the first bit of the original byte is a zero, this is the final byte - # Otherwise, continue until we find the zero-led byte - if not bt_int & 128: - break - - # Convert it to an integer - f = int.from_bytes(bytes=modified_bytes, byteorder="little") - - # Apply the mask - f = f ^ (2 ** ((len(original_bytes) - 1) * 8) >> 1) - - # Restore to original cursor - fh.seek(offset) - - return f, original_bytes +c_def = """ +struct data_entry_multi_block { + uint16 offset; + uleb128 len; + char data[len * 2]; + char crc32[4]; +}; + +struct data_entry_single_block { + uint16 offset; + uleb128 len; + char data[len * 2]; + char unk1; + char crc32[4]; +}; + +struct tab_header { + char magic[3]; // NP\x00 + char header_start[2]; // \x00\x01 + uleb128 len1; + uleb128 len2; + char header_end[2]; // \x01\x00 +}; + +struct tab_crc { + char unk[4]; + char crc32[4]; +}; +""" + +c_windowstab = cstruct.cstruct() +c_windowstab.load(c_def) def _calc_crc32(data: bytes) -> bytes: @@ -69,58 +53,21 @@ def _calc_crc32(data: bytes) -> bytes: return zlib.crc32(data).to_bytes(length=4, byteorder="big") -def _parse_large_structure_tab(handle: BinaryIO, header_has_crc: bool, header: bytes) -> str: - # A dictionary where the data will be stored in the correct order - content = dict() - - while True: - offset_bytes = handle.read(2) - - # If we reach the end of the file, break - if offset_bytes == b"": - break - - offset = int.from_bytes(offset_bytes, byteorder="big") - - # Parse the length field based on the first one, two, three or four bytes. - data_length, data_length_bytes = parse_large_structure_data_length(handle) - - # Move the cursor past the length bytes - handle.seek(handle.tell() + len(data_length_bytes)) - - chunk_data = b"" - for i in range(data_length): - r = handle.read(2) - chunk_data += r - - # Insert the chunk data into the correct offset. I have not yet encountered a file - # where the chunks were placed in a non-sequential order, but you never know. - for i in range(len(chunk_data)): - content[offset + i] = chunk_data[i].to_bytes(length=1) - - # CRC32 consists of the following data - crc_data_reconstructed = offset_bytes + data_length_bytes + chunk_data - - # If the header did not have a CRC, this means that it is combined with the only data entry - # in the file. So we need to prepend this extra header data. - if not header_has_crc: - # Furthermore, if the header does not have its own CRC32 it - # places a byte at the end to indicate the start - # of the CRC32. This should be included in the CRC32 calculation - crc_data_reconstructed = header + crc_data_reconstructed + handle.read(1) - - # Finally, read the CRC32 from disk and compare it - crc32_on_disk = handle.read(4) - - crc32_calculated = _calc_crc32(crc_data_reconstructed) +def seek_size(fh: BinaryIO) -> int: + """ + Find the size of a file on disk. - if not crc32_on_disk == crc32_calculated: - raise CRCMismatchException(message=f"data, calculated={crc32_calculated}, expected={crc32_on_disk}") + Args: + fh: A file-like object that we want to calculate the size of. - # Reconstruct the text - text_reconstructed = b"".join(content.values()) - text = text_reconstructed.decode("utf-16-le") - return text + Returns: + An integer representing the size (in bytes) of the file. + """ + pos = fh.tell() + fh.seek(0, io.SEEK_END) + size = fh.tell() + fh.seek(pos) + return size class WindowsNotepadPlugin(TexteditorTabPlugin): @@ -147,33 +94,80 @@ def check_compatible(self) -> None: raise UnsupportedPluginError("No tabs directories found") def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord: - handle: BinaryIO = file.open(mode="rb") + """ + Function that parses a binary tab file and reconstructs the contents. - # Skip the presumed magic bytes 0x4e5000 (NP\x00) - handle.read(3) + Args: + file: The binary file on disk that needs to be parsed. - # Read some of the info in the header. Not entirely sure at this point what info is in there, - # there seems to be an indication of the length of the file. - header = handle.read(6) + Returns: + A TextEditorTabRecord containing information that is in the tab. + """ + fh: BinaryIO = file.open(mode="rb") - # Whenever the bytes between the two \x01 bytes in the header are zeroed out, it means that the - # header itself has a CRC32 checksum - header_has_crc32 = True if header[2:4] == b"\x00\x00" else False + # There is always a 4 byte value at the end. The offset is always 2 bytes, and the length is always at + # least 1 byte. That means that if we reach the end of a data section, and we have equal or less + # than 4 + 2 + 1 = 7 bytes left, we should stop parsing new data blobs. + data_threshold = seek_size(fh) - 4 - 2 - 1 - if header_has_crc32: - # Header CRC32 is composed of the header, plus four more bytes. - header_crc_data = header + handle.read(4) - # After that, the CRC32 of the header is stored. - header_crc_on_disk = handle.read(4) + # Parse the generic header + header = c_windowstab.tab_header(fh) - # This should match - header_crc_calculated = _calc_crc32(header_crc_data) - if not header_crc_on_disk == header_crc_calculated: + # Some tabs are stored as one big block. In this case, the data is contiguous and the file + # only contains one CRC32 at the end which checksums the entire file (excluding the file magic). + # It is likely stored as a single block whenever a length field is nonzero in the header. + is_single_blob = header.len1 != 0 + + if is_single_blob: + # In this case, we parse the single block + data_entry = c_windowstab.data_entry_single_block(fh) + + # The header (minus the magic) plus all data (exluding the CRC32 at the end) is included + actual_crc32 = _calc_crc32(header.dumps()[3:] + data_entry.dumps()[:-4]) + + if data_entry.crc32 != actual_crc32: raise CRCMismatchException( - message=f"header, calculated={header_crc_calculated}, " f"expected={header_crc_on_disk}" + f"CRC32 mismatch in single-block file. " + f"expected={data_entry.crc32.hex()}, actual={actual_crc32.hex()} " ) - text = _parse_large_structure_tab(handle, header_has_crc32, header) + # Finally, decode the block using UTF16-LE, common for Windows. + text = data_entry.data.decode("utf-16-le") + + else: + text = ["\x00"] * 100 + + # in this case, the header contains a separate CRC32 checksum as well + header_crc = c_windowstab.tab_crc(fh) + + # the header, minus the file magic, plus some bytes from the extra header are + # required in the calculation + assert header_crc.crc32 == _calc_crc32(header.dumps()[3:] + header_crc.unk.dumps()) + + # otherwise, the file can be reconstructed out of many smaller entries + while fh.tell() < data_threshold: + data_entry = c_windowstab.data_entry_multi_block(fh) + + # Check for CRC mismatch in a data block + actual_crc32 = _calc_crc32(data_entry.dumps()[:-4]) + if data_entry.crc32 != actual_crc32: + raise CRCMismatchException( + f"CRC32 mismatch in single-block file. " + f"expected={data_entry.crc32.hex()}, actual={actual_crc32.hex()} " + ) + + # insert the text at the right offset in the textfile + # since we don't know the size of the file in the beginning, gradually increase the size + # of the list that holds the data + while data_entry.offset + data_entry.len > len(text) and data_entry.len > 0: + text += ["\x00"] * 100 + + # place the text on the correct spot + for i in range(data_entry.len): + text[data_entry.offset + i] = data_entry.data[(2 * i) : (2 * i) + 2].decode("utf-16-le") + + # join the data and strip off excess null bytes + text = "".join(text).rstrip("\x00") return self.TextEditorTabRecord(content=text, content_length=len(text), filename=file.name) diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/ba291ccd-f1c3-4ca8-949c-c01f6633789d.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/ba291ccd-f1c3-4ca8-949c-c01f6633789d.bin new file mode 100755 index 0000000000000000000000000000000000000000..1e3160846e22f77f2e16dfecc188500c2cd6c38c GIT binary patch literal 560 zcmeYZU|?YEW9nmKWME+EV+vu&V8~=BW>A2Vi9l8fLn=_D1jtHc$YV$Wi(~-BATkwT v8Hk=7hJ2ukbf8=jR24{v0)r8QDT5i1PGYEpnx+TjjoLM|!jN&tvh;5NE}~$Q literal 0 HcmV?d00001 diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/e609218e-94f2-45fa-84e2-f29df2190b26.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/e609218e-94f2-45fa-84e2-f29df2190b26.bin new file mode 100755 index 0000000000000000000000000000000000000000..fe17b9ad2b264d4aed4bbb90a7b2403d0499442e GIT binary patch literal 141143 zcmeI*K~5B56ot{(2@B~JFd?`M;=qAZ41^{zm?#6;jWajo5?Ba-b@jIg2PYcAZqUS7 zk9*JiySV#2bNKW3YW+OS%zE=>PV+p!=6Lz_e!cfPkL!`w_1^uw%)|MS(|T-w=J)xT z{d2z0kM$8x>$#tokJ>)tIG^Tr{kU0gkMnKbE`Q?n`klYezo-8%`xWMTFkj41UmtwE zEq%xn@{;_yC2#p!f7n`?KBjN#(|m%zY`Nfc7Ae<*>%sNldf;dLjGyr{e#Q&z8+S3{ z1-w8%(vS2b{YXF3kMtw`NI%k#^dtRfKUtw4=|}pJexx7iNBWU|q#x-=`jLL5AL&QV z*Li)a*Qa`Y>Yj-7BmGD}(vS2b{YXF3kMtw`NI%k#^dtSq{lT^!y8D&5U&)?|To0}X z_d9gIlE-nr&iOj$>zuE1zRv#0{>c8w{>c8w{>c8w{>c8w{>c8w{>c8w{)m_Ja$e5M zdHKuQBl~_?HDvd%vp;e@*dN&+*&o>-*&o>-*&o>-m5!Y^b>7r@Q{Nwt7c>!h0WaVM zynq+*0$#uicmXfq1*KzNzzcW*FW?1DL|(uPcmXfq1-yV4@B&`I3wS~4m>2K@Ucd`@ zK@*V|@B&`I3wQx9;03&Z7w`gJP&(!Xynq+*0$$KWyor1-yV4G!c0LFW?2dfEVxrUcd`@0WaVMrDI;e3wQx9-~~-YUcd`@0WaVMynq+* z0$#uictPoy7w`gJzzcXm6OkA20$#uicmXfq1-yV4@B&^?I_3qufEVxrUeHA31-yV4 z@B&`I3wQx9;03&Z7nF{90WaVMynq)p5qSYG;03&Z7w`gJzzcW*FW?2GV_v`ucmXfq z1x-X=zzcW*FW?2dfEVxrUcd`@LFt$m@B&`I3wS{jkr(g+Ucd`@0WaVMynq+*0$xx$ z<^{Zf7w`gJ&_v_~ynq+*0$#uicmXfq1-yV4l#Y1;FW?2dfEP3oc>yor1-yV4@B&`I z3wQx9;02{)Ucd`@0WaVMO+;S63wQx9;03&Z7w`gJzzcXm>6jPr0$#uictI197w`gJ zzzcW*FW?2dfEVxrUQjyb1-yV4@B&`YMC1j$fEVxrUcd`@0WaVMynq*!j(Gtu;03&Z z7c>!h0WaVMynq+*0$#uicmXfq1*KzNzzcW*FW?1DL|(uPcmXfq1-yV4@B&`I3wS~4 zm>2K@Ucd`@K@*V|@B&`I3wQx9;03&Z7w`gJP&(!Xynq+*0$$KWyor1-yV4G!c0LFW?2dfEVxrUcd`@0WaVMrDI;e3wQx9-~~-YUcd`@ z0WaVMynq+*0$#uictPoy7w`gJzzcXm6OkA20$#uicmXfq1-yV4@B&^?I_3qufEVxr zUeHA31-yV4@B&`I3wQx9;03&Z7nF{90WaVMynq)p5qSYG;03&Z7w`gJzzcW*FW?2G zV_v`ucmXfq1x-X=zzcW*FW?2dfEVxrUcd`@LFt$m@B&`I3wS{jkr(g+Ucd`@!T;$6 Khkw`aUq1l19XFi- literal 0 HcmV?d00001 diff --git a/tests/plugins/apps/texteditor/test_texteditor.py b/tests/plugins/apps/texteditor/test_texteditor.py index e7546f923..21e722a6f 100644 --- a/tests/plugins/apps/texteditor/test_texteditor.py +++ b/tests/plugins/apps/texteditor/test_texteditor.py @@ -1,9 +1,6 @@ import os from dissect.target.plugins.apps.texteditor import windowsnotepad -from dissect.target.plugins.apps.texteditor.windowsnotepad import ( - parse_large_structure_data_length, -) from tests._utils import absolute_path text1 = "This is an unsaved tab, UTF-8 encoded with Windows (CRLF). It's only 88 characters long." @@ -13,40 +10,19 @@ ) text3 = "This is a very short text." text4 = "This is another short test. And we should be able to parse this." -text5 = "A bit more text. This requires two bytes for the length! " +text5 = "This is a test and the text is longer than 256 bytes. " +text6 = "This is a test and the text is longer than 65536 bytes. " loremipsum = """Lorem ipsum dolor sit amet. Eum error blanditiis eum pariatur delectus ut consequuntur officiis a excepturi dignissimos et doloribus quia 33 perspiciatis soluta nam perspiciatis dolor. Ut repudiandae quidem cum sint modi qui sint consequatur. Aut autem quidem eum enim consequatur qui voluptate consequatur non similique voluptate. A vitae modi vel sint provident ut galisum tenetur sit voluptatem amet. Est impedit perspiciatis est repudiandae voluptates ut fugit alias! Eum magni esse aut velit illum qui excepturi aperiam. Ex dolores asperiores ut debitis omnis qui consequuntur dolore. Est voluptatem mollitia et quibusdam unde ea accusamus fuga. Cum quis galisum et impedit sunt qui aliquam perspiciatis sed modi quidem qui nisi molestias. Aut temporibus architecto ut neque voluptatem et consequatur deleniti sed accusantium quibusdam et omnis dignissimos ad rerum ipsam et rerum quia. Ut nihil repellat et eaque molestias quo iusto ipsum At optio sint eos quidem earum?\r\rEx deleniti unde eum tenetur rerum ea dolore numquam? Eos aperiam officiis et neque explicabo et enim atque ut eaque omnis non illum eveniet est molestias itaque et ratione voluptatem. Ea deserunt nemo et quos tempora et nostrum aperiam sit necessitatibus illo sit culpa placeat. Vel tempore quibusdam ut velit voluptate aut odio facere non voluptas earum est odio galisum et voluptas harum. Et blanditiis sapiente et nostrum laborum aut voluptatem explicabo a quasi assumenda. Est voluptatem quia eum minima galisum quo totam excepturi aut facilis enim vel voluptate repudiandae sit distinctio laboriosam. Quo possimus molestiae et molestiae accusantium est voluptas omnis sed obcaecati natus. Non vitae asperiores qui nostrum enim id saepe fugiat et incidunt quasi.\r\rEos ipsa facilis aut excepturi voluptatem a omnis magni vel magni iste. Sed ipsum consequatur qui reprehenderit deleniti et soluta molestiae. Ut vero assumenda id dolor ipsum in deleniti voluptatem aut quis quisquam sed repudiandae temporibus ab quia inventore. Sed velit fugit vel facere cumque et delectus ullam sed eaque impedit. Est veritatis dignissimos aut doloribus dolorem vel pariatur repellendus sit nesciunt similique eum architecto quia. Ea expedita veritatis eum dolorem molestiae ut enim fugit aut beatae quibusdam. Aut voluptas natus in quidem deleniti aut animi iure est incidunt tenetur qui culpa maiores! Et nostrum quaerat qui consequatur consequatur aut aliquam atque aut praesentium rerum et consequuntur exercitationem. Non accusantium ipsa vel consectetur vitae ut magnam autem et natus rerum ut consectetur inventore est doloremque temporibus 33 dolores doloribus! Aut perferendis optio et nostrum repellendus et fugit itaque ut nisi neque sed sint quaerat. Aut placeat architecto et eius sapiente eum molestiae quam. Quo mollitia sapiente non Quis neque non tempora laudantium. Quo distinctio quos et molestias natus sit veritatis consequuntur aut repellendus neque a porro galisum cum numquam nesciunt et animi earum? Aut dolorum dolore non assumenda omnis et molestiae amet id sint vero est eligendi harum sit temporibus magnam aut ipsam quos.\r\r""" # noqa: E501 -def test_read_length(tmp_path): - # 3-byte length - testfile1 = tmp_path / "test_file1.bin" - - with open(testfile1, "wb+") as file: - file.write(b"\xc9\x85\x07") - - with open(testfile1, "rb") as file: - read_length, original_bytes = parse_large_structure_data_length(file) - assert read_length == 115401 - assert original_bytes == b"\xc9\x85\x07" - - # 2-byte length - testfile2 = tmp_path / "test_file2.bin" - - with open(testfile2, "wb+") as file: - file.write(b"\xaf\x18") - - with open(testfile2, "rb") as file: - read_length, original_bytes = parse_large_structure_data_length(file) - assert read_length == 3119 - assert original_bytes == b"\xaf\x18" - - def test_texteditor_plugin(target_win, fs_win, tmp_path, target_win_users, caplog): file_text_map = { "c515e86f-08b3-4d76-844a-cddfcd43fcbb.bin": text1, "85167c9d-aac2-4469-ae44-db5dccf8f7f4.bin": text2, "dae80df8-e1e5-4996-87fe-b453f63fcb19.bin": text3, "3f915e17-cf6c-462b-9bd1-2f23314cb979.bin": text4, + "ba291ccd-f1c3-4ca8-949c-c01f6633789d.bin": (text5 * 5), + "e609218e-94f2-45fa-84e2-f29df2190b26.bin": (text6 * 1260), "3d0cc86e-dfc9-4f16-b74a-918c2c24188c.bin": loremipsum, "wrong-checksum.bin": "", # only added to check for corrupt checksum, not validity "cfe38135-9dca-4480-944f-d5ea0e1e589f.bin": (loremipsum * 37)[:-2], # removed the two newlines in this file @@ -70,15 +46,14 @@ def test_texteditor_plugin(target_win, fs_win, tmp_path, target_win_users, caplo # Check the amount of files assert len(list(tab_dir.iterdir())) == len(file_text_map.keys()) - # Only six should be parsed correctly, without errors/warnings - assert len(records) == 6 + # Only one should not be parsed correctly, without errors/warnings + assert len(records) == len(file_text_map.keys()) - 1 # One file should not return any contents, there should be an entry for this in the logging. assert "CRC32 checksum mismatch in file: wrong-checksum.bin" in caplog.text assert ( - "dissect.target.exceptions.CRCMismatchException: data, calculated=b'\\xa4\\x8d0\\xa6', " - "expected=b'\\xde\\xad\\xbe\\xef'" - ) in caplog.text + "CRCMismatchException: CRC32 mismatch in single-block file. expected=deadbeef, actual=a48d30a6" in caplog.text + ) # The recovered content in the records should match the original data, as well as the length for rec in records: From c634987fc7890d83612a60ca629152df06cb6845 Mon Sep 17 00:00:00 2001 From: Joost Jansen Date: Fri, 16 Feb 2024 00:01:01 +0100 Subject: [PATCH 04/36] Added more comments --- .../plugins/apps/texteditor/windowsnotepad.py | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py index cea3172a2..425b84409 100644 --- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py +++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py @@ -135,16 +135,19 @@ def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord: text = data_entry.data.decode("utf-16-le") else: - text = ["\x00"] * 100 - - # in this case, the header contains a separate CRC32 checksum as well + # In this case, the header contains a separate CRC32 checksum as well header_crc = c_windowstab.tab_crc(fh) - # the header, minus the file magic, plus some bytes from the extra header are + # The header, minus the file magic, plus some bytes from the extra header are # required in the calculation assert header_crc.crc32 == _calc_crc32(header.dumps()[3:] + header_crc.unk.dumps()) - # otherwise, the file can be reconstructed out of many smaller entries + # We don't know how many blocks there will be beforehand. So we also don't know the exact file + # size, since the file, next to data, also contains quite some metadata and checksums. + # Also, because blocks can possibly be present in a non-contiguous order, a list is used + # that gradually increases in size. This allows for quick and flexible insertion of chars. + text = ["\x00"] * 100 + while fh.tell() < data_threshold: data_entry = c_windowstab.data_entry_multi_block(fh) @@ -156,17 +159,16 @@ def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord: f"expected={data_entry.crc32.hex()}, actual={actual_crc32.hex()} " ) - # insert the text at the right offset in the textfile - # since we don't know the size of the file in the beginning, gradually increase the size - # of the list that holds the data + # Since we don't know the size of the file in the beginning, gradually increase the size + # of the list that holds the data if there is not enough room while data_entry.offset + data_entry.len > len(text) and data_entry.len > 0: text += ["\x00"] * 100 - # place the text on the correct spot + # Place the text at the correct offset. UTF16-LE consumes two bytes for one character. for i in range(data_entry.len): text[data_entry.offset + i] = data_entry.data[(2 * i) : (2 * i) + 2].decode("utf-16-le") - # join the data and strip off excess null bytes + # Join the chars and strip off excess null bytes that may be present text = "".join(text).rstrip("\x00") return self.TextEditorTabRecord(content=text, content_length=len(text), filename=file.name) From d3d35a11759e5e4e3c26981988b0a692797734b6 Mon Sep 17 00:00:00 2001 From: Joost Jansen <12032793+joost-j@users.noreply.github.com> Date: Mon, 19 Feb 2024 15:44:08 +0100 Subject: [PATCH 05/36] Refactor c_def to include parsing of both variants --- .../plugins/apps/texteditor/windowsnotepad.py | 119 ++++++++---------- 1 file changed, 50 insertions(+), 69 deletions(-) diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py index 425b84409..638a125ce 100644 --- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py +++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py @@ -1,4 +1,3 @@ -import io import zlib from typing import BinaryIO, Iterator @@ -15,14 +14,14 @@ ) c_def = """ -struct data_entry_multi_block { +struct multi_block_entry { uint16 offset; uleb128 len; char data[len * 2]; char crc32[4]; }; -struct data_entry_single_block { +struct single_block_entry { uint16 offset; uleb128 len; char data[len * 2]; @@ -30,18 +29,33 @@ char crc32[4]; }; -struct tab_header { - char magic[3]; // NP\x00 - char header_start[2]; // \x00\x01 - uleb128 len1; - uleb128 len2; - char header_end[2]; // \x01\x00 -}; - -struct tab_crc { +struct header_crc { char unk[4]; char crc32[4]; }; + +struct tab { + char magic[3]; // NP\x00 + char header_start[2]; // \x00\x01 + uleb128 len1; + uleb128 len2; + char header_end[2]; // \x01\x00 + + // Data can be stored in two says: + // 1. A single, contiguous block of data that holds all the data + // In this case, the header is included in the single CRC32 checksum present at the end of the block + // 2. Multiple blocks of data that, when combined, hold all the data + // In this case, the header has a separate CRC32 value stored at the end of the header + // The following bitmask operations basically check whether len1 is nonzero (boolean check) and depending + // on the outcome, parse 0 or 1 (so basically, parse or not parse) structs. + header_crc header_crc[((len1 | -len1) >> 31) ^ 1]; // Optional, only if len1 == 0 + single_block_entry single_block_entry[((len1 | (~len1 + 1)) >> 31) & 1]; // Optional, only if len1 > 0 + + + multi_block_entry multi_block_entries[EOF]; // Optional. If a single_block_entry is present + // this will already be at EOF, so it won't do anything. + // Otherwise, it will parse the individual blocks. +}; """ c_windowstab = cstruct.cstruct() @@ -49,27 +63,10 @@ def _calc_crc32(data: bytes) -> bytes: - """Perform a CRC32 checksum on the data and return it as a big-endian uint32""" + """Perform a CRC32 checksum on the data and return it as bytes""" return zlib.crc32(data).to_bytes(length=4, byteorder="big") -def seek_size(fh: BinaryIO) -> int: - """ - Find the size of a file on disk. - - Args: - fh: A file-like object that we want to calculate the size of. - - Returns: - An integer representing the size (in bytes) of the file. - """ - pos = fh.tell() - fh.seek(0, io.SEEK_END) - size = fh.tell() - fh.seek(pos) - return size - - class WindowsNotepadPlugin(TexteditorTabPlugin): """Windows notepad tab content plugin.""" @@ -105,25 +102,14 @@ def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord: """ fh: BinaryIO = file.open(mode="rb") - # There is always a 4 byte value at the end. The offset is always 2 bytes, and the length is always at - # least 1 byte. That means that if we reach the end of a data section, and we have equal or less - # than 4 + 2 + 1 = 7 bytes left, we should stop parsing new data blobs. - data_threshold = seek_size(fh) - 4 - 2 - 1 - - # Parse the generic header - header = c_windowstab.tab_header(fh) + tab = c_windowstab.tab(fh) - # Some tabs are stored as one big block. In this case, the data is contiguous and the file - # only contains one CRC32 at the end which checksums the entire file (excluding the file magic). - # It is likely stored as a single block whenever a length field is nonzero in the header. - is_single_blob = header.len1 != 0 + if tab.len1 != 0: + # Reconstruct the text of the single_block_entry variant + data_entry = tab.single_block_entry[0] - if is_single_blob: - # In this case, we parse the single block - data_entry = c_windowstab.data_entry_single_block(fh) - - # The header (minus the magic) plus all data (exluding the CRC32 at the end) is included - actual_crc32 = _calc_crc32(header.dumps()[3:] + data_entry.dumps()[:-4]) + # The header (minus the magic) plus all data (exluding the CRC32 at the end) is included in the checksum + actual_crc32 = _calc_crc32(tab.dumps()[3:-4]) if data_entry.crc32 != actual_crc32: raise CRCMismatchException( @@ -131,27 +117,19 @@ def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord: f"expected={data_entry.crc32.hex()}, actual={actual_crc32.hex()} " ) - # Finally, decode the block using UTF16-LE, common for Windows. text = data_entry.data.decode("utf-16-le") else: - # In this case, the header contains a separate CRC32 checksum as well - header_crc = c_windowstab.tab_crc(fh) - - # The header, minus the file magic, plus some bytes from the extra header are - # required in the calculation - assert header_crc.crc32 == _calc_crc32(header.dumps()[3:] + header_crc.unk.dumps()) + # Reconstruct the text of the multi_block_entry variant + # CRC32 is calculated based on the entire header, up to the point where the CRC32 value is stored + assert tab.header_crc[0].crc32 == _calc_crc32(tab.dumps()[3 : tab.dumps().index(tab.header_crc[0].crc32)]) - # We don't know how many blocks there will be beforehand. So we also don't know the exact file - # size, since the file, next to data, also contains quite some metadata and checksums. - # Also, because blocks can possibly be present in a non-contiguous order, a list is used - # that gradually increases in size. This allows for quick and flexible insertion of chars. - text = ["\x00"] * 100 + # Since we don't know the size of the file up front, and offsets don't necessarily have to be in order, + # a list is used to easily insert text at offsets + text = ["\x00"] - while fh.tell() < data_threshold: - data_entry = c_windowstab.data_entry_multi_block(fh) - - # Check for CRC mismatch in a data block + for data_entry in tab.multi_block_entries: + # Check the CRC32 checksum for this block actual_crc32 = _calc_crc32(data_entry.dumps()[:-4]) if data_entry.crc32 != actual_crc32: raise CRCMismatchException( @@ -159,17 +137,20 @@ def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord: f"expected={data_entry.crc32.hex()}, actual={actual_crc32.hex()} " ) - # Since we don't know the size of the file in the beginning, gradually increase the size - # of the list that holds the data if there is not enough room - while data_entry.offset + data_entry.len > len(text) and data_entry.len > 0: - text += ["\x00"] * 100 + # If there is no data to be added, skip. This may happen sometimes. + if data_entry.len <= 0: + continue + + # Extend the list if required. All characters need to fit in the list. + while data_entry.offset + data_entry.len > len(text): + text += "\x00" # Place the text at the correct offset. UTF16-LE consumes two bytes for one character. for i in range(data_entry.len): text[data_entry.offset + i] = data_entry.data[(2 * i) : (2 * i) + 2].decode("utf-16-le") - # Join the chars and strip off excess null bytes that may be present - text = "".join(text).rstrip("\x00") + # Join all the characters to reconstruct the original text + text = "".join(text) return self.TextEditorTabRecord(content=text, content_length=len(text), filename=file.name) From cef81d0ef141eeb5a288e02c96f91fc3add27879 Mon Sep 17 00:00:00 2001 From: Joost Jansen <12032793+joost-j@users.noreply.github.com> Date: Mon, 19 Feb 2024 15:49:09 +0100 Subject: [PATCH 06/36] Bump dissect.cstruct version to >=4.0.dev for clarity --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 0e912cfe8..961f45c01 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,7 @@ classifiers = [ ] dependencies = [ "defusedxml", - "dissect.cstruct>=3.0.dev,<4.0.dev", + "dissect.cstruct>=4.0.dev,<5.0.dev", "dissect.eventlog>=3.0.dev,<4.0.dev", "dissect.evidence>=3.0.dev,<4.0.dev", "dissect.hypervisor>=3.0.dev,<4.0.dev", From 7934f3e79c53050e9915259efc4401eea31d721a Mon Sep 17 00:00:00 2001 From: Joost Jansen <12032793+joost-j@users.noreply.github.com> Date: Mon, 26 Feb 2024 16:26:14 +0100 Subject: [PATCH 07/36] Apply suggestions from code review Co-authored-by: Stefan de Reuver <9864602+Horofic@users.noreply.github.com> --- .../plugins/apps/texteditor/texteditor.py | 6 +- .../plugins/apps/texteditor/windowsnotepad.py | 84 +++++++++---------- .../apps/texteditor/test_texteditor.py | 21 ++--- 3 files changed, 54 insertions(+), 57 deletions(-) diff --git a/dissect/target/plugins/apps/texteditor/texteditor.py b/dissect/target/plugins/apps/texteditor/texteditor.py index 853b384a9..f2988cd99 100644 --- a/dissect/target/plugins/apps/texteditor/texteditor.py +++ b/dissect/target/plugins/apps/texteditor/texteditor.py @@ -5,7 +5,7 @@ GENERIC_TAB_CONTENTS_RECORD_FIELDS = [ ("string", "content"), ("string", "content_length"), - ("string", "filename"), + ("path", "path"), ] TexteditorTabContentRecord = create_extended_descriptor([UserRecordDescriptorExtension])( @@ -13,5 +13,5 @@ ) -class TexteditorTabPlugin(NamespacePlugin): - __namespace__ = "texteditortab" +class TexteditorPlugin(NamespacePlugin): + __namespace__ = "texteditor" diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py index 638a125ce..9608b8f23 100644 --- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py +++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py @@ -10,24 +10,25 @@ from dissect.target.plugin import export from dissect.target.plugins.apps.texteditor.texteditor import ( GENERIC_TAB_CONTENTS_RECORD_FIELDS, - TexteditorTabPlugin, + TexteditorPlugin, ) c_def = """ struct multi_block_entry { uint16 offset; uleb128 len; - char data[len * 2]; + wchar data[len]; char crc32[4]; }; struct single_block_entry { uint16 offset; uleb128 len; - char data[len * 2]; + wchar data[len]; char unk1; char crc32[4]; }; +}; struct header_crc { char unk[4]; @@ -57,6 +58,9 @@ // Otherwise, it will parse the individual blocks. }; """ +TextEditorTabRecord = create_extended_descriptor([UserRecordDescriptorExtension])( + "texteditor/windowsnotepad/tab", GENERIC_TAB_CONTENTS_RECORD_FIELDS +) c_windowstab = cstruct.cstruct() c_windowstab.load(c_def) @@ -67,28 +71,27 @@ def _calc_crc32(data: bytes) -> bytes: return zlib.crc32(data).to_bytes(length=4, byteorder="big") -class WindowsNotepadPlugin(TexteditorTabPlugin): +class WindowsNotepadPlugin(TexteditorPlugin): """Windows notepad tab content plugin.""" __namespace__ = "windowsnotepad" - DIRECTORY = "AppData/Local/Packages/Microsoft.WindowsNotepad_8wekyb3d8bbwe/LocalState/TabState" - TextEditorTabRecord = create_extended_descriptor([UserRecordDescriptorExtension])( - "texteditor/windowsnotepad/tab", GENERIC_TAB_CONTENTS_RECORD_FIELDS - ) + GLOB = "AppData/Local/Packages/Microsoft.WindowsNotepad_*/LocalState/TabState/*.bin" def __init__(self, target): super().__init__(target) - self.users_dirs = [] + self.users_tabs = [] + for user_details in self.target.user_details.all_with_home(): - cur_dir = user_details.home_path.joinpath(self.DIRECTORY) - if not cur_dir.exists(): - continue - self.users_dirs.append((user_details.user, cur_dir)) + for tab_file in user_details.home_path.glob(self.GLOB): + if tab_file.name.endswith(".1.bin") or tab_file.name.endswith(".0.bin"): + continue + + self.users_tabs.append(tab_file) def check_compatible(self) -> None: - if not len(self.users_dirs): - raise UnsupportedPluginError("No tabs directories found") + if not self.users_tabs: + raise UnsupportedPluginError("No Windows Notepad temporary tab files found") def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord: """ @@ -107,17 +110,18 @@ def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord: if tab.len1 != 0: # Reconstruct the text of the single_block_entry variant data_entry = tab.single_block_entry[0] + size = data_entry.len # The header (minus the magic) plus all data (exluding the CRC32 at the end) is included in the checksum actual_crc32 = _calc_crc32(tab.dumps()[3:-4]) if data_entry.crc32 != actual_crc32: - raise CRCMismatchException( - f"CRC32 mismatch in single-block file. " - f"expected={data_entry.crc32.hex()}, actual={actual_crc32.hex()} " + self.target.log.warning( + "CRC32 mismatch in single-block file: %s " + "expected=%s, actual=%s", file.name, data_entry.crc32.hex(), actual_crc32.hex() ) - text = data_entry.data.decode("utf-16-le") + text = data_entry.data else: # Reconstruct the text of the multi_block_entry variant @@ -126,49 +130,45 @@ def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord: # Since we don't know the size of the file up front, and offsets don't necessarily have to be in order, # a list is used to easily insert text at offsets - text = ["\x00"] + text = [] + size = 0 for data_entry in tab.multi_block_entries: + # If there is no data to be added, skip. This may happen sometimes. + if data_entry.len <= 0: + continue + + size += data_entry.len # Check the CRC32 checksum for this block actual_crc32 = _calc_crc32(data_entry.dumps()[:-4]) if data_entry.crc32 != actual_crc32: - raise CRCMismatchException( - f"CRC32 mismatch in single-block file. " - f"expected={data_entry.crc32.hex()}, actual={actual_crc32.hex()} " + self.target.log.warning( + "CRC32 mismatch in multi-block file: %s " + "expected=%s, actual=%s", file.name, data_entry.crc32.hex(), actual_crc32.hex() ) - # If there is no data to be added, skip. This may happen sometimes. - if data_entry.len <= 0: - continue # Extend the list if required. All characters need to fit in the list. while data_entry.offset + data_entry.len > len(text): - text += "\x00" + text.append("\x00") # Place the text at the correct offset. UTF16-LE consumes two bytes for one character. - for i in range(data_entry.len): - text[data_entry.offset + i] = data_entry.data[(2 * i) : (2 * i) + 2].decode("utf-16-le") + for idx in range(data_entry.len): + text[data_entry.offset + idx] = data_entry.data[(2 * idx) : (2 * idx) + 2] # Join all the characters to reconstruct the original text text = "".join(text) - return self.TextEditorTabRecord(content=text, content_length=len(text), filename=file.name) + return TextEditorTabRecord(content=text, content_length=size, path=file) @export(record=TextEditorTabRecord) def tabs(self) -> Iterator[TextEditorTabRecord]: - """Return contents from the notepad tab. + """Return contents from Windows 11 temporary Notepad tabs. Yields TextEditorTabRecord with the following fields: contents (string): The contents of the tab. - title (string): The title of the tab. + content_length (int): The length of the tab content. + path (path): The path the content originates from. """ - for user, directory in self.users_dirs: - for file in self.target.fs.path(directory).iterdir(): - if file.name.endswith(".1.bin") or file.name.endswith(".0.bin"): - continue - - try: - yield self._process_tab_file(file) - except CRCMismatchException as e: - self.target.log.warning("CRC32 checksum mismatch in file: %s", file.name, exc_info=e) - continue + for file in self.users_tabs: + yield self._process_tab_file(file) diff --git a/tests/plugins/apps/texteditor/test_texteditor.py b/tests/plugins/apps/texteditor/test_texteditor.py index 21e722a6f..e797ad1d0 100644 --- a/tests/plugins/apps/texteditor/test_texteditor.py +++ b/tests/plugins/apps/texteditor/test_texteditor.py @@ -24,14 +24,16 @@ def test_texteditor_plugin(target_win, fs_win, tmp_path, target_win_users, caplo "ba291ccd-f1c3-4ca8-949c-c01f6633789d.bin": (text5 * 5), "e609218e-94f2-45fa-84e2-f29df2190b26.bin": (text6 * 1260), "3d0cc86e-dfc9-4f16-b74a-918c2c24188c.bin": loremipsum, - "wrong-checksum.bin": "", # only added to check for corrupt checksum, not validity + "wrong-checksum.bin": text4, # only added to check for corrupt checksum, not validity "cfe38135-9dca-4480-944f-d5ea0e1e589f.bin": (loremipsum * 37)[:-2], # removed the two newlines in this file } tabcache = absolute_path("_data/plugins/apps/texteditor/windowsnotepad/") user = target_win_users.user_details.find(username="John") - tab_dir = user.home_path.joinpath(windowsnotepad.WindowsNotepadPlugin.DIRECTORY) + tab_dir = user.home_path.joinpath( + "AppData/Local/Packages/Microsoft.WindowsNotepad_8wekyb3d8bbwe/LocalState/TabState" + ) fs_win.map_dir("Users\\John", tmp_path) @@ -45,17 +47,12 @@ def test_texteditor_plugin(target_win, fs_win, tmp_path, target_win_users, caplo # Check the amount of files assert len(list(tab_dir.iterdir())) == len(file_text_map.keys()) + assert len(records) == len(file_text_map.keys()) - # Only one should not be parsed correctly, without errors/warnings - assert len(records) == len(file_text_map.keys()) - 1 - - # One file should not return any contents, there should be an entry for this in the logging. - assert "CRC32 checksum mismatch in file: wrong-checksum.bin" in caplog.text - assert ( - "CRCMismatchException: CRC32 mismatch in single-block file. expected=deadbeef, actual=a48d30a6" in caplog.text - ) + # One file should still return contents, but there should be an entry for in the logging for a CRC missmatch. + assert "CRC32 mismatch in single-block file: wrong-checksum.bin expected=deadbeef, actual=a48d30a6" in caplog.text # The recovered content in the records should match the original data, as well as the length for rec in records: - assert rec.content == file_text_map[rec.filename] - assert len(rec.content) == len(file_text_map[rec.filename]) + assert rec.content == file_text_map[rec.path.name] + assert len(rec.content) == len(file_text_map[rec.path.name]) From e6ea0195a42f53563d06e9884186c7f1eab1cfca Mon Sep 17 00:00:00 2001 From: Joost Jansen <12032793+joost-j@users.noreply.github.com> Date: Mon, 26 Feb 2024 16:36:07 +0100 Subject: [PATCH 08/36] Removed duplicate brackets and refactor assertion into warning log --- .../target/plugins/apps/texteditor/windowsnotepad.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py index 9608b8f23..b723118a9 100644 --- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py +++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py @@ -28,7 +28,6 @@ char unk1; char crc32[4]; }; -}; struct header_crc { char unk[4]; @@ -126,7 +125,13 @@ def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord: else: # Reconstruct the text of the multi_block_entry variant # CRC32 is calculated based on the entire header, up to the point where the CRC32 value is stored - assert tab.header_crc[0].crc32 == _calc_crc32(tab.dumps()[3 : tab.dumps().index(tab.header_crc[0].crc32)]) + defined_header_crc32 = tab.header_crc[0].crc32 + actual_header_crc32 = _calc_crc32(tab.dumps()[3 : tab.dumps().index(defined_header_crc32)]) + if defined_header_crc32 != actual_header_crc32: + self.target.log.warning( + "CRC32 mismatch in header of multi-block file: %s " + "expected=%s, actual=%s", file.name, defined_header_crc32.hex(), actual_header_crc32.hex(), + ) # Since we don't know the size of the file up front, and offsets don't necessarily have to be in order, # a list is used to easily insert text at offsets @@ -147,7 +152,6 @@ def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord: "expected=%s, actual=%s", file.name, data_entry.crc32.hex(), actual_crc32.hex() ) - # Extend the list if required. All characters need to fit in the list. while data_entry.offset + data_entry.len > len(text): text.append("\x00") From 12fdd4ab76b3810c4b87cba538a8c8266ebcbdae Mon Sep 17 00:00:00 2001 From: Joost Jansen <12032793+joost-j@users.noreply.github.com> Date: Mon, 26 Feb 2024 16:46:27 +0100 Subject: [PATCH 09/36] Change variable names to fsize1 and fsize2, plus some linting --- .../plugins/apps/texteditor/windowsnotepad.py | 38 +++++++++++-------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py index b723118a9..909f640db 100644 --- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py +++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py @@ -3,7 +3,7 @@ from dissect import cstruct -from dissect.target.exceptions import CRCMismatchException, UnsupportedPluginError +from dissect.target.exceptions import UnsupportedPluginError from dissect.target.helpers.descriptor_extensions import UserRecordDescriptorExtension from dissect.target.helpers.fsutil import TargetPath from dissect.target.helpers.record import create_extended_descriptor @@ -37,19 +37,19 @@ struct tab { char magic[3]; // NP\x00 char header_start[2]; // \x00\x01 - uleb128 len1; - uleb128 len2; + uleb128 fsize1; + uleb128 fsize2; char header_end[2]; // \x01\x00 - + // Data can be stored in two says: // 1. A single, contiguous block of data that holds all the data // In this case, the header is included in the single CRC32 checksum present at the end of the block // 2. Multiple blocks of data that, when combined, hold all the data // In this case, the header has a separate CRC32 value stored at the end of the header - // The following bitmask operations basically check whether len1 is nonzero (boolean check) and depending + // The following bitmask operations basically check whether fsize1 is nonzero (boolean check) and depending // on the outcome, parse 0 or 1 (so basically, parse or not parse) structs. - header_crc header_crc[((len1 | -len1) >> 31) ^ 1]; // Optional, only if len1 == 0 - single_block_entry single_block_entry[((len1 | (~len1 + 1)) >> 31) & 1]; // Optional, only if len1 > 0 + header_crc header_crc[((fsize1 | -fsize1) >> 31) ^ 1]; // Optional, only if fsize1 == 0 + single_block_entry single_block_entry[((fsize1 | (~fsize1 + 1)) >> 31) & 1]; // Optional, only if fsize1 > 0 # noqa: E501 multi_block_entry multi_block_entries[EOF]; // Optional. If a single_block_entry is present @@ -58,7 +58,7 @@ }; """ TextEditorTabRecord = create_extended_descriptor([UserRecordDescriptorExtension])( - "texteditor/windowsnotepad/tab", GENERIC_TAB_CONTENTS_RECORD_FIELDS + "texteditor/windowsnotepad/tab", GENERIC_TAB_CONTENTS_RECORD_FIELDS ) c_windowstab = cstruct.cstruct() @@ -80,7 +80,7 @@ class WindowsNotepadPlugin(TexteditorPlugin): def __init__(self, target): super().__init__(target) self.users_tabs = [] - + for user_details in self.target.user_details.all_with_home(): for tab_file in user_details.home_path.glob(self.GLOB): if tab_file.name.endswith(".1.bin") or tab_file.name.endswith(".0.bin"): @@ -116,8 +116,10 @@ def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord: if data_entry.crc32 != actual_crc32: self.target.log.warning( - "CRC32 mismatch in single-block file: %s " - "expected=%s, actual=%s", file.name, data_entry.crc32.hex(), actual_crc32.hex() + "CRC32 mismatch in single-block file: %s " "expected=%s, actual=%s", + file.name, + data_entry.crc32.hex(), + actual_crc32.hex(), ) text = data_entry.data @@ -129,8 +131,10 @@ def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord: actual_header_crc32 = _calc_crc32(tab.dumps()[3 : tab.dumps().index(defined_header_crc32)]) if defined_header_crc32 != actual_header_crc32: self.target.log.warning( - "CRC32 mismatch in header of multi-block file: %s " - "expected=%s, actual=%s", file.name, defined_header_crc32.hex(), actual_header_crc32.hex(), + "CRC32 mismatch in header of multi-block file: %s " "expected=%s, actual=%s", + file.name, + defined_header_crc32.hex(), + actual_header_crc32.hex(), ) # Since we don't know the size of the file up front, and offsets don't necessarily have to be in order, @@ -142,14 +146,16 @@ def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord: # If there is no data to be added, skip. This may happen sometimes. if data_entry.len <= 0: continue - + size += data_entry.len # Check the CRC32 checksum for this block actual_crc32 = _calc_crc32(data_entry.dumps()[:-4]) if data_entry.crc32 != actual_crc32: self.target.log.warning( - "CRC32 mismatch in multi-block file: %s " - "expected=%s, actual=%s", file.name, data_entry.crc32.hex(), actual_crc32.hex() + "CRC32 mismatch in multi-block file: %s " "expected=%s, actual=%s", + file.name, + data_entry.crc32.hex(), + actual_crc32.hex(), ) # Extend the list if required. All characters need to fit in the list. From 39a34a7c6450852d17b726cf2c11c67518e1516b Mon Sep 17 00:00:00 2001 From: Joost Jansen <12032793+joost-j@users.noreply.github.com> Date: Mon, 4 Mar 2024 12:45:03 +0100 Subject: [PATCH 10/36] Refactored to work with LEB128 backport --- .../plugins/apps/texteditor/windowsnotepad.py | 37 ++++++++----------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py index 909f640db..9090fc4c7 100644 --- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py +++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py @@ -40,21 +40,6 @@ uleb128 fsize1; uleb128 fsize2; char header_end[2]; // \x01\x00 - - // Data can be stored in two says: - // 1. A single, contiguous block of data that holds all the data - // In this case, the header is included in the single CRC32 checksum present at the end of the block - // 2. Multiple blocks of data that, when combined, hold all the data - // In this case, the header has a separate CRC32 value stored at the end of the header - // The following bitmask operations basically check whether fsize1 is nonzero (boolean check) and depending - // on the outcome, parse 0 or 1 (so basically, parse or not parse) structs. - header_crc header_crc[((fsize1 | -fsize1) >> 31) ^ 1]; // Optional, only if fsize1 == 0 - single_block_entry single_block_entry[((fsize1 | (~fsize1 + 1)) >> 31) & 1]; // Optional, only if fsize1 > 0 # noqa: E501 - - - multi_block_entry multi_block_entries[EOF]; // Optional. If a single_block_entry is present - // this will already be at EOF, so it won't do anything. - // Otherwise, it will parse the individual blocks. }; """ TextEditorTabRecord = create_extended_descriptor([UserRecordDescriptorExtension])( @@ -106,13 +91,13 @@ def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord: tab = c_windowstab.tab(fh) - if tab.len1 != 0: - # Reconstruct the text of the single_block_entry variant - data_entry = tab.single_block_entry[0] + if tab.fsize1 != 0: + data_entry = c_windowstab.single_block_entry(fh) + size = data_entry.len # The header (minus the magic) plus all data (exluding the CRC32 at the end) is included in the checksum - actual_crc32 = _calc_crc32(tab.dumps()[3:-4]) + actual_crc32 = _calc_crc32(tab.dumps()[3:] + data_entry.dumps()[:-4]) if data_entry.crc32 != actual_crc32: self.target.log.warning( @@ -125,10 +110,13 @@ def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord: text = data_entry.data else: + header_crc = c_windowstab.header_crc(fh) + # Reconstruct the text of the multi_block_entry variant # CRC32 is calculated based on the entire header, up to the point where the CRC32 value is stored - defined_header_crc32 = tab.header_crc[0].crc32 - actual_header_crc32 = _calc_crc32(tab.dumps()[3 : tab.dumps().index(defined_header_crc32)]) + defined_header_crc32 = header_crc.crc32 + + actual_header_crc32 = _calc_crc32(tab.dumps()[3:] + header_crc.unk) if defined_header_crc32 != actual_header_crc32: self.target.log.warning( "CRC32 mismatch in header of multi-block file: %s " "expected=%s, actual=%s", @@ -142,7 +130,12 @@ def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord: text = [] size = 0 - for data_entry in tab.multi_block_entries: + while True: + try: + data_entry = c_windowstab.multi_block_entry(fh) + except EOFError: + break + # If there is no data to be added, skip. This may happen sometimes. if data_entry.len <= 0: continue From 85660284fcf4d1995aebe070e34a39b4197441da Mon Sep 17 00:00:00 2001 From: Joost Jansen <12032793+joost-j@users.noreply.github.com> Date: Mon, 4 Mar 2024 13:06:50 +0100 Subject: [PATCH 11/36] Process feedback --- dissect/target/exceptions.py | 4 - .../plugins/apps/texteditor/windowsnotepad.py | 149 +++++++++--------- .../plugins/os/windows/regf/shimcache.py | 11 +- 3 files changed, 83 insertions(+), 81 deletions(-) diff --git a/dissect/target/exceptions.py b/dissect/target/exceptions.py index 1c435bcc7..22f46a604 100644 --- a/dissect/target/exceptions.py +++ b/dissect/target/exceptions.py @@ -114,7 +114,3 @@ class RegistryCorruptError(RegistryError): class ConfigurationParsingError(Error): """An error occurred during configuration parsing.""" - - -class CRCMismatchException(Error): - """A mismatch between CRC checksums has occurred.""" diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py index 9090fc4c7..0152123d4 100644 --- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py +++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py @@ -1,12 +1,16 @@ import zlib -from typing import BinaryIO, Iterator +from typing import Iterator, List, Union -from dissect import cstruct +from dissect.cstruct import cstruct from dissect.target.exceptions import UnsupportedPluginError from dissect.target.helpers.descriptor_extensions import UserRecordDescriptorExtension from dissect.target.helpers.fsutil import TargetPath -from dissect.target.helpers.record import create_extended_descriptor +from dissect.target.helpers.record import ( + UnixUserRecord, + WindowsUserRecord, + create_extended_descriptor, +) from dissect.target.plugin import export from dissect.target.plugins.apps.texteditor.texteditor import ( GENERIC_TAB_CONTENTS_RECORD_FIELDS, @@ -46,12 +50,12 @@ "texteditor/windowsnotepad/tab", GENERIC_TAB_CONTENTS_RECORD_FIELDS ) -c_windowstab = cstruct.cstruct() +c_windowstab = cstruct() c_windowstab.load(c_def) def _calc_crc32(data: bytes) -> bytes: - """Perform a CRC32 checksum on the data and return it as bytes""" + """Perform a CRC32 checksum on the data and return it as bytes.""" return zlib.crc32(data).to_bytes(length=4, byteorder="big") @@ -64,20 +68,22 @@ class WindowsNotepadPlugin(TexteditorPlugin): def __init__(self, target): super().__init__(target) - self.users_tabs = [] + self.users_tabs: List[TargetPath, Union[UnixUserRecord, WindowsUserRecord]] = [] for user_details in self.target.user_details.all_with_home(): for tab_file in user_details.home_path.glob(self.GLOB): if tab_file.name.endswith(".1.bin") or tab_file.name.endswith(".0.bin"): continue - self.users_tabs.append(tab_file) + self.users_tabs.append((tab_file, user_details.user)) def check_compatible(self) -> None: if not self.users_tabs: raise UnsupportedPluginError("No Windows Notepad temporary tab files found") - def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord: + def _process_tab_file( + self, file: TargetPath, user: Union[UnixUserRecord, WindowsUserRecord] + ) -> TextEditorTabRecord: """ Function that parses a binary tab file and reconstructs the contents. @@ -87,82 +93,81 @@ def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord: Returns: A TextEditorTabRecord containing information that is in the tab. """ - fh: BinaryIO = file.open(mode="rb") - - tab = c_windowstab.tab(fh) - - if tab.fsize1 != 0: - data_entry = c_windowstab.single_block_entry(fh) - - size = data_entry.len - - # The header (minus the magic) plus all data (exluding the CRC32 at the end) is included in the checksum - actual_crc32 = _calc_crc32(tab.dumps()[3:] + data_entry.dumps()[:-4]) - - if data_entry.crc32 != actual_crc32: - self.target.log.warning( - "CRC32 mismatch in single-block file: %s " "expected=%s, actual=%s", - file.name, - data_entry.crc32.hex(), - actual_crc32.hex(), - ) - - text = data_entry.data - - else: - header_crc = c_windowstab.header_crc(fh) + with file.open("rb") as fh: + tab = c_windowstab.tab(fh) - # Reconstruct the text of the multi_block_entry variant - # CRC32 is calculated based on the entire header, up to the point where the CRC32 value is stored - defined_header_crc32 = header_crc.crc32 + if tab.fsize1 != 0: + data_entry = c_windowstab.single_block_entry(fh) - actual_header_crc32 = _calc_crc32(tab.dumps()[3:] + header_crc.unk) - if defined_header_crc32 != actual_header_crc32: - self.target.log.warning( - "CRC32 mismatch in header of multi-block file: %s " "expected=%s, actual=%s", - file.name, - defined_header_crc32.hex(), - actual_header_crc32.hex(), - ) + size = data_entry.len - # Since we don't know the size of the file up front, and offsets don't necessarily have to be in order, - # a list is used to easily insert text at offsets - text = [] - size = 0 + # The header (minus the magic) plus all data (exluding the CRC32 at the end) is included in the checksum + actual_crc32 = _calc_crc32(tab.dumps()[3:] + data_entry.dumps()[:-4]) - while True: - try: - data_entry = c_windowstab.multi_block_entry(fh) - except EOFError: - break - - # If there is no data to be added, skip. This may happen sometimes. - if data_entry.len <= 0: - continue - - size += data_entry.len - # Check the CRC32 checksum for this block - actual_crc32 = _calc_crc32(data_entry.dumps()[:-4]) if data_entry.crc32 != actual_crc32: self.target.log.warning( - "CRC32 mismatch in multi-block file: %s " "expected=%s, actual=%s", + "CRC32 mismatch in single-block file: %s " "expected=%s, actual=%s", file.name, data_entry.crc32.hex(), actual_crc32.hex(), ) - # Extend the list if required. All characters need to fit in the list. - while data_entry.offset + data_entry.len > len(text): - text.append("\x00") + text = data_entry.data + + else: + header_crc = c_windowstab.header_crc(fh) - # Place the text at the correct offset. UTF16-LE consumes two bytes for one character. - for idx in range(data_entry.len): - text[data_entry.offset + idx] = data_entry.data[(2 * idx) : (2 * idx) + 2] + # Reconstruct the text of the multi_block_entry variant + # CRC32 is calculated based on the entire header, up to the point where the CRC32 value is stored + defined_header_crc32 = header_crc.crc32 - # Join all the characters to reconstruct the original text - text = "".join(text) + actual_header_crc32 = _calc_crc32(tab.dumps()[3:] + header_crc.unk) + if defined_header_crc32 != actual_header_crc32: + self.target.log.warning( + "CRC32 mismatch in header of multi-block file: %s " "expected=%s, actual=%s", + file.name, + defined_header_crc32.hex(), + actual_header_crc32.hex(), + ) - return TextEditorTabRecord(content=text, content_length=size, path=file) + # Since we don't know the size of the file up front, and offsets don't necessarily have to be in order, + # a list is used to easily insert text at offsets + text = [] + size = 0 + + while True: + try: + data_entry = c_windowstab.multi_block_entry(fh) + except EOFError: + break + + # If there is no data to be added, skip. This may happen sometimes. + if data_entry.len <= 0: + continue + + size += data_entry.len + # Check the CRC32 checksum for this block + actual_crc32 = _calc_crc32(data_entry.dumps()[:-4]) + if data_entry.crc32 != actual_crc32: + self.target.log.warning( + "CRC32 mismatch in multi-block file: %s " "expected=%s, actual=%s", + file.name, + data_entry.crc32.hex(), + actual_crc32.hex(), + ) + + # Extend the list if required. All characters need to fit in the list. + while data_entry.offset + data_entry.len > len(text): + text.append("\x00") + + # Place the text at the correct offset. UTF16-LE consumes two bytes for one character. + for idx in range(data_entry.len): + text[data_entry.offset + idx] = data_entry.data[(2 * idx) : (2 * idx) + 2] + + # Join all the characters to reconstruct the original text + text = "".join(text) + + return TextEditorTabRecord(content=text, content_length=size, path=file, _target=self.target, _user=user) @export(record=TextEditorTabRecord) def tabs(self) -> Iterator[TextEditorTabRecord]: @@ -173,5 +178,5 @@ def tabs(self) -> Iterator[TextEditorTabRecord]: content_length (int): The length of the tab content. path (path): The path the content originates from. """ - for file in self.users_tabs: - yield self._process_tab_file(file) + for file, user in self.users_tabs: + yield self._process_tab_file(file, user) diff --git a/dissect/target/plugins/os/windows/regf/shimcache.py b/dissect/target/plugins/os/windows/regf/shimcache.py index 3aea545fd..06db50558 100644 --- a/dissect/target/plugins/os/windows/regf/shimcache.py +++ b/dissect/target/plugins/os/windows/regf/shimcache.py @@ -7,11 +7,7 @@ from dissect.cstruct import Structure, cstruct from dissect.util.ts import wintimestamp -from dissect.target.exceptions import ( - CRCMismatchException, - RegistryError, - UnsupportedPluginError, -) +from dissect.target.exceptions import Error, RegistryError, UnsupportedPluginError from dissect.target.helpers.record import TargetRecordDescriptor from dissect.target.plugin import Plugin, export @@ -183,6 +179,11 @@ def nt61_entry_type(_) -> Structure: }, } + +class CRCMismatchException(Error): + """A mismatch between CRC checksums has occurred.""" + + ShimCacheGeneratorType = Union[CRCMismatchException, Tuple[Optional[datetime], str]] From 56a26fa05b49ea805ff03c6566d1718a1d50d55d Mon Sep 17 00:00:00 2001 From: Joost Jansen <12032793+joost-j@users.noreply.github.com> Date: Mon, 4 Mar 2024 13:07:08 +0100 Subject: [PATCH 12/36] Set cstruct dependency to next release --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 961f45c01..ff583249c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,7 @@ classifiers = [ ] dependencies = [ "defusedxml", - "dissect.cstruct>=4.0.dev,<5.0.dev", + "dissect.cstruct>=3.13.dev,<4.0.dev", "dissect.eventlog>=3.0.dev,<4.0.dev", "dissect.evidence>=3.0.dev,<4.0.dev", "dissect.hypervisor>=3.0.dev,<4.0.dev", From b18e97584702281b07fe4995ff29472dd1a4cef6 Mon Sep 17 00:00:00 2001 From: Joost Jansen <12032793+joost-j@users.noreply.github.com> Date: Mon, 4 Mar 2024 13:09:34 +0100 Subject: [PATCH 13/36] Restore original shimcache.py file --- dissect/target/plugins/os/windows/regf/shimcache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dissect/target/plugins/os/windows/regf/shimcache.py b/dissect/target/plugins/os/windows/regf/shimcache.py index 06db50558..af72a068f 100644 --- a/dissect/target/plugins/os/windows/regf/shimcache.py +++ b/dissect/target/plugins/os/windows/regf/shimcache.py @@ -181,7 +181,7 @@ def nt61_entry_type(_) -> Structure: class CRCMismatchException(Error): - """A mismatch between CRC checksums has occurred.""" + pass ShimCacheGeneratorType = Union[CRCMismatchException, Tuple[Optional[datetime], str]] From 1a1d80d2d3ece5b4d11767e9946a9e0574d27be2 Mon Sep 17 00:00:00 2001 From: Joost Jansen <12032793+joost-j@users.noreply.github.com> Date: Mon, 25 Mar 2024 16:21:08 +0100 Subject: [PATCH 14/36] Move TextEditorTabRecord definition --- dissect/target/plugins/apps/texteditor/windowsnotepad.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py index 0152123d4..47c1de0a2 100644 --- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py +++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py @@ -46,13 +46,14 @@ char header_end[2]; // \x01\x00 }; """ -TextEditorTabRecord = create_extended_descriptor([UserRecordDescriptorExtension])( - "texteditor/windowsnotepad/tab", GENERIC_TAB_CONTENTS_RECORD_FIELDS -) c_windowstab = cstruct() c_windowstab.load(c_def) +TextEditorTabRecord = create_extended_descriptor([UserRecordDescriptorExtension])( + "texteditor/windowsnotepad/tab", GENERIC_TAB_CONTENTS_RECORD_FIELDS +) + def _calc_crc32(data: bytes) -> bytes: """Perform a CRC32 checksum on the data and return it as bytes.""" From b00bdc31d2f5ff96e4bd565ba5d634ea05ad9ca4 Mon Sep 17 00:00:00 2001 From: Joost Jansen <12032793+joost-j@users.noreply.github.com> Date: Mon, 25 Mar 2024 17:00:52 +0100 Subject: [PATCH 15/36] Remove content_length field from record --- dissect/target/plugins/apps/texteditor/texteditor.py | 1 - dissect/target/plugins/apps/texteditor/windowsnotepad.py | 6 +----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/dissect/target/plugins/apps/texteditor/texteditor.py b/dissect/target/plugins/apps/texteditor/texteditor.py index f2988cd99..ab3fadf03 100644 --- a/dissect/target/plugins/apps/texteditor/texteditor.py +++ b/dissect/target/plugins/apps/texteditor/texteditor.py @@ -4,7 +4,6 @@ GENERIC_TAB_CONTENTS_RECORD_FIELDS = [ ("string", "content"), - ("string", "content_length"), ("path", "path"), ] diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py index 47c1de0a2..5b01c274e 100644 --- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py +++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py @@ -100,8 +100,6 @@ def _process_tab_file( if tab.fsize1 != 0: data_entry = c_windowstab.single_block_entry(fh) - size = data_entry.len - # The header (minus the magic) plus all data (exluding the CRC32 at the end) is included in the checksum actual_crc32 = _calc_crc32(tab.dumps()[3:] + data_entry.dumps()[:-4]) @@ -117,7 +115,6 @@ def _process_tab_file( else: header_crc = c_windowstab.header_crc(fh) - # Reconstruct the text of the multi_block_entry variant # CRC32 is calculated based on the entire header, up to the point where the CRC32 value is stored defined_header_crc32 = header_crc.crc32 @@ -168,7 +165,7 @@ def _process_tab_file( # Join all the characters to reconstruct the original text text = "".join(text) - return TextEditorTabRecord(content=text, content_length=size, path=file, _target=self.target, _user=user) + return TextEditorTabRecord(content=text, path=file, _target=self.target, _user=user) @export(record=TextEditorTabRecord) def tabs(self) -> Iterator[TextEditorTabRecord]: @@ -176,7 +173,6 @@ def tabs(self) -> Iterator[TextEditorTabRecord]: Yields TextEditorTabRecord with the following fields: contents (string): The contents of the tab. - content_length (int): The length of the tab content. path (path): The path the content originates from. """ for file, user in self.users_tabs: From a124202999b883003924167d32fb26d22b6de50c Mon Sep 17 00:00:00 2001 From: Joost Jansen <12032793+joost-j@users.noreply.github.com> Date: Mon, 25 Mar 2024 17:02:05 +0100 Subject: [PATCH 16/36] Apply suggestions from code review Co-authored-by: Erik Schamper <1254028+Schamper@users.noreply.github.com> --- .../target/plugins/apps/texteditor/windowsnotepad.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py index 5b01c274e..4596e1b9a 100644 --- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py +++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py @@ -1,5 +1,5 @@ import zlib -from typing import Iterator, List, Union +from typing import Iterator from dissect.cstruct import cstruct @@ -69,7 +69,7 @@ class WindowsNotepadPlugin(TexteditorPlugin): def __init__(self, target): super().__init__(target) - self.users_tabs: List[TargetPath, Union[UnixUserRecord, WindowsUserRecord]] = [] + self.users_tabs: list[TargetPath, UnixUserRecord | WindowsUserRecord] = [] for user_details in self.target.user_details.all_with_home(): for tab_file in user_details.home_path.glob(self.GLOB): @@ -83,10 +83,9 @@ def check_compatible(self) -> None: raise UnsupportedPluginError("No Windows Notepad temporary tab files found") def _process_tab_file( - self, file: TargetPath, user: Union[UnixUserRecord, WindowsUserRecord] + self, file: TargetPath, user: UnixUserRecord | WindowsUserRecord ) -> TextEditorTabRecord: - """ - Function that parses a binary tab file and reconstructs the contents. + """Parse a binary tab file and reconstruct the contents. Args: file: The binary file on disk that needs to be parsed. @@ -105,7 +104,7 @@ def _process_tab_file( if data_entry.crc32 != actual_crc32: self.target.log.warning( - "CRC32 mismatch in single-block file: %s " "expected=%s, actual=%s", + "CRC32 mismatch in single-block file: %s (expected=%s, actual=%s)", file.name, data_entry.crc32.hex(), actual_crc32.hex(), From dbaca5d2a66044afe76b45b0cefd0afbee98a37a Mon Sep 17 00:00:00 2001 From: Joost Jansen <12032793+joost-j@users.noreply.github.com> Date: Mon, 25 Mar 2024 17:04:39 +0100 Subject: [PATCH 17/36] Change TabEditorTabRecord formatting --- dissect/target/plugins/apps/texteditor/windowsnotepad.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py index 4596e1b9a..e83f857e4 100644 --- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py +++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py @@ -164,7 +164,12 @@ def _process_tab_file( # Join all the characters to reconstruct the original text text = "".join(text) - return TextEditorTabRecord(content=text, path=file, _target=self.target, _user=user) + return TextEditorTabRecord( + content=text, + path=file, + _target=self.target, + _user=user + ) @export(record=TextEditorTabRecord) def tabs(self) -> Iterator[TextEditorTabRecord]: From d66fa54c394c3b5bfd9bd8433eb8bcf10dd2ae5b Mon Sep 17 00:00:00 2001 From: Joost Jansen <12032793+joost-j@users.noreply.github.com> Date: Mon, 25 Mar 2024 17:14:27 +0100 Subject: [PATCH 18/36] Black formatting, fix tests, add annotations import --- .../plugins/apps/texteditor/windowsnotepad.py | 19 +++++++------------ .../apps/texteditor/test_texteditor.py | 2 +- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py index e83f857e4..4c2a7dac4 100644 --- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py +++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import zlib from typing import Iterator @@ -22,7 +24,7 @@ uint16 offset; uleb128 len; wchar data[len]; - char crc32[4]; + char crc32[4]; // Big endian CRC32 }; struct single_block_entry { @@ -30,12 +32,12 @@ uleb128 len; wchar data[len]; char unk1; - char crc32[4]; + char crc32[4]; // Big endian CRC32 }; struct header_crc { char unk[4]; - char crc32[4]; + char crc32[4]; // Big endian CRC32 }; struct tab { @@ -82,9 +84,7 @@ def check_compatible(self) -> None: if not self.users_tabs: raise UnsupportedPluginError("No Windows Notepad temporary tab files found") - def _process_tab_file( - self, file: TargetPath, user: UnixUserRecord | WindowsUserRecord - ) -> TextEditorTabRecord: + def _process_tab_file(self, file: TargetPath, user: UnixUserRecord | WindowsUserRecord) -> TextEditorTabRecord: """Parse a binary tab file and reconstruct the contents. Args: @@ -164,12 +164,7 @@ def _process_tab_file( # Join all the characters to reconstruct the original text text = "".join(text) - return TextEditorTabRecord( - content=text, - path=file, - _target=self.target, - _user=user - ) + return TextEditorTabRecord(content=text, path=file, _target=self.target, _user=user) @export(record=TextEditorTabRecord) def tabs(self) -> Iterator[TextEditorTabRecord]: diff --git a/tests/plugins/apps/texteditor/test_texteditor.py b/tests/plugins/apps/texteditor/test_texteditor.py index e797ad1d0..fa19b11fb 100644 --- a/tests/plugins/apps/texteditor/test_texteditor.py +++ b/tests/plugins/apps/texteditor/test_texteditor.py @@ -50,7 +50,7 @@ def test_texteditor_plugin(target_win, fs_win, tmp_path, target_win_users, caplo assert len(records) == len(file_text_map.keys()) # One file should still return contents, but there should be an entry for in the logging for a CRC missmatch. - assert "CRC32 mismatch in single-block file: wrong-checksum.bin expected=deadbeef, actual=a48d30a6" in caplog.text + assert "CRC32 mismatch in single-block file: wrong-checksum.bin (expected=deadbeef, actual=a48d30a6)" in caplog.text # The recovered content in the records should match the original data, as well as the length for rec in records: From bdaccbc301186c6a5b4882d01cb2ae1753786d0b Mon Sep 17 00:00:00 2001 From: Joost Jansen <12032793+joost-j@users.noreply.github.com> Date: Mon, 25 Mar 2024 17:17:35 +0100 Subject: [PATCH 19/36] Bump cstruct version again --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index ff583249c..e2db9523c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,7 @@ classifiers = [ ] dependencies = [ "defusedxml", - "dissect.cstruct>=3.13.dev,<4.0.dev", + "dissect.cstruct>=3.14.dev,<4.0.dev", "dissect.eventlog>=3.0.dev,<4.0.dev", "dissect.evidence>=3.0.dev,<4.0.dev", "dissect.hypervisor>=3.0.dev,<4.0.dev", From ad7827389fae8bde0ad329c0856650b8959edd0a Mon Sep 17 00:00:00 2001 From: Joost Jansen <12032793+joost-j@users.noreply.github.com> Date: Thu, 28 Mar 2024 13:39:13 +0100 Subject: [PATCH 20/36] Bump dependencies as leb128 is now included in dev release --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e2db9523c..743e3aaac 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,7 @@ classifiers = [ ] dependencies = [ "defusedxml", - "dissect.cstruct>=3.14.dev,<4.0.dev", + "dissect.cstruct>=3.14.dev4,<4.0.dev", "dissect.eventlog>=3.0.dev,<4.0.dev", "dissect.evidence>=3.0.dev,<4.0.dev", "dissect.hypervisor>=3.0.dev,<4.0.dev", From 0d9c88f6a43ec169069512b7cd350c90151cf107 Mon Sep 17 00:00:00 2001 From: Joost Jansen <12032793+joost-j@users.noreply.github.com> Date: Thu, 28 Mar 2024 16:08:47 +0100 Subject: [PATCH 21/36] Implemented deletion of characters, refactored, added new tests --- .../plugins/apps/texteditor/windowsnotepad.py | 163 +++++++++++------- .../appclosed_saved_and_deletions.bin | Bin 0 -> 377 bytes .../windowsnotepad/appclosed_unsaved.bin | Bin 0 -> 63 bytes .../apps/texteditor/windowsnotepad/saved.bin | Bin 0 -> 139 bytes .../windowsnotepad/unsaved-with-deletions.bin | Bin 0 -> 460 bytes .../texteditor/windowsnotepad/unsaved.bin | Bin 0 -> 257 bytes .../apps/texteditor/test_texteditor.py | 6 + 7 files changed, 111 insertions(+), 58 deletions(-) create mode 100755 tests/_data/plugins/apps/texteditor/windowsnotepad/appclosed_saved_and_deletions.bin create mode 100755 tests/_data/plugins/apps/texteditor/windowsnotepad/appclosed_unsaved.bin create mode 100755 tests/_data/plugins/apps/texteditor/windowsnotepad/saved.bin create mode 100755 tests/_data/plugins/apps/texteditor/windowsnotepad/unsaved-with-deletions.bin create mode 100755 tests/_data/plugins/apps/texteditor/windowsnotepad/unsaved.bin diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py index 4c2a7dac4..97d5d979a 100644 --- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py +++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py @@ -1,6 +1,7 @@ from __future__ import annotations import zlib +from enum import IntEnum from typing import Iterator from dissect.cstruct import cstruct @@ -19,33 +20,43 @@ TexteditorPlugin, ) -c_def = """ -struct multi_block_entry { - uint16 offset; - uleb128 len; - wchar data[len]; - char crc32[4]; // Big endian CRC32 -}; +# Thanks to @Nordgaren, @daddycocoaman, @JustArion and @ogmini for their suggestions and feedback in the PR +# thread. This really helped figuring out the last missing bits and pieces +# required for recovering text from these files. -struct single_block_entry { - uint16 offset; - uleb128 len; - wchar data[len]; - char unk1; - char crc32[4]; // Big endian CRC32 +c_def = """ +struct header { + char magic[2]; // NP + uint8 unk0; // + uint8 fileState; // 0 if unsaved, 1 if saved +} + +struct header_saved_tab { + uleb128 filePathLength; + wchar filePath[filePathLength]; + uleb128 fileSize; + uleb128 encoding; + uleb128 carriageReturnType; + uleb128 timestamp; // Windows Filetime format (not unix timestamp) + char sha256[32]; + uleb128 unk0; + uleb128 unk1; + char crc32[4]; // Big endian CRC32 }; -struct header_crc { - char unk[4]; - char crc32[4]; // Big endian CRC32 +struct header_unsaved_tab { + uint8 unk0; + uleb128 fileSize; + uleb128 fileSizeDuplicate; // not used + uint8 unk1; + uint8 unk2; }; -struct tab { - char magic[3]; // NP\x00 - char header_start[2]; // \x00\x01 - uleb128 fsize1; - uleb128 fsize2; - char header_end[2]; // \x01\x00 +struct data_block { + uleb128 offset; + uleb128 nDeleted; + uleb128 nAdded; + wchar data[nAdded]; }; """ @@ -57,6 +68,11 @@ ) +class FileState(IntEnum): + Unsaved = 0x00 + Saved = 0x01 + + def _calc_crc32(data: bytes) -> bytes: """Perform a CRC32 checksum on the data and return it as bytes.""" return zlib.crc32(data).to_bytes(length=4, byteorder="big") @@ -94,31 +110,56 @@ def _process_tab_file(self, file: TargetPath, user: UnixUserRecord | WindowsUser A TextEditorTabRecord containing information that is in the tab. """ with file.open("rb") as fh: - tab = c_windowstab.tab(fh) + # Header is the same for all types + header = c_windowstab.header(fh) - if tab.fsize1 != 0: - data_entry = c_windowstab.single_block_entry(fh) + # File can be saved, or unsaved. Depending on the filestate, different header fields are present + # Currently, no information in the header is used in the outputted records, only the contents of the tab + tab = ( + c_windowstab.header_saved_tab(fh) + if header.fileState == FileState.Saved + else c_windowstab.header_unsaved_tab(fh) + ) - # The header (minus the magic) plus all data (exluding the CRC32 at the end) is included in the checksum - actual_crc32 = _calc_crc32(tab.dumps()[3:] + data_entry.dumps()[:-4]) + # In the case that the filesize is known up front, then this file is zet to a nonzero value + # This means that the data is stored in one block + if tab.fileSize != 0: + # So we only parse one block + data_entry = c_windowstab.data_block(fh) - if data_entry.crc32 != actual_crc32: + # An extra byte is appended to the single block, not yet sure where this is defined and/or used for + extra_byte = fh.read(1) + + # The CRC32 value is appended after the extra byte + defined_crc32 = fh.read(4) + + # The header (minus the magic) plus all data (including the extra byte) is included in the checksum + actual_crc32 = _calc_crc32(header.dumps()[3:] + tab.dumps() + data_entry.dumps() + extra_byte) + + if defined_crc32 != actual_crc32: self.target.log.warning( "CRC32 mismatch in single-block file: %s (expected=%s, actual=%s)", file.name, - data_entry.crc32.hex(), + defined_crc32.hex(), actual_crc32.hex(), ) text = data_entry.data else: - header_crc = c_windowstab.header_crc(fh) - # Reconstruct the text of the multi_block_entry variant - # CRC32 is calculated based on the entire header, up to the point where the CRC32 value is stored - defined_header_crc32 = header_crc.crc32 + # Here, the fileSize is zero'ed, meaning that the size is not known up front. + # Data may be stored in multiple, variable-length blocks. This happens, for example, when several + # additions and deletions of characters have been recorded and these changes have not been 'flushed' + + # First, parse 4 as of yet unknown bytes + # Likely holds some addition information about the tab (view options etc) + unknown_bytes = fh.read(4) + + # In this multi-block variant, he header itself has a CRC32 value as well + defined_header_crc32 = fh.read(4) - actual_header_crc32 = _calc_crc32(tab.dumps()[3:] + header_crc.unk) + # Calculate CRC32 of the header and check if it matches + actual_header_crc32 = _calc_crc32(header.dumps()[3:] + tab.dumps() + unknown_bytes) if defined_header_crc32 != actual_header_crc32: self.target.log.warning( "CRC32 mismatch in header of multi-block file: %s " "expected=%s, actual=%s", @@ -130,36 +171,42 @@ def _process_tab_file(self, file: TargetPath, user: UnixUserRecord | WindowsUser # Since we don't know the size of the file up front, and offsets don't necessarily have to be in order, # a list is used to easily insert text at offsets text = [] - size = 0 while True: + # Unfortunately, there is no way of determining how many blocks there are. So just try to parse + # until we reach EOF, after which we stop. try: - data_entry = c_windowstab.multi_block_entry(fh) + data_entry = c_windowstab.data_block(fh) except EOFError: break - # If there is no data to be added, skip. This may happen sometimes. - if data_entry.len <= 0: - continue - - size += data_entry.len - # Check the CRC32 checksum for this block - actual_crc32 = _calc_crc32(data_entry.dumps()[:-4]) - if data_entry.crc32 != actual_crc32: - self.target.log.warning( - "CRC32 mismatch in multi-block file: %s " "expected=%s, actual=%s", - file.name, - data_entry.crc32.hex(), - actual_crc32.hex(), - ) - - # Extend the list if required. All characters need to fit in the list. - while data_entry.offset + data_entry.len > len(text): - text.append("\x00") - - # Place the text at the correct offset. UTF16-LE consumes two bytes for one character. - for idx in range(data_entry.len): - text[data_entry.offset + idx] = data_entry.data[(2 * idx) : (2 * idx) + 2] + # Each block has a CRC32 value appended to the block + defined_crc32 = fh.read(4) + + # Either the nAdded is nonzero, or the nDeleted + if data_entry.nAdded > 0: + # Check the CRC32 checksum for this block + actual_crc32 = _calc_crc32(data_entry.dumps()) + if defined_crc32 != actual_crc32: + self.target.log.warning( + "CRC32 mismatch in multi-block file: %s " "expected=%s, actual=%s", + file.name, + data_entry.crc32.hex(), + actual_crc32.hex(), + ) + + # Extend the list if required. All characters need to fit in the list. + while data_entry.offset + data_entry.nAdded > len(text): + text.append("\x00" * 100) + + # Insert the text at the correct offset. + for idx in range(data_entry.nAdded): + text[data_entry.offset + idx] = data_entry.data[idx] + + elif data_entry.nDeleted > 0: + # Create a new slice. Include everything up to the offset, + # plus everything after the nDeleted following bytes + text = text[: data_entry.offset] + text[data_entry.offset + data_entry.nDeleted :] # Join all the characters to reconstruct the original text text = "".join(text) diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/appclosed_saved_and_deletions.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/appclosed_saved_and_deletions.bin new file mode 100755 index 0000000000000000000000000000000000000000..deac4577193f124c86faffa816e03448b5f64a7e GIT binary patch literal 377 zcmZvXu}T9$6h-e8`GbI^lPZO{g?4tb0ntLlkZzkX8%=P{24`ce7PPSn#6OTA{)N8~ zdm)`57UC~>CYYcg!=3l$z5DLFb9f}P9&1O5TJq}Xk~`7xCr{L%=5?+@xdwEaa=N0H zDx!7`)+**@Ho2WnA3n{vvRyrRj5crH_u{SRuhhS7^d`H<x%!0Rmf|GRT2xW$&>ut65QIB?3`h?o*Hu>6r%P3CU?ntcQL$5EI7 literal 0 HcmV?d00001 diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/appclosed_unsaved.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/appclosed_unsaved.bin new file mode 100755 index 0000000000000000000000000000000000000000..54e75e70e7677b4bbd6032c85dda4b5b3ccf70e6 GIT binary patch literal 63 zcmeYZU|?Vr7iVN(U=Vj^$YIE5C}zlH$YV%nP+&-8C;-A7AU_$%E&{mX0T$2VF+a?W=Lfy0+KNdrBHSZg9}ism?4{?gdv}y04Nj8kjPL56iZ=H zWY7bORsb;@E8~T&AExy$TJVy z^Qd}GIvT^H+=-=K4X6uK!7P)OFE)LaR7(hr7}7O_NBM>=hVbCpZ(@8GHWw6^zx>}k zn(#C}4CWV6(ju-i-ipDh={v@^qc8E`>|b6@c&^kHstETQ#TM&p!O=j>e!wD_O?EVsbzGZ|SfVo^|L#&X$t T?OSIYmmMn{O>u-^@$1kRVZVYF literal 0 HcmV?d00001 diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/unsaved.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/unsaved.bin new file mode 100755 index 0000000000000000000000000000000000000000..3622312c149293892cf94fe6ca8367ab42d3441a GIT binary patch literal 257 zcmeYZU|?VbBL)Tr`?c#Q0NJ4o3_2#(j0}u<42=IiZDwL%EN0kg^rW7Nk>Tk*8(T(3 zhUYVyQh?f?MZE|Hs`q19DfNFLP<=jw#M8r{fa*&az7-ekU}j)cU=TfWID!Raf?1K^ zeO3m>M26yL9(&jr7|R$8k8b>H!5ENEHA8 literal 0 HcmV?d00001 diff --git a/tests/plugins/apps/texteditor/test_texteditor.py b/tests/plugins/apps/texteditor/test_texteditor.py index fa19b11fb..ae0712fbb 100644 --- a/tests/plugins/apps/texteditor/test_texteditor.py +++ b/tests/plugins/apps/texteditor/test_texteditor.py @@ -26,6 +26,12 @@ def test_texteditor_plugin(target_win, fs_win, tmp_path, target_win_users, caplo "3d0cc86e-dfc9-4f16-b74a-918c2c24188c.bin": loremipsum, "wrong-checksum.bin": text4, # only added to check for corrupt checksum, not validity "cfe38135-9dca-4480-944f-d5ea0e1e589f.bin": (loremipsum * 37)[:-2], # removed the two newlines in this file + "saved.bin": "Saved!", + "unsaved.bin": "Not saved at all", + "unsaved-with-deletions.bin": "Not saved aasdflasd", + "appclosed_saved_and_deletions.bin": "Closing application now. It's saved but now I'm adding unsaved" + " changes and closing the application again. Dit a few deletions!", + "appclosed_unsaved.bin": "Closing application now", } tabcache = absolute_path("_data/plugins/apps/texteditor/windowsnotepad/") From 304db58f13322cce0cc7e58d435e35337e0a7519 Mon Sep 17 00:00:00 2001 From: Joost Jansen <12032793+joost-j@users.noreply.github.com> Date: Thu, 28 Mar 2024 16:44:16 +0100 Subject: [PATCH 22/36] Small comment changes --- .../plugins/apps/texteditor/windowsnotepad.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py index 97d5d979a..3bb34a9d0 100644 --- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py +++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py @@ -27,7 +27,7 @@ c_def = """ struct header { char magic[2]; // NP - uint8 unk0; // + uint8 unk0; uint8 fileState; // 0 if unsaved, 1 if saved } @@ -39,15 +39,13 @@ uleb128 carriageReturnType; uleb128 timestamp; // Windows Filetime format (not unix timestamp) char sha256[32]; - uleb128 unk0; - uleb128 unk1; - char crc32[4]; // Big endian CRC32 + char unk[6]; }; struct header_unsaved_tab { uint8 unk0; uleb128 fileSize; - uleb128 fileSizeDuplicate; // not used + uleb128 fileSizeDuplicate; uint8 unk1; uint8 unk2; }; @@ -130,7 +128,7 @@ def _process_tab_file(self, file: TargetPath, user: UnixUserRecord | WindowsUser # An extra byte is appended to the single block, not yet sure where this is defined and/or used for extra_byte = fh.read(1) - # The CRC32 value is appended after the extra byte + # The CRC32 value is appended after the extra byte in big-endian defined_crc32 = fh.read(4) # The header (minus the magic) plus all data (including the extra byte) is included in the checksum @@ -155,7 +153,7 @@ def _process_tab_file(self, file: TargetPath, user: UnixUserRecord | WindowsUser # Likely holds some addition information about the tab (view options etc) unknown_bytes = fh.read(4) - # In this multi-block variant, he header itself has a CRC32 value as well + # In this multi-block variant, he header itself has a CRC32 value in big-endian as well defined_header_crc32 = fh.read(4) # Calculate CRC32 of the header and check if it matches @@ -180,7 +178,7 @@ def _process_tab_file(self, file: TargetPath, user: UnixUserRecord | WindowsUser except EOFError: break - # Each block has a CRC32 value appended to the block + # Each block has a CRC32 value in big-endian appended to the block defined_crc32 = fh.read(4) # Either the nAdded is nonzero, or the nDeleted From 2ca889c2b20b9a14a6a639618a272b6bb80ed717 Mon Sep 17 00:00:00 2001 From: Joost Jansen <12032793+joost-j@users.noreply.github.com> Date: Thu, 28 Mar 2024 16:46:31 +0100 Subject: [PATCH 23/36] Remove chunked addition of zero bytes --- dissect/target/plugins/apps/texteditor/windowsnotepad.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py index 3bb34a9d0..10f065624 100644 --- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py +++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py @@ -195,7 +195,7 @@ def _process_tab_file(self, file: TargetPath, user: UnixUserRecord | WindowsUser # Extend the list if required. All characters need to fit in the list. while data_entry.offset + data_entry.nAdded > len(text): - text.append("\x00" * 100) + text.append("\x00") # Insert the text at the correct offset. for idx in range(data_entry.nAdded): From 74ffb83a2b3e64fc44dc0f4ef2004cec85504e10 Mon Sep 17 00:00:00 2001 From: Joost Jansen <12032793+joost-j@users.noreply.github.com> Date: Thu, 28 Mar 2024 17:21:46 +0100 Subject: [PATCH 24/36] Added new test, changed to list insertion instead of appending --- .../plugins/apps/texteditor/windowsnotepad.py | 6 +----- .../windowsnotepad/lots-of-deletions.bin | Bin 0 -> 2558 bytes tests/plugins/apps/texteditor/test_texteditor.py | 3 +++ 3 files changed, 4 insertions(+), 5 deletions(-) create mode 100755 tests/_data/plugins/apps/texteditor/windowsnotepad/lots-of-deletions.bin diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py index 10f065624..d7c92cae0 100644 --- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py +++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py @@ -193,13 +193,9 @@ def _process_tab_file(self, file: TargetPath, user: UnixUserRecord | WindowsUser actual_crc32.hex(), ) - # Extend the list if required. All characters need to fit in the list. - while data_entry.offset + data_entry.nAdded > len(text): - text.append("\x00") - # Insert the text at the correct offset. for idx in range(data_entry.nAdded): - text[data_entry.offset + idx] = data_entry.data[idx] + text.insert(data_entry.offset + idx, data_entry.data[idx]) elif data_entry.nDeleted > 0: # Create a new slice. Include everything up to the offset, diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/lots-of-deletions.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/lots-of-deletions.bin new file mode 100755 index 0000000000000000000000000000000000000000..203f12ee3ce97374643a9af1b6b33712f6ce961f GIT binary patch literal 2558 zcmb`JiC4|(AIIO1R1#X0CE`YsN~P3IMGCo5mZl^nQn$LQTesH&%6ZyQ~m(BoGMD? z?1SM#)wK*P0)9EZc74Dk&=@@`M+HoVWbgVreZd6Ce>qyDN)}CBhZr@m7@Vjb{ahU^ z8o6mccl&{f(P7}Bp+Vwh@<$)~gUL~+G9^KihK`lmJ{SPD1})u5jRR>&KDI?N2+R_) zX0vcD^13_O(q9|Q4dVIZt#rT?xMo^-QkTS;X)TU}Nux0Kd8h~G0h!ZcV|}vx6?$~- z5U^nQ*m%WG4P@him&Ea6-*jSKJVn8)(HHw2r&;&y}-ujIMlOk*P(T#?r zQCQd}j3TdH=Taj_Q&Opl(M|z*y_{L=ZbUM?Y(-@|T(jc?vjVn&t+`nxBZgROvx;|>FJ zhI?+V-ewF98_mTN7=EhDaOQdmDhHvM>*Y|d8M2=11$e2*KV(6nDut@oCz2)7&U>mQ zg=&AOy3&fgtmk#(8F%)z16zf& zrp3qXNwZG1vrq^Yg8h@K?>kT~Gr!3@r_l7vHhT*l!D68@Z{|lQuqfR3w;tk58(miB zH-9Ra1@f2gI6IBvcq%GmrZHSA5?r6kaO2J4RA+{p=AHRY47Wp{k8xzUlebNO3Pb%< zdrt?3TVnzfg$!3WjBw}q#kdt27R2+fL9ySVG@d^gLxqM;JbyGkjWKQI`C~E7GI}o0 zABB|(ZE8Hf7+W4~=;iq>@U-fEhzk`Gr=R&`I-Q-0W0S00$s+8z7UxC>U(^#{FoPoJ z__p}Y1Y3>4-Ip3>k)|eITI@~%`#mT!noXL8jWaStlq<~qgoX#L*`EI8VRJ}RKfz5f zmo(|)3e!F5Tm-DzQtd^jAZx?r=y??I$NXhS<}n-$5@mTY93IwP<;n2?fZx+u9t>GJK_f*B-!)7&pUsfAM>fQrVXxP4qgf28n)z#ZePYZ{X#5YaPk@wW zz2JYVzWFpO%<-sOKq*=i>Qxrfa_d$}%e?8dgfCkC<||sJYWr?iAJXW4h`j7e6Sm(h zvspy-v`F_3;#$^5L{eI|h1Q;&br4J5Cktq0te zQlC+YD;Ecm_=mg$ua{BwQ}6FSTux&5#fp?*5_j9TDprtKfHa#B3g~TQF)NgEeK4rj z6Vo-!%{Xo!M)2IO;Z-=@g4QmVfCzH`Xp+)b0_KX+Bi-IBsd)WA0>zO8rfG`CD7r*e zhRxfesZlI-j_8$AqqrG#JS>LXJ=Fa5VrjG8ejcw9N0!W{*q&8j@i6|T`kai4KhP7q zR8EW8R93Z6L6LiA1lq4Av2k}nYdocJm>4%=4W;OEjvbglLwR{l_G@W6E&~~=#!e&lV zj0@BGHAzYBjx~wN%F*%-p?y=7Hirm)i&j5@_%hEZs!z( zMWuHGcW{b_x1Hn3ot!N3ySCQgZD_L^yL$)H%4RXxrr z76&~ab)Mi9gXYcoS59)0U`}#FYXK)IhE*xPJH<(kkmA5sg`6y~Z}tbhB2EIlFBD%o z%}Ii>8Ag?-c}>P0H{O%Q1Lt*A4U0LA#}?bvZ_jXAjcoPZW@kBxp>b}c;WIfY{Sit>&NoFtf*s-u39lN{D3?%Xa>>W*2tB$g_5!XxX_%Qy)T@n)QL zIVTBx{GMI8#7U0j;X&&wl%o#D=aMQp#bLpHP1`C?3f!)W8(Ymuf){UlAJuS@WB=f1 zlFOXL=##1Q%N0%n>^-#KxRz5ePG<{i>y)-9zD`zGIYmKK`#R|wrx@(5(8##XNsjkF z>zug3$rAQ@O%IJ|8LM0#U*OMy7#vQ{G~v$yIr`s|58%&%2n3~cxqkT^kYd$nmd~F9 z;W&ILE$DyG0Uf^a6-Ij641cYgJ64O~om!D}5JTIQTK|Cz?VA%ZfT3f$j!=`~H!bxu z{TbfYN2h2oyxH0COFxF!9wl$p8D8jrZc<}--kknSmEq~PW*1dycCuHZay6RpV`ER# FzX7Rn_euZ& literal 0 HcmV?d00001 diff --git a/tests/plugins/apps/texteditor/test_texteditor.py b/tests/plugins/apps/texteditor/test_texteditor.py index ae0712fbb..a4bb730ad 100644 --- a/tests/plugins/apps/texteditor/test_texteditor.py +++ b/tests/plugins/apps/texteditor/test_texteditor.py @@ -32,6 +32,9 @@ def test_texteditor_plugin(target_win, fs_win, tmp_path, target_win_users, caplo "appclosed_saved_and_deletions.bin": "Closing application now. It's saved but now I'm adding unsaved" " changes and closing the application again. Dit a few deletions!", "appclosed_unsaved.bin": "Closing application now", + "lots-of-deletions.bin": "This a text, which is nothing special. But I am going to modify it a bit. " + "For example, I have removed quote some stuff. Adding a word in the beginning now." + ".. At this point, I've edited it quite a lot.", } tabcache = absolute_path("_data/plugins/apps/texteditor/windowsnotepad/") From c1480619db8d5f181fda78d89c0603db5a779e3d Mon Sep 17 00:00:00 2001 From: Joost Jansen <12032793+joost-j@users.noreply.github.com> Date: Thu, 28 Mar 2024 17:31:32 +0100 Subject: [PATCH 25/36] Refactored test file and removed fileState enum --- .../plugins/apps/texteditor/windowsnotepad.py | 8 +------- tests/plugins/apps/texteditor/test_texteditor.py | 16 +++++++++++----- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py index d7c92cae0..69855923f 100644 --- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py +++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py @@ -1,7 +1,6 @@ from __future__ import annotations import zlib -from enum import IntEnum from typing import Iterator from dissect.cstruct import cstruct @@ -66,11 +65,6 @@ ) -class FileState(IntEnum): - Unsaved = 0x00 - Saved = 0x01 - - def _calc_crc32(data: bytes) -> bytes: """Perform a CRC32 checksum on the data and return it as bytes.""" return zlib.crc32(data).to_bytes(length=4, byteorder="big") @@ -115,7 +109,7 @@ def _process_tab_file(self, file: TargetPath, user: UnixUserRecord | WindowsUser # Currently, no information in the header is used in the outputted records, only the contents of the tab tab = ( c_windowstab.header_saved_tab(fh) - if header.fileState == FileState.Saved + if header.fileState == 0x01 # 0x00 is unsaved, 0x01 is saved else c_windowstab.header_unsaved_tab(fh) ) diff --git a/tests/plugins/apps/texteditor/test_texteditor.py b/tests/plugins/apps/texteditor/test_texteditor.py index a4bb730ad..d27d9e15a 100644 --- a/tests/plugins/apps/texteditor/test_texteditor.py +++ b/tests/plugins/apps/texteditor/test_texteditor.py @@ -12,6 +12,15 @@ text4 = "This is another short test. And we should be able to parse this." text5 = "This is a test and the text is longer than 256 bytes. " text6 = "This is a test and the text is longer than 65536 bytes. " +text7 = ( + "This a text, which is nothing special. But I am going to modify it a bit. For example, " + "I have removed quote some stuff. Adding a word in the beginning now... " + "At this point, I've edited it quite a lot." +) +text8 = ( + "Closing application now. It's saved but now I'm adding unsaved changes and closing " + "the application again. Dit a few deletions!" +) loremipsum = """Lorem ipsum dolor sit amet. Eum error blanditiis eum pariatur delectus ut consequuntur officiis a excepturi dignissimos et doloribus quia 33 perspiciatis soluta nam perspiciatis dolor. Ut repudiandae quidem cum sint modi qui sint consequatur. Aut autem quidem eum enim consequatur qui voluptate consequatur non similique voluptate. A vitae modi vel sint provident ut galisum tenetur sit voluptatem amet. Est impedit perspiciatis est repudiandae voluptates ut fugit alias! Eum magni esse aut velit illum qui excepturi aperiam. Ex dolores asperiores ut debitis omnis qui consequuntur dolore. Est voluptatem mollitia et quibusdam unde ea accusamus fuga. Cum quis galisum et impedit sunt qui aliquam perspiciatis sed modi quidem qui nisi molestias. Aut temporibus architecto ut neque voluptatem et consequatur deleniti sed accusantium quibusdam et omnis dignissimos ad rerum ipsam et rerum quia. Ut nihil repellat et eaque molestias quo iusto ipsum At optio sint eos quidem earum?\r\rEx deleniti unde eum tenetur rerum ea dolore numquam? Eos aperiam officiis et neque explicabo et enim atque ut eaque omnis non illum eveniet est molestias itaque et ratione voluptatem. Ea deserunt nemo et quos tempora et nostrum aperiam sit necessitatibus illo sit culpa placeat. Vel tempore quibusdam ut velit voluptate aut odio facere non voluptas earum est odio galisum et voluptas harum. Et blanditiis sapiente et nostrum laborum aut voluptatem explicabo a quasi assumenda. Est voluptatem quia eum minima galisum quo totam excepturi aut facilis enim vel voluptate repudiandae sit distinctio laboriosam. Quo possimus molestiae et molestiae accusantium est voluptas omnis sed obcaecati natus. Non vitae asperiores qui nostrum enim id saepe fugiat et incidunt quasi.\r\rEos ipsa facilis aut excepturi voluptatem a omnis magni vel magni iste. Sed ipsum consequatur qui reprehenderit deleniti et soluta molestiae. Ut vero assumenda id dolor ipsum in deleniti voluptatem aut quis quisquam sed repudiandae temporibus ab quia inventore. Sed velit fugit vel facere cumque et delectus ullam sed eaque impedit. Est veritatis dignissimos aut doloribus dolorem vel pariatur repellendus sit nesciunt similique eum architecto quia. Ea expedita veritatis eum dolorem molestiae ut enim fugit aut beatae quibusdam. Aut voluptas natus in quidem deleniti aut animi iure est incidunt tenetur qui culpa maiores! Et nostrum quaerat qui consequatur consequatur aut aliquam atque aut praesentium rerum et consequuntur exercitationem. Non accusantium ipsa vel consectetur vitae ut magnam autem et natus rerum ut consectetur inventore est doloremque temporibus 33 dolores doloribus! Aut perferendis optio et nostrum repellendus et fugit itaque ut nisi neque sed sint quaerat. Aut placeat architecto et eius sapiente eum molestiae quam. Quo mollitia sapiente non Quis neque non tempora laudantium. Quo distinctio quos et molestias natus sit veritatis consequuntur aut repellendus neque a porro galisum cum numquam nesciunt et animi earum? Aut dolorum dolore non assumenda omnis et molestiae amet id sint vero est eligendi harum sit temporibus magnam aut ipsam quos.\r\r""" # noqa: E501 @@ -29,12 +38,9 @@ def test_texteditor_plugin(target_win, fs_win, tmp_path, target_win_users, caplo "saved.bin": "Saved!", "unsaved.bin": "Not saved at all", "unsaved-with-deletions.bin": "Not saved aasdflasd", - "appclosed_saved_and_deletions.bin": "Closing application now. It's saved but now I'm adding unsaved" - " changes and closing the application again. Dit a few deletions!", + "lots-of-deletions.bin": text7, + "appclosed_saved_and_deletions.bin": text8, "appclosed_unsaved.bin": "Closing application now", - "lots-of-deletions.bin": "This a text, which is nothing special. But I am going to modify it a bit. " - "For example, I have removed quote some stuff. Adding a word in the beginning now." - ".. At this point, I've edited it quite a lot.", } tabcache = absolute_path("_data/plugins/apps/texteditor/windowsnotepad/") From 2bf6e2f1287c439c204890f2eb8ddfadb0216029 Mon Sep 17 00:00:00 2001 From: Joost Jansen <12032793+joost-j@users.noreply.github.com> Date: Thu, 11 Apr 2024 14:51:16 +0200 Subject: [PATCH 26/36] Small comment changes/typos --- .../plugins/apps/texteditor/windowsnotepad.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py index 69855923f..d73db7b87 100644 --- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py +++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py @@ -20,7 +20,7 @@ ) # Thanks to @Nordgaren, @daddycocoaman, @JustArion and @ogmini for their suggestions and feedback in the PR -# thread. This really helped figuring out the last missing bits and pieces +# thread. This really helped to figure out the last missing bits and pieces # required for recovering text from these files. c_def = """ @@ -105,7 +105,7 @@ def _process_tab_file(self, file: TargetPath, user: UnixUserRecord | WindowsUser # Header is the same for all types header = c_windowstab.header(fh) - # File can be saved, or unsaved. Depending on the filestate, different header fields are present + # File can be saved, or unsaved. Depending on the file state, different header fields are present # Currently, no information in the header is used in the outputted records, only the contents of the tab tab = ( c_windowstab.header_saved_tab(fh) @@ -113,7 +113,7 @@ def _process_tab_file(self, file: TargetPath, user: UnixUserRecord | WindowsUser else c_windowstab.header_unsaved_tab(fh) ) - # In the case that the filesize is known up front, then this file is zet to a nonzero value + # In the case that the file size is known up front, then this fileSize is set to a nonzero value # This means that the data is stored in one block if tab.fileSize != 0: # So we only parse one block @@ -139,15 +139,15 @@ def _process_tab_file(self, file: TargetPath, user: UnixUserRecord | WindowsUser text = data_entry.data else: - # Here, the fileSize is zero'ed, meaning that the size is not known up front. + # Here, the fileSize is zeroed, meaning that the size is not known up front. # Data may be stored in multiple, variable-length blocks. This happens, for example, when several # additions and deletions of characters have been recorded and these changes have not been 'flushed' - # First, parse 4 as of yet unknown bytes - # Likely holds some addition information about the tab (view options etc) + # First, parse 4 unknown bytes. These likely + # hold some addition information about the tab (view options etc.) unknown_bytes = fh.read(4) - # In this multi-block variant, he header itself has a CRC32 value in big-endian as well + # In this multi-block variant, the header itself has a CRC32 value in big-endian as well defined_header_crc32 = fh.read(4) # Calculate CRC32 of the header and check if it matches From a19c49b37fc6bcd962a06bb0c17388f5ba83c829 Mon Sep 17 00:00:00 2001 From: Joost Jansen <12032793+joost-j@users.noreply.github.com> Date: Fri, 26 Apr 2024 10:52:35 +0200 Subject: [PATCH 27/36] Split plugin from parsing logic, added more tests --- .../plugins/apps/texteditor/windowsnotepad.py | 93 +++++++++++++------ .../apps/texteditor/test_texteditor.py | 54 ++++++++++- 2 files changed, 115 insertions(+), 32 deletions(-) diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py index d73db7b87..663f14ad6 100644 --- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py +++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py @@ -1,5 +1,6 @@ from __future__ import annotations +import logging import zlib from typing import Iterator @@ -13,7 +14,7 @@ WindowsUserRecord, create_extended_descriptor, ) -from dissect.target.plugin import export +from dissect.target.plugin import arg, export from dissect.target.plugins.apps.texteditor.texteditor import ( GENERIC_TAB_CONTENTS_RECORD_FIELDS, TexteditorPlugin, @@ -60,39 +61,28 @@ c_windowstab = cstruct() c_windowstab.load(c_def) -TextEditorTabRecord = create_extended_descriptor([UserRecordDescriptorExtension])( +WindowsNotepadTabRecord = create_extended_descriptor([UserRecordDescriptorExtension])( "texteditor/windowsnotepad/tab", GENERIC_TAB_CONTENTS_RECORD_FIELDS ) +WindowsNotepadTabContentRecord = create_extended_descriptor([])( + "texteditor/windowsnotepad/tab_content", GENERIC_TAB_CONTENTS_RECORD_FIELDS +) + def _calc_crc32(data: bytes) -> bytes: """Perform a CRC32 checksum on the data and return it as bytes.""" return zlib.crc32(data).to_bytes(length=4, byteorder="big") -class WindowsNotepadPlugin(TexteditorPlugin): - """Windows notepad tab content plugin.""" - - __namespace__ = "windowsnotepad" - - GLOB = "AppData/Local/Packages/Microsoft.WindowsNotepad_*/LocalState/TabState/*.bin" - - def __init__(self, target): - super().__init__(target) - self.users_tabs: list[TargetPath, UnixUserRecord | WindowsUserRecord] = [] - - for user_details in self.target.user_details.all_with_home(): - for tab_file in user_details.home_path.glob(self.GLOB): - if tab_file.name.endswith(".1.bin") or tab_file.name.endswith(".0.bin"): - continue - - self.users_tabs.append((tab_file, user_details.user)) +class WindowsNotepadTabContent: + """Windows notepad tab parser""" - def check_compatible(self) -> None: - if not self.users_tabs: - raise UnsupportedPluginError("No Windows Notepad temporary tab files found") + def __new__(cls, file: TargetPath, include_deleted_content=False) -> WindowsNotepadTabContentRecord: + return cls._process_tab_file(file, include_deleted_content) - def _process_tab_file(self, file: TargetPath, user: UnixUserRecord | WindowsUserRecord) -> TextEditorTabRecord: + @staticmethod + def _process_tab_file(file: TargetPath, include_deleted_content: bool) -> WindowsNotepadTabContentRecord: """Parse a binary tab file and reconstruct the contents. Args: @@ -129,7 +119,7 @@ def _process_tab_file(self, file: TargetPath, user: UnixUserRecord | WindowsUser actual_crc32 = _calc_crc32(header.dumps()[3:] + tab.dumps() + data_entry.dumps() + extra_byte) if defined_crc32 != actual_crc32: - self.target.log.warning( + logging.warning( "CRC32 mismatch in single-block file: %s (expected=%s, actual=%s)", file.name, defined_crc32.hex(), @@ -153,7 +143,7 @@ def _process_tab_file(self, file: TargetPath, user: UnixUserRecord | WindowsUser # Calculate CRC32 of the header and check if it matches actual_header_crc32 = _calc_crc32(header.dumps()[3:] + tab.dumps() + unknown_bytes) if defined_header_crc32 != actual_header_crc32: - self.target.log.warning( + logging.warning( "CRC32 mismatch in header of multi-block file: %s " "expected=%s, actual=%s", file.name, defined_header_crc32.hex(), @@ -164,6 +154,8 @@ def _process_tab_file(self, file: TargetPath, user: UnixUserRecord | WindowsUser # a list is used to easily insert text at offsets text = [] + deleted_content = "" + while True: # Unfortunately, there is no way of determining how many blocks there are. So just try to parse # until we reach EOF, after which we stop. @@ -180,7 +172,7 @@ def _process_tab_file(self, file: TargetPath, user: UnixUserRecord | WindowsUser # Check the CRC32 checksum for this block actual_crc32 = _calc_crc32(data_entry.dumps()) if defined_crc32 != actual_crc32: - self.target.log.warning( + logging.warning( "CRC32 mismatch in multi-block file: %s " "expected=%s, actual=%s", file.name, data_entry.crc32.hex(), @@ -194,15 +186,52 @@ def _process_tab_file(self, file: TargetPath, user: UnixUserRecord | WindowsUser elif data_entry.nDeleted > 0: # Create a new slice. Include everything up to the offset, # plus everything after the nDeleted following bytes + if include_deleted_content: + deleted_content += "".join( + text[data_entry.offset : data_entry.offset + data_entry.nDeleted] + ) text = text[: data_entry.offset] + text[data_entry.offset + data_entry.nDeleted :] # Join all the characters to reconstruct the original text text = "".join(text) - return TextEditorTabRecord(content=text, path=file, _target=self.target, _user=user) + if include_deleted_content: + text += " --- DELETED-CONTENT: " + text += deleted_content - @export(record=TextEditorTabRecord) - def tabs(self) -> Iterator[TextEditorTabRecord]: + return WindowsNotepadTabContentRecord(content=text, path=file) + + +class WindowsNotepadPlugin(TexteditorPlugin): + """Windows notepad tab content plugin.""" + + __namespace__ = "windowsnotepad" + + GLOB = "AppData/Local/Packages/Microsoft.WindowsNotepad_*/LocalState/TabState/*.bin" + + def __init__(self, target): + super().__init__(target) + self.users_tabs: list[TargetPath, UnixUserRecord | WindowsUserRecord] = [] + for user_details in self.target.user_details.all_with_home(): + for tab_file in user_details.home_path.glob(self.GLOB): + if tab_file.name.endswith(".1.bin") or tab_file.name.endswith(".0.bin"): + continue + + self.users_tabs.append((tab_file, user_details.user)) + + def check_compatible(self) -> None: + if not self.users_tabs: + raise UnsupportedPluginError("No Windows Notepad temporary tab files found") + + @arg( + "--include-deleted-content", + type=bool, + default=False, + required=False, + help="Include deleted but recoverable content.", + ) + @export(record=WindowsNotepadTabRecord) + def tabs(self, include_deleted_content) -> Iterator[WindowsNotepadTabRecord]: """Return contents from Windows 11 temporary Notepad tabs. Yields TextEditorTabRecord with the following fields: @@ -210,4 +239,8 @@ def tabs(self) -> Iterator[TextEditorTabRecord]: path (path): The path the content originates from. """ for file, user in self.users_tabs: - yield self._process_tab_file(file, user) + # Parse the file + r: WindowsNotepadTabContentRecord = WindowsNotepadTabContent(file, include_deleted_content) + + # Add user- and target specific information to the content record record + yield WindowsNotepadTabRecord(content=r.content, path=r.path, _target=self.target, _user=user) diff --git a/tests/plugins/apps/texteditor/test_texteditor.py b/tests/plugins/apps/texteditor/test_texteditor.py index d27d9e15a..baaddaa49 100644 --- a/tests/plugins/apps/texteditor/test_texteditor.py +++ b/tests/plugins/apps/texteditor/test_texteditor.py @@ -1,4 +1,5 @@ import os +from pathlib import Path from dissect.target.plugins.apps.texteditor import windowsnotepad from tests._utils import absolute_path @@ -24,7 +25,56 @@ loremipsum = """Lorem ipsum dolor sit amet. Eum error blanditiis eum pariatur delectus ut consequuntur officiis a excepturi dignissimos et doloribus quia 33 perspiciatis soluta nam perspiciatis dolor. Ut repudiandae quidem cum sint modi qui sint consequatur. Aut autem quidem eum enim consequatur qui voluptate consequatur non similique voluptate. A vitae modi vel sint provident ut galisum tenetur sit voluptatem amet. Est impedit perspiciatis est repudiandae voluptates ut fugit alias! Eum magni esse aut velit illum qui excepturi aperiam. Ex dolores asperiores ut debitis omnis qui consequuntur dolore. Est voluptatem mollitia et quibusdam unde ea accusamus fuga. Cum quis galisum et impedit sunt qui aliquam perspiciatis sed modi quidem qui nisi molestias. Aut temporibus architecto ut neque voluptatem et consequatur deleniti sed accusantium quibusdam et omnis dignissimos ad rerum ipsam et rerum quia. Ut nihil repellat et eaque molestias quo iusto ipsum At optio sint eos quidem earum?\r\rEx deleniti unde eum tenetur rerum ea dolore numquam? Eos aperiam officiis et neque explicabo et enim atque ut eaque omnis non illum eveniet est molestias itaque et ratione voluptatem. Ea deserunt nemo et quos tempora et nostrum aperiam sit necessitatibus illo sit culpa placeat. Vel tempore quibusdam ut velit voluptate aut odio facere non voluptas earum est odio galisum et voluptas harum. Et blanditiis sapiente et nostrum laborum aut voluptatem explicabo a quasi assumenda. Est voluptatem quia eum minima galisum quo totam excepturi aut facilis enim vel voluptate repudiandae sit distinctio laboriosam. Quo possimus molestiae et molestiae accusantium est voluptas omnis sed obcaecati natus. Non vitae asperiores qui nostrum enim id saepe fugiat et incidunt quasi.\r\rEos ipsa facilis aut excepturi voluptatem a omnis magni vel magni iste. Sed ipsum consequatur qui reprehenderit deleniti et soluta molestiae. Ut vero assumenda id dolor ipsum in deleniti voluptatem aut quis quisquam sed repudiandae temporibus ab quia inventore. Sed velit fugit vel facere cumque et delectus ullam sed eaque impedit. Est veritatis dignissimos aut doloribus dolorem vel pariatur repellendus sit nesciunt similique eum architecto quia. Ea expedita veritatis eum dolorem molestiae ut enim fugit aut beatae quibusdam. Aut voluptas natus in quidem deleniti aut animi iure est incidunt tenetur qui culpa maiores! Et nostrum quaerat qui consequatur consequatur aut aliquam atque aut praesentium rerum et consequuntur exercitationem. Non accusantium ipsa vel consectetur vitae ut magnam autem et natus rerum ut consectetur inventore est doloremque temporibus 33 dolores doloribus! Aut perferendis optio et nostrum repellendus et fugit itaque ut nisi neque sed sint quaerat. Aut placeat architecto et eius sapiente eum molestiae quam. Quo mollitia sapiente non Quis neque non tempora laudantium. Quo distinctio quos et molestias natus sit veritatis consequuntur aut repellendus neque a porro galisum cum numquam nesciunt et animi earum? Aut dolorum dolore non assumenda omnis et molestiae amet id sint vero est eligendi harum sit temporibus magnam aut ipsam quos.\r\r""" # noqa: E501 -def test_texteditor_plugin(target_win, fs_win, tmp_path, target_win_users, caplog): +def test_windows_tab_parsing(tmp_path): + # Standalone parsing of tab files, so not using the plugin + tab_files = Path(absolute_path("_data/plugins/apps/texteditor/windowsnotepad/")) + content_record = windowsnotepad.WindowsNotepadTabContent(tab_files / "unsaved-with-deletions.bin") + assert content_record.content == "Not saved aasdflasd" + content_record_with_deletions = windowsnotepad.WindowsNotepadTabContent( + tab_files / "unsaved-with-deletions.bin", include_deleted_content=True + ) + assert content_record_with_deletions.content == "Not saved aasdflasd --- DELETED-CONTENT: snUlltllafds tjkf" + + +def test_windows_tab_plugin_deleted_contents(target_win, fs_win, tmp_path, target_win_users, caplog): + file_text_map = { + "unsaved-with-deletions.bin": "Not saved aasdflasd --- DELETED-CONTENT: snUlltllafds tjkf", + "lots-of-deletions.bin": "This a text, which is nothing special. But I am going to modify it a bit. " + "For example, I have removed quote some stuff. " + "Adding a word in the beginning now... " + "At this point, I've edited it quite a lot. --- DELETED-CONTENT: " + "b a ,elpmac ydaerlae already thi laiceps emos", + } + + tabcache = absolute_path("_data/plugins/apps/texteditor/windowsnotepad/") + + user = target_win_users.user_details.find(username="John") + tab_dir = user.home_path.joinpath( + "AppData/Local/Packages/Microsoft.WindowsNotepad_8wekyb3d8bbwe/LocalState/TabState" + ) + + fs_win.map_dir("Users\\John", tmp_path) + + for file in file_text_map.keys(): + tab_file = str(tab_dir.joinpath(file))[3:] + fs_win.map_file(tab_file, os.path.join(tabcache, file)) + + target_win.add_plugin(windowsnotepad.WindowsNotepadPlugin) + + records = list(target_win.windowsnotepad.tabs(include_deleted_content=True)) + + # Check the amount of files + assert len(list(tab_dir.iterdir())) == len(file_text_map.keys()) + assert len(records) == len(file_text_map.keys()) + + # The recovered content in the records should match the original data, as well as the length + for rec in records: + print(rec.content) + assert rec.content == file_text_map[rec.path.name] + assert len(rec.content) == len(file_text_map[rec.path.name]) + + +def test_windows_tab_plugin_default(target_win, fs_win, tmp_path, target_win_users, caplog): file_text_map = { "c515e86f-08b3-4d76-844a-cddfcd43fcbb.bin": text1, "85167c9d-aac2-4469-ae44-db5dccf8f7f4.bin": text2, @@ -58,7 +108,7 @@ def test_texteditor_plugin(target_win, fs_win, tmp_path, target_win_users, caplo target_win.add_plugin(windowsnotepad.WindowsNotepadPlugin) - records = list(target_win.windowsnotepad.tabs()) + records = list(target_win.windowsnotepad.tabs(include_deleted_content=False)) # Check the amount of files assert len(list(tab_dir.iterdir())) == len(file_text_map.keys()) From f808bc71fa33fa7ca40c57289f916cb5c613ad89 Mon Sep 17 00:00:00 2001 From: Joost Jansen <12032793+joost-j@users.noreply.github.com> Date: Fri, 26 Apr 2024 12:18:15 +0200 Subject: [PATCH 28/36] Removed fh.read() and re-added them to the c_def --- .../plugins/apps/texteditor/windowsnotepad.py | 56 +++++++++---------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py index 663f14ad6..847dea22e 100644 --- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py +++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py @@ -46,15 +46,30 @@ uint8 unk0; uleb128 fileSize; uleb128 fileSizeDuplicate; - uint8 unk1; - uint8 unk2; + char unk1; + char unk2; }; -struct data_block { +struct single_data_block { uleb128 offset; uleb128 nDeleted; uleb128 nAdded; wchar data[nAdded]; + char unk[1]; + char crc32[4]; +}; + +struct multi_data_extra_header { + char unk[4]; + char crc32[4]; +}; + +struct multi_data_block { + uleb128 offset; + uleb128 nDeleted; + uleb128 nAdded; + wchar data[nAdded]; + char crc32[4]; }; """ @@ -107,22 +122,16 @@ def _process_tab_file(file: TargetPath, include_deleted_content: bool) -> Window # This means that the data is stored in one block if tab.fileSize != 0: # So we only parse one block - data_entry = c_windowstab.data_block(fh) - - # An extra byte is appended to the single block, not yet sure where this is defined and/or used for - extra_byte = fh.read(1) - - # The CRC32 value is appended after the extra byte in big-endian - defined_crc32 = fh.read(4) + data_entry = c_windowstab.single_data_block(fh) # The header (minus the magic) plus all data (including the extra byte) is included in the checksum - actual_crc32 = _calc_crc32(header.dumps()[3:] + tab.dumps() + data_entry.dumps() + extra_byte) + actual_crc32 = _calc_crc32(header.dumps()[3:] + tab.dumps() + data_entry.dumps()[:-4]) - if defined_crc32 != actual_crc32: + if data_entry.crc32 != actual_crc32: logging.warning( "CRC32 mismatch in single-block file: %s (expected=%s, actual=%s)", file.name, - defined_crc32.hex(), + data_entry.crc32.hex(), actual_crc32.hex(), ) @@ -132,21 +141,15 @@ def _process_tab_file(file: TargetPath, include_deleted_content: bool) -> Window # Here, the fileSize is zeroed, meaning that the size is not known up front. # Data may be stored in multiple, variable-length blocks. This happens, for example, when several # additions and deletions of characters have been recorded and these changes have not been 'flushed' - - # First, parse 4 unknown bytes. These likely - # hold some addition information about the tab (view options etc.) - unknown_bytes = fh.read(4) - - # In this multi-block variant, the header itself has a CRC32 value in big-endian as well - defined_header_crc32 = fh.read(4) + mdeh = c_windowstab.multi_data_extra_header(fh) # Calculate CRC32 of the header and check if it matches - actual_header_crc32 = _calc_crc32(header.dumps()[3:] + tab.dumps() + unknown_bytes) - if defined_header_crc32 != actual_header_crc32: + actual_header_crc32 = _calc_crc32(header.dumps()[3:] + tab.dumps() + mdeh.unk) + if mdeh.crc32 != actual_header_crc32: logging.warning( "CRC32 mismatch in header of multi-block file: %s " "expected=%s, actual=%s", file.name, - defined_header_crc32.hex(), + mdeh.crc32.hex(), actual_header_crc32.hex(), ) @@ -160,18 +163,15 @@ def _process_tab_file(file: TargetPath, include_deleted_content: bool) -> Window # Unfortunately, there is no way of determining how many blocks there are. So just try to parse # until we reach EOF, after which we stop. try: - data_entry = c_windowstab.data_block(fh) + data_entry = c_windowstab.multi_data_block(fh) except EOFError: break - # Each block has a CRC32 value in big-endian appended to the block - defined_crc32 = fh.read(4) - # Either the nAdded is nonzero, or the nDeleted if data_entry.nAdded > 0: # Check the CRC32 checksum for this block actual_crc32 = _calc_crc32(data_entry.dumps()) - if defined_crc32 != actual_crc32: + if data_entry.crc32 != actual_crc32: logging.warning( "CRC32 mismatch in multi-block file: %s " "expected=%s, actual=%s", file.name, From 9b38f3e003d5bab1f7bc4ebf3136a93cec749df0 Mon Sep 17 00:00:00 2001 From: Joost Jansen <12032793+joost-j@users.noreply.github.com> Date: Fri, 26 Apr 2024 12:36:56 +0200 Subject: [PATCH 29/36] Added options and more test cases to support newest version --- .../plugins/apps/texteditor/windowsnotepad.py | 174 ++++++++++++------ .../texteditor/windowsnotepad/new-format.bin | Bin 0 -> 19 bytes .../stored_unsaved_with_new_data.bin | Bin 0 -> 268 bytes .../apps/texteditor/test_texteditor.py | 11 +- 4 files changed, 126 insertions(+), 59 deletions(-) create mode 100755 tests/_data/plugins/apps/texteditor/windowsnotepad/new-format.bin create mode 100755 tests/_data/plugins/apps/texteditor/windowsnotepad/stored_unsaved_with_new_data.bin diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py index 847dea22e..8794c983d 100644 --- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py +++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py @@ -25,52 +25,70 @@ # required for recovering text from these files. c_def = """ -struct header { +struct file_header { char magic[2]; // NP - uint8 unk0; - uint8 fileState; // 0 if unsaved, 1 if saved + uleb128 updateNumber; // increases on every settings update when fileType=9, + // doesn't seem to change on fileType 0 or 1 + uleb128 fileType; // 0 if unsaved, 1 if saved, 9 if contains settings? } -struct header_saved_tab { +struct tab_header_saved { uleb128 filePathLength; wchar filePath[filePathLength]; - uleb128 fileSize; + uleb128 fileSize; // likely similar to fixedSizeBlockLength uleb128 encoding; uleb128 carriageReturnType; uleb128 timestamp; // Windows Filetime format (not unix timestamp) char sha256[32]; - char unk[6]; + char unk0; + char unk1; + uleb128 fixedSizeBlockLength; + uleb128 fixedSizeBlockLengthDuplicate; + uint8 wordWrap; // 1 if wordwrap enabled, 0 if disabled + uint8 rightToLeft; + uint8 showUnicode; + uint8 optionsVersion; +}; + +struct tab_header_unsaved { + char unk0; + uleb128 fixedSizeBlockLength; // will always be 00 when unsaved because size is not yet known + uleb128 fixedSizeBlockLengthDuplicate; // will always be 00 when unsaved because size is not yet known + uint8 wordWrap; // 1 if wordwrap enabled, 0 if disabled + uint8 rightToLeft; + uint8 showUnicode; + uint8 optionsVersion; }; -struct header_unsaved_tab { - uint8 unk0; - uleb128 fileSize; - uleb128 fileSizeDuplicate; +struct tab_header_crc32_stub { char unk1; char unk2; + char crc32[4]; }; -struct single_data_block { - uleb128 offset; - uleb128 nDeleted; +struct fixed_size_data_block { uleb128 nAdded; wchar data[nAdded]; - char unk[1]; - char crc32[4]; -}; - -struct multi_data_extra_header { - char unk[4]; + uint8 hasRemainingVariableDataBlocks; // indicates whether after this single-data block more data will follow char crc32[4]; }; -struct multi_data_block { +struct variable_size_data_block { uleb128 offset; uleb128 nDeleted; uleb128 nAdded; wchar data[nAdded]; char crc32[4]; }; + +struct options_v1 { + uleb128 unk; +}; + +struct options_v2 { + uleb128 unk1; // likely autocorrect or spellcheck + uleb128 unk2; // likely autocorrect or spellcheck +}; """ c_windowstab = cstruct() @@ -108,24 +126,67 @@ def _process_tab_file(file: TargetPath, include_deleted_content: bool) -> Window """ with file.open("rb") as fh: # Header is the same for all types - header = c_windowstab.header(fh) - - # File can be saved, or unsaved. Depending on the file state, different header fields are present - # Currently, no information in the header is used in the outputted records, only the contents of the tab - tab = ( - c_windowstab.header_saved_tab(fh) - if header.fileState == 0x01 # 0x00 is unsaved, 0x01 is saved - else c_windowstab.header_unsaved_tab(fh) + file_header = c_windowstab.file_header(fh) + + # Tabs can be saved to a file with a filename on disk, or unsaved (kept in the TabState folder). + # Depending on the file's saved state, different header fields are present + tab_header = ( + c_windowstab.tab_header_saved(fh) + if file_header.fileType == 0x01 # 0x00 is unsaved, 0x01 is saved, 0x09 is settings? + else c_windowstab.tab_header_unsaved(fh) ) - # In the case that the file size is known up front, then this fileSize is set to a nonzero value - # This means that the data is stored in one block - if tab.fileSize != 0: - # So we only parse one block - data_entry = c_windowstab.single_data_block(fh) + # There appears to be a optionsVersion field that specifies the options that are passed. + # At the moment of writing, it is not sure whether this specifies a version or a number of bytes + # that is parsed, so just going with the 'optionsVersion' type for now. + # We don't use the options, but since they are required for the CRC32 checksum + # we store the byte representation + if tab_header.optionsVersion == 0: + # No options specified + options = b"" + elif tab_header.optionsVersion == 1: + options = c_windowstab.options_v1(fh).dumps() + elif tab_header.optionsVersion == 2: + options = c_windowstab.options_v2(fh).dumps() + else: + # Raise an error, since we don't know how many bytes future optionVersions will occupy. + # Now knowing how many bytes to parse can mess up the alignment and structs. + raise Exception("Unknown option version") - # The header (minus the magic) plus all data (including the extra byte) is included in the checksum - actual_crc32 = _calc_crc32(header.dumps()[3:] + tab.dumps() + data_entry.dumps()[:-4]) + # If the file is not saved to disk and no fixedSizeBlockLength is present, an extra checksum stub + # is present. So parse that first + if file_header.fileType == 0 and tab_header.fixedSizeBlockLength == 0: + # Two unknown bytes before the CRC32 + tab_header_crc32_stub = c_windowstab.tab_header_crc32_stub(fh) + + # Calculate CRC32 of the header and check if it matches + actual_header_crc32 = _calc_crc32( + file_header.dumps()[3:] + tab_header.dumps() + options + tab_header_crc32_stub.dumps()[:-4] + ) + if tab_header_crc32_stub.crc32 != actual_header_crc32: + logging.warning( + "CRC32 mismatch in header of file: %s (expected=%s, actual=%s)", + file.name, + tab_header_crc32_stub.crc32.hex(), + actual_header_crc32.hex(), + ) + + # Used to store the final content + content = "" + + # After a fixed_size_data_block, some more variable_size_data_blocks can be present. This boolean + # keeps track of whether more data is still present. + has_remaining_data = False + + # In the case that a fixedSizeDataBlock is present, this value is set to a nonzero value + if tab_header.fixedSizeBlockLength > 0: + # So we parse the fixed size data block + data_entry = c_windowstab.fixed_size_data_block(fh) + + # The header (minus the magic) plus all data is included in the checksum + actual_crc32 = _calc_crc32( + file_header.dumps()[3:] + tab_header.dumps() + options + data_entry.dumps()[:-4] + ) if data_entry.crc32 != actual_crc32: logging.warning( @@ -135,45 +196,42 @@ def _process_tab_file(file: TargetPath, include_deleted_content: bool) -> Window actual_crc32.hex(), ) - text = data_entry.data + # Add the content of the fixed size data block to the tab content + content += data_entry.data - else: - # Here, the fileSize is zeroed, meaning that the size is not known up front. - # Data may be stored in multiple, variable-length blocks. This happens, for example, when several - # additions and deletions of characters have been recorded and these changes have not been 'flushed' - mdeh = c_windowstab.multi_data_extra_header(fh) + # The hasRemainingVariableDataBlocks indicates whether more data will follow after this single block + if data_entry.hasRemainingVariableDataBlocks == 1: + has_remaining_data = True - # Calculate CRC32 of the header and check if it matches - actual_header_crc32 = _calc_crc32(header.dumps()[3:] + tab.dumps() + mdeh.unk) - if mdeh.crc32 != actual_header_crc32: - logging.warning( - "CRC32 mismatch in header of multi-block file: %s " "expected=%s, actual=%s", - file.name, - mdeh.crc32.hex(), - actual_header_crc32.hex(), - ) + # If fixedSizeBlockLength in the header has a value of zero, this means that the entire file consists of + # variable-length blocks. Furthermore, if there is any remaining data after the + # first fixed size blocks, also continue we also want to continue parsing + if tab_header.fixedSizeBlockLength == 0 or has_remaining_data: + # Here, data is stored in variable-length blocks. This happens, for example, when several + # additions and deletions of characters have been recorded and these changes have not been 'flushed' # Since we don't know the size of the file up front, and offsets don't necessarily have to be in order, # a list is used to easily insert text at offsets text = [] + # Used to store the deleted content, if available and requested deleted_content = "" while True: # Unfortunately, there is no way of determining how many blocks there are. So just try to parse # until we reach EOF, after which we stop. try: - data_entry = c_windowstab.multi_data_block(fh) + data_entry = c_windowstab.variable_size_data_block(fh) except EOFError: break # Either the nAdded is nonzero, or the nDeleted if data_entry.nAdded > 0: # Check the CRC32 checksum for this block - actual_crc32 = _calc_crc32(data_entry.dumps()) + actual_crc32 = _calc_crc32(data_entry.dumps()[:-4]) if data_entry.crc32 != actual_crc32: logging.warning( - "CRC32 mismatch in multi-block file: %s " "expected=%s, actual=%s", + "CRC32 mismatch in multi-block file: %s (expected=%s, actual=%s)", file.name, data_entry.crc32.hex(), actual_crc32.hex(), @@ -192,14 +250,18 @@ def _process_tab_file(file: TargetPath, include_deleted_content: bool) -> Window ) text = text[: data_entry.offset] + text[data_entry.offset + data_entry.nDeleted :] - # Join all the characters to reconstruct the original text + # Join all the characters to reconstruct the original text within the variable-length data blocks text = "".join(text) + # Add the deleted content, if specified if include_deleted_content: text += " --- DELETED-CONTENT: " text += deleted_content - return WindowsNotepadTabContentRecord(content=text, path=file) + # Finally, add the reconstructed text to the tab content + content += text + + return WindowsNotepadTabContentRecord(content=content, path=file) class WindowsNotepadPlugin(TexteditorPlugin): @@ -242,5 +304,5 @@ def tabs(self, include_deleted_content) -> Iterator[WindowsNotepadTabRecord]: # Parse the file r: WindowsNotepadTabContentRecord = WindowsNotepadTabContent(file, include_deleted_content) - # Add user- and target specific information to the content record record + # Add user- and target specific information to the content record yield WindowsNotepadTabRecord(content=r.content, path=r.path, _target=self.target, _user=user) diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/new-format.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/new-format.bin new file mode 100755 index 0000000000000000000000000000000000000000..8773f88ff4840b596df7d445b42a25ef3b7d0d99 GIT binary patch literal 19 VcmeYZU|?VbBPK>5z182H82}Iq0rLO= literal 0 HcmV?d00001 diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/stored_unsaved_with_new_data.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/stored_unsaved_with_new_data.bin new file mode 100755 index 0000000000000000000000000000000000000000..f41219d57a7950fa8b6e55bef8d7164055e5439e GIT binary patch literal 268 zcmeYZU|?XBl41ao3{t@iB@FotMGUD7DGUl=wgQlq$xzIY4df>=lmf*RfMgy}B$1&E ztd8-(^g}D97#MXJt~6FImu6s8U~mnQmzH5*Okyy#e%B+*z*x$_d&7B^90OwsLvS*O zwLD18hATU7D=;vYGo;1G$}2K3W-|2Gc^4>w)R=FI-J{IFn8EO7sb-)GNO78~fsZN! zV=BWn??rpm7#J%U^4hl)sDl*$54v1s1D qrZ5yF=+4mrN$odhJ*f-Qwe``dTs@G;(eGg^^%)rT82Za}E&~9owL~WX literal 0 HcmV?d00001 diff --git a/tests/plugins/apps/texteditor/test_texteditor.py b/tests/plugins/apps/texteditor/test_texteditor.py index baaddaa49..f85091090 100644 --- a/tests/plugins/apps/texteditor/test_texteditor.py +++ b/tests/plugins/apps/texteditor/test_texteditor.py @@ -69,7 +69,6 @@ def test_windows_tab_plugin_deleted_contents(target_win, fs_win, tmp_path, targe # The recovered content in the records should match the original data, as well as the length for rec in records: - print(rec.content) assert rec.content == file_text_map[rec.path.name] assert len(rec.content) == len(file_text_map[rec.path.name]) @@ -91,6 +90,8 @@ def test_windows_tab_plugin_default(target_win, fs_win, tmp_path, target_win_use "lots-of-deletions.bin": text7, "appclosed_saved_and_deletions.bin": text8, "appclosed_unsaved.bin": "Closing application now", + "new-format.bin": "", + "stored_unsaved_with_new_data.bin": "Stored to disk but unsaved, but with extra data.", } tabcache = absolute_path("_data/plugins/apps/texteditor/windowsnotepad/") @@ -114,8 +115,12 @@ def test_windows_tab_plugin_default(target_win, fs_win, tmp_path, target_win_use assert len(list(tab_dir.iterdir())) == len(file_text_map.keys()) assert len(records) == len(file_text_map.keys()) - # One file should still return contents, but there should be an entry for in the logging for a CRC missmatch. - assert "CRC32 mismatch in single-block file: wrong-checksum.bin (expected=deadbeef, actual=a48d30a6)" in caplog.text + for line in caplog.text.split("\n"): + # One file should still return contents, but there should be an entry for in the logging for a CRC missmatch. + assert ( + "CRC32 mismatch in single-block file: wrong-checksum.bin (expected=deadbeef, actual=a48d30a6)" in line + or not "CRC32 mismatch" in line + ) # The recovered content in the records should match the original data, as well as the length for rec in records: From a3b6f27d879a3343f18be614950f06a2d49d7008 Mon Sep 17 00:00:00 2001 From: Joost Jansen <12032793+joost-j@users.noreply.github.com> Date: Wed, 8 May 2024 14:52:34 +0200 Subject: [PATCH 30/36] Added separate records for unsaved/saved tabs, included more data (timestamp,saved_path,sha256) in the fields --- .../plugins/apps/texteditor/windowsnotepad.py | 105 +++++++++++++++--- .../apps/texteditor/test_texteditor.py | 69 ++++++++++-- 2 files changed, 147 insertions(+), 27 deletions(-) diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py index 8794c983d..4da9fccbe 100644 --- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py +++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py @@ -5,11 +5,16 @@ from typing import Iterator from dissect.cstruct import cstruct +from dissect.util.ts import wintimestamp from dissect.target.exceptions import UnsupportedPluginError -from dissect.target.helpers.descriptor_extensions import UserRecordDescriptorExtension +from dissect.target.helpers.descriptor_extensions import ( + RecordDescriptorExtensionBase, + UserRecordDescriptorExtension, +) from dissect.target.helpers.fsutil import TargetPath from dissect.target.helpers.record import ( + DynamicDescriptor, UnixUserRecord, WindowsUserRecord, create_extended_descriptor, @@ -27,7 +32,7 @@ c_def = """ struct file_header { char magic[2]; // NP - uleb128 updateNumber; // increases on every settings update when fileType=9, + uleb128 updateNumber; // increases on every settings update when fileType=9, // doesn't seem to change on fileType 0 or 1 uleb128 fileType; // 0 if unsaved, 1 if saved, 9 if contains settings? } @@ -91,17 +96,59 @@ }; """ -c_windowstab = cstruct() -c_windowstab.load(c_def) +WINDOWS_SAVED_TABS_EXTRA_FIELDS = [("datetime", "modification_time"), ("string", "sha256"), ("path", "saved_path")] + + +class WindowsSavedTabRecordDescriptorExtension(RecordDescriptorExtensionBase): + """RecordDescriptorExtension used to add extra fields to tabs that are saved to disk and contain more info.""" + + _default_fields = WINDOWS_SAVED_TABS_EXTRA_FIELDS + + _input_fields = ("_saved",) + + def _fill_default_fields(self, record_kwargs): + r: WindowsNotepadSavedTabContentRecord = record_kwargs.get("_saved", None) + + modification_time = None + saved_path = None + sha256 = None + + if r: + modification_time = r.modification_time + sha256 = r.sha256 + saved_path = r.saved_path + + record_kwargs.update({"modification_time": modification_time, "sha256": sha256, "saved_path": saved_path}) + return record_kwargs + + +# Different Record types for both saved/unsaved tabs, and with/without UserRecordDescriptor so that the +# plugin can be used as a standalone tool as well + -WindowsNotepadTabRecord = create_extended_descriptor([UserRecordDescriptorExtension])( - "texteditor/windowsnotepad/tab", GENERIC_TAB_CONTENTS_RECORD_FIELDS +WindowsNotepadUnsavedTabRecord = create_extended_descriptor([UserRecordDescriptorExtension])( + "texteditor/windowsnotepad/tab/unsaved", + GENERIC_TAB_CONTENTS_RECORD_FIELDS, ) -WindowsNotepadTabContentRecord = create_extended_descriptor([])( - "texteditor/windowsnotepad/tab_content", GENERIC_TAB_CONTENTS_RECORD_FIELDS +WindowsNotepadSavedTabRecord = create_extended_descriptor( + [UserRecordDescriptorExtension, WindowsSavedTabRecordDescriptorExtension] +)( + "texteditor/windowsnotepad/tab/saved", + GENERIC_TAB_CONTENTS_RECORD_FIELDS, ) +WindowsNotepadUnsavedTabContentRecord = create_extended_descriptor([])( + "texteditor/windowsnotepad/tab_content/unsaved", GENERIC_TAB_CONTENTS_RECORD_FIELDS +) + +WindowsNotepadSavedTabContentRecord = create_extended_descriptor([])( + "texteditor/windowsnotepad/tab_content/saved", GENERIC_TAB_CONTENTS_RECORD_FIELDS + WINDOWS_SAVED_TABS_EXTRA_FIELDS +) + +c_windowstab = cstruct() +c_windowstab.load(c_def) + def _calc_crc32(data: bytes) -> bytes: """Perform a CRC32 checksum on the data and return it as bytes.""" @@ -109,13 +156,17 @@ def _calc_crc32(data: bytes) -> bytes: class WindowsNotepadTabContent: - """Windows notepad tab parser""" + """Windows notepad tab content parser""" - def __new__(cls, file: TargetPath, include_deleted_content=False) -> WindowsNotepadTabContentRecord: + def __new__( + cls, file: TargetPath, include_deleted_content=False + ) -> WindowsNotepadSavedTabContentRecord | WindowsNotepadUnsavedTabContentRecord: return cls._process_tab_file(file, include_deleted_content) @staticmethod - def _process_tab_file(file: TargetPath, include_deleted_content: bool) -> WindowsNotepadTabContentRecord: + def _process_tab_file( + file: TargetPath, include_deleted_content: bool + ) -> WindowsNotepadSavedTabContentRecord | WindowsNotepadUnsavedTabContentRecord: """Parse a binary tab file and reconstruct the contents. Args: @@ -132,7 +183,7 @@ def _process_tab_file(file: TargetPath, include_deleted_content: bool) -> Window # Depending on the file's saved state, different header fields are present tab_header = ( c_windowstab.tab_header_saved(fh) - if file_header.fileType == 0x01 # 0x00 is unsaved, 0x01 is saved, 0x09 is settings? + if file_header.fileType == 1 # 0 is unsaved, 1 is saved, 9 is settings? else c_windowstab.tab_header_unsaved(fh) ) @@ -261,7 +312,16 @@ def _process_tab_file(file: TargetPath, include_deleted_content: bool) -> Window # Finally, add the reconstructed text to the tab content content += text - return WindowsNotepadTabContentRecord(content=content, path=file) + if file_header.fileType == 0: + return WindowsNotepadUnsavedTabContentRecord(content=content, path=file) + else: + return WindowsNotepadSavedTabContentRecord( + content=content, + path=file, + modification_time=wintimestamp(tab_header.timestamp), + sha256=tab_header.sha256.hex(), + saved_path=tab_header.filePath, + ) class WindowsNotepadPlugin(TexteditorPlugin): @@ -276,6 +336,8 @@ def __init__(self, target): self.users_tabs: list[TargetPath, UnixUserRecord | WindowsUserRecord] = [] for user_details in self.target.user_details.all_with_home(): for tab_file in user_details.home_path.glob(self.GLOB): + # These files seem to contain information on different settings / configurations, + # and are skipped for now if tab_file.name.endswith(".1.bin") or tab_file.name.endswith(".0.bin"): continue @@ -292,8 +354,8 @@ def check_compatible(self) -> None: required=False, help="Include deleted but recoverable content.", ) - @export(record=WindowsNotepadTabRecord) - def tabs(self, include_deleted_content) -> Iterator[WindowsNotepadTabRecord]: + @export(record=DynamicDescriptor(["path", "datetime", "string"])) + def tabs(self, include_deleted_content) -> Iterator[WindowsNotepadSavedTabRecord | WindowsNotepadUnsavedTabRecord]: """Return contents from Windows 11 temporary Notepad tabs. Yields TextEditorTabRecord with the following fields: @@ -302,7 +364,14 @@ def tabs(self, include_deleted_content) -> Iterator[WindowsNotepadTabRecord]: """ for file, user in self.users_tabs: # Parse the file - r: WindowsNotepadTabContentRecord = WindowsNotepadTabContent(file, include_deleted_content) + r: WindowsNotepadSavedTabContentRecord | WindowsNotepadUnsavedTabContentRecord = WindowsNotepadTabContent( + file, include_deleted_content + ) - # Add user- and target specific information to the content record - yield WindowsNotepadTabRecord(content=r.content, path=r.path, _target=self.target, _user=user) + # If the modification_time attribute is present, this means that it's a WindowsNotepadSavedTabContentRecord + if hasattr(r, "modification_time"): + yield WindowsNotepadSavedTabRecord( + content=r.content, path=r.path, _saved=r, _target=self.target, _user=user + ) + else: + yield WindowsNotepadUnsavedTabRecord(content=r.content, path=r.path, _target=self.target, _user=user) diff --git a/tests/plugins/apps/texteditor/test_texteditor.py b/tests/plugins/apps/texteditor/test_texteditor.py index f85091090..c5e7d5906 100644 --- a/tests/plugins/apps/texteditor/test_texteditor.py +++ b/tests/plugins/apps/texteditor/test_texteditor.py @@ -1,7 +1,12 @@ import os from pathlib import Path -from dissect.target.plugins.apps.texteditor import windowsnotepad +from flow.record.fieldtypes import datetime as dt + +from dissect.target.plugins.apps.texteditor.windowsnotepad import ( + WindowsNotepadPlugin, + WindowsNotepadTabContent, +) from tests._utils import absolute_path text1 = "This is an unsaved tab, UTF-8 encoded with Windows (CRLF). It's only 88 characters long." @@ -27,12 +32,11 @@ def test_windows_tab_parsing(tmp_path): # Standalone parsing of tab files, so not using the plugin - tab_files = Path(absolute_path("_data/plugins/apps/texteditor/windowsnotepad/")) - content_record = windowsnotepad.WindowsNotepadTabContent(tab_files / "unsaved-with-deletions.bin") + tab_file = Path(absolute_path("_data/plugins/apps/texteditor/windowsnotepad/unsaved-with-deletions.bin")) + content_record = WindowsNotepadTabContent(tab_file) assert content_record.content == "Not saved aasdflasd" - content_record_with_deletions = windowsnotepad.WindowsNotepadTabContent( - tab_files / "unsaved-with-deletions.bin", include_deleted_content=True - ) + + content_record_with_deletions = WindowsNotepadTabContent(tab_file, include_deleted_content=True) assert content_record_with_deletions.content == "Not saved aasdflasd --- DELETED-CONTENT: snUlltllafds tjkf" @@ -59,7 +63,7 @@ def test_windows_tab_plugin_deleted_contents(target_win, fs_win, tmp_path, targe tab_file = str(tab_dir.joinpath(file))[3:] fs_win.map_file(tab_file, os.path.join(tabcache, file)) - target_win.add_plugin(windowsnotepad.WindowsNotepadPlugin) + target_win.add_plugin(WindowsNotepadPlugin) records = list(target_win.windowsnotepad.tabs(include_deleted_content=True)) @@ -107,7 +111,7 @@ def test_windows_tab_plugin_default(target_win, fs_win, tmp_path, target_win_use tab_file = str(tab_dir.joinpath(file))[3:] fs_win.map_file(tab_file, os.path.join(tabcache, file)) - target_win.add_plugin(windowsnotepad.WindowsNotepadPlugin) + target_win.add_plugin(WindowsNotepadPlugin) records = list(target_win.windowsnotepad.tabs(include_deleted_content=False)) @@ -119,10 +123,57 @@ def test_windows_tab_plugin_default(target_win, fs_win, tmp_path, target_win_use # One file should still return contents, but there should be an entry for in the logging for a CRC missmatch. assert ( "CRC32 mismatch in single-block file: wrong-checksum.bin (expected=deadbeef, actual=a48d30a6)" in line - or not "CRC32 mismatch" in line + or "CRC32 mismatch" not in line ) # The recovered content in the records should match the original data, as well as the length for rec in records: assert rec.content == file_text_map[rec.path.name] assert len(rec.content) == len(file_text_map[rec.path.name]) + + +def test_windows_saved_tab_plugin_extra_fields(target_win, fs_win, tmp_path, target_win_users, caplog): + file_text_map = { + "saved.bin": ( + "Saved!", + "C:\\Users\\user\\Desktop\\Saved!.txt", + dt(2024, 3, 28, 13, 7, 55, 482183), + "ed9b760289e614c9dc8776e7280abe870be0a85019a32220b35acc54c0ecfbc1", + ), + "appclosed_saved_and_deletions.bin": ( + text8, + "C:\\Users\\user\\Desktop\\Saved.txt", + dt(2024, 3, 28, 13, 16, 21, 158279), + "8d0533144aa42e2d81e7474332bdef6473e42b699041528d55a62e5391e914ce", + ), + } + + tabcache = absolute_path("_data/plugins/apps/texteditor/windowsnotepad/") + + user = target_win_users.user_details.find(username="John") + tab_dir = user.home_path.joinpath( + "AppData/Local/Packages/Microsoft.WindowsNotepad_8wekyb3d8bbwe/LocalState/TabState" + ) + + fs_win.map_dir("Users\\John", tmp_path) + + for file in file_text_map.keys(): + tab_file = str(tab_dir.joinpath(file))[3:] + fs_win.map_file(tab_file, os.path.join(tabcache, file)) + + target_win.add_plugin(WindowsNotepadPlugin) + + records = list(target_win.windowsnotepad.tabs(include_deleted_content=False)) + + # Check the amount of files + assert len(list(tab_dir.iterdir())) == len(file_text_map.keys()) + assert len(records) == len(file_text_map.keys()) + + # The recovered content in the records should match the original data, as well as the length and all the + # other saved metadata + for rec in records: + assert len(rec.content) == len(file_text_map[rec.path.name][0]) + assert rec.content == file_text_map[rec.path.name][0] + assert rec.saved_path == file_text_map[rec.path.name][1] + assert rec.modification_time == file_text_map[rec.path.name][2] + assert rec.sha256 == file_text_map[rec.path.name][3] From 677817c389da00c3f22c5178d863fc3bcf254eb5 Mon Sep 17 00:00:00 2001 From: Joost Jansen <12032793+joost-j@users.noreply.github.com> Date: Mon, 13 May 2024 10:27:59 +0200 Subject: [PATCH 31/36] Change cstruct version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 743e3aaac..e2db9523c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,7 @@ classifiers = [ ] dependencies = [ "defusedxml", - "dissect.cstruct>=3.14.dev4,<4.0.dev", + "dissect.cstruct>=3.14.dev,<4.0.dev", "dissect.eventlog>=3.0.dev,<4.0.dev", "dissect.evidence>=3.0.dev,<4.0.dev", "dissect.hypervisor>=3.0.dev,<4.0.dev", From 9674e377db6c35484178b3b89655fbb981e13832 Mon Sep 17 00:00:00 2001 From: Joost Jansen <12032793+joost-j@users.noreply.github.com> Date: Wed, 14 Aug 2024 11:18:54 +0200 Subject: [PATCH 32/36] Remove the --include-deleted-contents arg and make it default --- .../plugins/apps/texteditor/texteditor.py | 5 +- .../plugins/apps/texteditor/windowsnotepad.py | 85 ++++++++----------- .../apps/texteditor/test_texteditor.py | 74 +++++++++------- 3 files changed, 79 insertions(+), 85 deletions(-) diff --git a/dissect/target/plugins/apps/texteditor/texteditor.py b/dissect/target/plugins/apps/texteditor/texteditor.py index ab3fadf03..1063d4919 100644 --- a/dissect/target/plugins/apps/texteditor/texteditor.py +++ b/dissect/target/plugins/apps/texteditor/texteditor.py @@ -2,10 +2,7 @@ from dissect.target.helpers.record import create_extended_descriptor from dissect.target.plugin import NamespacePlugin -GENERIC_TAB_CONTENTS_RECORD_FIELDS = [ - ("string", "content"), - ("path", "path"), -] +GENERIC_TAB_CONTENTS_RECORD_FIELDS = [("string", "content"), ("path", "path"), ("string", "deleted_content")] TexteditorTabContentRecord = create_extended_descriptor([UserRecordDescriptorExtension])( "texteditor/tab", GENERIC_TAB_CONTENTS_RECORD_FIELDS diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py index 4da9fccbe..d1406d7c7 100644 --- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py +++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py @@ -6,6 +6,7 @@ from dissect.cstruct import cstruct from dissect.util.ts import wintimestamp +from flow.record.fieldtypes import digest from dissect.target.exceptions import UnsupportedPluginError from dissect.target.helpers.descriptor_extensions import ( @@ -19,7 +20,7 @@ WindowsUserRecord, create_extended_descriptor, ) -from dissect.target.plugin import arg, export +from dissect.target.plugin import export from dissect.target.plugins.apps.texteditor.texteditor import ( GENERIC_TAB_CONTENTS_RECORD_FIELDS, TexteditorPlugin, @@ -96,7 +97,7 @@ }; """ -WINDOWS_SAVED_TABS_EXTRA_FIELDS = [("datetime", "modification_time"), ("string", "sha256"), ("path", "saved_path")] +WINDOWS_SAVED_TABS_EXTRA_FIELDS = [("datetime", "modification_time"), ("digest", "hashes"), ("path", "saved_path")] class WindowsSavedTabRecordDescriptorExtension(RecordDescriptorExtensionBase): @@ -111,14 +112,14 @@ def _fill_default_fields(self, record_kwargs): modification_time = None saved_path = None - sha256 = None + hashes = None if r: modification_time = r.modification_time - sha256 = r.sha256 + hashes = r.hashes saved_path = r.saved_path - record_kwargs.update({"modification_time": modification_time, "sha256": sha256, "saved_path": saved_path}) + record_kwargs.update({"modification_time": modification_time, "hashes": hashes, "saved_path": saved_path}) return record_kwargs @@ -139,7 +140,8 @@ def _fill_default_fields(self, record_kwargs): ) WindowsNotepadUnsavedTabContentRecord = create_extended_descriptor([])( - "texteditor/windowsnotepad/tab_content/unsaved", GENERIC_TAB_CONTENTS_RECORD_FIELDS + "texteditor/windowsnotepad/tab_content/unsaved", + GENERIC_TAB_CONTENTS_RECORD_FIELDS, ) WindowsNotepadSavedTabContentRecord = create_extended_descriptor([])( @@ -158,14 +160,12 @@ def _calc_crc32(data: bytes) -> bytes: class WindowsNotepadTabContent: """Windows notepad tab content parser""" - def __new__( - cls, file: TargetPath, include_deleted_content=False - ) -> WindowsNotepadSavedTabContentRecord | WindowsNotepadUnsavedTabContentRecord: - return cls._process_tab_file(file, include_deleted_content) + def __new__(cls, file: TargetPath) -> WindowsNotepadSavedTabContentRecord | WindowsNotepadUnsavedTabContentRecord: + return cls._process_tab_file(file) @staticmethod def _process_tab_file( - file: TargetPath, include_deleted_content: bool + file: TargetPath, ) -> WindowsNotepadSavedTabContentRecord | WindowsNotepadUnsavedTabContentRecord: """Parse a binary tab file and reconstruct the contents. @@ -225,10 +225,6 @@ def _process_tab_file( # Used to store the final content content = "" - # After a fixed_size_data_block, some more variable_size_data_blocks can be present. This boolean - # keeps track of whether more data is still present. - has_remaining_data = False - # In the case that a fixedSizeDataBlock is present, this value is set to a nonzero value if tab_header.fixedSizeBlockLength > 0: # So we parse the fixed size data block @@ -250,14 +246,16 @@ def _process_tab_file( # Add the content of the fixed size data block to the tab content content += data_entry.data - # The hasRemainingVariableDataBlocks indicates whether more data will follow after this single block - if data_entry.hasRemainingVariableDataBlocks == 1: - has_remaining_data = True + # Used to store the deleted content, if available + deleted_content = "" # If fixedSizeBlockLength in the header has a value of zero, this means that the entire file consists of # variable-length blocks. Furthermore, if there is any remaining data after the - # first fixed size blocks, also continue we also want to continue parsing - if tab_header.fixedSizeBlockLength == 0 or has_remaining_data: + # first fixed size blocks, as indicated by the value of hasRemainingVariableDataBlocks, + # also continue we also want to continue parsing + if tab_header.fixedSizeBlockLength == 0 or ( + tab_header.fixedSizeBlockLength > 0 and data_entry.hasRemainingVariableDataBlocks == 1 + ): # Here, data is stored in variable-length blocks. This happens, for example, when several # additions and deletions of characters have been recorded and these changes have not been 'flushed' @@ -265,9 +263,6 @@ def _process_tab_file( # a list is used to easily insert text at offsets text = [] - # Used to store the deleted content, if available and requested - deleted_content = "" - while True: # Unfortunately, there is no way of determining how many blocks there are. So just try to parse # until we reach EOF, after which we stop. @@ -295,32 +290,28 @@ def _process_tab_file( elif data_entry.nDeleted > 0: # Create a new slice. Include everything up to the offset, # plus everything after the nDeleted following bytes - if include_deleted_content: - deleted_content += "".join( - text[data_entry.offset : data_entry.offset + data_entry.nDeleted] - ) + deleted_content += "".join(text[data_entry.offset : data_entry.offset + data_entry.nDeleted]) text = text[: data_entry.offset] + text[data_entry.offset + data_entry.nDeleted :] # Join all the characters to reconstruct the original text within the variable-length data blocks text = "".join(text) - # Add the deleted content, if specified - if include_deleted_content: - text += " --- DELETED-CONTENT: " - text += deleted_content - # Finally, add the reconstructed text to the tab content content += text + # Return None if no deleted content was found + deleted_content = deleted_content if deleted_content else None + if file_header.fileType == 0: - return WindowsNotepadUnsavedTabContentRecord(content=content, path=file) + return WindowsNotepadUnsavedTabContentRecord(content=content, path=file, deleted_content=deleted_content) else: return WindowsNotepadSavedTabContentRecord( content=content, path=file, modification_time=wintimestamp(tab_header.timestamp), - sha256=tab_header.sha256.hex(), + hashes=digest((None, None, tab_header.sha256.hex())), saved_path=tab_header.filePath, + deleted_content=deleted_content, ) @@ -347,31 +338,29 @@ def check_compatible(self) -> None: if not self.users_tabs: raise UnsupportedPluginError("No Windows Notepad temporary tab files found") - @arg( - "--include-deleted-content", - type=bool, - default=False, - required=False, - help="Include deleted but recoverable content.", - ) @export(record=DynamicDescriptor(["path", "datetime", "string"])) - def tabs(self, include_deleted_content) -> Iterator[WindowsNotepadSavedTabRecord | WindowsNotepadUnsavedTabRecord]: + def tabs(self) -> Iterator[WindowsNotepadSavedTabRecord | WindowsNotepadUnsavedTabRecord]: """Return contents from Windows 11 temporary Notepad tabs. - Yields TextEditorTabRecord with the following fields: - contents (string): The contents of the tab. - path (path): The path the content originates from. + Yields a WindowsNotepadSavedTabRecord or WindowsNotepadUnsavedTabRecord, depending on the state of the tab. """ for file, user in self.users_tabs: # Parse the file r: WindowsNotepadSavedTabContentRecord | WindowsNotepadUnsavedTabContentRecord = WindowsNotepadTabContent( - file, include_deleted_content + file ) # If the modification_time attribute is present, this means that it's a WindowsNotepadSavedTabContentRecord if hasattr(r, "modification_time"): yield WindowsNotepadSavedTabRecord( - content=r.content, path=r.path, _saved=r, _target=self.target, _user=user + content=r.content, + path=r.path, + _saved=r, + _target=self.target, + _user=user, + deleted_content=r.deleted_content, ) else: - yield WindowsNotepadUnsavedTabRecord(content=r.content, path=r.path, _target=self.target, _user=user) + yield WindowsNotepadUnsavedTabRecord( + content=r.content, path=r.path, _target=self.target, _user=user, deleted_content=r.deleted_content + ) diff --git a/tests/plugins/apps/texteditor/test_texteditor.py b/tests/plugins/apps/texteditor/test_texteditor.py index c5e7d5906..9a8394975 100644 --- a/tests/plugins/apps/texteditor/test_texteditor.py +++ b/tests/plugins/apps/texteditor/test_texteditor.py @@ -36,18 +36,21 @@ def test_windows_tab_parsing(tmp_path): content_record = WindowsNotepadTabContent(tab_file) assert content_record.content == "Not saved aasdflasd" - content_record_with_deletions = WindowsNotepadTabContent(tab_file, include_deleted_content=True) - assert content_record_with_deletions.content == "Not saved aasdflasd --- DELETED-CONTENT: snUlltllafds tjkf" + content_record_with_deletions = WindowsNotepadTabContent(tab_file) + assert content_record_with_deletions.content == "Not saved aasdflasd" + assert content_record_with_deletions.deleted_content == "snUlltllafds tjkf" def test_windows_tab_plugin_deleted_contents(target_win, fs_win, tmp_path, target_win_users, caplog): file_text_map = { - "unsaved-with-deletions.bin": "Not saved aasdflasd --- DELETED-CONTENT: snUlltllafds tjkf", - "lots-of-deletions.bin": "This a text, which is nothing special. But I am going to modify it a bit. " - "For example, I have removed quote some stuff. " - "Adding a word in the beginning now... " - "At this point, I've edited it quite a lot. --- DELETED-CONTENT: " - "b a ,elpmac ydaerlae already thi laiceps emos", + "unsaved-with-deletions.bin": ("Not saved aasdflasd", "snUlltllafds tjkf"), + "lots-of-deletions.bin": ( + "This a text, which is nothing special. But I am going to modify it a bit. " + "For example, I have removed quote some stuff. " + "Adding a word in the beginning now... " + "At this point, I've edited it quite a lot.", + "b a ,elpmac ydaerlae already thi laiceps emos", + ), } tabcache = absolute_path("_data/plugins/apps/texteditor/windowsnotepad/") @@ -65,7 +68,7 @@ def test_windows_tab_plugin_deleted_contents(target_win, fs_win, tmp_path, targe target_win.add_plugin(WindowsNotepadPlugin) - records = list(target_win.windowsnotepad.tabs(include_deleted_content=True)) + records = list(target_win.windowsnotepad.tabs()) # Check the amount of files assert len(list(tab_dir.iterdir())) == len(file_text_map.keys()) @@ -73,29 +76,33 @@ def test_windows_tab_plugin_deleted_contents(target_win, fs_win, tmp_path, targe # The recovered content in the records should match the original data, as well as the length for rec in records: - assert rec.content == file_text_map[rec.path.name] - assert len(rec.content) == len(file_text_map[rec.path.name]) + print(rec) + assert rec.content == file_text_map[rec.path.name][0] + assert rec.deleted_content == file_text_map[rec.path.name][1] def test_windows_tab_plugin_default(target_win, fs_win, tmp_path, target_win_users, caplog): file_text_map = { - "c515e86f-08b3-4d76-844a-cddfcd43fcbb.bin": text1, - "85167c9d-aac2-4469-ae44-db5dccf8f7f4.bin": text2, - "dae80df8-e1e5-4996-87fe-b453f63fcb19.bin": text3, - "3f915e17-cf6c-462b-9bd1-2f23314cb979.bin": text4, - "ba291ccd-f1c3-4ca8-949c-c01f6633789d.bin": (text5 * 5), - "e609218e-94f2-45fa-84e2-f29df2190b26.bin": (text6 * 1260), - "3d0cc86e-dfc9-4f16-b74a-918c2c24188c.bin": loremipsum, - "wrong-checksum.bin": text4, # only added to check for corrupt checksum, not validity - "cfe38135-9dca-4480-944f-d5ea0e1e589f.bin": (loremipsum * 37)[:-2], # removed the two newlines in this file - "saved.bin": "Saved!", - "unsaved.bin": "Not saved at all", - "unsaved-with-deletions.bin": "Not saved aasdflasd", - "lots-of-deletions.bin": text7, - "appclosed_saved_and_deletions.bin": text8, - "appclosed_unsaved.bin": "Closing application now", - "new-format.bin": "", - "stored_unsaved_with_new_data.bin": "Stored to disk but unsaved, but with extra data.", + "c515e86f-08b3-4d76-844a-cddfcd43fcbb.bin": (text1, None), + "85167c9d-aac2-4469-ae44-db5dccf8f7f4.bin": (text2, None), + "dae80df8-e1e5-4996-87fe-b453f63fcb19.bin": (text3, "THis is "), + "3f915e17-cf6c-462b-9bd1-2f23314cb979.bin": (text4, None), + "ba291ccd-f1c3-4ca8-949c-c01f6633789d.bin": ((text5 * 5), None), + "e609218e-94f2-45fa-84e2-f29df2190b26.bin": ((text6 * 1260), None), + "3d0cc86e-dfc9-4f16-b74a-918c2c24188c.bin": (loremipsum, None), + "wrong-checksum.bin": (text4, None), # only added to check for corrupt checksum, not validity + "cfe38135-9dca-4480-944f-d5ea0e1e589f.bin": ( + (loremipsum * 37)[:-2], + None, + ), # removed the two newlines in this file + "saved.bin": ("Saved!", None), + "unsaved.bin": ("Not saved at all", "snUllt"), + "unsaved-with-deletions.bin": ("Not saved aasdflasd", "snUlltllafds tjkf"), + "lots-of-deletions.bin": (text7, "b a ,elpmac ydaerlae already thi laiceps emos"), + "appclosed_saved_and_deletions.bin": (text8, None), + "appclosed_unsaved.bin": ("Closing application now", None), + "new-format.bin": ("", None), + "stored_unsaved_with_new_data.bin": ("Stored to disk but unsaved, but with extra data.", None), } tabcache = absolute_path("_data/plugins/apps/texteditor/windowsnotepad/") @@ -113,7 +120,7 @@ def test_windows_tab_plugin_default(target_win, fs_win, tmp_path, target_win_use target_win.add_plugin(WindowsNotepadPlugin) - records = list(target_win.windowsnotepad.tabs(include_deleted_content=False)) + records = list(target_win.windowsnotepad.tabs()) # Check the amount of files assert len(list(tab_dir.iterdir())) == len(file_text_map.keys()) @@ -128,8 +135,9 @@ def test_windows_tab_plugin_default(target_win, fs_win, tmp_path, target_win_use # The recovered content in the records should match the original data, as well as the length for rec in records: - assert rec.content == file_text_map[rec.path.name] - assert len(rec.content) == len(file_text_map[rec.path.name]) + print(rec) + assert rec.content == file_text_map[rec.path.name][0] + assert rec.deleted_content == file_text_map[rec.path.name][1] def test_windows_saved_tab_plugin_extra_fields(target_win, fs_win, tmp_path, target_win_users, caplog): @@ -163,7 +171,7 @@ def test_windows_saved_tab_plugin_extra_fields(target_win, fs_win, tmp_path, tar target_win.add_plugin(WindowsNotepadPlugin) - records = list(target_win.windowsnotepad.tabs(include_deleted_content=False)) + records = list(target_win.windowsnotepad.tabs()) # Check the amount of files assert len(list(tab_dir.iterdir())) == len(file_text_map.keys()) @@ -176,4 +184,4 @@ def test_windows_saved_tab_plugin_extra_fields(target_win, fs_win, tmp_path, tar assert rec.content == file_text_map[rec.path.name][0] assert rec.saved_path == file_text_map[rec.path.name][1] assert rec.modification_time == file_text_map[rec.path.name][2] - assert rec.sha256 == file_text_map[rec.path.name][3] + assert rec.hashes.sha256 == file_text_map[rec.path.name][3] From 06e3f075dffc99cf3b1ff29e4939e1f695ccb4d6 Mon Sep 17 00:00:00 2001 From: Joost Jansen <12032793+joost-j@users.noreply.github.com> Date: Wed, 14 Aug 2024 12:19:59 +0200 Subject: [PATCH 33/36] Rewrite TabContent records into WindowsNotepadTab class --- .../plugins/apps/texteditor/windowsnotepad.py | 164 ++++++------------ .../apps/texteditor/test_texteditor.py | 14 +- 2 files changed, 59 insertions(+), 119 deletions(-) diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py index d1406d7c7..210b85331 100644 --- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py +++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py @@ -9,10 +9,7 @@ from flow.record.fieldtypes import digest from dissect.target.exceptions import UnsupportedPluginError -from dissect.target.helpers.descriptor_extensions import ( - RecordDescriptorExtensionBase, - UserRecordDescriptorExtension, -) +from dissect.target.helpers.descriptor_extensions import UserRecordDescriptorExtension from dissect.target.helpers.fsutil import TargetPath from dissect.target.helpers.record import ( DynamicDescriptor, @@ -99,53 +96,14 @@ WINDOWS_SAVED_TABS_EXTRA_FIELDS = [("datetime", "modification_time"), ("digest", "hashes"), ("path", "saved_path")] - -class WindowsSavedTabRecordDescriptorExtension(RecordDescriptorExtensionBase): - """RecordDescriptorExtension used to add extra fields to tabs that are saved to disk and contain more info.""" - - _default_fields = WINDOWS_SAVED_TABS_EXTRA_FIELDS - - _input_fields = ("_saved",) - - def _fill_default_fields(self, record_kwargs): - r: WindowsNotepadSavedTabContentRecord = record_kwargs.get("_saved", None) - - modification_time = None - saved_path = None - hashes = None - - if r: - modification_time = r.modification_time - hashes = r.hashes - saved_path = r.saved_path - - record_kwargs.update({"modification_time": modification_time, "hashes": hashes, "saved_path": saved_path}) - return record_kwargs - - -# Different Record types for both saved/unsaved tabs, and with/without UserRecordDescriptor so that the -# plugin can be used as a standalone tool as well - - WindowsNotepadUnsavedTabRecord = create_extended_descriptor([UserRecordDescriptorExtension])( "texteditor/windowsnotepad/tab/unsaved", GENERIC_TAB_CONTENTS_RECORD_FIELDS, ) -WindowsNotepadSavedTabRecord = create_extended_descriptor( - [UserRecordDescriptorExtension, WindowsSavedTabRecordDescriptorExtension] -)( +WindowsNotepadSavedTabRecord = create_extended_descriptor([UserRecordDescriptorExtension])( "texteditor/windowsnotepad/tab/saved", - GENERIC_TAB_CONTENTS_RECORD_FIELDS, -) - -WindowsNotepadUnsavedTabContentRecord = create_extended_descriptor([])( - "texteditor/windowsnotepad/tab_content/unsaved", - GENERIC_TAB_CONTENTS_RECORD_FIELDS, -) - -WindowsNotepadSavedTabContentRecord = create_extended_descriptor([])( - "texteditor/windowsnotepad/tab_content/saved", GENERIC_TAB_CONTENTS_RECORD_FIELDS + WINDOWS_SAVED_TABS_EXTRA_FIELDS + GENERIC_TAB_CONTENTS_RECORD_FIELDS + WINDOWS_SAVED_TABS_EXTRA_FIELDS, ) c_windowstab = cstruct() @@ -157,34 +115,26 @@ def _calc_crc32(data: bytes) -> bytes: return zlib.crc32(data).to_bytes(length=4, byteorder="big") -class WindowsNotepadTabContent: +class WindowsNotepadTab: """Windows notepad tab content parser""" - def __new__(cls, file: TargetPath) -> WindowsNotepadSavedTabContentRecord | WindowsNotepadUnsavedTabContentRecord: - return cls._process_tab_file(file) - - @staticmethod - def _process_tab_file( - file: TargetPath, - ) -> WindowsNotepadSavedTabContentRecord | WindowsNotepadUnsavedTabContentRecord: - """Parse a binary tab file and reconstruct the contents. + def __init__(self, file: TargetPath): + self.file = file + self._process_tab_file() - Args: - file: The binary file on disk that needs to be parsed. - - Returns: - A TextEditorTabRecord containing information that is in the tab. - """ - with file.open("rb") as fh: + def _process_tab_file(self): + """Parse a binary tab file and reconstruct the contents.""" + with self.file.open("rb") as fh: # Header is the same for all types - file_header = c_windowstab.file_header(fh) + self.file_header = c_windowstab.file_header(fh) + + # fileType == 1 # 0 is unsaved, 1 is saved, 9 is settings? + self.is_saved = self.file_header.fileType == 1 # Tabs can be saved to a file with a filename on disk, or unsaved (kept in the TabState folder). # Depending on the file's saved state, different header fields are present - tab_header = ( - c_windowstab.tab_header_saved(fh) - if file_header.fileType == 1 # 0 is unsaved, 1 is saved, 9 is settings? - else c_windowstab.tab_header_unsaved(fh) + self.tab_header = ( + c_windowstab.tab_header_saved(fh) if self.is_saved else c_windowstab.tab_header_unsaved(fh) ) # There appears to be a optionsVersion field that specifies the options that are passed. @@ -192,13 +142,13 @@ def _process_tab_file( # that is parsed, so just going with the 'optionsVersion' type for now. # We don't use the options, but since they are required for the CRC32 checksum # we store the byte representation - if tab_header.optionsVersion == 0: + if self.tab_header.optionsVersion == 0: # No options specified - options = b"" - elif tab_header.optionsVersion == 1: - options = c_windowstab.options_v1(fh).dumps() - elif tab_header.optionsVersion == 2: - options = c_windowstab.options_v2(fh).dumps() + self.options = b"" + elif self.tab_header.optionsVersion == 1: + self.options = c_windowstab.options_v1(fh).dumps() + elif self.tab_header.optionsVersion == 2: + self.options = c_windowstab.options_v2(fh).dumps() else: # Raise an error, since we don't know how many bytes future optionVersions will occupy. # Now knowing how many bytes to parse can mess up the alignment and structs. @@ -206,45 +156,48 @@ def _process_tab_file( # If the file is not saved to disk and no fixedSizeBlockLength is present, an extra checksum stub # is present. So parse that first - if file_header.fileType == 0 and tab_header.fixedSizeBlockLength == 0: + if not self.is_saved and self.tab_header.fixedSizeBlockLength == 0: # Two unknown bytes before the CRC32 tab_header_crc32_stub = c_windowstab.tab_header_crc32_stub(fh) # Calculate CRC32 of the header and check if it matches actual_header_crc32 = _calc_crc32( - file_header.dumps()[3:] + tab_header.dumps() + options + tab_header_crc32_stub.dumps()[:-4] + self.file_header.dumps()[3:] + + self.tab_header.dumps() + + self.options + + tab_header_crc32_stub.dumps()[:-4] ) if tab_header_crc32_stub.crc32 != actual_header_crc32: logging.warning( "CRC32 mismatch in header of file: %s (expected=%s, actual=%s)", - file.name, + self.file.name, tab_header_crc32_stub.crc32.hex(), actual_header_crc32.hex(), ) # Used to store the final content - content = "" + self.content = "" # In the case that a fixedSizeDataBlock is present, this value is set to a nonzero value - if tab_header.fixedSizeBlockLength > 0: + if self.tab_header.fixedSizeBlockLength > 0: # So we parse the fixed size data block - data_entry = c_windowstab.fixed_size_data_block(fh) + self.data_entry = c_windowstab.fixed_size_data_block(fh) # The header (minus the magic) plus all data is included in the checksum actual_crc32 = _calc_crc32( - file_header.dumps()[3:] + tab_header.dumps() + options + data_entry.dumps()[:-4] + self.file_header.dumps()[3:] + self.tab_header.dumps() + self.options + self.data_entry.dumps()[:-4] ) - if data_entry.crc32 != actual_crc32: + if self.data_entry.crc32 != actual_crc32: logging.warning( "CRC32 mismatch in single-block file: %s (expected=%s, actual=%s)", - file.name, - data_entry.crc32.hex(), + self.file.name, + self.data_entry.crc32.hex(), actual_crc32.hex(), ) # Add the content of the fixed size data block to the tab content - content += data_entry.data + self.content += self.data_entry.data # Used to store the deleted content, if available deleted_content = "" @@ -253,8 +206,8 @@ def _process_tab_file( # variable-length blocks. Furthermore, if there is any remaining data after the # first fixed size blocks, as indicated by the value of hasRemainingVariableDataBlocks, # also continue we also want to continue parsing - if tab_header.fixedSizeBlockLength == 0 or ( - tab_header.fixedSizeBlockLength > 0 and data_entry.hasRemainingVariableDataBlocks == 1 + if self.tab_header.fixedSizeBlockLength == 0 or ( + self.tab_header.fixedSizeBlockLength > 0 and self.data_entry.hasRemainingVariableDataBlocks == 1 ): # Here, data is stored in variable-length blocks. This happens, for example, when several # additions and deletions of characters have been recorded and these changes have not been 'flushed' @@ -278,7 +231,7 @@ def _process_tab_file( if data_entry.crc32 != actual_crc32: logging.warning( "CRC32 mismatch in multi-block file: %s (expected=%s, actual=%s)", - file.name, + self.file.name, data_entry.crc32.hex(), actual_crc32.hex(), ) @@ -297,22 +250,10 @@ def _process_tab_file( text = "".join(text) # Finally, add the reconstructed text to the tab content - content += text - - # Return None if no deleted content was found - deleted_content = deleted_content if deleted_content else None - - if file_header.fileType == 0: - return WindowsNotepadUnsavedTabContentRecord(content=content, path=file, deleted_content=deleted_content) - else: - return WindowsNotepadSavedTabContentRecord( - content=content, - path=file, - modification_time=wintimestamp(tab_header.timestamp), - hashes=digest((None, None, tab_header.sha256.hex())), - saved_path=tab_header.filePath, - deleted_content=deleted_content, - ) + self.content += text + + # Set None if no deleted content was found + self.deleted_content = deleted_content if deleted_content else None class WindowsNotepadPlugin(TexteditorPlugin): @@ -346,21 +287,20 @@ def tabs(self) -> Iterator[WindowsNotepadSavedTabRecord | WindowsNotepadUnsavedT """ for file, user in self.users_tabs: # Parse the file - r: WindowsNotepadSavedTabContentRecord | WindowsNotepadUnsavedTabContentRecord = WindowsNotepadTabContent( - file - ) + w: WindowsNotepadTab = WindowsNotepadTab(file) - # If the modification_time attribute is present, this means that it's a WindowsNotepadSavedTabContentRecord - if hasattr(r, "modification_time"): + if w.is_saved: yield WindowsNotepadSavedTabRecord( - content=r.content, - path=r.path, - _saved=r, + content=w.content, + path=w.file, + deleted_content=w.deleted_content, + hashes=digest((None, None, w.tab_header.sha256.hex())), + saved_path=w.tab_header.filePath, + modification_time=wintimestamp(w.tab_header.timestamp), _target=self.target, _user=user, - deleted_content=r.deleted_content, ) else: yield WindowsNotepadUnsavedTabRecord( - content=r.content, path=r.path, _target=self.target, _user=user, deleted_content=r.deleted_content + content=w.content, path=w.file, _target=self.target, _user=user, deleted_content=w.deleted_content ) diff --git a/tests/plugins/apps/texteditor/test_texteditor.py b/tests/plugins/apps/texteditor/test_texteditor.py index 9a8394975..975ef0c77 100644 --- a/tests/plugins/apps/texteditor/test_texteditor.py +++ b/tests/plugins/apps/texteditor/test_texteditor.py @@ -5,7 +5,7 @@ from dissect.target.plugins.apps.texteditor.windowsnotepad import ( WindowsNotepadPlugin, - WindowsNotepadTabContent, + WindowsNotepadTab, ) from tests._utils import absolute_path @@ -30,15 +30,15 @@ loremipsum = """Lorem ipsum dolor sit amet. Eum error blanditiis eum pariatur delectus ut consequuntur officiis a excepturi dignissimos et doloribus quia 33 perspiciatis soluta nam perspiciatis dolor. Ut repudiandae quidem cum sint modi qui sint consequatur. Aut autem quidem eum enim consequatur qui voluptate consequatur non similique voluptate. A vitae modi vel sint provident ut galisum tenetur sit voluptatem amet. Est impedit perspiciatis est repudiandae voluptates ut fugit alias! Eum magni esse aut velit illum qui excepturi aperiam. Ex dolores asperiores ut debitis omnis qui consequuntur dolore. Est voluptatem mollitia et quibusdam unde ea accusamus fuga. Cum quis galisum et impedit sunt qui aliquam perspiciatis sed modi quidem qui nisi molestias. Aut temporibus architecto ut neque voluptatem et consequatur deleniti sed accusantium quibusdam et omnis dignissimos ad rerum ipsam et rerum quia. Ut nihil repellat et eaque molestias quo iusto ipsum At optio sint eos quidem earum?\r\rEx deleniti unde eum tenetur rerum ea dolore numquam? Eos aperiam officiis et neque explicabo et enim atque ut eaque omnis non illum eveniet est molestias itaque et ratione voluptatem. Ea deserunt nemo et quos tempora et nostrum aperiam sit necessitatibus illo sit culpa placeat. Vel tempore quibusdam ut velit voluptate aut odio facere non voluptas earum est odio galisum et voluptas harum. Et blanditiis sapiente et nostrum laborum aut voluptatem explicabo a quasi assumenda. Est voluptatem quia eum minima galisum quo totam excepturi aut facilis enim vel voluptate repudiandae sit distinctio laboriosam. Quo possimus molestiae et molestiae accusantium est voluptas omnis sed obcaecati natus. Non vitae asperiores qui nostrum enim id saepe fugiat et incidunt quasi.\r\rEos ipsa facilis aut excepturi voluptatem a omnis magni vel magni iste. Sed ipsum consequatur qui reprehenderit deleniti et soluta molestiae. Ut vero assumenda id dolor ipsum in deleniti voluptatem aut quis quisquam sed repudiandae temporibus ab quia inventore. Sed velit fugit vel facere cumque et delectus ullam sed eaque impedit. Est veritatis dignissimos aut doloribus dolorem vel pariatur repellendus sit nesciunt similique eum architecto quia. Ea expedita veritatis eum dolorem molestiae ut enim fugit aut beatae quibusdam. Aut voluptas natus in quidem deleniti aut animi iure est incidunt tenetur qui culpa maiores! Et nostrum quaerat qui consequatur consequatur aut aliquam atque aut praesentium rerum et consequuntur exercitationem. Non accusantium ipsa vel consectetur vitae ut magnam autem et natus rerum ut consectetur inventore est doloremque temporibus 33 dolores doloribus! Aut perferendis optio et nostrum repellendus et fugit itaque ut nisi neque sed sint quaerat. Aut placeat architecto et eius sapiente eum molestiae quam. Quo mollitia sapiente non Quis neque non tempora laudantium. Quo distinctio quos et molestias natus sit veritatis consequuntur aut repellendus neque a porro galisum cum numquam nesciunt et animi earum? Aut dolorum dolore non assumenda omnis et molestiae amet id sint vero est eligendi harum sit temporibus magnam aut ipsam quos.\r\r""" # noqa: E501 -def test_windows_tab_parsing(tmp_path): +def test_windows_tab_parsing(): # Standalone parsing of tab files, so not using the plugin tab_file = Path(absolute_path("_data/plugins/apps/texteditor/windowsnotepad/unsaved-with-deletions.bin")) - content_record = WindowsNotepadTabContent(tab_file) - assert content_record.content == "Not saved aasdflasd" + content = WindowsNotepadTab(tab_file) + assert content.content == "Not saved aasdflasd" - content_record_with_deletions = WindowsNotepadTabContent(tab_file) - assert content_record_with_deletions.content == "Not saved aasdflasd" - assert content_record_with_deletions.deleted_content == "snUlltllafds tjkf" + content_with_deletions = WindowsNotepadTab(tab_file) + assert content_with_deletions.content == "Not saved aasdflasd" + assert content_with_deletions.deleted_content == "snUlltllafds tjkf" def test_windows_tab_plugin_deleted_contents(target_win, fs_win, tmp_path, target_win_users, caplog): From a384fd9810ca2873b59f2617e57a148162a3924f Mon Sep 17 00:00:00 2001 From: Joost Jansen <12032793+joost-j@users.noreply.github.com> Date: Wed, 14 Aug 2024 12:31:00 +0200 Subject: [PATCH 34/36] Implement repr for WindowsNotepadTab class --- dissect/target/plugins/apps/texteditor/windowsnotepad.py | 9 +++++++++ tests/plugins/apps/texteditor/test_texteditor.py | 5 +---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py index 210b85331..a4898fa26 100644 --- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py +++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py @@ -120,8 +120,17 @@ class WindowsNotepadTab: def __init__(self, file: TargetPath): self.file = file + self.is_saved = None + self.content = None + self.deleted_content = None self._process_tab_file() + def __repr__(self): + return ( + f"<{self.__class__.__name__} saved={self.is_saved} " + f"content_size={len(self.content)} has_deleted_content={self.deleted_content is not None}>" + ) + def _process_tab_file(self): """Parse a binary tab file and reconstruct the contents.""" with self.file.open("rb") as fh: diff --git a/tests/plugins/apps/texteditor/test_texteditor.py b/tests/plugins/apps/texteditor/test_texteditor.py index 975ef0c77..380c1598c 100644 --- a/tests/plugins/apps/texteditor/test_texteditor.py +++ b/tests/plugins/apps/texteditor/test_texteditor.py @@ -35,10 +35,7 @@ def test_windows_tab_parsing(): tab_file = Path(absolute_path("_data/plugins/apps/texteditor/windowsnotepad/unsaved-with-deletions.bin")) content = WindowsNotepadTab(tab_file) assert content.content == "Not saved aasdflasd" - - content_with_deletions = WindowsNotepadTab(tab_file) - assert content_with_deletions.content == "Not saved aasdflasd" - assert content_with_deletions.deleted_content == "snUlltllafds tjkf" + assert repr(content) == "" def test_windows_tab_plugin_deleted_contents(target_win, fs_win, tmp_path, target_win_users, caplog): From e625684a2f2cadb61e20deab4e54eba47eb5ee47 Mon Sep 17 00:00:00 2001 From: Stefan de Reuver <9864602+Horofic@users.noreply.github.com> Date: Fri, 16 Aug 2024 11:00:41 +0200 Subject: [PATCH 35/36] Add typehints and small fixes --- .../plugins/apps/texteditor/windowsnotepad.py | 56 ++++++++++++++----- .../apps/texteditor/test_texteditor.py | 18 ++++-- 2 files changed, 53 insertions(+), 21 deletions(-) diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py index a4898fa26..df77ca45b 100644 --- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py +++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py @@ -125,13 +125,13 @@ def __init__(self, file: TargetPath): self.deleted_content = None self._process_tab_file() - def __repr__(self): + def __repr__(self) -> str: return ( f"<{self.__class__.__name__} saved={self.is_saved} " f"content_size={len(self.content)} has_deleted_content={self.deleted_content is not None}>" ) - def _process_tab_file(self): + def _process_tab_file(self) -> None: """Parse a binary tab file and reconstruct the contents.""" with self.file.open("rb") as fh: # Header is the same for all types @@ -161,7 +161,7 @@ def _process_tab_file(self): else: # Raise an error, since we don't know how many bytes future optionVersions will occupy. # Now knowing how many bytes to parse can mess up the alignment and structs. - raise Exception("Unknown option version") + raise NotImplementedError("Unknown Windows Notepad tab option version") # If the file is not saved to disk and no fixedSizeBlockLength is present, an extra checksum stub # is present. So parse that first @@ -286,30 +286,56 @@ def __init__(self, target): def check_compatible(self) -> None: if not self.users_tabs: - raise UnsupportedPluginError("No Windows Notepad temporary tab files found") + raise UnsupportedPluginError("No Windows Notepad tab files found") @export(record=DynamicDescriptor(["path", "datetime", "string"])) def tabs(self) -> Iterator[WindowsNotepadSavedTabRecord | WindowsNotepadUnsavedTabRecord]: - """Return contents from Windows 11 temporary Notepad tabs. + """Return contents from Windows 11 Notepad tabs - and its deleted content if available. - Yields a WindowsNotepadSavedTabRecord or WindowsNotepadUnsavedTabRecord, depending on the state of the tab. + Windows Notepad application for Windows 11 is now able to restore both saved and unsaved tabs when you re-open + the application. + + + Resources: + - https://github.com/fox-it/dissect.target/pull/540 + - https://github.com/JustArion/Notepad-Tabs + - https://github.com/ogmini/Notepad-Tabstate-Buffer + - https://github.com/ogmini/Notepad-State-Library + - https://github.com/Nordgaren/tabstate-util + - https://github.com/Nordgaren/tabstate-util/issues/1 + - https://medium.com/@mahmoudsoheem/new-digital-forensics-artifact-from-windows-notepad-527645906b7b + + Yields a WindowsNotepadSavedTabRecord or WindowsNotepadUnsavedTabRecord. with fields: + + .. code-block:: text + + content (string): The content of the tab. + path (path): The path to the tab file. + deleted_content (string): The deleted content of the tab, if available. + hashes (digest): A digest of the tab content. + saved_path (path): The path where the tab was saved. + modification_time (datetime): The modification time of the tab. """ for file, user in self.users_tabs: # Parse the file - w: WindowsNotepadTab = WindowsNotepadTab(file) + tab: WindowsNotepadTab = WindowsNotepadTab(file) - if w.is_saved: + if tab.is_saved: yield WindowsNotepadSavedTabRecord( - content=w.content, - path=w.file, - deleted_content=w.deleted_content, - hashes=digest((None, None, w.tab_header.sha256.hex())), - saved_path=w.tab_header.filePath, - modification_time=wintimestamp(w.tab_header.timestamp), + content=tab.content, + path=tab.file, + deleted_content=tab.deleted_content, + hashes=digest((None, None, tab.tab_header.sha256.hex())), + saved_path=tab.tab_header.filePath, + modification_time=wintimestamp(tab.tab_header.timestamp), _target=self.target, _user=user, ) else: yield WindowsNotepadUnsavedTabRecord( - content=w.content, path=w.file, _target=self.target, _user=user, deleted_content=w.deleted_content + content=tab.content, + path=tab.file, + _target=self.target, + _user=user, + deleted_content=tab.deleted_content, ) diff --git a/tests/plugins/apps/texteditor/test_texteditor.py b/tests/plugins/apps/texteditor/test_texteditor.py index 380c1598c..e8078d194 100644 --- a/tests/plugins/apps/texteditor/test_texteditor.py +++ b/tests/plugins/apps/texteditor/test_texteditor.py @@ -3,10 +3,12 @@ from flow.record.fieldtypes import datetime as dt +from dissect.target.filesystem import VirtualFilesystem from dissect.target.plugins.apps.texteditor.windowsnotepad import ( WindowsNotepadPlugin, WindowsNotepadTab, ) +from dissect.target.target import Target from tests._utils import absolute_path text1 = "This is an unsaved tab, UTF-8 encoded with Windows (CRLF). It's only 88 characters long." @@ -30,7 +32,7 @@ loremipsum = """Lorem ipsum dolor sit amet. Eum error blanditiis eum pariatur delectus ut consequuntur officiis a excepturi dignissimos et doloribus quia 33 perspiciatis soluta nam perspiciatis dolor. Ut repudiandae quidem cum sint modi qui sint consequatur. Aut autem quidem eum enim consequatur qui voluptate consequatur non similique voluptate. A vitae modi vel sint provident ut galisum tenetur sit voluptatem amet. Est impedit perspiciatis est repudiandae voluptates ut fugit alias! Eum magni esse aut velit illum qui excepturi aperiam. Ex dolores asperiores ut debitis omnis qui consequuntur dolore. Est voluptatem mollitia et quibusdam unde ea accusamus fuga. Cum quis galisum et impedit sunt qui aliquam perspiciatis sed modi quidem qui nisi molestias. Aut temporibus architecto ut neque voluptatem et consequatur deleniti sed accusantium quibusdam et omnis dignissimos ad rerum ipsam et rerum quia. Ut nihil repellat et eaque molestias quo iusto ipsum At optio sint eos quidem earum?\r\rEx deleniti unde eum tenetur rerum ea dolore numquam? Eos aperiam officiis et neque explicabo et enim atque ut eaque omnis non illum eveniet est molestias itaque et ratione voluptatem. Ea deserunt nemo et quos tempora et nostrum aperiam sit necessitatibus illo sit culpa placeat. Vel tempore quibusdam ut velit voluptate aut odio facere non voluptas earum est odio galisum et voluptas harum. Et blanditiis sapiente et nostrum laborum aut voluptatem explicabo a quasi assumenda. Est voluptatem quia eum minima galisum quo totam excepturi aut facilis enim vel voluptate repudiandae sit distinctio laboriosam. Quo possimus molestiae et molestiae accusantium est voluptas omnis sed obcaecati natus. Non vitae asperiores qui nostrum enim id saepe fugiat et incidunt quasi.\r\rEos ipsa facilis aut excepturi voluptatem a omnis magni vel magni iste. Sed ipsum consequatur qui reprehenderit deleniti et soluta molestiae. Ut vero assumenda id dolor ipsum in deleniti voluptatem aut quis quisquam sed repudiandae temporibus ab quia inventore. Sed velit fugit vel facere cumque et delectus ullam sed eaque impedit. Est veritatis dignissimos aut doloribus dolorem vel pariatur repellendus sit nesciunt similique eum architecto quia. Ea expedita veritatis eum dolorem molestiae ut enim fugit aut beatae quibusdam. Aut voluptas natus in quidem deleniti aut animi iure est incidunt tenetur qui culpa maiores! Et nostrum quaerat qui consequatur consequatur aut aliquam atque aut praesentium rerum et consequuntur exercitationem. Non accusantium ipsa vel consectetur vitae ut magnam autem et natus rerum ut consectetur inventore est doloremque temporibus 33 dolores doloribus! Aut perferendis optio et nostrum repellendus et fugit itaque ut nisi neque sed sint quaerat. Aut placeat architecto et eius sapiente eum molestiae quam. Quo mollitia sapiente non Quis neque non tempora laudantium. Quo distinctio quos et molestias natus sit veritatis consequuntur aut repellendus neque a porro galisum cum numquam nesciunt et animi earum? Aut dolorum dolore non assumenda omnis et molestiae amet id sint vero est eligendi harum sit temporibus magnam aut ipsam quos.\r\r""" # noqa: E501 -def test_windows_tab_parsing(): +def test_windows_tab_parsing() -> None: # Standalone parsing of tab files, so not using the plugin tab_file = Path(absolute_path("_data/plugins/apps/texteditor/windowsnotepad/unsaved-with-deletions.bin")) content = WindowsNotepadTab(tab_file) @@ -38,7 +40,9 @@ def test_windows_tab_parsing(): assert repr(content) == "" -def test_windows_tab_plugin_deleted_contents(target_win, fs_win, tmp_path, target_win_users, caplog): +def test_windows_tab_plugin_deleted_contents( + target_win: Target, fs_win: VirtualFilesystem, tmp_path: Path, target_win_users: Target +) -> None: file_text_map = { "unsaved-with-deletions.bin": ("Not saved aasdflasd", "snUlltllafds tjkf"), "lots-of-deletions.bin": ( @@ -73,12 +77,13 @@ def test_windows_tab_plugin_deleted_contents(target_win, fs_win, tmp_path, targe # The recovered content in the records should match the original data, as well as the length for rec in records: - print(rec) assert rec.content == file_text_map[rec.path.name][0] assert rec.deleted_content == file_text_map[rec.path.name][1] -def test_windows_tab_plugin_default(target_win, fs_win, tmp_path, target_win_users, caplog): +def test_windows_tab_plugin_default( + target_win: Target, fs_win: VirtualFilesystem, tmp_path: Path, target_win_users: Target, caplog +) -> None: file_text_map = { "c515e86f-08b3-4d76-844a-cddfcd43fcbb.bin": (text1, None), "85167c9d-aac2-4469-ae44-db5dccf8f7f4.bin": (text2, None), @@ -132,12 +137,13 @@ def test_windows_tab_plugin_default(target_win, fs_win, tmp_path, target_win_use # The recovered content in the records should match the original data, as well as the length for rec in records: - print(rec) assert rec.content == file_text_map[rec.path.name][0] assert rec.deleted_content == file_text_map[rec.path.name][1] -def test_windows_saved_tab_plugin_extra_fields(target_win, fs_win, tmp_path, target_win_users, caplog): +def test_windows_saved_tab_plugin_extra_fields( + target_win: Target, fs_win: VirtualFilesystem, tmp_path: Path, target_win_users: Target +) -> None: file_text_map = { "saved.bin": ( "Saved!", From 27fca9256cc649c795981572099933bbed30d81b Mon Sep 17 00:00:00 2001 From: Stefan de Reuver <9864602+Horofic@users.noreply.github.com> Date: Fri, 16 Aug 2024 11:25:40 +0200 Subject: [PATCH 36/36] Add suggestions --- .../target/plugins/apps/texteditor/windowsnotepad.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py index df77ca45b..260bc5b04 100644 --- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py +++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py @@ -12,7 +12,6 @@ from dissect.target.helpers.descriptor_extensions import UserRecordDescriptorExtension from dissect.target.helpers.fsutil import TargetPath from dissect.target.helpers.record import ( - DynamicDescriptor, UnixUserRecord, WindowsUserRecord, create_extended_descriptor, @@ -22,12 +21,13 @@ GENERIC_TAB_CONTENTS_RECORD_FIELDS, TexteditorPlugin, ) +from dissect.target.target import Target # Thanks to @Nordgaren, @daddycocoaman, @JustArion and @ogmini for their suggestions and feedback in the PR # thread. This really helped to figure out the last missing bits and pieces # required for recovering text from these files. -c_def = """ +windowstab_def = """ struct file_header { char magic[2]; // NP uleb128 updateNumber; // increases on every settings update when fileType=9, @@ -106,8 +106,7 @@ GENERIC_TAB_CONTENTS_RECORD_FIELDS + WINDOWS_SAVED_TABS_EXTRA_FIELDS, ) -c_windowstab = cstruct() -c_windowstab.load(c_def) +c_windowstab = cstruct().load(windowstab_def) def _calc_crc32(data: bytes) -> bytes: @@ -272,7 +271,7 @@ class WindowsNotepadPlugin(TexteditorPlugin): GLOB = "AppData/Local/Packages/Microsoft.WindowsNotepad_*/LocalState/TabState/*.bin" - def __init__(self, target): + def __init__(self, target: Target): super().__init__(target) self.users_tabs: list[TargetPath, UnixUserRecord | WindowsUserRecord] = [] for user_details in self.target.user_details.all_with_home(): @@ -288,7 +287,7 @@ def check_compatible(self) -> None: if not self.users_tabs: raise UnsupportedPluginError("No Windows Notepad tab files found") - @export(record=DynamicDescriptor(["path", "datetime", "string"])) + @export(record=[WindowsNotepadSavedTabRecord, WindowsNotepadUnsavedTabRecord]) def tabs(self) -> Iterator[WindowsNotepadSavedTabRecord | WindowsNotepadUnsavedTabRecord]: """Return contents from Windows 11 Notepad tabs - and its deleted content if available.