From c528241c020f163277e0106572bdfd5b5546daff Mon Sep 17 00:00:00 2001
From: Joost Jansen <joost.jansen@fox-it.com>
Date: Tue, 23 Jan 2024 13:18:44 +0100
Subject: [PATCH 01/36] Initial commit

Improvements, formatting, added tests

Added more tests

Move CRCMismatchException to general exceptions.py

Refactor of functions, removal of logging

Added another test, simplified code
---
 dissect/target/exceptions.py                  |   4 +
 .../plugins/apps/texteditor/__init__.py       |   0
 .../plugins/apps/texteditor/texteditor.py     |  17 ++
 .../plugins/apps/texteditor/windowsnotepad.py | 215 ++++++++++++++++++
 .../plugins/os/windows/regf/shimcache.py      |  11 +-
 .../3d0cc86e-dfc9-4f16-b74a-918c2c24188c.bin  | Bin 0 -> 6263 bytes
 .../3f915e17-cf6c-462b-9bd1-2f23314cb979.bin  | Bin 0 -> 145 bytes
 .../85167c9d-aac2-4469-ae44-db5dccf8f7f4.bin  | Bin 0 -> 250 bytes
 .../c515e86f-08b3-4d76-844a-cddfcd43fcbb.bin  | Bin 0 -> 200 bytes
 .../cfe38135-9dca-4480-944f-d5ea0e1e589f.bin  | Bin 0 -> 230828 bytes
 .../dae80df8-e1e5-4996-87fe-b453f63fcb19.bin  | Bin 0 -> 330 bytes
 .../windowsnotepad/wrong-checksum.bin         | Bin 0 -> 145 bytes
 tests/plugins/apps/texteditor/__init__.py     |   0
 .../apps/texteditor/test_texteditor.py        |  86 +++++++
 14 files changed, 327 insertions(+), 6 deletions(-)
 create mode 100644 dissect/target/plugins/apps/texteditor/__init__.py
 create mode 100644 dissect/target/plugins/apps/texteditor/texteditor.py
 create mode 100644 dissect/target/plugins/apps/texteditor/windowsnotepad.py
 create mode 100644 tests/_data/plugins/apps/texteditor/windowsnotepad/3d0cc86e-dfc9-4f16-b74a-918c2c24188c.bin
 create mode 100755 tests/_data/plugins/apps/texteditor/windowsnotepad/3f915e17-cf6c-462b-9bd1-2f23314cb979.bin
 create mode 100644 tests/_data/plugins/apps/texteditor/windowsnotepad/85167c9d-aac2-4469-ae44-db5dccf8f7f4.bin
 create mode 100644 tests/_data/plugins/apps/texteditor/windowsnotepad/c515e86f-08b3-4d76-844a-cddfcd43fcbb.bin
 create mode 100644 tests/_data/plugins/apps/texteditor/windowsnotepad/cfe38135-9dca-4480-944f-d5ea0e1e589f.bin
 create mode 100755 tests/_data/plugins/apps/texteditor/windowsnotepad/dae80df8-e1e5-4996-87fe-b453f63fcb19.bin
 create mode 100755 tests/_data/plugins/apps/texteditor/windowsnotepad/wrong-checksum.bin
 create mode 100644 tests/plugins/apps/texteditor/__init__.py
 create mode 100644 tests/plugins/apps/texteditor/test_texteditor.py

diff --git a/dissect/target/exceptions.py b/dissect/target/exceptions.py
index 22f46a604..1c435bcc7 100644
--- a/dissect/target/exceptions.py
+++ b/dissect/target/exceptions.py
@@ -114,3 +114,7 @@ class RegistryCorruptError(RegistryError):
 
 class ConfigurationParsingError(Error):
     """An error occurred during configuration parsing."""
+
+
+class CRCMismatchException(Error):
+    """A mismatch between CRC checksums has occurred."""
diff --git a/dissect/target/plugins/apps/texteditor/__init__.py b/dissect/target/plugins/apps/texteditor/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/dissect/target/plugins/apps/texteditor/texteditor.py b/dissect/target/plugins/apps/texteditor/texteditor.py
new file mode 100644
index 000000000..853b384a9
--- /dev/null
+++ b/dissect/target/plugins/apps/texteditor/texteditor.py
@@ -0,0 +1,17 @@
+from dissect.target.helpers.descriptor_extensions import UserRecordDescriptorExtension
+from dissect.target.helpers.record import create_extended_descriptor
+from dissect.target.plugin import NamespacePlugin
+
+GENERIC_TAB_CONTENTS_RECORD_FIELDS = [
+    ("string", "content"),
+    ("string", "content_length"),
+    ("string", "filename"),
+]
+
+TexteditorTabContentRecord = create_extended_descriptor([UserRecordDescriptorExtension])(
+    "texteditor/tab", GENERIC_TAB_CONTENTS_RECORD_FIELDS
+)
+
+
+class TexteditorTabPlugin(NamespacePlugin):
+    __namespace__ = "texteditortab"
diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
new file mode 100644
index 000000000..1fd316a57
--- /dev/null
+++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
@@ -0,0 +1,215 @@
+import io
+import zlib
+from typing import BinaryIO, Iterator
+
+from dissect.target.exceptions import CRCMismatchException, UnsupportedPluginError
+from dissect.target.helpers.descriptor_extensions import UserRecordDescriptorExtension
+from dissect.target.helpers.fsutil import TargetPath
+from dissect.target.helpers.record import create_extended_descriptor
+from dissect.target.plugin import export
+from dissect.target.plugins.apps.texteditor.texteditor import (
+    GENERIC_TAB_CONTENTS_RECORD_FIELDS,
+    TexteditorTabPlugin,
+)
+
+
+def seek_size(fh: BinaryIO) -> int:
+    """
+    Find the size of a file on disk.
+
+    Args:
+        fh: A file-like object that we want to calculate the size of.
+
+    Returns:
+        An integer representing the size (in bytes) of the file.
+    """
+    pos = fh.tell()
+    fh.seek(0, io.SEEK_END)
+    size = fh.tell()
+    fh.seek(pos)
+    return size
+
+
+def parse_large_structure_data_length(fh: BinaryIO) -> (int, bytes):
+    """
+    Read a variable-length representation of a length field. Acts much like a ``varint`` object
+    from ``dissect.ntfs``, however it introduces some additional bit shifts and masking.
+
+    The position of ``fh`` will be restored before returning.
+
+    Args:
+        fh: A file-like object where we want to read the length bytes from.
+
+    Returns:
+        Length of the data as an integer
+        The original bytes that have been processed to determine the length
+    """
+    offset = fh.tell()
+    original_bytes = b""
+    modified_bytes = b""
+
+    while True:
+        # Read the original byte
+        bt = fh.read(1)
+
+        # Transform into an integer
+        bt_int = int.from_bytes(bt)
+
+        # Shift this new byte a few places to the right, depending on the number of bytes that have already
+        # been processed
+        new_bt = bt_int >> len(original_bytes)
+
+        # Append the shifted byte to the modified bytes
+        modified_bytes += new_bt.to_bytes(length=1)
+
+        # Add the processed byte to the list of original bytes
+        original_bytes += bt
+
+        # If the most significant bit (0x80) of the original byte is zero, this is the final byte.
+        # Otherwise, continue until we find a byte whose most significant bit is zero.
+        if not bt_int & 128:
+            break
+
+    # Convert it to an integer
+    f = int.from_bytes(bytes=modified_bytes, byteorder="little")
+
+    # Apply the mask
+    f = f ^ (2 ** ((len(original_bytes) - 1) * 8) >> 1)
+
+    # Restore to original cursor
+    fh.seek(offset)
+
+    return f, original_bytes
+
+
+def _calc_crc32(data: bytes) -> bytes:
+    """Perform a CRC32 checksum on the data and return it as a big-endian uint32"""
+    return zlib.crc32(data).to_bytes(length=4, byteorder="big")
+
+
+def _parse_large_structure_tab(handle: BinaryIO, header_has_crc: bool, header: bytes) -> str:
+    # A dictionary where the data will be stored in the correct order
+    content = dict()
+
+    while True:
+        offset_bytes = handle.read(2)
+
+        # If we reach the end of the file, break
+        if offset_bytes == b"":
+            break
+
+        offset = int.from_bytes(offset_bytes, byteorder="big")
+
+        # Parse the length field based on the first one, two, three or four bytes.
+        data_length, data_length_bytes = parse_large_structure_data_length(handle)
+
+        # Move the cursor past the length bytes
+        handle.seek(handle.tell() + len(data_length_bytes))
+
+        chunk_data = b""
+        for i in range(data_length):
+            r = handle.read(2)
+            chunk_data += r
+
+        # Insert the chunk data into the correct offset. I have not yet encountered a file
+        # where the chunks were placed in a non-sequential order, but you never know.
+        for i in range(len(chunk_data)):
+            content[offset + i] = chunk_data[i].to_bytes(length=1)
+
+        # CRC32 consists of the following data
+        crc_data_reconstructed = offset_bytes + data_length_bytes + chunk_data
+
+        # If the header did not have a CRC, this means that it is combined with the only data entry
+        # in the file. So we need to prepend this extra header data.
+        if not header_has_crc:
+            # Furthermore, if the header does not have its own CRC32 it
+            # places a byte at the end to indicate the start
+            # of the CRC32. This should be included in the CRC32 calculation
+            crc_data_reconstructed = header + crc_data_reconstructed + handle.read(1)
+
+        # Finally, read the CRC32 from disk and compare it
+        crc32_on_disk = handle.read(4)
+
+        crc32_calculated = _calc_crc32(crc_data_reconstructed)
+
+        if not crc32_on_disk == crc32_calculated:
+            raise CRCMismatchException(message=f"data, calculated={crc32_calculated}, expected={crc32_on_disk}")
+
+    # Reconstruct the text
+    text_reconstructed = b"".join(content.values())
+    text = text_reconstructed.decode("utf-16-le")
+    return text
+
+
+class WindowsNotepadPlugin(TexteditorTabPlugin):
+    """Windows notepad tab content plugin."""
+
+    __namespace__ = "windowsnotepad"
+
+    DIRECTORY = "AppData/Local/Packages/Microsoft.WindowsNotepad_8wekyb3d8bbwe/LocalState/TabState"
+    TextEditorTabRecord = create_extended_descriptor([UserRecordDescriptorExtension])(
+        "texteditor/windowsnotepad/tab", GENERIC_TAB_CONTENTS_RECORD_FIELDS
+    )
+
+    def __init__(self, target):
+        super().__init__(target)
+        self.users_dirs = []
+        for user_details in self.target.user_details.all_with_home():
+            cur_dir = user_details.home_path.joinpath(self.DIRECTORY)
+            if not cur_dir.exists():
+                continue
+            self.users_dirs.append((user_details.user, cur_dir))
+
+    def check_compatible(self) -> None:
+        if not len(self.users_dirs):
+            raise UnsupportedPluginError("No tabs directories found")
+
+    def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord:
+        handle: BinaryIO = file.open(mode="rb")
+
+        # Skip the presumed magic bytes 0x4e5000 (NP\x00)
+        handle.read(3)
+
+        # Read some of the info in the header. Not entirely sure at this point what info is in there,
+        # there seems to be an indication of the length of the file.
+        header = handle.read(6)
+
+        # Whenever the bytes between the two \x01 bytes in the header are zeroed out, it means that the
+        # header itself has a CRC32 checksum
+        header_has_crc32 = True if header[2:4] == b"\x00\x00" else False
+
+        if header_has_crc32:
+            # Header CRC32 is composed of the header, plus four more bytes.
+            header_crc_data = header + handle.read(4)
+            # After that, the CRC32 of the header is stored.
+            header_crc_on_disk = handle.read(4)
+
+            # This should match
+            header_crc_calculated = _calc_crc32(header_crc_data)
+            if not header_crc_on_disk == header_crc_calculated:
+                raise CRCMismatchException(
+                    message=f"header, calculated={header_crc_calculated}, " f"expected={header_crc_on_disk}"
+                )
+
+        text = _parse_large_structure_tab(handle, header_has_crc32, header)
+
+        return self.TextEditorTabRecord(content=text, content_length=len(text), filename=file.name)
+
+    @export(record=TextEditorTabRecord)
+    def tabs(self) -> Iterator[TextEditorTabRecord]:
+        """Return contents from the notepad tab.
+
+        Yields TextEditorTabRecord with the fields ``content`` (string, the contents of the tab),
+        ``content_length`` (string, the length of the contents) and ``filename`` (string, the name
+        of the tab file).
+        """
+        for user, directory in self.users_dirs:
+            for file in self.target.fs.path(directory).iterdir():
+                if file.name.endswith(".1.bin") or file.name.endswith(".0.bin"):
+                    continue
+
+                try:
+                    yield self._process_tab_file(file)
+                except CRCMismatchException as e:
+                    self.target.log.warning("CRC32 checksum mismatch in file: %s", file.name, exc_info=e)
+                    continue
diff --git a/dissect/target/plugins/os/windows/regf/shimcache.py b/dissect/target/plugins/os/windows/regf/shimcache.py
index af72a068f..3aea545fd 100644
--- a/dissect/target/plugins/os/windows/regf/shimcache.py
+++ b/dissect/target/plugins/os/windows/regf/shimcache.py
@@ -7,7 +7,11 @@
 from dissect.cstruct import Structure, cstruct
 from dissect.util.ts import wintimestamp
 
-from dissect.target.exceptions import Error, RegistryError, UnsupportedPluginError
+from dissect.target.exceptions import (
+    CRCMismatchException,
+    RegistryError,
+    UnsupportedPluginError,
+)
 from dissect.target.helpers.record import TargetRecordDescriptor
 from dissect.target.plugin import Plugin, export
 
@@ -179,11 +183,6 @@ def nt61_entry_type(_) -> Structure:
     },
 }
 
-
-class CRCMismatchException(Error):
-    pass
-
-
 ShimCacheGeneratorType = Union[CRCMismatchException, Tuple[Optional[datetime], str]]
 
 
diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/3d0cc86e-dfc9-4f16-b74a-918c2c24188c.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/3d0cc86e-dfc9-4f16-b74a-918c2c24188c.bin
new file mode 100644
index 0000000000000000000000000000000000000000..874ab51f54fd4b8f4cdabcf687ab13b26876b2a4
GIT binary patch
literal 6263
zcmai&O>P@U5QRGj$UaN@0^Z~Rf)HpI7%_rmPe~)0fG8@WWcV1lNZ?I&S>!~yL+b0t
z=c=9=+OQ>&Gt>3+>ecJ&dG||e-}V2kwTr(#|JmB-AAW9!_S~knZ=1GjPwm)V`{&!Z
z{?PAj;@Ymg#QkmDnc~yi_<VJGZi>~<r{}I?#kM`R+y4A6ejnq>>Gb4N+<T5|+pzE&
zpEmK_6f357)35OCYxi>#_8w#H)c%MoukqO{4(%@f?P7(kZ@Y(2ab=3=_1v!8-nIK!
zNesll51WdHOftJ&yABWB!57hO+K=&X6V}M>7=PuDxRuc{Y(2!&Fa7Dq_zimJpHFWf
z_gk#ngk|D=?N|x3ZCr5_AcM`Dc!IxV01BI)2l)EQdT+$dIOFrOM@t2XTDFz1J-JF#
z$4tI!6>Kg2emtQ7>f9yz^$0d$6I+UPAJIR=5B^!Rw4;c*1Mh?j^;6y-W7WgSmN0n=
zG!Ol0ve7j$={{ED%d95K$M`Xy%<N?hATaX%7^~=!eXq1Q{<W%4;q_~kUi!=qxC?CV
zd-g;@2HUXqCj7ha_Hjl0ro-SkSgO7t2S(~z(1xvKq?)s0*)iJ_gYmUL_4ub)lkCM0
zKIjR0V;88y<BS{pue)D$${{SN2h;b+k=otW6CAh-%OJQ!id<j>nUSUXmb%)Hy85=~
zg07%SAT-4la=7Web?o=m|H6FRGyiG8i~QzHlCL=%;F<Vmq*Xn>eM=>%;B9zHrptyq
z)E0#&3Lpf)UkDs~?Cy2-`BSe^svH`8(>`?6q`Zf)q;8=z@Uq^OV6Lpk3Z^m;DSA$X
zL`1(Ur;0cD?7~a(ajcbmROZlYYg)eTc)`HulRfQVLRxm}T_LCnONEIPEYK4vG8wRo
z!6aFLF-B#HxcCN!<U}R(Id{^_ZtD;|L*Cp?HJ{=;>{4W|G#-`FMf*PfT}8L%Zkgy-
zRtwL$zSZo-{MuvaGVAaTW*6b*>J-ta2AjA#-+6^j<qWFw6zH*EZbJvi)ff^r6$e?P
zDb`hdYt_AHRQ;N|vSG`1r4Va%(cWQSdj7flrwYWqt}Id>RDycIU}~YRW);4i7uHq5
z+HI^gI^2Ev!p}LX-hZ&m1fy9+`)T&95HNFKCUe0<V2vjbr@z2av)S+Q8AjKb$d+hr
zHI-2coBPF5ZF67I9!w;6C!a7Z_xEuv%uX)zS=CA_>#Dl6@*%vbBUBw?UY|BmbS5^<
z0mQam2{419R*u{`HQVJ>Bi&mv8|*=p{Sd9=ujUMVgYUQZFjYi+Q3o>}saLo<$47jl
zvWNJw&aLg`>Md0X{%AjzHj|O3s1a)zV<lY9re-U8OeKkw*<GrZi1J-KD+s*rJpxa;
zij93-fgAI#&1$XPIk$(HRbwXDuWI6*=nz#$o#;3Eg~~C(GZAY4d>88;<7%Bg*JmD8
zPwcN<o+$774h0rW`{d`z4v*%haO7F*43@>Kx2GLID&zgl{g;(Bf7d3v5gDlhd>s+0
z6Q$Wvuc_u!XZ5d<3$sc+F`k<WjFR?Oet(D`&11}v;INz=)V1SDYsm`urJbCJIR&is
zA#oCI?tNqA;J+5bIo#A+$}4y8<ClFj=N{#}?4+v9R%9Z-jyPnO>JS%svG-`wCSPN(
z8VNtMDz)4U0Tp<slO(pY%Zz$Ge{1?dA>1)_e;(&uqPdFa>{ZoNSAom<xlIUWJraYw
zrc<h!xO{scY&EZmPf``D6npA1@@6j6?k(+6&m<Dk-SxTHwNR$gX?6nGulYhsI?vpv
z>3KFw27C5S&8W3>S6_!maINnM>eNhpp8H4EQis$xampO?ylEcfWNE*=nV{yVBshvo
zR@|j-!E12$`C9mm^9q{64uLJ7X<-oaUR6=nlCNeq$9(=B!#VxQT-XI=bv}vu8Qvt|
z6Yke@3bA@pkTQ+l#4x?pLV<Z2=6RCSys1;Q_m^|~S!%qwpT6kSM7Qox>L}~ACIe-q
zK2p?`2{k9JKIdntLpjTh3<q8GWnJI&UahW>O88BqRp3onxC~3MLT?)GI-=@wzgfuh
zlye*TLw>AA(X>+fpr>n>sZNeve&IcO0Wb4ZY)#~jG-YII?Za^{N8Q&>-il@ldseUI
z{OZXNoYZ@~?*M1B8Nctt0{fpdroI<h&cr@ju|{~-EPc0D5$$8bV=qG$n4E~KdS0Ik
ziBZ#wa-D17T%oMruz#+r$xy7;u5$;Ut9s3kT=ZT<6T^JU_FLESbSx%}JBl)@Dw*dU
zPSa*_a$dE}?^MxvFHaETgtm?!7U*}~^~7m@3PpHf)z(SxKECTs{U+uypRV+7g1)P@
acVEISy>+bm;#o@9Ty4#*fB*a6KmP;Xc9zTl

literal 0
HcmV?d00001

diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/3f915e17-cf6c-462b-9bd1-2f23314cb979.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/3f915e17-cf6c-462b-9bd1-2f23314cb979.bin
new file mode 100755
index 0000000000000000000000000000000000000000..a177bccb2e002a900e7916a6cf1d61b1767a357a
GIT binary patch
literal 145
zcmXwy(G5UA5JcZ~q5@IEX+V5v;4UO2g18e^=t9(K9DZhJ-%e($8jQ6Dunj!}f$USq
z#7q?JSVe=}EEnaYSdixOrE;z2F7~H3#>9ML4zK;WMm1zCIQ4{&ykB$7c@)<T4Fwu0

literal 0
HcmV?d00001

diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/85167c9d-aac2-4469-ae44-db5dccf8f7f4.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/85167c9d-aac2-4469-ae44-db5dccf8f7f4.bin
new file mode 100644
index 0000000000000000000000000000000000000000..29f0c1117d493e817c28cd4984c919a3bfff276f
GIT binary patch
literal 250
zcmXYsF$%&!6htRpz$<Jm7)EUDY{W7sXdxCMBuiqGs1dZax9||QlF}pS1?((5fM8)O
z;v~xMe%|tDX8(Evz^OKX?y=YdiymgkwJbucV35(_fiIqTR=w~uz4<A9Zke(%+=;Gy
z@Xb4)ss&?CSfjxRi82g4WXiFaBS574TKTCc6B)geRFsRy^7PR}M;xeQ<ojh<IYQ~8
n${WK~ugoteEmanqYS!fMYoUd<I-xWHmOABMtSFcF;JfuNz85%+

literal 0
HcmV?d00001

diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/c515e86f-08b3-4d76-844a-cddfcd43fcbb.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/c515e86f-08b3-4d76-844a-cddfcd43fcbb.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ca05de5b78590869855ae8d465ee92dbc51a81e4
GIT binary patch
literal 200
zcmXYrO$x#=5JsQiLAtYoKilrQ5ClO)q@vq24JwobY_)g`FCs3)`}rA#WO$i*^O4mW
zdhr1H<9UaD!I2XU$-f%B_@dfixZ<evZp9@d(UyWaV=}RmWeGBVR;P?sc?mm_S3l6j
t?35WBmf9RL(R?9q5GO%i%}u>5W?JnV&bn5P6Mg?yE6H=v?t9vQeE|O*A{77t

literal 0
HcmV?d00001

diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/cfe38135-9dca-4480-944f-d5ea0e1e589f.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/cfe38135-9dca-4480-944f-d5ea0e1e589f.bin
new file mode 100644
index 0000000000000000000000000000000000000000..bedf49e05d5194ce5b18ceb671f176cb9a142e2e
GIT binary patch
literal 230828
zcmeI5OKu*?a)p17JnB7A8|X<-JQIWlXBQ0%jlOeIq8@-K(xP;s4S4R27zVrxPrV}N
zSBFOtS(Vj2_259QCiOq+5fvGab0Q*t``63mpSJ%lm&?ol{ICDGT>j_Z|M_p1hs&4C
z-R1srb@_OCygXgLZNI-AuYcO!yBe>3ynG$+-;8(e#&185->;uPcQ?NM<@s~B;~O`Z
z&zHB`=Rc1BKaEe`J%93Xy!U0ib~BFfZTxmMK6f|1ad&yMy~1a|ZRdG2j`w+d_wMp<
z<CSmYcYovI@^1Y3IKJVd-)v|2K3=&SSFg`~+>ZV6@_u}iYvB6t$1!y^+)3Tt$II<F
z1MhGyuI}pc%lLCOj>f$`jh{Ln*R4Bx8prxHKK*t3^yl$^q<8-LIvbMvV|?#w9GUC=
zw#iB|yBV+eDv%7vd^0}5PwoIITy6J(eErG(-YhrEnRC9{u9hdrRqNQgU!TcO>28ym
z&fTAak5=dX$FmfWI`4Az>npe#$K+VL*86ewpT=L#&v%Z;(G~L!dMCN?{B+;{7~lHz
zd@RZ2aY*xF`!si>*Q81B$G16`wVJE^JpNjrtl7tFKmxPAKaFp(M((#KEgk=}r+zoi
z`e#r2c+Oe@?}lvNZ+FjCa0fTzXfMY3z3%SEE7EUP7&?wD^?Z>WGSaF=+Gs0xq-V}I
zjw|NoS%W#({qq|CKE6}h%Xv5lYl5}$aY!9KuIonrx7&HOQXa;Uv<B<kvySxD-Oq#$
zT#q9o!K0+O7c_!9<1V#sd9FT>R`uKMUa%`zB_wn=Uf~|zY-{Ujdtd8cGQZjG{MSid
z+}~c4bgsQNpl8xQOIlBlfBlt};0eARXDUtCF}y;3M9C8iAPFFUN#JR_cCU4<^T+KO
zm6gK=zqx$aq$bPzFpi|v!ph(*>s5)&b=UI~EX%-^V$Zoku88%nd+PE=J|D+fN+0KE
zrE_%6VXduf`OPLTH1Nyw@#MiIX&tA(D+%fe%L;R)$O3yJi_9J1#n2=j0gbU#juMx?
zAw%woC!ycVlfK%H^)S{9_szRy&F{v)(Joz?pEPIG9lgB#H2z$V)mq-Nq}#Px^4#0E
ze)rP+zQ(Z2e2=puvzOy6*UwipTZ60d>fZB8I$dY5Dvv{Y_{*DN2e?<ukgMj2BUwvR
zTG!><pSpcUw_kUgW#d>rt}MjrxyX0;T>AW%?fg6iu6u1+WO=X>JP$OOXQ5Tiw>X#Y
zi=*oa_SMFBEgjzd`a?hWQnml#xI{3DRpd`=%}N5+95j=-;M0&bXF}qvFXX7$?C;}u
zG`e3CcgxlKsAU;tVas0}t!?=g`CzW(-Sauou=4NochT(9Wq#Mws-vu}>hUNa#@VzY
z^mMr9b+m~^Ct_0^z_qQP1TsTIZ8^$wDz>YrMt1M0*~lJ=;t#QP{8XI5xsmT*`!GEb
z>5En{(UH~)ukPg|edEbKjK8*XD_^d)rKg1c$REo#b4QM%W?7>#wuHyn)Y^(Y=1Fp;
z#O|_cB~kt@&x!<IZ)*fS<yDSxKVCsM_N%s@*6N)zA7ZVV?}X!bYhq9IFrGTriS@>M
z;mHxf6A{XP{x-h%dA!=A&vneBrzh?2tGuNAar02ff@q)nb7aREt(%e~pS7Lg$kMBy
zpFM!AjQyL}Up-mt?<%sJB_peVzRnWr5v8@Gzout?w`uk7<6ekWdd7Gb6<A90ul)bR
z_^Wt~7!o-gBL}V8Inz371^OjV&J~jdtkxmd$<>zco39-Cudd-tZdzNqukzkcKm2IL
zJ-X-PO6r!`7MUx*9oM1b@^rW^?hD_eNSph$?DdQ!KWkOla%%`uLGP4Fa_m}VmU_>>
z75!i#ykfe4p5tAv=6Zb2XVufxszNU3ahoJq&!c2euPIBlCLYlP$yV{2^hs95Rtlf`
zH13<YOx|0zM{A}eA-lVdi~THgr?S&{0<>T8g)Hg*&Yex~@8+n%j=jrf^k?Z+eLK#G
zTsub4qh_wpasRBfJcqJx(kbhh<4x;9MV3A<$OO+ED~TMXOSZU2yM<mOcjwoV-yE-C
zQ}7TR%Q-C?M7-CnDBtCN6|?!8&oPEG`&0LV7t~Srk*GfdlE699{hp_At&S9AndY-e
z!-Q&Kf%!C==SWWRrm|}LmotA>Hr~2l=VGOnbi0Srin6^{WT3m!I?`2lP3ZTeZ=ap#
zXoo76n>8HiVqezwO}JYBhOC5<Mq7aw;|Q<Dk<bbu4evIIYLy#VsC>%zHtP@f$G5R)
z`ljrIKHXQDo|CU#=SA<a7wBbW#kPsOBbzd7X<vtPT#j{LW!}1)yKy{QuVeh`$PhVc
z?SXe7XKOR(eLaqV|C5dBxX3Xkc5cOYB+q`Aez!kGY#$LGz6`5C<ivILGq2-9u2Ip8
z?sabi_ZG_b8|~lQ)zVOo+E<-d@Yz%E_s6{m7g5BpkFt%r&e^dvVfHAxqi!WD-yxf}
z7MITJS=PUKik9~=gP0?<wfr~&>s{|U;<SEBis*%{wlcl<<KIH+uSPuP>`K@K>#je0
zuS;}GsAIP;j-~XPpSI%GunS=qls`-F(K+Ej^Ga6!1t*05V0Gsx25mg|{9Zq+`0jWd
z+2}dH!zN+96wQTQ@H$5#*bV$1Rz|*2afvbyVN-g#es-%IbVj~-2G`H1roBJs;T&NX
z3Ky8UrmzcP7x3gAb$PW#$zd1dRq!KNA*_{bRYxy69%!G@?N`_Z+4QgrVHd(KIAS%v
z*?A!%m2ORp3xr)z-bfrbOQ=Vb#<luuVHbqcImT8jjh^{8ek#Ydwh8M9yI@vPD3c{v
z7)#+7^;+14%ICr^c$J-F3}<+SwQXPL)gw`VChUT$dS@0R?1D2E+x4&uVHd#Q!!95N
z^P%9#tQ5R_*ai6Wifa@j5_6|_0X;eM){;}$1>rp6)>j=a<_ga;j3cW;rXs|xGwbPH
zI7$lrJaY81_0rE?*@INmyP(>Lbp?g%HL6NuEptiu%rYEnpd5{z)#<FBmCn(Tq4OFz
zDiT>=t}E((PRHr*+QP$}{jp*GFsbQXKrWyn>0JQP5U)xcfIg$aXqPILsDQ!lIIcu;
zLQHzC2j7+U!I1QNyqUqRN)e8m-UUl-wmo>os;<$~A*N05f{@I0rGVoIEN@k~SM7-L
z<5||K5jl%kTvfKNQA_WFy--Qnm1I;J5-Fy40o_XPLiZMxfu?t%_m_lS2)mHp1?ng(
z-<nbX^e(7tavf_cib?MRS;oE}!j)UENB)xDh4e0@cY(V3<9r5X^2grrGek<3s465X
zwlnq=b|JkBifqZ=(H9{as*poA<ca7P7)RIzbu<Xg0FTRLB=kbog#2Lc$352<Bk|Rx
zqhlabv%>1sbkC;v6|q>ForR{p8W96maF)TY`ZM*7-aU9kowx>B&x&@r!s==YyMT>I
z??NUcd8LG1s7_&IA|IN`NV3iL5{h9}iJ!?x_^!QYQ(tCHGpx@1(yG~GrFWsG#Lkkj
z?xA&^!|#0x=AQa%>0NMEplC_aS$Y>_)6tmy9E-CY+?C?1EWBO|yAXCE>_XTD>`Eph
ztz#nP-}8JT#ZH-wG|$88*aneLCL?7s604_A*9yC^#~xMxsLIMU>emrR#dRI6G<NAp
zkyUrKKAJP)^e%*5sOg#Rxd^-9Dre=MVHdLRLM9`D0D9hD??)F(lgUV$=$YOHb22j;
zn%)I)-I{C&dzpO~+?gMCLA8fQY*(Eu^Lh%qK%H8jUC88*?7M)SWj7yskf<bawM+NU
zb8S_xx(d7C8T-i5+4P<(S+S;$f*c{ued~HRlaUH{IKw5*Ubki3*Ln3w)St;@q)bNA
zieoj>tC@Wlgu{c#XWs?tqxw!hs<zC&3+ytfr~%w7laV}2KD`U#8XON54s_<Njhh~G
zmfpk1r>d&iomkm-LHj*Yi+7Zt;(KcM<_HrWn%xdwjCZb|afV)1hRq`vs9G1*o{9hH
zbIbkgRypWAwL_Uhc6D~=;J$lKer3+K$jHJj$bx9UOlPQ>jFf#B)DhQnI_K53Yj&@t
zwa@7GEBh|Urf1)UOh(GS3)y#}W?mT|TSa!qNE7rJyrA3L9#N)uK|8$loji(_k>64*
zP0a4!l=F0C{%ppWjwRftGSEy$%4DQ{?iY3;yE+#VlgUV$%82L6uFmp2pt_(nuQy!R
zJr3zzNbdqjqF8^;@>Q2%c6ELj)g&NO#B1qY==}-lU2x`8JL^z!@G#a9Q*OaFSyhV4
z_LwkHJv~zD8kbzP2G@B+CL_)9KDktOb*>pfVHY}wn8`?)jP$ro0(&#8#O<)ju0}7;
zI_!cbNVq;!Y(e&2Fj9Q<#`v7<yO7?6Gltspwq^_+t?AggdG74$TqyZ0p|A_&-Muzq
z)~J$O*agjnP45B>o$wp{II)d7iCn9f-UX_&s5+!8=5b>*u`|{6u8KLl{(g$2xH_UG
z<Z2G@yn?YXczx43zIoU5E+AuMsU7%WKD|?rELur$W-+p>Ga5zprlW|$o@On@BEEXY
zp2&oVS?K*jf7d%6s!H*0tOz}o;~ojNC9A$CojvSA)zzNuwvow5j+4VKgk4Za0bW_P
zFj#nORM>^E3ur{o1=G7gZ-<dgJg)1(-j3Ii77QiWH=p5?9jUl~_Uq~wx!LLfetlkd
zIu=O9o~KIhg4tiit4tdRyAXCE>;l=aZP7ksp8La!-444Dc0m%zz6*|WL3**L_;&SW
zX5R(P^TEEn+hUmXE?iyy*z%&P(WClBlf+;Ohya;`^mdC++_9d?NUSo)Yha1OwlWzB
zmf&t+g4#bz@xO9{>u10IecNkK{0h1icH!-oU!hU^>OILX>;iZehz{uUZ-e#zJbv~Z
zwC^KSHr`0b8S0;1owKVm)qP<X!Y-%|+L2DNUlkXHUD$J|c&GF(q<5jSD)zaxg3`N?
z$w-ChDWbjI@};l~$L{;=>P(C;OoHi}AI4wyPU}?<sy1<5NlotA*N?zEp>NvBnG8L<
zI@6Pz-UaDS*ag^JSI9ZO&92U_gO;z7?+Uvh_CM@`Cms;t{(Sjq{J9<;247RiQ`iMK
zr}E^LMTcFWI??<S^~tO&)u~}`YL$gupb9{CcU@U*`;4B2UHJB_OZMYbJTiqm>_W$=
zm`%s3Lo)7X3A=#z`epn%&O)gE9@hlE7@w*R0A!h6ojvhruDHNHg<YVs(H%-Xi&u4l
zc=Plw*lMSDA-xOfU7#vN{+W2VdN#r?q<29Qpc1Xm7ZjqWj3DfS5Uj8Z)&JXb#jp!l
z)z4dQ?HKjtc*Qu@zRLb#MELK9Jtk*z_dhsbkJ`wq(z}q}g>TzTTu?ssiiKSumRZeS
zv_&eEE4>S07s4*M_v`6-)qv8i@RhI&WJvR@e&<KxpTaIMb3}druyxp1at17~c$TmW
zW|^q9(SB*h>iT}A$Wcs{ajLKj?%xi(fM(qPSXJxXGwi~ft(FbxxI4-fY?+MY{yD5P
zK27xkLI6E+uDW=z-+k5DVuf9xOS3AktLnm5JM2Q(g|G`@7i48MFA}T<PCvUkyY5qU
z<Y5=8n&D}y<#W}?I>KAmXsKtcBgJbpOtH}H<1-m4y$jCN!!A6G3>+jZy$fL%RPjnb
zQ`m*8!8lQWFN9C^WcyCAs;XYSb2drBS<~#XV79XF!k3Y$pXnY}s!OQP+B8DlUxQO7
zra-HIA3w=r)k`6K_ADw8gXZ2qvi}d`uO?M;eXNcnzA5_|c7Z1WHjZDJWv^!>`Dv=B
ze)qEryMSe&8Y1k1VsvaGjDYtrVAWBlbzFVM>cTF_wz2lAMgog9Tf2^PrguRTvhFvJ
zkA+vIUwRi-Qv<>-V3qoQAYm7na^#tgJwlVNtYhO$MgrMlimLb9u~+MJAOGBPA1f%8
z$$uZ#{%)(7gI_12W-j9U@r;OxmC>lTSWoEKjb)F^@x-w{ZXV@n990#YT(_z==a`q3
zfsa1_e4VXoZN#yC+pZHI&I}i_E5+jW``LGaswL#Yn&O#?)1>pa__<6*%4DQM^t6J~
zyAXB(U()jeX@;igrgs6Wn%;$DMY%9x><M0&>NN67RX=KaZtYf$Mk%i7tF3HNCL?7s
zQrHEtuVELkxW<9WCd@$|Yk;vUuydJ=B>SA+g|G{I^gtOjm6+O#oB2E-RYs@q)nOO*
z*j?BK&5~i3%GV#Y_GVK+62KdX`ubi7J&O>66m~&zO<`N<UHCdx_hnaS?TS%3(i$D#
zTPWLaSaMY#6!Ln$>b!!_yh5*>BtO$khc6?0JMO7z)OF5|)u%su6x~thU@PBY5`Zf`
zYn6zttbZ#D_f9O}czh!4g5-n-@Qj#!bvyjLkaDUhYJ!43`8eJSyWol6S_NSjR+UxY
zJFmvyOhzIDP4B{7l_(s~ee&sDAQr~jf<|cNs1u>%omD@Q-i5FWpEiFTc46gvv_qO^
zEQehnu18NCyVs5hb7d?n%ID2vceaMSp`vYmS4LV#dH?L)6jK{*o$WnG`7l1G+CgzY
z_{DX!`7}OBJ&4c&u3Z_xp64M_z1`M<qEp}Dd~W-i)yyw)ltO`#(LOiOwT4{?y8vQE
z^-<Ub@}BfAoW*9h&o+f<PIcz#UC=HPNA%zfM>0AUb^%UqkKKh`2)iH_j`iby3}uXk
z-BRyM+(fPJ$_`a5*KyKJM(X)@*oCkQVHZ>tLG^v_b1kb;)d1A!V{?=>g<ViZO?nrQ
zgHVX_Ps+RPpNu{b`RylP9`kwG=&%dwbPl_4)R64EKo`%xM=tDwcfCYEgevwJ8k<Dk
zj(th*LfD0{3s+-BfYkRX8%Jv!b|LHn*21_QJ)~l^+0_$fn+mYvYxp-TkC?jGL&tbO
zy$fL%s0|Lga5gyvzSG_7VHY&D$8(;-E|6pQ`3Ai^yQ7-3{|>w0x&h_U!X?NLkSG7b
zD*H3(UHCjaw{`^xyFk1%?<|LG=DGILCwLn4sjtJZ3t<<+E`(hWmy4ZH%}6FA;eF4T
z0?BEG%$$EG?8322WsU-2XNUxa_PBpOy$iy=dPS`2zKpG>cOktCRf7|Dftu~g3p%$#
z6*<#>SWEKxnT&L_!RcK%%A~UI>N-g8LV6c8(I@P}zApwFOcf3FTEZgIyO4bs`utnP
zu$s=$`%1$u^!S7o6m~&<dPHww7eIT@aEY_m%^ZYXAl^rd(z{SSgsd*AIB$M1O+sKa
zQ$6g0ch4Z&rG5k)3r|MWM}0Yb%IonP^&{)56Y#=5mHt>0IqRisSg`z=jFf#BG8u_D
zDEls)d;YKsVHeCw%~4?A536Suc&+QLLUP4pvEJmR!Y+he2)l5%MS7}<47;Ft0CUdZ
zP8PB&YHE<u+Oo*<h>2lsNxQP|0(n^2g*PL&Vxkg^OC}?wcR~0A+4I+t3%j2??1E4%
z=4v?ZgXvE1f~wEMF5pcn-Y<rwUQ6#nvHG`T?Sx&R`tuxPnDffAKVcWhmKFboU9cp2
z{pOr0gk3ON*S%EO1$D7x-vxFv)=tB=ti7hwT++{%pT?iRjZBHS4-1sZNa<aux~ns9
z?QCIH<9PNR+pA1Qs{Ol<@=NbR)m4XGa9-{m-o5Uss^VA$fYe?MjDy^*#|D+XW#5HN
zMoRBO*aa$(i1}nIG8xH{^3f|RH~&76QJCzo3uiTInT!;6L31^TR0`d5+!A)7V*j()
zEWHb17dq}~uM~E{*-+Sp>P+tYQYwzSA9i<-{t4R)yWm~lC5gVu!Y&9o$5T^tWwfq)
zsjv%S7pUkx+86T%$|unwYHh+Uz^fGQrYM+Il*veB)MreAvxQ?Ge*PWXtFQ}|VI1Wb
zb|LJ7dNSxSbUYn)0cj93GwoXzM4gs5+uVsh{in0&iX!{Fw(xK;e{2j4$NVt<ej2X`
z{a_D4a5DHA9f7JbzDeA5<a6|tsAZ;8lnPe;{y2VDCF`s0SPui8^b8U1LACIAn<w5s
zY0fC5>g84eaDDkcj*lLE8h^QFjs!A{HQ~C_yMX2BT|E1KXKMpbHoXf^LvO+^WLM`^
z_l_;$vD*^6rJCdHyFjewNSNq0y$gSpFN9sF*uUd8m4Rm8h2GDQ-i7oo(CMRz_GN9)
z?9AEgW@s`Q34BznFW#$XM&c$)g4xx%YWV1KcK(-r7cv>?&4_|Eal;4!lcKbrEU#4V
zUX`B3nF|xBe|I*Ao_nDNWcg%hbc^3_M`y(<huvck9D!U;?|K%e5eChd0+Ud7%ulMM
z-jAQ0103PQk87^I)z%S8*aa8^)mN<$DLsRFx)s|QdkVXtnX%<-kc_-i?{q$*<6#%*
zeF43aEg<7O&()9e<6H9comtPW&PPeXeq}P!-p^!L=dcUJ4EVTjBYT)L2;!bOdnel<
z+L3*~zq}mpUqAC5H_wy)uxPBdhs(S1huZCrKkOUUt;gQ0y>I1j#ffkn?4x6K*}Bf&
z^jOM_G~Cap?K+8!h?a=ud|cVs<4Op-Fn2EzgDS%5yFuFDq<6s>#M;-aG@I3=l>_RI
zX76)@zwW=&6X8ha2k*zRl&kRS`*Boq0W+0+o=gD_lUc#6D6Z|z_&@9=t6taQIO1ko
zu{s8y)v;@y`SE_wBbKlWNLRhy#A<qa*ubz0=dr(~6m}u(f~!JgomN?Ry%u&M>_XTD
z>YfUTnNk1rE@<Ci<pO&z+^fZid(yjr6hMs?v3wpsUyq1UXjRYSoOyMGnP@coE`ZR7
zUC3mlvwBxqqKaX{F1T}UKQp#`8u>xk1>smQFYK^^eu1u?@3vvCP+sm>33$xw!!D$E
zA-xN-&>d$!v*qbsAZ8|7G&V^*7<M81E>NEmc0rlcI`^zOK8kz7F2GhuVrLOc*oCkQ
zciTRWzi$z+d*Zc%{wiMxyAXCE?84k*N$lnt3-i8nWtra1WTfJ9&+N?E>-OyFT>w=U
zjswzwJ@za=X<12dW-+LDP*o3A4SbJ`LwHCbY2Zj8MZDW_=|b2v6QqBG>!^SGlP|`r
zub$V1@RfJlwTdGU8s2-s*Abrlje60;E<6srD(r%^6Q<;G(`0rGKGyKO8M;T^&Rl!P
zzr!wMGE#aMIv>zyvCGqnPvA53(S%)C%?D7Ot@@364Pw{@usiwuUdQKZq`R@_r6#u!
zIdqIz`qnEkcqO|!yVu2Cm(*daV{KlcmLFM&s%4KA+kLICBfRXpkbM`j??U!n$Ydn#
z&g}f!7{rnDcYi?kT?o4nb|LJ7ErsGSqFn#RPxTWymJ`BttSmZ<&9d)8VU5a5)l~-?
zMC7X8@aoGX523TMqldOqM=XeU=nbQji)YpAGS+dio~$Cq>v09@6yvO_F5?xhkSh~A
zE9}$N9{ao5N9<u2K$5SwI=y}ExmHsqBlV1VM*Y*fklqD(9P<9x!=ize%f?{iKo+o2
zPvie#7izv0G6}n2t}c_2Jn7*$p8-u-^@g9-yNc&5uW!E8F(Svyz6)g3#bnFR`xz+W
zS|Rf1-*K<iD!x1B^TJ3ugJL#fu)<s&Z*nY6FMk}--NW{2xDvf4w0l2UfT+IL_Bs+h
zUPIUgqCU-guZ{w8sjv&eo9JBGb4^UJkp6j<>?>x(%fQcQ$_%w6%1E4%mZs|%=Kp-e
zunR~?n)r4+gI<G^$w;$(>$1sYBy>gVE$l+rg|G|sw;Us%Oh&>w%+cpM<|&Vy$w=q1
zzojIko&SH>qHy9Gp|aGkf)eAqgo%*T5nnwGPtR05kXz#>s+t8i5!3O<_DNy9pesD9
z{q$l@Bd)A(?z$k4SY5+$Z73^HZlU`MyPz886~=y+gPWhJ=TTXBy%u&MlaVqRDU*@#
z66sx7c|T#6@Y!J(>{AqF964uDKRh;AV%UZ3y8zmTKJ@D5unT+aQTuQR(OmNZ!s-<f
zYYnD%!Awcmg+8m<9SQbVVHYZc-1Ah*e#0*GnQ2Fiz>Epgzv(xmiUM23zDQ*1lJ7hT
zUXtxyFXvfm*ac(`79)A?+0(EKjwq#7VHb?!gk1=`5Ox7i)aOMIEB`hSZP4ak+Y)vG
zJRTf5y$i-MDmL_Q{7mmc)ytg6ZOTTANw8Pyb5g=Cgk1=`5OyK#f^s{MKNxttu(<WG
z3t$QAWV2Tey8tFqmDg2u;aO%dFJTwLE>I67%Tn4Bb|JkB;IiJmo$MI9!D^{!R5GFt
zjI6MBMO7ZO?)I7YaV5_VyWqU>IGZo*g6*>=d9sIa*aad#>L=AhA=~|GTZMg2Xj${S
z@#(65M1Javp--Q0zr6f3{&*ek>pS7y!!BUs_K`O-Ro@#cSo{m03%g((C+tGlg|G`@
z7w(4-QSW4rC>>+>*TOEyI~&KS*wDZ6Gwedxg{t-_{zSU7k5h<D#I&eyhv+Z+E+}r+
zYRY7!-W8U~NHCT}_dTCdF20|vb%rzR(|wiAnS$11O$g{!@2-}s3~tuaUdhWOO{(hX
z87KEx&tB8j+>N`U_HH%B$dzL&+hLExd0&qsWHQp(Q}0hPy$fL%j9#U8p>t7T7o7Q&
zh0kQ9unTw$d1^fF5ko5+mQVIt`uk_Uu6RQ;R(~C@Rc$?9KfMdWy@`|7JyKy8h!4Xq
zP#N)NtgzcH)^+quw2lT5X|qCmUVZc9mEN)9X?mR<y&Dz9%ENtJBp!C5vKaD=zYiIK
z9QWzo$a`%Wk)M33*y!UtkYcU1BMdpryUm7@`*UPJtNFXwO>!&mZQ$yeI?CFr9*^?j
z$5F(3bIrF~_J&>Ox<8ITXW1E_-shg#)mb_+%NmWzWTf;iIK~dUz+9%sVPEiDVHf6J
zUD;xJU^HgG`#^dZ!Y+`DsY>$GSV!bu*p%9_@5OlKdaSLZhbCG{??QSP(z_rmBJ9E*
zdFaTnIsdQ=>0MAai)&qBWX0TxH=NO{u0xrO6n4RxPuPX`TiqE5PS}O83ruey-+MJ|
zDbaez7R|PtRZzHIqh=ExQKzsAYZRL-Nc{?77ru`SGVDUxqCKWP_lK!!K$-R3R^@~p
zS0xpAxh96W`wSh^yK}Za*GPq3*zaH`9%}z2!Y+heP?dEiBc*rY^RTVXMr-efbqr)2
zLtHJ4rhgM2N}S{xS79C5)meUx+!c9Qf`z@8udCO@<v82RuFhC1(01!vpC{Vm{#k30
z_vi6FBP`luL>lW}*`wzB2{FeMMhS+1>T1;zxJrOyy&12+9q(v1GdV}!*HGt~<CU-r
zVy}CJTb~x|z2jW-(J|HZE~Iw>HaC-zuq+)Jw#5p&5OyK#!dcuo`~6Htazs_EisQ|&
z3t<<S`J~F#LiDtPdhAX2ae5cT->&;J!!EdD7t9Uh5zN>1@l{*DuOBhC5O#sA_ipS0
zA9jK1OV*t|rX6;Hb(P))*FnoyNnc!{24(}I!>jw=4nB{qp+kzuNR03IjhUjtnz$My
z$hnp}Oz8d*f?V@A97WAo6xU@-c*I1_Z7IVi#|2&DC)ZSb4gY3`oq4yx$L&>);cmgb
zHI)rPLssvcp)tm~<~zYM_f9C|y$^%4qq7P&FYH2>Q14T)cJ$ZKAoK$y^Y`(SEVfSo
zoNK~_XEHBQI&Q5Z@5!#tTK}vF)~i`$`u!{~RzK^DCjs`zCuiB~8L8i_&-w7Po4d=D
zN%Aael^Ip;)qRS7IzvMDnZK9b1;sdMzp4O)0q?suJk4a!m8|TVV(+rd{aM<M-;Sro
ziVC~Xv39hw?97?{u?%lEFR1UJ-+a@0?awd;0U6^hI^xgTs}4*&)O@y%bD|-98s3E`
zCjSWMB7gYXSj(`PXZ|eg!iyoDSL4|tXYB=<U7go4k+m70{CY@?I7~LC<08ajk3$=!
z)7XFV5_FL7C^GDmgZysyQurexJfgzBcVa*DIxgfI#VRYG-rK-E|LG_bKeo54=c~>u
zSh%XPc<vYGQ)j5kN$y3y_sb@+eUxp~_4D}d{bnN+X(OYjZC#s9)g5&XwtBT+j<Z;c
z%MU2ZuYdQaL4D}cc1~S6*V!Yqwfr~&*Qa;kdi$&!NfEtpoQ!NT83_xIW_HV+-UZes
ztJqO!*Q9yv+3&Lct>s#SstQ52=%W7RcnG~FhTQv4AFa+iXLHI+*H=*1Ou419UCz(S
zI37n=%scYWe5&r-(KE+V#Y83}rFQ{KEc+_G4!h7}gPy?>_kP$`76^+b`Gj3C`Vn@)
z^@V+pccPqk;}0{BJcCB7tw*I{7ZkIWEeg9((RRhRVHaeZ9ZN}H!Y&Z!{aFU}W>_C`
z#h%@kR?Tr_*oESnX31Fh_PJ|k5*%aq*TOC+Q!<XBI1@edZ~PPvVJ=Hp$C0Nyi_O9=
zblin3)S8&y1v2ok3+K4R8TR1y8g^msV(E;D^1qVc+#_*hIm_$L>lF)#7hl`AUoO9l
z_xd-mp7n2MsV~N>My$dv7@6Cn^2pg9r(^v3T-XJ6zz(|*cA?i!{5dat#5k0h9jmuM
z;8oi_cV#MjIa)Nwh#XH=P<HKUWBtHX#6n{ap9a>#JWjm5EH-h_`RD6wSb;yr_l#v7
z<4#4Y#{A8q6_Q>kyFM9q0m;Kb!bKoAF%I&VdsY&!2Og%(OS0o>oaJ9)1bJ%L<Eit>
zxq9c%p1Qfhvkc>SPIODxJ=d?`Dd06<Y}a8F;^tYu<>|?1s%B9!x$2tw+;Zy-I3W8C
z&Gn8QPt-ksAIB{1<&HQ9YeKxTDyr(bk^k+uTIw};wKK0icf~Ng(z(NarY`fNq*!l9
zFRHbz^MqYkM|kO7(9RmN!Z4{YElfA<RS(#DBVE0AZy({I<LDc;0os=<`z~OuvhRXy
zQP_pB3!Y1^m~o!rE?-3ySve%RJW&R(?&BBRn)EIZVJH%EoHa)Y`{=`XZ+aIL7ZAtI
z@mXgQjOq5*!Y+hen6sZbMn6`Xp5@@idn?{It~h5KVHYf^^ez~m3A<3}MX$aTZeV*Y
zj#Jv>>QOS9J|ipa!ut_J2-5+{BenuVktB)1K;4j8RlPfkC_IjD!THhi@^zX_JWnje
z->&NVlqY0YXQZbt17yon5iZW}g}k0)N4Atbix0bCDW-R!Fv@cs3cC<?!I@84_^=D<
zT@XTpR}x|qb|LHn+R$TYX-3$EPlF9XE=0~Cbc&GgFZ6O=kN5Dz`x!n*&ujd=V(;`W
zq<5iX=;%?{g|G`^Us)Tj^idwU=W4M?vMQ^0&L&BOU68#NDzd5@nR%D63(RZkGo9e`
zz_saH1d&j6%z4d`B&}!y+50#9QL2_SIZ9Z^v4-p{HcRh9F%CjSo$aM}0gNrZ3t<<+
zE?{#Xw#rD&WOOD%eDQT;HmW)}!<qHzUcV^>8|xi*L0O^ZCUq|bzLLpE#F|u^XEGAZ
zzZo6Ra^p2t5$o{h%TMFa-^TB(L*D5TS9$rW@b+{k)hxJj=*##%$G93*%jh57D2mTi
zzwD~4V|?IvF1-uLtVa$=h9{EV1xHlHsyN;ZyO4bsvhM=<Ffo<7Td>TphJ6QD@3BGE
zhOH|oT(40`@fwj(WHz(Esye{_+n!f`?kxKZ8B$(8GeJ5>?|M0}fn&xX-pdhX_j5W<
ze^+M<yAXDP=$SZrrR9olh^EnJG#KqtM;zb66Bz|{EH$sTVmi~i5OyK#Lf8djd-^@E
z#&o%Fqa#I~(6%C=+vKdZJ1TaC9?_4_S(-Ut`!|k*Ppus_oau#KSWPe1G~%Aq_FOmY
zg6l@VjSQX~p4`*1TkqXW?}92QvhM;J#?fw(bM)B&inxVid>R&z$cXxSP+=l-_v3T4
zeXJxK=&EpYcwraFYqW1^dKbbjXciwi%j=;PWG1T2$Yi8UM#8G910=l*Fp@&KX7sAh
zPB~UkDw99Xc{xL*ggI55S-6`#A;-&PBrzLcFvOVh^Y&?y(+Zh6_xwZ-pNC}0m+qf)
zB9600c!ZI12C>R~D(r$IF|m5?fuwhjD2?Y-Y@!$#q(=-$#-MNc*{yQWxmsBmTP7oU
z*Q5Q63fYl4+d{vMLt4`NX`kow)11wfrt27H8hym-*;E`6b|LJ78T@^A*SQr?f_d^l
zdKbbjDB|k=;wY2yE45?7yl1HW`}|!Zy08mj7s4)tUGU^VJW_UbKCApl??Pqf>0Kzl
zo5@IF7swCi{SZ1nqBzc|Kqe#E-yHY%RpxG%UkFJiBlY=Dj^?|UB6m#h0$59W7nrXG
zLhOnb*Szo=C{=nF=-`Jx54)f!&iIr%gTpRp^<>|L8IJ@n%DxNeM%aZT@0i{NkVNVP
zdS&FRjpOT`yw!Dew|Qb`PUevck$t)K7haEDQdXTIFj;&DZ4`SLcEMddJcFJyXuWDW
zDiu%ZT~LG=c0nu0k@C!sD8FKTeIJ#V^Nu3qTg1|;F_&M2T_$EwFRB>&unS=q!Y&x?
z3A=#5?zl~5pf{uY-?0GJh1zg<U;5@$>y1VEeY`@?W@m&uR;Jh8-9qIBXH*wF_Db;r
zsG8%oK06@23p^VnLL8@x9(B834@+-PklqEMviqK^ef%l;rFTK`=KVI8|F<prBwl#4
zt;(nI8!O!V2pOyM3hy<WVHc<x&SWHdAF<<#Kc!c%hiy;q0^W`aPW+hSa82Rt><M;U
zGi;f<bKKp`6JzGN!P<c%>;e;0!Y*W2XLTE>6F?|}a%k}h^8AHpDa#JKz@!&Rh&2Q%
z)lq_dwfcE2x@yXuRUK!q9&wE7-s#zbs^7sdUJOfkz2O(?FU@2me2Av7De@=gm0r)&
zb~+BrCwpXfKO~CeC39sGzdlP@$Ku`eE)=IS_m*WclA^lo>b$~Wh3c1AL0ec=#K!VT
za4CMJ&%<&BP4B(Bdq!J6Zg!bmU)b%jizn>DjA1z!F_y(7Mpks#g|G{BHr8&_=Mf96
zs8C7m@T<uf;3DK`RJPrY->BH3dI|j^a(X?E&imPSfi>vJ4m*hbTBCH>L$OwxUz^ED
zL~VO3f4Aw^ADaxp)P)rwx0&U`7l6lCT|BQmK7UFmeqrr3*YOyktL8{HlV>EP%fIQV
z@fr7_3hOw={$UqnM~*dq^^DTHK%OF9OYeg7x{kGLy?Lg>Syo~h?z_l)B<jzwgCn>t
z>$fBRtYc4KmCc^d*5_*w1@mcYoT+b-e+1nOyYS0og^_c&%8pIJIbJ<$zVMZIn?!{c
zhh0zwl_UCnoFy;WaS>v%$DxhVX};&&>h%vF*3oyr+r1S2H@ypJY<d^ME@UzievSEj
zVHfa6!Vvbg70#E>@`$ht-TNyVAbXR~2Sp(6B1TXY3|Do#MOBUg?uRGqIoLir>~UGo
zEIOmJemX}ls<=<NUf6{m8#rzwUm@=Ousx%y(bCiF5yo2PtH|1RW|f~i%kFGWkJ?wy
zO6TY}^O-Hb*)W41`Md6?n2`Ri&X#=_vhPBA7s4(W3lF;xc0oRjz38qkt}X3zC)P2K
z{K;`*cyXyhwaM<Y?*g6}^e&T;X5P2QX6aq%IRH_dJ28&CHcFq4RjSVEh-POjh<EmL
zdENE7j*EN5nBE1@1h7fux@JZ9`L!)1)xg<zA?!lfg?;9&Z1-+hCe=6|nE+d;Be$O3
z1+WC|L1KRJ%qw(k1EwUs3+C#s1{#F-QyygRxkisxlh<6Sp=vcOekLOwt0o*tWZwm1
zussS~7@{MFBmQ{)9kZdU`0jWd@gdoFL6w5(T{vn8`Wbd%A8!a9an;A#KdiH;unVr(
zRkn(?b7g6-tv^a!5gxU<>0L<gf?`&pS7i<OJu-2}Zu8zY+J^{DBldl>W$lXY(7!^R
z)V+a@Iri(>Ium<f@6c2A+5LU|{%L%=TL4!&`KVzR!Y-gAN8f5oc&xa#_Gvl+8p>o0
z)n=x5p;md=1v)RB_d8!u-p~KD??SiCw#Za|gk1=`;O-{k0%K<BT}bZ&m0FpMl*vfs
z6Pb)eRM9I_Z7IVp(4|SF4)Y>*UOk=Wgvkw=>l1ciuS40@8GH*1Ti&)bY3|l@e4pNh
z^e$vq=bBIt_Sb9aGa1R9C!OCvqE6Khu(mVlU7-3T`!3AW>0O&c2NN2ESHi1<c8k;M
z>`m`jF(aMHNMRSMx<);1#Lc%GtAbtEY&><yt!lr--^@Hu&GAX^LV6bzE2nqCvo>8Z
z?pihS4b~T^I4kh?@sljJ&lft23c@ZNdAhI**yLgoieahOG8rlC0zF^aE35F1Gdpwk
zx^c%$Mxu(vo=!Ccu6j`<UJ{&HjN|?<)xR>YS4Z}1`}WHvy$k7GSXJnCy>tzWW*yda
aF(zb`hVGGJukx@9|6kaJ|NWO2|NVdP^V?<s

literal 0
HcmV?d00001

diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/dae80df8-e1e5-4996-87fe-b453f63fcb19.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/dae80df8-e1e5-4996-87fe-b453f63fcb19.bin
new file mode 100755
index 0000000000000000000000000000000000000000..996c82afee2b7b6d0fd1ae17c07c780372caa4f5
GIT binary patch
literal 330
zcmeYZU|?VbBL)Tr`?c#Q0NEi7lIE;cj0}t(42Kj7r!z4yW-@Tf%PnDMU@T^62|9b8
zg@I9lVfhnz4_1)WZu>)9*g#S~o1FHugQOm~q|RXAU@)H%EQM@b21EFgP*d`Z<6vM+
zWauvI;pGHb5MOfpCl>=_8N(OT=-=E7jHwJOb{XH~VPGs`keAGk=Vf55WH{J6TZIp#
z_-zRLOMa0192Y5^6JTJ>VBk`D%`V8mn9oqb8d4wx(skgZ(;i_4#uA3rpU-rOfOH*a
v={O|{5>eS~sU-%o!r)ZGD{%(K3WiU|RNW*%Qg@X9&X#0g)MHpEp7<32c6?II

literal 0
HcmV?d00001

diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/wrong-checksum.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/wrong-checksum.bin
new file mode 100755
index 0000000000000000000000000000000000000000..9e33f0c67d828f71b7e0cbfe6f4fa8a9059f7a84
GIT binary patch
literal 145
zcmXwy(G5UA5JcZ~q5?IX2E>O3?m|K$h&xf07L=j|Ef|NN+1a;~*|Gv-tpRLJM^7O8
z)G;y<MOzlpAUDZH`6w2oxqP8qqq&Lwsg)rypP0jIf38st88c2j;Un+Y9CPl+bpsEw
B8(IJW

literal 0
HcmV?d00001

diff --git a/tests/plugins/apps/texteditor/__init__.py b/tests/plugins/apps/texteditor/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/plugins/apps/texteditor/test_texteditor.py b/tests/plugins/apps/texteditor/test_texteditor.py
new file mode 100644
index 000000000..e7546f923
--- /dev/null
+++ b/tests/plugins/apps/texteditor/test_texteditor.py
@@ -0,0 +1,86 @@
+import os
+
+from dissect.target.plugins.apps.texteditor import windowsnotepad
+from dissect.target.plugins.apps.texteditor.windowsnotepad import (
+    parse_large_structure_data_length,
+)
+from tests._utils import absolute_path
+
+text1 = "This is an unsaved tab, UTF-8 encoded with Windows (CRLF). It's only 88 characters long."
+text2 = (
+    "Dissect は、インシデント対応のための優れたフレームワークです。 The Notepad window shows UTF-8 as the encoding. This text has 113 "
+    "characters."
+)
+text3 = "This is a very short text."
+text4 = "This is another short test. And we should be able to parse this."
+text5 = "A bit more text. This requires two bytes for the length! "
+loremipsum = """Lorem ipsum dolor sit amet. Eum error blanditiis eum pariatur delectus ut consequuntur officiis a excepturi dignissimos et doloribus quia 33 perspiciatis soluta nam perspiciatis dolor. Ut repudiandae quidem cum sint modi qui sint consequatur. Aut autem quidem eum enim consequatur qui voluptate consequatur non similique voluptate. A vitae modi vel sint provident ut galisum tenetur sit voluptatem amet. Est impedit perspiciatis est repudiandae voluptates ut fugit alias! Eum magni esse aut velit illum qui excepturi aperiam. Ex dolores asperiores ut debitis omnis qui consequuntur dolore. Est voluptatem mollitia et quibusdam unde ea accusamus fuga. Cum quis galisum et impedit sunt qui aliquam perspiciatis sed modi quidem qui nisi molestias. Aut temporibus architecto ut neque voluptatem et consequatur deleniti sed accusantium quibusdam et omnis dignissimos ad rerum ipsam et rerum quia. Ut nihil repellat et eaque molestias quo iusto ipsum At optio sint eos quidem earum?\r\rEx deleniti unde eum tenetur rerum ea dolore numquam? Eos aperiam officiis et neque explicabo et enim atque ut eaque omnis non illum eveniet est molestias itaque et ratione voluptatem. Ea deserunt nemo et quos tempora et nostrum aperiam sit necessitatibus illo sit culpa placeat. Vel tempore quibusdam ut velit voluptate aut odio facere non voluptas earum est odio galisum et voluptas harum. Et blanditiis sapiente et nostrum laborum aut voluptatem explicabo a quasi assumenda. Est voluptatem quia eum minima galisum quo totam excepturi aut facilis enim vel voluptate repudiandae sit distinctio laboriosam. Quo possimus molestiae et molestiae accusantium est voluptas omnis sed obcaecati natus. Non vitae asperiores qui nostrum enim id saepe fugiat et incidunt quasi.\r\rEos ipsa facilis aut excepturi voluptatem a omnis magni vel magni iste. Sed ipsum consequatur qui reprehenderit deleniti et soluta molestiae. 
Ut vero assumenda id dolor ipsum in deleniti voluptatem aut quis quisquam sed repudiandae temporibus ab quia inventore. Sed velit fugit vel facere cumque et delectus ullam sed eaque impedit. Est veritatis dignissimos aut doloribus dolorem vel pariatur repellendus sit nesciunt similique eum architecto quia. Ea expedita veritatis eum dolorem molestiae ut enim fugit aut beatae quibusdam. Aut voluptas natus in quidem deleniti aut animi iure est incidunt tenetur qui culpa maiores! Et nostrum quaerat qui consequatur consequatur aut aliquam atque aut praesentium rerum et consequuntur exercitationem. Non accusantium ipsa vel consectetur vitae ut magnam autem et natus rerum ut consectetur inventore est doloremque temporibus 33 dolores doloribus! Aut perferendis optio et nostrum repellendus et fugit itaque ut nisi neque sed sint quaerat. Aut placeat architecto et eius sapiente eum molestiae quam. Quo mollitia sapiente non Quis neque non tempora laudantium. Quo distinctio quos et molestias natus sit veritatis consequuntur aut repellendus neque a porro galisum cum numquam nesciunt et animi earum? Aut dolorum dolore non assumenda omnis et molestiae amet id sint vero est eligendi harum sit temporibus magnam aut ipsam quos.\r\r"""  # noqa: E501
+
+
+def test_read_length(tmp_path):
+    # 3-byte length
+    testfile1 = tmp_path / "test_file1.bin"
+
+    with open(testfile1, "wb+") as file:
+        file.write(b"\xc9\x85\x07")
+
+    with open(testfile1, "rb") as file:
+        read_length, original_bytes = parse_large_structure_data_length(file)
+        assert read_length == 115401
+        assert original_bytes == b"\xc9\x85\x07"
+
+    # 2-byte length
+    testfile2 = tmp_path / "test_file2.bin"
+
+    with open(testfile2, "wb+") as file:
+        file.write(b"\xaf\x18")
+
+    with open(testfile2, "rb") as file:
+        read_length, original_bytes = parse_large_structure_data_length(file)
+        assert read_length == 3119
+        assert original_bytes == b"\xaf\x18"
+
+
+def test_texteditor_plugin(target_win, fs_win, tmp_path, target_win_users, caplog):
+    file_text_map = {
+        "c515e86f-08b3-4d76-844a-cddfcd43fcbb.bin": text1,
+        "85167c9d-aac2-4469-ae44-db5dccf8f7f4.bin": text2,
+        "dae80df8-e1e5-4996-87fe-b453f63fcb19.bin": text3,
+        "3f915e17-cf6c-462b-9bd1-2f23314cb979.bin": text4,
+        "3d0cc86e-dfc9-4f16-b74a-918c2c24188c.bin": loremipsum,
+        "wrong-checksum.bin": "",  # only added to check for corrupt checksum, not validity
+        "cfe38135-9dca-4480-944f-d5ea0e1e589f.bin": (loremipsum * 37)[:-2],  # removed the two newlines in this file
+    }
+
+    tabcache = absolute_path("_data/plugins/apps/texteditor/windowsnotepad/")
+
+    user = target_win_users.user_details.find(username="John")
+    tab_dir = user.home_path.joinpath(windowsnotepad.WindowsNotepadPlugin.DIRECTORY)
+
+    fs_win.map_dir("Users\\John", tmp_path)
+
+    for file in file_text_map.keys():
+        tab_file = str(tab_dir.joinpath(file))[3:]
+        fs_win.map_file(tab_file, os.path.join(tabcache, file))
+
+    target_win.add_plugin(windowsnotepad.WindowsNotepadPlugin)
+
+    records = list(target_win.windowsnotepad.tabs())
+
+    # Check the amount of files
+    assert len(list(tab_dir.iterdir())) == len(file_text_map.keys())
+
+    # Only six should be parsed correctly, without errors/warnings
+    assert len(records) == 6
+
+    # One file should not return any contents, there should be an entry for this in the logging.
+    assert "CRC32 checksum mismatch in file: wrong-checksum.bin" in caplog.text
+    assert (
+        "dissect.target.exceptions.CRCMismatchException: data, calculated=b'\\xa4\\x8d0\\xa6', "
+        "expected=b'\\xde\\xad\\xbe\\xef'"
+    ) in caplog.text
+
+    # The recovered content in the records should match the original data, as well as the length
+    for rec in records:
+        assert rec.content == file_text_map[rec.filename]
+        assert len(rec.content) == len(file_text_map[rec.filename])

From 3594ff0440b2c6936e819807900a8f45fe38ea9d Mon Sep 17 00:00:00 2001
From: Joost Jansen <joost.jansen@fox-it.com>
Date: Wed, 14 Feb 2024 09:25:27 +0100
Subject: [PATCH 02/36] Removed unused 'seek_size' function

---
 .../plugins/apps/texteditor/windowsnotepad.py  | 18 ------------------
 1 file changed, 18 deletions(-)

diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
index 1fd316a57..0f26ddfea 100644
--- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py
+++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
@@ -1,4 +1,3 @@
-import io
 import zlib
 from typing import BinaryIO, Iterator
 
@@ -13,23 +12,6 @@
 )
 
 
-def seek_size(fh: BinaryIO) -> int:
-    """
-    Find the size of a file on disk.
-
-    Args:
-        fh: A file-like object that we want to calculate the size of.
-
-    Returns:
-        An integer representing the size (in bytes) of the file.
-    """
-    pos = fh.tell()
-    fh.seek(0, io.SEEK_END)
-    size = fh.tell()
-    fh.seek(pos)
-    return size
-
-
 def parse_large_structure_data_length(fh: BinaryIO) -> (int, bytes):
     """
     Read a variable-length representation of a length field. Acts much like a ``varint`` object

From b1bcd69fd5b86aeb76f8fdd6b34643c755f88445 Mon Sep 17 00:00:00 2001
From: Joost Jansen <joost.jansen@fox-it.com>
Date: Thu, 15 Feb 2024 23:22:50 +0100
Subject: [PATCH 03/36] Refactored the code to work with new LEB128 structure,
 added some more test data & test cases

---
 .../plugins/apps/texteditor/windowsnotepad.py | 234 +++++++++---------
 .../ba291ccd-f1c3-4ca8-949c-c01f6633789d.bin  | Bin 0 -> 560 bytes
 .../e609218e-94f2-45fa-84e2-f29df2190b26.bin  | Bin 0 -> 141143 bytes
 .../apps/texteditor/test_texteditor.py        |  41 +--
 4 files changed, 122 insertions(+), 153 deletions(-)
 create mode 100755 tests/_data/plugins/apps/texteditor/windowsnotepad/ba291ccd-f1c3-4ca8-949c-c01f6633789d.bin
 create mode 100755 tests/_data/plugins/apps/texteditor/windowsnotepad/e609218e-94f2-45fa-84e2-f29df2190b26.bin

diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
index 0f26ddfea..cea3172a2 100644
--- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py
+++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
@@ -1,6 +1,9 @@
+import io
 import zlib
 from typing import BinaryIO, Iterator
 
+from dissect import cstruct
+
 from dissect.target.exceptions import CRCMismatchException, UnsupportedPluginError
 from dissect.target.helpers.descriptor_extensions import UserRecordDescriptorExtension
 from dissect.target.helpers.fsutil import TargetPath
@@ -11,57 +14,38 @@
     TexteditorTabPlugin,
 )
 
-
-def parse_large_structure_data_length(fh: BinaryIO) -> (int, bytes):
-    """
-    Read a variable-length representation of a length field. Acts much like a ``varint`` object
-    from ``dissect.ntfs``, however it introduces some additional bit shifts and masking.
-
-    The position of ``fh`` will be restored before returning.
-
-    Args:
-        fh: A file-like object where we want to read the length bytes from.
-
-    Returns:
-        Length of the data as an integer
-        The original bytes that have been processed to determine the length
-    """
-    offset = fh.tell()
-    original_bytes = b""
-    modified_bytes = b""
-
-    while True:
-        # Read the original byte
-        bt = fh.read(1)
-
-        # Transform into an integer
-        bt_int = int.from_bytes(bt)
-
-        # Shift this new byte a few places to the right, depending on the number of bytes that have already
-        # been processed
-        new_bt = bt_int >> len(original_bytes)
-
-        # Add this byte back to
-        modified_bytes += new_bt.to_bytes(length=1)
-
-        # Add the processed byte to the list of original by tes
-        original_bytes += bt
-
-        # If the first bit of the original byte is a zero, this is the final byte
-        # Otherwise, continue until we find the zero-led byte
-        if not bt_int & 128:
-            break
-
-    # Convert it to an integer
-    f = int.from_bytes(bytes=modified_bytes, byteorder="little")
-
-    # Apply the mask
-    f = f ^ (2 ** ((len(original_bytes) - 1) * 8) >> 1)
-
-    # Restore to original cursor
-    fh.seek(offset)
-
-    return f, original_bytes
+c_def = """
+struct data_entry_multi_block {
+    uint16    offset;
+    uleb128   len;
+    char      data[len * 2];
+    char      crc32[4];
+};
+
+struct data_entry_single_block {
+    uint16    offset;
+    uleb128   len;
+    char      data[len * 2];
+    char      unk1;
+    char      crc32[4];
+};
+
+struct tab_header {
+    char      magic[3];         // NP\x00
+    char      header_start[2];  // \x00\x01
+    uleb128   len1;
+    uleb128   len2;
+    char      header_end[2];    // \x01\x00
+};
+
+struct tab_crc {
+    char      unk[4];
+    char      crc32[4];
+};
+"""
+
+c_windowstab = cstruct.cstruct()
+c_windowstab.load(c_def)
 
 
 def _calc_crc32(data: bytes) -> bytes:
@@ -69,58 +53,21 @@ def _calc_crc32(data: bytes) -> bytes:
     return zlib.crc32(data).to_bytes(length=4, byteorder="big")
 
 
-def _parse_large_structure_tab(handle: BinaryIO, header_has_crc: bool, header: bytes) -> str:
-    # A dictionary where the data will be stored in the correct order
-    content = dict()
-
-    while True:
-        offset_bytes = handle.read(2)
-
-        # If we reach the end of the file, break
-        if offset_bytes == b"":
-            break
-
-        offset = int.from_bytes(offset_bytes, byteorder="big")
-
-        # Parse the length field based on the first one, two, three or four bytes.
-        data_length, data_length_bytes = parse_large_structure_data_length(handle)
-
-        # Move the cursor past the length bytes
-        handle.seek(handle.tell() + len(data_length_bytes))
-
-        chunk_data = b""
-        for i in range(data_length):
-            r = handle.read(2)
-            chunk_data += r
-
-        # Insert the chunk data into the correct offset. I have not yet encountered a file
-        # where the chunks were placed in a non-sequential order, but you never know.
-        for i in range(len(chunk_data)):
-            content[offset + i] = chunk_data[i].to_bytes(length=1)
-
-        # CRC32 consists of the following data
-        crc_data_reconstructed = offset_bytes + data_length_bytes + chunk_data
-
-        # If the header did not have a CRC, this means that it is combined with the only data entry
-        # in the file. So we need to prepend this extra header data.
-        if not header_has_crc:
-            # Furthermore, if the header does not have its own CRC32 it
-            # places a byte at the end to indicate the start
-            # of the CRC32. This should be included in the CRC32 calculation
-            crc_data_reconstructed = header + crc_data_reconstructed + handle.read(1)
-
-        # Finally, read the CRC32 from disk and compare it
-        crc32_on_disk = handle.read(4)
-
-        crc32_calculated = _calc_crc32(crc_data_reconstructed)
+def seek_size(fh: BinaryIO) -> int:
+    """
+    Find the size of a file on disk.
 
-        if not crc32_on_disk == crc32_calculated:
-            raise CRCMismatchException(message=f"data, calculated={crc32_calculated}, expected={crc32_on_disk}")
+    Args:
+        fh: A file-like object that we want to calculate the size of.
 
-    # Reconstruct the text
-    text_reconstructed = b"".join(content.values())
-    text = text_reconstructed.decode("utf-16-le")
-    return text
+    Returns:
+        An integer representing the size (in bytes) of the file.
+    """
+    pos = fh.tell()
+    fh.seek(0, io.SEEK_END)
+    size = fh.tell()
+    fh.seek(pos)
+    return size
 
 
 class WindowsNotepadPlugin(TexteditorTabPlugin):
@@ -147,33 +94,80 @@ def check_compatible(self) -> None:
             raise UnsupportedPluginError("No tabs directories found")
 
     def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord:
-        handle: BinaryIO = file.open(mode="rb")
+        """
+        Function that parses a binary tab file and reconstructs the contents.
 
-        # Skip the presumed magic bytes 0x4e5000 (NP\x00)
-        handle.read(3)
+        Args:
+            file: The binary file on disk that needs to be parsed.
 
-        # Read some of the info in the header. Not entirely sure at this point what info is in there,
-        # there seems to be an indication of the length of the file.
-        header = handle.read(6)
+        Returns:
+            A TextEditorTabRecord containing information that is in the tab.
+        """
+        fh: BinaryIO = file.open(mode="rb")
 
-        # Whenever the bytes between the two \x01 bytes in the header are zeroed out, it means that the
-        # header itself has a CRC32 checksum
-        header_has_crc32 = True if header[2:4] == b"\x00\x00" else False
+        # There is always a 4 byte value at the end. The offset is always 2 bytes, and the length is always at
+        # least 1 byte. That means that if we reach the end of a data section, and we have equal or less
+        # than 4 + 2 + 1 = 7 bytes left, we should stop parsing new data blobs.
+        data_threshold = seek_size(fh) - 4 - 2 - 1
 
-        if header_has_crc32:
-            # Header CRC32 is composed of the header, plus four more bytes.
-            header_crc_data = header + handle.read(4)
-            # After that, the CRC32 of the header is stored.
-            header_crc_on_disk = handle.read(4)
+        # Parse the generic header
+        header = c_windowstab.tab_header(fh)
 
-            # This should match
-            header_crc_calculated = _calc_crc32(header_crc_data)
-            if not header_crc_on_disk == header_crc_calculated:
+        # Some tabs are stored as one big block. In this case, the data is contiguous and the file
+        # only contains one CRC32 at the end which checksums the entire file (excluding the file magic).
+        # It is likely stored as a single block whenever a length field is nonzero in the header.
+        is_single_blob = header.len1 != 0
+
+        if is_single_blob:
+            # In this case, we parse the single block
+            data_entry = c_windowstab.data_entry_single_block(fh)
+
+            # The header (minus the magic) plus all data (exluding the CRC32 at the end) is included
+            actual_crc32 = _calc_crc32(header.dumps()[3:] + data_entry.dumps()[:-4])
+
+            if data_entry.crc32 != actual_crc32:
                 raise CRCMismatchException(
-                    message=f"header, calculated={header_crc_calculated}, " f"expected={header_crc_on_disk}"
+                    f"CRC32 mismatch in single-block file. "
+                    f"expected={data_entry.crc32.hex()}, actual={actual_crc32.hex()} "
                 )
 
-        text = _parse_large_structure_tab(handle, header_has_crc32, header)
+            # Finally, decode the block using UTF16-LE, common for Windows.
+            text = data_entry.data.decode("utf-16-le")
+
+        else:
+            text = ["\x00"] * 100
+
+            # in this case, the header contains a separate CRC32 checksum as well
+            header_crc = c_windowstab.tab_crc(fh)
+
+            # the header, minus the file magic, plus some bytes from the extra header are
+            # required in the calculation
+            assert header_crc.crc32 == _calc_crc32(header.dumps()[3:] + header_crc.unk.dumps())
+
+            # otherwise, the file can be reconstructed out of many smaller entries
+            while fh.tell() < data_threshold:
+                data_entry = c_windowstab.data_entry_multi_block(fh)
+
+                # Check for CRC mismatch in a data block
+                actual_crc32 = _calc_crc32(data_entry.dumps()[:-4])
+                if data_entry.crc32 != actual_crc32:
+                    raise CRCMismatchException(
+                        f"CRC32 mismatch in single-block file. "
+                        f"expected={data_entry.crc32.hex()}, actual={actual_crc32.hex()} "
+                    )
+
+                # insert the text at the right offset in the textfile
+                # since we don't know the size of the file in the beginning, gradually increase the size
+                # of the list that holds the data
+                while data_entry.offset + data_entry.len > len(text) and data_entry.len > 0:
+                    text += ["\x00"] * 100
+
+                # place the text on the correct spot
+                for i in range(data_entry.len):
+                    text[data_entry.offset + i] = data_entry.data[(2 * i) : (2 * i) + 2].decode("utf-16-le")
+
+            # join the data and strip off excess null bytes
+            text = "".join(text).rstrip("\x00")
 
         return self.TextEditorTabRecord(content=text, content_length=len(text), filename=file.name)
 
diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/ba291ccd-f1c3-4ca8-949c-c01f6633789d.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/ba291ccd-f1c3-4ca8-949c-c01f6633789d.bin
new file mode 100755
index 0000000000000000000000000000000000000000..1e3160846e22f77f2e16dfecc188500c2cd6c38c
GIT binary patch
literal 560
zcmeYZU|?YEW9nmKWME+EV+vu&V8~=BW>A2Vi9l8fLn=_D1jtHc$YV$Wi(~-BATkwT
v8Hk=7hJ2ukbf8=jR24{v0)r8QDT5i1PGYEpnx+TjjoLM|!jN&tvh;5NE}~$Q

literal 0
HcmV?d00001

diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/e609218e-94f2-45fa-84e2-f29df2190b26.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/e609218e-94f2-45fa-84e2-f29df2190b26.bin
new file mode 100755
index 0000000000000000000000000000000000000000..fe17b9ad2b264d4aed4bbb90a7b2403d0499442e
GIT binary patch
literal 141143
zcmeI*K~5B56ot{(2@B~JFd?`M;=qAZ41^{zm?#6;jWajo5?Ba-b@jIg2PYcAZqUS7
zk9*JiySV#2bNKW3YW+OS%zE=>PV+p!=6Lz_e!cfPkL!`w_1^uw%)|MS(|T-w=J)xT
z{d2z0kM$8x>$#tokJ>)tIG^Tr{kU0gkMnKbE`Q?n`klYezo-8%`xWMTFkj41UmtwE
zEq%xn@{;_yC2#p!f7n`?KBjN#(|m%zY`Nfc7Ae<*>%sNldf;dLjGyr{e#Q&z8+S3{
z1-w8%(vS2b{YXF3kMtw`NI%k#^dtRfKUtw4=|}pJexx7iNBWU|q#x-=`jLL5AL&QV
z*Li)a*Qa`Y>Yj-7BmGD}(vS2b{YXF3kMtw`NI%k#^dtSq{lT^!y8D&5U&)?|To0}X
z_d9gIlE-nr&iOj$>zuE1zRv#0{>c8w{>c8w{>c8w{>c8w{>c8w{>c8w{)m_Ja$e5M
zdHKuQBl~_?HDvd%vp;e@*dN&+*&o>-*&o>-*&o>-m5!Y^b>7r@Q{Nwt7c>!h0WaVM
zynq+*0$#uicmXfq1*KzNzzcW*FW?1DL|(uPcmXfq1-yV4@B&`I3wS~4m>2K@Ucd`@
zK@*V|@B&`I3wQx9;03&Z7w`gJP&(!Xynq+*0$$KW<ORHd7w`gJzzcW*FW?2dfESdG
zc>yor1-yV4G!c0LFW?2dfEVxrUcd`@0WaVMrDI;e3wQx9-~~-YUcd`@0WaVMynq+*
z0$#uictPoy7w`gJzzcXm6OkA20$#uicmXfq1-yV4@B&^?I_3qufEVxrUeHA31-yV4
z@B&`I3wQx9;03&Z7nF{90WaVMynq)p5qSYG;03&Z7w`gJzzcW*FW?2GV_v`ucmXfq
z1x-X=zzcW*FW?2dfEVxrUcd`@LFt$m@B&`I3wS{jkr(g+Ucd`@0WaVMynq+*0$xx$
z<^{Zf7w`gJ&_v_~ynq+*0$#uicmXfq1-yV4l#Y1;FW?2dfEP3oc>yor1-yV4@B&`I
z3wQx9;02{)Ucd`@0WaVMO+;S63wQx9;03&Z7w`gJzzcXm>6jPr0$#uictI197w`gJ
zzzcW*FW?2dfEVxrUQjyb1-yV4@B&`YMC1j$fEVxrUcd`@0WaVMynq*!j(Gtu;03&Z
z7c>!h0WaVMynq+*0$#uicmXfq1*KzNzzcW*FW?1DL|(uPcmXfq1-yV4@B&`I3wS~4
zm>2K@Ucd`@K@*V|@B&`I3wQx9;03&Z7w`gJP&(!Xynq+*0$$KW<ORHd7w`gJzzcW*
zFW?2dfESdGc>yor1-yV4G!c0LFW?2dfEVxrUcd`@0WaVMrDI;e3wQx9-~~-YUcd`@
z0WaVMynq+*0$#uictPoy7w`gJzzcXm6OkA20$#uicmXfq1-yV4@B&^?I_3qufEVxr
zUeHA31-yV4@B&`I3wQx9;03&Z7nF{90WaVMynq)p5qSYG;03&Z7w`gJzzcW*FW?2G
zV_v`ucmXfq1x-X=zzcW*FW?2dfEVxrUcd`@LFt$m@B&`I3wS{jkr(g+Ucd`@!T;$6
Khkw`aUq1l19XFi-

literal 0
HcmV?d00001

diff --git a/tests/plugins/apps/texteditor/test_texteditor.py b/tests/plugins/apps/texteditor/test_texteditor.py
index e7546f923..21e722a6f 100644
--- a/tests/plugins/apps/texteditor/test_texteditor.py
+++ b/tests/plugins/apps/texteditor/test_texteditor.py
@@ -1,9 +1,6 @@
 import os
 
 from dissect.target.plugins.apps.texteditor import windowsnotepad
-from dissect.target.plugins.apps.texteditor.windowsnotepad import (
-    parse_large_structure_data_length,
-)
 from tests._utils import absolute_path
 
 text1 = "This is an unsaved tab, UTF-8 encoded with Windows (CRLF). It's only 88 characters long."
@@ -13,40 +10,19 @@
 )
 text3 = "This is a very short text."
 text4 = "This is another short test. And we should be able to parse this."
-text5 = "A bit more text. This requires two bytes for the length! "
+text5 = "This is a test and the text is longer than 256 bytes. "
+text6 = "This is a test and the text is longer than 65536 bytes. "
 loremipsum = """Lorem ipsum dolor sit amet. Eum error blanditiis eum pariatur delectus ut consequuntur officiis a excepturi dignissimos et doloribus quia 33 perspiciatis soluta nam perspiciatis dolor. Ut repudiandae quidem cum sint modi qui sint consequatur. Aut autem quidem eum enim consequatur qui voluptate consequatur non similique voluptate. A vitae modi vel sint provident ut galisum tenetur sit voluptatem amet. Est impedit perspiciatis est repudiandae voluptates ut fugit alias! Eum magni esse aut velit illum qui excepturi aperiam. Ex dolores asperiores ut debitis omnis qui consequuntur dolore. Est voluptatem mollitia et quibusdam unde ea accusamus fuga. Cum quis galisum et impedit sunt qui aliquam perspiciatis sed modi quidem qui nisi molestias. Aut temporibus architecto ut neque voluptatem et consequatur deleniti sed accusantium quibusdam et omnis dignissimos ad rerum ipsam et rerum quia. Ut nihil repellat et eaque molestias quo iusto ipsum At optio sint eos quidem earum?\r\rEx deleniti unde eum tenetur rerum ea dolore numquam? Eos aperiam officiis et neque explicabo et enim atque ut eaque omnis non illum eveniet est molestias itaque et ratione voluptatem. Ea deserunt nemo et quos tempora et nostrum aperiam sit necessitatibus illo sit culpa placeat. Vel tempore quibusdam ut velit voluptate aut odio facere non voluptas earum est odio galisum et voluptas harum. Et blanditiis sapiente et nostrum laborum aut voluptatem explicabo a quasi assumenda. Est voluptatem quia eum minima galisum quo totam excepturi aut facilis enim vel voluptate repudiandae sit distinctio laboriosam. Quo possimus molestiae et molestiae accusantium est voluptas omnis sed obcaecati natus. Non vitae asperiores qui nostrum enim id saepe fugiat et incidunt quasi.\r\rEos ipsa facilis aut excepturi voluptatem a omnis magni vel magni iste. Sed ipsum consequatur qui reprehenderit deleniti et soluta molestiae. 
Ut vero assumenda id dolor ipsum in deleniti voluptatem aut quis quisquam sed repudiandae temporibus ab quia inventore. Sed velit fugit vel facere cumque et delectus ullam sed eaque impedit. Est veritatis dignissimos aut doloribus dolorem vel pariatur repellendus sit nesciunt similique eum architecto quia. Ea expedita veritatis eum dolorem molestiae ut enim fugit aut beatae quibusdam. Aut voluptas natus in quidem deleniti aut animi iure est incidunt tenetur qui culpa maiores! Et nostrum quaerat qui consequatur consequatur aut aliquam atque aut praesentium rerum et consequuntur exercitationem. Non accusantium ipsa vel consectetur vitae ut magnam autem et natus rerum ut consectetur inventore est doloremque temporibus 33 dolores doloribus! Aut perferendis optio et nostrum repellendus et fugit itaque ut nisi neque sed sint quaerat. Aut placeat architecto et eius sapiente eum molestiae quam. Quo mollitia sapiente non Quis neque non tempora laudantium. Quo distinctio quos et molestias natus sit veritatis consequuntur aut repellendus neque a porro galisum cum numquam nesciunt et animi earum? Aut dolorum dolore non assumenda omnis et molestiae amet id sint vero est eligendi harum sit temporibus magnam aut ipsam quos.\r\r"""  # noqa: E501
 
 
-def test_read_length(tmp_path):
-    # 3-byte length
-    testfile1 = tmp_path / "test_file1.bin"
-
-    with open(testfile1, "wb+") as file:
-        file.write(b"\xc9\x85\x07")
-
-    with open(testfile1, "rb") as file:
-        read_length, original_bytes = parse_large_structure_data_length(file)
-        assert read_length == 115401
-        assert original_bytes == b"\xc9\x85\x07"
-
-    # 2-byte length
-    testfile2 = tmp_path / "test_file2.bin"
-
-    with open(testfile2, "wb+") as file:
-        file.write(b"\xaf\x18")
-
-    with open(testfile2, "rb") as file:
-        read_length, original_bytes = parse_large_structure_data_length(file)
-        assert read_length == 3119
-        assert original_bytes == b"\xaf\x18"
-
-
 def test_texteditor_plugin(target_win, fs_win, tmp_path, target_win_users, caplog):
     file_text_map = {
         "c515e86f-08b3-4d76-844a-cddfcd43fcbb.bin": text1,
         "85167c9d-aac2-4469-ae44-db5dccf8f7f4.bin": text2,
         "dae80df8-e1e5-4996-87fe-b453f63fcb19.bin": text3,
         "3f915e17-cf6c-462b-9bd1-2f23314cb979.bin": text4,
+        "ba291ccd-f1c3-4ca8-949c-c01f6633789d.bin": (text5 * 5),
+        "e609218e-94f2-45fa-84e2-f29df2190b26.bin": (text6 * 1260),
         "3d0cc86e-dfc9-4f16-b74a-918c2c24188c.bin": loremipsum,
         "wrong-checksum.bin": "",  # only added to check for corrupt checksum, not validity
         "cfe38135-9dca-4480-944f-d5ea0e1e589f.bin": (loremipsum * 37)[:-2],  # removed the two newlines in this file
@@ -70,15 +46,14 @@ def test_texteditor_plugin(target_win, fs_win, tmp_path, target_win_users, caplo
     # Check the amount of files
     assert len(list(tab_dir.iterdir())) == len(file_text_map.keys())
 
-    # Only six should be parsed correctly, without errors/warnings
-    assert len(records) == 6
+    # Only one should not be parsed correctly, without errors/warnings
+    assert len(records) == len(file_text_map.keys()) - 1
 
     # One file should not return any contents, there should be an entry for this in the logging.
     assert "CRC32 checksum mismatch in file: wrong-checksum.bin" in caplog.text
     assert (
-        "dissect.target.exceptions.CRCMismatchException: data, calculated=b'\\xa4\\x8d0\\xa6', "
-        "expected=b'\\xde\\xad\\xbe\\xef'"
-    ) in caplog.text
+        "CRCMismatchException: CRC32 mismatch in single-block file. expected=deadbeef, actual=a48d30a6" in caplog.text
+    )
 
     # The recovered content in the records should match the original data, as well as the length
     for rec in records:

From c634987fc7890d83612a60ca629152df06cb6845 Mon Sep 17 00:00:00 2001
From: Joost Jansen <joost.jansen@fox-it.com>
Date: Fri, 16 Feb 2024 00:01:01 +0100
Subject: [PATCH 04/36] Added more comments

---
 .../plugins/apps/texteditor/windowsnotepad.py | 22 ++++++++++---------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
index cea3172a2..425b84409 100644
--- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py
+++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
@@ -135,16 +135,19 @@ def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord:
             text = data_entry.data.decode("utf-16-le")
 
         else:
-            text = ["\x00"] * 100
-
-            # in this case, the header contains a separate CRC32 checksum as well
+            # In this case, the header contains a separate CRC32 checksum as well
             header_crc = c_windowstab.tab_crc(fh)
 
-            # the header, minus the file magic, plus some bytes from the extra header are
+            # The header, minus the file magic, plus some bytes from the extra header are
             # required in the calculation
             assert header_crc.crc32 == _calc_crc32(header.dumps()[3:] + header_crc.unk.dumps())
 
-            # otherwise, the file can be reconstructed out of many smaller entries
+            # We don't know how many blocks there will be beforehand. So we also don't know the exact file
+            # size, since the file, next to data, also contains quite some metadata and checksums.
+            # Also, because blocks can possibly be present in a non-contiguous order, a list is used
+            # that gradually increases in size. This allows for quick and flexible insertion of chars.
+            text = ["\x00"] * 100
+
             while fh.tell() < data_threshold:
                 data_entry = c_windowstab.data_entry_multi_block(fh)
 
@@ -156,17 +159,16 @@ def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord:
                         f"expected={data_entry.crc32.hex()}, actual={actual_crc32.hex()} "
                     )
 
-                # insert the text at the right offset in the textfile
-                # since we don't know the size of the file in the beginning, gradually increase the size
-                # of the list that holds the data
+                # Since we don't know the size of the file in the beginning, gradually increase the size
+                # of the list that holds the data if there is not enough room
                 while data_entry.offset + data_entry.len > len(text) and data_entry.len > 0:
                     text += ["\x00"] * 100
 
-                # place the text on the correct spot
+                # Place the text at the correct offset. UTF16-LE consumes two bytes for one character.
                 for i in range(data_entry.len):
                     text[data_entry.offset + i] = data_entry.data[(2 * i) : (2 * i) + 2].decode("utf-16-le")
 
-            # join the data and strip off excess null bytes
+            # Join the chars and strip off excess null bytes that may be present
             text = "".join(text).rstrip("\x00")
 
         return self.TextEditorTabRecord(content=text, content_length=len(text), filename=file.name)

From d3d35a11759e5e4e3c26981988b0a692797734b6 Mon Sep 17 00:00:00 2001
From: Joost Jansen <12032793+joost-j@users.noreply.github.com>
Date: Mon, 19 Feb 2024 15:44:08 +0100
Subject: [PATCH 05/36] Refactor c_def to include parsing of both variants

---
 .../plugins/apps/texteditor/windowsnotepad.py | 119 ++++++++----------
 1 file changed, 50 insertions(+), 69 deletions(-)

diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
index 425b84409..638a125ce 100644
--- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py
+++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
@@ -1,4 +1,3 @@
-import io
 import zlib
 from typing import BinaryIO, Iterator
 
@@ -15,14 +14,14 @@
 )
 
 c_def = """
-struct data_entry_multi_block {
+struct multi_block_entry {
     uint16    offset;
     uleb128   len;
     char      data[len * 2];
     char      crc32[4];
 };
 
-struct data_entry_single_block {
+struct single_block_entry {
     uint16    offset;
     uleb128   len;
     char      data[len * 2];
@@ -30,18 +29,33 @@
     char      crc32[4];
 };
 
-struct tab_header {
-    char      magic[3];         // NP\x00
-    char      header_start[2];  // \x00\x01
-    uleb128   len1;
-    uleb128   len2;
-    char      header_end[2];    // \x01\x00
-};
-
-struct tab_crc {
+struct header_crc {
     char      unk[4];
     char      crc32[4];
 };
+
+struct tab {
+    char                        magic[3];         // NP\x00
+    char                        header_start[2];  // \x00\x01
+    uleb128                     len1;
+    uleb128                     len2;
+    char                        header_end[2];    // \x01\x00
+    
+    // Data can be stored in two says:
+    //  1. A single, contiguous block of data that holds all the data
+    //     In this case, the header is included in the single CRC32 checksum present at the end of the block
+    //  2. Multiple blocks of data that, when combined, hold all the data
+    //     In this case, the header has a separate CRC32 value stored at the end of the header
+    // The following bitmask operations basically check whether len1 is nonzero (boolean check) and depending
+    // on the outcome, parse 0 or 1 (so basically, parse or not parse) structs.
+    header_crc                  header_crc[((len1 | -len1) >> 31) ^ 1]; // Optional, only if len1 == 0
+    single_block_entry          single_block_entry[((len1 | (~len1 + 1)) >> 31) & 1];  // Optional, only if len1 > 0
+
+
+    multi_block_entry           multi_block_entries[EOF];  // Optional. If a single_block_entry is present
+                                                           // this will already be at EOF, so it won't do anything.
+                                                           // Otherwise, it will parse the individual blocks.
+};
 """
 
 c_windowstab = cstruct.cstruct()
@@ -49,27 +63,10 @@
 
 
 def _calc_crc32(data: bytes) -> bytes:
-    """Perform a CRC32 checksum on the data and return it as a big-endian uint32"""
+    """Perform a CRC32 checksum on the data and return it as bytes"""
     return zlib.crc32(data).to_bytes(length=4, byteorder="big")
 
 
-def seek_size(fh: BinaryIO) -> int:
-    """
-    Find the size of a file on disk.
-
-    Args:
-        fh: A file-like object that we want to calculate the size of.
-
-    Returns:
-        An integer representing the size (in bytes) of the file.
-    """
-    pos = fh.tell()
-    fh.seek(0, io.SEEK_END)
-    size = fh.tell()
-    fh.seek(pos)
-    return size
-
-
 class WindowsNotepadPlugin(TexteditorTabPlugin):
     """Windows notepad tab content plugin."""
 
@@ -105,25 +102,14 @@ def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord:
         """
         fh: BinaryIO = file.open(mode="rb")
 
-        # There is always a 4 byte value at the end. The offset is always 2 bytes, and the length is always at
-        # least 1 byte. That means that if we reach the end of a data section, and we have equal or less
-        # than 4 + 2 + 1 = 7 bytes left, we should stop parsing new data blobs.
-        data_threshold = seek_size(fh) - 4 - 2 - 1
-
-        # Parse the generic header
-        header = c_windowstab.tab_header(fh)
+        tab = c_windowstab.tab(fh)
 
-        # Some tabs are stored as one big block. In this case, the data is contiguous and the file
-        # only contains one CRC32 at the end which checksums the entire file (excluding the file magic).
-        # It is likely stored as a single block whenever a length field is nonzero in the header.
-        is_single_blob = header.len1 != 0
+        if tab.len1 != 0:
+            # Reconstruct the text of the single_block_entry variant
+            data_entry = tab.single_block_entry[0]
 
-        if is_single_blob:
-            # In this case, we parse the single block
-            data_entry = c_windowstab.data_entry_single_block(fh)
-
-            # The header (minus the magic) plus all data (exluding the CRC32 at the end) is included
-            actual_crc32 = _calc_crc32(header.dumps()[3:] + data_entry.dumps()[:-4])
+            # The header (minus the magic) plus all data (exluding the CRC32 at the end) is included in the checksum
+            actual_crc32 = _calc_crc32(tab.dumps()[3:-4])
 
             if data_entry.crc32 != actual_crc32:
                 raise CRCMismatchException(
@@ -131,27 +117,19 @@ def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord:
                     f"expected={data_entry.crc32.hex()}, actual={actual_crc32.hex()} "
                 )
 
-            # Finally, decode the block using UTF16-LE, common for Windows.
             text = data_entry.data.decode("utf-16-le")
 
         else:
-            # In this case, the header contains a separate CRC32 checksum as well
-            header_crc = c_windowstab.tab_crc(fh)
-
-            # The header, minus the file magic, plus some bytes from the extra header are
-            # required in the calculation
-            assert header_crc.crc32 == _calc_crc32(header.dumps()[3:] + header_crc.unk.dumps())
+            # Reconstruct the text of the multi_block_entry variant
+            # CRC32 is calculated based on the entire header, up to the point where the CRC32 value is stored
+            assert tab.header_crc[0].crc32 == _calc_crc32(tab.dumps()[3 : tab.dumps().index(tab.header_crc[0].crc32)])
 
-            # We don't know how many blocks there will be beforehand. So we also don't know the exact file
-            # size, since the file, next to data, also contains quite some metadata and checksums.
-            # Also, because blocks can possibly be present in a non-contiguous order, a list is used
-            # that gradually increases in size. This allows for quick and flexible insertion of chars.
-            text = ["\x00"] * 100
+            # Since we don't know the size of the file up front, and offsets don't necessarily have to be in order,
+            # a list is used to easily insert text at offsets
+            text = ["\x00"]
 
-            while fh.tell() < data_threshold:
-                data_entry = c_windowstab.data_entry_multi_block(fh)
-
-                # Check for CRC mismatch in a data block
+            for data_entry in tab.multi_block_entries:
+                # Check the CRC32 checksum for this block
                 actual_crc32 = _calc_crc32(data_entry.dumps()[:-4])
                 if data_entry.crc32 != actual_crc32:
                     raise CRCMismatchException(
@@ -159,17 +137,20 @@ def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord:
                         f"expected={data_entry.crc32.hex()}, actual={actual_crc32.hex()} "
                     )
 
-                # Since we don't know the size of the file in the beginning, gradually increase the size
-                # of the list that holds the data if there is not enough room
-                while data_entry.offset + data_entry.len > len(text) and data_entry.len > 0:
-                    text += ["\x00"] * 100
+                # If there is no data to be added, skip. This may happen sometimes.
+                if data_entry.len <= 0:
+                    continue
+
+                # Extend the list if required. All characters need to fit in the list.
+                while data_entry.offset + data_entry.len > len(text):
+                    text += "\x00"
 
                 # Place the text at the correct offset. UTF16-LE consumes two bytes for one character.
                 for i in range(data_entry.len):
                     text[data_entry.offset + i] = data_entry.data[(2 * i) : (2 * i) + 2].decode("utf-16-le")
 
-            # Join the chars and strip off excess null bytes that may be present
-            text = "".join(text).rstrip("\x00")
+            # Join all the characters to reconstruct the original text
+            text = "".join(text)
 
         return self.TextEditorTabRecord(content=text, content_length=len(text), filename=file.name)
 

From cef81d0ef141eeb5a288e02c96f91fc3add27879 Mon Sep 17 00:00:00 2001
From: Joost Jansen <12032793+joost-j@users.noreply.github.com>
Date: Mon, 19 Feb 2024 15:49:09 +0100
Subject: [PATCH 06/36] Bump dissect.cstruct version to >=4.0.dev for clarity

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 0e912cfe8..961f45c01 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,7 +26,7 @@ classifiers = [
 ]
 dependencies = [
     "defusedxml",
-    "dissect.cstruct>=3.0.dev,<4.0.dev",
+    "dissect.cstruct>=4.0.dev,<5.0.dev",
     "dissect.eventlog>=3.0.dev,<4.0.dev",
     "dissect.evidence>=3.0.dev,<4.0.dev",
     "dissect.hypervisor>=3.0.dev,<4.0.dev",

From 7934f3e79c53050e9915259efc4401eea31d721a Mon Sep 17 00:00:00 2001
From: Joost Jansen <12032793+joost-j@users.noreply.github.com>
Date: Mon, 26 Feb 2024 16:26:14 +0100
Subject: [PATCH 07/36] Apply suggestions from code review

Co-authored-by: Stefan de Reuver <9864602+Horofic@users.noreply.github.com>
---
 .../plugins/apps/texteditor/texteditor.py     |  6 +-
 .../plugins/apps/texteditor/windowsnotepad.py | 84 +++++++++----------
 .../apps/texteditor/test_texteditor.py        | 21 ++---
 3 files changed, 54 insertions(+), 57 deletions(-)

diff --git a/dissect/target/plugins/apps/texteditor/texteditor.py b/dissect/target/plugins/apps/texteditor/texteditor.py
index 853b384a9..f2988cd99 100644
--- a/dissect/target/plugins/apps/texteditor/texteditor.py
+++ b/dissect/target/plugins/apps/texteditor/texteditor.py
@@ -5,7 +5,7 @@
 GENERIC_TAB_CONTENTS_RECORD_FIELDS = [
     ("string", "content"),
     ("string", "content_length"),
-    ("string", "filename"),
+    ("path", "path"),
 ]
 
 TexteditorTabContentRecord = create_extended_descriptor([UserRecordDescriptorExtension])(
@@ -13,5 +13,5 @@
 )
 
 
-class TexteditorTabPlugin(NamespacePlugin):
-    __namespace__ = "texteditortab"
+class TexteditorPlugin(NamespacePlugin):
+    __namespace__ = "texteditor"
diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
index 638a125ce..9608b8f23 100644
--- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py
+++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
@@ -10,24 +10,25 @@
 from dissect.target.plugin import export
 from dissect.target.plugins.apps.texteditor.texteditor import (
     GENERIC_TAB_CONTENTS_RECORD_FIELDS,
-    TexteditorTabPlugin,
+    TexteditorPlugin,
 )
 
 c_def = """
 struct multi_block_entry {
     uint16    offset;
     uleb128   len;
-    char      data[len * 2];
+    wchar     data[len];
     char      crc32[4];
 };
 
 struct single_block_entry {
     uint16    offset;
     uleb128   len;
-    char      data[len * 2];
+    wchar     data[len];
     char      unk1;
     char      crc32[4];
 };
+};
 
 struct header_crc {
     char      unk[4];
@@ -57,6 +58,9 @@
                                                            // Otherwise, it will parse the individual blocks.
 };
 """
+TextEditorTabRecord = create_extended_descriptor([UserRecordDescriptorExtension])(
+        "texteditor/windowsnotepad/tab", GENERIC_TAB_CONTENTS_RECORD_FIELDS
+)
 
 c_windowstab = cstruct.cstruct()
 c_windowstab.load(c_def)
@@ -67,28 +71,27 @@ def _calc_crc32(data: bytes) -> bytes:
     return zlib.crc32(data).to_bytes(length=4, byteorder="big")
 
 
-class WindowsNotepadPlugin(TexteditorTabPlugin):
+class WindowsNotepadPlugin(TexteditorPlugin):
     """Windows notepad tab content plugin."""
 
     __namespace__ = "windowsnotepad"
 
-    DIRECTORY = "AppData/Local/Packages/Microsoft.WindowsNotepad_8wekyb3d8bbwe/LocalState/TabState"
-    TextEditorTabRecord = create_extended_descriptor([UserRecordDescriptorExtension])(
-        "texteditor/windowsnotepad/tab", GENERIC_TAB_CONTENTS_RECORD_FIELDS
-    )
+    GLOB = "AppData/Local/Packages/Microsoft.WindowsNotepad_*/LocalState/TabState/*.bin"
 
     def __init__(self, target):
         super().__init__(target)
-        self.users_dirs = []
+        self.users_tabs = []
+        
         for user_details in self.target.user_details.all_with_home():
-            cur_dir = user_details.home_path.joinpath(self.DIRECTORY)
-            if not cur_dir.exists():
-                continue
-            self.users_dirs.append((user_details.user, cur_dir))
+            for tab_file in user_details.home_path.glob(self.GLOB):
+                if tab_file.name.endswith(".1.bin") or tab_file.name.endswith(".0.bin"):
+                    continue
+
+                self.users_tabs.append(tab_file)
 
     def check_compatible(self) -> None:
-        if not len(self.users_dirs):
-            raise UnsupportedPluginError("No tabs directories found")
+        if not self.users_tabs:
+            raise UnsupportedPluginError("No Windows Notepad temporary tab files found")
 
     def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord:
         """
@@ -107,17 +110,18 @@ def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord:
         if tab.len1 != 0:
             # Reconstruct the text of the single_block_entry variant
             data_entry = tab.single_block_entry[0]
+            size = data_entry.len
 
             # The header (minus the magic) plus all data (exluding the CRC32 at the end) is included in the checksum
             actual_crc32 = _calc_crc32(tab.dumps()[3:-4])
 
             if data_entry.crc32 != actual_crc32:
-                raise CRCMismatchException(
-                    f"CRC32 mismatch in single-block file. "
-                    f"expected={data_entry.crc32.hex()}, actual={actual_crc32.hex()} "
+                self.target.log.warning(
+                    "CRC32 mismatch in single-block file: %s "
+                    "expected=%s, actual=%s", file.name, data_entry.crc32.hex(), actual_crc32.hex()
                 )
 
-            text = data_entry.data.decode("utf-16-le")
+            text = data_entry.data
 
         else:
             # Reconstruct the text of the multi_block_entry variant
@@ -126,49 +130,45 @@ def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord:
 
             # Since we don't know the size of the file up front, and offsets don't necessarily have to be in order,
             # a list is used to easily insert text at offsets
-            text = ["\x00"]
+            text = []
+            size = 0
 
             for data_entry in tab.multi_block_entries:
+                # If there is no data to be added, skip. This may happen sometimes.
+                if data_entry.len <= 0:
+                    continue
+                
+                size += data_entry.len
                 # Check the CRC32 checksum for this block
                 actual_crc32 = _calc_crc32(data_entry.dumps()[:-4])
                 if data_entry.crc32 != actual_crc32:
-                    raise CRCMismatchException(
-                        f"CRC32 mismatch in single-block file. "
-                        f"expected={data_entry.crc32.hex()}, actual={actual_crc32.hex()} "
+                    self.target.log.warning(
+                        "CRC32 mismatch in multi-block file: %s "
+                        "expected=%s, actual=%s", file.name, data_entry.crc32.hex(), actual_crc32.hex()
                     )
 
-                # If there is no data to be added, skip. This may happen sometimes.
-                if data_entry.len <= 0:
-                    continue
 
                 # Extend the list if required. All characters need to fit in the list.
                 while data_entry.offset + data_entry.len > len(text):
-                    text += "\x00"
+                    text.append("\x00")
 
                 # Place the text at the correct offset. UTF16-LE consumes two bytes for one character.
-                for i in range(data_entry.len):
-                    text[data_entry.offset + i] = data_entry.data[(2 * i) : (2 * i) + 2].decode("utf-16-le")
+                for idx in range(data_entry.len):
+                    text[data_entry.offset + idx] = data_entry.data[(2 * idx) : (2 * idx) + 2]
 
             # Join all the characters to reconstruct the original text
             text = "".join(text)
 
-        return self.TextEditorTabRecord(content=text, content_length=len(text), filename=file.name)
+        return TextEditorTabRecord(content=text, content_length=size, path=file)
 
     @export(record=TextEditorTabRecord)
     def tabs(self) -> Iterator[TextEditorTabRecord]:
-        """Return contents from the notepad tab.
+        """Return contents from Windows 11 temporary Notepad tabs.
 
         Yields TextEditorTabRecord with the following fields:
             contents (string): The contents of the tab.
-            title (string): The title of the tab.
+            content_length (int): The length of the tab content.
+            path (path): The path the content originates from.
         """
-        for user, directory in self.users_dirs:
-            for file in self.target.fs.path(directory).iterdir():
-                if file.name.endswith(".1.bin") or file.name.endswith(".0.bin"):
-                    continue
-
-                try:
-                    yield self._process_tab_file(file)
-                except CRCMismatchException as e:
-                    self.target.log.warning("CRC32 checksum mismatch in file: %s", file.name, exc_info=e)
-                    continue
+        for file in self.users_tabs:
+            yield self._process_tab_file(file)
diff --git a/tests/plugins/apps/texteditor/test_texteditor.py b/tests/plugins/apps/texteditor/test_texteditor.py
index 21e722a6f..e797ad1d0 100644
--- a/tests/plugins/apps/texteditor/test_texteditor.py
+++ b/tests/plugins/apps/texteditor/test_texteditor.py
@@ -24,14 +24,16 @@ def test_texteditor_plugin(target_win, fs_win, tmp_path, target_win_users, caplo
         "ba291ccd-f1c3-4ca8-949c-c01f6633789d.bin": (text5 * 5),
         "e609218e-94f2-45fa-84e2-f29df2190b26.bin": (text6 * 1260),
         "3d0cc86e-dfc9-4f16-b74a-918c2c24188c.bin": loremipsum,
-        "wrong-checksum.bin": "",  # only added to check for corrupt checksum, not validity
+        "wrong-checksum.bin": text4,  # only added to check for corrupt checksum, not validity
         "cfe38135-9dca-4480-944f-d5ea0e1e589f.bin": (loremipsum * 37)[:-2],  # removed the two newlines in this file
     }
 
     tabcache = absolute_path("_data/plugins/apps/texteditor/windowsnotepad/")
 
     user = target_win_users.user_details.find(username="John")
-    tab_dir = user.home_path.joinpath(windowsnotepad.WindowsNotepadPlugin.DIRECTORY)
+    tab_dir = user.home_path.joinpath(
+        "AppData/Local/Packages/Microsoft.WindowsNotepad_8wekyb3d8bbwe/LocalState/TabState"
+    )
 
     fs_win.map_dir("Users\\John", tmp_path)
 
@@ -45,17 +47,12 @@ def test_texteditor_plugin(target_win, fs_win, tmp_path, target_win_users, caplo
 
     # Check the amount of files
     assert len(list(tab_dir.iterdir())) == len(file_text_map.keys())
+    assert len(records) == len(file_text_map.keys())
 
-    # Only one should not be parsed correctly, without errors/warnings
-    assert len(records) == len(file_text_map.keys()) - 1
-
-    # One file should not return any contents, there should be an entry for this in the logging.
-    assert "CRC32 checksum mismatch in file: wrong-checksum.bin" in caplog.text
-    assert (
-        "CRCMismatchException: CRC32 mismatch in single-block file. expected=deadbeef, actual=a48d30a6" in caplog.text
-    )
+    # One file should still return contents, but there should be an entry in the logging for a CRC mismatch.
+    assert "CRC32 mismatch in single-block file: wrong-checksum.bin expected=deadbeef, actual=a48d30a6" in caplog.text
 
     # The recovered content in the records should match the original data, as well as the length
     for rec in records:
-        assert rec.content == file_text_map[rec.filename]
-        assert len(rec.content) == len(file_text_map[rec.filename])
+        assert rec.content == file_text_map[rec.path.name]
+        assert len(rec.content) == len(file_text_map[rec.path.name])

From e6ea0195a42f53563d06e9884186c7f1eab1cfca Mon Sep 17 00:00:00 2001
From: Joost Jansen <12032793+joost-j@users.noreply.github.com>
Date: Mon, 26 Feb 2024 16:36:07 +0100
Subject: [PATCH 08/36] Removed duplicate brackets and refactor assertion into
 warning log

---
 .../target/plugins/apps/texteditor/windowsnotepad.py   | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
index 9608b8f23..b723118a9 100644
--- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py
+++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
@@ -28,7 +28,6 @@
     char      unk1;
     char      crc32[4];
 };
-};
 
 struct header_crc {
     char      unk[4];
@@ -126,7 +125,13 @@ def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord:
         else:
             # Reconstruct the text of the multi_block_entry variant
             # CRC32 is calculated based on the entire header, up to the point where the CRC32 value is stored
-            assert tab.header_crc[0].crc32 == _calc_crc32(tab.dumps()[3 : tab.dumps().index(tab.header_crc[0].crc32)])
+            defined_header_crc32 = tab.header_crc[0].crc32
+            actual_header_crc32 = _calc_crc32(tab.dumps()[3 : tab.dumps().index(defined_header_crc32)])
+            if defined_header_crc32 != actual_header_crc32:
+                self.target.log.warning(
+                    "CRC32 mismatch in header of multi-block file: %s "
+                    "expected=%s, actual=%s", file.name, defined_header_crc32.hex(), actual_header_crc32.hex(),
+                )
 
             # Since we don't know the size of the file up front, and offsets don't necessarily have to be in order,
             # a list is used to easily insert text at offsets
@@ -147,7 +152,6 @@ def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord:
                         "expected=%s, actual=%s", file.name, data_entry.crc32.hex(), actual_crc32.hex()
                     )
 
-
                 # Extend the list if required. All characters need to fit in the list.
                 while data_entry.offset + data_entry.len > len(text):
                     text.append("\x00")

From 12fdd4ab76b3810c4b87cba538a8c8266ebcbdae Mon Sep 17 00:00:00 2001
From: Joost Jansen <12032793+joost-j@users.noreply.github.com>
Date: Mon, 26 Feb 2024 16:46:27 +0100
Subject: [PATCH 09/36] Change variable names to fsize1 and fsize2, plus some
 linting

---
 .../plugins/apps/texteditor/windowsnotepad.py | 38 +++++++++++--------
 1 file changed, 22 insertions(+), 16 deletions(-)

diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
index b723118a9..909f640db 100644
--- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py
+++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
@@ -3,7 +3,7 @@
 
 from dissect import cstruct
 
-from dissect.target.exceptions import CRCMismatchException, UnsupportedPluginError
+from dissect.target.exceptions import UnsupportedPluginError
 from dissect.target.helpers.descriptor_extensions import UserRecordDescriptorExtension
 from dissect.target.helpers.fsutil import TargetPath
 from dissect.target.helpers.record import create_extended_descriptor
@@ -37,19 +37,19 @@
 struct tab {
     char                        magic[3];         // NP\x00
     char                        header_start[2];  // \x00\x01
-    uleb128                     len1;
-    uleb128                     len2;
+    uleb128                     fsize1;
+    uleb128                     fsize2;
     char                        header_end[2];    // \x01\x00
-    
+
     // Data can be stored in two says:
     //  1. A single, contiguous block of data that holds all the data
     //     In this case, the header is included in the single CRC32 checksum present at the end of the block
     //  2. Multiple blocks of data that, when combined, hold all the data
     //     In this case, the header has a separate CRC32 value stored at the end of the header
-    // The following bitmask operations basically check whether len1 is nonzero (boolean check) and depending
+    // The following bitmask operations basically check whether fsize1 is nonzero (boolean check) and depending
     // on the outcome, parse 0 or 1 (so basically, parse or not parse) structs.
-    header_crc                  header_crc[((len1 | -len1) >> 31) ^ 1]; // Optional, only if len1 == 0
-    single_block_entry          single_block_entry[((len1 | (~len1 + 1)) >> 31) & 1];  // Optional, only if len1 > 0
+    header_crc                  header_crc[((fsize1 | -fsize1) >> 31) ^ 1]; // Optional, only if fsize1 == 0
+    single_block_entry          single_block_entry[((fsize1 | (~fsize1 + 1)) >> 31) & 1];  // Optional, only if fsize1 > 0  # noqa: E501
 
 
     multi_block_entry           multi_block_entries[EOF];  // Optional. If a single_block_entry is present
@@ -58,7 +58,7 @@
 };
 """
 TextEditorTabRecord = create_extended_descriptor([UserRecordDescriptorExtension])(
-        "texteditor/windowsnotepad/tab", GENERIC_TAB_CONTENTS_RECORD_FIELDS
+    "texteditor/windowsnotepad/tab", GENERIC_TAB_CONTENTS_RECORD_FIELDS
 )
 
 c_windowstab = cstruct.cstruct()
@@ -80,7 +80,7 @@ class WindowsNotepadPlugin(TexteditorPlugin):
     def __init__(self, target):
         super().__init__(target)
         self.users_tabs = []
-        
+
         for user_details in self.target.user_details.all_with_home():
             for tab_file in user_details.home_path.glob(self.GLOB):
                 if tab_file.name.endswith(".1.bin") or tab_file.name.endswith(".0.bin"):
@@ -116,8 +116,10 @@ def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord:
 
             if data_entry.crc32 != actual_crc32:
                 self.target.log.warning(
-                    "CRC32 mismatch in single-block file: %s "
-                    "expected=%s, actual=%s", file.name, data_entry.crc32.hex(), actual_crc32.hex()
+                    "CRC32 mismatch in single-block file: %s " "expected=%s, actual=%s",
+                    file.name,
+                    data_entry.crc32.hex(),
+                    actual_crc32.hex(),
                 )
 
             text = data_entry.data
@@ -129,8 +131,10 @@ def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord:
             actual_header_crc32 = _calc_crc32(tab.dumps()[3 : tab.dumps().index(defined_header_crc32)])
             if defined_header_crc32 != actual_header_crc32:
                 self.target.log.warning(
-                    "CRC32 mismatch in header of multi-block file: %s "
-                    "expected=%s, actual=%s", file.name, defined_header_crc32.hex(), actual_header_crc32.hex(),
+                    "CRC32 mismatch in header of multi-block file: %s " "expected=%s, actual=%s",
+                    file.name,
+                    defined_header_crc32.hex(),
+                    actual_header_crc32.hex(),
                 )
 
             # Since we don't know the size of the file up front, and offsets don't necessarily have to be in order,
@@ -142,14 +146,16 @@ def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord:
                 # If there is no data to be added, skip. This may happen sometimes.
                 if data_entry.len <= 0:
                     continue
-                
+
                 size += data_entry.len
                 # Check the CRC32 checksum for this block
                 actual_crc32 = _calc_crc32(data_entry.dumps()[:-4])
                 if data_entry.crc32 != actual_crc32:
                     self.target.log.warning(
-                        "CRC32 mismatch in multi-block file: %s "
-                        "expected=%s, actual=%s", file.name, data_entry.crc32.hex(), actual_crc32.hex()
+                        "CRC32 mismatch in multi-block file: %s " "expected=%s, actual=%s",
+                        file.name,
+                        data_entry.crc32.hex(),
+                        actual_crc32.hex(),
                     )
 
                 # Extend the list if required. All characters need to fit in the list.

From 39a34a7c6450852d17b726cf2c11c67518e1516b Mon Sep 17 00:00:00 2001
From: Joost Jansen <12032793+joost-j@users.noreply.github.com>
Date: Mon, 4 Mar 2024 12:45:03 +0100
Subject: [PATCH 10/36] Refactored to work with LEB128 backport

---
 .../plugins/apps/texteditor/windowsnotepad.py | 37 ++++++++-----------
 1 file changed, 15 insertions(+), 22 deletions(-)

diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
index 909f640db..9090fc4c7 100644
--- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py
+++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
@@ -40,21 +40,6 @@
     uleb128                     fsize1;
     uleb128                     fsize2;
     char                        header_end[2];    // \x01\x00
-
-    // Data can be stored in two says:
-    //  1. A single, contiguous block of data that holds all the data
-    //     In this case, the header is included in the single CRC32 checksum present at the end of the block
-    //  2. Multiple blocks of data that, when combined, hold all the data
-    //     In this case, the header has a separate CRC32 value stored at the end of the header
-    // The following bitmask operations basically check whether fsize1 is nonzero (boolean check) and depending
-    // on the outcome, parse 0 or 1 (so basically, parse or not parse) structs.
-    header_crc                  header_crc[((fsize1 | -fsize1) >> 31) ^ 1]; // Optional, only if fsize1 == 0
-    single_block_entry          single_block_entry[((fsize1 | (~fsize1 + 1)) >> 31) & 1];  // Optional, only if fsize1 > 0  # noqa: E501
-
-
-    multi_block_entry           multi_block_entries[EOF];  // Optional. If a single_block_entry is present
-                                                           // this will already be at EOF, so it won't do anything.
-                                                           // Otherwise, it will parse the individual blocks.
 };
 """
 TextEditorTabRecord = create_extended_descriptor([UserRecordDescriptorExtension])(
@@ -106,13 +91,13 @@ def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord:
 
         tab = c_windowstab.tab(fh)
 
-        if tab.len1 != 0:
-            # Reconstruct the text of the single_block_entry variant
-            data_entry = tab.single_block_entry[0]
+        if tab.fsize1 != 0:
+            data_entry = c_windowstab.single_block_entry(fh)
+
             size = data_entry.len
 
             # The header (minus the magic) plus all data (exluding the CRC32 at the end) is included in the checksum
-            actual_crc32 = _calc_crc32(tab.dumps()[3:-4])
+            actual_crc32 = _calc_crc32(tab.dumps()[3:] + data_entry.dumps()[:-4])
 
             if data_entry.crc32 != actual_crc32:
                 self.target.log.warning(
@@ -125,10 +110,13 @@ def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord:
             text = data_entry.data
 
         else:
+            header_crc = c_windowstab.header_crc(fh)
+
             # Reconstruct the text of the multi_block_entry variant
             # CRC32 is calculated based on the entire header, up to the point where the CRC32 value is stored
-            defined_header_crc32 = tab.header_crc[0].crc32
-            actual_header_crc32 = _calc_crc32(tab.dumps()[3 : tab.dumps().index(defined_header_crc32)])
+            defined_header_crc32 = header_crc.crc32
+
+            actual_header_crc32 = _calc_crc32(tab.dumps()[3:] + header_crc.unk)
             if defined_header_crc32 != actual_header_crc32:
                 self.target.log.warning(
                     "CRC32 mismatch in header of multi-block file: %s " "expected=%s, actual=%s",
@@ -142,7 +130,12 @@ def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord:
             text = []
             size = 0
 
-            for data_entry in tab.multi_block_entries:
+            while True:
+                try:
+                    data_entry = c_windowstab.multi_block_entry(fh)
+                except EOFError:
+                    break
+
                 # If there is no data to be added, skip. This may happen sometimes.
                 if data_entry.len <= 0:
                     continue

From 85660284fcf4d1995aebe070e34a39b4197441da Mon Sep 17 00:00:00 2001
From: Joost Jansen <12032793+joost-j@users.noreply.github.com>
Date: Mon, 4 Mar 2024 13:06:50 +0100
Subject: [PATCH 11/36] Process feedback

---
 dissect/target/exceptions.py                  |   4 -
 .../plugins/apps/texteditor/windowsnotepad.py | 149 +++++++++---------
 .../plugins/os/windows/regf/shimcache.py      |  11 +-
 3 files changed, 83 insertions(+), 81 deletions(-)

diff --git a/dissect/target/exceptions.py b/dissect/target/exceptions.py
index 1c435bcc7..22f46a604 100644
--- a/dissect/target/exceptions.py
+++ b/dissect/target/exceptions.py
@@ -114,7 +114,3 @@ class RegistryCorruptError(RegistryError):
 
 class ConfigurationParsingError(Error):
     """An error occurred during configuration parsing."""
-
-
-class CRCMismatchException(Error):
-    """A mismatch between CRC checksums has occurred."""
diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
index 9090fc4c7..0152123d4 100644
--- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py
+++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
@@ -1,12 +1,16 @@
 import zlib
-from typing import BinaryIO, Iterator
+from typing import Iterator, List, Union
 
-from dissect import cstruct
+from dissect.cstruct import cstruct
 
 from dissect.target.exceptions import UnsupportedPluginError
 from dissect.target.helpers.descriptor_extensions import UserRecordDescriptorExtension
 from dissect.target.helpers.fsutil import TargetPath
-from dissect.target.helpers.record import create_extended_descriptor
+from dissect.target.helpers.record import (
+    UnixUserRecord,
+    WindowsUserRecord,
+    create_extended_descriptor,
+)
 from dissect.target.plugin import export
 from dissect.target.plugins.apps.texteditor.texteditor import (
     GENERIC_TAB_CONTENTS_RECORD_FIELDS,
@@ -46,12 +50,12 @@
     "texteditor/windowsnotepad/tab", GENERIC_TAB_CONTENTS_RECORD_FIELDS
 )
 
-c_windowstab = cstruct.cstruct()
+c_windowstab = cstruct()
 c_windowstab.load(c_def)
 
 
 def _calc_crc32(data: bytes) -> bytes:
-    """Perform a CRC32 checksum on the data and return it as bytes"""
+    """Perform a CRC32 checksum on the data and return it as bytes."""
     return zlib.crc32(data).to_bytes(length=4, byteorder="big")
 
 
@@ -64,20 +68,22 @@ class WindowsNotepadPlugin(TexteditorPlugin):
 
     def __init__(self, target):
         super().__init__(target)
-        self.users_tabs = []
+        self.users_tabs: List[TargetPath, Union[UnixUserRecord, WindowsUserRecord]] = []
 
         for user_details in self.target.user_details.all_with_home():
             for tab_file in user_details.home_path.glob(self.GLOB):
                 if tab_file.name.endswith(".1.bin") or tab_file.name.endswith(".0.bin"):
                     continue
 
-                self.users_tabs.append(tab_file)
+                self.users_tabs.append((tab_file, user_details.user))
 
     def check_compatible(self) -> None:
         if not self.users_tabs:
             raise UnsupportedPluginError("No Windows Notepad temporary tab files found")
 
-    def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord:
+    def _process_tab_file(
+        self, file: TargetPath, user: Union[UnixUserRecord, WindowsUserRecord]
+    ) -> TextEditorTabRecord:
         """
         Function that parses a binary tab file and reconstructs the contents.
 
@@ -87,82 +93,81 @@ def _process_tab_file(self, file: TargetPath) -> TextEditorTabRecord:
         Returns:
             A TextEditorTabRecord containing information that is in the tab.
         """
-        fh: BinaryIO = file.open(mode="rb")
-
-        tab = c_windowstab.tab(fh)
-
-        if tab.fsize1 != 0:
-            data_entry = c_windowstab.single_block_entry(fh)
-
-            size = data_entry.len
-
-            # The header (minus the magic) plus all data (exluding the CRC32 at the end) is included in the checksum
-            actual_crc32 = _calc_crc32(tab.dumps()[3:] + data_entry.dumps()[:-4])
-
-            if data_entry.crc32 != actual_crc32:
-                self.target.log.warning(
-                    "CRC32 mismatch in single-block file: %s " "expected=%s, actual=%s",
-                    file.name,
-                    data_entry.crc32.hex(),
-                    actual_crc32.hex(),
-                )
-
-            text = data_entry.data
-
-        else:
-            header_crc = c_windowstab.header_crc(fh)
+        with file.open("rb") as fh:
+            tab = c_windowstab.tab(fh)
 
-            # Reconstruct the text of the multi_block_entry variant
-            # CRC32 is calculated based on the entire header, up to the point where the CRC32 value is stored
-            defined_header_crc32 = header_crc.crc32
+            if tab.fsize1 != 0:
+                data_entry = c_windowstab.single_block_entry(fh)
 
-            actual_header_crc32 = _calc_crc32(tab.dumps()[3:] + header_crc.unk)
-            if defined_header_crc32 != actual_header_crc32:
-                self.target.log.warning(
-                    "CRC32 mismatch in header of multi-block file: %s " "expected=%s, actual=%s",
-                    file.name,
-                    defined_header_crc32.hex(),
-                    actual_header_crc32.hex(),
-                )
+                size = data_entry.len
 
-            # Since we don't know the size of the file up front, and offsets don't necessarily have to be in order,
-            # a list is used to easily insert text at offsets
-            text = []
-            size = 0
+                # The header (minus the magic) plus all data (exluding the CRC32 at the end) is included in the checksum
+                actual_crc32 = _calc_crc32(tab.dumps()[3:] + data_entry.dumps()[:-4])
 
-            while True:
-                try:
-                    data_entry = c_windowstab.multi_block_entry(fh)
-                except EOFError:
-                    break
-
-                # If there is no data to be added, skip. This may happen sometimes.
-                if data_entry.len <= 0:
-                    continue
-
-                size += data_entry.len
-                # Check the CRC32 checksum for this block
-                actual_crc32 = _calc_crc32(data_entry.dumps()[:-4])
                 if data_entry.crc32 != actual_crc32:
                     self.target.log.warning(
-                        "CRC32 mismatch in multi-block file: %s " "expected=%s, actual=%s",
+                        "CRC32 mismatch in single-block file: %s " "expected=%s, actual=%s",
                         file.name,
                         data_entry.crc32.hex(),
                         actual_crc32.hex(),
                     )
 
-                # Extend the list if required. All characters need to fit in the list.
-                while data_entry.offset + data_entry.len > len(text):
-                    text.append("\x00")
+                text = data_entry.data
+
+            else:
+                header_crc = c_windowstab.header_crc(fh)
 
-                # Place the text at the correct offset. UTF16-LE consumes two bytes for one character.
-                for idx in range(data_entry.len):
-                    text[data_entry.offset + idx] = data_entry.data[(2 * idx) : (2 * idx) + 2]
+                # Reconstruct the text of the multi_block_entry variant
+                # CRC32 is calculated based on the entire header, up to the point where the CRC32 value is stored
+                defined_header_crc32 = header_crc.crc32
 
-            # Join all the characters to reconstruct the original text
-            text = "".join(text)
+                actual_header_crc32 = _calc_crc32(tab.dumps()[3:] + header_crc.unk)
+                if defined_header_crc32 != actual_header_crc32:
+                    self.target.log.warning(
+                        "CRC32 mismatch in header of multi-block file: %s " "expected=%s, actual=%s",
+                        file.name,
+                        defined_header_crc32.hex(),
+                        actual_header_crc32.hex(),
+                    )
 
-        return TextEditorTabRecord(content=text, content_length=size, path=file)
+                # Since we don't know the size of the file up front, and offsets don't necessarily have to be in order,
+                # a list is used to easily insert text at offsets
+                text = []
+                size = 0
+
+                while True:
+                    try:
+                        data_entry = c_windowstab.multi_block_entry(fh)
+                    except EOFError:
+                        break
+
+                    # If there is no data to be added, skip. This may happen sometimes.
+                    if data_entry.len <= 0:
+                        continue
+
+                    size += data_entry.len
+                    # Check the CRC32 checksum for this block
+                    actual_crc32 = _calc_crc32(data_entry.dumps()[:-4])
+                    if data_entry.crc32 != actual_crc32:
+                        self.target.log.warning(
+                            "CRC32 mismatch in multi-block file: %s " "expected=%s, actual=%s",
+                            file.name,
+                            data_entry.crc32.hex(),
+                            actual_crc32.hex(),
+                        )
+
+                    # Extend the list if required. All characters need to fit in the list.
+                    while data_entry.offset + data_entry.len > len(text):
+                        text.append("\x00")
+
+                    # Place the text at the correct offset. UTF16-LE consumes two bytes for one character.
+                    for idx in range(data_entry.len):
+                        text[data_entry.offset + idx] = data_entry.data[(2 * idx) : (2 * idx) + 2]
+
+                # Join all the characters to reconstruct the original text
+                text = "".join(text)
+
+        return TextEditorTabRecord(content=text, content_length=size, path=file, _target=self.target, _user=user)
 
     @export(record=TextEditorTabRecord)
     def tabs(self) -> Iterator[TextEditorTabRecord]:
@@ -173,5 +178,5 @@ def tabs(self) -> Iterator[TextEditorTabRecord]:
             content_length (int): The length of the tab content.
             path (path): The path the content originates from.
         """
-        for file in self.users_tabs:
-            yield self._process_tab_file(file)
+        for file, user in self.users_tabs:
+            yield self._process_tab_file(file, user)
diff --git a/dissect/target/plugins/os/windows/regf/shimcache.py b/dissect/target/plugins/os/windows/regf/shimcache.py
index 3aea545fd..06db50558 100644
--- a/dissect/target/plugins/os/windows/regf/shimcache.py
+++ b/dissect/target/plugins/os/windows/regf/shimcache.py
@@ -7,11 +7,7 @@
 from dissect.cstruct import Structure, cstruct
 from dissect.util.ts import wintimestamp
 
-from dissect.target.exceptions import (
-    CRCMismatchException,
-    RegistryError,
-    UnsupportedPluginError,
-)
+from dissect.target.exceptions import Error, RegistryError, UnsupportedPluginError
 from dissect.target.helpers.record import TargetRecordDescriptor
 from dissect.target.plugin import Plugin, export
 
@@ -183,6 +179,11 @@ def nt61_entry_type(_) -> Structure:
     },
 }
 
+
+class CRCMismatchException(Error):
+    """A mismatch between CRC checksums has occurred."""
+
+
 ShimCacheGeneratorType = Union[CRCMismatchException, Tuple[Optional[datetime], str]]
 
 

From 56a26fa05b49ea805ff03c6566d1718a1d50d55d Mon Sep 17 00:00:00 2001
From: Joost Jansen <12032793+joost-j@users.noreply.github.com>
Date: Mon, 4 Mar 2024 13:07:08 +0100
Subject: [PATCH 12/36] Set cstruct dependency to next release

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 961f45c01..ff583249c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,7 +26,7 @@ classifiers = [
 ]
 dependencies = [
     "defusedxml",
-    "dissect.cstruct>=4.0.dev,<5.0.dev",
+    "dissect.cstruct>=3.13.dev,<4.0.dev",
     "dissect.eventlog>=3.0.dev,<4.0.dev",
     "dissect.evidence>=3.0.dev,<4.0.dev",
     "dissect.hypervisor>=3.0.dev,<4.0.dev",

From b18e97584702281b07fe4995ff29472dd1a4cef6 Mon Sep 17 00:00:00 2001
From: Joost Jansen <12032793+joost-j@users.noreply.github.com>
Date: Mon, 4 Mar 2024 13:09:34 +0100
Subject: [PATCH 13/36] Restore original shimcache.py file

---
 dissect/target/plugins/os/windows/regf/shimcache.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dissect/target/plugins/os/windows/regf/shimcache.py b/dissect/target/plugins/os/windows/regf/shimcache.py
index 06db50558..af72a068f 100644
--- a/dissect/target/plugins/os/windows/regf/shimcache.py
+++ b/dissect/target/plugins/os/windows/regf/shimcache.py
@@ -181,7 +181,7 @@ def nt61_entry_type(_) -> Structure:
 
 
 class CRCMismatchException(Error):
-    """A mismatch between CRC checksums has occurred."""
+    pass
 
 
 ShimCacheGeneratorType = Union[CRCMismatchException, Tuple[Optional[datetime], str]]

From 1a1d80d2d3ece5b4d11767e9946a9e0574d27be2 Mon Sep 17 00:00:00 2001
From: Joost Jansen <12032793+joost-j@users.noreply.github.com>
Date: Mon, 25 Mar 2024 16:21:08 +0100
Subject: [PATCH 14/36] Move TextEditorTabRecord definition

---
 dissect/target/plugins/apps/texteditor/windowsnotepad.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
index 0152123d4..47c1de0a2 100644
--- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py
+++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
@@ -46,13 +46,14 @@
     char                        header_end[2];    // \x01\x00
 };
 """
-TextEditorTabRecord = create_extended_descriptor([UserRecordDescriptorExtension])(
-    "texteditor/windowsnotepad/tab", GENERIC_TAB_CONTENTS_RECORD_FIELDS
-)
 
 c_windowstab = cstruct()
 c_windowstab.load(c_def)
 
+TextEditorTabRecord = create_extended_descriptor([UserRecordDescriptorExtension])(
+    "texteditor/windowsnotepad/tab", GENERIC_TAB_CONTENTS_RECORD_FIELDS
+)
+
 
 def _calc_crc32(data: bytes) -> bytes:
     """Perform a CRC32 checksum on the data and return it as bytes."""

From b00bdc31d2f5ff96e4bd565ba5d634ea05ad9ca4 Mon Sep 17 00:00:00 2001
From: Joost Jansen <12032793+joost-j@users.noreply.github.com>
Date: Mon, 25 Mar 2024 17:00:52 +0100
Subject: [PATCH 15/36] Remove content_length field from record

---
 dissect/target/plugins/apps/texteditor/texteditor.py     | 1 -
 dissect/target/plugins/apps/texteditor/windowsnotepad.py | 6 +-----
 2 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/dissect/target/plugins/apps/texteditor/texteditor.py b/dissect/target/plugins/apps/texteditor/texteditor.py
index f2988cd99..ab3fadf03 100644
--- a/dissect/target/plugins/apps/texteditor/texteditor.py
+++ b/dissect/target/plugins/apps/texteditor/texteditor.py
@@ -4,7 +4,6 @@
 
 GENERIC_TAB_CONTENTS_RECORD_FIELDS = [
     ("string", "content"),
-    ("string", "content_length"),
     ("path", "path"),
 ]
 
diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
index 47c1de0a2..5b01c274e 100644
--- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py
+++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
@@ -100,8 +100,6 @@ def _process_tab_file(
             if tab.fsize1 != 0:
                 data_entry = c_windowstab.single_block_entry(fh)
 
-                size = data_entry.len
-
                 # The header (minus the magic) plus all data (exluding the CRC32 at the end) is included in the checksum
                 actual_crc32 = _calc_crc32(tab.dumps()[3:] + data_entry.dumps()[:-4])
 
@@ -117,7 +115,6 @@ def _process_tab_file(
 
             else:
                 header_crc = c_windowstab.header_crc(fh)
-
                 # Reconstruct the text of the multi_block_entry variant
                 # CRC32 is calculated based on the entire header, up to the point where the CRC32 value is stored
                 defined_header_crc32 = header_crc.crc32
@@ -168,7 +165,7 @@ def _process_tab_file(
                 # Join all the characters to reconstruct the original text
                 text = "".join(text)
 
-        return TextEditorTabRecord(content=text, content_length=size, path=file, _target=self.target, _user=user)
+        return TextEditorTabRecord(content=text, path=file, _target=self.target, _user=user)
 
     @export(record=TextEditorTabRecord)
     def tabs(self) -> Iterator[TextEditorTabRecord]:
@@ -176,7 +173,6 @@ def tabs(self) -> Iterator[TextEditorTabRecord]:
 
         Yields TextEditorTabRecord with the following fields:
             contents (string): The contents of the tab.
-            content_length (int): The length of the tab content.
             path (path): The path the content originates from.
         """
         for file, user in self.users_tabs:

From a124202999b883003924167d32fb26d22b6de50c Mon Sep 17 00:00:00 2001
From: Joost Jansen <12032793+joost-j@users.noreply.github.com>
Date: Mon, 25 Mar 2024 17:02:05 +0100
Subject: [PATCH 16/36] Apply suggestions from code review

Co-authored-by: Erik Schamper <1254028+Schamper@users.noreply.github.com>
---
 .../target/plugins/apps/texteditor/windowsnotepad.py  | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
index 5b01c274e..4596e1b9a 100644
--- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py
+++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
@@ -1,5 +1,5 @@
 import zlib
-from typing import Iterator, List, Union
+from typing import Iterator
 
 from dissect.cstruct import cstruct
 
@@ -69,7 +69,7 @@ class WindowsNotepadPlugin(TexteditorPlugin):
 
     def __init__(self, target):
         super().__init__(target)
-        self.users_tabs: List[TargetPath, Union[UnixUserRecord, WindowsUserRecord]] = []
+        self.users_tabs: list[TargetPath, UnixUserRecord | WindowsUserRecord] = []
 
         for user_details in self.target.user_details.all_with_home():
             for tab_file in user_details.home_path.glob(self.GLOB):
@@ -83,10 +83,9 @@ def check_compatible(self) -> None:
             raise UnsupportedPluginError("No Windows Notepad temporary tab files found")
 
     def _process_tab_file(
-        self, file: TargetPath, user: Union[UnixUserRecord, WindowsUserRecord]
+        self, file: TargetPath, user: UnixUserRecord | WindowsUserRecord
     ) -> TextEditorTabRecord:
-        """
-        Function that parses a binary tab file and reconstructs the contents.
+        """Parse a binary tab file and reconstruct the contents.
 
         Args:
             file: The binary file on disk that needs to be parsed.
@@ -105,7 +104,7 @@ def _process_tab_file(
 
                 if data_entry.crc32 != actual_crc32:
                     self.target.log.warning(
-                        "CRC32 mismatch in single-block file: %s " "expected=%s, actual=%s",
+                        "CRC32 mismatch in single-block file: %s (expected=%s, actual=%s)",
                         file.name,
                         data_entry.crc32.hex(),
                         actual_crc32.hex(),

From dbaca5d2a66044afe76b45b0cefd0afbee98a37a Mon Sep 17 00:00:00 2001
From: Joost Jansen <12032793+joost-j@users.noreply.github.com>
Date: Mon, 25 Mar 2024 17:04:39 +0100
Subject: [PATCH 17/36] Change TextEditorTabRecord formatting

---
 dissect/target/plugins/apps/texteditor/windowsnotepad.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
index 4596e1b9a..e83f857e4 100644
--- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py
+++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
@@ -164,7 +164,12 @@ def _process_tab_file(
                 # Join all the characters to reconstruct the original text
                 text = "".join(text)
 
-        return TextEditorTabRecord(content=text, path=file, _target=self.target, _user=user)
+        return TextEditorTabRecord(
+            content=text,
+            path=file,
+            _target=self.target,
+            _user=user
+        )
 
     @export(record=TextEditorTabRecord)
     def tabs(self) -> Iterator[TextEditorTabRecord]:

From d66fa54c394c3b5bfd9bd8433eb8bcf10dd2ae5b Mon Sep 17 00:00:00 2001
From: Joost Jansen <12032793+joost-j@users.noreply.github.com>
Date: Mon, 25 Mar 2024 17:14:27 +0100
Subject: [PATCH 18/36] Black formatting, fix tests, add annotations import

---
 .../plugins/apps/texteditor/windowsnotepad.py | 19 +++++++------------
 .../apps/texteditor/test_texteditor.py        |  2 +-
 2 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
index e83f857e4..4c2a7dac4 100644
--- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py
+++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import zlib
 from typing import Iterator
 
@@ -22,7 +24,7 @@
     uint16    offset;
     uleb128   len;
     wchar     data[len];
-    char      crc32[4];
+    char      crc32[4]; // Big endian CRC32
 };
 
 struct single_block_entry {
@@ -30,12 +32,12 @@
     uleb128   len;
     wchar     data[len];
     char      unk1;
-    char      crc32[4];
+    char      crc32[4]; // Big endian CRC32
 };
 
 struct header_crc {
     char      unk[4];
-    char      crc32[4];
+    char      crc32[4]; // Big endian CRC32
 };
 
 struct tab {
@@ -82,9 +84,7 @@ def check_compatible(self) -> None:
         if not self.users_tabs:
             raise UnsupportedPluginError("No Windows Notepad temporary tab files found")
 
-    def _process_tab_file(
-        self, file: TargetPath, user: UnixUserRecord | WindowsUserRecord
-    ) -> TextEditorTabRecord:
+    def _process_tab_file(self, file: TargetPath, user: UnixUserRecord | WindowsUserRecord) -> TextEditorTabRecord:
         """Parse a binary tab file and reconstruct the contents.
 
         Args:
@@ -164,12 +164,7 @@ def _process_tab_file(
                 # Join all the characters to reconstruct the original text
                 text = "".join(text)
 
-        return TextEditorTabRecord(
-            content=text,
-            path=file,
-            _target=self.target,
-            _user=user
-        )
+        return TextEditorTabRecord(content=text, path=file, _target=self.target, _user=user)
 
     @export(record=TextEditorTabRecord)
     def tabs(self) -> Iterator[TextEditorTabRecord]:
diff --git a/tests/plugins/apps/texteditor/test_texteditor.py b/tests/plugins/apps/texteditor/test_texteditor.py
index e797ad1d0..fa19b11fb 100644
--- a/tests/plugins/apps/texteditor/test_texteditor.py
+++ b/tests/plugins/apps/texteditor/test_texteditor.py
@@ -50,7 +50,7 @@ def test_texteditor_plugin(target_win, fs_win, tmp_path, target_win_users, caplo
     assert len(records) == len(file_text_map.keys())
 
     # One file should still return contents, but there should be an entry for in the logging for a CRC missmatch.
-    assert "CRC32 mismatch in single-block file: wrong-checksum.bin expected=deadbeef, actual=a48d30a6" in caplog.text
+    assert "CRC32 mismatch in single-block file: wrong-checksum.bin (expected=deadbeef, actual=a48d30a6)" in caplog.text
 
     # The recovered content in the records should match the original data, as well as the length
     for rec in records:

From bdaccbc301186c6a5b4882d01cb2ae1753786d0b Mon Sep 17 00:00:00 2001
From: Joost Jansen <12032793+joost-j@users.noreply.github.com>
Date: Mon, 25 Mar 2024 17:17:35 +0100
Subject: [PATCH 19/36] Bump cstruct version again

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index ff583249c..e2db9523c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,7 +26,7 @@ classifiers = [
 ]
 dependencies = [
     "defusedxml",
-    "dissect.cstruct>=3.13.dev,<4.0.dev",
+    "dissect.cstruct>=3.14.dev,<4.0.dev",
     "dissect.eventlog>=3.0.dev,<4.0.dev",
     "dissect.evidence>=3.0.dev,<4.0.dev",
     "dissect.hypervisor>=3.0.dev,<4.0.dev",

From ad7827389fae8bde0ad329c0856650b8959edd0a Mon Sep 17 00:00:00 2001
From: Joost Jansen <12032793+joost-j@users.noreply.github.com>
Date: Thu, 28 Mar 2024 13:39:13 +0100
Subject: [PATCH 20/36] Bump dependencies as leb128 is now included in dev
 release

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index e2db9523c..743e3aaac 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,7 +26,7 @@ classifiers = [
 ]
 dependencies = [
     "defusedxml",
-    "dissect.cstruct>=3.14.dev,<4.0.dev",
+    "dissect.cstruct>=3.14.dev4,<4.0.dev",
     "dissect.eventlog>=3.0.dev,<4.0.dev",
     "dissect.evidence>=3.0.dev,<4.0.dev",
     "dissect.hypervisor>=3.0.dev,<4.0.dev",

From 0d9c88f6a43ec169069512b7cd350c90151cf107 Mon Sep 17 00:00:00 2001
From: Joost Jansen <12032793+joost-j@users.noreply.github.com>
Date: Thu, 28 Mar 2024 16:08:47 +0100
Subject: [PATCH 21/36] Implemented deletion of characters, refactored, added
 new tests

---
 .../plugins/apps/texteditor/windowsnotepad.py | 163 +++++++++++-------
 .../appclosed_saved_and_deletions.bin         | Bin 0 -> 377 bytes
 .../windowsnotepad/appclosed_unsaved.bin      | Bin 0 -> 63 bytes
 .../apps/texteditor/windowsnotepad/saved.bin  | Bin 0 -> 139 bytes
 .../windowsnotepad/unsaved-with-deletions.bin | Bin 0 -> 460 bytes
 .../texteditor/windowsnotepad/unsaved.bin     | Bin 0 -> 257 bytes
 .../apps/texteditor/test_texteditor.py        |   6 +
 7 files changed, 111 insertions(+), 58 deletions(-)
 create mode 100755 tests/_data/plugins/apps/texteditor/windowsnotepad/appclosed_saved_and_deletions.bin
 create mode 100755 tests/_data/plugins/apps/texteditor/windowsnotepad/appclosed_unsaved.bin
 create mode 100755 tests/_data/plugins/apps/texteditor/windowsnotepad/saved.bin
 create mode 100755 tests/_data/plugins/apps/texteditor/windowsnotepad/unsaved-with-deletions.bin
 create mode 100755 tests/_data/plugins/apps/texteditor/windowsnotepad/unsaved.bin

diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
index 4c2a7dac4..97d5d979a 100644
--- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py
+++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import zlib
+from enum import IntEnum
 from typing import Iterator
 
 from dissect.cstruct import cstruct
@@ -19,33 +20,43 @@
     TexteditorPlugin,
 )
 
-c_def = """
-struct multi_block_entry {
-    uint16    offset;
-    uleb128   len;
-    wchar     data[len];
-    char      crc32[4]; // Big endian CRC32
-};
+# Thanks to @Nordgaren, @daddycocoaman, @JustArion and @ogmini for their suggestions and feedback in the PR
+# thread. This really helped figuring out the last missing bits and pieces
+# required for recovering text from these files.
 
-struct single_block_entry {
-    uint16    offset;
-    uleb128   len;
-    wchar     data[len];
-    char      unk1;
-    char      crc32[4]; // Big endian CRC32
+c_def = """
+struct header {
+    char        magic[2]; // NP
+    uint8       unk0; //
+    uint8       fileState; // 0 if unsaved, 1 if saved
+}
+
+struct header_saved_tab {
+    uleb128     filePathLength;
+    wchar       filePath[filePathLength];
+    uleb128     fileSize;
+    uleb128     encoding;
+    uleb128     carriageReturnType;
+    uleb128     timestamp; // Windows Filetime format (not unix timestamp)
+    char        sha256[32];
+    uleb128     unk0;
+    uleb128     unk1;
+    char        crc32[4]; // Big endian CRC32
 };
 
-struct header_crc {
-    char      unk[4];
-    char      crc32[4]; // Big endian CRC32
+struct header_unsaved_tab {
+    uint8       unk0;
+    uleb128     fileSize;
+    uleb128     fileSizeDuplicate; // not used
+    uint8       unk1;
+    uint8       unk2;
 };
 
-struct tab {
-    char                        magic[3];         // NP\x00
-    char                        header_start[2];  // \x00\x01
-    uleb128                     fsize1;
-    uleb128                     fsize2;
-    char                        header_end[2];    // \x01\x00
+struct data_block {
+    uleb128     offset;
+    uleb128     nDeleted;
+    uleb128     nAdded;
+    wchar       data[nAdded];
 };
 """
 
@@ -57,6 +68,11 @@
 )
 
 
+class FileState(IntEnum):
+    Unsaved = 0x00
+    Saved = 0x01
+
+
 def _calc_crc32(data: bytes) -> bytes:
     """Perform a CRC32 checksum on the data and return it as bytes."""
     return zlib.crc32(data).to_bytes(length=4, byteorder="big")
@@ -94,31 +110,56 @@ def _process_tab_file(self, file: TargetPath, user: UnixUserRecord | WindowsUser
             A TextEditorTabRecord containing information that is in the tab.
         """
         with file.open("rb") as fh:
-            tab = c_windowstab.tab(fh)
+            # Header is the same for all types
+            header = c_windowstab.header(fh)
 
-            if tab.fsize1 != 0:
-                data_entry = c_windowstab.single_block_entry(fh)
+            # File can be saved, or unsaved. Depending on the filestate, different header fields are present
+            # Currently, no information in the header is used in the outputted records, only the contents of the tab
+            tab = (
+                c_windowstab.header_saved_tab(fh)
+                if header.fileState == FileState.Saved
+                else c_windowstab.header_unsaved_tab(fh)
+            )
 
-                # The header (minus the magic) plus all data (exluding the CRC32 at the end) is included in the checksum
-                actual_crc32 = _calc_crc32(tab.dumps()[3:] + data_entry.dumps()[:-4])
+            # In the case that the filesize is known up front, then this file is zet to a nonzero value
+            # This means that the data is stored in one block
+            if tab.fileSize != 0:
+                # So we only parse one block
+                data_entry = c_windowstab.data_block(fh)
 
-                if data_entry.crc32 != actual_crc32:
+                # An extra byte is appended to the single block, not yet sure where this is defined and/or used for
+                extra_byte = fh.read(1)
+
+                # The CRC32 value is appended after the extra byte
+                defined_crc32 = fh.read(4)
+
+                # The header (minus the magic) plus all data (including the extra byte)  is included in the checksum
+                actual_crc32 = _calc_crc32(header.dumps()[3:] + tab.dumps() + data_entry.dumps() + extra_byte)
+
+                if defined_crc32 != actual_crc32:
                     self.target.log.warning(
                         "CRC32 mismatch in single-block file: %s (expected=%s, actual=%s)",
                         file.name,
-                        data_entry.crc32.hex(),
+                        defined_crc32.hex(),
                         actual_crc32.hex(),
                     )
 
                 text = data_entry.data
 
             else:
-                header_crc = c_windowstab.header_crc(fh)
-                # Reconstruct the text of the multi_block_entry variant
-                # CRC32 is calculated based on the entire header, up to the point where the CRC32 value is stored
-                defined_header_crc32 = header_crc.crc32
+                # Here, the fileSize is zero'ed, meaning that the size is not known up front.
+                # Data may be stored in multiple, variable-length blocks. This happens, for example, when several
+                # additions and deletions of characters have been recorded and these changes have not been 'flushed'
+
+                # First, parse 4 as of yet unknown bytes
+                # Likely holds some addition information about the tab (view options etc)
+                unknown_bytes = fh.read(4)
+
+                # In this multi-block variant, he header itself has a CRC32 value as well
+                defined_header_crc32 = fh.read(4)
 
-                actual_header_crc32 = _calc_crc32(tab.dumps()[3:] + header_crc.unk)
+                # Calculate CRC32 of the header and check if it matches
+                actual_header_crc32 = _calc_crc32(header.dumps()[3:] + tab.dumps() + unknown_bytes)
                 if defined_header_crc32 != actual_header_crc32:
                     self.target.log.warning(
                         "CRC32 mismatch in header of multi-block file: %s " "expected=%s, actual=%s",
@@ -130,36 +171,42 @@ def _process_tab_file(self, file: TargetPath, user: UnixUserRecord | WindowsUser
                 # Since we don't know the size of the file up front, and offsets don't necessarily have to be in order,
                 # a list is used to easily insert text at offsets
                 text = []
-                size = 0
 
                 while True:
+                    # Unfortunately, there is no way of determining how many blocks there are. So just try to parse
+                    # until we reach EOF, after which we stop.
                     try:
-                        data_entry = c_windowstab.multi_block_entry(fh)
+                        data_entry = c_windowstab.data_block(fh)
                     except EOFError:
                         break
 
-                    # If there is no data to be added, skip. This may happen sometimes.
-                    if data_entry.len <= 0:
-                        continue
-
-                    size += data_entry.len
-                    # Check the CRC32 checksum for this block
-                    actual_crc32 = _calc_crc32(data_entry.dumps()[:-4])
-                    if data_entry.crc32 != actual_crc32:
-                        self.target.log.warning(
-                            "CRC32 mismatch in multi-block file: %s " "expected=%s, actual=%s",
-                            file.name,
-                            data_entry.crc32.hex(),
-                            actual_crc32.hex(),
-                        )
-
-                    # Extend the list if required. All characters need to fit in the list.
-                    while data_entry.offset + data_entry.len > len(text):
-                        text.append("\x00")
-
-                    # Place the text at the correct offset. UTF16-LE consumes two bytes for one character.
-                    for idx in range(data_entry.len):
-                        text[data_entry.offset + idx] = data_entry.data[(2 * idx) : (2 * idx) + 2]
+                    # Each block has a CRC32 value appended to the block
+                    defined_crc32 = fh.read(4)
+
+                    # Either the nAdded is nonzero, or the nDeleted
+                    if data_entry.nAdded > 0:
+                        # Check the CRC32 checksum for this block
+                        actual_crc32 = _calc_crc32(data_entry.dumps())
+                        if defined_crc32 != actual_crc32:
+                            self.target.log.warning(
+                                "CRC32 mismatch in multi-block file: %s " "expected=%s, actual=%s",
+                                file.name,
+                                data_entry.crc32.hex(),
+                                actual_crc32.hex(),
+                            )
+
+                        # Extend the list if required. All characters need to fit in the list.
+                        while data_entry.offset + data_entry.nAdded > len(text):
+                            text.append("\x00" * 100)
+
+                        # Insert the text at the correct offset.
+                        for idx in range(data_entry.nAdded):
+                            text[data_entry.offset + idx] = data_entry.data[idx]
+
+                    elif data_entry.nDeleted > 0:
+                        # Create a new slice. Include everything up to the offset,
+                        # plus everything after the nDeleted following bytes
+                        text = text[: data_entry.offset] + text[data_entry.offset + data_entry.nDeleted :]
 
                 # Join all the characters to reconstruct the original text
                 text = "".join(text)
diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/appclosed_saved_and_deletions.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/appclosed_saved_and_deletions.bin
new file mode 100755
index 0000000000000000000000000000000000000000..deac4577193f124c86faffa816e03448b5f64a7e
GIT binary patch
literal 377
zcmZvXu}T9$6h-e8`GbI^lPZO{g?4tb0ntLlkZzkX8%=P{24`ce7PPSn#6OTA{)N8~
zdm)`57UC~>CYYcg!=3l$z5DLFb9f}P9&1O5TJq}Xk~`7xCr{L%=5?+@xdwEaa=N0H
zDx!7`)+**@Ho2WnA3n{vvRyrRj5crH_u{SRuhhS7^d`H<<JMEuy#J`qWXjS=Dr02`
zdiB)TnJl6MPR6@UUJ!FS7UUYS%4(l#oe2w;7OYONTC8}1Ro4Y?hrx8EK1ApDR9jsb
yVRQIgVE18LcK>x%!0Rmf|GRT2xW$&>ut65QIB?3`h?o*Hu>6r%P3CU?ntcQL$5EI7

literal 0
HcmV?d00001

diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/appclosed_unsaved.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/appclosed_unsaved.bin
new file mode 100755
index 0000000000000000000000000000000000000000..54e75e70e7677b4bbd6032c85dda4b5b3ccf70e6
GIT binary patch
literal 63
zcmeYZU|?Vr7iVN(U=Vj^$YIE5C}zlH$YV%nP+&-8C;-A7AU_$%E&<Z{Kp6!f&SxlR
KV64zuw*UY<cni(|

literal 0
HcmV?d00001

diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/saved.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/saved.bin
new file mode 100755
index 0000000000000000000000000000000000000000..7c5a46e7af27f27e990f709b51f8811c4f6a1b59
GIT binary patch
literal 139
zcmeYZU|>{mX0T$2VF+a?W=Lfy0+KNdrBHSZg9}ism?4{?gdv}y04Nj8kjPL56iZ=H
zWY7bORsb;@E8~T&AExy$TJV<f?d&q9&SxSg@3faa*WlXM&i!CTfaGE&h0RfCLJqw7
WeUO2Xjg66kfq@O#AO?lPg|`5#{wMta

literal 0
HcmV?d00001

diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/unsaved-with-deletions.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/unsaved-with-deletions.bin
new file mode 100755
index 0000000000000000000000000000000000000000..7d05980d8feb93be46a6effe5e6b2f91a2cf1a6c
GIT binary patch
literal 460
zcmZvWO(?^07{|ZQW}DgA<|T3vxuU$wyIhvlno#VbF%Dj){(pIw;-n}C;pFA$fKZxR
zE*quXlw@oaBYDZn%vSjQB&F1Ic<TFkKF`-^0_b}HRIF{xu)P7u$SWa`4fNsPmViX;
z=3I|S2sfV6G9uhAj9b~6&wN+Mcq3MmKUWyvgQOd$pYgp2h=N@TN=1Up*%-kM1%l2i
z16d&Y^bR?w583-WAqA)n-8G)mNRSl)8qh2GRdcWVJ&MCGJKk;;7lvE4hiV?p&(=>y
z^Qd}GIvT^H+=-=K4X6uK!7P)OFE)LaR7(hr7}7O_NBM>=hVbCpZ(@8GHWw6^zx>}k
zn(#C}4CWV6(ju-i-ipDh={v@^qc8E`>|b6@c&^kHstETQ#TM&p!O=<Jg7p<4mzYzS
z&x-P^w<6|ihoS$u<M%)7+le%#-K7I{Kqf5b>j>e!wD_O?EVsbzGZ|SfVo^|L#&X$t
T?OSIYmmMn{O>u-^@$1kRVZVYF

literal 0
HcmV?d00001

diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/unsaved.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/unsaved.bin
new file mode 100755
index 0000000000000000000000000000000000000000..3622312c149293892cf94fe6ca8367ab42d3441a
GIT binary patch
literal 257
zcmeYZU|?VbBL)Tr`?c#Q0NJ4o3_2#(j0}u<42=IiZDwL%EN0kg^rW7Nk>Tk*8(T(3
zhUYVyQh?f?MZE|Hs`q19DfNFLP<=jw#M8r{fa*&az7-ekU}j)cU=TfWID!Raf?1K^
zeO3m>M26yL9(&jr7|R$8k8b<L&cK+;kn45t1_uLU3d1+X3J*?@uK1GMKe<4PKW@99
z%MCK{ce21q9+1@ZDIuM_ASvf*%(Z+VDdXn(JNX$Ha~OJl=miLX7<nFg%mR!IziPad
d`577BvaQeKV`O-9<Y**NeGbFM8SD?>>H!5ENEHA8

literal 0
HcmV?d00001

diff --git a/tests/plugins/apps/texteditor/test_texteditor.py b/tests/plugins/apps/texteditor/test_texteditor.py
index fa19b11fb..ae0712fbb 100644
--- a/tests/plugins/apps/texteditor/test_texteditor.py
+++ b/tests/plugins/apps/texteditor/test_texteditor.py
@@ -26,6 +26,12 @@ def test_texteditor_plugin(target_win, fs_win, tmp_path, target_win_users, caplo
         "3d0cc86e-dfc9-4f16-b74a-918c2c24188c.bin": loremipsum,
         "wrong-checksum.bin": text4,  # only added to check for corrupt checksum, not validity
         "cfe38135-9dca-4480-944f-d5ea0e1e589f.bin": (loremipsum * 37)[:-2],  # removed the two newlines in this file
+        "saved.bin": "Saved!",
+        "unsaved.bin": "Not saved at all",
+        "unsaved-with-deletions.bin": "Not saved aasdflasd",
+        "appclosed_saved_and_deletions.bin": "Closing application now. It's saved but now I'm adding unsaved"
+        " changes and closing the application again. Dit a few deletions!",
+        "appclosed_unsaved.bin": "Closing application now",
     }
 
     tabcache = absolute_path("_data/plugins/apps/texteditor/windowsnotepad/")

From 304db58f13322cce0cc7e58d435e35337e0a7519 Mon Sep 17 00:00:00 2001
From: Joost Jansen <12032793+joost-j@users.noreply.github.com>
Date: Thu, 28 Mar 2024 16:44:16 +0100
Subject: [PATCH 22/36] Small comment changes

---
 .../plugins/apps/texteditor/windowsnotepad.py      | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
index 97d5d979a..3bb34a9d0 100644
--- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py
+++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
@@ -27,7 +27,7 @@
 c_def = """
 struct header {
     char        magic[2]; // NP
-    uint8       unk0; //
+    uint8       unk0;
     uint8       fileState; // 0 if unsaved, 1 if saved
 }
 
@@ -39,15 +39,13 @@
     uleb128     carriageReturnType;
     uleb128     timestamp; // Windows Filetime format (not unix timestamp)
     char        sha256[32];
-    uleb128     unk0;
-    uleb128     unk1;
-    char        crc32[4]; // Big endian CRC32
+    char        unk[6];
 };
 
 struct header_unsaved_tab {
     uint8       unk0;
     uleb128     fileSize;
-    uleb128     fileSizeDuplicate; // not used
+    uleb128     fileSizeDuplicate;
     uint8       unk1;
     uint8       unk2;
 };
@@ -130,7 +128,7 @@ def _process_tab_file(self, file: TargetPath, user: UnixUserRecord | WindowsUser
                 # An extra byte is appended to the single block, not yet sure where this is defined and/or used for
                 extra_byte = fh.read(1)
 
-                # The CRC32 value is appended after the extra byte
+                # The CRC32 value is appended after the extra byte in big-endian
                 defined_crc32 = fh.read(4)
 
                 # The header (minus the magic) plus all data (including the extra byte)  is included in the checksum
@@ -155,7 +153,7 @@ def _process_tab_file(self, file: TargetPath, user: UnixUserRecord | WindowsUser
                 # Likely holds some addition information about the tab (view options etc)
                 unknown_bytes = fh.read(4)
 
-                # In this multi-block variant, he header itself has a CRC32 value as well
+                # In this multi-block variant, he header itself has a CRC32 value in big-endian as well
                 defined_header_crc32 = fh.read(4)
 
                 # Calculate CRC32 of the header and check if it matches
@@ -180,7 +178,7 @@ def _process_tab_file(self, file: TargetPath, user: UnixUserRecord | WindowsUser
                     except EOFError:
                         break
 
-                    # Each block has a CRC32 value appended to the block
+                    # Each block has a CRC32 value in big-endian appended to the block
                     defined_crc32 = fh.read(4)
 
                     # Either the nAdded is nonzero, or the nDeleted

From 2ca889c2b20b9a14a6a639618a272b6bb80ed717 Mon Sep 17 00:00:00 2001
From: Joost Jansen <12032793+joost-j@users.noreply.github.com>
Date: Thu, 28 Mar 2024 16:46:31 +0100
Subject: [PATCH 23/36] Remove chunked addition of zero bytes

---
 dissect/target/plugins/apps/texteditor/windowsnotepad.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
index 3bb34a9d0..10f065624 100644
--- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py
+++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
@@ -195,7 +195,7 @@ def _process_tab_file(self, file: TargetPath, user: UnixUserRecord | WindowsUser
 
                         # Extend the list if required. All characters need to fit in the list.
                         while data_entry.offset + data_entry.nAdded > len(text):
-                            text.append("\x00" * 100)
+                            text.append("\x00")
 
                         # Insert the text at the correct offset.
                         for idx in range(data_entry.nAdded):

From 74ffb83a2b3e64fc44dc0f4ef2004cec85504e10 Mon Sep 17 00:00:00 2001
From: Joost Jansen <12032793+joost-j@users.noreply.github.com>
Date: Thu, 28 Mar 2024 17:21:46 +0100
Subject: [PATCH 24/36] Added new test, changed to list insertion instead of
 appending

---
 .../plugins/apps/texteditor/windowsnotepad.py    |   6 +-----
 .../windowsnotepad/lots-of-deletions.bin         | Bin 0 -> 2558 bytes
 tests/plugins/apps/texteditor/test_texteditor.py |   3 +++
 3 files changed, 4 insertions(+), 5 deletions(-)
 create mode 100755 tests/_data/plugins/apps/texteditor/windowsnotepad/lots-of-deletions.bin

diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
index 10f065624..d7c92cae0 100644
--- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py
+++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
@@ -193,13 +193,9 @@ def _process_tab_file(self, file: TargetPath, user: UnixUserRecord | WindowsUser
                                 actual_crc32.hex(),
                             )
 
-                        # Extend the list if required. All characters need to fit in the list.
-                        while data_entry.offset + data_entry.nAdded > len(text):
-                            text.append("\x00")
-
                         # Insert the text at the correct offset.
                         for idx in range(data_entry.nAdded):
-                            text[data_entry.offset + idx] = data_entry.data[idx]
+                            text.insert(data_entry.offset + idx, data_entry.data[idx])
 
                     elif data_entry.nDeleted > 0:
                         # Create a new slice. Include everything up to the offset,
diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/lots-of-deletions.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/lots-of-deletions.bin
new file mode 100755
index 0000000000000000000000000000000000000000..203f12ee3ce97374643a9af1b6b33712f6ce961f
GIT binary patch
literal 2558
zcmb`JiC4|(AIIO1R1#X0CE`YsN~P3IMGCo5mZl^nQn$LQTe<g2!l1Ga8Z9Q<Fm0BU
zFlf<6S;J^5%NT27r2J-D#_-cTpL^!~e*S<t=bm$4=kvTj%d>sH&%6ZyQ~m(BoGMD?
z?1SM#)wK*P0)9EZc74Dk&=@@`M+HoVWbgVreZd6Ce>qyDN)}CBhZr@m7@Vjb{ahU^
z8o6mccl&{f(P7}Bp+Vwh@<$)~gUL~+G9^KihK`lmJ{SPD1})u5jRR>&KDI?N2+R_)
zX0vcD^13_O(q9|Q4dVIZt#rT?xMo^-QkTS;X)TU}Nux0Kd8h~G0h!ZcV|}vx6?$~-
z5U^nQ<mI*xrFv>*m%WG4P@him&Ea6-*jSKJVn8)(HHw2r&;&y}-ujIMlOk*P(T#?r
zQCQd}j3TdH=Taj_Q&Opl(M|z*y_{L=ZbU<cwUy3eC`G}U&YZC{l>M?Y(-<rizgw4j
zjibnGKfgO|0v3kSx}8Pi!4fd%Q^6)va?fsj>@|T(jc?vjVn&t+`nxBZgROvx;|>FJ
zhI?+V-ewF98_mTN7=EhDaOQdmDhHvM>*Y|d8M2=11$e2*KV(6nDut@oCz2)7&U>mQ
zg=&AOy3&fgtmk#<Sd-Vpy;(t%XsH+Esao5RczcSs)E3MgTiq`7PNs>(8F%)z16zf&
zrp3qXNwZG1vrq^Yg8h@K?>kT~Gr!3@r_l7vHhT*l!D68@Z{|lQuqfR3w;tk58(miB
zH-9Ra1@f2gI6IBvcq%GmrZHSA5?r6kaO2J4RA+{p=AHRY47Wp{k8xzUlebNO3Pb%<
zdrt?3TVnzfg$!3WjBw}q#kdt27R2+fL9ySVG@d^gLxqM;JbyGkjWKQI`C~E7GI}o0
zABB|(ZE8Hf7+W4~=;iq>@U-fEhzk`Gr=R&`I-Q-0W0S00$s+8z7UxC>U(^#{FoPoJ
z__p}Y1Y3>4-Ip3>k)|eITI@~%`#mT!noXL8jWaStlq<~qgoX#L*`EI8VRJ}RKfz5f
zmo(|)3e!F5Tm-DzQtd^jAZx?r=y??I$NXhS<}n-$5@mTY93IwP<;n2<!-G9@8TM!F
zST%>?fZx+u9t>GJK_f*B-!)7&pUsfAM>fQrVXxP4qgf28n)z#ZePYZ{X#5YaPk@wW
zz2JYVzWFpO%<-sOKq*=i>Qxrfa_d$}%e?8dgfCkC<||sJYWr?iAJXW4h`j7e6Sm(h
zvspy-v<Q=4_<_YCv8T>`F<s|FH`CMn$z6NP>_3;#$^5L{eI|h1Q;&br4J5Cktq0te
zQlC+YD;Ecm_=mg$ua{BwQ}6FSTux&5#fp?*5_j9TDprtKfHa#B3g~TQF)NgEeK4rj
z6Vo-!%{Xo!M)2IO;Z-=@g4QmVfCzH`Xp+)b0_KX+Bi-IBsd)WA0>zO8rfG`CD7r*e
zhRxfesZlI-j_8$AqqrG#JS>LXJ=Fa5VrjG8ejcw9N0!W{*q&8j@i6|T`kai4KhP7q
zR8EW8R93Z6L6LiA1lq4Av2k}nYdocJm>4%=4W;OEjvbglLwR{l_G@W6E<ZgupGXJv
zW!<TObu{-EA}zJ`r1{F^Q1=G9?@iZl9Q~Sxa$YN%Hu63mj*=+NgPS->&~~=#!e&lV
zj0@BGHAzYBjx~wN%F*%-p?y=7Hir<Suq~Xz5o7my(^gIbj9B>m)i&j5@_%hEZs!z(
zMWuHGcW{b_x1Hn3ot!N3ySCQgZ<N%b?{adNl8imt2J!B00oP}umM^<|Jipg1S-iVP
zKsdAJf-V&)>D_L^yL$)<V?`r*caK1Cj@Q*MySoGjzqWccl(t&O=F_fWRNg<`U53Iw
zZfyqC1Zws~_Ku*HxV9uGU?c^!5Im_jq!e$9M)i!My$Wq=x8mJB3T?wm?)j^uA$rMg
ziy4x;_62BA)PFu*GHiD%s?emU-)lA2^G}x;2Kzg%^G{a@;*JFO^2teXtK*McKDl5_
zS+u}Vo1#iZmMTa13h?j7pUn|RIl1DzTaft=oaETs+HRZ8DFT(Zmfy@#+GbV^w9H`)
zAJh9gWb^;988(|pBf!?E+`9j3a+Q9PjHISKP7-t*R87g}B*U)AH?xl^J>H%4RXxrr
z76&~ab)Mi9gXYcoS59)0U`}#FYXK)IhE*xPJH<(kkmA5sg`6y~Z}tbhB2EIlFBD%o
z%}Ii>8Ag?-c}>P0H{O%Q1Lt*A4U0LA#}?bvZ_jXAjcoPZW@kBxp>b}c;W<tMWL55K
z`B7<m@~#)>IfY{Sit>&NoFtf*s-u39lN{D3?%Xa>>W*2tB$g_5!XxX_%Qy)T@n)QL
zIVTBx{GMI8#7U0j;X&&wl%o#D=aMQp#bLpHP1`C?3f!)W8(Ymuf){UlAJuS@WB=f1
zlFOXL=##1Q%N0%n>^-#KxRz5ePG<{i>y)-9zD`zGIYmKK`#R|wrx@(5(8##XNsjkF
z>zug3$rAQ@O%IJ|8LM0#U*OMy7#vQ{G~v$yIr`s|58%&%2n3~cxqkT^kYd$nmd~F9
z;W&ILE$DyG0Uf^a6-Ij641cYgJ64O~om!D}5JTIQTK|Cz?VA%ZfT3f$j!=`~H!bxu
z{TbfYN2h2oyxH0COFxF!9wl$p8D8jrZc<}--kknSmEq~PW*1dycCuHZay6RpV`ER#
FzX7Rn_euZ&

literal 0
HcmV?d00001

diff --git a/tests/plugins/apps/texteditor/test_texteditor.py b/tests/plugins/apps/texteditor/test_texteditor.py
index ae0712fbb..a4bb730ad 100644
--- a/tests/plugins/apps/texteditor/test_texteditor.py
+++ b/tests/plugins/apps/texteditor/test_texteditor.py
@@ -32,6 +32,9 @@ def test_texteditor_plugin(target_win, fs_win, tmp_path, target_win_users, caplo
         "appclosed_saved_and_deletions.bin": "Closing application now. It's saved but now I'm adding unsaved"
         " changes and closing the application again. Dit a few deletions!",
         "appclosed_unsaved.bin": "Closing application now",
+        "lots-of-deletions.bin": "This a text, which is nothing special. But I am going to modify it a bit. "
+        "For example, I have removed quote some stuff. Adding a word in the beginning now."
+        ".. At this point, I've edited it quite a lot.",
     }
 
     tabcache = absolute_path("_data/plugins/apps/texteditor/windowsnotepad/")

From c1480619db8d5f181fda78d89c0603db5a779e3d Mon Sep 17 00:00:00 2001
From: Joost Jansen <12032793+joost-j@users.noreply.github.com>
Date: Thu, 28 Mar 2024 17:31:32 +0100
Subject: [PATCH 25/36] Refactored test file and removed fileState enum

---
 .../plugins/apps/texteditor/windowsnotepad.py    |  8 +-------
 tests/plugins/apps/texteditor/test_texteditor.py | 16 +++++++++++-----
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
index d7c92cae0..69855923f 100644
--- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py
+++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
@@ -1,7 +1,6 @@
 from __future__ import annotations
 
 import zlib
-from enum import IntEnum
 from typing import Iterator
 
 from dissect.cstruct import cstruct
@@ -66,11 +65,6 @@
 )
 
 
-class FileState(IntEnum):
-    Unsaved = 0x00
-    Saved = 0x01
-
-
 def _calc_crc32(data: bytes) -> bytes:
     """Perform a CRC32 checksum on the data and return it as bytes."""
     return zlib.crc32(data).to_bytes(length=4, byteorder="big")
@@ -115,7 +109,7 @@ def _process_tab_file(self, file: TargetPath, user: UnixUserRecord | WindowsUser
             # Currently, no information in the header is used in the outputted records, only the contents of the tab
             tab = (
                 c_windowstab.header_saved_tab(fh)
-                if header.fileState == FileState.Saved
+                if header.fileState == 0x01  # 0x00 is unsaved, 0x01 is saved
                 else c_windowstab.header_unsaved_tab(fh)
             )
 
diff --git a/tests/plugins/apps/texteditor/test_texteditor.py b/tests/plugins/apps/texteditor/test_texteditor.py
index a4bb730ad..d27d9e15a 100644
--- a/tests/plugins/apps/texteditor/test_texteditor.py
+++ b/tests/plugins/apps/texteditor/test_texteditor.py
@@ -12,6 +12,15 @@
 text4 = "This is another short test. And we should be able to parse this."
 text5 = "This is a test and the text is longer than 256 bytes. "
 text6 = "This is a test and the text is longer than 65536 bytes. "
+text7 = (
+    "This a text, which is nothing special. But I am going to modify it a bit. For example, "
+    "I have removed quote some stuff. Adding a word in the beginning now... "
+    "At this point, I've edited it quite a lot."
+)
+text8 = (
+    "Closing application now. It's saved but now I'm adding unsaved changes and closing "
+    "the application again. Dit a few deletions!"
+)
 loremipsum = """Lorem ipsum dolor sit amet. Eum error blanditiis eum pariatur delectus ut consequuntur officiis a excepturi dignissimos et doloribus quia 33 perspiciatis soluta nam perspiciatis dolor. Ut repudiandae quidem cum sint modi qui sint consequatur. Aut autem quidem eum enim consequatur qui voluptate consequatur non similique voluptate. A vitae modi vel sint provident ut galisum tenetur sit voluptatem amet. Est impedit perspiciatis est repudiandae voluptates ut fugit alias! Eum magni esse aut velit illum qui excepturi aperiam. Ex dolores asperiores ut debitis omnis qui consequuntur dolore. Est voluptatem mollitia et quibusdam unde ea accusamus fuga. Cum quis galisum et impedit sunt qui aliquam perspiciatis sed modi quidem qui nisi molestias. Aut temporibus architecto ut neque voluptatem et consequatur deleniti sed accusantium quibusdam et omnis dignissimos ad rerum ipsam et rerum quia. Ut nihil repellat et eaque molestias quo iusto ipsum At optio sint eos quidem earum?\r\rEx deleniti unde eum tenetur rerum ea dolore numquam? Eos aperiam officiis et neque explicabo et enim atque ut eaque omnis non illum eveniet est molestias itaque et ratione voluptatem. Ea deserunt nemo et quos tempora et nostrum aperiam sit necessitatibus illo sit culpa placeat. Vel tempore quibusdam ut velit voluptate aut odio facere non voluptas earum est odio galisum et voluptas harum. Et blanditiis sapiente et nostrum laborum aut voluptatem explicabo a quasi assumenda. Est voluptatem quia eum minima galisum quo totam excepturi aut facilis enim vel voluptate repudiandae sit distinctio laboriosam. Quo possimus molestiae et molestiae accusantium est voluptas omnis sed obcaecati natus. Non vitae asperiores qui nostrum enim id saepe fugiat et incidunt quasi.\r\rEos ipsa facilis aut excepturi voluptatem a omnis magni vel magni iste. Sed ipsum consequatur qui reprehenderit deleniti et soluta molestiae. 
Ut vero assumenda id dolor ipsum in deleniti voluptatem aut quis quisquam sed repudiandae temporibus ab quia inventore. Sed velit fugit vel facere cumque et delectus ullam sed eaque impedit. Est veritatis dignissimos aut doloribus dolorem vel pariatur repellendus sit nesciunt similique eum architecto quia. Ea expedita veritatis eum dolorem molestiae ut enim fugit aut beatae quibusdam. Aut voluptas natus in quidem deleniti aut animi iure est incidunt tenetur qui culpa maiores! Et nostrum quaerat qui consequatur consequatur aut aliquam atque aut praesentium rerum et consequuntur exercitationem. Non accusantium ipsa vel consectetur vitae ut magnam autem et natus rerum ut consectetur inventore est doloremque temporibus 33 dolores doloribus! Aut perferendis optio et nostrum repellendus et fugit itaque ut nisi neque sed sint quaerat. Aut placeat architecto et eius sapiente eum molestiae quam. Quo mollitia sapiente non Quis neque non tempora laudantium. Quo distinctio quos et molestias natus sit veritatis consequuntur aut repellendus neque a porro galisum cum numquam nesciunt et animi earum? Aut dolorum dolore non assumenda omnis et molestiae amet id sint vero est eligendi harum sit temporibus magnam aut ipsam quos.\r\r"""  # noqa: E501
 
 
@@ -29,12 +38,9 @@ def test_texteditor_plugin(target_win, fs_win, tmp_path, target_win_users, caplo
         "saved.bin": "Saved!",
         "unsaved.bin": "Not saved at all",
         "unsaved-with-deletions.bin": "Not saved aasdflasd",
-        "appclosed_saved_and_deletions.bin": "Closing application now. It's saved but now I'm adding unsaved"
-        " changes and closing the application again. Dit a few deletions!",
+        "lots-of-deletions.bin": text7,
+        "appclosed_saved_and_deletions.bin": text8,
         "appclosed_unsaved.bin": "Closing application now",
-        "lots-of-deletions.bin": "This a text, which is nothing special. But I am going to modify it a bit. "
-        "For example, I have removed quote some stuff. Adding a word in the beginning now."
-        ".. At this point, I've edited it quite a lot.",
     }
 
     tabcache = absolute_path("_data/plugins/apps/texteditor/windowsnotepad/")

From 2bf6e2f1287c439c204890f2eb8ddfadb0216029 Mon Sep 17 00:00:00 2001
From: Joost Jansen <12032793+joost-j@users.noreply.github.com>
Date: Thu, 11 Apr 2024 14:51:16 +0200
Subject: [PATCH 26/36] Small comment changes/typos

---
 .../plugins/apps/texteditor/windowsnotepad.py      | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
index 69855923f..d73db7b87 100644
--- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py
+++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
@@ -20,7 +20,7 @@
 )
 
 # Thanks to @Nordgaren, @daddycocoaman, @JustArion and @ogmini for their suggestions and feedback in the PR
-# thread. This really helped figuring out the last missing bits and pieces
+# thread. This really helped to figure out the last missing bits and pieces
 # required for recovering text from these files.
 
 c_def = """
@@ -105,7 +105,7 @@ def _process_tab_file(self, file: TargetPath, user: UnixUserRecord | WindowsUser
             # Header is the same for all types
             header = c_windowstab.header(fh)
 
-            # File can be saved, or unsaved. Depending on the filestate, different header fields are present
+            # File can be saved, or unsaved. Depending on the file state, different header fields are present
             # Currently, no information in the header is used in the outputted records, only the contents of the tab
             tab = (
                 c_windowstab.header_saved_tab(fh)
@@ -113,7 +113,7 @@ def _process_tab_file(self, file: TargetPath, user: UnixUserRecord | WindowsUser
                 else c_windowstab.header_unsaved_tab(fh)
             )
 
-            # In the case that the filesize is known up front, then this file is zet to a nonzero value
+            # In the case that the file size is known up front, then this fileSize is set to a nonzero value
             # This means that the data is stored in one block
             if tab.fileSize != 0:
                 # So we only parse one block
@@ -139,15 +139,15 @@ def _process_tab_file(self, file: TargetPath, user: UnixUserRecord | WindowsUser
                 text = data_entry.data
 
             else:
-                # Here, the fileSize is zero'ed, meaning that the size is not known up front.
+                # Here, the fileSize is zeroed, meaning that the size is not known up front.
                 # Data may be stored in multiple, variable-length blocks. This happens, for example, when several
                 # additions and deletions of characters have been recorded and these changes have not been 'flushed'
 
-                # First, parse 4 as of yet unknown bytes
-                # Likely holds some addition information about the tab (view options etc)
+                # First, parse 4 unknown bytes. These likely
+                # hold some addition information about the tab (view options etc.)
                 unknown_bytes = fh.read(4)
 
-                # In this multi-block variant, he header itself has a CRC32 value in big-endian as well
+                # In this multi-block variant, the header itself has a CRC32 value in big-endian as well
                 defined_header_crc32 = fh.read(4)
 
                 # Calculate CRC32 of the header and check if it matches

From a19c49b37fc6bcd962a06bb0c17388f5ba83c829 Mon Sep 17 00:00:00 2001
From: Joost Jansen <12032793+joost-j@users.noreply.github.com>
Date: Fri, 26 Apr 2024 10:52:35 +0200
Subject: [PATCH 27/36] Split plugin from parsing logic, added more tests

---
 .../plugins/apps/texteditor/windowsnotepad.py | 93 +++++++++++++------
 .../apps/texteditor/test_texteditor.py        | 54 ++++++++++-
 2 files changed, 115 insertions(+), 32 deletions(-)

diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
index d73db7b87..663f14ad6 100644
--- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py
+++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import logging
 import zlib
 from typing import Iterator
 
@@ -13,7 +14,7 @@
     WindowsUserRecord,
     create_extended_descriptor,
 )
-from dissect.target.plugin import export
+from dissect.target.plugin import arg, export
 from dissect.target.plugins.apps.texteditor.texteditor import (
     GENERIC_TAB_CONTENTS_RECORD_FIELDS,
     TexteditorPlugin,
@@ -60,39 +61,28 @@
 c_windowstab = cstruct()
 c_windowstab.load(c_def)
 
-TextEditorTabRecord = create_extended_descriptor([UserRecordDescriptorExtension])(
+WindowsNotepadTabRecord = create_extended_descriptor([UserRecordDescriptorExtension])(
     "texteditor/windowsnotepad/tab", GENERIC_TAB_CONTENTS_RECORD_FIELDS
 )
 
+WindowsNotepadTabContentRecord = create_extended_descriptor([])(
+    "texteditor/windowsnotepad/tab_content", GENERIC_TAB_CONTENTS_RECORD_FIELDS
+)
+
 
 def _calc_crc32(data: bytes) -> bytes:
     """Perform a CRC32 checksum on the data and return it as bytes."""
     return zlib.crc32(data).to_bytes(length=4, byteorder="big")
 
 
-class WindowsNotepadPlugin(TexteditorPlugin):
-    """Windows notepad tab content plugin."""
-
-    __namespace__ = "windowsnotepad"
-
-    GLOB = "AppData/Local/Packages/Microsoft.WindowsNotepad_*/LocalState/TabState/*.bin"
-
-    def __init__(self, target):
-        super().__init__(target)
-        self.users_tabs: list[TargetPath, UnixUserRecord | WindowsUserRecord] = []
-
-        for user_details in self.target.user_details.all_with_home():
-            for tab_file in user_details.home_path.glob(self.GLOB):
-                if tab_file.name.endswith(".1.bin") or tab_file.name.endswith(".0.bin"):
-                    continue
-
-                self.users_tabs.append((tab_file, user_details.user))
+class WindowsNotepadTabContent:
+    """Windows notepad tab parser"""
 
-    def check_compatible(self) -> None:
-        if not self.users_tabs:
-            raise UnsupportedPluginError("No Windows Notepad temporary tab files found")
+    def __new__(cls, file: TargetPath, include_deleted_content=False) -> WindowsNotepadTabContentRecord:
+        return cls._process_tab_file(file, include_deleted_content)
 
-    def _process_tab_file(self, file: TargetPath, user: UnixUserRecord | WindowsUserRecord) -> TextEditorTabRecord:
+    @staticmethod
+    def _process_tab_file(file: TargetPath, include_deleted_content: bool) -> WindowsNotepadTabContentRecord:
         """Parse a binary tab file and reconstruct the contents.
 
         Args:
@@ -129,7 +119,7 @@ def _process_tab_file(self, file: TargetPath, user: UnixUserRecord | WindowsUser
                 actual_crc32 = _calc_crc32(header.dumps()[3:] + tab.dumps() + data_entry.dumps() + extra_byte)
 
                 if defined_crc32 != actual_crc32:
-                    self.target.log.warning(
+                    logging.warning(
                         "CRC32 mismatch in single-block file: %s (expected=%s, actual=%s)",
                         file.name,
                         defined_crc32.hex(),
@@ -153,7 +143,7 @@ def _process_tab_file(self, file: TargetPath, user: UnixUserRecord | WindowsUser
                 # Calculate CRC32 of the header and check if it matches
                 actual_header_crc32 = _calc_crc32(header.dumps()[3:] + tab.dumps() + unknown_bytes)
                 if defined_header_crc32 != actual_header_crc32:
-                    self.target.log.warning(
+                    logging.warning(
                         "CRC32 mismatch in header of multi-block file: %s " "expected=%s, actual=%s",
                         file.name,
                         defined_header_crc32.hex(),
@@ -164,6 +154,8 @@ def _process_tab_file(self, file: TargetPath, user: UnixUserRecord | WindowsUser
                 # a list is used to easily insert text at offsets
                 text = []
 
+                deleted_content = ""
+
                 while True:
                     # Unfortunately, there is no way of determining how many blocks there are. So just try to parse
                     # until we reach EOF, after which we stop.
@@ -180,7 +172,7 @@ def _process_tab_file(self, file: TargetPath, user: UnixUserRecord | WindowsUser
                         # Check the CRC32 checksum for this block
                         actual_crc32 = _calc_crc32(data_entry.dumps())
                         if defined_crc32 != actual_crc32:
-                            self.target.log.warning(
+                            logging.warning(
                                 "CRC32 mismatch in multi-block file: %s " "expected=%s, actual=%s",
                                 file.name,
                                 data_entry.crc32.hex(),
@@ -194,15 +186,52 @@ def _process_tab_file(self, file: TargetPath, user: UnixUserRecord | WindowsUser
                     elif data_entry.nDeleted > 0:
                         # Create a new slice. Include everything up to the offset,
                         # plus everything after the nDeleted following bytes
+                        if include_deleted_content:
+                            deleted_content += "".join(
+                                text[data_entry.offset : data_entry.offset + data_entry.nDeleted]
+                            )
                         text = text[: data_entry.offset] + text[data_entry.offset + data_entry.nDeleted :]
 
                 # Join all the characters to reconstruct the original text
                 text = "".join(text)
 
-        return TextEditorTabRecord(content=text, path=file, _target=self.target, _user=user)
+                if include_deleted_content:
+                    text += " --- DELETED-CONTENT: "
+                    text += deleted_content
 
-    @export(record=TextEditorTabRecord)
-    def tabs(self) -> Iterator[TextEditorTabRecord]:
+        return WindowsNotepadTabContentRecord(content=text, path=file)
+
+
+class WindowsNotepadPlugin(TexteditorPlugin):
+    """Windows notepad tab content plugin."""
+
+    __namespace__ = "windowsnotepad"
+
+    GLOB = "AppData/Local/Packages/Microsoft.WindowsNotepad_*/LocalState/TabState/*.bin"
+
+    def __init__(self, target):
+        super().__init__(target)
+        self.users_tabs: list[TargetPath, UnixUserRecord | WindowsUserRecord] = []
+        for user_details in self.target.user_details.all_with_home():
+            for tab_file in user_details.home_path.glob(self.GLOB):
+                if tab_file.name.endswith(".1.bin") or tab_file.name.endswith(".0.bin"):
+                    continue
+
+                self.users_tabs.append((tab_file, user_details.user))
+
+    def check_compatible(self) -> None:
+        if not self.users_tabs:
+            raise UnsupportedPluginError("No Windows Notepad temporary tab files found")
+
+    @arg(
+        "--include-deleted-content",
+        type=bool,
+        default=False,
+        required=False,
+        help="Include deleted but recoverable content.",
+    )
+    @export(record=WindowsNotepadTabRecord)
+    def tabs(self, include_deleted_content) -> Iterator[WindowsNotepadTabRecord]:
         """Return contents from Windows 11 temporary Notepad tabs.
 
         Yields TextEditorTabRecord with the following fields:
@@ -210,4 +239,8 @@ def tabs(self) -> Iterator[TextEditorTabRecord]:
             path (path): The path the content originates from.
         """
         for file, user in self.users_tabs:
-            yield self._process_tab_file(file, user)
+            # Parse the file
+            r: WindowsNotepadTabContentRecord = WindowsNotepadTabContent(file, include_deleted_content)
+
+            # Add user- and target specific information to the content record record
+            yield WindowsNotepadTabRecord(content=r.content, path=r.path, _target=self.target, _user=user)
diff --git a/tests/plugins/apps/texteditor/test_texteditor.py b/tests/plugins/apps/texteditor/test_texteditor.py
index d27d9e15a..baaddaa49 100644
--- a/tests/plugins/apps/texteditor/test_texteditor.py
+++ b/tests/plugins/apps/texteditor/test_texteditor.py
@@ -1,4 +1,5 @@
 import os
+from pathlib import Path
 
 from dissect.target.plugins.apps.texteditor import windowsnotepad
 from tests._utils import absolute_path
@@ -24,7 +25,56 @@
 loremipsum = """Lorem ipsum dolor sit amet. Eum error blanditiis eum pariatur delectus ut consequuntur officiis a excepturi dignissimos et doloribus quia 33 perspiciatis soluta nam perspiciatis dolor. Ut repudiandae quidem cum sint modi qui sint consequatur. Aut autem quidem eum enim consequatur qui voluptate consequatur non similique voluptate. A vitae modi vel sint provident ut galisum tenetur sit voluptatem amet. Est impedit perspiciatis est repudiandae voluptates ut fugit alias! Eum magni esse aut velit illum qui excepturi aperiam. Ex dolores asperiores ut debitis omnis qui consequuntur dolore. Est voluptatem mollitia et quibusdam unde ea accusamus fuga. Cum quis galisum et impedit sunt qui aliquam perspiciatis sed modi quidem qui nisi molestias. Aut temporibus architecto ut neque voluptatem et consequatur deleniti sed accusantium quibusdam et omnis dignissimos ad rerum ipsam et rerum quia. Ut nihil repellat et eaque molestias quo iusto ipsum At optio sint eos quidem earum?\r\rEx deleniti unde eum tenetur rerum ea dolore numquam? Eos aperiam officiis et neque explicabo et enim atque ut eaque omnis non illum eveniet est molestias itaque et ratione voluptatem. Ea deserunt nemo et quos tempora et nostrum aperiam sit necessitatibus illo sit culpa placeat. Vel tempore quibusdam ut velit voluptate aut odio facere non voluptas earum est odio galisum et voluptas harum. Et blanditiis sapiente et nostrum laborum aut voluptatem explicabo a quasi assumenda. Est voluptatem quia eum minima galisum quo totam excepturi aut facilis enim vel voluptate repudiandae sit distinctio laboriosam. Quo possimus molestiae et molestiae accusantium est voluptas omnis sed obcaecati natus. Non vitae asperiores qui nostrum enim id saepe fugiat et incidunt quasi.\r\rEos ipsa facilis aut excepturi voluptatem a omnis magni vel magni iste. Sed ipsum consequatur qui reprehenderit deleniti et soluta molestiae. 
Ut vero assumenda id dolor ipsum in deleniti voluptatem aut quis quisquam sed repudiandae temporibus ab quia inventore. Sed velit fugit vel facere cumque et delectus ullam sed eaque impedit. Est veritatis dignissimos aut doloribus dolorem vel pariatur repellendus sit nesciunt similique eum architecto quia. Ea expedita veritatis eum dolorem molestiae ut enim fugit aut beatae quibusdam. Aut voluptas natus in quidem deleniti aut animi iure est incidunt tenetur qui culpa maiores! Et nostrum quaerat qui consequatur consequatur aut aliquam atque aut praesentium rerum et consequuntur exercitationem. Non accusantium ipsa vel consectetur vitae ut magnam autem et natus rerum ut consectetur inventore est doloremque temporibus 33 dolores doloribus! Aut perferendis optio et nostrum repellendus et fugit itaque ut nisi neque sed sint quaerat. Aut placeat architecto et eius sapiente eum molestiae quam. Quo mollitia sapiente non Quis neque non tempora laudantium. Quo distinctio quos et molestias natus sit veritatis consequuntur aut repellendus neque a porro galisum cum numquam nesciunt et animi earum? Aut dolorum dolore non assumenda omnis et molestiae amet id sint vero est eligendi harum sit temporibus magnam aut ipsam quos.\r\r"""  # noqa: E501
 
 
-def test_texteditor_plugin(target_win, fs_win, tmp_path, target_win_users, caplog):
+def test_windows_tab_parsing(tmp_path):
+    # Standalone parsing of tab files, so not using the plugin
+    tab_files = Path(absolute_path("_data/plugins/apps/texteditor/windowsnotepad/"))
+    content_record = windowsnotepad.WindowsNotepadTabContent(tab_files / "unsaved-with-deletions.bin")
+    assert content_record.content == "Not saved aasdflasd"
+    content_record_with_deletions = windowsnotepad.WindowsNotepadTabContent(
+        tab_files / "unsaved-with-deletions.bin", include_deleted_content=True
+    )
+    assert content_record_with_deletions.content == "Not saved aasdflasd --- DELETED-CONTENT: snUlltllafds tjkf"
+
+
+def test_windows_tab_plugin_deleted_contents(target_win, fs_win, tmp_path, target_win_users, caplog):
+    file_text_map = {
+        "unsaved-with-deletions.bin": "Not saved aasdflasd --- DELETED-CONTENT: snUlltllafds tjkf",
+        "lots-of-deletions.bin": "This a text, which is nothing special. But I am going to modify it a bit. "
+        "For example, I have removed quote some stuff. "
+        "Adding a word in the beginning now... "
+        "At this point, I've edited it quite a lot. --- DELETED-CONTENT: "
+        "b a ,elpmac ydaerlae already thi laiceps emos",
+    }
+
+    tabcache = absolute_path("_data/plugins/apps/texteditor/windowsnotepad/")
+
+    user = target_win_users.user_details.find(username="John")
+    tab_dir = user.home_path.joinpath(
+        "AppData/Local/Packages/Microsoft.WindowsNotepad_8wekyb3d8bbwe/LocalState/TabState"
+    )
+
+    fs_win.map_dir("Users\\John", tmp_path)
+
+    for file in file_text_map.keys():
+        tab_file = str(tab_dir.joinpath(file))[3:]
+        fs_win.map_file(tab_file, os.path.join(tabcache, file))
+
+    target_win.add_plugin(windowsnotepad.WindowsNotepadPlugin)
+
+    records = list(target_win.windowsnotepad.tabs(include_deleted_content=True))
+
+    # Check the amount of files
+    assert len(list(tab_dir.iterdir())) == len(file_text_map.keys())
+    assert len(records) == len(file_text_map.keys())
+
+    # The recovered content in the records should match the original data, as well as the length
+    for rec in records:
+        print(rec.content)
+        assert rec.content == file_text_map[rec.path.name]
+        assert len(rec.content) == len(file_text_map[rec.path.name])
+
+
+def test_windows_tab_plugin_default(target_win, fs_win, tmp_path, target_win_users, caplog):
     file_text_map = {
         "c515e86f-08b3-4d76-844a-cddfcd43fcbb.bin": text1,
         "85167c9d-aac2-4469-ae44-db5dccf8f7f4.bin": text2,
@@ -58,7 +108,7 @@ def test_texteditor_plugin(target_win, fs_win, tmp_path, target_win_users, caplo
 
     target_win.add_plugin(windowsnotepad.WindowsNotepadPlugin)
 
-    records = list(target_win.windowsnotepad.tabs())
+    records = list(target_win.windowsnotepad.tabs(include_deleted_content=False))
 
     # Check the amount of files
     assert len(list(tab_dir.iterdir())) == len(file_text_map.keys())

From f808bc71fa33fa7ca40c57289f916cb5c613ad89 Mon Sep 17 00:00:00 2001
From: Joost Jansen <12032793+joost-j@users.noreply.github.com>
Date: Fri, 26 Apr 2024 12:18:15 +0200
Subject: [PATCH 28/36] Removed fh.read() and re-added them to the c_def

---
 .../plugins/apps/texteditor/windowsnotepad.py | 56 +++++++++----------
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
index 663f14ad6..847dea22e 100644
--- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py
+++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
@@ -46,15 +46,30 @@
     uint8       unk0;
     uleb128     fileSize;
     uleb128     fileSizeDuplicate;
-    uint8       unk1;
-    uint8       unk2;
+    char        unk1;
+    char        unk2;
 };
 
-struct data_block {
+struct single_data_block {
     uleb128     offset;
     uleb128     nDeleted;
     uleb128     nAdded;
     wchar       data[nAdded];
+    char        unk[1];
+    char        crc32[4];
+};
+
+struct multi_data_extra_header {
+    char        unk[4];
+    char        crc32[4];
+};
+
+struct multi_data_block {
+    uleb128     offset;
+    uleb128     nDeleted;
+    uleb128     nAdded;
+    wchar       data[nAdded];
+    char        crc32[4];
 };
 """
 
@@ -107,22 +122,16 @@ def _process_tab_file(file: TargetPath, include_deleted_content: bool) -> Window
             # This means that the data is stored in one block
             if tab.fileSize != 0:
                 # So we only parse one block
-                data_entry = c_windowstab.data_block(fh)
-
-                # An extra byte is appended to the single block, not yet sure where this is defined and/or used for
-                extra_byte = fh.read(1)
-
-                # The CRC32 value is appended after the extra byte in big-endian
-                defined_crc32 = fh.read(4)
+                data_entry = c_windowstab.single_data_block(fh)
 
                 # The header (minus the magic) plus all data (including the extra byte)  is included in the checksum
-                actual_crc32 = _calc_crc32(header.dumps()[3:] + tab.dumps() + data_entry.dumps() + extra_byte)
+                actual_crc32 = _calc_crc32(header.dumps()[3:] + tab.dumps() + data_entry.dumps()[:-4])
 
-                if defined_crc32 != actual_crc32:
+                if data_entry.crc32 != actual_crc32:
                     logging.warning(
                         "CRC32 mismatch in single-block file: %s (expected=%s, actual=%s)",
                         file.name,
-                        defined_crc32.hex(),
+                        data_entry.crc32.hex(),
                         actual_crc32.hex(),
                     )
 
@@ -132,21 +141,15 @@ def _process_tab_file(file: TargetPath, include_deleted_content: bool) -> Window
                 # Here, the fileSize is zeroed, meaning that the size is not known up front.
                 # Data may be stored in multiple, variable-length blocks. This happens, for example, when several
                 # additions and deletions of characters have been recorded and these changes have not been 'flushed'
-
-                # First, parse 4 unknown bytes. These likely
-                # hold some addition information about the tab (view options etc.)
-                unknown_bytes = fh.read(4)
-
-                # In this multi-block variant, the header itself has a CRC32 value in big-endian as well
-                defined_header_crc32 = fh.read(4)
+                mdeh = c_windowstab.multi_data_extra_header(fh)
 
                 # Calculate CRC32 of the header and check if it matches
-                actual_header_crc32 = _calc_crc32(header.dumps()[3:] + tab.dumps() + unknown_bytes)
-                if defined_header_crc32 != actual_header_crc32:
+                actual_header_crc32 = _calc_crc32(header.dumps()[3:] + tab.dumps() + mdeh.unk)
+                if mdeh.crc32 != actual_header_crc32:
                     logging.warning(
                         "CRC32 mismatch in header of multi-block file: %s " "expected=%s, actual=%s",
                         file.name,
-                        defined_header_crc32.hex(),
+                        mdeh.crc32.hex(),
                         actual_header_crc32.hex(),
                     )
 
@@ -160,18 +163,15 @@ def _process_tab_file(file: TargetPath, include_deleted_content: bool) -> Window
                     # Unfortunately, there is no way of determining how many blocks there are. So just try to parse
                     # until we reach EOF, after which we stop.
                     try:
-                        data_entry = c_windowstab.data_block(fh)
+                        data_entry = c_windowstab.multi_data_block(fh)
                     except EOFError:
                         break
 
-                    # Each block has a CRC32 value in big-endian appended to the block
-                    defined_crc32 = fh.read(4)
-
                     # Either the nAdded is nonzero, or the nDeleted
                     if data_entry.nAdded > 0:
                         # Check the CRC32 checksum for this block
                         actual_crc32 = _calc_crc32(data_entry.dumps())
-                        if defined_crc32 != actual_crc32:
+                        if data_entry.crc32 != actual_crc32:
                             logging.warning(
                                 "CRC32 mismatch in multi-block file: %s " "expected=%s, actual=%s",
                                 file.name,

From 9b38f3e003d5bab1f7bc4ebf3136a93cec749df0 Mon Sep 17 00:00:00 2001
From: Joost Jansen <12032793+joost-j@users.noreply.github.com>
Date: Fri, 26 Apr 2024 12:36:56 +0200
Subject: [PATCH 29/36] Added options and more test cases to support newest
 version

---
 .../plugins/apps/texteditor/windowsnotepad.py | 174 ++++++++++++------
 .../texteditor/windowsnotepad/new-format.bin  | Bin 0 -> 19 bytes
 .../stored_unsaved_with_new_data.bin          | Bin 0 -> 268 bytes
 .../apps/texteditor/test_texteditor.py        |  11 +-
 4 files changed, 126 insertions(+), 59 deletions(-)
 create mode 100755 tests/_data/plugins/apps/texteditor/windowsnotepad/new-format.bin
 create mode 100755 tests/_data/plugins/apps/texteditor/windowsnotepad/stored_unsaved_with_new_data.bin

diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
index 847dea22e..8794c983d 100644
--- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py
+++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
@@ -25,52 +25,70 @@
 # required for recovering text from these files.
 
 c_def = """
-struct header {
+struct file_header {
     char        magic[2]; // NP
-    uint8       unk0;
-    uint8       fileState; // 0 if unsaved, 1 if saved
+    uleb128     updateNumber; // increases on every settings update when fileType=9, 
+                              // doesn't seem to change on fileType 0 or 1
+    uleb128     fileType; // 0 if unsaved, 1 if saved, 9 if contains settings?
 }
 
-struct header_saved_tab {
+struct tab_header_saved {
     uleb128     filePathLength;
     wchar       filePath[filePathLength];
-    uleb128     fileSize;
+    uleb128     fileSize; // likely similar to fixedSizeBlockLength
     uleb128     encoding;
     uleb128     carriageReturnType;
     uleb128     timestamp; // Windows Filetime format (not unix timestamp)
     char        sha256[32];
-    char        unk[6];
+    char        unk0;
+    char        unk1;
+    uleb128     fixedSizeBlockLength;
+    uleb128     fixedSizeBlockLengthDuplicate;
+    uint8       wordWrap; // 1 if wordwrap enabled, 0 if disabled
+    uint8       rightToLeft;
+    uint8       showUnicode;
+    uint8       optionsVersion;
+};
+
+struct tab_header_unsaved {
+    char        unk0;
+    uleb128     fixedSizeBlockLength; // will always be 00 when unsaved because size is not yet known
+    uleb128     fixedSizeBlockLengthDuplicate; // will always be 00 when unsaved because size is not yet known
+    uint8       wordWrap; // 1 if wordwrap enabled, 0 if disabled
+    uint8       rightToLeft;
+    uint8       showUnicode;
+    uint8       optionsVersion;
 };
 
-struct header_unsaved_tab {
-    uint8       unk0;
-    uleb128     fileSize;
-    uleb128     fileSizeDuplicate;
+struct tab_header_crc32_stub {
     char        unk1;
     char        unk2;
+    char        crc32[4];
 };
 
-struct single_data_block {
-    uleb128     offset;
-    uleb128     nDeleted;
+struct fixed_size_data_block {
     uleb128     nAdded;
     wchar       data[nAdded];
-    char        unk[1];
-    char        crc32[4];
-};
-
-struct multi_data_extra_header {
-    char        unk[4];
+    uint8       hasRemainingVariableDataBlocks; // indicates whether after this single-data block more data will follow
     char        crc32[4];
 };
 
-struct multi_data_block {
+struct variable_size_data_block {
     uleb128     offset;
     uleb128     nDeleted;
     uleb128     nAdded;
     wchar       data[nAdded];
     char        crc32[4];
 };
+
+struct options_v1 {
+    uleb128     unk;
+};
+
+struct options_v2 {
+    uleb128     unk1; // likely autocorrect or spellcheck
+    uleb128     unk2; // likely autocorrect or spellcheck
+};
 """
 
 c_windowstab = cstruct()
@@ -108,24 +126,67 @@ def _process_tab_file(file: TargetPath, include_deleted_content: bool) -> Window
         """
         with file.open("rb") as fh:
             # Header is the same for all types
-            header = c_windowstab.header(fh)
-
-            # File can be saved, or unsaved. Depending on the file state, different header fields are present
-            # Currently, no information in the header is used in the outputted records, only the contents of the tab
-            tab = (
-                c_windowstab.header_saved_tab(fh)
-                if header.fileState == 0x01  # 0x00 is unsaved, 0x01 is saved
-                else c_windowstab.header_unsaved_tab(fh)
+            file_header = c_windowstab.file_header(fh)
+
+            # Tabs can be saved to a file with a filename on disk, or unsaved (kept in the TabState folder).
+            # Depending on the file's saved state, different header fields are present
+            tab_header = (
+                c_windowstab.tab_header_saved(fh)
+                if file_header.fileType == 0x01  # 0x00 is unsaved, 0x01 is saved, 0x09 is settings?
+                else c_windowstab.tab_header_unsaved(fh)
             )
 
-            # In the case that the file size is known up front, then this fileSize is set to a nonzero value
-            # This means that the data is stored in one block
-            if tab.fileSize != 0:
-                # So we only parse one block
-                data_entry = c_windowstab.single_data_block(fh)
+            # There appears to be an optionsVersion field that specifies the options that are passed.
+            # At the moment of writing, it is not clear whether this specifies a version or a number of bytes
+            # that is parsed, so just going with the 'optionsVersion' type for now.
+            # We don't use the options, but since they are required for the CRC32 checksum
+            # we store the byte representation
+            if tab_header.optionsVersion == 0:
+                # No options specified
+                options = b""
+            elif tab_header.optionsVersion == 1:
+                options = c_windowstab.options_v1(fh).dumps()
+            elif tab_header.optionsVersion == 2:
+                options = c_windowstab.options_v2(fh).dumps()
+            else:
+                # Raise an error, since we don't know how many bytes future optionsVersion values will occupy.
+                # Not knowing how many bytes to parse can mess up the alignment and structs.
+                raise Exception("Unknown option version")
 
-                # The header (minus the magic) plus all data (including the extra byte)  is included in the checksum
-                actual_crc32 = _calc_crc32(header.dumps()[3:] + tab.dumps() + data_entry.dumps()[:-4])
+            # If the file is not saved to disk and no fixedSizeBlockLength is present, an extra checksum stub
+            # is present. So parse that first
+            if file_header.fileType == 0 and tab_header.fixedSizeBlockLength == 0:
+                # Two unknown bytes before the CRC32
+                tab_header_crc32_stub = c_windowstab.tab_header_crc32_stub(fh)
+
+                # Calculate CRC32 of the header and check if it matches
+                actual_header_crc32 = _calc_crc32(
+                    file_header.dumps()[3:] + tab_header.dumps() + options + tab_header_crc32_stub.dumps()[:-4]
+                )
+                if tab_header_crc32_stub.crc32 != actual_header_crc32:
+                    logging.warning(
+                        "CRC32 mismatch in header of file: %s (expected=%s, actual=%s)",
+                        file.name,
+                        tab_header_crc32_stub.crc32.hex(),
+                        actual_header_crc32.hex(),
+                    )
+
+            # Used to store the final content
+            content = ""
+
+            # After a fixed_size_data_block, some more variable_size_data_blocks can be present. This boolean
+            # keeps track of whether more data is still present.
+            has_remaining_data = False
+
+            # In the case that a fixedSizeDataBlock is present, this value is set to a nonzero value
+            if tab_header.fixedSizeBlockLength > 0:
+                # So we parse the fixed size data block
+                data_entry = c_windowstab.fixed_size_data_block(fh)
+
+                # The header (minus the magic) plus all data is included in the checksum
+                actual_crc32 = _calc_crc32(
+                    file_header.dumps()[3:] + tab_header.dumps() + options + data_entry.dumps()[:-4]
+                )
 
                 if data_entry.crc32 != actual_crc32:
                     logging.warning(
@@ -135,45 +196,42 @@ def _process_tab_file(file: TargetPath, include_deleted_content: bool) -> Window
                         actual_crc32.hex(),
                     )
 
-                text = data_entry.data
+                # Add the content of the fixed size data block to the tab content
+                content += data_entry.data
 
-            else:
-                # Here, the fileSize is zeroed, meaning that the size is not known up front.
-                # Data may be stored in multiple, variable-length blocks. This happens, for example, when several
-                # additions and deletions of characters have been recorded and these changes have not been 'flushed'
-                mdeh = c_windowstab.multi_data_extra_header(fh)
+                # The hasRemainingVariableDataBlocks indicates whether more data will follow after this single block
+                if data_entry.hasRemainingVariableDataBlocks == 1:
+                    has_remaining_data = True
 
-                # Calculate CRC32 of the header and check if it matches
-                actual_header_crc32 = _calc_crc32(header.dumps()[3:] + tab.dumps() + mdeh.unk)
-                if mdeh.crc32 != actual_header_crc32:
-                    logging.warning(
-                        "CRC32 mismatch in header of multi-block file: %s " "expected=%s, actual=%s",
-                        file.name,
-                        mdeh.crc32.hex(),
-                        actual_header_crc32.hex(),
-                    )
+            # If fixedSizeBlockLength in the header has a value of zero, this means that the entire file consists of
+            # variable-length blocks. Furthermore, if there is any remaining data after the
+            # first fixed size block, we also want to continue parsing
+            if tab_header.fixedSizeBlockLength == 0 or has_remaining_data:
+                # Here, data is stored in variable-length blocks. This happens, for example, when several
+                # additions and deletions of characters have been recorded and these changes have not been 'flushed'
 
                 # Since we don't know the size of the file up front, and offsets don't necessarily have to be in order,
                 # a list is used to easily insert text at offsets
                 text = []
 
+                # Used to store the deleted content, if available and requested
                 deleted_content = ""
 
                 while True:
                     # Unfortunately, there is no way of determining how many blocks there are. So just try to parse
                     # until we reach EOF, after which we stop.
                     try:
-                        data_entry = c_windowstab.multi_data_block(fh)
+                        data_entry = c_windowstab.variable_size_data_block(fh)
                     except EOFError:
                         break
 
                     # Either the nAdded is nonzero, or the nDeleted
                     if data_entry.nAdded > 0:
                         # Check the CRC32 checksum for this block
-                        actual_crc32 = _calc_crc32(data_entry.dumps())
+                        actual_crc32 = _calc_crc32(data_entry.dumps()[:-4])
                         if data_entry.crc32 != actual_crc32:
                             logging.warning(
-                                "CRC32 mismatch in multi-block file: %s " "expected=%s, actual=%s",
+                                "CRC32 mismatch in multi-block file: %s (expected=%s, actual=%s)",
                                 file.name,
                                 data_entry.crc32.hex(),
                                 actual_crc32.hex(),
@@ -192,14 +250,18 @@ def _process_tab_file(file: TargetPath, include_deleted_content: bool) -> Window
                             )
                         text = text[: data_entry.offset] + text[data_entry.offset + data_entry.nDeleted :]
 
-                # Join all the characters to reconstruct the original text
+                # Join all the characters to reconstruct the original text within the variable-length data blocks
                 text = "".join(text)
 
+                # Add the deleted content, if specified
                 if include_deleted_content:
                     text += " --- DELETED-CONTENT: "
                     text += deleted_content
 
-        return WindowsNotepadTabContentRecord(content=text, path=file)
+                # Finally, add the reconstructed text to the tab content
+                content += text
+
+        return WindowsNotepadTabContentRecord(content=content, path=file)
 
 
 class WindowsNotepadPlugin(TexteditorPlugin):
@@ -242,5 +304,5 @@ def tabs(self, include_deleted_content) -> Iterator[WindowsNotepadTabRecord]:
             # Parse the file
             r: WindowsNotepadTabContentRecord = WindowsNotepadTabContent(file, include_deleted_content)
 
-            # Add user- and target specific information to the content record record
+            # Add user- and target specific information to the content record
             yield WindowsNotepadTabRecord(content=r.content, path=r.path, _target=self.target, _user=user)
diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/new-format.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/new-format.bin
new file mode 100755
index 0000000000000000000000000000000000000000..8773f88ff4840b596df7d445b42a25ef3b7d0d99
GIT binary patch
literal 19
VcmeYZU|?VbBPK>5z182H82}Iq0rLO=

literal 0
HcmV?d00001

diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/stored_unsaved_with_new_data.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/stored_unsaved_with_new_data.bin
new file mode 100755
index 0000000000000000000000000000000000000000..f41219d57a7950fa8b6e55bef8d7164055e5439e
GIT binary patch
literal 268
zcmeYZU|?XBl41ao3{t@iB@FotMGUD7DGUl=wgQlq$xzIY4df>=lmf*RfMgy}B$1&E
ztd8-(^g}D97#MXJt~6FImu6s8U~mnQmzH5*Okyy#e%B+*z*x$_d&7B^90OwsLvS*O
zwLD18hATU7D=;vYGo;1G$}2K3W-|2Gc^4>w)R=FI-J{IFn8EO7sb-)GNO78~fsZN!
zV=BWn??rpm7#J%U^4hl)sDl*$54v<lgMqP#K|A8nY)uBnM27O~T|2cvx^9a0>1s1D
qrZ5yF=+4mrN$odhJ*f-Qwe``dTs@G;(eGg^^%)rT82Za}E&~9owL~WX

literal 0
HcmV?d00001

diff --git a/tests/plugins/apps/texteditor/test_texteditor.py b/tests/plugins/apps/texteditor/test_texteditor.py
index baaddaa49..f85091090 100644
--- a/tests/plugins/apps/texteditor/test_texteditor.py
+++ b/tests/plugins/apps/texteditor/test_texteditor.py
@@ -69,7 +69,6 @@ def test_windows_tab_plugin_deleted_contents(target_win, fs_win, tmp_path, targe
 
     # The recovered content in the records should match the original data, as well as the length
     for rec in records:
-        print(rec.content)
         assert rec.content == file_text_map[rec.path.name]
         assert len(rec.content) == len(file_text_map[rec.path.name])
 
@@ -91,6 +90,8 @@ def test_windows_tab_plugin_default(target_win, fs_win, tmp_path, target_win_use
         "lots-of-deletions.bin": text7,
         "appclosed_saved_and_deletions.bin": text8,
         "appclosed_unsaved.bin": "Closing application now",
+        "new-format.bin": "",
+        "stored_unsaved_with_new_data.bin": "Stored to disk but unsaved, but with extra data.",
     }
 
     tabcache = absolute_path("_data/plugins/apps/texteditor/windowsnotepad/")
@@ -114,8 +115,12 @@ def test_windows_tab_plugin_default(target_win, fs_win, tmp_path, target_win_use
     assert len(list(tab_dir.iterdir())) == len(file_text_map.keys())
     assert len(records) == len(file_text_map.keys())
 
-    # One file should still return contents, but there should be an entry for in the logging for a CRC missmatch.
-    assert "CRC32 mismatch in single-block file: wrong-checksum.bin (expected=deadbeef, actual=a48d30a6)" in caplog.text
+    for line in caplog.text.split("\n"):
+        # One file should still return contents, but there should be an entry for in the logging for a CRC missmatch.
+        assert (
+            "CRC32 mismatch in single-block file: wrong-checksum.bin (expected=deadbeef, actual=a48d30a6)" in line
+            or not "CRC32 mismatch" in line
+        )
 
     # The recovered content in the records should match the original data, as well as the length
     for rec in records:

From a3b6f27d879a3343f18be614950f06a2d49d7008 Mon Sep 17 00:00:00 2001
From: Joost Jansen <12032793+joost-j@users.noreply.github.com>
Date: Wed, 8 May 2024 14:52:34 +0200
Subject: [PATCH 30/36] Added separate records for unsaved/saved tabs, included
 more data (timestamp,saved_path,sha256) in the fields

---
 .../plugins/apps/texteditor/windowsnotepad.py | 105 +++++++++++++++---
 .../apps/texteditor/test_texteditor.py        |  69 ++++++++++--
 2 files changed, 147 insertions(+), 27 deletions(-)

diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
index 8794c983d..4da9fccbe 100644
--- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py
+++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
@@ -5,11 +5,16 @@
 from typing import Iterator
 
 from dissect.cstruct import cstruct
+from dissect.util.ts import wintimestamp
 
 from dissect.target.exceptions import UnsupportedPluginError
-from dissect.target.helpers.descriptor_extensions import UserRecordDescriptorExtension
+from dissect.target.helpers.descriptor_extensions import (
+    RecordDescriptorExtensionBase,
+    UserRecordDescriptorExtension,
+)
 from dissect.target.helpers.fsutil import TargetPath
 from dissect.target.helpers.record import (
+    DynamicDescriptor,
     UnixUserRecord,
     WindowsUserRecord,
     create_extended_descriptor,
@@ -27,7 +32,7 @@
 c_def = """
 struct file_header {
     char        magic[2]; // NP
-    uleb128     updateNumber; // increases on every settings update when fileType=9, 
+    uleb128     updateNumber; // increases on every settings update when fileType=9,
                               // doesn't seem to change on fileType 0 or 1
     uleb128     fileType; // 0 if unsaved, 1 if saved, 9 if contains settings?
 }
@@ -91,17 +96,59 @@
 };
 """
 
-c_windowstab = cstruct()
-c_windowstab.load(c_def)
+WINDOWS_SAVED_TABS_EXTRA_FIELDS = [("datetime", "modification_time"), ("string", "sha256"), ("path", "saved_path")]
+
+
+class WindowsSavedTabRecordDescriptorExtension(RecordDescriptorExtensionBase):
+    """RecordDescriptorExtension used to add extra fields to tabs that are saved to disk and contain more info."""
+
+    _default_fields = WINDOWS_SAVED_TABS_EXTRA_FIELDS
+
+    _input_fields = ("_saved",)
+
+    def _fill_default_fields(self, record_kwargs):
+        r: WindowsNotepadSavedTabContentRecord = record_kwargs.get("_saved", None)
+
+        modification_time = None
+        saved_path = None
+        sha256 = None
+
+        if r:
+            modification_time = r.modification_time
+            sha256 = r.sha256
+            saved_path = r.saved_path
+
+        record_kwargs.update({"modification_time": modification_time, "sha256": sha256, "saved_path": saved_path})
+        return record_kwargs
+
+
+# Different Record types for both saved/unsaved tabs, and with/without UserRecordDescriptor so that the
+# plugin can be used as a standalone tool as well
+
 
-WindowsNotepadTabRecord = create_extended_descriptor([UserRecordDescriptorExtension])(
-    "texteditor/windowsnotepad/tab", GENERIC_TAB_CONTENTS_RECORD_FIELDS
+WindowsNotepadUnsavedTabRecord = create_extended_descriptor([UserRecordDescriptorExtension])(
+    "texteditor/windowsnotepad/tab/unsaved",
+    GENERIC_TAB_CONTENTS_RECORD_FIELDS,
 )
 
-WindowsNotepadTabContentRecord = create_extended_descriptor([])(
-    "texteditor/windowsnotepad/tab_content", GENERIC_TAB_CONTENTS_RECORD_FIELDS
+WindowsNotepadSavedTabRecord = create_extended_descriptor(
+    [UserRecordDescriptorExtension, WindowsSavedTabRecordDescriptorExtension]
+)(
+    "texteditor/windowsnotepad/tab/saved",
+    GENERIC_TAB_CONTENTS_RECORD_FIELDS,
 )
 
+WindowsNotepadUnsavedTabContentRecord = create_extended_descriptor([])(
+    "texteditor/windowsnotepad/tab_content/unsaved", GENERIC_TAB_CONTENTS_RECORD_FIELDS
+)
+
+WindowsNotepadSavedTabContentRecord = create_extended_descriptor([])(
+    "texteditor/windowsnotepad/tab_content/saved", GENERIC_TAB_CONTENTS_RECORD_FIELDS + WINDOWS_SAVED_TABS_EXTRA_FIELDS
+)
+
+c_windowstab = cstruct()
+c_windowstab.load(c_def)
+
 
 def _calc_crc32(data: bytes) -> bytes:
     """Perform a CRC32 checksum on the data and return it as bytes."""
@@ -109,13 +156,17 @@ def _calc_crc32(data: bytes) -> bytes:
 
 
 class WindowsNotepadTabContent:
-    """Windows notepad tab parser"""
+    """Windows notepad tab content parser"""
 
-    def __new__(cls, file: TargetPath, include_deleted_content=False) -> WindowsNotepadTabContentRecord:
+    def __new__(
+        cls, file: TargetPath, include_deleted_content=False
+    ) -> WindowsNotepadSavedTabContentRecord | WindowsNotepadUnsavedTabContentRecord:
         return cls._process_tab_file(file, include_deleted_content)
 
     @staticmethod
-    def _process_tab_file(file: TargetPath, include_deleted_content: bool) -> WindowsNotepadTabContentRecord:
+    def _process_tab_file(
+        file: TargetPath, include_deleted_content: bool
+    ) -> WindowsNotepadSavedTabContentRecord | WindowsNotepadUnsavedTabContentRecord:
         """Parse a binary tab file and reconstruct the contents.
 
         Args:
@@ -132,7 +183,7 @@ def _process_tab_file(file: TargetPath, include_deleted_content: bool) -> Window
             # Depending on the file's saved state, different header fields are present
             tab_header = (
                 c_windowstab.tab_header_saved(fh)
-                if file_header.fileType == 0x01  # 0x00 is unsaved, 0x01 is saved, 0x09 is settings?
+                if file_header.fileType == 1  # 0 is unsaved, 1 is saved, 9 is settings?
                 else c_windowstab.tab_header_unsaved(fh)
             )
 
@@ -261,7 +312,16 @@ def _process_tab_file(file: TargetPath, include_deleted_content: bool) -> Window
                 # Finally, add the reconstructed text to the tab content
                 content += text
 
-        return WindowsNotepadTabContentRecord(content=content, path=file)
+        if file_header.fileType == 0:
+            return WindowsNotepadUnsavedTabContentRecord(content=content, path=file)
+        else:
+            return WindowsNotepadSavedTabContentRecord(
+                content=content,
+                path=file,
+                modification_time=wintimestamp(tab_header.timestamp),
+                sha256=tab_header.sha256.hex(),
+                saved_path=tab_header.filePath,
+            )
 
 
 class WindowsNotepadPlugin(TexteditorPlugin):
@@ -276,6 +336,8 @@ def __init__(self, target):
         self.users_tabs: list[TargetPath, UnixUserRecord | WindowsUserRecord] = []
         for user_details in self.target.user_details.all_with_home():
             for tab_file in user_details.home_path.glob(self.GLOB):
+                # These files seem to contain information on different settings / configurations,
+                # and are skipped for now
                 if tab_file.name.endswith(".1.bin") or tab_file.name.endswith(".0.bin"):
                     continue
 
@@ -292,8 +354,8 @@ def check_compatible(self) -> None:
         required=False,
         help="Include deleted but recoverable content.",
     )
-    @export(record=WindowsNotepadTabRecord)
-    def tabs(self, include_deleted_content) -> Iterator[WindowsNotepadTabRecord]:
+    @export(record=DynamicDescriptor(["path", "datetime", "string"]))
+    def tabs(self, include_deleted_content) -> Iterator[WindowsNotepadSavedTabRecord | WindowsNotepadUnsavedTabRecord]:
         """Return contents from Windows 11 temporary Notepad tabs.
 
         Yields TextEditorTabRecord with the following fields:
@@ -302,7 +364,14 @@ def tabs(self, include_deleted_content) -> Iterator[WindowsNotepadTabRecord]:
         """
         for file, user in self.users_tabs:
             # Parse the file
-            r: WindowsNotepadTabContentRecord = WindowsNotepadTabContent(file, include_deleted_content)
+            r: WindowsNotepadSavedTabContentRecord | WindowsNotepadUnsavedTabContentRecord = WindowsNotepadTabContent(
+                file, include_deleted_content
+            )
 
-            # Add user- and target specific information to the content record
-            yield WindowsNotepadTabRecord(content=r.content, path=r.path, _target=self.target, _user=user)
+            # If the modification_time attribute is present, this means that it's a WindowsNotepadSavedTabContentRecord
+            if hasattr(r, "modification_time"):
+                yield WindowsNotepadSavedTabRecord(
+                    content=r.content, path=r.path, _saved=r, _target=self.target, _user=user
+                )
+            else:
+                yield WindowsNotepadUnsavedTabRecord(content=r.content, path=r.path, _target=self.target, _user=user)
diff --git a/tests/plugins/apps/texteditor/test_texteditor.py b/tests/plugins/apps/texteditor/test_texteditor.py
index f85091090..c5e7d5906 100644
--- a/tests/plugins/apps/texteditor/test_texteditor.py
+++ b/tests/plugins/apps/texteditor/test_texteditor.py
@@ -1,7 +1,12 @@
 import os
 from pathlib import Path
 
-from dissect.target.plugins.apps.texteditor import windowsnotepad
+from flow.record.fieldtypes import datetime as dt
+
+from dissect.target.plugins.apps.texteditor.windowsnotepad import (
+    WindowsNotepadPlugin,
+    WindowsNotepadTabContent,
+)
 from tests._utils import absolute_path
 
 text1 = "This is an unsaved tab, UTF-8 encoded with Windows (CRLF). It's only 88 characters long."
@@ -27,12 +32,11 @@
 
 def test_windows_tab_parsing(tmp_path):
     # Standalone parsing of tab files, so not using the plugin
-    tab_files = Path(absolute_path("_data/plugins/apps/texteditor/windowsnotepad/"))
-    content_record = windowsnotepad.WindowsNotepadTabContent(tab_files / "unsaved-with-deletions.bin")
+    tab_file = Path(absolute_path("_data/plugins/apps/texteditor/windowsnotepad/unsaved-with-deletions.bin"))
+    content_record = WindowsNotepadTabContent(tab_file)
     assert content_record.content == "Not saved aasdflasd"
-    content_record_with_deletions = windowsnotepad.WindowsNotepadTabContent(
-        tab_files / "unsaved-with-deletions.bin", include_deleted_content=True
-    )
+
+    content_record_with_deletions = WindowsNotepadTabContent(tab_file, include_deleted_content=True)
     assert content_record_with_deletions.content == "Not saved aasdflasd --- DELETED-CONTENT: snUlltllafds tjkf"
 
 
@@ -59,7 +63,7 @@ def test_windows_tab_plugin_deleted_contents(target_win, fs_win, tmp_path, targe
         tab_file = str(tab_dir.joinpath(file))[3:]
         fs_win.map_file(tab_file, os.path.join(tabcache, file))
 
-    target_win.add_plugin(windowsnotepad.WindowsNotepadPlugin)
+    target_win.add_plugin(WindowsNotepadPlugin)
 
     records = list(target_win.windowsnotepad.tabs(include_deleted_content=True))
 
@@ -107,7 +111,7 @@ def test_windows_tab_plugin_default(target_win, fs_win, tmp_path, target_win_use
         tab_file = str(tab_dir.joinpath(file))[3:]
         fs_win.map_file(tab_file, os.path.join(tabcache, file))
 
-    target_win.add_plugin(windowsnotepad.WindowsNotepadPlugin)
+    target_win.add_plugin(WindowsNotepadPlugin)
 
     records = list(target_win.windowsnotepad.tabs(include_deleted_content=False))
 
@@ -119,10 +123,57 @@ def test_windows_tab_plugin_default(target_win, fs_win, tmp_path, target_win_use
         # One file should still return contents, but there should be an entry for in the logging for a CRC missmatch.
         assert (
             "CRC32 mismatch in single-block file: wrong-checksum.bin (expected=deadbeef, actual=a48d30a6)" in line
-            or not "CRC32 mismatch" in line
+            or "CRC32 mismatch" not in line
         )
 
     # The recovered content in the records should match the original data, as well as the length
     for rec in records:
         assert rec.content == file_text_map[rec.path.name]
         assert len(rec.content) == len(file_text_map[rec.path.name])
+
+
+def test_windows_saved_tab_plugin_extra_fields(target_win, fs_win, tmp_path, target_win_users, caplog):
+    file_text_map = {
+        "saved.bin": (
+            "Saved!",
+            "C:\\Users\\user\\Desktop\\Saved!.txt",
+            dt(2024, 3, 28, 13, 7, 55, 482183),
+            "ed9b760289e614c9dc8776e7280abe870be0a85019a32220b35acc54c0ecfbc1",
+        ),
+        "appclosed_saved_and_deletions.bin": (
+            text8,
+            "C:\\Users\\user\\Desktop\\Saved.txt",
+            dt(2024, 3, 28, 13, 16, 21, 158279),
+            "8d0533144aa42e2d81e7474332bdef6473e42b699041528d55a62e5391e914ce",
+        ),
+    }
+
+    tabcache = absolute_path("_data/plugins/apps/texteditor/windowsnotepad/")
+
+    user = target_win_users.user_details.find(username="John")
+    tab_dir = user.home_path.joinpath(
+        "AppData/Local/Packages/Microsoft.WindowsNotepad_8wekyb3d8bbwe/LocalState/TabState"
+    )
+
+    fs_win.map_dir("Users\\John", tmp_path)
+
+    for file in file_text_map.keys():
+        tab_file = str(tab_dir.joinpath(file))[3:]
+        fs_win.map_file(tab_file, os.path.join(tabcache, file))
+
+    target_win.add_plugin(WindowsNotepadPlugin)
+
+    records = list(target_win.windowsnotepad.tabs(include_deleted_content=False))
+
+    # Check the amount of files
+    assert len(list(tab_dir.iterdir())) == len(file_text_map.keys())
+    assert len(records) == len(file_text_map.keys())
+
+    # The recovered content in the records should match the original data, as well as the length and all the
+    # other saved metadata
+    for rec in records:
+        assert len(rec.content) == len(file_text_map[rec.path.name][0])
+        assert rec.content == file_text_map[rec.path.name][0]
+        assert rec.saved_path == file_text_map[rec.path.name][1]
+        assert rec.modification_time == file_text_map[rec.path.name][2]
+        assert rec.sha256 == file_text_map[rec.path.name][3]

From 677817c389da00c3f22c5178d863fc3bcf254eb5 Mon Sep 17 00:00:00 2001
From: Joost Jansen <12032793+joost-j@users.noreply.github.com>
Date: Mon, 13 May 2024 10:27:59 +0200
Subject: [PATCH 31/36] Change cstruct version

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 743e3aaac..e2db9523c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,7 +26,7 @@ classifiers = [
 ]
 dependencies = [
     "defusedxml",
-    "dissect.cstruct>=3.14.dev4,<4.0.dev",
+    "dissect.cstruct>=3.14.dev,<4.0.dev",
     "dissect.eventlog>=3.0.dev,<4.0.dev",
     "dissect.evidence>=3.0.dev,<4.0.dev",
     "dissect.hypervisor>=3.0.dev,<4.0.dev",

From 9674e377db6c35484178b3b89655fbb981e13832 Mon Sep 17 00:00:00 2001
From: Joost Jansen <12032793+joost-j@users.noreply.github.com>
Date: Wed, 14 Aug 2024 11:18:54 +0200
Subject: [PATCH 32/36] Remove the --include-deleted-content arg and make it
 default

---
 .../plugins/apps/texteditor/texteditor.py     |  5 +-
 .../plugins/apps/texteditor/windowsnotepad.py | 85 ++++++++-----------
 .../apps/texteditor/test_texteditor.py        | 74 +++++++++-------
 3 files changed, 79 insertions(+), 85 deletions(-)

diff --git a/dissect/target/plugins/apps/texteditor/texteditor.py b/dissect/target/plugins/apps/texteditor/texteditor.py
index ab3fadf03..1063d4919 100644
--- a/dissect/target/plugins/apps/texteditor/texteditor.py
+++ b/dissect/target/plugins/apps/texteditor/texteditor.py
@@ -2,10 +2,7 @@
 from dissect.target.helpers.record import create_extended_descriptor
 from dissect.target.plugin import NamespacePlugin
 
-GENERIC_TAB_CONTENTS_RECORD_FIELDS = [
-    ("string", "content"),
-    ("path", "path"),
-]
+GENERIC_TAB_CONTENTS_RECORD_FIELDS = [("string", "content"), ("path", "path"), ("string", "deleted_content")]
 
 TexteditorTabContentRecord = create_extended_descriptor([UserRecordDescriptorExtension])(
     "texteditor/tab", GENERIC_TAB_CONTENTS_RECORD_FIELDS
diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
index 4da9fccbe..d1406d7c7 100644
--- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py
+++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
@@ -6,6 +6,7 @@
 
 from dissect.cstruct import cstruct
 from dissect.util.ts import wintimestamp
+from flow.record.fieldtypes import digest
 
 from dissect.target.exceptions import UnsupportedPluginError
 from dissect.target.helpers.descriptor_extensions import (
@@ -19,7 +20,7 @@
     WindowsUserRecord,
     create_extended_descriptor,
 )
-from dissect.target.plugin import arg, export
+from dissect.target.plugin import export
 from dissect.target.plugins.apps.texteditor.texteditor import (
     GENERIC_TAB_CONTENTS_RECORD_FIELDS,
     TexteditorPlugin,
@@ -96,7 +97,7 @@
 };
 """
 
-WINDOWS_SAVED_TABS_EXTRA_FIELDS = [("datetime", "modification_time"), ("string", "sha256"), ("path", "saved_path")]
+WINDOWS_SAVED_TABS_EXTRA_FIELDS = [("datetime", "modification_time"), ("digest", "hashes"), ("path", "saved_path")]
 
 
 class WindowsSavedTabRecordDescriptorExtension(RecordDescriptorExtensionBase):
@@ -111,14 +112,14 @@ def _fill_default_fields(self, record_kwargs):
 
         modification_time = None
         saved_path = None
-        sha256 = None
+        hashes = None
 
         if r:
             modification_time = r.modification_time
-            sha256 = r.sha256
+            hashes = r.hashes
             saved_path = r.saved_path
 
-        record_kwargs.update({"modification_time": modification_time, "sha256": sha256, "saved_path": saved_path})
+        record_kwargs.update({"modification_time": modification_time, "hashes": hashes, "saved_path": saved_path})
         return record_kwargs
 
 
@@ -139,7 +140,8 @@ def _fill_default_fields(self, record_kwargs):
 )
 
 WindowsNotepadUnsavedTabContentRecord = create_extended_descriptor([])(
-    "texteditor/windowsnotepad/tab_content/unsaved", GENERIC_TAB_CONTENTS_RECORD_FIELDS
+    "texteditor/windowsnotepad/tab_content/unsaved",
+    GENERIC_TAB_CONTENTS_RECORD_FIELDS,
 )
 
 WindowsNotepadSavedTabContentRecord = create_extended_descriptor([])(
@@ -158,14 +160,12 @@ def _calc_crc32(data: bytes) -> bytes:
 class WindowsNotepadTabContent:
     """Windows notepad tab content parser"""
 
-    def __new__(
-        cls, file: TargetPath, include_deleted_content=False
-    ) -> WindowsNotepadSavedTabContentRecord | WindowsNotepadUnsavedTabContentRecord:
-        return cls._process_tab_file(file, include_deleted_content)
+    def __new__(cls, file: TargetPath) -> WindowsNotepadSavedTabContentRecord | WindowsNotepadUnsavedTabContentRecord:
+        return cls._process_tab_file(file)
 
     @staticmethod
     def _process_tab_file(
-        file: TargetPath, include_deleted_content: bool
+        file: TargetPath,
     ) -> WindowsNotepadSavedTabContentRecord | WindowsNotepadUnsavedTabContentRecord:
         """Parse a binary tab file and reconstruct the contents.
 
@@ -225,10 +225,6 @@ def _process_tab_file(
             # Used to store the final content
             content = ""
 
-            # After a fixed_size_data_block, some more variable_size_data_blocks can be present. This boolean
-            # keeps track of whether more data is still present.
-            has_remaining_data = False
-
             # In the case that a fixedSizeDataBlock is present, this value is set to a nonzero value
             if tab_header.fixedSizeBlockLength > 0:
                 # So we parse the fixed size data block
@@ -250,14 +246,16 @@ def _process_tab_file(
                 # Add the content of the fixed size data block to the tab content
                 content += data_entry.data
 
-                # The hasRemainingVariableDataBlocks indicates whether more data will follow after this single block
-                if data_entry.hasRemainingVariableDataBlocks == 1:
-                    has_remaining_data = True
+            # Used to store the deleted content, if available
+            deleted_content = ""
 
             # If fixedSizeBlockLength in the header has a value of zero, this means that the entire file consists of
             # variable-length blocks. Furthermore, if there is any remaining data after the
-            # first fixed size blocks, also continue we also want to continue parsing
-            if tab_header.fixedSizeBlockLength == 0 or has_remaining_data:
+            # first fixed size blocks, as indicated by the value of hasRemainingVariableDataBlocks,
+            # we also want to continue parsing
+            if tab_header.fixedSizeBlockLength == 0 or (
+                tab_header.fixedSizeBlockLength > 0 and data_entry.hasRemainingVariableDataBlocks == 1
+            ):
                 # Here, data is stored in variable-length blocks. This happens, for example, when several
                 # additions and deletions of characters have been recorded and these changes have not been 'flushed'
 
@@ -265,9 +263,6 @@ def _process_tab_file(
                 # a list is used to easily insert text at offsets
                 text = []
 
-                # Used to store the deleted content, if available and requested
-                deleted_content = ""
-
                 while True:
                     # Unfortunately, there is no way of determining how many blocks there are. So just try to parse
                     # until we reach EOF, after which we stop.
@@ -295,32 +290,28 @@ def _process_tab_file(
                     elif data_entry.nDeleted > 0:
                         # Create a new slice. Include everything up to the offset,
                         # plus everything after the nDeleted following bytes
-                        if include_deleted_content:
-                            deleted_content += "".join(
-                                text[data_entry.offset : data_entry.offset + data_entry.nDeleted]
-                            )
+                        deleted_content += "".join(text[data_entry.offset : data_entry.offset + data_entry.nDeleted])
                         text = text[: data_entry.offset] + text[data_entry.offset + data_entry.nDeleted :]
 
                 # Join all the characters to reconstruct the original text within the variable-length data blocks
                 text = "".join(text)
 
-                # Add the deleted content, if specified
-                if include_deleted_content:
-                    text += " --- DELETED-CONTENT: "
-                    text += deleted_content
-
                 # Finally, add the reconstructed text to the tab content
                 content += text
 
+        # Return None if no deleted content was found
+        deleted_content = deleted_content if deleted_content else None
+
         if file_header.fileType == 0:
-            return WindowsNotepadUnsavedTabContentRecord(content=content, path=file)
+            return WindowsNotepadUnsavedTabContentRecord(content=content, path=file, deleted_content=deleted_content)
         else:
             return WindowsNotepadSavedTabContentRecord(
                 content=content,
                 path=file,
                 modification_time=wintimestamp(tab_header.timestamp),
-                sha256=tab_header.sha256.hex(),
+                hashes=digest((None, None, tab_header.sha256.hex())),
                 saved_path=tab_header.filePath,
+                deleted_content=deleted_content,
             )
 
 
@@ -347,31 +338,29 @@ def check_compatible(self) -> None:
         if not self.users_tabs:
             raise UnsupportedPluginError("No Windows Notepad temporary tab files found")
 
-    @arg(
-        "--include-deleted-content",
-        type=bool,
-        default=False,
-        required=False,
-        help="Include deleted but recoverable content.",
-    )
     @export(record=DynamicDescriptor(["path", "datetime", "string"]))
-    def tabs(self, include_deleted_content) -> Iterator[WindowsNotepadSavedTabRecord | WindowsNotepadUnsavedTabRecord]:
+    def tabs(self) -> Iterator[WindowsNotepadSavedTabRecord | WindowsNotepadUnsavedTabRecord]:
         """Return contents from Windows 11 temporary Notepad tabs.
 
-        Yields TextEditorTabRecord with the following fields:
-            contents (string): The contents of the tab.
-            path (path): The path the content originates from.
+        Yields a WindowsNotepadSavedTabRecord or WindowsNotepadUnsavedTabRecord, depending on the state of the tab.
         """
         for file, user in self.users_tabs:
             # Parse the file
             r: WindowsNotepadSavedTabContentRecord | WindowsNotepadUnsavedTabContentRecord = WindowsNotepadTabContent(
-                file, include_deleted_content
+                file
             )
 
             # If the modification_time attribute is present, this means that it's a WindowsNotepadSavedTabContentRecord
             if hasattr(r, "modification_time"):
                 yield WindowsNotepadSavedTabRecord(
-                    content=r.content, path=r.path, _saved=r, _target=self.target, _user=user
+                    content=r.content,
+                    path=r.path,
+                    _saved=r,
+                    _target=self.target,
+                    _user=user,
+                    deleted_content=r.deleted_content,
                 )
             else:
-                yield WindowsNotepadUnsavedTabRecord(content=r.content, path=r.path, _target=self.target, _user=user)
+                yield WindowsNotepadUnsavedTabRecord(
+                    content=r.content, path=r.path, _target=self.target, _user=user, deleted_content=r.deleted_content
+                )
diff --git a/tests/plugins/apps/texteditor/test_texteditor.py b/tests/plugins/apps/texteditor/test_texteditor.py
index c5e7d5906..9a8394975 100644
--- a/tests/plugins/apps/texteditor/test_texteditor.py
+++ b/tests/plugins/apps/texteditor/test_texteditor.py
@@ -36,18 +36,21 @@ def test_windows_tab_parsing(tmp_path):
     content_record = WindowsNotepadTabContent(tab_file)
     assert content_record.content == "Not saved aasdflasd"
 
-    content_record_with_deletions = WindowsNotepadTabContent(tab_file, include_deleted_content=True)
-    assert content_record_with_deletions.content == "Not saved aasdflasd --- DELETED-CONTENT: snUlltllafds tjkf"
+    content_record_with_deletions = WindowsNotepadTabContent(tab_file)
+    assert content_record_with_deletions.content == "Not saved aasdflasd"
+    assert content_record_with_deletions.deleted_content == "snUlltllafds tjkf"
 
 
 def test_windows_tab_plugin_deleted_contents(target_win, fs_win, tmp_path, target_win_users, caplog):
     file_text_map = {
-        "unsaved-with-deletions.bin": "Not saved aasdflasd --- DELETED-CONTENT: snUlltllafds tjkf",
-        "lots-of-deletions.bin": "This a text, which is nothing special. But I am going to modify it a bit. "
-        "For example, I have removed quote some stuff. "
-        "Adding a word in the beginning now... "
-        "At this point, I've edited it quite a lot. --- DELETED-CONTENT: "
-        "b a ,elpmac ydaerlae already thi laiceps emos",
+        "unsaved-with-deletions.bin": ("Not saved aasdflasd", "snUlltllafds tjkf"),
+        "lots-of-deletions.bin": (
+            "This a text, which is nothing special. But I am going to modify it a bit. "
+            "For example, I have removed quote some stuff. "
+            "Adding a word in the beginning now... "
+            "At this point, I've edited it quite a lot.",
+            "b a ,elpmac ydaerlae already thi laiceps emos",
+        ),
     }
 
     tabcache = absolute_path("_data/plugins/apps/texteditor/windowsnotepad/")
@@ -65,7 +68,7 @@ def test_windows_tab_plugin_deleted_contents(target_win, fs_win, tmp_path, targe
 
     target_win.add_plugin(WindowsNotepadPlugin)
 
-    records = list(target_win.windowsnotepad.tabs(include_deleted_content=True))
+    records = list(target_win.windowsnotepad.tabs())
 
     # Check the amount of files
     assert len(list(tab_dir.iterdir())) == len(file_text_map.keys())
@@ -73,29 +76,33 @@ def test_windows_tab_plugin_deleted_contents(target_win, fs_win, tmp_path, targe
 
     # The recovered content in the records should match the original data, as well as the length
     for rec in records:
-        assert rec.content == file_text_map[rec.path.name]
-        assert len(rec.content) == len(file_text_map[rec.path.name])
+        print(rec)
+        assert rec.content == file_text_map[rec.path.name][0]
+        assert rec.deleted_content == file_text_map[rec.path.name][1]
 
 
 def test_windows_tab_plugin_default(target_win, fs_win, tmp_path, target_win_users, caplog):
     file_text_map = {
-        "c515e86f-08b3-4d76-844a-cddfcd43fcbb.bin": text1,
-        "85167c9d-aac2-4469-ae44-db5dccf8f7f4.bin": text2,
-        "dae80df8-e1e5-4996-87fe-b453f63fcb19.bin": text3,
-        "3f915e17-cf6c-462b-9bd1-2f23314cb979.bin": text4,
-        "ba291ccd-f1c3-4ca8-949c-c01f6633789d.bin": (text5 * 5),
-        "e609218e-94f2-45fa-84e2-f29df2190b26.bin": (text6 * 1260),
-        "3d0cc86e-dfc9-4f16-b74a-918c2c24188c.bin": loremipsum,
-        "wrong-checksum.bin": text4,  # only added to check for corrupt checksum, not validity
-        "cfe38135-9dca-4480-944f-d5ea0e1e589f.bin": (loremipsum * 37)[:-2],  # removed the two newlines in this file
-        "saved.bin": "Saved!",
-        "unsaved.bin": "Not saved at all",
-        "unsaved-with-deletions.bin": "Not saved aasdflasd",
-        "lots-of-deletions.bin": text7,
-        "appclosed_saved_and_deletions.bin": text8,
-        "appclosed_unsaved.bin": "Closing application now",
-        "new-format.bin": "",
-        "stored_unsaved_with_new_data.bin": "Stored to disk but unsaved, but with extra data.",
+        "c515e86f-08b3-4d76-844a-cddfcd43fcbb.bin": (text1, None),
+        "85167c9d-aac2-4469-ae44-db5dccf8f7f4.bin": (text2, None),
+        "dae80df8-e1e5-4996-87fe-b453f63fcb19.bin": (text3, "THis is "),
+        "3f915e17-cf6c-462b-9bd1-2f23314cb979.bin": (text4, None),
+        "ba291ccd-f1c3-4ca8-949c-c01f6633789d.bin": ((text5 * 5), None),
+        "e609218e-94f2-45fa-84e2-f29df2190b26.bin": ((text6 * 1260), None),
+        "3d0cc86e-dfc9-4f16-b74a-918c2c24188c.bin": (loremipsum, None),
+        "wrong-checksum.bin": (text4, None),  # only added to check for corrupt checksum, not validity
+        "cfe38135-9dca-4480-944f-d5ea0e1e589f.bin": (
+            (loremipsum * 37)[:-2],
+            None,
+        ),  # removed the two newlines in this file
+        "saved.bin": ("Saved!", None),
+        "unsaved.bin": ("Not saved at all", "snUllt"),
+        "unsaved-with-deletions.bin": ("Not saved aasdflasd", "snUlltllafds tjkf"),
+        "lots-of-deletions.bin": (text7, "b a ,elpmac ydaerlae already thi laiceps emos"),
+        "appclosed_saved_and_deletions.bin": (text8, None),
+        "appclosed_unsaved.bin": ("Closing application now", None),
+        "new-format.bin": ("", None),
+        "stored_unsaved_with_new_data.bin": ("Stored to disk but unsaved, but with extra data.", None),
     }
 
     tabcache = absolute_path("_data/plugins/apps/texteditor/windowsnotepad/")
@@ -113,7 +120,7 @@ def test_windows_tab_plugin_default(target_win, fs_win, tmp_path, target_win_use
 
     target_win.add_plugin(WindowsNotepadPlugin)
 
-    records = list(target_win.windowsnotepad.tabs(include_deleted_content=False))
+    records = list(target_win.windowsnotepad.tabs())
 
     # Check the amount of files
     assert len(list(tab_dir.iterdir())) == len(file_text_map.keys())
@@ -128,8 +135,9 @@ def test_windows_tab_plugin_default(target_win, fs_win, tmp_path, target_win_use
 
     # The recovered content in the records should match the original data, as well as the length
     for rec in records:
-        assert rec.content == file_text_map[rec.path.name]
-        assert len(rec.content) == len(file_text_map[rec.path.name])
+        print(rec)
+        assert rec.content == file_text_map[rec.path.name][0]
+        assert rec.deleted_content == file_text_map[rec.path.name][1]
 
 
 def test_windows_saved_tab_plugin_extra_fields(target_win, fs_win, tmp_path, target_win_users, caplog):
@@ -163,7 +171,7 @@ def test_windows_saved_tab_plugin_extra_fields(target_win, fs_win, tmp_path, tar
 
     target_win.add_plugin(WindowsNotepadPlugin)
 
-    records = list(target_win.windowsnotepad.tabs(include_deleted_content=False))
+    records = list(target_win.windowsnotepad.tabs())
 
     # Check the amount of files
     assert len(list(tab_dir.iterdir())) == len(file_text_map.keys())
@@ -176,4 +184,4 @@ def test_windows_saved_tab_plugin_extra_fields(target_win, fs_win, tmp_path, tar
         assert rec.content == file_text_map[rec.path.name][0]
         assert rec.saved_path == file_text_map[rec.path.name][1]
         assert rec.modification_time == file_text_map[rec.path.name][2]
-        assert rec.sha256 == file_text_map[rec.path.name][3]
+        assert rec.hashes.sha256 == file_text_map[rec.path.name][3]

From 06e3f075dffc99cf3b1ff29e4939e1f695ccb4d6 Mon Sep 17 00:00:00 2001
From: Joost Jansen <12032793+joost-j@users.noreply.github.com>
Date: Wed, 14 Aug 2024 12:19:59 +0200
Subject: [PATCH 33/36] Rewrite TabContent records into WindowsNotepadTab class

---
 .../plugins/apps/texteditor/windowsnotepad.py | 164 ++++++------------
 .../apps/texteditor/test_texteditor.py        |  14 +-
 2 files changed, 59 insertions(+), 119 deletions(-)

diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
index d1406d7c7..210b85331 100644
--- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py
+++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
@@ -9,10 +9,7 @@
 from flow.record.fieldtypes import digest
 
 from dissect.target.exceptions import UnsupportedPluginError
-from dissect.target.helpers.descriptor_extensions import (
-    RecordDescriptorExtensionBase,
-    UserRecordDescriptorExtension,
-)
+from dissect.target.helpers.descriptor_extensions import UserRecordDescriptorExtension
 from dissect.target.helpers.fsutil import TargetPath
 from dissect.target.helpers.record import (
     DynamicDescriptor,
@@ -99,53 +96,14 @@
 
 WINDOWS_SAVED_TABS_EXTRA_FIELDS = [("datetime", "modification_time"), ("digest", "hashes"), ("path", "saved_path")]
 
-
-class WindowsSavedTabRecordDescriptorExtension(RecordDescriptorExtensionBase):
-    """RecordDescriptorExtension used to add extra fields to tabs that are saved to disk and contain more info."""
-
-    _default_fields = WINDOWS_SAVED_TABS_EXTRA_FIELDS
-
-    _input_fields = ("_saved",)
-
-    def _fill_default_fields(self, record_kwargs):
-        r: WindowsNotepadSavedTabContentRecord = record_kwargs.get("_saved", None)
-
-        modification_time = None
-        saved_path = None
-        hashes = None
-
-        if r:
-            modification_time = r.modification_time
-            hashes = r.hashes
-            saved_path = r.saved_path
-
-        record_kwargs.update({"modification_time": modification_time, "hashes": hashes, "saved_path": saved_path})
-        return record_kwargs
-
-
-# Different Record types for both saved/unsaved tabs, and with/without UserRecordDescriptor so that the
-# plugin can be used as a standalone tool as well
-
-
 WindowsNotepadUnsavedTabRecord = create_extended_descriptor([UserRecordDescriptorExtension])(
     "texteditor/windowsnotepad/tab/unsaved",
     GENERIC_TAB_CONTENTS_RECORD_FIELDS,
 )
 
-WindowsNotepadSavedTabRecord = create_extended_descriptor(
-    [UserRecordDescriptorExtension, WindowsSavedTabRecordDescriptorExtension]
-)(
+WindowsNotepadSavedTabRecord = create_extended_descriptor([UserRecordDescriptorExtension])(
     "texteditor/windowsnotepad/tab/saved",
-    GENERIC_TAB_CONTENTS_RECORD_FIELDS,
-)
-
-WindowsNotepadUnsavedTabContentRecord = create_extended_descriptor([])(
-    "texteditor/windowsnotepad/tab_content/unsaved",
-    GENERIC_TAB_CONTENTS_RECORD_FIELDS,
-)
-
-WindowsNotepadSavedTabContentRecord = create_extended_descriptor([])(
-    "texteditor/windowsnotepad/tab_content/saved", GENERIC_TAB_CONTENTS_RECORD_FIELDS + WINDOWS_SAVED_TABS_EXTRA_FIELDS
+    GENERIC_TAB_CONTENTS_RECORD_FIELDS + WINDOWS_SAVED_TABS_EXTRA_FIELDS,
 )
 
 c_windowstab = cstruct()
@@ -157,34 +115,26 @@ def _calc_crc32(data: bytes) -> bytes:
     return zlib.crc32(data).to_bytes(length=4, byteorder="big")
 
 
-class WindowsNotepadTabContent:
+class WindowsNotepadTab:
     """Windows notepad tab content parser"""
 
-    def __new__(cls, file: TargetPath) -> WindowsNotepadSavedTabContentRecord | WindowsNotepadUnsavedTabContentRecord:
-        return cls._process_tab_file(file)
-
-    @staticmethod
-    def _process_tab_file(
-        file: TargetPath,
-    ) -> WindowsNotepadSavedTabContentRecord | WindowsNotepadUnsavedTabContentRecord:
-        """Parse a binary tab file and reconstruct the contents.
+    def __init__(self, file: TargetPath):
+        self.file = file
+        self._process_tab_file()
 
-        Args:
-            file: The binary file on disk that needs to be parsed.
-
-        Returns:
-            A TextEditorTabRecord containing information that is in the tab.
-        """
-        with file.open("rb") as fh:
+    def _process_tab_file(self):
+        """Parse a binary tab file and reconstruct the contents."""
+        with self.file.open("rb") as fh:
             # Header is the same for all types
-            file_header = c_windowstab.file_header(fh)
+            self.file_header = c_windowstab.file_header(fh)
+
+            # fileType == 1  # 0 is unsaved, 1 is saved, 9 is settings?
+            self.is_saved = self.file_header.fileType == 1
 
             # Tabs can be saved to a file with a filename on disk, or unsaved (kept in the TabState folder).
             # Depending on the file's saved state, different header fields are present
-            tab_header = (
-                c_windowstab.tab_header_saved(fh)
-                if file_header.fileType == 1  # 0 is unsaved, 1 is saved, 9 is settings?
-                else c_windowstab.tab_header_unsaved(fh)
+            self.tab_header = (
+                c_windowstab.tab_header_saved(fh) if self.is_saved else c_windowstab.tab_header_unsaved(fh)
             )
 
             # There appears to be a optionsVersion field that specifies the options that are passed.
@@ -192,13 +142,13 @@ def _process_tab_file(
             # that is parsed, so just going with the 'optionsVersion' type for now.
             # We don't use the options, but since they are required for the CRC32 checksum
             # we store the byte representation
-            if tab_header.optionsVersion == 0:
+            if self.tab_header.optionsVersion == 0:
                 # No options specified
-                options = b""
-            elif tab_header.optionsVersion == 1:
-                options = c_windowstab.options_v1(fh).dumps()
-            elif tab_header.optionsVersion == 2:
-                options = c_windowstab.options_v2(fh).dumps()
+                self.options = b""
+            elif self.tab_header.optionsVersion == 1:
+                self.options = c_windowstab.options_v1(fh).dumps()
+            elif self.tab_header.optionsVersion == 2:
+                self.options = c_windowstab.options_v2(fh).dumps()
             else:
                 # Raise an error, since we don't know how many bytes future optionVersions will occupy.
                 # Now knowing how many bytes to parse can mess up the alignment and structs.
@@ -206,45 +156,48 @@ def _process_tab_file(
 
             # If the file is not saved to disk and no fixedSizeBlockLength is present, an extra checksum stub
             # is present. So parse that first
-            if file_header.fileType == 0 and tab_header.fixedSizeBlockLength == 0:
+            if not self.is_saved and self.tab_header.fixedSizeBlockLength == 0:
                 # Two unknown bytes before the CRC32
                 tab_header_crc32_stub = c_windowstab.tab_header_crc32_stub(fh)
 
                 # Calculate CRC32 of the header and check if it matches
                 actual_header_crc32 = _calc_crc32(
-                    file_header.dumps()[3:] + tab_header.dumps() + options + tab_header_crc32_stub.dumps()[:-4]
+                    self.file_header.dumps()[3:]
+                    + self.tab_header.dumps()
+                    + self.options
+                    + tab_header_crc32_stub.dumps()[:-4]
                 )
                 if tab_header_crc32_stub.crc32 != actual_header_crc32:
                     logging.warning(
                         "CRC32 mismatch in header of file: %s (expected=%s, actual=%s)",
-                        file.name,
+                        self.file.name,
                         tab_header_crc32_stub.crc32.hex(),
                         actual_header_crc32.hex(),
                     )
 
             # Used to store the final content
-            content = ""
+            self.content = ""
 
             # In the case that a fixedSizeDataBlock is present, this value is set to a nonzero value
-            if tab_header.fixedSizeBlockLength > 0:
+            if self.tab_header.fixedSizeBlockLength > 0:
                 # So we parse the fixed size data block
-                data_entry = c_windowstab.fixed_size_data_block(fh)
+                self.data_entry = c_windowstab.fixed_size_data_block(fh)
 
                 # The header (minus the magic) plus all data is included in the checksum
                 actual_crc32 = _calc_crc32(
-                    file_header.dumps()[3:] + tab_header.dumps() + options + data_entry.dumps()[:-4]
+                    self.file_header.dumps()[3:] + self.tab_header.dumps() + self.options + self.data_entry.dumps()[:-4]
                 )
 
-                if data_entry.crc32 != actual_crc32:
+                if self.data_entry.crc32 != actual_crc32:
                     logging.warning(
                         "CRC32 mismatch in single-block file: %s (expected=%s, actual=%s)",
-                        file.name,
-                        data_entry.crc32.hex(),
+                        self.file.name,
+                        self.data_entry.crc32.hex(),
                         actual_crc32.hex(),
                     )
 
                 # Add the content of the fixed size data block to the tab content
-                content += data_entry.data
+                self.content += self.data_entry.data
 
             # Used to store the deleted content, if available
             deleted_content = ""
@@ -253,8 +206,8 @@ def _process_tab_file(
             # variable-length blocks. Furthermore, if there is any remaining data after the
             # first fixed size blocks, as indicated by the value of hasRemainingVariableDataBlocks,
             # also continue we also want to continue parsing
-            if tab_header.fixedSizeBlockLength == 0 or (
-                tab_header.fixedSizeBlockLength > 0 and data_entry.hasRemainingVariableDataBlocks == 1
+            if self.tab_header.fixedSizeBlockLength == 0 or (
+                self.tab_header.fixedSizeBlockLength > 0 and self.data_entry.hasRemainingVariableDataBlocks == 1
             ):
                 # Here, data is stored in variable-length blocks. This happens, for example, when several
                 # additions and deletions of characters have been recorded and these changes have not been 'flushed'
@@ -278,7 +231,7 @@ def _process_tab_file(
                         if data_entry.crc32 != actual_crc32:
                             logging.warning(
                                 "CRC32 mismatch in multi-block file: %s (expected=%s, actual=%s)",
-                                file.name,
+                                self.file.name,
                                 data_entry.crc32.hex(),
                                 actual_crc32.hex(),
                             )
@@ -297,22 +250,10 @@ def _process_tab_file(
                 text = "".join(text)
 
                 # Finally, add the reconstructed text to the tab content
-                content += text
-
-        # Return None if no deleted content was found
-        deleted_content = deleted_content if deleted_content else None
-
-        if file_header.fileType == 0:
-            return WindowsNotepadUnsavedTabContentRecord(content=content, path=file, deleted_content=deleted_content)
-        else:
-            return WindowsNotepadSavedTabContentRecord(
-                content=content,
-                path=file,
-                modification_time=wintimestamp(tab_header.timestamp),
-                hashes=digest((None, None, tab_header.sha256.hex())),
-                saved_path=tab_header.filePath,
-                deleted_content=deleted_content,
-            )
+                self.content += text
+
+        # Set None if no deleted content was found
+        self.deleted_content = deleted_content if deleted_content else None
 
 
 class WindowsNotepadPlugin(TexteditorPlugin):
@@ -346,21 +287,20 @@ def tabs(self) -> Iterator[WindowsNotepadSavedTabRecord | WindowsNotepadUnsavedT
         """
         for file, user in self.users_tabs:
             # Parse the file
-            r: WindowsNotepadSavedTabContentRecord | WindowsNotepadUnsavedTabContentRecord = WindowsNotepadTabContent(
-                file
-            )
+            w: WindowsNotepadTab = WindowsNotepadTab(file)
 
-            # If the modification_time attribute is present, this means that it's a WindowsNotepadSavedTabContentRecord
-            if hasattr(r, "modification_time"):
+            if w.is_saved:
                 yield WindowsNotepadSavedTabRecord(
-                    content=r.content,
-                    path=r.path,
-                    _saved=r,
+                    content=w.content,
+                    path=w.file,
+                    deleted_content=w.deleted_content,
+                    hashes=digest((None, None, w.tab_header.sha256.hex())),
+                    saved_path=w.tab_header.filePath,
+                    modification_time=wintimestamp(w.tab_header.timestamp),
                     _target=self.target,
                     _user=user,
-                    deleted_content=r.deleted_content,
                 )
             else:
                 yield WindowsNotepadUnsavedTabRecord(
-                    content=r.content, path=r.path, _target=self.target, _user=user, deleted_content=r.deleted_content
+                    content=w.content, path=w.file, _target=self.target, _user=user, deleted_content=w.deleted_content
                 )
diff --git a/tests/plugins/apps/texteditor/test_texteditor.py b/tests/plugins/apps/texteditor/test_texteditor.py
index 9a8394975..975ef0c77 100644
--- a/tests/plugins/apps/texteditor/test_texteditor.py
+++ b/tests/plugins/apps/texteditor/test_texteditor.py
@@ -5,7 +5,7 @@
 
 from dissect.target.plugins.apps.texteditor.windowsnotepad import (
     WindowsNotepadPlugin,
-    WindowsNotepadTabContent,
+    WindowsNotepadTab,
 )
 from tests._utils import absolute_path
 
@@ -30,15 +30,15 @@
 loremipsum = """Lorem ipsum dolor sit amet. Eum error blanditiis eum pariatur delectus ut consequuntur officiis a excepturi dignissimos et doloribus quia 33 perspiciatis soluta nam perspiciatis dolor. Ut repudiandae quidem cum sint modi qui sint consequatur. Aut autem quidem eum enim consequatur qui voluptate consequatur non similique voluptate. A vitae modi vel sint provident ut galisum tenetur sit voluptatem amet. Est impedit perspiciatis est repudiandae voluptates ut fugit alias! Eum magni esse aut velit illum qui excepturi aperiam. Ex dolores asperiores ut debitis omnis qui consequuntur dolore. Est voluptatem mollitia et quibusdam unde ea accusamus fuga. Cum quis galisum et impedit sunt qui aliquam perspiciatis sed modi quidem qui nisi molestias. Aut temporibus architecto ut neque voluptatem et consequatur deleniti sed accusantium quibusdam et omnis dignissimos ad rerum ipsam et rerum quia. Ut nihil repellat et eaque molestias quo iusto ipsum At optio sint eos quidem earum?\r\rEx deleniti unde eum tenetur rerum ea dolore numquam? Eos aperiam officiis et neque explicabo et enim atque ut eaque omnis non illum eveniet est molestias itaque et ratione voluptatem. Ea deserunt nemo et quos tempora et nostrum aperiam sit necessitatibus illo sit culpa placeat. Vel tempore quibusdam ut velit voluptate aut odio facere non voluptas earum est odio galisum et voluptas harum. Et blanditiis sapiente et nostrum laborum aut voluptatem explicabo a quasi assumenda. Est voluptatem quia eum minima galisum quo totam excepturi aut facilis enim vel voluptate repudiandae sit distinctio laboriosam. Quo possimus molestiae et molestiae accusantium est voluptas omnis sed obcaecati natus. Non vitae asperiores qui nostrum enim id saepe fugiat et incidunt quasi.\r\rEos ipsa facilis aut excepturi voluptatem a omnis magni vel magni iste. Sed ipsum consequatur qui reprehenderit deleniti et soluta molestiae. 
Ut vero assumenda id dolor ipsum in deleniti voluptatem aut quis quisquam sed repudiandae temporibus ab quia inventore. Sed velit fugit vel facere cumque et delectus ullam sed eaque impedit. Est veritatis dignissimos aut doloribus dolorem vel pariatur repellendus sit nesciunt similique eum architecto quia. Ea expedita veritatis eum dolorem molestiae ut enim fugit aut beatae quibusdam. Aut voluptas natus in quidem deleniti aut animi iure est incidunt tenetur qui culpa maiores! Et nostrum quaerat qui consequatur consequatur aut aliquam atque aut praesentium rerum et consequuntur exercitationem. Non accusantium ipsa vel consectetur vitae ut magnam autem et natus rerum ut consectetur inventore est doloremque temporibus 33 dolores doloribus! Aut perferendis optio et nostrum repellendus et fugit itaque ut nisi neque sed sint quaerat. Aut placeat architecto et eius sapiente eum molestiae quam. Quo mollitia sapiente non Quis neque non tempora laudantium. Quo distinctio quos et molestias natus sit veritatis consequuntur aut repellendus neque a porro galisum cum numquam nesciunt et animi earum? Aut dolorum dolore non assumenda omnis et molestiae amet id sint vero est eligendi harum sit temporibus magnam aut ipsam quos.\r\r"""  # noqa: E501
 
 
-def test_windows_tab_parsing(tmp_path):
+def test_windows_tab_parsing():
     # Standalone parsing of tab files, so not using the plugin
     tab_file = Path(absolute_path("_data/plugins/apps/texteditor/windowsnotepad/unsaved-with-deletions.bin"))
-    content_record = WindowsNotepadTabContent(tab_file)
-    assert content_record.content == "Not saved aasdflasd"
+    content = WindowsNotepadTab(tab_file)
+    assert content.content == "Not saved aasdflasd"
 
-    content_record_with_deletions = WindowsNotepadTabContent(tab_file)
-    assert content_record_with_deletions.content == "Not saved aasdflasd"
-    assert content_record_with_deletions.deleted_content == "snUlltllafds tjkf"
+    content_with_deletions = WindowsNotepadTab(tab_file)
+    assert content_with_deletions.content == "Not saved aasdflasd"
+    assert content_with_deletions.deleted_content == "snUlltllafds tjkf"
 
 
 def test_windows_tab_plugin_deleted_contents(target_win, fs_win, tmp_path, target_win_users, caplog):

From a384fd9810ca2873b59f2617e57a148162a3924f Mon Sep 17 00:00:00 2001
From: Joost Jansen <12032793+joost-j@users.noreply.github.com>
Date: Wed, 14 Aug 2024 12:31:00 +0200
Subject: [PATCH 34/36] Implement repr for WindowsNotepadTab class

---
 dissect/target/plugins/apps/texteditor/windowsnotepad.py | 9 +++++++++
 tests/plugins/apps/texteditor/test_texteditor.py         | 5 +----
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
index 210b85331..a4898fa26 100644
--- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py
+++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
@@ -120,8 +120,17 @@ class WindowsNotepadTab:
 
     def __init__(self, file: TargetPath):
         self.file = file
+        self.is_saved = None
+        self.content = None
+        self.deleted_content = None
         self._process_tab_file()
 
+    def __repr__(self):
+        return (
+            f"<{self.__class__.__name__} saved={self.is_saved} "
+            f"content_size={len(self.content)} has_deleted_content={self.deleted_content is not None}>"
+        )
+
     def _process_tab_file(self):
         """Parse a binary tab file and reconstruct the contents."""
         with self.file.open("rb") as fh:
diff --git a/tests/plugins/apps/texteditor/test_texteditor.py b/tests/plugins/apps/texteditor/test_texteditor.py
index 975ef0c77..380c1598c 100644
--- a/tests/plugins/apps/texteditor/test_texteditor.py
+++ b/tests/plugins/apps/texteditor/test_texteditor.py
@@ -35,10 +35,7 @@ def test_windows_tab_parsing():
     tab_file = Path(absolute_path("_data/plugins/apps/texteditor/windowsnotepad/unsaved-with-deletions.bin"))
     content = WindowsNotepadTab(tab_file)
     assert content.content == "Not saved aasdflasd"
-
-    content_with_deletions = WindowsNotepadTab(tab_file)
-    assert content_with_deletions.content == "Not saved aasdflasd"
-    assert content_with_deletions.deleted_content == "snUlltllafds tjkf"
+    assert repr(content) == "<WindowsNotepadTab saved=False content_size=19 has_deleted_content=True>"
 
 
 def test_windows_tab_plugin_deleted_contents(target_win, fs_win, tmp_path, target_win_users, caplog):

From e625684a2f2cadb61e20deab4e54eba47eb5ee47 Mon Sep 17 00:00:00 2001
From: Stefan de Reuver <9864602+Horofic@users.noreply.github.com>
Date: Fri, 16 Aug 2024 11:00:41 +0200
Subject: [PATCH 35/36] Add typehints and small fixes

---
 .../plugins/apps/texteditor/windowsnotepad.py | 56 ++++++++++++++-----
 .../apps/texteditor/test_texteditor.py        | 18 ++++--
 2 files changed, 53 insertions(+), 21 deletions(-)

diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
index a4898fa26..df77ca45b 100644
--- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py
+++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
@@ -125,13 +125,13 @@ def __init__(self, file: TargetPath):
         self.deleted_content = None
         self._process_tab_file()
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return (
             f"<{self.__class__.__name__} saved={self.is_saved} "
             f"content_size={len(self.content)} has_deleted_content={self.deleted_content is not None}>"
         )
 
-    def _process_tab_file(self):
+    def _process_tab_file(self) -> None:
         """Parse a binary tab file and reconstruct the contents."""
         with self.file.open("rb") as fh:
             # Header is the same for all types
@@ -161,7 +161,7 @@ def _process_tab_file(self):
             else:
                 # Raise an error, since we don't know how many bytes future optionVersions will occupy.
                 # Now knowing how many bytes to parse can mess up the alignment and structs.
-                raise Exception("Unknown option version")
+                raise NotImplementedError("Unknown Windows Notepad tab option version")
 
             # If the file is not saved to disk and no fixedSizeBlockLength is present, an extra checksum stub
             # is present. So parse that first
@@ -286,30 +286,56 @@ def __init__(self, target):
 
     def check_compatible(self) -> None:
         if not self.users_tabs:
-            raise UnsupportedPluginError("No Windows Notepad temporary tab files found")
+            raise UnsupportedPluginError("No Windows Notepad tab files found")
 
     @export(record=DynamicDescriptor(["path", "datetime", "string"]))
     def tabs(self) -> Iterator[WindowsNotepadSavedTabRecord | WindowsNotepadUnsavedTabRecord]:
-        """Return contents from Windows 11 temporary Notepad tabs.
+        """Return contents from Windows 11 Notepad tabs - and their deleted content if available.
 
-        Yields a WindowsNotepadSavedTabRecord or WindowsNotepadUnsavedTabRecord, depending on the state of the tab.
+        Windows Notepad application for Windows 11 is now able to restore both saved and unsaved tabs when you re-open
+        the application.
+
+
+        Resources:
+            - https://github.com/fox-it/dissect.target/pull/540
+            - https://github.com/JustArion/Notepad-Tabs
+            - https://github.com/ogmini/Notepad-Tabstate-Buffer
+            - https://github.com/ogmini/Notepad-State-Library
+            - https://github.com/Nordgaren/tabstate-util
+            - https://github.com/Nordgaren/tabstate-util/issues/1
+            - https://medium.com/@mahmoudsoheem/new-digital-forensics-artifact-from-windows-notepad-527645906b7b
+
+        Yields a WindowsNotepadSavedTabRecord or WindowsNotepadUnsavedTabRecord with fields:
+
+        .. code-block:: text
+
+            content (string): The content of the tab.
+            path (path): The path to the tab file.
+            deleted_content (string): The deleted content of the tab, if available.
+            hashes (digest): A digest of the tab content.
+            saved_path (path): The path where the tab was saved.
+            modification_time (datetime): The modification time of the tab.
         """
         for file, user in self.users_tabs:
             # Parse the file
-            w: WindowsNotepadTab = WindowsNotepadTab(file)
+            tab: WindowsNotepadTab = WindowsNotepadTab(file)
 
-            if w.is_saved:
+            if tab.is_saved:
                 yield WindowsNotepadSavedTabRecord(
-                    content=w.content,
-                    path=w.file,
-                    deleted_content=w.deleted_content,
-                    hashes=digest((None, None, w.tab_header.sha256.hex())),
-                    saved_path=w.tab_header.filePath,
-                    modification_time=wintimestamp(w.tab_header.timestamp),
+                    content=tab.content,
+                    path=tab.file,
+                    deleted_content=tab.deleted_content,
+                    hashes=digest((None, None, tab.tab_header.sha256.hex())),
+                    saved_path=tab.tab_header.filePath,
+                    modification_time=wintimestamp(tab.tab_header.timestamp),
                     _target=self.target,
                     _user=user,
                 )
             else:
                 yield WindowsNotepadUnsavedTabRecord(
-                    content=w.content, path=w.file, _target=self.target, _user=user, deleted_content=w.deleted_content
+                    content=tab.content,
+                    path=tab.file,
+                    _target=self.target,
+                    _user=user,
+                    deleted_content=tab.deleted_content,
                 )
diff --git a/tests/plugins/apps/texteditor/test_texteditor.py b/tests/plugins/apps/texteditor/test_texteditor.py
index 380c1598c..e8078d194 100644
--- a/tests/plugins/apps/texteditor/test_texteditor.py
+++ b/tests/plugins/apps/texteditor/test_texteditor.py
@@ -3,10 +3,12 @@
 
 from flow.record.fieldtypes import datetime as dt
 
+from dissect.target.filesystem import VirtualFilesystem
 from dissect.target.plugins.apps.texteditor.windowsnotepad import (
     WindowsNotepadPlugin,
     WindowsNotepadTab,
 )
+from dissect.target.target import Target
 from tests._utils import absolute_path
 
 text1 = "This is an unsaved tab, UTF-8 encoded with Windows (CRLF). It's only 88 characters long."
@@ -30,7 +32,7 @@
 loremipsum = """Lorem ipsum dolor sit amet. Eum error blanditiis eum pariatur delectus ut consequuntur officiis a excepturi dignissimos et doloribus quia 33 perspiciatis soluta nam perspiciatis dolor. Ut repudiandae quidem cum sint modi qui sint consequatur. Aut autem quidem eum enim consequatur qui voluptate consequatur non similique voluptate. A vitae modi vel sint provident ut galisum tenetur sit voluptatem amet. Est impedit perspiciatis est repudiandae voluptates ut fugit alias! Eum magni esse aut velit illum qui excepturi aperiam. Ex dolores asperiores ut debitis omnis qui consequuntur dolore. Est voluptatem mollitia et quibusdam unde ea accusamus fuga. Cum quis galisum et impedit sunt qui aliquam perspiciatis sed modi quidem qui nisi molestias. Aut temporibus architecto ut neque voluptatem et consequatur deleniti sed accusantium quibusdam et omnis dignissimos ad rerum ipsam et rerum quia. Ut nihil repellat et eaque molestias quo iusto ipsum At optio sint eos quidem earum?\r\rEx deleniti unde eum tenetur rerum ea dolore numquam? Eos aperiam officiis et neque explicabo et enim atque ut eaque omnis non illum eveniet est molestias itaque et ratione voluptatem. Ea deserunt nemo et quos tempora et nostrum aperiam sit necessitatibus illo sit culpa placeat. Vel tempore quibusdam ut velit voluptate aut odio facere non voluptas earum est odio galisum et voluptas harum. Et blanditiis sapiente et nostrum laborum aut voluptatem explicabo a quasi assumenda. Est voluptatem quia eum minima galisum quo totam excepturi aut facilis enim vel voluptate repudiandae sit distinctio laboriosam. Quo possimus molestiae et molestiae accusantium est voluptas omnis sed obcaecati natus. Non vitae asperiores qui nostrum enim id saepe fugiat et incidunt quasi.\r\rEos ipsa facilis aut excepturi voluptatem a omnis magni vel magni iste. Sed ipsum consequatur qui reprehenderit deleniti et soluta molestiae. 
Ut vero assumenda id dolor ipsum in deleniti voluptatem aut quis quisquam sed repudiandae temporibus ab quia inventore. Sed velit fugit vel facere cumque et delectus ullam sed eaque impedit. Est veritatis dignissimos aut doloribus dolorem vel pariatur repellendus sit nesciunt similique eum architecto quia. Ea expedita veritatis eum dolorem molestiae ut enim fugit aut beatae quibusdam. Aut voluptas natus in quidem deleniti aut animi iure est incidunt tenetur qui culpa maiores! Et nostrum quaerat qui consequatur consequatur aut aliquam atque aut praesentium rerum et consequuntur exercitationem. Non accusantium ipsa vel consectetur vitae ut magnam autem et natus rerum ut consectetur inventore est doloremque temporibus 33 dolores doloribus! Aut perferendis optio et nostrum repellendus et fugit itaque ut nisi neque sed sint quaerat. Aut placeat architecto et eius sapiente eum molestiae quam. Quo mollitia sapiente non Quis neque non tempora laudantium. Quo distinctio quos et molestias natus sit veritatis consequuntur aut repellendus neque a porro galisum cum numquam nesciunt et animi earum? Aut dolorum dolore non assumenda omnis et molestiae amet id sint vero est eligendi harum sit temporibus magnam aut ipsam quos.\r\r"""  # noqa: E501
 
 
-def test_windows_tab_parsing():
+def test_windows_tab_parsing() -> None:
     # Standalone parsing of tab files, so not using the plugin
     tab_file = Path(absolute_path("_data/plugins/apps/texteditor/windowsnotepad/unsaved-with-deletions.bin"))
     content = WindowsNotepadTab(tab_file)
@@ -38,7 +40,9 @@ def test_windows_tab_parsing():
     assert repr(content) == "<WindowsNotepadTab saved=False content_size=19 has_deleted_content=True>"
 
 
-def test_windows_tab_plugin_deleted_contents(target_win, fs_win, tmp_path, target_win_users, caplog):
+def test_windows_tab_plugin_deleted_contents(
+    target_win: Target, fs_win: VirtualFilesystem, tmp_path: Path, target_win_users: Target
+) -> None:
     file_text_map = {
         "unsaved-with-deletions.bin": ("Not saved aasdflasd", "snUlltllafds tjkf"),
         "lots-of-deletions.bin": (
@@ -73,12 +77,13 @@ def test_windows_tab_plugin_deleted_contents(target_win, fs_win, tmp_path, targe
 
     # The recovered content in the records should match the original data, as well as the length
     for rec in records:
-        print(rec)
         assert rec.content == file_text_map[rec.path.name][0]
         assert rec.deleted_content == file_text_map[rec.path.name][1]
 
 
-def test_windows_tab_plugin_default(target_win, fs_win, tmp_path, target_win_users, caplog):
+def test_windows_tab_plugin_default(
+    target_win: Target, fs_win: VirtualFilesystem, tmp_path: Path, target_win_users: Target, caplog
+) -> None:
     file_text_map = {
         "c515e86f-08b3-4d76-844a-cddfcd43fcbb.bin": (text1, None),
         "85167c9d-aac2-4469-ae44-db5dccf8f7f4.bin": (text2, None),
@@ -132,12 +137,13 @@ def test_windows_tab_plugin_default(target_win, fs_win, tmp_path, target_win_use
 
     # The recovered content in the records should match the original data, as well as the length
     for rec in records:
-        print(rec)
         assert rec.content == file_text_map[rec.path.name][0]
         assert rec.deleted_content == file_text_map[rec.path.name][1]
 
 
-def test_windows_saved_tab_plugin_extra_fields(target_win, fs_win, tmp_path, target_win_users, caplog):
+def test_windows_saved_tab_plugin_extra_fields(
+    target_win: Target, fs_win: VirtualFilesystem, tmp_path: Path, target_win_users: Target
+) -> None:
     file_text_map = {
         "saved.bin": (
             "Saved!",

From 27fca9256cc649c795981572099933bbed30d81b Mon Sep 17 00:00:00 2001
From: Stefan de Reuver <9864602+Horofic@users.noreply.github.com>
Date: Fri, 16 Aug 2024 11:25:40 +0200
Subject: [PATCH 36/36] Add suggestions

---
 .../target/plugins/apps/texteditor/windowsnotepad.py  | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
index df77ca45b..260bc5b04 100644
--- a/dissect/target/plugins/apps/texteditor/windowsnotepad.py
+++ b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
@@ -12,7 +12,6 @@
 from dissect.target.helpers.descriptor_extensions import UserRecordDescriptorExtension
 from dissect.target.helpers.fsutil import TargetPath
 from dissect.target.helpers.record import (
-    DynamicDescriptor,
     UnixUserRecord,
     WindowsUserRecord,
     create_extended_descriptor,
@@ -22,12 +21,13 @@
     GENERIC_TAB_CONTENTS_RECORD_FIELDS,
     TexteditorPlugin,
 )
+from dissect.target.target import Target
 
 # Thanks to @Nordgaren, @daddycocoaman, @JustArion and @ogmini for their suggestions and feedback in the PR
 # thread. This really helped to figure out the last missing bits and pieces
 # required for recovering text from these files.
 
-c_def = """
+windowstab_def = """
 struct file_header {
     char        magic[2]; // NP
     uleb128     updateNumber; // increases on every settings update when fileType=9,
@@ -106,8 +106,7 @@
     GENERIC_TAB_CONTENTS_RECORD_FIELDS + WINDOWS_SAVED_TABS_EXTRA_FIELDS,
 )
 
-c_windowstab = cstruct()
-c_windowstab.load(c_def)
+c_windowstab = cstruct().load(windowstab_def)
 
 
 def _calc_crc32(data: bytes) -> bytes:
@@ -272,7 +271,7 @@ class WindowsNotepadPlugin(TexteditorPlugin):
 
     GLOB = "AppData/Local/Packages/Microsoft.WindowsNotepad_*/LocalState/TabState/*.bin"
 
-    def __init__(self, target):
+    def __init__(self, target: Target):
         super().__init__(target)
         self.users_tabs: list[TargetPath, UnixUserRecord | WindowsUserRecord] = []
         for user_details in self.target.user_details.all_with_home():
@@ -288,7 +287,7 @@ def check_compatible(self) -> None:
         if not self.users_tabs:
             raise UnsupportedPluginError("No Windows Notepad tab files found")
 
-    @export(record=DynamicDescriptor(["path", "datetime", "string"]))
+    @export(record=[WindowsNotepadSavedTabRecord, WindowsNotepadUnsavedTabRecord])
     def tabs(self) -> Iterator[WindowsNotepadSavedTabRecord | WindowsNotepadUnsavedTabRecord]:
         """Return contents from Windows 11 Notepad tabs - and its deleted content if available.