Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for XML configuration files #495

Merged
merged 31 commits into from
Jan 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
d4b682e
Add support for XML configuration files (DIS-2157)
cecinestpasunepipe Jan 5, 2024
b7f4402
Update dissect/target/helpers/configutil.py
cecinestpasunepipe Jan 5, 2024
1ed2171
Update dissect/target/helpers/configutil.py
cecinestpasunepipe Jan 16, 2024
8d39839
Update dissect/target/helpers/configutil.py
cecinestpasunepipe Jan 16, 2024
351995d
Update dissect/target/helpers/configutil.py
cecinestpasunepipe Jan 16, 2024
53f22d8
Add support for XML configuration files (DIS-2157)
cecinestpasunepipe Jan 5, 2024
1aecf76
Update dissect/target/helpers/configutil.py
cecinestpasunepipe Jan 5, 2024
7411820
Update dissect/target/helpers/configutil.py
cecinestpasunepipe Jan 16, 2024
8dbe3d6
Update dissect/target/helpers/configutil.py
cecinestpasunepipe Jan 16, 2024
2e7a990
Update dissect/target/helpers/configutil.py
cecinestpasunepipe Jan 16, 2024
601c79a
Integrate feedback.
cecinestpasunepipe Jan 16, 2024
20df264
Integrate feedback.
cecinestpasunepipe Jan 16, 2024
c9aee01
Integrate feedback.
cecinestpasunepipe Jan 16, 2024
fb830d6
Integrate feedback.
cecinestpasunepipe Jan 16, 2024
0d17536
Merge branch 'main' of github.com:fox-it/dissect.target into DIS-2157…
Horofic Jan 17, 2024
48acd66
Merge branch 'DIS-2157_Config_parser_xml' of github.com:fox-it/dissec…
Horofic Jan 17, 2024
2931f50
Minor parser & test changes
Horofic Jan 18, 2024
d05e1cd
Remove debug code.
cecinestpasunepipe Jan 18, 2024
fec51f4
Restore previous solution but with comments to explain rationale.
cecinestpasunepipe Jan 18, 2024
042bdd6
Merge branch 'main' of github.com:fox-it/dissect.target into DIS-2157…
cecinestpasunepipe Jan 23, 2024
6bbd3b5
Merge branch 'main' into DIS-2157_Config_parser_xml
cecinestpasunepipe Jan 24, 2024
e4e1f4f
Update dissect/target/helpers/configutil.py
cecinestpasunepipe Jan 25, 2024
07004e3
Update dissect/target/helpers/configutil.py
cecinestpasunepipe Jan 25, 2024
7a29b97
Update dissect/target/helpers/configutil.py
cecinestpasunepipe Jan 25, 2024
81e583d
Update dissect/target/helpers/configutil.py
cecinestpasunepipe Jan 25, 2024
8cdfc19
Update dissect/target/helpers/configutil.py
cecinestpasunepipe Jan 25, 2024
416dfb7
Update dissect/target/helpers/configutil.py
cecinestpasunepipe Jan 25, 2024
8066e7a
Update dissect/target/helpers/configutil.py
cecinestpasunepipe Jan 25, 2024
8a9992a
Implement feedback.
cecinestpasunepipe Jan 25, 2024
38dae04
Merge branch 'main' into DIS-2157_Config_parser_xml
cecinestpasunepipe Jan 25, 2024
7ef9086
Merge branch 'main' into DIS-2157_Config_parser_xml
cecinestpasunepipe Jan 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 78 additions & 1 deletion dissect/target/helpers/configutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
Union,
)

from defusedxml import ElementTree

from dissect.target.exceptions import ConfigurationParsingError, FileNotFoundError
from dissect.target.filesystem import FilesystemEntry
from dissect.target.helpers.fsutil import TargetPath
Expand Down Expand Up @@ -254,6 +256,79 @@ def parse_file(self, fh: TextIO) -> None:
self.parsed_data = {"content": fh.read(), "size": str(fh.tell())}


class Xml(ConfigurationParser):
    """Parses an XML file. Ignores any constructor parameters passed from ``ConfigurationParser``.

    The document is converted into nested dictionaries with a fixed layout per element:

    * ``attributes``: the element's attributes (only present when there are any).
    * ``nodes``: the element's child elements, keyed by tag name (only present when there are any).
    * ``text``: the element's stripped text content (only present when non-empty).

    Documents that fail to parse are repaired heuristically, see :meth:`parse_file`.
    """

    def _tree(self, tree: ElementTree, root: bool = False) -> dict:
        """Very simple but robust xml -> dict implementation, see comments.

        Args:
            tree: The XML element to convert (it must offer ``findall``, ``attrib``, ``text`` and ``tag``).
            root: When ``True`` the result is wrapped as ``{tree.tag: result}`` so the root tag name is kept.

        Returns:
            A dictionary holding the optional ``attributes``, ``nodes`` and ``text`` entries of the element.
        """
        nodes = {}
        result = {}
        counter = {}

        # Each node is a folder, so the structure is always the same.
        for node in tree.findall("*"):
            # If a node contains multiple nodes with the same name, number them: the first one keeps the
            # plain tag name, subsequent ones become "<tag>-2", "<tag>-3", ...
            # NOTE(review): a numbered key could coincide with a sibling whose tag is literally "<tag>-2".
            if node.tag in counter:
                counter[node.tag] += 1
                nodes[f"{node.tag}-{counter[node.tag]}"] = self._tree(node)
            else:
                counter[node.tag] = 1
                nodes[node.tag] = self._tree(node)

        # All attributes go in the attributes folder. This is stable (it does not change depending on the
        # XML structure) and we keep the information that these values were attributes.
        if tree.attrib:
            result["attributes"] = tree.attrib

        # All subnodes go in the nodes folder.
        if nodes:
            result["nodes"] = nodes

        # Content goes into the text folder.
        # We don't use special prefixes ($) because XML docs may use them anyway (even though they are forbidden).
        if tree.text:
            if text := tree.text.strip(" \n\r"):
                result["text"] = text

        # If you need to store meta-data, you can add more entries here... CDATA, comments, errors.
        return {tree.tag: result} if root else result

    def _fix(self, content: str, position: tuple[int, int]) -> str:
        """Quick heuristic fix. If there is an invalid token, just remove it.

        Args:
            content: The complete XML document as text.
            position: The ``(line, column)`` tuple reported by the parse error; the line is used 1-based.

        Returns:
            The document with the characters around the reported position removed from the offending line.
        """
        lineno, offset = position
        lines = content.split("\n")

        line = lines[lineno - 1]
        # Drop the characters at indices ``offset - 1`` and ``offset``. The start index is clamped at zero,
        # otherwise a column of 0 turns into the slice ``line[:-1]`` and the line grows instead of shrinking.
        lines[lineno - 1] = line[: max(offset - 1, 0)] + line[offset + 1 :]

        return "\n".join(lines)

    def parse_file(self, fh: TextIO) -> None:
        """Parse the XML document in ``fh`` and store the result in ``self.parsed_data``.

        Every parse error triggers a heuristic repair of the document (see :meth:`_fix`), after which
        parsing is retried, up to a fixed limit of attempts.

        Args:
            fh: The text file handle to read the XML document from.

        Raises:
            ConfigurationParsingError: If the document still can't be parsed when the limit is reached.
        """
        document = fh.read()
        limit = 20
        errors = 0
        last_error = None
        tree = {}

        while errors < limit:
            try:
                tree = self._tree(ElementTree.fromstring(document), root=True)
                break
            except ElementTree.ParseError as err:
                errors += 1
                last_error = err
                document = self._fix(document, err.position)

        if not tree:
            # Error limit reached. Thus we consider the document not parseable.
            # Not every text handle carries a name, so don't let that mask the actual parsing problem.
            name = getattr(fh, "name", "<unknown>")
            raise ConfigurationParsingError(
                f"Could not parse XML file: {name} after {errors} attempts."
            ) from last_error

        self.parsed_data = tree


class ScopeManager:
"""A (context)manager for dictionary scoping.

Expand Down Expand Up @@ -528,11 +603,12 @@ def create_parser(self, options: Optional[ParserOptions] = None) -> Configuratio
"*/systemd/*": ParserConfig(SystemD),
"*/sysconfig/network-scripts/ifcfg-*": ParserConfig(Default),
"*/sysctl.d/*.conf": ParserConfig(Default),
"*/xml/*": ParserConfig(Xml),
}

CONFIG_MAP: dict[tuple[str, ...], ParserConfig] = {
"ini": ParserConfig(Ini),
"xml": ParserConfig(Txt),
"xml": ParserConfig(Xml),
"json": ParserConfig(Txt),
"cnf": ParserConfig(Default),
"conf": ParserConfig(Default, separator=(r"\s",)),
Expand All @@ -549,6 +625,7 @@ def create_parser(self, options: Optional[ParserOptions] = None) -> Configuratio
"hosts": ParserConfig(Default, separator=(r"\s",)),
"nsswitch.conf": ParserConfig(Default, separator=(":",)),
"lsb-release": ParserConfig(Default),
"catalog": ParserConfig(Xml),
}


Expand Down
18 changes: 18 additions & 0 deletions tests/_data/helpers/configutil/test.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<?xml version='1.0' encoding='utf-8'?>
<Server port="8005" shutdown="SHUTDOWN">
<Listener className="org.apache.catalina.core.JasperListener1">a</Listener>
<Listener className="org.apache.catalina.core.JasperListener2">b</Listener>
<Service name="Catalina">
<Connector port="8080" protocol="HTTP/1.1"
connectionTimeout="20000"
redirectPort="8443" />
<Engine name="Catalina" defaultHost="localhost">
<Host name="localhost" appBase="webapps"
unpackWARs="true" autoDeploy="true">
<Valve className="org.apache.catalina.valves.AccessLogValve" directory="logs"
prefix="localhost_access_log." suffix=".txt"
pattern="%h %l %u %t "%s" %b" />
</Host>
</Engine>
</Service>
</Server>
55 changes: 55 additions & 0 deletions tests/filesystems/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,61 @@ def mapped_file(test_file: str, fs_unix: VirtualFilesystem) -> VirtualFilesystem
},
},
),
(
"_data/helpers/configutil/test.xml",
{
"Server": {
"attributes": {"port": "8005", "shutdown": "SHUTDOWN"},
"nodes": {
"Listener": {
"attributes": {"className": "org.apache.catalina.core.JasperListener1"},
"text": "a",
},
"Listener-2": {
"attributes": {"className": "org.apache.catalina.core.JasperListener2"},
"text": "b",
},
"Service": {
"attributes": {"name": "Catalina"},
"nodes": {
"Connector": {
"attributes": {
"port": "8080",
"protocol": "HTTP/1.1",
"connectionTimeout": "20000",
"redirectPort": "8443",
},
},
"Engine": {
"attributes": {"name": "Catalina", "defaultHost": "localhost"},
"nodes": {
"Host": {
"attributes": {
"name": "localhost",
"appBase": "webapps",
"unpackWARs": "true",
"autoDeploy": "true",
},
"nodes": {
"Valve": {
"attributes": {
"className": "org.apache.catalina.valves.AccessLogValve",
"directory": "logs",
"prefix": "localhost_access_log.",
"suffix": ".txt",
"pattern": "%h %l %u %t s",
},
}
},
}
},
},
},
},
},
},
},
),
],
)
def test_parse_file_input(target_unix: Target, mapped_file: str, expected_output: dict) -> None:
Expand Down
2 changes: 1 addition & 1 deletion tests/plugins/general/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def test_collapse_types(
"hint, data_bytes",
[
("ini", b"[DEFAULT]\nkey=value"),
("xml", b"currently_just_text"),
("xml", b"<a>currently_just_text</a>"),
("json", b"currently_just_text"),
("cnf", b"key=value"),
("conf", b"key value"),
Expand Down