Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-115398: Expose Expat >=2.6.0 reparse deferral API (CVE-2023-52425) #115623

Merged
merged 39 commits into from
Feb 29, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
7cebe78
pyexpat: Implement methods pyexpat.xmlparser.(Get|Set)ReparseDeferral…
hartwork Feb 17, 2024
c70fbae
etree: Implement method xml.etree.ElementTree.XMLParser.flush (Python…
hartwork Feb 17, 2024
dfca819
pyexpat: Make SetReparseDeferralEnabled available via PyExpat_CAPI
hartwork Feb 7, 2024
4baab67
etree: Implement method xml.etree.ElementTree.XMLParser.flush (C vers…
hartwork Feb 18, 2024
7928942
etree: Implement method xml.etree.ElementTree.XMLPullParser.flush
hartwork Feb 17, 2024
e5e4033
etree: Use XMLPullParser.flush to fix XMLPullParserTest for Expat 2.6.0
hartwork Feb 17, 2024
bc6e1a7
sax: Implement method xml.sax.expatreader.ExpatParser.flush
hartwork Feb 18, 2024
b737f03
sax: Test method xml.sax.expatreader.ExpatParser.flush
hartwork Feb 18, 2024
850e46d
Document new CVE-2023-52425 Expat API (reparse deferral)
hartwork Feb 18, 2024
7002024
_elementtree.c: Document how we know that reparse deferral is enabled
hartwork Feb 21, 2024
2132dfe
sax: Fix xml.sax.expatreader.ExpatParser.flush
hartwork Feb 21, 2024
3d02dfe
etree: Fix xml.etree.ElementTree.XMLParser.flush (Python version)
hartwork Feb 21, 2024
fdd2fac
etree: Fix typo "deferall"
hartwork Feb 21, 2024
dbbd98c
pyexpat: Cover (Get|Set)ReparseDeferralEnabled by tests
hartwork Feb 24, 2024
3b6ea39
sax: Extend xml.sax.expatreader.ExpatParser.flush test coverage
hartwork Feb 24, 2024
5c1cfb7
Doc/whatsnew/3.13.rst: Mention new Expat reparse deferral API
hartwork Feb 24, 2024
a9c666e
pyexpat: Document methods pyexpat.xmlparser.(Get|Set)ReparseDeferralE…
hartwork Feb 24, 2024
35099e3
etree: Document method xml.etree.ElementTree.XMLParser.flush
hartwork Feb 24, 2024
1496e83
etree: Document method xml.etree.ElementTree.XMLPullParser.flush
hartwork Feb 24, 2024
a6927ff
etree: Make docs point to xml.etree.ElementTree.XMLPullParser.flush
hartwork Feb 24, 2024
c5b2159
pyexpat: Move security warning into SetReparseDeferralEnabled docs
hartwork Feb 24, 2024
082bcc1
pyexpat|etree: Mark new Expat API as added in 3.13 in docs
hartwork Feb 24, 2024
f0577e7
pyexpat|sax: Do not be silent about tests skipped for Expat <2.6.0
hartwork Feb 24, 2024
f589908
pyexpat: Simplify test ReparseDeferralTest.test_getter_setter_round_trip
hartwork Feb 24, 2024
4915045
Promote xml.parsers.expat.xmlparser instead of pyexpat.xmlparser
hartwork Feb 24, 2024
d0ed243
etree: Cover method xml.etree.ElementTree.XMLPullParser.flush
hartwork Feb 24, 2024
62e4fd7
pyexpat: Cut whitespace from ReparseDeferralTest.test_getter_setter_r…
hartwork Feb 24, 2024
b0058d5
pyexpat: Drop ReparseDeferralTest.test_getter_initial_value
hartwork Feb 24, 2024
1f70c09
pyexpat: Break a long line for PEP 8
hartwork Feb 24, 2024
a77de0f
Doc/whatsnew/3.13.rst: Do not create a link into undocumented class
hartwork Feb 24, 2024
2f07457
etree: Make XMLPullParserTest._feed only flush when needed
hartwork Feb 24, 2024
4b49de9
etree: Fix XMLPullParserTest.test_flush_[..] for C version
hartwork Feb 24, 2024
3c960a6
etree: Break a long line for PEP 8
hartwork Feb 24, 2024
4855bb9
etree: Make test_flush_reparse_deferral_disabled less exclusive
hartwork Feb 24, 2024
b6a84b2
etree|sax: Simplify .flush implementations
hartwork Feb 24, 2024
0faa19e
etree: Resolve "is_python" in favor of "ET is pyET"
hartwork Feb 24, 2024
40743a6
etree: Fix emphasis syntax for "immediate" in docs
hartwork Feb 24, 2024
a473299
pypexpat: Replace "none" with "NULL" to be correct
hartwork Feb 24, 2024
a6baa0b
pyexpat: Indent warning about xmlparser.SetReparseDeferralEnabled
hartwork Feb 24, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Include/pyexpat.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ struct PyExpat_CAPI
void *encodingHandlerData, const XML_Char *name, XML_Encoding *info);
/* might be NULL for expat < 2.1.0 */
int (*SetHashSalt)(XML_Parser parser, unsigned long hash_salt);
/* might be NULL for expat < 2.6.0 */
XML_Bool (*SetReparseDeferralEnabled)(XML_Parser parser, XML_Bool enabled);
/* always add new stuff to the end! */
};

25 changes: 25 additions & 0 deletions Lib/test/test_sax.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from io import BytesIO, StringIO
import codecs
import os.path
import pyexpat
import shutil
import sys
from urllib.error import URLError
Expand Down Expand Up @@ -1214,6 +1215,30 @@ def test_expat_incremental_reset(self):

self.assertEqual(result.getvalue(), start + b"<doc>text</doc>")

def test_expat_incremental_reparse_deferral(self):
    """Feed a start tag in two chunks and check when it reaches the handler.

    With Expat >=2.6.0 the split start tag is expected to be held back
    until flush() is called; with older Expat it is reported right away.
    """
    output = BytesIO()
    parser = create_parser()
    parser.setContentHandler(XMLGenerator(output))

    # This artificial chunking triggers reparse deferral with Expat >=2.6.0
    for chunk in ("<doc ", ">"):
        parser.feed(chunk)

    deferral_supported = pyexpat.version_info >= (2, 6, 0)
    expected_before_flush = start if deferral_supported else start + b"<doc>"
    self.assertEqual(output.getvalue(), expected_before_flush)

    parser.flush()  # no-op for Expat <2.6.0

    # After flushing, both Expat generations must have reported the tag.
    self.assertEqual(output.getvalue(), start + b"<doc>")

    parser.feed("</doc>")
    parser.close()

    self.assertEqual(output.getvalue(), start + b"<doc></doc>")

# ===== Locator support

def test_expat_locator_noinfo(self):
Expand Down
7 changes: 1 addition & 6 deletions Lib/test/test_xml_etree.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,10 +121,6 @@
</foo>
"""

fails_with_expat_2_6_0 = (unittest.expectedFailure
if pyexpat.version_info >= (2, 6, 0) else
lambda test: test)

def checkwarnings(*filters, quiet=False):
def decorator(test):
def newtest(*args, **kwargs):
Expand Down Expand Up @@ -1468,6 +1464,7 @@ def _feed(self, parser, data, chunk_size=None):
else:
for i in range(0, len(data), chunk_size):
parser.feed(data[i:i+chunk_size])
parser.flush()

def assert_events(self, parser, expected, max_events=None):
self.assertEqual(
Expand Down Expand Up @@ -1506,11 +1503,9 @@ def test_simple_xml(self, chunk_size=None):
self.assert_event_tags(parser, [('end', 'root')])
self.assertIsNone(parser.close())

@fails_with_expat_2_6_0
def test_simple_xml_chunk_1(self):
self.test_simple_xml(chunk_size=1)

@fails_with_expat_2_6_0
def test_simple_xml_chunk_5(self):
self.test_simple_xml(chunk_size=5)

Expand Down
16 changes: 16 additions & 0 deletions Lib/xml/etree/ElementTree.py
Original file line number Diff line number Diff line change
Expand Up @@ -1320,6 +1320,11 @@ def read_events(self):
else:
yield event

def flush(self):
    """Forward a flush request to the wrapped parser.

    Raises ValueError when the wrapped parser is no longer available
    (``self._parser`` is None).
    """
    wrapped = self._parser
    if wrapped is None:
        raise ValueError("flush() called after end of stream")
    wrapped.flush()


def XML(text, parser=None):
"""Parse XML document from string constant.
Expand Down Expand Up @@ -1726,6 +1731,17 @@ def close(self):
del self.parser, self._parser
del self.target, self._target

def flush(self):
    """Make the parser process any input it is still holding back.

    Reparse deferral is switched off for the duration of an empty,
    non-final Parse() call and then put back to whatever it was before.
    Parser errors are routed through self._raiseerror().
    """
    # Do not return early when deferral is currently disabled: data may
    # have been fed while it was still enabled and would otherwise stay
    # unprocessed.  Remember the caller's setting so it can be restored
    # instead of unconditionally re-enabling deferral.
    was_enabled = self.parser.GetReparseDeferralEnabled()
    try:
        self.parser.SetReparseDeferralEnabled(False)
        self.parser.Parse(b"", False)
    except self._error as v:
        self._raiseerror(v)
    finally:
        self.parser.SetReparseDeferralEnabled(was_enabled)

# --------------------------------------------------------------------
# C14N 2.0
Expand Down
16 changes: 16 additions & 0 deletions Lib/xml/sax/expatreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,22 @@ def feed(self, data, isFinal=False):
# FIXME: when to invoke error()?
self._err_handler.fatalError(exc)

def flush(self):
    """Make the Expat parser process any input it is still holding back.

    Does nothing when no parser object exists yet (self._parser is None).
    Otherwise reparse deferral is switched off for the duration of an
    empty, non-final Parse() call and then put back to whatever it was
    before.  Expat errors are reported to the error handler's
    fatalError() as a SAXParseException.
    """
    if self._parser is None:
        return

    # Do not return early when deferral is currently disabled: data may
    # have been fed while it was still enabled and would otherwise stay
    # unprocessed.  Remember the caller's setting so it can be restored
    # instead of unconditionally re-enabling deferral.
    was_enabled = self._parser.GetReparseDeferralEnabled()
    try:
        self._parser.SetReparseDeferralEnabled(False)
        self._parser.Parse(b"", False)
    except expat.error as e:
        exc = SAXParseException(expat.ErrorString(e.code), e, self)
        self._err_handler.fatalError(exc)
    finally:
        self._parser.SetReparseDeferralEnabled(was_enabled)

def _close_source(self):
source = self._source
try:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Allow controlling Expat >=2.6.0 reparse deferral (CVE-2023-52425) by adding
five new methods:

* ``pyexpat.xmlparser.GetReparseDeferralEnabled``
* ``pyexpat.xmlparser.SetReparseDeferralEnabled``
* ``xml.etree.ElementTree.XMLParser.flush``
* ``xml.etree.ElementTree.XMLPullParser.flush``
* ``xml.sax.expatreader.ExpatParser.flush``
4 changes: 2 additions & 2 deletions Misc/sbom.spdx.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

29 changes: 29 additions & 0 deletions Modules/_elementtree.c
Original file line number Diff line number Diff line change
Expand Up @@ -3894,6 +3894,34 @@ _elementtree_XMLParser_close_impl(XMLParserObject *self)
}
}

/*[clinic input]
_elementtree.XMLParser.flush

[clinic start generated code]*/

static PyObject *
_elementtree_XMLParser_flush_impl(XMLParserObject *self)
/*[clinic end generated code: output=42fdb8795ca24509 input=effbecdb28715949]*/
{
    /* Make the parser process input it is still holding back: turn reparse
       deferral off, run an empty non-final parse, turn deferral back on.
       Returns the result of that parse, None when the Expat capsule has no
       SetReparseDeferralEnabled entry, or NULL when the parser check
       fails. */
    if (!_check_xmlparser(self)) {
        return NULL;
    }

    elementtreestate *state = self->state;
    XML_Bool (*set_deferral)(XML_Parser, XML_Bool) =
        EXPAT(state, SetReparseDeferralEnabled);

    if (set_deferral == NULL) {
        /* The capsule entry is NULL for Expat < 2.6.0: nothing to do. */
        Py_RETURN_NONE;
    }

    set_deferral(self->parser, XML_FALSE);
    PyObject *result = expat_parse(state, self, "", 0, XML_FALSE);
    /* Re-enable deferral regardless of how the parse went. */
    set_deferral(self->parser, XML_TRUE);

    return result;
}

/*[clinic input]
_elementtree.XMLParser.feed

Expand Down Expand Up @@ -4288,6 +4316,7 @@ static PyType_Spec treebuilder_spec = {
static PyMethodDef xmlparser_methods[] = {
_ELEMENTTREE_XMLPARSER_FEED_METHODDEF
_ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
_ELEMENTTREE_XMLPARSER_FLUSH_METHODDEF
_ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
_ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
{NULL, NULL}
Expand Down
19 changes: 18 additions & 1 deletion Modules/clinic/_elementtree.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

49 changes: 48 additions & 1 deletion Modules/clinic/pyexpat.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Modules/expat/pyexpatns.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@
#define XML_SetNotStandaloneHandler PyExpat_XML_SetNotStandaloneHandler
#define XML_SetParamEntityParsing PyExpat_XML_SetParamEntityParsing
#define XML_SetProcessingInstructionHandler PyExpat_XML_SetProcessingInstructionHandler
#define XML_SetReparseDeferralEnabled PyExpat_XML_SetReparseDeferralEnabled
#define XML_SetReturnNSTriplet PyExpat_XML_SetReturnNSTriplet
#define XML_SetSkippedEntityHandler PyExpat_XML_SetSkippedEntityHandler
#define XML_SetStartCdataSectionHandler PyExpat_XML_SetStartCdataSectionHandler
Expand Down
53 changes: 53 additions & 0 deletions Modules/pyexpat.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "pycore_pyhash.h" // _Py_HashSecret
#include "pycore_traceback.h" // _PyTraceback_Add()

#include <stdbool.h>
#include <stddef.h> // offsetof()
#include "expat.h"
#include "pyexpat.h"
Expand Down Expand Up @@ -81,6 +82,12 @@ typedef struct {
/* NULL if not enabled */
int buffer_size; /* Size of buffer, in XML_Char units */
int buffer_used; /* Buffer units in use */
bool reparse_deferral_enabled; /* Whether to defer reparsing of
unfinished XML tokens; a de-facto cache of
what Expat has the authority on, for lack
of a getter API function
"XML_GetReparseDeferralEnabled" in Expat
2.6.0 */
PyObject *intern; /* Dictionary to intern strings */
PyObject **handlers;
} xmlparseobject;
Expand Down Expand Up @@ -703,6 +710,40 @@ get_parse_result(pyexpat_state *state, xmlparseobject *self, int rv)

#define MAX_CHUNK_SIZE (1 << 20)

/*[clinic input]
pyexpat.xmlparser.SetReparseDeferralEnabled

enabled: bool
/

Enable/Disable reparse deferral; enabled by default with Expat >=2.6.0.
[clinic start generated code]*/

static PyObject *
pyexpat_xmlparser_SetReparseDeferralEnabled_impl(xmlparseobject *self,
                                                 int enabled)
/*[clinic end generated code: output=5ec539e3b63c8c49 input=021eb9e0bafc32c5]*/
{
    /* Switch Expat's reparse deferral on or off and mirror the new state in
       self->reparse_deferral_enabled.  Compiles to a no-op for
       Expat < 2.6.0.  Always returns None. */
#if XML_COMBINED_VERSION >= 20600
    const XML_Bool requested = enabled ? XML_TRUE : XML_FALSE;

    /* The cached flag only mirrors what Expat itself holds, so update it
       only when Expat reports that it accepted the change; otherwise the
       getter would report a state the parser is not actually in. */
    if (XML_SetReparseDeferralEnabled(self->itself, requested) == XML_TRUE) {
        self->reparse_deferral_enabled = (enabled != 0);
    }
#endif
    Py_RETURN_NONE;
}

/*[clinic input]
pyexpat.xmlparser.GetReparseDeferralEnabled

Retrieve reparse deferral enabled status; always returns false with Expat <2.6.0.
[clinic start generated code]*/

static PyObject *
pyexpat_xmlparser_GetReparseDeferralEnabled_impl(xmlparseobject *self)
/*[clinic end generated code: output=4e91312e88a595a8 input=54b5f11d32b20f3e]*/
{
    /* Report the cached reparse deferral flag as a Python bool. */
    if (self->reparse_deferral_enabled) {
        Py_RETURN_TRUE;
    }
    Py_RETURN_FALSE;
}

/*[clinic input]
pyexpat.xmlparser.Parse

Expand Down Expand Up @@ -1063,6 +1104,8 @@ static struct PyMethodDef xmlparse_methods[] = {
#if XML_COMBINED_VERSION >= 19505
PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF
#endif
PYEXPAT_XMLPARSER_SETREPARSEDEFERRALENABLED_METHODDEF
PYEXPAT_XMLPARSER_GETREPARSEDEFERRALENABLED_METHODDEF
{NULL, NULL} /* sentinel */
};

Expand Down Expand Up @@ -1158,6 +1201,11 @@ newxmlparseobject(pyexpat_state *state, const char *encoding,
self->ns_prefixes = 0;
self->handlers = NULL;
self->intern = Py_XNewRef(intern);
#if XML_COMBINED_VERSION >= 20600
self->reparse_deferral_enabled = true;
#else
self->reparse_deferral_enabled = false;
#endif

/* namespace_separator is either NULL or contains one char + \0 */
self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
Expand Down Expand Up @@ -2019,6 +2067,11 @@ pyexpat_exec(PyObject *mod)
#else
capi->SetHashSalt = NULL;
#endif
#if XML_COMBINED_VERSION >= 20600
capi->SetReparseDeferralEnabled = XML_SetReparseDeferralEnabled;
#else
capi->SetReparseDeferralEnabled = NULL;
#endif

/* export using capsule */
PyObject *capi_object = PyCapsule_New(capi, PyExpat_CAPSULE_NAME,
Expand Down
Loading