Skip to content

Commit d65f6e7

Browse files
authored
Merge pull request #3387 from bdarnell/chunked-parsing
http1connection: Stricter handling of transfer-encoding and whitespace
2 parents 7786f09 + 8d721a8 commit d65f6e7

5 files changed

+133
-22
lines changed

tornado/http1connection.py

+38-19
Original file line numberDiff line numberDiff line change
@@ -391,14 +391,11 @@ def write_headers(
391391
self._request_start_line = start_line
392392
lines.append(utf8("%s %s HTTP/1.1" % (start_line[0], start_line[1])))
393393
# Client requests with a non-empty body must have either a
394-
# Content-Length or a Transfer-Encoding.
394+
# Content-Length or a Transfer-Encoding. If Content-Length is not
395+
# present we'll add our Transfer-Encoding below.
395396
self._chunking_output = (
396397
start_line.method in ("POST", "PUT", "PATCH")
397398
and "Content-Length" not in headers
398-
and (
399-
"Transfer-Encoding" not in headers
400-
or headers["Transfer-Encoding"] == "chunked"
401-
)
402399
)
403400
else:
404401
assert isinstance(start_line, httputil.ResponseStartLine)
@@ -420,9 +417,6 @@ def write_headers(
420417
and (start_line.code < 100 or start_line.code >= 200)
421418
# No need to chunk the output if a Content-Length is specified.
422419
and "Content-Length" not in headers
423-
# Applications are discouraged from touching Transfer-Encoding,
424-
# but if they do, leave it alone.
425-
and "Transfer-Encoding" not in headers
426420
)
427421
# If connection to a 1.1 client will be closed, inform client
428422
if (
@@ -562,7 +556,7 @@ def _can_keep_alive(
562556
return connection_header != "close"
563557
elif (
564558
"Content-Length" in headers
565-
or headers.get("Transfer-Encoding", "").lower() == "chunked"
559+
or is_transfer_encoding_chunked(headers)
566560
or getattr(start_line, "method", None) in ("HEAD", "GET")
567561
):
568562
# start_line may be a request or response start line; only
@@ -600,13 +594,6 @@ def _read_body(
600594
delegate: httputil.HTTPMessageDelegate,
601595
) -> Optional[Awaitable[None]]:
602596
if "Content-Length" in headers:
603-
if "Transfer-Encoding" in headers:
604-
# Response cannot contain both Content-Length and
605-
# Transfer-Encoding headers.
606-
# http://tools.ietf.org/html/rfc7230#section-3.3.3
607-
raise httputil.HTTPInputError(
608-
"Response with both Transfer-Encoding and Content-Length"
609-
)
610597
if "," in headers["Content-Length"]:
611598
# Proxies sometimes cause Content-Length headers to get
612599
# duplicated. If all the values are identical then we can
@@ -633,20 +620,22 @@ def _read_body(
633620
else:
634621
content_length = None
635622

623+
is_chunked = is_transfer_encoding_chunked(headers)
624+
636625
if code == 204:
637626
# This response code is not allowed to have a non-empty body,
638627
# and has an implicit length of zero instead of read-until-close.
639628
# http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.3
640-
if "Transfer-Encoding" in headers or content_length not in (None, 0):
629+
if is_chunked or content_length not in (None, 0):
641630
raise httputil.HTTPInputError(
642631
"Response with code %d should not have body" % code
643632
)
644633
content_length = 0
645634

635+
if is_chunked:
636+
return self._read_chunked_body(delegate)
646637
if content_length is not None:
647638
return self._read_fixed_body(content_length, delegate)
648-
if headers.get("Transfer-Encoding", "").lower() == "chunked":
649-
return self._read_chunked_body(delegate)
650639
if self.is_client:
651640
return self._read_body_until_close(delegate)
652641
return None
@@ -865,3 +854,33 @@ def parse_hex_int(s: str) -> int:
865854
if HEXDIGITS.fullmatch(s) is None:
866855
raise ValueError("not a hexadecimal integer: %r" % s)
867856
return int(s, 16)
857+
858+
859+
def is_transfer_encoding_chunked(headers: httputil.HTTPHeaders) -> bool:
    """Report whether ``headers`` declare ``Transfer-Encoding: chunked``.

    Returns ``False`` when no Transfer-Encoding header is present and
    ``True`` when its value, compared case-insensitively, is exactly
    ``chunked``.

    Raises `httputil.HTTPInputError` if a Transfer-Encoding header appears
    together with Content-Length, or if its value is anything other than
    ``chunked``.
    """
    # Transfer-Encoding is an area where Postel's law can lead us astray.
    # If a proxy and a backend server are both liberal in what they accept
    # but accept slightly different things, the result can be mismatched
    # framing and request smuggling. We are therefore as strict as possible
    # here, even going beyond what the RFCs require: a value of ",chunked"
    # is technically legal but does not appear in legitimate traffic.
    if "Transfer-Encoding" not in headers:
        return False

    if "Content-Length" in headers:
        # A message cannot contain both Content-Length and
        # Transfer-Encoding headers.
        # http://tools.ietf.org/html/rfc7230#section-3.3.3
        raise httputil.HTTPInputError(
            "Message with both Transfer-Encoding and Content-Length"
        )

    encoding = headers["Transfer-Encoding"]
    if encoding.lower() != "chunked":
        # No transfer-encoding other than chunked is supported, and none is
        # expected to be added because the concept of transfer-encoding has
        # been removed in HTTP/2.
        raise httputil.HTTPInputError("Unsupported Transfer-Encoding %s" % encoding)
    return True

tornado/httputil.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,9 @@
6262
from asyncio import Future # noqa: F401
6363
import unittest # noqa: F401
6464

65+
# To be used with str.strip() and related methods.
66+
HTTP_WHITESPACE = " \t"
67+
6568

6669
@lru_cache(1000)
6770
def _normalize_header(name: str) -> str:
@@ -171,15 +174,15 @@ def parse_line(self, line: str) -> None:
171174
# continuation of a multi-line header
172175
if self._last_key is None:
173176
raise HTTPInputError("first header line cannot start with whitespace")
174-
new_part = " " + line.lstrip()
177+
new_part = " " + line.lstrip(HTTP_WHITESPACE)
175178
self._as_list[self._last_key][-1] += new_part
176179
self._dict[self._last_key] += new_part
177180
else:
178181
try:
179182
name, value = line.split(":", 1)
180183
except ValueError:
181184
raise HTTPInputError("no colon in header line")
182-
self.add(name, value.strip())
185+
self.add(name, value.strip(HTTP_WHITESPACE))
183186

184187
@classmethod
185188
def parse(cls, headers: str) -> "HTTPHeaders":

tornado/test/httpserver_test.py

+70
Original file line numberDiff line numberDiff line change
@@ -581,6 +581,76 @@ def test_chunked_request_body_invalid_size(self):
581581
)
582582
self.assertEqual(400, start_line.code)
583583

584+
def test_chunked_request_body_duplicate_header(self):
    # A repeated Transfer-Encoding header must be treated as an error
    # rather than being allowed to confuse chunked-encoding detection and
    # corrupt the message framing.
    request = (
        b"POST /echo HTTP/1.1\n"
        b"Transfer-Encoding: chunked\n"
        b"Transfer-encoding: chunked\n"
        b"\n"
        b"2\n"
        b"ok\n"
        b"0\n"
        b"\n"
    )
    self.stream.write(request)

    expected_log = ".*Unsupported Transfer-Encoding chunked,chunked"
    with ExpectLog(gen_log, expected_log, level=logging.INFO):
        start_line, headers, response = self.io_loop.run_sync(
            lambda: read_stream_body(self.stream)
        )
    self.assertEqual(400, start_line.code)
608+
609+
def test_chunked_request_body_unsupported_transfer_encoding(self):
    # "chunked" is the only transfer-encoding we support; a request that
    # lists any other encoding is expected to be answered with a 400.
    request = (
        b"POST /echo HTTP/1.1\n"
        b"Transfer-Encoding: gzip, chunked\n"
        b"\n"
        b"2\n"
        b"ok\n"
        b"0\n"
        b"\n"
    )
    self.stream.write(request)

    expected_log = ".*Unsupported Transfer-Encoding gzip, chunked"
    with ExpectLog(gen_log, expected_log, level=logging.INFO):
        start_line, headers, response = self.io_loop.run_sync(
            lambda: read_stream_body(self.stream)
        )
    self.assertEqual(400, start_line.code)
629+
630+
def test_chunked_request_body_transfer_encoding_and_content_length(self):
    # Transfer-Encoding and Content-Length are mutually exclusive; a
    # request carrying both is expected to be answered with a 400.
    request = (
        b"POST /echo HTTP/1.1\n"
        b"Transfer-Encoding: chunked\n"
        b"Content-Length: 2\n"
        b"\n"
        b"2\n"
        b"ok\n"
        b"0\n"
        b"\n"
    )
    self.stream.write(request)

    expected_log = ".*Message with both Transfer-Encoding and Content-Length"
    with ExpectLog(gen_log, expected_log, level=logging.INFO):
        start_line, headers, response = self.io_loop.run_sync(
            lambda: read_stream_body(self.stream)
        )
    self.assertEqual(400, start_line.code)
653+
584654
@gen_test
585655
def test_invalid_content_length(self):
586656
# HTTP only allows decimal digits in content-length. Make sure we don't

tornado/test/httputil_test.py

+19
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,25 @@ def test_unicode_newlines(self):
334334
gen_log.warning("failed while trying %r in %s", newline, encoding)
335335
raise
336336

337+
def test_unicode_whitespace(self):
    # The HTTP standard only calls for tabs and spaces to be stripped from
    # header values; other unicode whitespace must be left as-is. For
    # headers this specifically means the whitespace characters that fall
    # within the latin1 charset.
    cases = (
        (" ", True),  # SPACE
        ("\t", True),  # TAB
        ("\u00a0", False),  # NON-BREAKING SPACE
        ("\u0085", False),  # NEXT LINE
    )
    for char, is_stripped in cases:
        raw_value = "%schunked" % char
        parsed = HTTPHeaders.parse("Transfer-Encoding: " + raw_value)
        expected_value = "chunked" if is_stripped else raw_value
        self.assertEqual(
            [("Transfer-Encoding", expected_value)], list(parsed.get_all())
        )
355+
337356
def test_optional_cr(self):
338357
# Both CRLF and LF should be accepted as separators. CR should not be
339358
# part of the data when followed by LF, but it is a normal char

tornado/test/simple_httpclient_test.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -828,7 +828,7 @@ def test_chunked_with_content_length(self):
828828
with ExpectLog(
829829
gen_log,
830830
(
831-
"Malformed HTTP message from None: Response "
831+
"Malformed HTTP message from None: Message "
832832
"with both Transfer-Encoding and Content-Length"
833833
),
834834
level=logging.INFO,

0 commit comments

Comments
 (0)