Skip to content

Commit

Permalink
Update Python parser for RFCs 9110/9112 (#7661)
Browse files Browse the repository at this point in the history
  • Loading branch information
Dreamsorcerer committed Oct 6, 2023
1 parent 314e960 commit 85713a4
Show file tree
Hide file tree
Showing 2 changed files with 131 additions and 40 deletions.
84 changes: 49 additions & 35 deletions aiohttp/http_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,16 +51,16 @@

ASCIISET: Final[Set[str]] = set(string.printable)

# See https://tools.ietf.org/html/rfc7230#section-3.1.1
# and https://tools.ietf.org/html/rfc7230#appendix-B
# See https://www.rfc-editor.org/rfc/rfc9110.html#name-overview
# and https://www.rfc-editor.org/rfc/rfc9110.html#name-tokens
#
# method = token
# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
# "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
# token = 1*tchar
METHRE: Final[Pattern[str]] = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]+")
VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d+).(\d+)")
HDRRE: Final[Pattern[bytes]] = re.compile(rb"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\\\\\"]")
VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d).(\d)")
HDRRE: Final[Pattern[bytes]] = re.compile(rb"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\"\\]")


class RawRequestMessage(NamedTuple):
Expand Down Expand Up @@ -135,8 +135,11 @@ def parse_headers(
except ValueError:
raise InvalidHeader(line) from None

bname = bname.strip(b" \t")
bvalue = bvalue.lstrip()
# https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2
if {bname[0], bname[-1]} & {32, 9}: # {" ", "\t"}
raise InvalidHeader(line)

bvalue = bvalue.lstrip(b" \t")
if HDRRE.search(bname):
raise InvalidHeader(bname)
if len(bname) > self.max_field_size:
Expand All @@ -157,6 +160,7 @@ def parse_headers(
# consume continuation lines
continuation = line and line[0] in (32, 9) # (' ', '\t')

# Deprecated: https://www.rfc-editor.org/rfc/rfc9112.html#name-obsolete-line-folding
if continuation:
bvalue_lst = [bvalue]
while continuation:
Expand Down Expand Up @@ -191,10 +195,14 @@ def parse_headers(
str(header_length),
)

bvalue = bvalue.strip()
bvalue = bvalue.strip(b" \t")
name = bname.decode("utf-8", "surrogateescape")
value = bvalue.decode("utf-8", "surrogateescape")

# https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
if "\n" in value or "\r" in value or "\x00" in value:
raise InvalidHeader(bvalue)

headers.add(name, value)
raw_headers.append((bname, bvalue))

Expand Down Expand Up @@ -309,15 +317,12 @@ def get_content_length() -> Optional[int]:
if length_hdr is None:
return None

try:
length = int(length_hdr)
except ValueError:
# Shouldn't allow +/- or other number formats.
# https://www.rfc-editor.org/rfc/rfc9110#section-8.6-2
if not length_hdr.strip(" \t").isdigit():
raise InvalidHeader(CONTENT_LENGTH)

if length < 0:
raise InvalidHeader(CONTENT_LENGTH)

return length
return int(length_hdr)

length = get_content_length()
# do not support old websocket spec
Expand Down Expand Up @@ -457,6 +462,24 @@ def parse_headers(
upgrade = False
chunked = False

# https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-6
# https://www.rfc-editor.org/rfc/rfc9110.html#name-collected-abnf
singletons = (
hdrs.CONTENT_LENGTH,
hdrs.CONTENT_LOCATION,
hdrs.CONTENT_RANGE,
hdrs.CONTENT_TYPE,
hdrs.ETAG,
hdrs.HOST,
hdrs.MAX_FORWARDS,
hdrs.SERVER,
hdrs.TRANSFER_ENCODING,
hdrs.USER_AGENT,
)
bad_hdr = next((h for h in singletons if len(headers.getall(h, ())) > 1), None)
if bad_hdr is not None:
raise BadHttpMessage(f"Duplicate '{bad_hdr}' header found.")

# keep-alive
conn = headers.get(hdrs.CONNECTION)
if conn:
Expand Down Expand Up @@ -510,7 +533,7 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
# request line
line = lines[0].decode("utf-8", "surrogateescape")
try:
method, path, version = line.split(None, 2)
method, path, version = line.split(maxsplit=2)
except ValueError:
raise BadStatusLine(line) from None

Expand All @@ -524,14 +547,10 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
raise BadStatusLine(method)

# version
try:
if version.startswith("HTTP/"):
n1, n2 = version[5:].split(".", 1)
version_o = HttpVersion(int(n1), int(n2))
else:
raise BadStatusLine(version)
except Exception:
raise BadStatusLine(version)
match = VERSRE.match(version)
if match is None:
raise BadStatusLine(line)
version_o = HttpVersion(int(match.group(1)), int(match.group(2)))

if method == "CONNECT":
# authority-form,
Expand Down Expand Up @@ -598,12 +617,12 @@ class HttpResponseParser(HttpParser[RawResponseMessage]):
def parse_message(self, lines: List[bytes]) -> RawResponseMessage:
line = lines[0].decode("utf-8", "surrogateescape")
try:
version, status = line.split(None, 1)
version, status = line.split(maxsplit=1)
except ValueError:
raise BadStatusLine(line) from None

try:
status, reason = status.split(None, 1)
status, reason = status.split(maxsplit=1)
except ValueError:
reason = ""

Expand All @@ -619,13 +638,9 @@ def parse_message(self, lines: List[bytes]) -> RawResponseMessage:
version_o = HttpVersion(int(match.group(1)), int(match.group(2)))

# The status code is a three-digit number
try:
status_i = int(status)
except ValueError:
raise BadStatusLine(line) from None

if status_i > 999:
if len(status) != 3 or not status.isdigit():
raise BadStatusLine(line)
status_i = int(status)

# read headers
(
Expand Down Expand Up @@ -760,14 +775,13 @@ def feed_data(
else:
size_b = chunk[:pos]

try:
size = int(bytes(size_b), 16)
except ValueError:
if not size_b.isdigit():
exc = TransferEncodingError(
chunk[:pos].decode("ascii", "surrogateescape")
)
self.payload.set_exception(exc)
raise exc from None
raise exc
size = int(bytes(size_b), 16)

chunk = chunk[pos + 2 :]
if size == 0: # eof marker
Expand Down
87 changes: 82 additions & 5 deletions tests/test_http_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,74 @@ def test_invalid_name(parser) -> None:
parser.feed_data(text)


def test_cve_2023_37276(parser: Any) -> None:
text = b"""POST / HTTP/1.1\r\nHost: localhost:8080\r\nX-Abc: \rxTransfer-Encoding: chunked\r\n\r\n"""
with pytest.raises(http_exceptions.BadHttpMessage):
parser.feed_data(text)


@pytest.mark.parametrize(
"hdr",
(
"Content-Length: -5", # https://www.rfc-editor.org/rfc/rfc9110.html#name-content-length
"Content-Length: +256",
"Foo: abc\rdef", # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
"Bar: abc\ndef",
"Baz: abc\x00def",
"Foo : bar", # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2
"Foo\t: bar",
),
)
def test_bad_headers(parser: Any, hdr: str) -> None:
text = f"POST / HTTP/1.1\r\n{hdr}\r\n\r\n".encode()
with pytest.raises(http_exceptions.InvalidHeader):
parser.feed_data(text)


def test_bad_chunked_py(loop: Any, protocol: Any) -> None:
"""Test that invalid chunked encoding doesn't allow content-length to be used."""
parser = HttpRequestParserPy(
protocol,
loop,
2**16,
max_line_size=8190,
max_field_size=8190,
)
text = (
b"GET / HTTP/1.1\r\nHost: a\r\nTransfer-Encoding: chunked\r\n\r\n0_2e\r\n\r\n"
+ b"GET / HTTP/1.1\r\nHost: a\r\nContent-Length: 5\r\n\r\n0\r\n\r\n"
)
messages, upgrade, tail = parser.feed_data(text)
assert isinstance(messages[0][1].exception(), http_exceptions.TransferEncodingError)


@pytest.mark.skipif(
"HttpRequestParserC" not in dir(aiohttp.http_parser),
reason="C based HTTP parser not available",
)
def test_bad_chunked_c(loop: Any, protocol: Any) -> None:
"""C parser behaves differently. Maybe we should align them later."""
parser = HttpRequestParserC(
protocol,
loop,
2**16,
max_line_size=8190,
max_field_size=8190,
)
text = (
b"GET / HTTP/1.1\r\nHost: a\r\nTransfer-Encoding: chunked\r\n\r\n0_2e\r\n\r\n"
+ b"GET / HTTP/1.1\r\nHost: a\r\nContent-Length: 5\r\n\r\n0\r\n\r\n"
)
with pytest.raises(http_exceptions.BadHttpMessage):
parser.feed_data(text)


def test_whitespace_before_header(parser: Any) -> None:
text = b"GET / HTTP/1.1\r\n\tContent-Length: 1\r\n\r\nX"
with pytest.raises(http_exceptions.BadHttpMessage):
parser.feed_data(text)


@pytest.mark.parametrize("size", [40960, 8191])
def test_max_header_field_size(parser, size) -> None:
name = b"t" * size
Expand Down Expand Up @@ -657,6 +725,11 @@ def test_http_request_parser_bad_version(parser) -> None:
parser.feed_data(b"GET //get HT/11\r\n\r\n")


def test_http_request_parser_bad_version_number(parser: Any) -> None:
with pytest.raises(http_exceptions.BadHttpMessage):
parser.feed_data(b"GET /test HTTP/12.3\r\n\r\n")


@pytest.mark.parametrize("size", [40965, 8191])
def test_http_request_max_status_line(parser, size) -> None:
path = b"t" * (size - 5)
Expand Down Expand Up @@ -724,6 +797,11 @@ def test_http_response_parser_bad_version(response) -> None:
response.feed_data(b"HT/11 200 Ok\r\n\r\n")


def test_http_response_parser_bad_version_number(response) -> None:
with pytest.raises(http_exceptions.BadHttpMessage):
response.feed_data(b"HTTP/12.3 200 Ok\r\n\r\n")


def test_http_response_parser_no_reason(response) -> None:
msg = response.feed_data(b"HTTP/1.1 200\r\n\r\n")[0][0][0]

Expand Down Expand Up @@ -754,19 +832,18 @@ def test_http_response_parser_bad(response) -> None:
response.feed_data(b"HTT/1\r\n\r\n")


@pytest.mark.skipif(not NO_EXTENSIONS, reason="Behaviour has changed in C parser")
def test_http_response_parser_code_under_100(response) -> None:
msg = response.feed_data(b"HTTP/1.1 99 test\r\n\r\n")[0][0][0]
assert msg.code == 99
with pytest.raises(http_exceptions.BadStatusLine):
response.feed_data(b"HTTP/1.1 99 test\r\n\r\n")


def test_http_response_parser_code_above_999(response) -> None:
with pytest.raises(http_exceptions.BadHttpMessage):
with pytest.raises(http_exceptions.BadStatusLine):
response.feed_data(b"HTTP/1.1 9999 test\r\n\r\n")


def test_http_response_parser_code_not_int(response) -> None:
with pytest.raises(http_exceptions.BadHttpMessage):
with pytest.raises(http_exceptions.BadStatusLine):
response.feed_data(b"HTTP/1.1 ttt test\r\n\r\n")


Expand Down

0 comments on commit 85713a4

Please sign in to comment.