Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update Python parser for RFCs 9110/9112 #7661

Merged
merged 3 commits into from
Oct 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
84 changes: 49 additions & 35 deletions aiohttp/http_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,16 +51,16 @@

ASCIISET: Final[Set[str]] = set(string.printable)

# See https://tools.ietf.org/html/rfc7230#section-3.1.1
# and https://tools.ietf.org/html/rfc7230#appendix-B
# See https://www.rfc-editor.org/rfc/rfc9110.html#name-overview
# and https://www.rfc-editor.org/rfc/rfc9110.html#name-tokens
#
# method = token
# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
# "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
# token = 1*tchar
METHRE: Final[Pattern[str]] = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]+")
VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d+).(\d+)")
HDRRE: Final[Pattern[bytes]] = re.compile(rb"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\\\\\"]")
VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d).(\d)")
HDRRE: Final[Pattern[bytes]] = re.compile(rb"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\"\\]")


class RawRequestMessage(NamedTuple):
Expand Down Expand Up @@ -135,8 +135,11 @@ def parse_headers(
except ValueError:
raise InvalidHeader(line) from None

bname = bname.strip(b" \t")
bvalue = bvalue.lstrip()
# https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2
if {bname[0], bname[-1]} & {32, 9}: # {" ", "\t"}
raise InvalidHeader(line)

bvalue = bvalue.lstrip(b" \t")
if HDRRE.search(bname):
raise InvalidHeader(bname)
if len(bname) > self.max_field_size:
Expand All @@ -157,6 +160,7 @@ def parse_headers(
# consume continuation lines
continuation = line and line[0] in (32, 9) # (' ', '\t')

# Deprecated: https://www.rfc-editor.org/rfc/rfc9112.html#name-obsolete-line-folding
if continuation:
bvalue_lst = [bvalue]
while continuation:
Expand Down Expand Up @@ -191,10 +195,14 @@ def parse_headers(
str(header_length),
)

bvalue = bvalue.strip()
bvalue = bvalue.strip(b" \t")
name = bname.decode("utf-8", "surrogateescape")
value = bvalue.decode("utf-8", "surrogateescape")

# https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
if "\n" in value or "\r" in value or "\x00" in value:
raise InvalidHeader(bvalue)

headers.add(name, value)
raw_headers.append((bname, bvalue))

Expand Down Expand Up @@ -309,15 +317,12 @@ def get_content_length() -> Optional[int]:
if length_hdr is None:
return None

try:
length = int(length_hdr)
except ValueError:
# Shouldn't allow +/- or other number formats.
# https://www.rfc-editor.org/rfc/rfc9110#section-8.6-2
if not length_hdr.strip(" \t").isdigit():
raise InvalidHeader(CONTENT_LENGTH)

if length < 0:
raise InvalidHeader(CONTENT_LENGTH)

return length
return int(length_hdr)

length = get_content_length()
# do not support old websocket spec
Expand Down Expand Up @@ -457,6 +462,24 @@ def parse_headers(
upgrade = False
chunked = False

# https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-6
# https://www.rfc-editor.org/rfc/rfc9110.html#name-collected-abnf
singletons = (
hdrs.CONTENT_LENGTH,
hdrs.CONTENT_LOCATION,
hdrs.CONTENT_RANGE,
hdrs.CONTENT_TYPE,
hdrs.ETAG,
hdrs.HOST,
hdrs.MAX_FORWARDS,
hdrs.SERVER,
hdrs.TRANSFER_ENCODING,
hdrs.USER_AGENT,
)
bad_hdr = next((h for h in singletons if len(headers.getall(h, ())) > 1), None)
if bad_hdr is not None:
raise BadHttpMessage(f"Duplicate '{bad_hdr}' header found.")

# keep-alive
conn = headers.get(hdrs.CONNECTION)
if conn:
Expand Down Expand Up @@ -510,7 +533,7 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
# request line
line = lines[0].decode("utf-8", "surrogateescape")
try:
method, path, version = line.split(None, 2)
method, path, version = line.split(maxsplit=2)
except ValueError:
raise BadStatusLine(line) from None

Expand All @@ -524,14 +547,10 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
raise BadStatusLine(method)

# version
try:
if version.startswith("HTTP/"):
n1, n2 = version[5:].split(".", 1)
version_o = HttpVersion(int(n1), int(n2))
else:
raise BadStatusLine(version)
except Exception:
raise BadStatusLine(version)
match = VERSRE.match(version)
if match is None:
raise BadStatusLine(line)
version_o = HttpVersion(int(match.group(1)), int(match.group(2)))

if method == "CONNECT":
# authority-form,
Expand Down Expand Up @@ -598,12 +617,12 @@ class HttpResponseParser(HttpParser[RawResponseMessage]):
def parse_message(self, lines: List[bytes]) -> RawResponseMessage:
line = lines[0].decode("utf-8", "surrogateescape")
try:
version, status = line.split(None, 1)
version, status = line.split(maxsplit=1)
except ValueError:
raise BadStatusLine(line) from None

try:
status, reason = status.split(None, 1)
status, reason = status.split(maxsplit=1)
except ValueError:
reason = ""

Expand All @@ -619,13 +638,9 @@ def parse_message(self, lines: List[bytes]) -> RawResponseMessage:
version_o = HttpVersion(int(match.group(1)), int(match.group(2)))

# The status code is a three-digit number
try:
status_i = int(status)
except ValueError:
raise BadStatusLine(line) from None

if status_i > 999:
if len(status) != 3 or not status.isdigit():
raise BadStatusLine(line)
status_i = int(status)

# read headers
(
Expand Down Expand Up @@ -760,14 +775,13 @@ def feed_data(
else:
size_b = chunk[:pos]

try:
size = int(bytes(size_b), 16)
except ValueError:
if not size_b.isdigit():
exc = TransferEncodingError(
chunk[:pos].decode("ascii", "surrogateescape")
)
self.payload.set_exception(exc)
raise exc from None
raise exc
size = int(bytes(size_b), 16)

chunk = chunk[pos + 2 :]
if size == 0: # eof marker
Expand Down
87 changes: 82 additions & 5 deletions tests/test_http_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,74 @@ def test_invalid_name(parser) -> None:
parser.feed_data(text)


def test_cve_2023_37276(parser: Any) -> None:
text = b"""POST / HTTP/1.1\r\nHost: localhost:8080\r\nX-Abc: \rxTransfer-Encoding: chunked\r\n\r\n"""
with pytest.raises(http_exceptions.BadHttpMessage):
parser.feed_data(text)


@pytest.mark.parametrize(
"hdr",
(
"Content-Length: -5", # https://www.rfc-editor.org/rfc/rfc9110.html#name-content-length
"Content-Length: +256",
"Foo: abc\rdef", # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
"Bar: abc\ndef",
"Baz: abc\x00def",
"Foo : bar", # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2
"Foo\t: bar",
),
)
def test_bad_headers(parser: Any, hdr: str) -> None:
text = f"POST / HTTP/1.1\r\n{hdr}\r\n\r\n".encode()
with pytest.raises(http_exceptions.InvalidHeader):
parser.feed_data(text)


def test_bad_chunked_py(loop: Any, protocol: Any) -> None:
"""Test that invalid chunked encoding doesn't allow content-length to be used."""
parser = HttpRequestParserPy(
protocol,
loop,
2**16,
max_line_size=8190,
max_field_size=8190,
)
text = (
b"GET / HTTP/1.1\r\nHost: a\r\nTransfer-Encoding: chunked\r\n\r\n0_2e\r\n\r\n"
+ b"GET / HTTP/1.1\r\nHost: a\r\nContent-Length: 5\r\n\r\n0\r\n\r\n"
)
messages, upgrade, tail = parser.feed_data(text)
assert isinstance(messages[0][1].exception(), http_exceptions.TransferEncodingError)


@pytest.mark.skipif(
"HttpRequestParserC" not in dir(aiohttp.http_parser),
reason="C based HTTP parser not available",
)
def test_bad_chunked_c(loop: Any, protocol: Any) -> None:
"""C parser behaves differently. Maybe we should align them later."""
parser = HttpRequestParserC(
protocol,
loop,
2**16,
max_line_size=8190,
max_field_size=8190,
)
text = (
b"GET / HTTP/1.1\r\nHost: a\r\nTransfer-Encoding: chunked\r\n\r\n0_2e\r\n\r\n"
+ b"GET / HTTP/1.1\r\nHost: a\r\nContent-Length: 5\r\n\r\n0\r\n\r\n"
)
with pytest.raises(http_exceptions.BadHttpMessage):
parser.feed_data(text)


def test_whitespace_before_header(parser: Any) -> None:
text = b"GET / HTTP/1.1\r\n\tContent-Length: 1\r\n\r\nX"
with pytest.raises(http_exceptions.BadHttpMessage):
parser.feed_data(text)


@pytest.mark.parametrize("size", [40960, 8191])
def test_max_header_field_size(parser, size) -> None:
name = b"t" * size
Expand Down Expand Up @@ -657,6 +725,11 @@ def test_http_request_parser_bad_version(parser) -> None:
parser.feed_data(b"GET //get HT/11\r\n\r\n")


def test_http_request_parser_bad_version_number(parser: Any) -> None:
with pytest.raises(http_exceptions.BadHttpMessage):
parser.feed_data(b"GET /test HTTP/12.3\r\n\r\n")


@pytest.mark.parametrize("size", [40965, 8191])
def test_http_request_max_status_line(parser, size) -> None:
path = b"t" * (size - 5)
Expand Down Expand Up @@ -724,6 +797,11 @@ def test_http_response_parser_bad_version(response) -> None:
response.feed_data(b"HT/11 200 Ok\r\n\r\n")


def test_http_response_parser_bad_version_number(response) -> None:
with pytest.raises(http_exceptions.BadHttpMessage):
response.feed_data(b"HTTP/12.3 200 Ok\r\n\r\n")


def test_http_response_parser_no_reason(response) -> None:
msg = response.feed_data(b"HTTP/1.1 200\r\n\r\n")[0][0][0]

Expand Down Expand Up @@ -754,19 +832,18 @@ def test_http_response_parser_bad(response) -> None:
response.feed_data(b"HTT/1\r\n\r\n")


@pytest.mark.skipif(not NO_EXTENSIONS, reason="Behaviour has changed in C parser")
def test_http_response_parser_code_under_100(response) -> None:
msg = response.feed_data(b"HTTP/1.1 99 test\r\n\r\n")[0][0][0]
assert msg.code == 99
with pytest.raises(http_exceptions.BadStatusLine):
response.feed_data(b"HTTP/1.1 99 test\r\n\r\n")


def test_http_response_parser_code_above_999(response) -> None:
with pytest.raises(http_exceptions.BadHttpMessage):
with pytest.raises(http_exceptions.BadStatusLine):
response.feed_data(b"HTTP/1.1 9999 test\r\n\r\n")


def test_http_response_parser_code_not_int(response) -> None:
with pytest.raises(http_exceptions.BadHttpMessage):
with pytest.raises(http_exceptions.BadStatusLine):
response.feed_data(b"HTTP/1.1 ttt test\r\n\r\n")


Expand Down