Skip to content

Commit

Permalink
#11804 Remove > character replacement in escaped comments (#11805)
Browse files Browse the repository at this point in the history
  • Loading branch information
wsanchez committed Feb 27, 2023
2 parents 7f421f9 + 84c9087 commit 2181c52
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 19 deletions.
1 change: 1 addition & 0 deletions src/twisted/newsfragments/11804.bugfix
@@ -0,0 +1 @@
To prevent parsing errors and ensure validity when serializing HTML comments, twisted.web.template.flattenString has been updated to escape the --> sequence within comments.
8 changes: 5 additions & 3 deletions src/twisted/web/_flatten.py
Expand Up @@ -7,7 +7,6 @@
complex or arbitrarily nested, as strings.
"""


from inspect import iscoroutine
from io import BytesIO
from sys import exc_info
Expand Down Expand Up @@ -169,7 +168,10 @@ def escapedCDATA(data: Union[bytes, str]) -> bytes:

def escapedComment(data: Union[bytes, str]) -> bytes:
"""
Escape a comment for inclusion in a document.
Within comments the sequence C{-->} can be mistaken as the end of the comment.
To ensure consistent parsing and valid output the sequence is replaced with C{--&gt;}.
Furthermore, whitespace is added when a comment ends in a dash. This is done to break
the connection of the ending C{-} with the closing C{-->}.
@param data: The string to escape.
Expand All @@ -178,7 +180,7 @@ def escapedComment(data: Union[bytes, str]) -> bytes:
"""
if isinstance(data, str):
data = data.encode("utf-8")
data = data.replace(b"--", b"- - ").replace(b">", b"&gt;")
data = data.replace(b"-->", b"--&gt;")
if data and data[-1:] == b"-":
data += b" "
return data
Expand Down
26 changes: 10 additions & 16 deletions src/twisted/web/test/test_flatten.py
Expand Up @@ -267,16 +267,10 @@ def test_serializeComment(self) -> None:
def test_commentEscaping(self) -> Deferred[List[bytes]]:
"""
The data in a L{Comment} is escaped and mangled in the flattened output
so that the result is a legal SGML and XML comment.
so that the result can be safely included in an HTML document.
SGML comment syntax is complicated and hard to use. This rule is more
restrictive, and more compatible:
Comments start with <!-- and end with --> and never contain -- or >.
Also by XML syntax, a comment may not end with '-'.
@see: U{http://www.w3.org/TR/REC-xml/#sec-comments}
Test that C{>} is escaped when the sequence C{-->} is encountered
within a comment, and that comments do not end with C{-}.
"""

def verifyComment(c: bytes) -> None:
Expand All @@ -292,19 +286,19 @@ def verifyComment(c: bytes) -> None:
# illegally.
self.assertTrue(len(c) >= 7, f"{c!r} is too short to be a legal comment")
content = c[4:-3]
self.assertNotIn(b"--", content)
self.assertNotIn(b">", content)
if b"foo" in content:
self.assertIn(b">", content)
else:
self.assertNotIn(b">", content)
if content:
self.assertNotEqual(content[-1], b"-")

results = []
for c in [
"",
"foo---bar",
"foo---bar-",
"foo>bar",
"foo-->bar",
"----------------",
"foo > bar",
"abracadabra-",
"not-->magic",
]:
d = flattenString(None, Comment(c))
d.addCallback(verifyComment)
Expand Down

0 comments on commit 2181c52

Please sign in to comment.