Python-Markdown · waylan · Mar 12, 2024 · Nov 10, 2023 · Mar 8, 2024 · Mar 8, 2024
diff --git a/.spell-dict b/.spell-dict
@@ -146,6 +146,7 @@ Treeprocessor
 Treeprocessors
 tuple
 tuples
+unparsable
 unclosed
 unescape
 unescaping

diff --git a/docs/changelog.md b/docs/changelog.md
@@ -34,8 +34,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 * Include `scripts/*.py` in the generated source tarballs (#1430).
 * Ensure lines after heading in loose list are properly detabbed (#1443).
 * Give smarty tree processor higher priority than toc (#1440).
-* Permit carrots (`^`) and square brackets (`]`) but explicitly exclude
+* Permit carets (`^`) and square brackets (`]`) but explicitly exclude
   backslashes (`\`) from abbreviations (#1444).
+* In attribute lists (`attr_list`, `fenced_code`), quoted attribute values are
+  now allowed to contain closing curly braces (`}`) (#1414).
 
 ## [3.5.2] -- 2024-01-10
 

diff --git a/markdown/extensions/attr_list.py b/markdown/extensions/attr_list.py
@@ -57,17 +57,30 @@ def _handle_word(s, t):
 
 
 _scanner = re.Scanner([
-    (r'[^ =]+=".*?"', _handle_double_quote),
-    (r"[^ =]+='.*?'", _handle_single_quote),
-    (r'[^ =]+=[^ =]+', _handle_key_value),
-    (r'[^ =]+', _handle_word),
+    (r'[^ =}]+=".*?"', _handle_double_quote),
+    (r"[^ =}]+='.*?'", _handle_single_quote),
+    (r'[^ =}]+=[^ =}]+', _handle_key_value),
+    (r'[^ =}]+', _handle_word),
     (r' ', None)
 ])
 
 
-def get_attrs(str: str) -> list[tuple[str, str]]:
-    """ Parse attribute list and return a list of attribute tuples. """
-    return _scanner.scan(str)[0]
+def get_attrs_and_remainder(attrs_string: str) -> tuple[list[tuple[str, str]], str]:
+    """ Parse attribute list and return a list of attribute tuples.
+
+    Additionally, return any unparsed text, starting from the first leftover closing curly brace (`}`).
+    In typical cases, its presence should mean that the input does not match the intended attribute list syntax.
+    """
+    attrs, remainder = _scanner.scan(attrs_string)
+    # To keep historic behavior, discard all unparsable text prior to '}'.
+    index = remainder.find('}')
+    remainder = remainder[index:] if index != -1 else ''
+    return attrs, remainder
+
+
+def get_attrs(str: str) -> list[tuple[str, str]]:  # pragma: no cover
+    """ Soft-deprecated. Prefer `get_attrs_and_remainder`. """
+    return get_attrs_and_remainder(str)[0]
 
 
 def isheader(elem: Element) -> bool:
@@ -76,7 +89,7 @@ def isheader(elem: Element) -> bool:
 
 class AttrListTreeprocessor(Treeprocessor):
 
-    BASE_RE = r'\{\:?[ ]*([^\}\n ][^\}\n]*)[ ]*\}'
+    BASE_RE = r'\{\:?[ ]*([^\}\n ][^\n]*)[ ]*\}'
     HEADER_RE = re.compile(r'[ ]+{}[ ]*$'.format(BASE_RE))
     BLOCK_RE = re.compile(r'\n[ ]*{}[ ]*$'.format(BASE_RE))
     INLINE_RE = re.compile(r'^{}'.format(BASE_RE))
@@ -106,49 +119,58 @@ def run(self, doc: Element) -> None:
                         # use tail of last child. no `ul` or `ol`.
                         m = RE.search(elem[-1].tail)
                         if m:
-                            self.assign_attrs(elem, m.group(1))
-                            elem[-1].tail = elem[-1].tail[:m.start()]
+                            if not self.assign_attrs(elem, m.group(1), strict=True):
+                                elem[-1].tail = elem[-1].tail[:m.start()]
                     elif pos is not None and pos > 0 and elem[pos-1].tail:
                         # use tail of last child before `ul` or `ol`
                         m = RE.search(elem[pos-1].tail)
                         if m:
-                            self.assign_attrs(elem, m.group(1))
-                            elem[pos-1].tail = elem[pos-1].tail[:m.start()]
+                            if not self.assign_attrs(elem, m.group(1), strict=True):
+                                elem[pos-1].tail = elem[pos-1].tail[:m.start()]
                     elif elem.text:
                         # use text. `ul` is first child.
                         m = RE.search(elem.text)
                         if m:
-                            self.assign_attrs(elem, m.group(1))
-                            elem.text = elem.text[:m.start()]
+                            if not self.assign_attrs(elem, m.group(1), strict=True):
+                                elem.text = elem.text[:m.start()]
                 elif len(elem) and elem[-1].tail:
                     # has children. Get from tail of last child
                     m = RE.search(elem[-1].tail)
                     if m:
-                        self.assign_attrs(elem, m.group(1))
-                        elem[-1].tail = elem[-1].tail[:m.start()]
-                        if isheader(elem):
-                            # clean up trailing #s
-                            elem[-1].tail = elem[-1].tail.rstrip('#').rstrip()
+                        if not self.assign_attrs(elem, m.group(1), strict=True):
+                            elem[-1].tail = elem[-1].tail[:m.start()]
+                            if isheader(elem):
+                                # clean up trailing #s
+                                elem[-1].tail = elem[-1].tail.rstrip('#').rstrip()
                 elif elem.text:
                     # no children. Get from text.
                     m = RE.search(elem.text)
                     if m:
-                        self.assign_attrs(elem, m.group(1))
-                        elem.text = elem.text[:m.start()]
-                        if isheader(elem):
-                            # clean up trailing #s
-                            elem.text = elem.text.rstrip('#').rstrip()
+                        if not self.assign_attrs(elem, m.group(1), strict=True):
+                            elem.text = elem.text[:m.start()]
+                            if isheader(elem):
+                                # clean up trailing #s
+                                elem.text = elem.text.rstrip('#').rstrip()
             else:
                 # inline: check for `attrs` at start of tail
                 if elem.tail:
                     m = self.INLINE_RE.match(elem.tail)
                     if m:
-                        self.assign_attrs(elem, m.group(1))
-                        elem.tail = elem.tail[m.end():]
+                        remainder = self.assign_attrs(elem, m.group(1))
+                        elem.tail = elem.tail[m.end():] + remainder
+
+    def assign_attrs(self, elem: Element, attrs_string: str, *, strict: bool = False) -> str:
+        """ Assign `attrs` to element.
+
+        If the `attrs_string` has an extra closing curly brace, the remaining text is returned.
+
+        The `strict` argument controls whether to still assign `attrs` if there is a remaining `}`.
+        """
+        attrs, remainder = get_attrs_and_remainder(attrs_string)
+        if strict and remainder:
+            return remainder
 
-    def assign_attrs(self, elem: Element, attrs: str) -> None:
-        """ Assign `attrs` to element. """
-        for k, v in get_attrs(attrs):
+        for k, v in attrs:
             if k == '.':
                 # add to class
                 cls = elem.get('class')
@@ -159,11 +181,13 @@ def assign_attrs(self, elem: Element, attrs: str) -> None:
             else:
                 # assign attribute `k` with `v`
                 elem.set(self.sanitize_name(k), v)
+        # The text that we initially over-matched will be put back.
+        return remainder
 
     def sanitize_name(self, name: str) -> str:
         """
-        Sanitize name as 'an XML Name, minus the ":"'.
-        See https://www.w3.org/TR/REC-xml-names/#NT-NCName
+        Sanitize name as 'an XML Name, minus the `:`.'
+        See <https://www.w3.org/TR/REC-xml-names/#NT-NCName>.
         """
         return self.NAME_RE.sub('_', name)
 

diff --git a/markdown/extensions/fenced_code.py b/markdown/extensions/fenced_code.py
@@ -25,7 +25,7 @@
 from . import Extension
 from ..preprocessors import Preprocessor
 from .codehilite import CodeHilite, CodeHiliteExtension, parse_hl_lines
-from .attr_list import get_attrs, AttrListExtension
+from .attr_list import get_attrs_and_remainder, AttrListExtension
 from ..util import parseBoolValue
 from ..serializers import _escape_attrib_html
 import re
@@ -56,7 +56,7 @@ class FencedBlockPreprocessor(Preprocessor):
     FENCED_BLOCK_RE = re.compile(
         dedent(r'''
             (?P<fence>^(?:~{3,}|`{3,}))[ ]*                          # opening fence
-            ((\{(?P<attrs>[^\}\n]*)\})|                              # (optional {attrs} or
+            ((\{(?P<attrs>[^\n]*)\})|                                # (optional {attrs} or
             (\.?(?P<lang>[\w#.+-]*)[ ]*)?                            # optional (.)lang
             (hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot)[ ]*)?) # optional hl_lines)
             \n                                                       # newline (end of opening fence)
@@ -94,12 +94,17 @@ def run(self, lines: list[str]) -> list[str]:
             self.checked_for_deps = True
 
         text = "\n".join(lines)
+        index = 0
         while 1:
-            m = self.FENCED_BLOCK_RE.search(text)
+            m = self.FENCED_BLOCK_RE.search(text, index)
             if m:
                 lang, id, classes, config = None, '', [], {}
                 if m.group('attrs'):
-                    id, classes, config = self.handle_attrs(get_attrs(m.group('attrs')))
+                    attrs, remainder = get_attrs_and_remainder(m.group('attrs'))
+                    if remainder:  # Does not have correctly matching curly braces, so the syntax is invalid.
+                        index = m.end('attrs')  # Explicitly skip over this, to prevent an infinite loop.
+                        continue
+                    id, classes, config = self.handle_attrs(attrs)
                     if len(classes):
                         lang = classes.pop(0)
                 else:
@@ -151,6 +156,8 @@ def run(self, lines: list[str]) -> list[str]:
 
                 placeholder = self.md.htmlStash.store(code)
                 text = f'{text[:m.start()]}\n{placeholder}\n{text[m.end():]}'
+                # Continue from after the replaced text in the next iteration.
+                index = m.start() + 1 + len(placeholder)
             else:
                 break
         return text.split("\n")

diff --git a/tests/test_syntax/extensions/test_attr_list.py b/tests/test_syntax/extensions/test_attr_list.py
@@ -23,16 +23,53 @@
 
 
 class TestAttrList(TestCase):
-
     maxDiff = None
+    default_kwargs = {'extensions': ['attr_list']}
 
     # TODO: Move the rest of the `attr_list` tests here.
 
-    def test_empty_list(self):
+    def test_empty_attr_list(self):
         self.assertMarkdownRenders(
             '*foo*{ }',
-            '<p><em>foo</em>{ }</p>',
-            extensions=['attr_list']
+            '<p><em>foo</em>{ }</p>'
+        )
+
+    def test_curly_after_inline(self):
+        self.assertMarkdownRenders(
+            '*inline*{.a} } *text*{.a }}',
+            '<p><em class="a">inline</em> } <em class="a">text</em>}</p>'
+        )
+
+    def test_extra_eq_gets_ignored_inside_curly_inline(self):
+        # Undesired behavior but kept for historic compatibility.
+        self.assertMarkdownRenders(
+            '*inline*{data-test="x" =a} *text*',
+            '<p><em data-test="x">inline</em> <em>text</em></p>'
+        )
+
+    def test_curly_after_block(self):
+        self.assertMarkdownRenders(
+            '# Heading {.a} }',
+            '<h1>Heading {.a} }</h1>'
+        )
+
+    def test_curly_in_single_quote(self):
+        self.assertMarkdownRenders(
+            "# Heading {data-test='{}'}",
+            '<h1 data-test="{}">Heading</h1>'
+        )
+
+    def test_curly_in_double_quote(self):
+        self.assertMarkdownRenders(
+            '# Heading {data-test="{}"}',
+            '<h1 data-test="{}">Heading</h1>'
+        )
+
+    def test_unclosed_quote_ignored(self):
+        # Undesired behavior but kept for historic compatibility.
+        self.assertMarkdownRenders(
+            '# Heading {foo="bar}',
+            '<h1 foo="&quot;bar">Heading</h1>'
         )
 
     def test_table_td(self):

diff --git a/tests/test_syntax/extensions/test_fenced_code.py b/tests/test_syntax/extensions/test_fenced_code.py
@@ -394,6 +394,48 @@ def testFencedCodeEscapedAttrs(self):
             extensions=['fenced_code', 'attr_list']
         )
 
+    def testFencedCodeCurlyInAttrs(self):
+        self.assertMarkdownRenders(
+            self.dedent(
+                '''
+                ``` { data-test="{}" }
+                # Some python code
+                ```
+                '''
+            ),
+            self.dedent(
+                '''
+                <pre><code data-test="{}"># Some python code
+                </code></pre>
+                '''
+            ),
+            extensions=['fenced_code', 'attr_list']
+        )
+
+    def testFencedCodeMismatchedCurlyInAttrs(self):
+        self.assertMarkdownRenders(
+            self.dedent(
+                '''
+                ``` { data-test="{}" } }
+                # Some python code
+                ```
+                ```
+                test
+                ```
+                '''
+            ),
+            self.dedent(
+                '''
+                <p>``` { data-test="{}" } }</p>
+                <h1>Some python code</h1>
+                <pre><code></code></pre>
+                <p>test
+                ```</p>
+                '''
+            ),
+            extensions=['fenced_code', 'attr_list']
+        )
+
 
 class TestFencedCodeWithCodehilite(TestCase):