Fix multiple breaking issues from new pyparsing versions (#296)

Relevant discussions in: #296, #302

* Fix broken parsing (whitespace problems and multilingual identifiers)
* Fix broken HTML parsing
* Update pyparsing to `>=3.0.9`
* Remove `parse_html` function
* Set pyparsing version to >=3 (anything below will break)
* Update the changelog, including the latest CI change
* Update formatting according to `black`
pydot · Dec 16, 2023 · eabcff2 · eabcff2
1 parent 803114c
commit eabcff2
Show file tree

Hide file tree

Showing 3 changed files with 27 additions and 36 deletions.
diff --git a/ChangeLog b/ChangeLog
@@ -8,6 +8,11 @@ Pydot versions since 1.4.2 adhere to [PEP 440-style semantic versioning]
 ------------------
 
 Changed:
+- Fixed broken parsing caused by `pyparsing` updates. (#296)
+  With this, the pydot project rises from the dead.
+- (Internal) CI revived by @ferdnyc. (#302)
+  Modernized and clarified the development process.
+  Testing is done against multiple Python versions.
 - Reorganized package/module structure. (#230)
   The `pydot` package is installed as a directory now instead of as
   two modules:
@@ -76,11 +81,12 @@ Deprecated:
   structure" above.
 
 Removed:
-- Drop support for Python 2 and Python 3.4. (#229)
+- Drop support for Python 2 and Python < 3.7. (#229, #302, #296)
   **USER FEEDBACK REQUESTED**
-  We are considering if pydot 2.0 should drop support for Python 3.5
+  ~~We are considering if pydot 2.0 should drop support for Python 3.5
   and 3.6 as well. If this would affect you, please leave a comment in
-  https://github.com/pydot/pydot/issues/268.
+  https://github.com/pydot/pydot/issues/268.~~
+  EDIT: Support for Python 3.5 and 3.6 has since been dropped; a lot of
+  time has passed since this entry was written.
 
 
 1.4.2 (2021-02-15)

diff --git a/pyproject.toml b/pyproject.toml
@@ -8,7 +8,7 @@ readme = "README.md"
 license = {file = "LICENSE"}
 requires-python = ">= 3.7"
 dependencies = [
-  'pyparsing>=2.1.4,<3'
+  'pyparsing>=3'
 ]
 authors = [
   {name = "Ero Carrera", email = "ero.carrera@gmail.com"},

diff --git a/src/pydot/dot_parser.py b/src/pydot/dot_parser.py
@@ -8,24 +8,22 @@
 Fixes by: Ero Carrera <ero.carrera@gmail.com>
 """
 from pyparsing import (
-    nestedExpr,
-    Literal,
     CaselessLiteral,
-    Word,
-    OneOrMore,
+    CharsNotIn,
+    Combine,
     Forward,
     Group,
+    Literal,
+    OneOrMore,
     Optional,
-    Combine,
-    restOfLine,
-    cStyleComment,
-    nums,
-    alphanums,
-    printables,
     ParseException,
     ParseResults,
-    CharsNotIn,
     QuotedString,
+    Word,
+    cStyleComment,
+    nums,
+    pyparsing_unicode,
+    restOfLine,
 )
 
 import pydot
@@ -380,10 +378,6 @@ def graph_definition():
         rparen = Literal(")")
         equals = Literal("=")
         comma = Literal(",")
-        dot = Literal(".")
-        slash = Literal("/")
-        bslash = Literal("\\")
-        star = Literal("*")
         semi = Literal(";")
         at = Literal("@")
         minus = Literal("-")
@@ -397,29 +391,20 @@ def graph_definition():
         edge_ = CaselessLiteral("edge")
 
         # token definitions
-        identifier = Word(alphanums + "_.").setName("identifier")
+        identifier = Word(
+            pyparsing_unicode.BasicMultilingualPlane.alphanums + "_."
+        ).setName("identifier")
 
         double_quoted_string = QuotedString(
             '"', multiline=True, unquoteResults=False, escChar="\\"
         )
 
-        noncomma = "".join([c for c in printables if c != ","])
-        alphastring_ = OneOrMore(CharsNotIn(noncomma + " "))
-
-        def parse_html(s, loc, toks):
-            return "<%s>" % "".join(toks[0])
-
-        opener = "<"
-        closer = ">"
-        html_text = (
-            nestedExpr(opener, closer, (CharsNotIn(opener + closer)))
-            .setParseAction(parse_html)
-            .leaveWhitespace()
-        )
+        html_text = Forward()
+        inner_html = OneOrMore(CharsNotIn("<>") | html_text)
+        html_text << "<" + inner_html + ">"
+        html_text.setParseAction(lambda arr: "".join(arr))
 
-        ID = (
-            identifier | html_text | double_quoted_string | alphastring_
-        ).setName("ID")
+        ID = (identifier | html_text | double_quoted_string).setName("ID")
 
         float_number = Combine(
             Optional(minus) + OneOrMore(Word(nums + "."))