Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve type annotations (add more and fix wrong ones) #1394

Merged
merged 6 commits into from Oct 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/changelog.md
Expand Up @@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed

* Fix a performance problem with HTML extraction where large HTML input could trigger quadratic line counting behavior (PR#1392).
* Improve and expand type annotations in the code base (#1394).

## [3.5] -- 2023-10-06

Expand Down
13 changes: 7 additions & 6 deletions markdown/blockparser.py
Expand Up @@ -30,11 +30,12 @@
from __future__ import annotations

import xml.etree.ElementTree as etree
from typing import TYPE_CHECKING, Sequence, Any
from typing import TYPE_CHECKING, Iterable, Any
from . import util

if TYPE_CHECKING: # pragma: no cover
from markdown import Markdown
from .blockprocessors import BlockProcessor


class State(list):
Expand All @@ -59,7 +60,7 @@ def set(self, state: Any):
""" Set a new state. """
self.append(state)

def reset(self):
def reset(self) -> None:
""" Step back one step in nested state. """
self.pop()

Expand Down Expand Up @@ -92,11 +93,11 @@ def __init__(self, md: Markdown):
[`blockprocessors`][markdown.blockprocessors].

"""
self.blockprocessors = util.Registry()
self.blockprocessors: util.Registry[BlockProcessor] = util.Registry()
self.state = State()
self.md = md

def parseDocument(self, lines: Sequence[str]) -> etree.ElementTree:
def parseDocument(self, lines: Iterable[str]) -> etree.ElementTree:
""" Parse a Markdown document into an `ElementTree`.

Given a list of lines, an `ElementTree` object (not just a parent
Expand All @@ -116,7 +117,7 @@ def parseDocument(self, lines: Sequence[str]) -> etree.ElementTree:
self.parseChunk(self.root, '\n'.join(lines))
return etree.ElementTree(self.root)

def parseChunk(self, parent: etree.Element, text: str):
def parseChunk(self, parent: etree.Element, text: str) -> None:
""" Parse a chunk of Markdown text and attach to given `etree` node.

While the `text` argument is generally assumed to contain multiple
Expand All @@ -134,7 +135,7 @@ def parseChunk(self, parent: etree.Element, text: str):
"""
self.parseBlocks(parent, text.split('\n\n'))

def parseBlocks(self, parent: etree.Element, blocks: Sequence[str]):
def parseBlocks(self, parent: etree.Element, blocks: list[str]) -> None:
""" Process blocks of Markdown text and attach to given `etree` node.

Given a list of `blocks`, each `blockprocessor` is stepped through
Expand Down
12 changes: 6 additions & 6 deletions markdown/blockprocessors.py
Expand Up @@ -82,7 +82,7 @@ def lastChild(self, parent: etree.Element) -> etree.Element | None:
else:
return None

def detab(self, text: str, length: int = None) -> str:
def detab(self, text: str, length: int | None = None) -> tuple[str, str]:
""" Remove a tab from the front of each line of the given text. """
if length is None:
length = self.tab_length
Expand All @@ -105,7 +105,7 @@ def looseDetab(self, text: str, level: int = 1) -> str:
lines[i] = lines[i][self.tab_length*level:]
return '\n'.join(lines)

def test(self, parent: etree.Element, block: list[str]) -> bool:
def test(self, parent: etree.Element, block: str) -> bool:
""" Test for block type. Must be overridden by subclasses.

As the parser loops through processors, it will call the `test`
Expand Down Expand Up @@ -214,7 +214,7 @@ def run(self, parent, blocks):
self.create_item(sibling, block)
self.parser.state.reset()

def create_item(self, parent: etree.Element, block: str):
def create_item(self, parent: etree.Element, block: str) -> None:
""" Create a new `li` and parse the block with it as the parent. """
li = etree.SubElement(parent, 'li')
self.parser.parseBlocks(li, [block])
Expand Down Expand Up @@ -329,7 +329,7 @@ class OListProcessor(BlockProcessor):

TAG: str = 'ol'
""" The tag used for the wrapping element. """
STARTSWITH: int = '1'
STARTSWITH: str = '1'
"""
The integer (as a string) with which the list starts. For example, if a list is initialized as
`3. Item`, then the `ol` tag will be assigned an HTML attribute of `start="3"`. Default: `"1"`.
Expand All @@ -342,7 +342,7 @@ class OListProcessor(BlockProcessor):
This is the list of types which can be mixed.
"""

def __init__(self, parser):
def __init__(self, parser: BlockParser):
super().__init__(parser)
# Detect an item (`1. item`). `group(1)` contains contents of item.
self.RE = re.compile(r'^[ ]{0,%d}\d+\.[ ]+(.*)' % (self.tab_length - 1))
Expand Down Expand Up @@ -448,7 +448,7 @@ class UListProcessor(OListProcessor):
TAG: str = 'ul'
""" The tag used for the wrapping element. """

def __init__(self, parser):
def __init__(self, parser: BlockParser):
super().__init__(parser)
# Detect an item (`* item`, `+ item` or `- item`). `group(1)` contains contents of item.
self.RE = re.compile(r'^[ ]{0,%d}[*+-][ ]+(.*)' % (self.tab_length - 1))
Expand Down
12 changes: 8 additions & 4 deletions markdown/core.py
Expand Up @@ -23,7 +23,7 @@
import sys
import logging
import importlib
from typing import TYPE_CHECKING, Any, TextIO, Callable
from typing import TYPE_CHECKING, Any, Callable, ClassVar, Mapping, Sequence, TextIO
from . import util
from .preprocessors import build_preprocessors
from .blockprocessors import build_block_parser
Expand Down Expand Up @@ -76,7 +76,7 @@ class Markdown:

doc_tag = "div" # Element used to wrap document - later removed

output_formats: dict[str, Callable[Element]] = {
output_formats: ClassVar[dict[str, Callable[[Element], str]]] = {
'html': to_html_string,
'xhtml': to_xhtml_string,
}
Expand Down Expand Up @@ -156,7 +156,11 @@ def build_parser(self) -> Markdown:
self.postprocessors = build_postprocessors(self)
return self

def registerExtensions(self, extensions: list[Extension | str], configs: dict[str, dict[str, Any]]) -> Markdown:
def registerExtensions(
self,
extensions: Sequence[Extension | str],
configs: Mapping[str, Mapping[str, Any]]
) -> Markdown:
"""
Load a list of extensions into an instance of the `Markdown` class.

Expand Down Expand Up @@ -188,7 +192,7 @@ def registerExtensions(self, extensions: list[Extension | str], configs: dict[st
)
return self

def build_extension(self, ext_name: str, configs: dict[str, Any]) -> Extension:
def build_extension(self, ext_name: str, configs: Mapping[str, Any]) -> Extension:
"""
Build extension from a string name, then return an instance using the given `configs`.

Expand Down
10 changes: 5 additions & 5 deletions markdown/extensions/__init__.py
Expand Up @@ -27,7 +27,7 @@

from __future__ import annotations

from typing import TYPE_CHECKING, Any
from typing import TYPE_CHECKING, Any, Mapping, Sequence
from ..util import parseBoolValue

if TYPE_CHECKING: # pragma: no cover
Expand All @@ -37,7 +37,7 @@
class Extension:
""" Base class for extensions to subclass. """

config: dict[str, list[Any, str]] = {}
config: Mapping[str, list] = {}
"""
Default configuration for an extension.

Expand Down Expand Up @@ -91,7 +91,7 @@ def getConfigInfo(self) -> list[tuple[str, str]]:
"""
return [(key, self.config[key][1]) for key in self.config.keys()]

def setConfig(self, key: str, value: Any):
def setConfig(self, key: str, value: Any) -> None:
"""
Set a configuration option.

Expand All @@ -112,7 +112,7 @@ def setConfig(self, key: str, value: Any):
value = parseBoolValue(value, preserve_none=True)
self.config[key][0] = value

def setConfigs(self, items: dict[str, Any] | list[tuple[str, Any]]):
def setConfigs(self, items: Mapping[str, Any] | Sequence[tuple[str, Any]]):
"""
Loop through a collection of configuration options, passing each to
[`setConfig`][markdown.extensions.Extension.setConfig].
Expand All @@ -129,7 +129,7 @@ def setConfigs(self, items: dict[str, Any] | list[tuple[str, Any]]):
for key, value in items:
self.setConfig(key, value)

def extendMarkdown(self, md: Markdown):
def extendMarkdown(self, md: Markdown) -> None:
"""
Add the various processors and patterns to the Markdown Instance.

Expand Down
2 changes: 1 addition & 1 deletion markdown/extensions/attr_list.py
Expand Up @@ -146,7 +146,7 @@ def run(self, doc: Element):
self.assign_attrs(elem, m.group(1))
elem.tail = elem.tail[m.end():]

def assign_attrs(self, elem: Element, attrs: dict[str, str]):
def assign_attrs(self, elem: Element, attrs: str) -> None:
""" Assign `attrs` to element. """
for k, v in get_attrs(attrs):
if k == '.':
Expand Down
14 changes: 7 additions & 7 deletions markdown/extensions/footnotes.py
Expand Up @@ -98,14 +98,14 @@ def extendMarkdown(self, md):
# Insert a postprocessor after amp_substitute processor
md.postprocessors.register(FootnotePostprocessor(self), 'footnote', 25)

def reset(self):
def reset(self) -> None:
""" Clear footnotes on reset, and prepare for distinct document. """
self.footnotes = OrderedDict()
self.footnotes: OrderedDict[str, str] = OrderedDict()
self.unique_prefix += 1
self.found_refs = {}
self.used_refs = set()

def unique_ref(self, reference, found=False):
def unique_ref(self, reference, found: bool = False):
""" Get a unique reference if there are duplicates. """
if not found:
return reference
Expand Down Expand Up @@ -144,7 +144,7 @@ def finder(element):
res = finder(root)
return res

def setFootnote(self, id, text):
def setFootnote(self, id, text) -> None:
""" Store a footnote for later retrieval. """
self.footnotes[id] = text

Expand All @@ -159,7 +159,7 @@ def makeFootnoteId(self, id):
else:
return 'fn{}{}'.format(self.get_separator(), id)

def makeFootnoteRefId(self, id, found=False):
def makeFootnoteRefId(self, id, found: bool = False):
""" Return footnote back-link id. """
if self.getConfig("UNIQUE_IDS"):
return self.unique_ref('fnref%s%d-%s' % (self.get_separator(), self.unique_prefix, id), found)
Expand Down Expand Up @@ -329,7 +329,7 @@ class FootnotePostTreeprocessor(Treeprocessor):
def __init__(self, footnotes):
self.footnotes = footnotes

def add_duplicates(self, li, duplicates):
def add_duplicates(self, li, duplicates) -> None:
""" Adjust current `li` and add the duplicates: `fnref2`, `fnref3`, etc. """
for link in li.iter('a'):
# Find the link that needs to be duplicated.
Expand All @@ -355,7 +355,7 @@ def get_num_duplicates(self, li):
link_id = '{}ref{}{}'.format(fn, self.footnotes.get_separator(), rest)
return self.footnotes.found_refs.get(link_id, 0)

def handle_duplicates(self, parent):
def handle_duplicates(self, parent) -> None:
""" Find duplicate footnotes and format and add the duplicates. """
for li in list(parent):
# Check number of duplicate footnotes and insert
Expand Down
2 changes: 1 addition & 1 deletion markdown/extensions/meta.py
Expand Up @@ -44,7 +44,7 @@ def extendMarkdown(self, md):
self.md = md
md.preprocessors.register(MetaPreprocessor(md), 'meta', 27)

def reset(self):
def reset(self) -> None:
self.md.Meta = {}


Expand Down
10 changes: 5 additions & 5 deletions markdown/extensions/smarty.py
Expand Up @@ -193,7 +193,7 @@ def _addPatterns(self, md, patterns, serie, priority):
name = 'smarty-%s-%d' % (serie, ind)
self.inlinePatterns.register(pattern, name, priority-ind)

def educateDashes(self, md):
def educateDashes(self, md) -> None:
emDashesPattern = SubstituteTextPattern(
r'(?<!-)---(?!-)', (self.substitutions['mdash'],), md
)
Expand All @@ -203,13 +203,13 @@ def educateDashes(self, md):
self.inlinePatterns.register(emDashesPattern, 'smarty-em-dashes', 50)
self.inlinePatterns.register(enDashesPattern, 'smarty-en-dashes', 45)

def educateEllipses(self, md):
def educateEllipses(self, md) -> None:
ellipsesPattern = SubstituteTextPattern(
r'(?<!\.)\.{3}(?!\.)', (self.substitutions['ellipsis'],), md
)
self.inlinePatterns.register(ellipsesPattern, 'smarty-ellipses', 10)

def educateAngledQuotes(self, md):
def educateAngledQuotes(self, md) -> None:
leftAngledQuotePattern = SubstituteTextPattern(
r'\<\<', (self.substitutions['left-angle-quote'],), md
)
Expand All @@ -219,7 +219,7 @@ def educateAngledQuotes(self, md):
self.inlinePatterns.register(leftAngledQuotePattern, 'smarty-left-angle-quotes', 40)
self.inlinePatterns.register(rightAngledQuotePattern, 'smarty-right-angle-quotes', 35)

def educateQuotes(self, md):
def educateQuotes(self, md) -> None:
lsquo = self.substitutions['left-single-quote']
rsquo = self.substitutions['right-single-quote']
ldquo = self.substitutions['left-double-quote']
Expand All @@ -243,7 +243,7 @@ def educateQuotes(self, md):

def extendMarkdown(self, md):
configs = self.getConfigs()
self.inlinePatterns = Registry()
self.inlinePatterns: Registry[HtmlInlineProcessor] = Registry()
if configs['smart_ellipses']:
self.educateEllipses(md)
if configs['smart_quotes']:
Expand Down
12 changes: 6 additions & 6 deletions markdown/extensions/toc.py
Expand Up @@ -71,7 +71,7 @@ def get_name(el):
return ''.join(text).strip()


def stashedHTML2text(text, md, strip_entities=True):
def stashedHTML2text(text, md, strip_entities: bool = True):
""" Extract raw HTML from stash, reduce to plain text and swap with placeholder. """
def _html_sub(m):
""" Substitute raw html with plain text. """
Expand Down Expand Up @@ -198,7 +198,7 @@ def iterparent(self, node):
yield node, child
yield from self.iterparent(child)

def replace_marker(self, root, elem):
def replace_marker(self, root, elem) -> None:
""" Replace marker with elem. """
for (p, c) in self.iterparent(root):
text = ''.join(c.itertext()).strip()
Expand All @@ -219,14 +219,14 @@ def replace_marker(self, root, elem):
p[i] = elem
break

def set_level(self, elem):
def set_level(self, elem) -> None:
""" Adjust header level according to base level. """
level = int(elem.tag[-1]) + self.base_level
if level > 6:
level = 6
elem.tag = 'h%d' % level

def add_anchor(self, c, elem_id):
def add_anchor(self, c, elem_id) -> None:
anchor = etree.Element("a")
anchor.text = c.text
anchor.attrib["href"] = "#" + elem_id
Expand All @@ -238,7 +238,7 @@ def add_anchor(self, c, elem_id):
c.remove(c[0])
c.append(anchor)

def add_permalink(self, c, elem_id):
def add_permalink(self, c, elem_id) -> None:
permalink = etree.Element("a")
permalink.text = ("%spara;" % AMP_SUBSTITUTE
if self.use_permalinks is True
Expand Down Expand Up @@ -399,7 +399,7 @@ def extendMarkdown(self, md):
tocext = self.TreeProcessorClass(md, self.getConfigs())
md.treeprocessors.register(tocext, 'toc', 5)

def reset(self):
def reset(self) -> None:
self.md.toc = ''
self.md.toc_tokens = []

Expand Down
6 changes: 3 additions & 3 deletions markdown/htmlparser.py
Expand Up @@ -156,7 +156,7 @@ def get_endtag_text(self, tag: str) -> str:
# Failed to extract from raw data. Assume well formed and lowercase.
return '</{}>'.format(tag)

def handle_starttag(self, tag: str, attrs: dict[str, str]):
def handle_starttag(self, tag: str, attrs: list[tuple[str, str]]):
# Handle tags that should always be empty and do not specify a closing tag
if tag in self.empty_tags:
self.handle_startendtag(tag, attrs)
Expand Down Expand Up @@ -235,7 +235,7 @@ def handle_empty_tag(self, data: str, is_block: bool):
else:
self.cleandoc.append(data)

def handle_startendtag(self, tag: str, attrs: dict[str, str]):
def handle_startendtag(self, tag: str, attrs: list[tuple[str, str]]):
self.handle_empty_tag(self.get_starttag_text(), is_block=self.md.is_block_level(tag))

def handle_charref(self, name: str):
Expand Down Expand Up @@ -277,7 +277,7 @@ def parse_html_declaration(self, i: int) -> int:
# As `__starttag_text` is private, all references to it must be in this subclass.
# The last few lines of `parse_starttag` are reversed so that `handle_starttag`
# can override `cdata_mode` in certain situations (in a code span).
__starttag_text = None
__starttag_text: str | None = None

def get_starttag_text(self) -> str:
"""Return full source of start tag: `<...>`."""
Expand Down