Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[8.x] Parser agnostic i18n Locale transform #12238

Open
wants to merge 29 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
0ab14a0
WIP: New i18n logic based on inline_parse function
n-peugnet Apr 7, 2024
2682f8b
Avoid 'Literal block expected; none found.' warnings
n-peugnet Apr 7, 2024
38c004b
Update gettext builder output in tests
n-peugnet Apr 7, 2024
9a2510a
parse_inline's input string is always a string
n-peugnet Apr 7, 2024
3c97944
Only output a single paragraph in RSTParser's parse_inline
n-peugnet Apr 7, 2024
66b14d3
Image nodes are handled separately, and literal can simply be manuall…
n-peugnet Apr 7, 2024
5994ca5
Fix ruff lints + more useful comment
n-peugnet Apr 7, 2024
0794348
Fix last ruff error
n-peugnet Apr 8, 2024
358c611
Remove unused type:ignore annotation
n-peugnet Apr 8, 2024
94b9b9d
Simplify parse_inline literal block handling for rST
n-peugnet Apr 14, 2024
17455d6
Fix rendering for parsed-literals
n-peugnet Apr 15, 2024
c10b0ec
Merge remote-tracking branch 'origin/master' into new-inline-parse-i1…
n-peugnet Apr 27, 2024
0fc3f28
Regression test for parsed literals translation
n-peugnet Apr 14, 2024
fe7675f
Regression test for strange markup
n-peugnet Apr 21, 2024
7f94dcf
Directly use Inliner.parse() instead of 'Text' initial_state
n-peugnet May 8, 2024
14e4833
Add warning messages to doccument in parse_inline
n-peugnet May 8, 2024
c81d310
Fix ruff lints
n-peugnet May 9, 2024
910dbd7
Apply ruff format patch
n-peugnet May 9, 2024
3b92bfb
Refactor to fix MyPy error
n-peugnet May 10, 2024
8c40fb6
Add tests for parse_inline
n-peugnet May 11, 2024
ca54a6c
Fix ruff lints
n-peugnet May 11, 2024
7de9a40
Skip parse_inline() tests with docutils < 0.19
n-peugnet May 11, 2024
7500a00
Try to ignore incorrect mypy lint
n-peugnet May 11, 2024
e7e7fad
Remove unneeded inliner, title and section options of memo
n-peugnet May 12, 2024
dba713d
Add test for substitution reference for parse_inline
n-peugnet May 12, 2024
2c37780
Simplify parse_inline for RST
n-peugnet May 19, 2024
3d5441d
Add test for titles in parse_inline
n-peugnet May 19, 2024
463ac3d
Merge remote-tracking branch 'origin/master' into new-inline-parse-i1…
n-peugnet May 19, 2024
d84d4e6
Fix <unknown> source file in translated warnings
n-peugnet May 19, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 5 additions & 0 deletions sphinx/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,11 @@ def setup(self, app: Sphinx) -> None:
if transform in self.transforms:
self.transforms.remove(transform)

def parse(self) -> None:
    """Create a fresh document and fill it through the parser's ``parse_inline()``.

    This overrides the BaseReader ``parse`` method: the new document is
    stored on ``self.document`` and ``self.input`` is passed to
    ``self.parser.parse_inline()`` with a line number of 1.
    """
    fresh_document = self.new_document()
    self.document = fresh_document
    self.parser.parse_inline(self.input, fresh_document, 1)


class SphinxDummyWriter(UnfilteredWriter):
"""Dummy writer module used for generating doctree."""
Expand Down
31 changes: 30 additions & 1 deletion sphinx/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import docutils.parsers
import docutils.parsers.rst
from docutils import nodes
from docutils.parsers.rst import states
from docutils.parsers.rst import languages, states
from docutils.statemachine import StringList
from docutils.transforms.universal import SmartQuotes

Expand Down Expand Up @@ -46,6 +46,11 @@ def set_application(self, app: Sphinx) -> None:
self.config = app.config
self.env = app.env

def parse_inline(self, inputstring: str, document: nodes.document, lineno: int) -> None:
    """Parse the inline elements of a text block and generate a document tree.

    This base implementation always raises; parser subclasses are expected
    to provide their own ``parse_inline``.

    :raises NotImplementedError: always.
    """
    raise NotImplementedError('Parser subclasses must implement parse_inline')


class RSTParser(docutils.parsers.rst.Parser, Parser):
"""A reST parser for Sphinx."""
Expand All @@ -60,6 +65,30 @@ def get_transforms(self) -> list[type[Transform]]:
transforms.remove(SmartQuotes)
return transforms

def parse_inline(self, inputstring: str, document: nodes.document, lineno: int) -> None:
    """Parse inline syntax from text and generate a document tree.

    The text is handed to a docutils ``Inliner`` and the resulting nodes are
    appended to *document* as a single paragraph, followed by any system
    messages the inliner returned.

    :param inputstring: The text to parse.
    :param document: The document that receives the paragraph and messages;
        its ``'source'`` attribute is used as the source of the new paragraph.
    :param lineno: The line number recorded on the paragraph and passed to
        the inliner.
    """
    import re  # function-scope import: keeps this method self-contained

    # A paragraph may end with the '::' literal-block marker.  Resolve it
    # with the same rules the reStructuredText paragraph parser applies:
    #   * the text is only '::'            -> nothing is rendered
    #   * '::' is preceded by whitespace   -> the marker is removed
    #   * '::' is preceded by other text   -> it is replaced by a single ':'
    # Trailing whitespace is ignored when looking for the marker, and an
    # escaped colon (odd number of backslashes before '::') is not a marker.
    # Text without a marker is passed through unchanged.
    stripped = inputstring.rstrip()
    if re.search(r'(?<!\\)(\\\\)*::$', stripped):
        if len(stripped) == 2:
            inputstring = ''
        elif stripped[-3] in ' \n':
            inputstring = stripped[:-3].rstrip()
        else:
            inputstring = stripped[:-1]

    reporter = document.reporter
    # Make the reporter resolve every line number against the document's own
    # source instead of whatever lookup was installed before.
    reporter.get_source_and_line = lambda x: (document['source'], x)  # type: ignore[attr-defined]
    language = languages.get_language(document.settings.language_code, reporter)
    # Reuse the parser's inliner when one is configured, else create one.
    inliner = states.Inliner() if self.inliner is None else self.inliner
    inliner.init_customizations(document.settings)
    memo = states.Struct(
        document=document,
        reporter=reporter,
        language=language,
    )
    textnodes, messages = inliner.parse(inputstring, lineno, memo, document)
    paragraph = nodes.paragraph(inputstring, '', *textnodes)
    paragraph.source, paragraph.line = document['source'], lineno
    document += [paragraph, *messages]

def parse(self, inputstring: str | StringList, document: nodes.document) -> None:
"""Parse text and generate a document tree."""
self.setup_parse(inputstring, document) # type: ignore[arg-type]
Expand Down
74 changes: 15 additions & 59 deletions sphinx/transforms/i18n.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import contextlib
from os import path
from re import DOTALL, match
from textwrap import indent
from typing import TYPE_CHECKING, Any, TypeVar

from docutils import nodes
Expand All @@ -21,7 +20,6 @@
from sphinx.util.i18n import docname_to_domain
from sphinx.util.index_entries import split_index_msg
from sphinx.util.nodes import (
IMAGE_TYPE_NODES,
LITERAL_TYPE_NODES,
NodeMatcher,
extract_messages,
Expand Down Expand Up @@ -380,25 +378,12 @@ def apply(self, **kwargs: Any) -> None:
node['translated'] = True
continue

# Avoid "Literal block expected; none found." warnings.
# If msgstr ends with '::' then it cause warning message at
# parser.parse() processing.
# literal-block-warning is only appear in avobe case.
if msgstr.strip().endswith('::'):
msgstr += '\n\n dummy literal'
# dummy literal node will discard by 'patch = patch[0]'

# literalblock need literal block notation to avoid it become
# paragraph.
# literalblock can not contain references or terms
if isinstance(node, LITERAL_TYPE_NODES):
msgstr = '::\n\n' + indent(msgstr, ' ' * 3)
continue

patch = publish_msgstr(self.app, msgstr, source,
node.line, self.config, settings) # type: ignore[arg-type]
# FIXME: no warnings about inconsistent references in this part
# XXX doctest and other block markup
if not isinstance(patch, nodes.paragraph):
continue # skip for now

updater = _NodeUpdater(node, patch, self.document, noqa=False)
processed = updater.update_title_mapping()
Expand Down Expand Up @@ -453,45 +438,25 @@ def apply(self, **kwargs: Any) -> None:
node['alt'] = msgstr
continue

# Avoid "Literal block expected; none found." warnings.
# If msgstr ends with '::' then it cause warning message at
# parser.parse() processing.
# literal-block-warning is only appear in avobe case.
if msgstr.strip().endswith('::'):
msgstr += '\n\n dummy literal'
# dummy literal node will discard by 'patch = patch[0]'

# literalblock need literal block notation to avoid it become
# paragraph.
if isinstance(node, LITERAL_TYPE_NODES):
msgstr = '::\n\n' + indent(msgstr, ' ' * 3)
if isinstance(node, nodes.image) and node.get('uri') == msg:
node['uri'] = msgstr
continue

# Structural Subelements phase1
# There is a possibility that only the title node is created.
# see: https://docutils.sourceforge.io/docs/ref/doctree.html#structural-subelements
if isinstance(node, nodes.title):
# This generates: <section ...><title>msgstr</title></section>
msgstr = msgstr + '\n' + '=' * len(msgstr) * 2
# Literal blocks do not need to be parsed because they contain no inline syntax.
# Parsed literals are the exception, but they use the same node type, so the two
# are told apart by their number of children.
if isinstance(node, LITERAL_TYPE_NODES) and len(node.children) <= 1:
node.children = [nodes.Text(msgstr)]
# Highlighting expects .rawsource and .astext() to be the same.
node.rawsource = node.astext()
node['translated'] = True
continue

patch = publish_msgstr(self.app, msgstr, source,
node.line, self.config, settings) # type: ignore[arg-type]
# Structural Subelements phase2
if isinstance(node, nodes.title):
# get <title> node that placed as a first child
patch = patch.next_node() # type: ignore[assignment]

# ignore unexpected markups in translation message
unexpected: tuple[type[nodes.Element], ...] = (
nodes.paragraph, # expected form of translation
nodes.title, # generated by above "Subelements phase2"
)

# following types are expected if
# config.gettext_additional_targets is configured
unexpected += LITERAL_TYPE_NODES
unexpected += IMAGE_TYPE_NODES

if not isinstance(patch, unexpected):
if not isinstance(patch, nodes.paragraph):
continue # skip

updater = _NodeUpdater(node, patch, self.document, noqa)
Expand All @@ -502,15 +467,6 @@ def apply(self, **kwargs: Any) -> None:
updater.update_pending_xrefs()
updater.update_leaves()

# for highlighting that expects .rawsource and .astext() are same.
if isinstance(node, LITERAL_TYPE_NODES):
node.rawsource = node.astext()

if isinstance(node, nodes.image) and node.get('alt') != msg:
node['uri'] = patch['uri']
node['translated'] = False
continue # do not mark translated

node['translated'] = True # to avoid double translation

if 'index' in self.config.gettext_additional_targets:
Expand Down
3 changes: 1 addition & 2 deletions sphinx/util/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,8 +266,7 @@ def extract_messages(doctree: Element) -> Iterable[tuple[Element, str]]:
if node.get('alt'):
yield node, node['alt']
if node.get('translatable'):
image_uri = node.get('original_uri', node['uri'])
msg = f'.. image:: {image_uri}'
msg = node.get('original_uri', node['uri'])
else:
msg = ''
elif isinstance(node, nodes.meta):
Expand Down
8 changes: 8 additions & 0 deletions tests/roots/test-intl/literalblock.txt
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,11 @@ doctest blocks
>>> if __name__ == '__main__': # if run this py file as python script
... main() # call main


parsed literal
==============

.. parsed-literal::

**this** *is*
`parsed literal`_
2 changes: 2 additions & 0 deletions tests/roots/test-intl/markup.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,5 @@ i18n with strange markup
1. title starting with 1.
-------------------------

A. Einstein was a really
smart dude.
8 changes: 4 additions & 4 deletions tests/roots/test-intl/xx/LC_MESSAGES/figure.po
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,14 @@ msgstr "IMAGE URL AND ALT"
msgid "img"
msgstr "IMG -> I18N"

msgid ".. image:: img.png"
msgstr ".. image:: i18n.png"
msgid "img.png"
msgstr "i18n.png"

msgid "i18n"
msgstr "I18N -> IMG"

msgid ".. image:: i18n.png"
msgstr ".. image:: img.png"
msgid "i18n.png"
msgstr "img.png"

msgid "image on substitution"
msgstr "IMAGE ON SUBSTITUTION"
Expand Down
10 changes: 10 additions & 0 deletions tests/roots/test-intl/xx/LC_MESSAGES/literalblock.po
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,13 @@ msgstr ""
">>> if __name__ == '__main__': # IF RUN THIS PY FILE AS PYTHON SCRIPT\n"
"... main() # CALL MAIN"

msgid "parsed literal"
msgstr "PARSED LITERAL"

msgid ""
"**this** *is*\n"
"`parsed literal`_"
msgstr ""
"**THIS** *IS*\n"
"`PARSED LITERAL`_"

3 changes: 3 additions & 0 deletions tests/roots/test-intl/xx/LC_MESSAGES/markup.po
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,6 @@ msgstr "I18N WITH STRANGE MARKUP"
msgid "1. title starting with 1."
msgstr "1. TITLE STARTING WITH 1."

msgid "A. Einstein was a really smart dude."
msgstr "A. EINSTEIN WAS A REALLY SMART DUDE."

Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@ msgstr "SUBSTITUTED IMAGE |subst_epilog_2| HERE."
msgid "subst_prolog_2"
msgstr "SUBST_PROLOG_2 TRANSLATED"

msgid ".. image:: /img.png"
msgstr ".. image:: /i18n.png"
msgid "/img.png"
msgstr "/i18n.png"

msgid "subst_epilog_2"
msgstr "SUBST_EPILOG_2 TRANSLATED"

msgid ".. image:: /i18n.png"
msgstr ".. image:: /img.png"
msgid "/i18n.png"
msgstr "/img.png"
6 changes: 4 additions & 2 deletions tests/test_builders/test_build_gettext.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,11 +206,11 @@ def test_gettext_prolog_epilog_substitution(app):
"This is content that contains |subst_prolog_1|.",
"Substituted image |subst_prolog_2| here.",
"subst_prolog_2",
".. image:: /img.png",
"/img.png",
"This is content that contains |subst_epilog_1|.",
"Substituted image |subst_epilog_2| here.",
"subst_epilog_2",
".. image:: /i18n.png",
"/i18n.png",
]


Expand Down Expand Up @@ -265,4 +265,6 @@ def test_gettext_literalblock_additional(app):
"function\\n... sys.stdout.write('hello') # call write method of "
"stdout object\\n>>>\\n>>> if __name__ == '__main__': # if run this py "
'file as python script\\n... main() # call main',
'parsed literal',
'**this** *is*\\n`parsed literal`_',
]