From f82c3c99126e644125d243ba0d0788197401e416 Mon Sep 17 00:00:00 2001 From: James Addison <55152140+jayaddison@users.noreply.github.com> Date: Fri, 7 Apr 2023 17:49:36 +0100 Subject: [PATCH] Disable localisation when ``SOURCE_DATE_EPOCH`` is set (#10949) This commit disables Sphinx's localisation features when reproducible builds are requested, as determined by the SOURCE_DATE_EPOCH_ environment variable being set (even to an empty value). The `Reproducible Builds`_ project aims to provide confidence to consumers of packaged software that the artefacts they're downloading and installing have not been altered by the environment they were built in, and can be replicated at a later date if required. Builds of localised documentation using Sphinx currently account for a large category of reproducible build testing failures, because the builders intentionally use varying environment locales at build-time. This can affect the contents of the ``objects.inv`` file. During investigation, it turned out that many ``gettext``-localised values (particularly in Python modules under ``sphinx.domains``) were being translated at module-load-time and would not subsequently be re-localised. This creates two unusual effects: 1. Attempting to write a test case to build the same application in two different languages was not initially possible, as the first-loaded translation catalogue (as found in the ``sphinx.locale.translators`` global variable) would remain in use for subsequent application builds under different locales. 2. Localisation of strings could vary depending on whether the relevant modules were loaded before or after the resource catalogues were populated. We fix this by performing all translations lazily so that module imports can occur in any order and localisation of inventory entries should occur only when translations of those items are requested.
Localisation can then be disabled by configuring the ``gettext`` language to the ISO-639-3 'undetermined' code (``'und'``), as this should not have an associated translation catalogue. We also want to prevent ``gettext`` from attempting to determine the host's locale from environment variables (including ``LANGUAGE``). .. _SOURCE_DATE_EPOCH: https://reproducible-builds.org/docs/source-date-epoch/ .. _Reproducible Builds: https://www.reproducible-builds.org/ --- CHANGES | 2 + sphinx/builders/html/__init__.py | 2 +- sphinx/builders/latex/__init__.py | 2 +- sphinx/domains/std.py | 4 +- sphinx/locale/__init__.py | 29 +++++---- sphinx/transforms/__init__.py | 2 +- sphinx/writers/manpage.py | 2 +- sphinx/writers/texinfo.py | 2 +- sphinx/writers/text.py | 4 +- .../locale1/et/LC_MESSAGES/myext.mo | Bin 0 -> 80 bytes .../locale1/et/LC_MESSAGES/myext.po | 2 + tests/test_locale.py | 41 +++++++++++++ tests/test_util_inventory.py | 56 ++++++++++++++++++ 13 files changed, 128 insertions(+), 20 deletions(-) create mode 100644 tests/roots/test-locale/locale1/et/LC_MESSAGES/myext.mo create mode 100644 tests/roots/test-locale/locale1/et/LC_MESSAGES/myext.po diff --git a/CHANGES b/CHANGES index a85952dd6b9..a66c2c46039 100644 --- a/CHANGES +++ b/CHANGES @@ -83,6 +83,8 @@ Bugs fixed * #11192: Restore correct parallel search index building. Patch by Jeremy Maitin-Shepard * Use the new Transifex ``tx`` client +* #9778: Disable localisation when the ``SOURCE_DATE_EPOCH`` environment + variable is set, to assist with 'reproducible builds'. 
Patch by James Addison Testing -------- diff --git a/sphinx/builders/html/__init__.py b/sphinx/builders/html/__init__.py index 2bf14672d46..5b7478e775d 100644 --- a/sphinx/builders/html/__init__.py +++ b/sphinx/builders/html/__init__.py @@ -502,7 +502,7 @@ def prepare_writing(self, docnames: set[str]) -> None: # typically doesn't include the time of day lufmt = self.config.html_last_updated_fmt if lufmt is not None: - self.last_updated = format_date(lufmt or _('%b %d, %Y'), + self.last_updated = format_date(lufmt or str(_('%b %d, %Y')), language=self.config.language) else: self.last_updated = None diff --git a/sphinx/builders/latex/__init__.py b/sphinx/builders/latex/__init__.py index 335518f2308..f1f0cff2be4 100644 --- a/sphinx/builders/latex/__init__.py +++ b/sphinx/builders/latex/__init__.py @@ -179,7 +179,7 @@ def init_context(self) -> None: if self.config.today: self.context['date'] = self.config.today else: - self.context['date'] = format_date(self.config.today_fmt or _('%b %d, %Y'), + self.context['date'] = format_date(self.config.today_fmt or str(_('%b %d, %Y')), language=self.config.language) if self.config.latex_logo: diff --git a/sphinx/domains/std.py b/sphinx/domains/std.py index d1a6b048fa9..ad28c9f2c9f 100644 --- a/sphinx/domains/std.py +++ b/sphinx/domains/std.py @@ -242,9 +242,9 @@ def add_target_and_index(self, firstname: str, sig: str, signode: desc_signature # create an index entry if currprogram: - descr = _('%s command line option') % currprogram + descr = str(_('%s command line option') % currprogram) else: - descr = _('command line option') + descr = str(_('command line option')) for option in signode.get('allnames', []): entry = '; '.join([descr, option]) self.indexnode['entries'].append(('pair', entry, signode['ids'][0], '', None)) diff --git a/sphinx/locale/__init__.py b/sphinx/locale/__init__.py index 95b7a9d2516..1cd8e9e143a 100644 --- a/sphinx/locale/__init__.py +++ b/sphinx/locale/__init__.py @@ -2,7 +2,7 @@ import locale from gettext 
import NullTranslations, translation -from os import path +from os import getenv, path from typing import Any, Callable, Dict, List, Optional, Tuple @@ -111,9 +111,21 @@ def init( # the None entry is the system's default locale path has_translation = True - if language and '_' in language: + if getenv('SOURCE_DATE_EPOCH') is not None: + # Disable localization during reproducible source builds + # See https://reproducible-builds.org/docs/source-date-epoch/ + # + # Note: Providing an empty/none value to gettext.translation causes + # it to consult various language-related environment variables to find + # locale(s). We don't want that during a reproducible build; we want + # to run through the same code path, but to return NullTranslations. + # + # To achieve that, specify the ISO-639-3 'undetermined' language code, + # which should not match any translation catalogs. + languages: Optional[List[str]] = ['und'] + elif language and '_' in language: # for language having country code (like "de_AT") - languages: Optional[List[str]] = [language, language.split('_')[0]] + languages = [language, language.split('_')[0]] elif language: languages = [language] else: @@ -167,7 +179,7 @@ def is_translator_registered(catalog: str = 'sphinx', namespace: str = 'general' return (namespace, catalog) in translators -def _lazy_translate(catalog: str, namespace: str, message: str) -> str: +def _lazy_translate(catalog: str, namespace: str, message: str, *args: Any) -> str: """Used instead of _ when creating TranslationProxy, because _ is not bound yet at that time. """ @@ -200,13 +212,8 @@ def setup(app): .. 
versionadded:: 1.8 """ - def gettext(message: str) -> str: - if not is_translator_registered(catalog, namespace): - # not initialized yet - return _TranslationProxy(_lazy_translate, catalog, namespace, message) # type: ignore[return-value] # noqa: E501 - else: - translator = get_translator(catalog, namespace) - return translator.gettext(message) + def gettext(message: str, *args: Any) -> str: + return _TranslationProxy(_lazy_translate, catalog, namespace, message, *args) # type: ignore[return-value] # NOQA return gettext diff --git a/sphinx/transforms/__init__.py b/sphinx/transforms/__init__.py index f3647308484..2045a7426ee 100644 --- a/sphinx/transforms/__init__.py +++ b/sphinx/transforms/__init__.py @@ -106,7 +106,7 @@ def apply(self, **kwargs: Any) -> None: text = self.config[refname] if refname == 'today' and not text: # special handling: can also specify a strftime format - text = format_date(self.config.today_fmt or _('%b %d, %Y'), + text = format_date(self.config.today_fmt or str(_('%b %d, %Y')), language=self.config.language) ref.replace_self(nodes.Text(text)) diff --git a/sphinx/writers/manpage.py b/sphinx/writers/manpage.py index 1e57f48addc..0731e972280 100644 --- a/sphinx/writers/manpage.py +++ b/sphinx/writers/manpage.py @@ -93,7 +93,7 @@ def __init__(self, document: nodes.document, builder: Builder) -> None: if self.config.today: self._docinfo['date'] = self.config.today else: - self._docinfo['date'] = format_date(self.config.today_fmt or _('%b %d, %Y'), + self._docinfo['date'] = format_date(self.config.today_fmt or str(_('%b %d, %Y')), language=self.config.language) self._docinfo['copyright'] = self.config.copyright self._docinfo['version'] = self.config.version diff --git a/sphinx/writers/texinfo.py b/sphinx/writers/texinfo.py index 927e74f3487..6979437686c 100644 --- a/sphinx/writers/texinfo.py +++ b/sphinx/writers/texinfo.py @@ -220,7 +220,7 @@ def init_settings(self) -> None: 'project': self.escape(self.config.project), 'copyright': 
self.escape(self.config.copyright), 'date': self.escape(self.config.today or - format_date(self.config.today_fmt or _('%b %d, %Y'), + format_date(self.config.today_fmt or str(_('%b %d, %Y')), language=self.config.language)), }) # title diff --git a/sphinx/writers/text.py b/sphinx/writers/text.py index 3bce03ac6cb..d78d600f80d 100644 --- a/sphinx/writers/text.py +++ b/sphinx/writers/text.py @@ -791,8 +791,8 @@ def visit_acks(self, node: Element) -> None: def visit_image(self, node: Element) -> None: if 'alt' in node.attributes: - self.add_text(_('[image: %s]') % node['alt']) - self.add_text(_('[image]')) + self.add_text(str(_('[image: %s]') % node['alt'])) + self.add_text(str(_('[image]'))) raise nodes.SkipNode def visit_transition(self, node: Element) -> None: diff --git a/tests/roots/test-locale/locale1/et/LC_MESSAGES/myext.mo b/tests/roots/test-locale/locale1/et/LC_MESSAGES/myext.mo new file mode 100644 index 0000000000000000000000000000000000000000..c99a36846a83f2fcd3d66fe637d8ef19060831df GIT binary patch literal 80 zcmca7#4?ou2pEA_28dOFm>Gz5fS4PIEugdukcI(}T94G6oP34y{Gyx`hLF^vRE6Bc J#LS#r1^^kF3hDp= literal 0 HcmV?d00001 diff --git a/tests/roots/test-locale/locale1/et/LC_MESSAGES/myext.po b/tests/roots/test-locale/locale1/et/LC_MESSAGES/myext.po new file mode 100644 index 00000000000..1ecf6e3ee90 --- /dev/null +++ b/tests/roots/test-locale/locale1/et/LC_MESSAGES/myext.po @@ -0,0 +1,2 @@ +msgid "Hello world" +msgstr "Tere maailm" diff --git a/tests/test_locale.py b/tests/test_locale.py index 4861079ec33..1d90473ca8d 100644 --- a/tests/test_locale.py +++ b/tests/test_locale.py @@ -55,3 +55,44 @@ def test_add_message_catalog(app, rootdir): assert _('Hello world') == 'HELLO WORLD' assert _('Hello sphinx') == 'Hello sphinx' assert _('Hello reST') == 'Hello reST' + + +def _empty_language_translation(rootdir): + locale_dirs, catalog = [rootdir / 'test-locale' / 'locale1'], 'myext' + locale.translators.clear() + locale.init(locale_dirs, language=None, catalog=catalog) + 
return locale.get_translation(catalog) + + +def test_init_environment_language(rootdir, monkeypatch): + with monkeypatch.context() as m: + m.setenv("LANGUAGE", "en_US:en") + _ = _empty_language_translation(rootdir) + assert _('Hello world') == 'HELLO WORLD' + + with monkeypatch.context() as m: + m.setenv("LANGUAGE", "et_EE:et") + _ = _empty_language_translation(rootdir) + assert _('Hello world') == 'Tere maailm' + + +def test_init_reproducible_build_language(rootdir, monkeypatch): + with monkeypatch.context() as m: + m.setenv("SOURCE_DATE_EPOCH", "0") + m.setenv("LANGUAGE", "en_US:en") + _ = _empty_language_translation(rootdir) + sde_en_translation = str(_('Hello world')) # str cast to evaluate lazy method + + with monkeypatch.context() as m: + m.setenv("SOURCE_DATE_EPOCH", "0") + m.setenv("LANGUAGE", "et_EE:et") + _ = _empty_language_translation(rootdir) + sde_et_translation = str(_('Hello world')) # str cast to evaluate lazy method + + with monkeypatch.context() as m: + m.setenv("LANGUAGE", "et_EE:et") + _ = _empty_language_translation(rootdir) + loc_et_translation = str(_('Hello world')) # str cast to evaluate lazy method + + assert sde_en_translation == sde_et_translation + assert sde_et_translation != loc_et_translation diff --git a/tests/test_util_inventory.py b/tests/test_util_inventory.py index 583edf6691f..675bba06b77 100644 --- a/tests/test_util_inventory.py +++ b/tests/test_util_inventory.py @@ -4,6 +4,7 @@ import zlib from io import BytesIO +from sphinx.testing.util import SphinxTestApp from sphinx.util.inventory import InventoryFile inventory_v1 = b'''\ @@ -83,3 +84,58 @@ def test_read_inventory_v2_not_having_version(): invdata = InventoryFile.load(f, '/util', posixpath.join) assert invdata['py:module']['module1'] == \ ('foo', '', '/util/foo.html#module-module1', 'Long Module desc') + + +def _write_appconfig(dir, language, prefix=None): + prefix = prefix or language + (dir / prefix).makedirs() + (dir / prefix / 'conf.py').write_text(f'language = 
"{language}"', encoding='utf8') + (dir / prefix / 'index.rst').write_text('index.rst', encoding='utf8') + assert sorted((dir / prefix).listdir()) == ['conf.py', 'index.rst'] + assert (dir / prefix / 'index.rst').exists() + return (dir / prefix) + + +def _build_inventory(srcdir): + app = SphinxTestApp(srcdir=srcdir) + app.build() + app.cleanup() + return (app.outdir / 'objects.inv') + + +def test_inventory_localization(tempdir): + # Build an app using Estonian (EE) locale + srcdir_et = _write_appconfig(tempdir, "et") + inventory_et = _build_inventory(srcdir_et) + + # Build the same app using English (US) locale + srcdir_en = _write_appconfig(tempdir, "en") + inventory_en = _build_inventory(srcdir_en) + + # Ensure that the inventory contents differ + assert inventory_et.read_bytes() != inventory_en.read_bytes() + + +def test_inventory_reproducible(tempdir, monkeypatch): + with monkeypatch.context() as m: + # Configure reproducible builds + # See: https://reproducible-builds.org/docs/source-date-epoch/ + m.setenv("SOURCE_DATE_EPOCH", "0") + + # Build an app using Estonian (EE) locale + srcdir_et = _write_appconfig(tempdir, "et") + reproducible_inventory_et = _build_inventory(srcdir_et) + + # Build the same app using English (US) locale + srcdir_en = _write_appconfig(tempdir, "en") + reproducible_inventory_en = _build_inventory(srcdir_en) + + # Also build the app using Estonian (EE) locale without build reproducibility enabled + srcdir_et = _write_appconfig(tempdir, "et", prefix="localized") + localized_inventory_et = _build_inventory(srcdir_et) + + # Ensure that the reproducible inventory contents are identical + assert reproducible_inventory_et.read_bytes() == reproducible_inventory_en.read_bytes() + + # Ensure that inventory contents are different between a localized and non-localized build + assert reproducible_inventory_et.read_bytes() != localized_inventory_et.read_bytes()