Skip to content

Commit

Permalink
Disable localisation when SOURCE_DATE_EPOCH is set (#10949)
Browse files Browse the repository at this point in the history
This commit disables Sphinx's localisation features when reproducible
builds are requested, as determined by a non-empty SOURCE_DATE_EPOCH_
environment variable.

The `Reproducible Builds`_ project aims to provide confidence to
consumers of packaged software that the artefacts they're downloading
and installing have not been altered by the environment they were
built in, and can be replicated at a later date if required.

Builds of localised documentation using Sphinx currently account for
a large category of reproducible build testing failures, because the
builders intentionally use varying environment locales at build-time.
This can affect the contents of the ``objects.inv`` file.

During investigation, it turned out that many ``gettext``-localised
values (particularly in Python modules under ``sphinx.domains``) were
being translated at module-load-time and would not subsequently be
re-localised.

This creates two unusual effects:

1. Attempting to write a test case to build the same application in
   two different languages was not initially possible, as the
   first-loaded translation catalogue (as found in the 
   ``sphinx.locale.translators`` global variable) would remain in-use
   for subsequent application builds under different locales.

2. Localisation of strings could vary depending on whether the
   relevant modules were loaded before or after the resource
   catalogues were populated.

We fix this by performing all translations lazily, so that module
imports can occur in any order and inventory entries are localised
only when their translations are actually requested.

Localisation can then be disabled by configuring the ``gettext``
language to the ISO-639-3 'undetermined' code (``'und'``), as this
should not have an associated translation catalogue. We also want to
prevent ``gettext`` from attempting to determine the host's locale
from environment variables (including ``LANGUAGE``).

.. _SOURCE_DATE_EPOCH: https://reproducible-builds.org/docs/source-date-epoch/
.. _Reproducible Builds: https://www.reproducible-builds.org/
  • Loading branch information
jayaddison committed Apr 7, 2023
1 parent 4659fc2 commit f82c3c9
Show file tree
Hide file tree
Showing 13 changed files with 128 additions and 20 deletions.
2 changes: 2 additions & 0 deletions CHANGES
Expand Up @@ -83,6 +83,8 @@ Bugs fixed
* #11192: Restore correct parallel search index building.
Patch by Jeremy Maitin-Shepard
* Use the new Transifex ``tx`` client
* #9778: Disable localisation when the ``SOURCE_DATE_EPOCH`` environment
variable is set, to assist with 'reproducible builds'. Patch by James Addison

Testing
--------
Expand Down
2 changes: 1 addition & 1 deletion sphinx/builders/html/__init__.py
Expand Up @@ -502,7 +502,7 @@ def prepare_writing(self, docnames: set[str]) -> None:
# typically doesn't include the time of day
lufmt = self.config.html_last_updated_fmt
if lufmt is not None:
self.last_updated = format_date(lufmt or _('%b %d, %Y'),
self.last_updated = format_date(lufmt or str(_('%b %d, %Y')),
language=self.config.language)
else:
self.last_updated = None
Expand Down
2 changes: 1 addition & 1 deletion sphinx/builders/latex/__init__.py
Expand Up @@ -179,7 +179,7 @@ def init_context(self) -> None:
if self.config.today:
self.context['date'] = self.config.today
else:
self.context['date'] = format_date(self.config.today_fmt or _('%b %d, %Y'),
self.context['date'] = format_date(self.config.today_fmt or str(_('%b %d, %Y')),
language=self.config.language)

if self.config.latex_logo:
Expand Down
4 changes: 2 additions & 2 deletions sphinx/domains/std.py
Expand Up @@ -242,9 +242,9 @@ def add_target_and_index(self, firstname: str, sig: str, signode: desc_signature

# create an index entry
if currprogram:
descr = _('%s command line option') % currprogram
descr = str(_('%s command line option') % currprogram)
else:
descr = _('command line option')
descr = str(_('command line option'))
for option in signode.get('allnames', []):
entry = '; '.join([descr, option])
self.indexnode['entries'].append(('pair', entry, signode['ids'][0], '', None))
Expand Down
29 changes: 18 additions & 11 deletions sphinx/locale/__init__.py
Expand Up @@ -2,7 +2,7 @@

import locale
from gettext import NullTranslations, translation
from os import path
from os import getenv, path
from typing import Any, Callable, Dict, List, Optional, Tuple


Expand Down Expand Up @@ -111,9 +111,21 @@ def init(
# the None entry is the system's default locale path
has_translation = True

if language and '_' in language:
if getenv('SOURCE_DATE_EPOCH') is not None:
# Disable localization during reproducible source builds
# See https://reproducible-builds.org/docs/source-date-epoch/
#
# Note: Providing an empty/none value to gettext.translation causes
# it to consult various language-related environment variables to find
# locale(s). We don't want that during a reproducible build; we want
# to run through the same code path, but to return NullTranslations.
#
# To achieve that, specify the ISO-639-3 'undetermined' language code,
# which should not match any translation catalogs.
languages: Optional[List[str]] = ['und']
elif language and '_' in language:
# for language having country code (like "de_AT")
languages: Optional[List[str]] = [language, language.split('_')[0]]
languages = [language, language.split('_')[0]]
elif language:
languages = [language]
else:
Expand Down Expand Up @@ -167,7 +179,7 @@ def is_translator_registered(catalog: str = 'sphinx', namespace: str = 'general'
return (namespace, catalog) in translators


def _lazy_translate(catalog: str, namespace: str, message: str) -> str:
def _lazy_translate(catalog: str, namespace: str, message: str, *args: Any) -> str:
"""Used instead of _ when creating TranslationProxy, because _ is
not bound yet at that time.
"""
Expand Down Expand Up @@ -200,13 +212,8 @@ def setup(app):
.. versionadded:: 1.8
"""
def gettext(message: str) -> str:
if not is_translator_registered(catalog, namespace):
# not initialized yet
return _TranslationProxy(_lazy_translate, catalog, namespace, message) # type: ignore[return-value] # noqa: E501
else:
translator = get_translator(catalog, namespace)
return translator.gettext(message)
def gettext(message: str, *args: Any) -> str:
return _TranslationProxy(_lazy_translate, catalog, namespace, message, *args) # type: ignore[return-value] # NOQA

return gettext

Expand Down
2 changes: 1 addition & 1 deletion sphinx/transforms/__init__.py
Expand Up @@ -106,7 +106,7 @@ def apply(self, **kwargs: Any) -> None:
text = self.config[refname]
if refname == 'today' and not text:
# special handling: can also specify a strftime format
text = format_date(self.config.today_fmt or _('%b %d, %Y'),
text = format_date(self.config.today_fmt or str(_('%b %d, %Y')),
language=self.config.language)
ref.replace_self(nodes.Text(text))

Expand Down
2 changes: 1 addition & 1 deletion sphinx/writers/manpage.py
Expand Up @@ -93,7 +93,7 @@ def __init__(self, document: nodes.document, builder: Builder) -> None:
if self.config.today:
self._docinfo['date'] = self.config.today
else:
self._docinfo['date'] = format_date(self.config.today_fmt or _('%b %d, %Y'),
self._docinfo['date'] = format_date(self.config.today_fmt or str(_('%b %d, %Y')),
language=self.config.language)
self._docinfo['copyright'] = self.config.copyright
self._docinfo['version'] = self.config.version
Expand Down
2 changes: 1 addition & 1 deletion sphinx/writers/texinfo.py
Expand Up @@ -220,7 +220,7 @@ def init_settings(self) -> None:
'project': self.escape(self.config.project),
'copyright': self.escape(self.config.copyright),
'date': self.escape(self.config.today or
format_date(self.config.today_fmt or _('%b %d, %Y'),
format_date(self.config.today_fmt or str(_('%b %d, %Y')),
language=self.config.language)),
})
# title
Expand Down
4 changes: 2 additions & 2 deletions sphinx/writers/text.py
Expand Up @@ -791,8 +791,8 @@ def visit_acks(self, node: Element) -> None:

def visit_image(self, node: Element) -> None:
if 'alt' in node.attributes:
self.add_text(_('[image: %s]') % node['alt'])
self.add_text(_('[image]'))
self.add_text(str(_('[image: %s]') % node['alt']))
self.add_text(str(_('[image]')))
raise nodes.SkipNode

def visit_transition(self, node: Element) -> None:
Expand Down
Binary file not shown.
2 changes: 2 additions & 0 deletions tests/roots/test-locale/locale1/et/LC_MESSAGES/myext.po
@@ -0,0 +1,2 @@
msgid "Hello world"
msgstr "Tere maailm"
41 changes: 41 additions & 0 deletions tests/test_locale.py
Expand Up @@ -55,3 +55,44 @@ def test_add_message_catalog(app, rootdir):
assert _('Hello world') == 'HELLO WORLD'
assert _('Hello sphinx') == 'Hello sphinx'
assert _('Hello reST') == 'Hello reST'


def _empty_language_translation(rootdir):
    """Initialise the ``myext`` catalogue without an explicit language.

    Clears every registered translator, re-initialises the locale machinery
    with ``language=None`` against the ``test-locale/locale1`` directory under
    *rootdir*, and returns the translation callable for the ``myext`` catalogue.
    """
    catalog = 'myext'
    locale_dirs = [rootdir / 'test-locale' / 'locale1']

    # Start from a clean slate so that an earlier test's catalogue is not reused
    locale.translators.clear()
    locale.init(locale_dirs, language=None, catalog=catalog)

    return locale.get_translation(catalog)


def test_init_environment_language(rootdir, monkeypatch):
    """Without an explicit language, ``LANGUAGE`` decides which catalogue is used.

    ``locale.init`` disables localisation whenever ``SOURCE_DATE_EPOCH`` is
    present in the environment, so the variable is removed inside each context.
    Otherwise this test would fail when the suite itself runs inside a
    reproducible-build environment.
    """
    with monkeypatch.context() as m:
        # Guard against a SOURCE_DATE_EPOCH inherited from the host environment
        m.delenv("SOURCE_DATE_EPOCH", raising=False)
        m.setenv("LANGUAGE", "en_US:en")
        _ = _empty_language_translation(rootdir)
        assert _('Hello world') == 'HELLO WORLD'

    with monkeypatch.context() as m:
        # Guard against a SOURCE_DATE_EPOCH inherited from the host environment
        m.delenv("SOURCE_DATE_EPOCH", raising=False)
        m.setenv("LANGUAGE", "et_EE:et")
        _ = _empty_language_translation(rootdir)
        assert _('Hello world') == 'Tere maailm'


def test_init_reproducible_build_language(rootdir, monkeypatch):
    """``SOURCE_DATE_EPOCH`` must make translations independent of ``LANGUAGE``.

    Two reproducible initialisations (English and Estonian ``LANGUAGE``) must
    yield the same text, and that text must differ from the one produced by a
    regular, localised Estonian initialisation.
    """
    with monkeypatch.context() as m:
        m.setenv("SOURCE_DATE_EPOCH", "0")
        m.setenv("LANGUAGE", "en_US:en")
        _ = _empty_language_translation(rootdir)
        sde_en_translation = str(_('Hello world'))  # str cast to evaluate lazy method

    with monkeypatch.context() as m:
        m.setenv("SOURCE_DATE_EPOCH", "0")
        m.setenv("LANGUAGE", "et_EE:et")
        _ = _empty_language_translation(rootdir)
        sde_et_translation = str(_('Hello world'))  # str cast to evaluate lazy method

    with monkeypatch.context() as m:
        # The localised baseline must not inherit SOURCE_DATE_EPOCH from the
        # host environment, otherwise it would silently be a reproducible build
        # as well and the final inequality assertion would fail.
        m.delenv("SOURCE_DATE_EPOCH", raising=False)
        m.setenv("LANGUAGE", "et_EE:et")
        _ = _empty_language_translation(rootdir)
        loc_et_translation = str(_('Hello world'))  # str cast to evaluate lazy method

    assert sde_en_translation == sde_et_translation
    assert sde_et_translation != loc_et_translation
56 changes: 56 additions & 0 deletions tests/test_util_inventory.py
Expand Up @@ -4,6 +4,7 @@
import zlib
from io import BytesIO

from sphinx.testing.util import SphinxTestApp
from sphinx.util.inventory import InventoryFile

inventory_v1 = b'''\
Expand Down Expand Up @@ -83,3 +84,58 @@ def test_read_inventory_v2_not_having_version():
invdata = InventoryFile.load(f, '/util', posixpath.join)
assert invdata['py:module']['module1'] == \
('foo', '', '/util/foo.html#module-module1', 'Long Module desc')


def _write_appconfig(dir, language, prefix=None):
    """Create a minimal Sphinx project configured for *language*.

    The project is written to ``dir / prefix`` (``prefix`` defaults to
    *language*) and consists of a ``conf.py`` that sets ``language`` plus an
    ``index.rst``. Returns the project directory.
    """
    project_dir = dir / (prefix or language)
    project_dir.makedirs()

    conf_py = project_dir / 'conf.py'
    index_rst = project_dir / 'index.rst'
    conf_py.write_text(f'language = "{language}"', encoding='utf8')
    index_rst.write_text('index.rst', encoding='utf8')

    # Sanity-check that exactly the two expected files were written
    assert sorted(project_dir.listdir()) == ['conf.py', 'index.rst']
    assert index_rst.exists()
    return project_dir


def _build_inventory(srcdir):
    """Build the project in *srcdir* and return the path of its ``objects.inv``.

    The application is always cleaned up, even when the build raises.
    """
    app = SphinxTestApp(srcdir=srcdir)
    try:
        app.build()
    finally:
        # NOTE(review): cleanup() presumably restores global state touched by
        # the test application; run it unconditionally so that a failing build
        # cannot leak that state into later tests.
        app.cleanup()
    return app.outdir / 'objects.inv'


def test_inventory_localization(tempdir):
    """Inventories built for different languages must differ.

    Localisation is disabled whenever ``SOURCE_DATE_EPOCH`` is set, so the
    variable is removed for the duration of the test. Otherwise both builds
    would be unlocalised and the assertion would fail when the suite runs
    inside a reproducible-build environment.
    """
    import os
    from unittest import mock

    # patch.dict restores os.environ to its original contents on exit
    with mock.patch.dict(os.environ):
        os.environ.pop("SOURCE_DATE_EPOCH", None)

        # Build an app using the Estonian (``et``) language
        srcdir_et = _write_appconfig(tempdir, "et")
        inventory_et = _build_inventory(srcdir_et)

        # Build the same app using the English (``en``) language
        srcdir_en = _write_appconfig(tempdir, "en")
        inventory_en = _build_inventory(srcdir_en)

    # Ensure that the inventory contents differ
    assert inventory_et.read_bytes() != inventory_en.read_bytes()


def test_inventory_reproducible(tempdir, monkeypatch):
    """Reproducible builds must produce identical inventories across languages.

    Also checks that a reproducible build differs from a regular localised
    build of the same project.
    """
    with monkeypatch.context() as m:
        # Configure reproducible builds
        # See: https://reproducible-builds.org/docs/source-date-epoch/
        m.setenv("SOURCE_DATE_EPOCH", "0")

        # Build an app using the Estonian (``et``) language
        srcdir_et = _write_appconfig(tempdir, "et")
        reproducible_inventory_et = _build_inventory(srcdir_et)

        # Build the same app using the English (``en``) language
        srcdir_en = _write_appconfig(tempdir, "en")
        reproducible_inventory_en = _build_inventory(srcdir_en)

    with monkeypatch.context() as m:
        # Also build the app in Estonian without build reproducibility enabled.
        # Explicitly unset the variable so that a value inherited from the host
        # environment cannot turn this baseline into a reproducible build too.
        m.delenv("SOURCE_DATE_EPOCH", raising=False)
        srcdir_et = _write_appconfig(tempdir, "et", prefix="localized")
        localized_inventory_et = _build_inventory(srcdir_et)

    # Ensure that the reproducible inventory contents are identical
    assert reproducible_inventory_et.read_bytes() == reproducible_inventory_en.read_bytes()

    # Ensure that inventory contents are different between a localized and non-localized build
    assert reproducible_inventory_et.read_bytes() != localized_inventory_et.read_bytes()

0 comments on commit f82c3c9

Please sign in to comment.