Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support 't' specifier in keywords #1015

Merged
merged 10 commits into from Oct 1, 2023
102 changes: 59 additions & 43 deletions babel/messages/extract.py
Expand Up @@ -55,7 +55,8 @@ class _FileObj(SupportsRead[bytes], SupportsReadline[bytes], Protocol):
def seek(self, __offset: int, __whence: int = ...) -> int: ...
def tell(self) -> int: ...

_Keyword: TypeAlias = tuple[int | tuple[int, int] | tuple[int, str], ...] | None
_SimpleKeyword: TypeAlias = tuple[int | tuple[int, int] | tuple[int, str], ...] | None
_Keyword: TypeAlias = dict[int | None, _SimpleKeyword] | _SimpleKeyword

# 5-tuple of (filename, lineno, messages, comments, context)
_FileExtractionResult: TypeAlias = tuple[str, int, str | tuple[str, ...], list[str], str | None]
Expand Down Expand Up @@ -315,6 +316,47 @@ def extract_from_file(
options, strip_comment_tags))


def _match_messages_against_spec(lineno: int, messages: list[str|None], comments: list[str],
fileobj: _FileObj, spec: tuple[int|tuple[int, str], ...]):
translatable = []
context = None

# last_index is 1 based like the keyword spec
last_index = len(messages)
for index in spec:
if isinstance(index, tuple): # (n, 'c')
context = messages[index[0] - 1]
continue
if last_index < index:
# Not enough arguments
return
message = messages[index - 1]
if message is None:
return
translatable.append(message)

# keyword spec indexes are 1 based, therefore '-1'
if isinstance(spec[0], tuple):
# context-aware *gettext method
first_msg_index = spec[1] - 1
else:
first_msg_index = spec[0] - 1
# An empty string msgid isn't valid, emit a warning
if not messages[first_msg_index]:
filename = (getattr(fileobj, "name", None) or "(unknown)")
sys.stderr.write(
f"{filename}:{lineno}: warning: Empty msgid. It is reserved by GNU gettext: gettext(\"\") "
f"returns the header entry with meta information, not the empty string.\n"
)
return

translatable = tuple(translatable)
if len(translatable) == 1:
translatable = translatable[0]

return lineno, translatable, comments, context


def extract(
method: _ExtractionMethod,
fileobj: _FileObj,
Expand Down Expand Up @@ -400,56 +442,30 @@ def extract(
options=options or {})

for lineno, funcname, messages, comments in results:
spec = keywords[funcname] or (1,) if funcname else (1,)
if not isinstance(messages, (list, tuple)):
messages = [messages]
if not messages:
continue

# Validate the messages against the keyword's specification
context = None
msgs = []
invalid = False
# last_index is 1 based like the keyword spec
last_index = len(messages)
for index in spec:
if isinstance(index, tuple):
context = messages[index[0] - 1]
continue
if last_index < index:
# Not enough arguments
invalid = True
break
message = messages[index - 1]
if message is None:
invalid = True
break
msgs.append(message)
if invalid:
continue

# keyword spec indexes are 1 based, therefore '-1'
if isinstance(spec[0], tuple):
# context-aware *gettext method
first_msg_index = spec[1] - 1
else:
first_msg_index = spec[0] - 1
if not messages[first_msg_index]:
# An empty string msgid isn't valid, emit a warning
filename = (getattr(fileobj, "name", None) or "(unknown)")
sys.stderr.write(
f"{filename}:{lineno}: warning: Empty msgid. It is reserved by GNU gettext: gettext(\"\") "
f"returns the header entry with meta information, not the empty string.\n"
)
continue

messages = tuple(msgs)
if len(messages) == 1:
messages = messages[0]
specs = keywords[funcname] or None if funcname else None
# {None: x} may be collapsed into x for backwards compatibility.
if not isinstance(specs, dict):
specs = {None: specs}

if strip_comment_tags:
_strip_comment_tags(comments, comment_tags)
yield lineno, messages, comments, context

# None matches all arities.
for arity in (None, len(messages)):
try:
spec = specs[arity]
except KeyError:
continue
jeanas marked this conversation as resolved.
Show resolved Hide resolved
if spec is None:
spec = (1,)
result = _match_messages_against_spec(lineno, messages, comments, fileobj, spec)
if result is not None:
yield result


def extract_nothing(
Expand Down
69 changes: 50 additions & 19 deletions babel/messages/frontend.py
Expand Up @@ -8,6 +8,8 @@
:license: BSD, see LICENSE for more details.
"""

from __future__ import annotations

import datetime
import fnmatch
import logging
Expand Down Expand Up @@ -1111,34 +1113,63 @@ def parse_mapping(fileobj, filename=None):

return method_map, options_map

def _parse_spec(s: str) -> tuple[int | None, tuple[int|tuple[int, str], ...]]:
inds = []
number = None
for x in s.split(','):
if x[-1] == 't':
number = int(x[:-1])
elif x[-1] == 'c':
inds.append((int(x[:-1]), 'c'))
else:
inds.append(int(x))
return number, tuple(inds)

def parse_keywords(strings: Iterable[str] = ()):
    """Parse keywords specifications from the given list of strings.

    >>> import pprint
    >>> keywords = ['_', 'dgettext:2', 'dngettext:2,3', 'pgettext:1c,2',
    ...             'polymorphic:1', 'polymorphic:2,2t', 'polymorphic:3c,3t']
    >>> pprint.pprint(parse_keywords(keywords))
    {'_': None,
     'dgettext': (2,),
     'dngettext': (2, 3),
     'pgettext': ((1, 'c'), 2),
     'polymorphic': {None: (1,), 2: (2,), 3: ((3, 'c'),)}}

    The input keywords are in GNU Gettext style; see :doc:`cmdline` for details.

    The output is a dictionary mapping keyword names to a dictionary of specifications.
    Keys in this dictionary are numbers of arguments, where ``None`` means that all numbers
    of arguments are matched, and a number means only calls with that number of arguments
    are matched (which happens when using the "t" specifier). However, as a special
    case for backwards compatibility, if the dictionary of specifications would
    be ``{None: x}``, i.e., there is only one specification and it matches all argument
    counts, then it is collapsed into just ``x``.

    A specification is either a tuple or None. If a tuple, each element can be either a number
    ``n``, meaning that the nth argument should be extracted as a message, or the tuple
    ``(n, 'c')``, meaning that the nth argument should be extracted as context for the
    messages. A ``None`` specification is equivalent to ``(1,)``, extracting the first
    argument.

    A keyword followed by a colon but no specification (``"foo:"``) is treated
    like the bare keyword ``"foo"``.
    """
    keywords = {}
    for string in strings:
        funcname, _, spec_str = string.partition(':')
        if spec_str.strip():
            number, spec = _parse_spec(spec_str)
        else:
            # Bare identifier, or nothing after the colon: default spec that
            # applies to every argument count.
            number = None
            spec = None
        # A later spec for the same keyword and argument count replaces an
        # earlier one.
        keywords.setdefault(funcname, {})[number] = spec

    # For best backwards compatibility, collapse {None: x} into x.
    # Only values are replaced, so the dict's size is stable while iterating.
    for funcname, specs in keywords.items():
        if set(specs) == {None}:
            keywords[funcname] = specs[None]

    return keywords


Expand Down
39 changes: 39 additions & 0 deletions docs/cmdline.rst
Expand Up @@ -133,6 +133,45 @@ a collection of source files::
header comment for the catalog


The meaning of ``--keyword`` values is as follows:

- Pass a simple identifier like ``_`` to extract the first (and only the first)
argument of all function calls to ``_``.

- To extract arguments other than the first, add a colon and the argument
indices separated by commas. For example, the ``dngettext`` function
typically expects translatable strings as second and third arguments,
so you could pass ``dngettext:2,3``.

- Some arguments should not be interpreted as translatable strings, but
as context strings. For that, append "c" to the argument index. For example:
``pgettext:1c,2``.

- In C++ and Python, you may have functions that behave differently
depending on how many arguments they take. For this use case, you can
add an integer followed by "t" after the colon. In this case, the
keyword will only match a function invocation if it has the specified
total number of arguments. For example, if you have a function
``foo`` that behaves as ``gettext`` (argument is a message) or
``pgettext`` (arguments are a context and a message) depending on
whether it takes one or two arguments, you can pass
``--keyword=foo:1,1t --keyword=foo:1c,2,2t``.

The default keywords are equivalent to passing ::

--keyword=_
--keyword=gettext
--keyword=ngettext:1,2
--keyword=ugettext
--keyword=ungettext:1,2
--keyword=dgettext:2
--keyword=dngettext:2,3
--keyword=N_
--keyword=pgettext:1c,2
--keyword=npgettext:1c,2,3



init
====

Expand Down
33 changes: 31 additions & 2 deletions tests/messages/test_frontend.py
Expand Up @@ -17,15 +17,15 @@
import time
import unittest
from datetime import datetime, timedelta
from io import StringIO
from io import BytesIO, StringIO

import pytest
from freezegun import freeze_time
from setuptools import Distribution

from babel import __version__ as VERSION
from babel.dates import format_datetime
from babel.messages import Catalog, frontend
from babel.messages import Catalog, extract, frontend
from babel.messages.frontend import (
BaseError,
CommandLineInterface,
Expand Down Expand Up @@ -1422,6 +1422,35 @@ def test_parse_keywords():
}


def test_parse_keywords_with_t():
    """Specs for the same keyword are grouped by their ``t`` argument count."""
    keyword_strings = ['_:1', '_:2,2t', '_:2c,3,3t']

    # The spec without a "t" component is stored under the None key.
    expected = {
        '_': {
            None: (1,),
            2: (2,),
            3: ((2, 'c'), 3),
        },
    }

    parsed = frontend.parse_keywords(keyword_strings)
    assert parsed == expected

def test_extract_messages_with_t():
    """Extraction applies both the arity-agnostic and the arity-specific spec."""
    source = rb"""
_("1 arg, arg 1")
_("2 args, arg 1", "2 args, arg 2")
_("3 args, arg 1", "3 args, arg 2", "3 args, arg 3")
_("4 args, arg 1", "4 args, arg 2", "4 args, arg 3", "4 args, arg 4")
"""
    keywords = frontend.parse_keywords(['_:1', '_:2,2t', '_:2c,3,3t'])
    extracted = list(extract.extract("python", BytesIO(source), keywords))

    # Every call yields its first argument via the '_:1' spec; the 2- and
    # 3-argument calls additionally yield a result from their "t" spec.
    assert extracted == [
        (2, '1 arg, arg 1', [], None),
        (3, '2 args, arg 1', [], None),
        (3, '2 args, arg 2', [], None),
        (4, '3 args, arg 1', [], None),
        (4, '3 args, arg 3', [], '3 args, arg 2'),
        (5, '4 args, arg 1', [], None),
    ]


def configure_cli_command(cmdline):
"""
Helper to configure a command class, but not run it just yet.
Expand Down