Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add f-string parsing for Python 3.12 (PEP 701) #1027

Merged
merged 1 commit into from Oct 1, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
34 changes: 34 additions & 0 deletions babel/messages/extract.py
Expand Up @@ -21,6 +21,7 @@
import io
import os
import sys
import tokenize
from collections.abc import (
Callable,
Collection,
Expand Down Expand Up @@ -90,6 +91,11 @@

DEFAULT_MAPPING: list[tuple[str, str]] = [('**.py', 'python')]

# New tokens in Python 3.12, or None on older versions
FSTRING_START = getattr(tokenize, "FSTRING_START", None)
FSTRING_MIDDLE = getattr(tokenize, "FSTRING_MIDDLE", None)
FSTRING_END = getattr(tokenize, "FSTRING_END", None)


def _strip_comment_tags(comments: MutableSequence[str], tags: Iterable[str]):
"""Helper function for `extract` that strips comment tags from strings
Expand Down Expand Up @@ -513,6 +519,11 @@
next_line = lambda: fileobj.readline().decode(encoding)

tokens = generate_tokens(next_line)

# Current prefix of a Python 3.12 (PEP 701) f-string, or None if we're not
# currently parsing one.
current_fstring_start = None

for tok, value, (lineno, _), _, _ in tokens:
if call_stack == -1 and tok == NAME and value in ('def', 'class'):
in_def = True
Expand Down Expand Up @@ -574,6 +585,20 @@
val = _parse_python_string(value, encoding, future_flags)
if val is not None:
buf.append(val)

# Python 3.12+, see https://peps.python.org/pep-0701/#new-tokens
elif tok == FSTRING_START:
current_fstring_start = value

Check warning on line 591 in babel/messages/extract.py

View check run for this annotation

Codecov / codecov/patch

babel/messages/extract.py#L591

Added line #L591 was not covered by tests
elif tok == FSTRING_MIDDLE:
if current_fstring_start is not None:
current_fstring_start += value

Check warning on line 594 in babel/messages/extract.py

View check run for this annotation

Codecov / codecov/patch

babel/messages/extract.py#L593-L594

Added lines #L593 - L594 were not covered by tests
elif tok == FSTRING_END:
if current_fstring_start is not None:
fstring = current_fstring_start + value
val = _parse_python_string(fstring, encoding, future_flags)
if val is not None:
buf.append(val)

Check warning on line 600 in babel/messages/extract.py

View check run for this annotation

Codecov / codecov/patch

babel/messages/extract.py#L596-L600

Added lines #L596 - L600 were not covered by tests

elif tok == OP and value == ',':
if buf:
messages.append(''.join(buf))
Expand All @@ -594,6 +619,15 @@
elif tok == NAME and value in keywords:
funcname = value

if (current_fstring_start is not None
and tok not in {FSTRING_START, FSTRING_MIDDLE}
):
# In Python 3.12, tokens other than FSTRING_* mean the
# f-string is dynamic, so we don't want to extract it.
# And if it's FSTRING_END, we've already handled it above.
# Let's forget that we're in an f-string.
current_fstring_start = None

Check warning on line 629 in babel/messages/extract.py

View check run for this annotation

Codecov / codecov/patch

babel/messages/extract.py#L629

Added line #L629 was not covered by tests


def _parse_python_string(value: str, encoding: str, future_flags: int) -> str | None:
# Unwrap quotes in a safe manner, maintaining the string's encoding
Expand Down