Skip to content

Commit

Permalink
Ignore quote escapes in expression part of f-string (#7597)
Browse files Browse the repository at this point in the history
This PR fixes the following issues w.r.t. the PEP 701 changes:
1. Mark all unformatted comments inside f-strings as formatted only _after_ the
   f-string has been formatted.
2. Do not add or remove quote escapes when normalizing the expression
   part of an f-string.

This PR also decreases the `--files-with-errors` count by 1. This is
because we can now parse the
[`test_fstring.py`](https://discord.com/channels/1039017663004942429/1082324263199064206/1154633274887516254)
file in the CPython repository, which contains the new f-string syntax. This is
also the file that changes the similarity index for CPython compared to main.

`cargo test -p ruff_python_formatter`

| project | similarity index | total files | changed files |
|--------------|------------------:|------------------:|------------------:|
| cpython | 0.76051 | 1789 | 1632 |
| django | 0.99983 | 2760 | 36 |
| transformers | 0.99963 | 2587 | 323 |
| twine | 1.00000 | 33 | 0 |
| typeshed | 0.99979 | 3496 | 22 |
| warehouse | 0.99967 | 648 | 15 |
| zulip | 0.99972 | 1437 | 21 |

| project | similarity index | total files | changed files |
|--------------|------------------:|------------------:|------------------:|
| cpython | 0.76083 | 1789 | 1631 |
| django | 0.99983 | 2760 | 36 |
| transformers | 0.99963 | 2587 | 323 |
| twine | 1.00000 | 33 | 0 |
| typeshed | 0.99979 | 3496 | 22 |
| warehouse | 0.99967 | 648 | 15 |
| zulip | 0.99972 | 1437 | 21 |
  • Loading branch information
dhruvmanila committed Sep 29, 2023
1 parent 97a5e35 commit 01123d5
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 21 deletions.
60 changes: 40 additions & 20 deletions crates/ruff_python_formatter/src/expression/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,16 +138,8 @@ impl<'a> FormatString<'a> {

impl<'a> Format<PyFormatContext<'_>> for FormatString<'a> {
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
// TODO(dhruvmanila): With PEP 701, comments can be inside f-strings.
// This is to mark all of those comments as formatted but we need to
// figure out how to handle them.
if matches!(self.string, AnyString::FString(_)) {
f.context()
.comments()
.mark_verbatim_node_comments_formatted(self.string.into());
}
let locator = f.context().locator();
match self.layout {
let result = match self.layout {
StringLayout::Default => {
if self.string.is_implicit_concatenated() {
in_parentheses_only_group(&FormatStringContinuation::new(self.string)).fmt(f)
Expand All @@ -170,7 +162,19 @@ impl<'a> Format<PyFormatContext<'_>> for FormatString<'a> {
StringLayout::ImplicitConcatenatedStringInBinaryLike => {
FormatStringContinuation::new(self.string).fmt(f)
}
};
// TODO(dhruvmanila): With PEP 701, comments can be inside f-strings.
// This is to mark all of those comments as formatted but we need to
// figure out how to handle them. Note that this needs to be done only
// after the f-string is formatted, so that only the comments which are
// still unformatted at that point get marked.
if let AnyString::FString(fstring) = self.string {
let comments = f.context().comments();
fstring.values.iter().for_each(|value| {
comments.mark_verbatim_node_comments_formatted(value.into());
});
}
result
}
}

Expand Down Expand Up @@ -435,11 +439,7 @@ impl StringPart {
}
};

let normalized = normalize_string(
locator.slice(self.content_range),
quotes,
self.prefix.is_raw_string(),
);
let normalized = normalize_string(locator.slice(self.content_range), quotes, self.prefix);

NormalizedString {
prefix: self.prefix,
Expand Down Expand Up @@ -531,6 +531,10 @@ impl StringPrefix {
/// Returns `true` if this prefix contains either the [`StringPrefix::RAW`] or
/// the [`StringPrefix::RAW_UPPER`] flag.
pub(super) const fn is_raw_string(self) -> bool {
self.contains(StringPrefix::RAW) || self.contains(StringPrefix::RAW_UPPER)
}

/// Returns `true` if this prefix contains the [`StringPrefix::F_STRING`] flag.
pub(super) const fn is_fstring(self) -> bool {
self.contains(StringPrefix::F_STRING)
}
}

impl Format<PyFormatContext<'_>> for StringPrefix {
Expand Down Expand Up @@ -770,7 +774,7 @@ impl Format<PyFormatContext<'_>> for StringQuotes {
/// with the provided [`StringQuotes`] style.
///
/// Returns the normalized string and whether it contains new lines.
fn normalize_string(input: &str, quotes: StringQuotes, is_raw: bool) -> Cow<str> {
fn normalize_string(input: &str, quotes: StringQuotes, prefix: StringPrefix) -> Cow<str> {
// The normalized string if `input` is not yet normalized.
// `output` must remain empty if `input` is already normalized.
let mut output = String::new();
Expand All @@ -782,14 +786,30 @@ fn normalize_string(input: &str, quotes: StringQuotes, is_raw: bool) -> Cow<str>
let preferred_quote = style.as_char();
let opposite_quote = style.invert().as_char();

let mut chars = input.char_indices();
let mut chars = input.char_indices().peekable();

let is_raw = prefix.is_raw_string();
let is_fstring = prefix.is_fstring();
let mut formatted_value_nesting = 0u32;

while let Some((index, c)) = chars.next() {
if is_fstring && matches!(c, '{' | '}') {
if chars.peek().copied().is_some_and(|(_, next)| next == c) {
// Skip over the second character of the double braces
chars.next();
} else if c == '{' {
formatted_value_nesting += 1;
} else {
// Safe to assume that `c == '}'` here because of the matched pattern above
formatted_value_nesting = formatted_value_nesting.saturating_sub(1);
}
continue;
}
if c == '\r' {
output.push_str(&input[last_index..index]);

// Skip over the '\r' character, keep the `\n`
if input.as_bytes().get(index + 1).copied() == Some(b'\n') {
if chars.peek().copied().is_some_and(|(_, next)| next == '\n') {
chars.next();
}
// Replace the `\r` with a `\n`
Expand All @@ -800,9 +820,9 @@ fn normalize_string(input: &str, quotes: StringQuotes, is_raw: bool) -> Cow<str>
last_index = index + '\r'.len_utf8();
} else if !quotes.triple && !is_raw {
if c == '\\' {
if let Some(next) = input.as_bytes().get(index + 1).copied().map(char::from) {
if let Some((_, next)) = chars.peek().copied() {
#[allow(clippy::if_same_then_else)]
if next == opposite_quote {
if next == opposite_quote && formatted_value_nesting == 0 {
// Remove the escape by ending before the backslash and starting again with the quote
chars.next();
output.push_str(&input[last_index..index]);
Expand All @@ -815,7 +835,7 @@ fn normalize_string(input: &str, quotes: StringQuotes, is_raw: bool) -> Cow<str>
chars.next();
}
}
} else if c == preferred_quote {
} else if c == preferred_quote && formatted_value_nesting == 0 {
// Escape the quote
output.push_str(&input[last_index..index]);
output.push('\\');
Expand Down
2 changes: 1 addition & 1 deletion scripts/formatter_ecosystem_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ git -C "$dir/cpython" checkout 1a1bfc28912a39b500c578e9f10a8a222638d411

time cargo run --bin ruff_dev -- format-dev --stability-check \
--error-file "$target/progress_projects_errors.txt" --log-file "$target/progress_projects_log.txt" --stats-file "$target/progress_projects_stats.txt" \
--files-with-errors 16 --multi-project "$dir" || (
--files-with-errors 15 --multi-project "$dir" || (
echo "Ecosystem check failed"
cat "$target/progress_projects_log.txt"
exit 1
Expand Down

0 comments on commit 01123d5

Please sign in to comment.