Skip to content

Commit

Permalink
Track quoting style in the tokenizer (astral-sh#10256)
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexWaygood authored and nkxxll committed Mar 10, 2024
1 parent bd973e8 commit 4ac1b15
Show file tree
Hide file tree
Showing 55 changed files with 4,063 additions and 3,268 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,7 @@

# Make sure we do not unescape quotes
this_is_fine = "This is an \\'escaped\\' quote"
this_should_raise_Q004 = "This is an \\\'escaped\\\' quote with an extra backslash"
this_should_raise_Q004 = "This is an \\\'escaped\\\' quote with an extra backslash" # Q004

# Invalid escapes in bytestrings are also triggered:
x = b"\xe7\xeb\x0c\xa1\x1b\x83tN\xce=x\xe9\xbe\x01\xb9\x13B_\xba\xe7\x0c2\xce\'rm\x0e\xcd\xe9.\xf8\xd2" # Q004
5 changes: 1 addition & 4 deletions crates/ruff_linter/src/directives.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,10 +131,7 @@ fn extract_noqa_line_for(lxr: &[LexResult], locator: &Locator, indexer: &Indexer

// For multi-line strings, we expect `noqa` directives on the last line of the
// string.
Tok::String {
triple_quoted: true,
..
} => {
Tok::String { kind, .. } if kind.is_triple_quoted() => {
if locator.contains_line_break(*range) {
string_mappings.push(TextRange::new(
locator.line_start(range.start()),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ pub(crate) fn trailing_commas(
// F-strings are handled as `String` token type with the complete range
// of the outermost f-string. This means that the expression inside the
// f-string is not checked for trailing commas.
Tok::FStringStart => {
Tok::FStringStart(_) => {
fstrings = fstrings.saturating_add(1);
None
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ pub(crate) fn implicit(
{
let (a_range, b_range) = match (a_tok, b_tok) {
(Tok::String { .. }, Tok::String { .. }) => (*a_range, *b_range),
(Tok::String { .. }, Tok::FStringStart) => {
(Tok::String { .. }, Tok::FStringStart(_)) => {
match indexer.fstring_ranges().innermost(b_range.start()) {
Some(b_range) => (*a_range, b_range),
None => continue,
Expand All @@ -122,7 +122,7 @@ pub(crate) fn implicit(
None => continue,
}
}
(Tok::FStringEnd, Tok::FStringStart) => {
(Tok::FStringEnd, Tok::FStringStart(_)) => {
match (
indexer.fstring_ranges().innermost(a_range.start()),
indexer.fstring_ranges().innermost(b_range.start()),
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::str::{is_triple_quote, leading_quote};
use ruff_python_parser::lexer::LexResult;
use ruff_python_parser::Tok;
use ruff_source_file::Locator;
Expand Down Expand Up @@ -158,7 +157,7 @@ pub(crate) fn avoidable_escaped_quote(
// ```python
// f'"foo" {'nested'}"
// ```
if matches!(tok, Tok::String { .. } | Tok::FStringStart) {
if matches!(tok, Tok::String { .. } | Tok::FStringStart(_)) {
if let Some(fstring_context) = fstrings.last_mut() {
fstring_context.ignore_escaped_quotes();
continue;
Expand All @@ -170,16 +169,13 @@ pub(crate) fn avoidable_escaped_quote(
Tok::String {
value: string_contents,
kind,
triple_quoted,
} => {
if kind.is_raw() || *triple_quoted {
if kind.is_raw_string() || kind.is_triple_quoted() {
continue;
}

// Check if we're using the preferred quotation style.
if !leading_quote(locator.slice(tok_range)).is_some_and(|text| {
contains_quote(text, quotes_settings.inline_quotes.as_char())
}) {
if Quote::from(kind.quote_style()) != quotes_settings.inline_quotes {
continue;
}

Expand All @@ -192,7 +188,7 @@ pub(crate) fn avoidable_escaped_quote(
let mut diagnostic = Diagnostic::new(AvoidableEscapedQuote, tok_range);
let fixed_contents = format!(
"{prefix}{quote}{value}{quote}",
prefix = kind.as_str(),
prefix = kind.prefix_str(),
quote = quotes_settings.inline_quotes.opposite().as_char(),
value = unescape_string(
string_contents,
Expand All @@ -206,12 +202,11 @@ pub(crate) fn avoidable_escaped_quote(
diagnostics.push(diagnostic);
}
}
Tok::FStringStart => {
let text = locator.slice(tok_range);
Tok::FStringStart(kind) => {
// Check for escaped quote only if we're using the preferred quotation
// style and it isn't a triple-quoted f-string.
let check_for_escaped_quote = !is_triple_quote(text)
&& contains_quote(text, quotes_settings.inline_quotes.as_char());
let check_for_escaped_quote = !kind.is_triple_quoted()
&& Quote::from(kind.quote_style()) == quotes_settings.inline_quotes;
fstrings.push(FStringContext::new(
check_for_escaped_quote,
tok_range,
Expand All @@ -220,9 +215,8 @@ pub(crate) fn avoidable_escaped_quote(
}
Tok::FStringMiddle {
value: string_contents,
is_raw,
triple_quoted: _,
} if !is_raw => {
kind,
} if !kind.is_raw_string() => {
let Some(context) = fstrings.last_mut() else {
continue;
};
Expand Down Expand Up @@ -315,25 +309,20 @@ pub(crate) fn unnecessary_escaped_quote(
Tok::String {
value: string_contents,
kind,
triple_quoted,
} => {
if kind.is_raw() || *triple_quoted {
if kind.is_raw_string() || kind.is_triple_quoted() {
continue;
}

let leading = match leading_quote(locator.slice(tok_range)) {
Some("\"") => Quote::Double,
Some("'") => Quote::Single,
_ => continue,
};
let leading = kind.quote_style();
if !contains_escaped_quote(string_contents, leading.opposite().as_char()) {
continue;
}

let mut diagnostic = Diagnostic::new(UnnecessaryEscapedQuote, tok_range);
let fixed_contents = format!(
"{prefix}{quote}{value}{quote}",
prefix = kind.as_str(),
prefix = kind.prefix_str(),
quote = leading.as_char(),
value = unescape_string(string_contents, leading.opposite().as_char())
);
Expand All @@ -343,16 +332,11 @@ pub(crate) fn unnecessary_escaped_quote(
)));
diagnostics.push(diagnostic);
}
Tok::FStringStart => {
let text = locator.slice(tok_range);
Tok::FStringStart(kind) => {
// Check for escaped quote only if we're using the preferred quotation
// style and it isn't a triple-quoted f-string.
let check_for_escaped_quote = !is_triple_quote(text);
let quote_style = if contains_quote(text, Quote::Single.as_char()) {
Quote::Single
} else {
Quote::Double
};
let check_for_escaped_quote = !kind.is_triple_quoted();
let quote_style = Quote::from(kind.quote_style());
fstrings.push(FStringContext::new(
check_for_escaped_quote,
tok_range,
Expand All @@ -361,9 +345,8 @@ pub(crate) fn unnecessary_escaped_quote(
}
Tok::FStringMiddle {
value: string_contents,
is_raw,
triple_quoted: _,
} if !is_raw => {
kind,
} if !kind.is_raw_string() => {
let Some(context) = fstrings.last_mut() else {
continue;
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,7 @@ struct FStringRangeBuilder {
impl FStringRangeBuilder {
fn visit_token(&mut self, token: &Tok, range: TextRange) {
match token {
Tok::FStringStart => {
Tok::FStringStart(_) => {
if self.nesting == 0 {
self.start_location = range.start();
}
Expand Down
9 changes: 9 additions & 0 deletions crates/ruff_linter/src/rules/flake8_quotes/settings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,15 @@ impl Default for Quote {
}
}

impl From<ruff_python_parser::QuoteStyle> for Quote {
fn from(value: ruff_python_parser::QuoteStyle) -> Self {
match value {
ruff_python_parser::QuoteStyle::Double => Self::Double,
ruff_python_parser::QuoteStyle::Single => Self::Single,
}
}
}

#[derive(Debug, CacheKey)]
pub struct Settings {
pub inline_quotes: Quote,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -326,16 +326,34 @@ singles_escaped_unnecessary.py:43:26: Q004 [*] Unnecessary escape on inner quote
|
41 | # Make sure we do not unescape quotes
42 | this_is_fine = "This is an \\'escaped\\' quote"
43 | this_should_raise_Q004 = "This is an \\\'escaped\\\' quote with an extra backslash"
43 | this_should_raise_Q004 = "This is an \\\'escaped\\\' quote with an extra backslash" # Q004
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Q004
44 |
45 | # Invalid escapes in bytestrings are also triggered:
|
= help: Remove backslash

Safe fix
40 40 |
41 41 | # Make sure we do not unescape quotes
42 42 | this_is_fine = "This is an \\'escaped\\' quote"
43 |-this_should_raise_Q004 = "This is an \\\'escaped\\\' quote with an extra backslash"
43 |+this_should_raise_Q004 = "This is an \\'escaped\\' quote with an extra backslash"
43 |-this_should_raise_Q004 = "This is an \\\'escaped\\\' quote with an extra backslash" # Q004
43 |+this_should_raise_Q004 = "This is an \\'escaped\\' quote with an extra backslash" # Q004
44 44 |
45 45 | # Invalid escapes in bytestrings are also triggered:
46 46 | x = b"\xe7\xeb\x0c\xa1\x1b\x83tN\xce=x\xe9\xbe\x01\xb9\x13B_\xba\xe7\x0c2\xce\'rm\x0e\xcd\xe9.\xf8\xd2" # Q004

singles_escaped_unnecessary.py:46:5: Q004 [*] Unnecessary escape on inner quote character
|
45 | # Invalid escapes in bytestrings are also triggered:
46 | x = b"\xe7\xeb\x0c\xa1\x1b\x83tN\xce=x\xe9\xbe\x01\xb9\x13B_\xba\xe7\x0c2\xce\'rm\x0e\xcd\xe9.\xf8\xd2" # Q004
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Q004
|
= help: Remove backslash

Safe fix
43 43 | this_should_raise_Q004 = "This is an \\\'escaped\\\' quote with an extra backslash" # Q004
44 44 |
45 45 | # Invalid escapes in bytestrings are also triggered:
46 |-x = b"\xe7\xeb\x0c\xa1\x1b\x83tN\xce=x\xe9\xbe\x01\xb9\x13B_\xba\xe7\x0c2\xce\'rm\x0e\xcd\xe9.\xf8\xd2" # Q004
46 |+x = b"\xe7\xeb\x0c\xa1\x1b\x83tN\xce=x\xe9\xbe\x01\xb9\x13B_\xba\xe7\x0c2\xce'rm\x0e\xcd\xe9.\xf8\xd2" # Q004
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use memchr::memchr_iter;
use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_index::Indexer;
use ruff_python_parser::{StringKind, Tok};
use ruff_python_parser::Tok;
use ruff_source_file::Locator;
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};

Expand Down Expand Up @@ -66,21 +66,21 @@ pub(crate) fn invalid_escape_sequence(
token: &Tok,
token_range: TextRange,
) {
let (token_source_code, string_start_location) = match token {
Tok::FStringMiddle { value, is_raw, .. } => {
if *is_raw {
let (token_source_code, string_start_location, kind) = match token {
Tok::FStringMiddle { value, kind } => {
if kind.is_raw_string() {
return;
}
let Some(range) = indexer.fstring_ranges().innermost(token_range.start()) else {
return;
};
(&**value, range.start())
(&**value, range.start(), kind)
}
Tok::String { kind, .. } => {
if kind.is_raw() {
if kind.is_raw_string() {
return;
}
(locator.slice(token_range), token_range.start())
(locator.slice(token_range), token_range.start(), kind)
}
_ => return,
};
Expand Down Expand Up @@ -207,13 +207,7 @@ pub(crate) fn invalid_escape_sequence(
invalid_escape_char.range(),
);

if matches!(
token,
Tok::String {
kind: StringKind::Unicode,
..
}
) {
if kind.is_u_string() {
// Replace the Unicode prefix with `r`.
diagnostic.set_fix(Fix::safe_edit(Edit::replacement(
"r".to_string(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,14 @@ use std::str::FromStr;

use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::str::{leading_quote, trailing_quote};
use ruff_python_ast::Expr;
use ruff_python_literal::{
cformat::{CFormatErrorType, CFormatString},
format::FormatPart,
format::FromTemplate,
format::{FormatSpec, FormatSpecError, FormatString},
};
use ruff_python_parser::{lexer, Mode};
use ruff_python_parser::{lexer, Mode, StringKind, Tok};
use ruff_text_size::{Ranged, TextRange};

use crate::checkers::ast::Checker;
Expand Down Expand Up @@ -93,15 +92,15 @@ pub(crate) fn call(checker: &mut Checker, string: &str, range: TextRange) {
/// Ex) `"%z" % "1"`
pub(crate) fn percent(checker: &mut Checker, expr: &Expr) {
// Grab each string segment (in case there's an implicit concatenation).
let mut strings: Vec<TextRange> = vec![];
let mut strings: Vec<(TextRange, StringKind)> = vec![];
for (tok, range) in
lexer::lex_starts_at(checker.locator().slice(expr), Mode::Module, expr.start()).flatten()
{
if tok.is_string() {
strings.push(range);
} else if tok.is_percent() {
match tok {
Tok::String { kind, .. } => strings.push((range, kind)),
// Break as soon as we find the modulo symbol.
break;
Tok::Percent => break,
_ => {}
}
}

Expand All @@ -110,12 +109,10 @@ pub(crate) fn percent(checker: &mut Checker, expr: &Expr) {
return;
}

for range in &strings {
for (range, kind) in &strings {
let string = checker.locator().slice(*range);
let (Some(leader), Some(trailer)) = (leading_quote(string), trailing_quote(string)) else {
return;
};
let string = &string[leader.len()..string.len() - trailer.len()];
let string = &string
[usize::from(kind.opener_len())..(string.len() - usize::from(kind.closer_len()))];

// Parse the format string (e.g. `"%s"`) into a list of `PercentFormat`.
if let Err(format_error) = CFormatString::from_str(string) {
Expand Down

0 comments on commit 4ac1b15

Please sign in to comment.