Skip to content

Commit

Permalink
Separate Q003 to accomodate f-string context
Browse files Browse the repository at this point in the history
  • Loading branch information
dhruvmanila committed Sep 22, 2023
1 parent 32f1f48 commit 2d9069a
Show file tree
Hide file tree
Showing 14 changed files with 877 additions and 167 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,27 @@
'This is a'
'\'string\''
)

# Same as above, but with f-strings
f'This is a \'string\'' # Q003
f'This is \\ a \\\'string\'' # Q003
f'"This" is a \'string\''
f"This is a 'string'"
f"\"This\" is a 'string'"
fr'This is a \'string\''
fR'This is a \'string\''
foo = (
f'This is a'
f'\'string\'' # Q003
)

# Nested f-strings (Python 3.12+)
#
# The first one is interesting because the fix for it is valid pre 3.12:
#
# f"'foo' {'nested'}"
#
# but as the actual string itself is invalid pre 3.12, we don't catch it.
f'\'foo\' {'nested'}' # Q003
f'\'foo\' {f'nested'}' # Q003
f'\'foo\' {f'\'nested\''} \'\'' # Q003
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,26 @@
"This is a"
"\"string\""
)

# Same as above, but with f-strings
f"This is a \"string\""
f"'This' is a \"string\""
f'This is a "string"'
f'\'This\' is a "string"'
fr"This is a \"string\""
fR"This is a \"string\""
foo = (
f"This is a"
f"\"string\""
)

# Nested f-strings (Python 3.12+)
#
# The first one is interesting because the fix for it is valid pre 3.12:
#
# f'"foo" {"nested"}'
#
# but as the actual string itself is invalid pre 3.12, we don't catch it.
f"\"foo\" {"foo"}" # Q003
f"\"foo\" {f"foo"}" # Q003
f"\"foo\" {f"\"foo\""} \"\"" # Q003
5 changes: 4 additions & 1 deletion crates/ruff_linter/src/checkers/tokens.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,11 +118,14 @@ pub(crate) fn check_tokens(
);
}

if settings.rules.enabled(Rule::AvoidableEscapedQuote) && settings.flake8_quotes.avoid_escape {
flake8_quotes::rules::avoidable_escaped_quote(&mut diagnostics, tokens, locator, settings);
}

if settings.rules.any_enabled(&[
Rule::BadQuotesInlineString,
Rule::BadQuotesMultilineString,
Rule::BadQuotesDocstring,
Rule::AvoidableEscapedQuote,
]) {
flake8_quotes::rules::from_tokens(&mut diagnostics, tokens, locator, settings);
}
Expand Down
39 changes: 39 additions & 0 deletions crates/ruff_linter/src/rules/flake8_quotes/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ mod tests {

use crate::assert_messages;
use crate::registry::Rule;
use crate::settings::types::PythonVersion;
use crate::settings::LinterSettings;
use crate::test::test_path;

Expand Down Expand Up @@ -45,6 +46,44 @@ mod tests {
Ok(())
}

#[test]
fn require_singles_over_doubles_escaped_py311() -> Result<()> {
let diagnostics = test_path(
Path::new("flake8_quotes/doubles_escaped.py"),
&LinterSettings {
flake8_quotes: super::settings::Settings {
inline_quotes: Quote::Single,
multiline_quotes: Quote::Single,
docstring_quotes: Quote::Single,
avoid_escape: true,
},
..LinterSettings::for_rule(Rule::AvoidableEscapedQuote)
.with_target_version(PythonVersion::Py311)
},
)?;
assert_messages!(diagnostics);
Ok(())
}

#[test]
fn require_doubles_over_singles_escaped_py311() -> Result<()> {
let diagnostics = test_path(
Path::new("flake8_quotes/singles_escaped.py"),
&LinterSettings {
flake8_quotes: super::settings::Settings {
inline_quotes: Quote::Double,
multiline_quotes: Quote::Double,
docstring_quotes: Quote::Double,
avoid_escape: true,
},
..LinterSettings::for_rule(Rule::AvoidableEscapedQuote)
.with_target_version(PythonVersion::Py311)
},
)?;
assert_messages!(diagnostics);
Ok(())
}

#[test_case(Path::new("singles.py"))]
#[test_case(Path::new("singles_escaped.py"))]
#[test_case(Path::new("singles_implicit.py"))]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,258 @@
use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::str::{is_triple_quote, leading_quote};
use ruff_python_parser::lexer::LexResult;
use ruff_python_parser::Tok;
use ruff_source_file::Locator;
use ruff_text_size::TextRange;

use crate::lex::docstring_detection::StateMachine;
use crate::registry::AsRule;
use crate::settings::LinterSettings;

/// ## What it does
/// Checks for strings that include escaped quotes, and suggests changing
/// the quote style to avoid the need to escape them.
///
/// ## Why is this bad?
/// It's preferable to avoid escaped quotes in strings. By changing the
/// outer quote style, you can avoid escaping inner quotes.
///
/// ## Example
/// ```python
/// foo = 'bar\'s'
/// ```
///
/// Use instead:
/// ```python
/// foo = "bar's"
/// ```
#[violation]
pub struct AvoidableEscapedQuote;

impl AlwaysAutofixableViolation for AvoidableEscapedQuote {
#[derive_message_formats]
fn message(&self) -> String {
format!("Change outer quotes to avoid escaping inner quotes")
}

fn autofix_title(&self) -> String {
"Change outer quotes to avoid escaping inner quotes".to_string()
}
}

struct FStringContext {
/// Whether to check for escaped quotes in the f-string.
check_for_escaped_quote: bool,
/// The range of the f-string start token.
fstring_start_range: TextRange,
/// The ranges of the f-string middle tokens containing escaped quotes.
fstring_middle_ranges: Vec<TextRange>,
}

impl FStringContext {
fn new(check_for_escaped_quote: bool, fstring_start_range: TextRange) -> Self {
Self {
check_for_escaped_quote,
fstring_start_range,
fstring_middle_ranges: vec![],
}
}

/// Update the context to not check for escaped quotes, and clear any
/// existing reported ranges.
fn do_not_check_for_escaped_quote(&mut self) {
self.check_for_escaped_quote = false;
self.fstring_middle_ranges.clear();
}

fn push_fstring_middle_range(&mut self, range: TextRange) {
self.fstring_middle_ranges.push(range);
}
}

/// Q003
pub(crate) fn avoidable_escaped_quote(
diagnostics: &mut Vec<Diagnostic>,
lxr: &[LexResult],
locator: &Locator,
settings: &LinterSettings,
) {
let quotes_settings = &settings.flake8_quotes;
let supports_pep701 = settings.target_version.supports_pep701();
let mut fstrings: Vec<FStringContext> = Vec::new();
let mut state_machine = StateMachine::default();

for &(ref tok, tok_range) in lxr.iter().flatten() {
let is_docstring = state_machine.consume(tok);
if is_docstring {
continue;
}

if !supports_pep701 {
// If this is a string or a start of a f-string which is inside another
// f-string, we won't check for escaped quotes for the entire f-string
// if the target version doesn't support PEP 701. For example:
//
// ```python
// f"\"foo\" {'nested'}"
// # ^^^^^^^^
// # We're here
// ```
//
// If we try to fix the above example, the outer and inner quote
// will be the same which is invalid pre 3.12:
//
// ```python
// f'"foo" {'nested'}"
// ```
if matches!(tok, Tok::String { .. } | Tok::FStringStart) {
if let Some(fstring_context) = fstrings.last_mut() {
fstring_context.do_not_check_for_escaped_quote();
continue;
}
}
}

match tok {
Tok::String {
value: string_contents,
kind,
triple_quoted,
} => {
// Check if we're using the preferred quotation style.
let uses_preferred_quote = leading_quote(locator.slice(tok_range))
.is_some_and(|text| text.contains(quotes_settings.inline_quotes.as_char()));
if kind.is_raw() || *triple_quoted || !uses_preferred_quote {
continue;
}

if string_contents.contains(quotes_settings.inline_quotes.as_char())
&& !string_contents.contains(quotes_settings.inline_quotes.opposite().as_char())
{
let mut diagnostic = Diagnostic::new(AvoidableEscapedQuote, tok_range);
if settings.rules.should_fix(diagnostic.kind.rule()) {
let fixed_contents = format!(
"{prefix}{quote}{value}{quote}",
prefix = kind.as_str(),
quote = quotes_settings.inline_quotes.opposite().as_char(),
value = unescaped_string(string_contents)
);
diagnostic.set_fix(Fix::automatic(Edit::range_replacement(
fixed_contents,
tok_range,
)));
}
diagnostics.push(diagnostic);
}
}
Tok::FStringStart => {
let text = locator.slice(tok_range);
// Check for escaped quote only if we're using the preferred quotation
// style and it isn't a triple-quoted f-string.
let check_for_escaped_quote = text
.contains(quotes_settings.inline_quotes.as_char())
&& !is_triple_quote(text);
fstrings.push(FStringContext::new(check_for_escaped_quote, tok_range));
}
Tok::FStringMiddle {
value: string_contents,
is_raw,
} if !is_raw => {
let Some(context) = fstrings.last_mut() else {
continue;
};
if !context.check_for_escaped_quote {
continue;
}
if string_contents.contains(quotes_settings.inline_quotes.as_char())
&& !string_contents.contains(quotes_settings.inline_quotes.opposite().as_char())
{
context.push_fstring_middle_range(tok_range);
}
}
Tok::FStringEnd => {
let Some(context) = fstrings.pop() else {
continue;
};
if context.fstring_middle_ranges.is_empty() {
// There are no `FStringMiddle` tokens containing any escaped
// quotes.
continue;
}
let mut diagnostic = Diagnostic::new(
AvoidableEscapedQuote,
TextRange::new(context.fstring_start_range.start(), tok_range.end()),
);
if settings.rules.should_fix(diagnostic.kind.rule()) {
let fstring_start_edit = Edit::range_replacement(
// No need for `r`/`R` as we don't perform the checks
// for raw strings.
format!("f{}", quotes_settings.inline_quotes.opposite().as_char()),
context.fstring_start_range,
);
let fstring_middle_and_end_edits = context
.fstring_middle_ranges
.iter()
.map(|&range| {
Edit::range_replacement(unescaped_string(locator.slice(range)), range)
})
.chain(std::iter::once(
// `FStringEnd` edit
Edit::range_replacement(
quotes_settings
.inline_quotes
.opposite()
.as_char()
.to_string(),
tok_range,
),
));
diagnostic.set_fix(Fix::automatic_edits(
fstring_start_edit,
fstring_middle_and_end_edits,
));
}
diagnostics.push(diagnostic);
}
_ => {}
}
}
}

fn unescaped_string(value: &str) -> String {
let mut fixed_contents = String::with_capacity(value.len());

let chars: Vec<char> = value.chars().collect();
let mut backslash_count = 0;
for col_offset in 0..chars.len() {
let char = chars[col_offset];
if char != '\\' {
fixed_contents.push(char);
continue;
}
backslash_count += 1;
// If the previous character was also a backslash
if col_offset > 0 && chars[col_offset - 1] == '\\' && backslash_count == 2 {
fixed_contents.push(char);
// reset to 0
backslash_count = 0;
continue;
}
// If we're at the end of the line
if col_offset == chars.len() - 1 {
fixed_contents.push(char);
continue;
}
let next_char = chars[col_offset + 1];
// Remove quote escape
if next_char == '\'' || next_char == '"' {
// reset to 0
backslash_count = 0;
continue;
}
fixed_contents.push(char);
}

fixed_contents
}

0 comments on commit 2d9069a

Please sign in to comment.