Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid panics for implicitly-concatenated docstrings #3584

Merged
merged 2 commits into from
Mar 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
9 changes: 9 additions & 0 deletions crates/ruff/resources/test/fixtures/pydocstyle/sections.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,3 +504,12 @@ def test_incorrect_indent(self, x=1, y=2): # noqa: D207, D213, D407
x: Test argument.

"""


def implicit_string_concatenation():
"""Toggle the gizmo.

Returns
A value of some sort.

""""Extra content"
14 changes: 13 additions & 1 deletion crates/ruff/src/checkers/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ use crate::checkers::ast::deferred::Deferred;
use crate::docstrings::definition::{
transition_scope, Definition, DefinitionKind, Docstring, Documentable,
};
use crate::fs::relativize_path;
use crate::registry::{AsRule, Rule};
use crate::rules::{
flake8_2020, flake8_annotations, flake8_bandit, flake8_blind_except, flake8_boolean_trap,
Expand All @@ -50,7 +51,7 @@ use crate::rules::{
};
use crate::settings::types::PythonVersion;
use crate::settings::{flags, Settings};
use crate::{autofix, docstrings, noqa};
use crate::{autofix, docstrings, noqa, warn_user};

mod deferred;

Expand Down Expand Up @@ -5101,6 +5102,7 @@ impl<'a> Checker<'a> {
}
overloaded_name = flake8_annotations::helpers::overloaded_name(self, &definition);
}

if self.is_stub {
if self.settings.rules.enabled(Rule::DocstringInStub) {
flake8_pyi::rules::docstring_in_stubs(self, definition.docstring);
Expand Down Expand Up @@ -5130,6 +5132,16 @@ impl<'a> Checker<'a> {
Location::new(expr.location.row(), expr.location.column()),
));

if pydocstyle::helpers::should_ignore_docstring(contents) {
warn_user!(
"Docstring at {}:{}:{} contains implicit string concatenation; ignoring...",
relativize_path(self.path),
expr.location.row(),
expr.location.column() + 1
);
continue;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ideally this would be a diagnostic but I'm not sure that it fits any of our existing diagnostics under the pydocstyle category.

}

let body = str::raw_contents(contents);
let docstring = Docstring {
kind: definition.kind,
Expand Down
5 changes: 4 additions & 1 deletion crates/ruff/src/docstrings/extraction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,18 @@

use rustpython_parser::ast::{Constant, Expr, ExprKind, Stmt, StmtKind};

use crate::docstrings::definition::{Definition, DefinitionKind, Documentable};
use ruff_python_ast::visibility::{Modifier, VisibleScope};

use crate::docstrings::definition::{Definition, DefinitionKind, Documentable};

/// Extract a docstring from a function or class body.
pub fn docstring_from(suite: &[Stmt]) -> Option<&Expr> {
let stmt = suite.first()?;
// Require the docstring to be a standalone expression.
let StmtKind::Expr { value } = &stmt.node else {
return None;
};
// Only match strings.
if !matches!(
&value.node,
ExprKind::Constant {
Expand Down
15 changes: 12 additions & 3 deletions crates/ruff/src/rules/pydocstyle/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@ use std::collections::BTreeSet;
use ruff_python_ast::cast;
use ruff_python_ast::helpers::{map_callable, to_call_path};
use ruff_python_ast::newlines::StrExt;
use ruff_python_ast::str::is_implicit_concatenation;

use crate::checkers::ast::Checker;
use crate::docstrings::definition::{Definition, DefinitionKind};

/// Return the index of the first logical line in a string.
pub fn logical_line(content: &str) -> Option<usize> {
pub(crate) fn logical_line(content: &str) -> Option<usize> {
// Find the first logical line.
let mut logical_line = None;
for (i, line) in content.universal_newlines().enumerate() {
Expand All @@ -27,14 +28,14 @@ pub fn logical_line(content: &str) -> Option<usize> {

/// Normalize a word by removing all non-alphanumeric characters
/// and converting it to lowercase.
pub fn normalize_word(first_word: &str) -> String {
pub(crate) fn normalize_word(first_word: &str) -> String {
first_word
.replace(|c: char| !c.is_alphanumeric(), "")
.to_lowercase()
}

/// Check decorator list to see if function should be ignored.
pub fn should_ignore_definition(
pub(crate) fn should_ignore_definition(
checker: &Checker,
definition: &Definition,
ignore_decorators: &BTreeSet<String>,
Expand All @@ -60,3 +61,11 @@ pub fn should_ignore_definition(
}
false
}

/// Check if a docstring should be ignored.
pub(crate) fn should_ignore_docstring(contents: &str) -> bool {
// Avoid analyzing docstrings that contain implicit string concatenations.
// Python does consider these docstrings, but they're almost certainly a
// user error, and supporting them "properly" is extremely difficult.
is_implicit_concatenation(contents)
}
12 changes: 3 additions & 9 deletions crates/ruff/src/rules/pyupgrade/rules/f_strings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@ use rustpython_common::format::{
FieldName, FieldNamePart, FieldType, FormatPart, FormatString, FromTemplate,
};
use rustpython_parser::ast::{Constant, Expr, ExprKind, KeywordData};
use rustpython_parser::{lexer, Mode, Tok};

use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Fix};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::str::{leading_quote, trailing_quote};
use ruff_python_ast::str::{is_implicit_concatenation, leading_quote, trailing_quote};
use ruff_python_ast::types::Range;

use crate::checkers::ast::Checker;
Expand Down Expand Up @@ -127,13 +126,8 @@ fn try_convert_to_f_string(checker: &Checker, expr: &Expr) -> Option<String> {

let contents = checker.locator.slice(value);

// Tokenize: we need to avoid trying to fix implicit string concatenations.
if lexer::lex(contents, Mode::Module)
.flatten()
.filter(|(_, tok, _)| matches!(tok, Tok::String { .. }))
.count()
> 1
{
// Skip implicit string concatenations.
if is_implicit_concatenation(contents) {
return None;
}

Expand Down
13 changes: 3 additions & 10 deletions crates/ruff/src/rules/pyupgrade/rules/native_literals.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
use std::fmt;

use rustpython_parser::ast::{Constant, Expr, ExprKind, Keyword};
use rustpython_parser::{lexer, Mode, Tok};

use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Fix};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::str::is_implicit_concatenation;
use ruff_python_ast::types::Range;

use crate::checkers::ast::Checker;
Expand Down Expand Up @@ -112,16 +112,9 @@ pub fn native_literals(
return;
}

// rust-python merges adjacent string/bytes literals into one node, but we can't
// safely remove the outer call in this situation. We're following pyupgrade
// here and skip.
// Skip implicit string concatenations.
let arg_code = checker.locator.slice(arg);
if lexer::lex_located(arg_code, Mode::Module, arg.location)
.flatten()
.filter(|(_, tok, _)| matches!(tok, Tok::String { .. }))
.count()
> 1
{
if is_implicit_concatenation(arg_code) {
return;
}

Expand Down
92 changes: 91 additions & 1 deletion crates/ruff_python_ast/src/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,15 +67,71 @@ pub fn is_triple_quote(content: &str) -> bool {
TRIPLE_QUOTE_STR_PREFIXES.contains(&content) || TRIPLE_QUOTE_BYTE_PREFIXES.contains(&content)
}

/// Return `true` if the string expression is an implicit concatenation.
///
/// ## Examples
///
/// ```rust
/// use ruff_python_ast::str::is_implicit_concatenation;
///
/// assert!(is_implicit_concatenation(r#"'abc' 'def'"#));
/// assert!(!is_implicit_concatenation(r#"'abcdef'"#));
/// ```
pub fn is_implicit_concatenation(content: &str) -> bool {
MichaReiser marked this conversation as resolved.
Show resolved Hide resolved
charliermarsh marked this conversation as resolved.
Show resolved Hide resolved
let Some(leading_quote_str) = leading_quote(content) else {
return false;
};
let Some(trailing_quote_str) = trailing_quote(content) else {
return false;
};

// If the trailing quote doesn't match the _expected_ trailing quote, then the string is
// implicitly concatenated.
//
// For example, given:
// ```python
// u"""abc""" 'def'
// ```
//
// The leading quote would be `u"""`, and the trailing quote would be `'`, but the _expected_
// trailing quote would be `"""`. Since `'` does not equal `"""`, we'd return `true`.
if trailing_quote_str != trailing_quote(leading_quote_str).unwrap() {
return true;
}

// Search for any trailing quotes _before_ the end of the string.
let mut rest = &content[leading_quote_str.len()..content.len() - trailing_quote_str.len()];
while let Some(index) = rest.find(trailing_quote_str) {
let mut chars = rest[..index].chars().rev();
if let Some('\\') = chars.next() {
// If the quote is double-escaped, then it's _not_ escaped, so the string is
// implicitly concatenated.
if let Some('\\') = chars.next() {
return true;
}
} else {
// If the quote is _not_ escaped, then it's implicitly concatenated.
return true;
}
rest = &rest[index + trailing_quote_str.len()..];
}

// Otherwise, we know the string ends with the expected trailing quote, so it's not implicitly
// concatenated.
false
}

#[cfg(test)]
mod tests {
use crate::str::is_implicit_concatenation;

use super::{
SINGLE_QUOTE_BYTE_PREFIXES, SINGLE_QUOTE_STR_PREFIXES, TRIPLE_QUOTE_BYTE_PREFIXES,
TRIPLE_QUOTE_STR_PREFIXES,
};

#[test]
fn test_prefixes() {
fn prefix_uniqueness() {
let prefixes = TRIPLE_QUOTE_STR_PREFIXES
.iter()
.chain(TRIPLE_QUOTE_BYTE_PREFIXES)
Expand All @@ -93,4 +149,38 @@ mod tests {
}
}
}

#[test]
fn implicit_concatenation() {
// Positive cases.
assert!(is_implicit_concatenation(r#""abc" "def""#));
assert!(is_implicit_concatenation(r#""abc" 'def'"#));
assert!(is_implicit_concatenation(r#""""abc""" "def""#));
assert!(is_implicit_concatenation(r#"'''abc''' 'def'"#));
assert!(is_implicit_concatenation(r#""""abc""" 'def'"#));
assert!(is_implicit_concatenation(r#"'''abc''' "def""#));
assert!(is_implicit_concatenation(r#""""abc""""def""#));
assert!(is_implicit_concatenation(r#"'''abc''''def'"#));
assert!(is_implicit_concatenation(r#""""abc"""'def'"#));
assert!(is_implicit_concatenation(r#"'''abc'''"def""#));

// Negative cases.
assert!(!is_implicit_concatenation(r#""abc""#));
assert!(!is_implicit_concatenation(r#"'abc'"#));
assert!(!is_implicit_concatenation(r#""""abc""""#));
assert!(!is_implicit_concatenation(r#"'''abc'''"#));
assert!(!is_implicit_concatenation(r#""""ab"c""""#));
assert!(!is_implicit_concatenation(r#"'''ab'c'''"#));
assert!(!is_implicit_concatenation(r#""""ab'c""""#));
assert!(!is_implicit_concatenation(r#"'''ab"c'''"#));
assert!(!is_implicit_concatenation(r#""""ab'''c""""#));
assert!(!is_implicit_concatenation(r#"'''ab"""c'''"#));

// Positive cases with escaped quotes.
assert!(is_implicit_concatenation(r#""abc\\""def""#));
assert!(is_implicit_concatenation(r#""abc\\""def""#));

// Negative cases with escaped quotes.
assert!(!is_implicit_concatenation(r#""abc\"def""#));
}
}