Skip to content

Commit

Permalink
Add tests
Browse files Browse the repository at this point in the history
  • Loading branch information
charliermarsh committed Mar 19, 2023
1 parent a277723 commit d71a70a
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 12 deletions.
8 changes: 4 additions & 4 deletions crates/ruff/src/rules/pydocstyle/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use crate::checkers::ast::Checker;
use crate::docstrings::definition::{Definition, DefinitionKind};

/// Return the index of the first logical line in a string.
pub fn logical_line(content: &str) -> Option<usize> {
pub(crate) fn logical_line(content: &str) -> Option<usize> {
// Find the first logical line.
let mut logical_line = None;
for (i, line) in content.universal_newlines().enumerate() {
Expand All @@ -28,14 +28,14 @@ pub fn logical_line(content: &str) -> Option<usize> {

/// Normalize a word by removing all non-alphanumeric characters
/// and converting it to lowercase.
///
/// Returns a newly allocated `String`; the input slice is not modified. A word made up
/// entirely of non-alphanumeric characters normalizes to the empty string.
pub fn normalize_word(first_word: &str) -> String {
pub(crate) fn normalize_word(first_word: &str) -> String {
first_word
// Drop every character for which `char::is_alphanumeric` returns `false`.
.replace(|c: char| !c.is_alphanumeric(), "")
// Lowercase whatever is left.
.to_lowercase()
}

/// Check decorator list to see if function should be ignored.
pub fn should_ignore_definition(
pub(crate) fn should_ignore_definition(
checker: &Checker,
definition: &Definition,
ignore_decorators: &BTreeSet<String>,
Expand Down Expand Up @@ -63,7 +63,7 @@ pub fn should_ignore_definition(
}

/// Check if a docstring should be ignored.
pub fn should_ignore_docstring(contents: &str) -> bool {
pub(crate) fn should_ignore_docstring(contents: &str) -> bool {
// Avoid analyzing docstrings that contain implicit string concatenations.
// Python does consider these docstrings, but they're almost certainly a
// user error, and supporting them "properly" is extremely difficult.
Expand Down
87 changes: 79 additions & 8 deletions crates/ruff_python_ast/src/str.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
use rustpython_parser::{lexer, Mode, Tok};

/// See: <https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals>
const TRIPLE_QUOTE_STR_PREFIXES: &[&str] = &[
"u\"\"\"", "u'''", "r\"\"\"", "r'''", "U\"\"\"", "U'''", "R\"\"\"", "R'''", "\"\"\"", "'''",
Expand Down Expand Up @@ -70,23 +68,62 @@ pub fn is_triple_quote(content: &str) -> bool {
}

/// Return `true` if the string expression is an implicit concatenation.
///
/// `content` is the source text of the string expression, including its quotes.
/// Returns `false` when no leading or trailing quote can be identified in `content`.
///
/// ## Examples
///
/// ```rust
/// use ruff_python_ast::str::is_implicit_concatenation;
///
/// assert!(is_implicit_concatenation(r#"'abc' 'def'"#));
/// assert!(!is_implicit_concatenation(r#"'abcdef'"#));
/// ```
pub fn is_implicit_concatenation(content: &str) -> bool {
// NOTE(review): the lexer-based expression directly below and the quote-scanning logic
// after it appear together in this view; presumably only one of them belongs to the
// function body at any given revision — confirm against the committed file.
//
// Lexer-based check: lex `content`, skip tokens that failed to lex, keep only string
// tokens, and report whether a second string token exists.
lexer::lex(content, Mode::Module)
.flatten()
.filter(|(_, tok, _)| matches!(tok, Tok::String { .. }))
.nth(1)
.is_some()
// Quote-scanning check: identify the opening and closing quote sequences; without both
// there is nothing to compare, so the answer is `false`.
let Some(leading_quote_str) = leading_quote(content) else {
return false;
};
let Some(trailing_quote_str) = trailing_quote(content) else {
return false;
};

// If the trailing quote doesn't match the _expected_ trailing quote, then the string is
// implicitly concatenated.
// NOTE(review): the `unwrap` assumes `trailing_quote` recognizes every value that
// `leading_quote` can return — confirm against those helpers.
if trailing_quote_str != trailing_quote(leading_quote_str).unwrap() {
return true;
}

// Search for any trailing quotes _before_ the end of the string.
// NOTE(review): if the leading and trailing matches overlap (e.g. `content` is a lone
// quote character), the start index exceeds the end index and this slice panics —
// presumably callers only pass complete string literals; confirm.
let mut rest = &content[leading_quote_str.len()..content.len() - trailing_quote_str.len()];
while let Some(index) = rest.find(trailing_quote_str) {
// Walk backwards over the characters immediately preceding the quote that was found.
let mut chars = rest[..index].chars().rev();
if let Some('\\') = chars.next() {
// If the quote is double-escaped, then it's _not_ escaped, so the string is
// implicitly concatenated.
// NOTE(review): only the two characters before the quote are inspected, so a quote
// preceded by three backslashes (an escaped backslash followed by an escaped quote)
// is also reported as a concatenation.
if let Some('\\') = chars.next() {
return true;
}
} else {
// If the quote is _not_ escaped, then it's implicitly concatenated.
return true;
}
// The quote was escaped: keep scanning after it.
rest = &rest[index + trailing_quote_str.len()..];
}

// Otherwise, we know the string ends with the expected trailing quote, so it's not implicitly
// concatenated.
false
}

#[cfg(test)]
mod tests {
use crate::str::is_implicit_concatenation;

use super::{
SINGLE_QUOTE_BYTE_PREFIXES, SINGLE_QUOTE_STR_PREFIXES, TRIPLE_QUOTE_BYTE_PREFIXES,
TRIPLE_QUOTE_STR_PREFIXES,
};

#[test]
fn test_prefixes() {
fn prefix_uniqueness() {
let prefixes = TRIPLE_QUOTE_STR_PREFIXES
.iter()
.chain(TRIPLE_QUOTE_BYTE_PREFIXES)
Expand All @@ -104,4 +141,38 @@ mod tests {
}
}
}

/// Exercises `is_implicit_concatenation` on adjacent literals, single literals that merely
/// contain quote characters, and literals containing backslash-escaped quotes.
#[test]
fn implicit_concatenation() {
    // Positive cases: two adjacent literals, with and without separating whitespace,
    // across every pairing of single- and triple-quoted styles.
    assert!(is_implicit_concatenation(r#""abc" "def""#));
    assert!(is_implicit_concatenation(r#""abc" 'def'"#));
    assert!(is_implicit_concatenation(r#""""abc""" "def""#));
    assert!(is_implicit_concatenation(r#"'''abc''' 'def'"#));
    assert!(is_implicit_concatenation(r#""""abc""" 'def'"#));
    assert!(is_implicit_concatenation(r#"'''abc''' "def""#));
    assert!(is_implicit_concatenation(r#""""abc""""def""#));
    assert!(is_implicit_concatenation(r#"'''abc''''def'"#));
    assert!(is_implicit_concatenation(r#""""abc"""'def'"#));
    assert!(is_implicit_concatenation(r#"'''abc'''"def""#));

    // Negative cases: a single literal, including triple-quoted literals whose body
    // contains quote characters that do not terminate the string.
    assert!(!is_implicit_concatenation(r#""abc""#));
    assert!(!is_implicit_concatenation(r#"'abc'"#));
    assert!(!is_implicit_concatenation(r#""""abc""""#));
    assert!(!is_implicit_concatenation(r#"'''abc'''"#));
    assert!(!is_implicit_concatenation(r#""""ab"c""""#));
    assert!(!is_implicit_concatenation(r#"'''ab'c'''"#));
    assert!(!is_implicit_concatenation(r#""""ab'c""""#));
    assert!(!is_implicit_concatenation(r#"'''ab"c'''"#));
    assert!(!is_implicit_concatenation(r#""""ab'''c""""#));
    assert!(!is_implicit_concatenation(r#"'''ab"""c'''"#));

    // Positive cases with escaped backslashes: `\\` is a literal backslash, so the quote
    // that follows it really does close the first literal. Cover both quote styles
    // (the second assertion previously repeated the first verbatim).
    assert!(is_implicit_concatenation(r#""abc\\""def""#));
    assert!(is_implicit_concatenation(r#"'abc\\''def'"#));

    // Negative cases with escaped quotes: `\"` does not close the literal.
    assert!(!is_implicit_concatenation(r#""abc\"def""#));
}
}

0 comments on commit d71a70a

Please sign in to comment.