From 5e1ea63c2447a2a28dce65666da87549858c47e7 Mon Sep 17 00:00:00 2001 From: Jane Lewis Date: Wed, 28 Feb 2024 11:12:05 -0800 Subject: [PATCH] Fix trailing comment visitation and add coverage for trailing skip comments on function definitions --- .../resources/test/fixtures/ruff/RUF028.py | 3 +- .../invalid_formatter_suppression_comment.rs | 9 +-- .../ruff/rules/suppression_comment_visitor.rs | 50 +++++++----- ..._rules__ruff__tests__RUF028_RUF028.py.snap | 69 ++++++---------- crates/ruff_python_ast/src/helpers.rs | 78 ++++++++++++++++++- crates/ruff_python_ast/src/node.rs | 78 +------------------ .../src/comments/placement.rs | 7 +- 7 files changed, 140 insertions(+), 154 deletions(-) diff --git a/crates/ruff_linter/resources/test/fixtures/ruff/RUF028.py b/crates/ruff_linter/resources/test/fixtures/ruff/RUF028.py index 5409ddf5fbc3e7..47100c2b3ef90a 100644 --- a/crates/ruff_linter/resources/test/fixtures/ruff/RUF028.py +++ b/crates/ruff_linter/resources/test/fixtures/ruff/RUF028.py @@ -53,8 +53,7 @@ class Test: def cls_method_a( # fmt: off cls, - ) -> None: - # noqa: test # fmt: skip + ) -> None: # noqa: test # fmt: skip pass diff --git a/crates/ruff_linter/src/rules/ruff/rules/invalid_formatter_suppression_comment.rs b/crates/ruff_linter/src/rules/ruff/rules/invalid_formatter_suppression_comment.rs index cbda57903a98a4..f72e87a330d744 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/invalid_formatter_suppression_comment.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/invalid_formatter_suppression_comment.rs @@ -3,7 +3,7 @@ use std::fmt::Display; use ast::{StmtClassDef, StmtFunctionDef}; use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_ast::{self as ast, AnyNodeRef}; +use ruff_python_ast::{self as ast, helpers::comment_indentation_after, AnyNodeRef}; use ruff_python_trivia::{indentation_at_offset, SuppressionKind}; use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextLen, TextRange}; @@ -168,11 +168,8 @@ impl<'src, 'loc> UselessSuppressionComments<'src, 'loc> { { if following.is_first_statement_in_alternate_body(enclosing) { // check indentation - let comment_indentation = AnyNodeRef::comment_indentation_after( - preceding, - comment.range, - self.locator, - ); + let comment_indentation = + comment_indentation_after(preceding, comment.range, self.locator); let preceding_indentation = indentation_at_offset(preceding.start(), self.locator) diff --git a/crates/ruff_linter/src/rules/ruff/rules/suppression_comment_visitor.rs b/crates/ruff_linter/src/rules/ruff/rules/suppression_comment_visitor.rs index 1dbe4c1a403943..aaff1f62070903 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/suppression_comment_visitor.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/suppression_comment_visitor.rs @@ -1,6 +1,7 @@ use std::iter::Peekable; use ruff_python_ast::{ + helpers::comment_indentation_after, visitor::preorder::{self, PreorderVisitor, TraversalSignal}, AnyNodeRef, Suite, }; @@ -114,32 +115,43 @@ where while let Some(SuppressionComment { range, kind }) = self.comments.peek().copied() { let line_position = CommentLinePosition::for_range(range, self.locator.contents()); if range.start() >= node_end { + let between = TextRange::new(node_end, range.start()); + // Check if a non-trivial token exists between the end of this node and the start of the comment. + // If it doesn't, that means this comment could possibly be a trailing comment that comes after the + // end of this node. + // For example: + // ``` + // def func(x): + // pass # fmt: skip + // ``` + // We want to make sure that `# fmt: skip` is associated with the `pass` statement, + // even though it comes after the end of that node. + if let Some(token) = SimpleTokenizer::new(self.locator.contents(), between) + .skip_trivia() + .next() + { + break; + } + // If the comment is on its own line, it could still be a trailing comment if it has a greater + // level of indentation compared to this node. For example: + // ``` + // def func(x): + // # fmt: off + // pass + // # fmt: on + // def func2(y): + // pass + // ``` + // We want `# fmt: on` to be considered a trailing comment of `func(x)` instead of a leading comment + // on `func2(y)`. if line_position.is_own_line() { - if let Some(token) = - SimpleTokenizer::starts_at(node_end, self.locator.contents()) - .skip_trivia() - .next() - { - if token.end() <= range.end() { - break; - } - } - let comment_indent = - AnyNodeRef::comment_indentation_after(node, range, self.locator); + let comment_indent = comment_indentation_after(node, range, self.locator); let node_indent = TextSize::of( indentation_at_offset(node.start(), self.locator).unwrap_or_default(), ); if node_indent >= comment_indent { break; } - } else { - // If the end-of-line comment is not on the same line, we can assume that there's a node in between - // the end of this node and this comment. - if self.locator.line_start(range.start()) - != self.locator.line_start(node.start()) - { - break; - } } } diff --git a/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__RUF028_RUF028.py.snap b/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__RUF028_RUF028.py.snap index 4fd416657b520c..400fe6637ef523 100644 --- a/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__RUF028_RUF028.py.snap +++ b/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__RUF028_RUF028.py.snap @@ -164,7 +164,7 @@ RUF028.py:54:9: RUF028 [*] This suppression comment is invalid because it cannot 54 | # fmt: off | ^^^^^^^^^^ RUF028 55 | cls, -56 | ) -> None: +56 | ) -> None: # noqa: test # fmt: skip | = help: Remove this comment @@ -174,60 +174,41 @@ RUF028.py:54:9: RUF028 [*] This suppression comment is invalid because it cannot 53 53 | def cls_method_a( 54 |- # fmt: off 55 54 | cls, -56 55 | ) -> None: -57 56 | # noqa: test # fmt: skip +56 55 | ) -> None: # noqa: test # fmt: skip +57 56 | pass -RUF028.py:57:9: RUF028 [*] This suppression comment is invalid because it cannot be on its own line +RUF028.py:62:13: RUF028 [*] This suppression comment is invalid because it cannot be at the end of a line | -55 | cls, -56 | ) -> None: -57 | # noqa: test # fmt: skip - | ^^^^^^^^^^^^^^^^^^^^^^^^ RUF028 -58 | pass - | - = help: Remove this comment - -ℹ Unsafe fix -54 54 | # fmt: off -55 55 | cls, -56 56 | ) -> None: -57 |- # noqa: test # fmt: skip -58 57 | pass -59 58 | -60 59 | - -RUF028.py:63:13: RUF028 [*] This suppression comment is invalid because it cannot be at the end of a line - | -61 | def fmt_on_trailing(): -62 | # fmt: off -63 | val = 5 # fmt: on +60 | def fmt_on_trailing(): +61 | # fmt: off +62 | val = 5 # fmt: on | ^^^^^^^^^ RUF028 -64 | pass # fmt: on +63 | pass # fmt: on | = help: Remove this comment ℹ Unsafe fix -60 60 | -61 61 | def fmt_on_trailing(): -62 62 | # fmt: off -63 |- val = 5 # fmt: on - 63 |+ val = 5 -64 64 | pass # fmt: on - -RUF028.py:64:10: RUF028 [*] This suppression comment is invalid because it cannot be at the end of a line - | -62 | # fmt: off -63 | val = 5 # fmt: on -64 | pass # fmt: on +59 59 | +60 60 | def fmt_on_trailing(): +61 61 | # fmt: off +62 |- val = 5 # fmt: on + 62 |+ val = 5 +63 63 | pass # fmt: on + +RUF028.py:63:10: RUF028 [*] This suppression comment is invalid because it cannot be at the end of a line + | +61 | # fmt: off +62 | val = 5 # fmt: on +63 | pass # fmt: on | ^^^^^^^^^ RUF028 | = help: Remove this comment ℹ Unsafe fix -61 61 | def fmt_on_trailing(): -62 62 | # fmt: off -63 63 | val = 5 # fmt: on -64 |- pass # fmt: on - 64 |+ pass +60 60 | def fmt_on_trailing(): +61 61 | # fmt: off +62 62 | val = 5 # fmt: on +63 |- pass # fmt: on + 63 |+ pass diff --git a/crates/ruff_python_ast/src/helpers.rs b/crates/ruff_python_ast/src/helpers.rs index 154be660d37ae7..d80bfef7691973 100644 --- a/crates/ruff_python_ast/src/helpers.rs +++ b/crates/ruff_python_ast/src/helpers.rs @@ -4,9 +4,9 @@ use std::path::Path; use rustc_hash::FxHashMap; use smallvec::SmallVec; -use ruff_python_trivia::CommentRanges; +use ruff_python_trivia::{indentation_at_offset, CommentRanges, SimpleTokenKind, SimpleTokenizer}; use ruff_source_file::Locator; -use ruff_text_size::{Ranged, TextRange}; +use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; use crate::call_path::CallPath; use crate::parenthesize::parenthesized_range; @@ -1481,6 +1481,80 @@ pub fn typing_union(elts: &[Expr], binding: String) -> Expr { }) } +/// Determine the indentation level of an own-line comment, defined as the minimum indentation of +/// all comments between the preceding node and the comment, including the comment itself. In +/// other words, we don't allow successive comments to ident _further_ than any preceding comments. +/// +/// For example, given: +/// ```python +/// if True: +/// pass +/// # comment +/// ``` +/// +/// The indentation would be 4, as the comment is indented by 4 spaces. +/// +/// Given: +/// ```python +/// if True: +/// pass +/// # comment +/// else: +/// pass +/// ``` +/// +/// The indentation would be 0, as the comment is not indented at all. +/// +/// Given: +/// ```python +/// if True: +/// pass +/// # comment +/// # comment +/// ``` +/// +/// Both comments would be marked as indented at 4 spaces, as the indentation of the first comment +/// is used for the second comment. +/// +/// This logic avoids pathological cases like: +/// ```python +/// try: +/// if True: +/// if True: +/// pass +/// +/// # a +/// # b +/// # c +/// except Exception: +/// pass +/// ``` +/// +/// If we don't use the minimum indentation of any preceding comments, we would mark `# b` as +/// indented to the same depth as `pass`, which could in turn lead to us treating it as a trailing +/// comment of `pass`, despite there being a comment between them that "resets" the indentation. +pub fn comment_indentation_after( + preceding: AnyNodeRef, + comment_range: TextRange, + locator: &Locator, +) -> TextSize { + let tokenizer = SimpleTokenizer::new( + locator.contents(), + TextRange::new(locator.full_line_end(preceding.end()), comment_range.end()), + ); + + tokenizer + .filter_map(|token| { + if token.kind() == SimpleTokenKind::Comment { + indentation_at_offset(token.start(), locator).map(TextLen::text_len) + } else { + None + } + }) + .min() + .unwrap_or_default() +} + #[cfg(test)] mod tests { use std::borrow::Cow; diff --git a/crates/ruff_python_ast/src/node.rs b/crates/ruff_python_ast/src/node.rs index 27528e44de86d8..16b19622c36d8f 100644 --- a/crates/ruff_python_ast/src/node.rs +++ b/crates/ruff_python_ast/src/node.rs @@ -5,9 +5,7 @@ use crate::{ PatternArguments, PatternKeyword, Stmt, TypeParam, TypeParamParamSpec, TypeParamTypeVar, TypeParamTypeVarTuple, TypeParams, WithItem, }; -use ruff_python_trivia::{indentation_at_offset, SimpleTokenKind, SimpleTokenizer}; -use ruff_source_file::Locator; -use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; +use ruff_text_size::{Ranged, TextRange}; use std::ptr::NonNull; pub trait AstNode: Ranged { @@ -6304,80 +6302,6 @@ impl<'a> AnyNodeRef<'a> { _ => false, } } - - /// Determine the indentation level of an own-line comment, defined as the minimum indentation of - /// all comments between the preceding node and the comment, including the comment itself. In - /// other words, we don't allow successive comments to ident _further_ than any preceding comments. - /// - /// For example, given: - /// ```python - /// if True: - /// pass - /// # comment - /// ``` - /// - /// The indentation would be 4, as the comment is indented by 4 spaces. - /// - /// Given: - /// ```python - /// if True: - /// pass - /// # comment - /// else: - /// pass - /// ``` - /// - /// The indentation would be 0, as the comment is not indented at all. - /// - /// Given: - /// ```python - /// if True: - /// pass - /// # comment - /// # comment - /// ``` - /// - /// Both comments would be marked as indented at 4 spaces, as the indentation of the first comment - /// is used for the second comment. - /// - /// This logic avoids pathological cases like: - /// ```python - /// try: - /// if True: - /// if True: - /// pass - /// - /// # a - /// # b - /// # c - /// except Exception: - /// pass - /// ``` - /// - /// If we don't use the minimum indentation of any preceding comments, we would mark `# b` as - /// indented to the same depth as `pass`, which could in turn lead to us treating it as a trailing - /// comment of `pass`, despite there being a comment between them that "resets" the indentation. - pub fn comment_indentation_after( - preceding: Self, - comment_range: TextRange, - locator: &Locator, - ) -> TextSize { - let tokenizer = SimpleTokenizer::new( - locator.contents(), - TextRange::new(locator.full_line_end(preceding.end()), comment_range.end()), - ); - - tokenizer - .filter_map(|token| { - if token.kind() == SimpleTokenKind::Comment { - indentation_at_offset(token.start(), locator).map(TextLen::text_len) - } else { - None - } - }) - .min() - .unwrap_or_default() - } } /// Returns `true` if `right` is `Some` and `left` and `right` are referentially equal. diff --git a/crates/ruff_python_formatter/src/comments/placement.rs b/crates/ruff_python_formatter/src/comments/placement.rs index 0b50e819760ce7..c8b5af6533f1a2 100644 --- a/crates/ruff_python_formatter/src/comments/placement.rs +++ b/crates/ruff_python_formatter/src/comments/placement.rs @@ -1,5 +1,6 @@ use std::cmp::Ordering; +use ast::helpers::comment_indentation_after; use ruff_python_ast::whitespace::indentation; use ruff_python_ast::{self as ast, AnyNodeRef, Comprehension, Expr, ModModule, Parameters}; use ruff_python_trivia::{ @@ -536,8 +537,7 @@ fn handle_own_line_comment_between_branches<'a>( // It depends on the indentation level of the comment if it is a leading comment for the // following branch or if it a trailing comment of the previous body's last statement. - let comment_indentation = - AnyNodeRef::comment_indentation_after(preceding, comment.range(), locator); + let comment_indentation = comment_indentation_after(preceding, comment.range(), locator); let preceding_indentation = indentation(locator, &preceding) .unwrap_or_default() @@ -621,8 +621,7 @@ fn handle_own_line_comment_after_branch<'a>( // We only care about the length because indentations with mixed spaces and tabs are only valid if // the indent-level doesn't depend on the tab width (the indent level must be the same if the tab width is 1 or 8). - let comment_indentation = - AnyNodeRef::comment_indentation_after(preceding, comment.range(), locator); + let comment_indentation = comment_indentation_after(preceding, comment.range(), locator); // Keep the comment on the entire statement in case it's a trailing comment // ```python