From adcf07f8bd5824a68c34d9c10fd4fe4247edc521 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Fri, 22 Sep 2023 15:04:05 -0400 Subject: [PATCH] Avoid reordering mixed-indent-level comments after branches --- .../test/fixtures/ruff/statement/if.py | 50 ++++++++ .../src/comments/placement.rs | 98 +++++++++++++--- .../snapshots/format@statement__if.py.snap | 109 +++++++++++++++++- 3 files changed, 240 insertions(+), 17 deletions(-) diff --git a/crates/ruff_python_formatter/resources/test/fixtures/ruff/statement/if.py b/crates/ruff_python_formatter/resources/test/fixtures/ruff/statement/if.py index 1ed151c97d64c..553b1db45904c 100644 --- a/crates/ruff_python_formatter/resources/test/fixtures/ruff/statement/if.py +++ b/crates/ruff_python_formatter/resources/test/fixtures/ruff/statement/if.py @@ -208,6 +208,56 @@ def f(): pass +# Regression test for: https://github.com/astral-sh/ruff/issues/7602 +if True: + if True: + if True: + pass + + #a + #b + #c +else: + pass + +if True: + if True: + if True: + pass + # b + + # a + # c + +else: + pass + +# Same indent + +if True: + if True: + if True: + pass + + #a + #b + #c +else: + pass + +if True: + if True: + if True: + pass + + # a + # b + # c + +else: + pass + + # Regression test for https://github.com/astral-sh/ruff/issues/5337 if parent_body: if current_body: diff --git a/crates/ruff_python_formatter/src/comments/placement.rs b/crates/ruff_python_formatter/src/comments/placement.rs index 6af0efcab35ad..cad5066678327 100644 --- a/crates/ruff_python_formatter/src/comments/placement.rs +++ b/crates/ruff_python_formatter/src/comments/placement.rs @@ -8,7 +8,7 @@ use ruff_python_trivia::{ SimpleToken, SimpleTokenKind, SimpleTokenizer, }; use ruff_source_file::Locator; -use ruff_text_size::{Ranged, TextLen, TextRange}; +use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; use crate::comments::visitor::{CommentPlacement, DecoratedComment}; use crate::expression::expr_slice::{assign_comment_in_slice, ExprSliceCommentSection}; @@ -587,11 +587,11 @@ fn handle_own_line_comment_between_branches<'a>( // It depends on the indentation level of the comment if it is a leading comment for the // following branch or if it a trailing comment of the previous body's last statement. - let comment_indentation = indentation_at_offset(comment.start(), locator) - .unwrap_or_default() - .len(); + let comment_indentation = own_line_comment_indentation(preceding, &comment, locator); - let preceding_indentation = indentation(locator, &preceding).unwrap_or_default().len(); + let preceding_indentation = indentation(locator, &preceding) + .unwrap_or_default() + .text_len(); // Compare to the last statement in the body match comment_indentation.cmp(&preceding_indentation) { @@ -662,18 +662,16 @@ fn handle_own_line_comment_between_branches<'a>( /// Determine where to attach an own line comment after a branch depending on its indentation fn handle_own_line_comment_after_branch<'a>( comment: DecoratedComment<'a>, - preceding_node: AnyNodeRef<'a>, + preceding: AnyNodeRef<'a>, locator: &Locator, ) -> CommentPlacement<'a> { - let Some(last_child) = last_child_in_body(preceding_node) else { + let Some(last_child) = last_child_in_body(preceding) else { return CommentPlacement::Default(comment); }; // We only care about the length because indentations with mixed spaces and tabs are only valid if // the indent-level doesn't depend on the tab width (the indent level must be the same if the tab width is 1 or 8). - let comment_indentation = indentation_at_offset(comment.start(), locator) - .unwrap_or_default() - .len(); + let comment_indentation = own_line_comment_indentation(preceding, &comment, locator); // Keep the comment on the entire statement in case it's a trailing comment // ```python @@ -684,9 +682,9 @@ fn handle_own_line_comment_after_branch<'a>( // # Trailing if comment // ``` // Here we keep the comment a trailing comment of the `if` - let preceding_indentation = indentation_at_offset(preceding_node.start(), locator) + let preceding_indentation = indentation_at_offset(preceding.start(), locator) .unwrap_or_default() - .len(); + .text_len(); if comment_indentation == preceding_indentation { return CommentPlacement::Default(comment); } @@ -697,7 +695,7 @@ fn handle_own_line_comment_after_branch<'a>( loop { let child_indentation = indentation(locator, &last_child_in_parent) .unwrap_or_default() - .len(); + .text_len(); // There a three cases: // ```python @@ -749,6 +747,80 @@ fn handle_own_line_comment_after_branch<'a>( } } +/// Determine the indentation level of an own-line comment, defined as the minimum indentation of +/// all comments between the preceding node and the comment, including the comment itself. In +/// other words, we don't allow successive comments to ident _further_ than any preceding comments. +/// +/// For example, given: +/// ```python +/// if True: +/// pass +/// # comment +/// ``` +/// +/// The indentation would be 4, as the comment is indented by 4 spaces. +/// +/// Given: +/// ```python +/// if True: +/// pass +/// # comment +/// else: +/// pass +/// ``` +/// +/// The indentation would be 0, as the comment is not indented at all. +/// +/// Given: +/// ```python +/// if True: +/// pass +/// # comment +/// # comment +/// ``` +/// +/// Both comments would be marked as indented at 4 spaces, as the indentation of the first comment +/// is used for the second comment. +/// +/// This logic avoids pathological cases like: +/// ```python +/// try: +/// if True: +/// if True: +/// pass +/// +/// # a +/// # b +/// # c +/// except Exception: +/// pass +/// ``` +/// +/// If we don't use the minimum indentation of any preceding comments, we would mark `# b` as +/// indented to the same depth as `pass`, which could in turn lead to us treating it as a trailing +/// comment of `pass`, despite there being a comment between them that "resets" the indentation. +fn own_line_comment_indentation( + preceding: AnyNodeRef, + comment: &DecoratedComment, + locator: &Locator, +) -> TextSize { + let tokenizer = SimpleTokenizer::new( + locator.contents(), + TextRange::new(locator.full_line_end(preceding.end()), comment.end()), + ); + + tokenizer + .filter_map(|token| { + if token.kind() == SimpleTokenKind::Comment { + indentation_at_offset(token.start(), locator).map(TextLen::text_len) + } else { + None + } + }) + .min() + .unwrap_or_default() +} + /// Attaches comments for the positional-only parameters separator `/` or the keywords-only /// parameters separator `*` as dangling comments to the enclosing [`Parameters`] node. /// diff --git a/crates/ruff_python_formatter/tests/snapshots/format@statement__if.py.snap b/crates/ruff_python_formatter/tests/snapshots/format@statement__if.py.snap index f85326b157d4b..3b4aac308edf4 100644 --- a/crates/ruff_python_formatter/tests/snapshots/format@statement__if.py.snap +++ b/crates/ruff_python_formatter/tests/snapshots/format@statement__if.py.snap @@ -214,6 +214,56 @@ else: pass +# Regression test for: https://github.com/astral-sh/ruff/issues/7602 +if True: + if True: + if True: + pass + + #a + #b + #c +else: + pass + +if True: + if True: + if True: + pass + # b + + # a + # c + +else: + pass + +# Same indent + +if True: + if True: + if True: + pass + + #a + #b + #c +else: + pass + +if True: + if True: + if True: + pass + + # a + # b + # c + +else: + pass + + # Regression test for https://github.com/astral-sh/ruff/issues/5337 if parent_body: if current_body: @@ -444,17 +494,68 @@ else: pass +# Regression test for: https://github.com/astral-sh/ruff/issues/7602 +if True: + if True: + if True: + pass + + # a + # b + # c + +else: + pass + +if True: + if True: + if True: + pass + # b + + # a + # c +else: + pass + +# Same indent + +if True: + if True: + if True: + pass + + # a + # b + # c + +else: + pass + +if True: + if True: + if True: + pass + + # a + # b + # c + +else: + pass + + # Regression test for https://github.com/astral-sh/ruff/issues/5337 if parent_body: if current_body: child_in_body() last_child_in_current_body() # may or may not have children on its own - # e - # f - # c - # d # a # b +# c +# d +# e +# f ```