Skip to content

Commit

Permalink
Avoid reordering mixed-indent-level comments after branches (#7609)
Browse files Browse the repository at this point in the history
## Summary

Given:

```python
if True:
    if True:
        if True:
            pass

        #a
            #b
        #c
else:
    pass
```

When determining the placement of the various comments, we compute the
indentation depth of each comment, and then compare it to the depth of
the previous statement. It turns out this can lead to reordering
comments, e.g., above, `#b` is assigned as a trailing comment of `pass`,
and so gets reordered above `#a`.

This PR modifies the logic such that when we compute the indentation
depth of `#b`, we limit it to at most the indentation depth of `#a`. In
other words, when analyzing comments at the end of branches, we don't
let successive comments go any _deeper_ than their preceding comments.

Closes #7602.

## Test Plan

`cargo test`

No change in similarity.

Before:

| project | similarity index | total files | changed files |

|--------------|------------------:|------------------:|------------------:|
| cpython | 0.76083 | 1789 | 1631 |
| django | 0.99983 | 2760 | 36 |
| transformers | 0.99963 | 2587 | 319 |
| twine | 1.00000 | 33 | 0 |
| typeshed | 0.99979 | 3496 | 22 |
| warehouse | 0.99967 | 648 | 15 |
| zulip | 0.99972 | 1437 | 21 |

After:

| project | similarity index | total files | changed files |

|--------------|------------------:|------------------:|------------------:|
| cpython | 0.76083 | 1789 | 1631 |
| django | 0.99983 | 2760 | 36 |
| transformers | 0.99963 | 2587 | 319 |
| twine | 1.00000 | 33 | 0 |
| typeshed | 0.99979 | 3496 | 22 |
| warehouse | 0.99967 | 648 | 15 |
| zulip | 0.99972 | 1437 | 21 |
  • Loading branch information
charliermarsh committed Sep 22, 2023
1 parent 19010f2 commit 1a4f2a9
Show file tree
Hide file tree
Showing 3 changed files with 240 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,56 @@ def f():
pass


# Regression test for: https://github.com/astral-sh/ruff/issues/7602
if True:
if True:
if True:
pass

#a
#b
#c
else:
pass

if True:
if True:
if True:
pass
# b

# a
# c

else:
pass

# Same indent

if True:
if True:
if True:
pass

#a
#b
#c
else:
pass

if True:
if True:
if True:
pass

# a
# b
# c

else:
pass


# Regression test for https://github.com/astral-sh/ruff/issues/5337
if parent_body:
if current_body:
Expand Down
98 changes: 85 additions & 13 deletions crates/ruff_python_formatter/src/comments/placement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use ruff_python_trivia::{
SimpleToken, SimpleTokenKind, SimpleTokenizer,
};
use ruff_source_file::Locator;
use ruff_text_size::{Ranged, TextLen, TextRange};
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};

use crate::comments::visitor::{CommentPlacement, DecoratedComment};
use crate::expression::expr_slice::{assign_comment_in_slice, ExprSliceCommentSection};
Expand Down Expand Up @@ -587,11 +587,11 @@ fn handle_own_line_comment_between_branches<'a>(

// It depends on the indentation level of the comment if it is a leading comment for the
// following branch or if it a trailing comment of the previous body's last statement.
let comment_indentation = indentation_at_offset(comment.start(), locator)
.unwrap_or_default()
.len();
let comment_indentation = own_line_comment_indentation(preceding, &comment, locator);

let preceding_indentation = indentation(locator, &preceding).unwrap_or_default().len();
let preceding_indentation = indentation(locator, &preceding)
.unwrap_or_default()
.text_len();

// Compare to the last statement in the body
match comment_indentation.cmp(&preceding_indentation) {
Expand Down Expand Up @@ -662,18 +662,16 @@ fn handle_own_line_comment_between_branches<'a>(
/// Determine where to attach an own line comment after a branch depending on its indentation
fn handle_own_line_comment_after_branch<'a>(
comment: DecoratedComment<'a>,
preceding_node: AnyNodeRef<'a>,
preceding: AnyNodeRef<'a>,
locator: &Locator,
) -> CommentPlacement<'a> {
let Some(last_child) = last_child_in_body(preceding_node) else {
let Some(last_child) = last_child_in_body(preceding) else {
return CommentPlacement::Default(comment);
};

// We only care about the length because indentations with mixed spaces and tabs are only valid if
// the indent-level doesn't depend on the tab width (the indent level must be the same if the tab width is 1 or 8).
let comment_indentation = indentation_at_offset(comment.start(), locator)
.unwrap_or_default()
.len();
let comment_indentation = own_line_comment_indentation(preceding, &comment, locator);

// Keep the comment on the entire statement in case it's a trailing comment
// ```python
Expand All @@ -684,9 +682,9 @@ fn handle_own_line_comment_after_branch<'a>(
// # Trailing if comment
// ```
// Here we keep the comment a trailing comment of the `if`
let preceding_indentation = indentation_at_offset(preceding_node.start(), locator)
let preceding_indentation = indentation_at_offset(preceding.start(), locator)
.unwrap_or_default()
.len();
.text_len();
if comment_indentation == preceding_indentation {
return CommentPlacement::Default(comment);
}
Expand All @@ -697,7 +695,7 @@ fn handle_own_line_comment_after_branch<'a>(
loop {
let child_indentation = indentation(locator, &last_child_in_parent)
.unwrap_or_default()
.len();
.text_len();

// There a three cases:
// ```python
Expand Down Expand Up @@ -749,6 +747,80 @@ fn handle_own_line_comment_after_branch<'a>(
}
}

/// Determine the indentation level of an own-line comment, defined as the minimum indentation of
/// all comments between the preceding node and the comment, including the comment itself. In
/// other words, we don't allow successive comments to ident _further_ than any preceding comments.
///
/// For example, given:
/// ```python
/// if True:
/// pass
/// # comment
/// ```
///
/// The indentation would be 4, as the comment is indented by 4 spaces.
///
/// Given:
/// ```python
/// if True:
/// pass
/// # comment
/// else:
/// pass
/// ```
///
/// The indentation would be 0, as the comment is not indented at all.
///
/// Given:
/// ```python
/// if True:
/// pass
/// # comment
/// # comment
/// ```
///
/// Both comments would be marked as indented at 4 spaces, as the indentation of the first comment
/// is used for the second comment.
///
/// This logic avoids pathological cases like:
/// ```python
/// try:
/// if True:
/// if True:
/// pass
///
/// # a
/// # b
/// # c
/// except Exception:
/// pass
/// ```
///
/// If we don't use the minimum indentation of any preceding comments, we would mark `# b` as
/// indented to the same depth as `pass`, which could in turn lead to us treating it as a trailing
/// comment of `pass`, despite there being a comment between them that "resets" the indentation.
fn own_line_comment_indentation(
preceding: AnyNodeRef,
comment: &DecoratedComment,
locator: &Locator,
) -> TextSize {
let tokenizer = SimpleTokenizer::new(
locator.contents(),
TextRange::new(locator.full_line_end(preceding.end()), comment.end()),
);

tokenizer
.filter_map(|token| {
if token.kind() == SimpleTokenKind::Comment {
indentation_at_offset(token.start(), locator).map(TextLen::text_len)
} else {
None
}
})
.min()
.unwrap_or_default()
}

/// Attaches comments for the positional-only parameters separator `/` or the keywords-only
/// parameters separator `*` as dangling comments to the enclosing [`Parameters`] node.
///
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,56 @@ else:
pass
# Regression test for: https://github.com/astral-sh/ruff/issues/7602
if True:
if True:
if True:
pass
#a
#b
#c
else:
pass
if True:
if True:
if True:
pass
# b
# a
# c
else:
pass
# Same indent
if True:
if True:
if True:
pass
#a
#b
#c
else:
pass
if True:
if True:
if True:
pass
# a
# b
# c
else:
pass
# Regression test for https://github.com/astral-sh/ruff/issues/5337
if parent_body:
if current_body:
Expand Down Expand Up @@ -444,17 +494,68 @@ else:
pass
# Regression test for: https://github.com/astral-sh/ruff/issues/7602
if True:
if True:
if True:
pass
# a
# b
# c
else:
pass
if True:
if True:
if True:
pass
# b
# a
# c
else:
pass
# Same indent
if True:
if True:
if True:
pass
# a
# b
# c
else:
pass
if True:
if True:
if True:
pass
# a
# b
# c
else:
pass
# Regression test for https://github.com/astral-sh/ruff/issues/5337
if parent_body:
if current_body:
child_in_body()
last_child_in_current_body() # may or may not have children on its own
# e
# f
# c
# d
# a
# b
# c
# d
# e
# f
```


Expand Down

0 comments on commit 1a4f2a9

Please sign in to comment.