Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid reordering mixed-indent-level comments after branches #7609

Merged
merged 1 commit into from Sep 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -208,6 +208,56 @@ def f():
pass


# Regression test for: https://github.com/astral-sh/ruff/issues/7602
if True:
if True:
if True:
pass

#a
#b
#c
else:
pass

if True:
if True:
if True:
pass
# b

# a
# c

else:
pass

# Same indent

if True:
if True:
if True:
pass

#a
#b
#c
else:
pass

if True:
if True:
if True:
pass

# a
# b
# c

else:
pass


# Regression test for https://github.com/astral-sh/ruff/issues/5337
if parent_body:
if current_body:
Expand Down
98 changes: 85 additions & 13 deletions crates/ruff_python_formatter/src/comments/placement.rs
Expand Up @@ -8,7 +8,7 @@ use ruff_python_trivia::{
SimpleToken, SimpleTokenKind, SimpleTokenizer,
};
use ruff_source_file::Locator;
use ruff_text_size::{Ranged, TextLen, TextRange};
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};

use crate::comments::visitor::{CommentPlacement, DecoratedComment};
use crate::expression::expr_slice::{assign_comment_in_slice, ExprSliceCommentSection};
Expand Down Expand Up @@ -587,11 +587,11 @@ fn handle_own_line_comment_between_branches<'a>(

// It depends on the indentation level of the comment if it is a leading comment for the
// following branch or if it a trailing comment of the previous body's last statement.
let comment_indentation = indentation_at_offset(comment.start(), locator)
.unwrap_or_default()
.len();
let comment_indentation = own_line_comment_indentation(preceding, &comment, locator);

let preceding_indentation = indentation(locator, &preceding).unwrap_or_default().len();
let preceding_indentation = indentation(locator, &preceding)
.unwrap_or_default()
.text_len();

// Compare to the last statement in the body
match comment_indentation.cmp(&preceding_indentation) {
Expand Down Expand Up @@ -662,18 +662,16 @@ fn handle_own_line_comment_between_branches<'a>(
/// Determine where to attach an own line comment after a branch depending on its indentation
fn handle_own_line_comment_after_branch<'a>(
comment: DecoratedComment<'a>,
preceding_node: AnyNodeRef<'a>,
preceding: AnyNodeRef<'a>,
locator: &Locator,
) -> CommentPlacement<'a> {
let Some(last_child) = last_child_in_body(preceding_node) else {
let Some(last_child) = last_child_in_body(preceding) else {
return CommentPlacement::Default(comment);
};

// We only care about the length because indentations with mixed spaces and tabs are only valid if
// the indent-level doesn't depend on the tab width (the indent level must be the same if the tab width is 1 or 8).
let comment_indentation = indentation_at_offset(comment.start(), locator)
.unwrap_or_default()
.len();
let comment_indentation = own_line_comment_indentation(preceding, &comment, locator);

// Keep the comment on the entire statement in case it's a trailing comment
// ```python
Expand All @@ -684,9 +682,9 @@ fn handle_own_line_comment_after_branch<'a>(
// # Trailing if comment
// ```
// Here we keep the comment a trailing comment of the `if`
let preceding_indentation = indentation_at_offset(preceding_node.start(), locator)
let preceding_indentation = indentation_at_offset(preceding.start(), locator)
.unwrap_or_default()
.len();
.text_len();
if comment_indentation == preceding_indentation {
return CommentPlacement::Default(comment);
}
Expand All @@ -697,7 +695,7 @@ fn handle_own_line_comment_after_branch<'a>(
loop {
let child_indentation = indentation(locator, &last_child_in_parent)
.unwrap_or_default()
.len();
.text_len();

// There a three cases:
// ```python
Expand Down Expand Up @@ -749,6 +747,80 @@ fn handle_own_line_comment_after_branch<'a>(
}
}

/// Determine the indentation level of an own-line comment, defined as the minimum indentation of
/// all comments between the preceding node and the comment, including the comment itself. In
/// other words, we don't allow successive comments to ident _further_ than any preceding comments.
///
/// For example, given:
/// ```python
/// if True:
/// pass
/// # comment
/// ```
///
/// The indentation would be 4, as the comment is indented by 4 spaces.
///
/// Given:
/// ```python
/// if True:
/// pass
/// # comment
/// else:
/// pass
/// ```
///
/// The indentation would be 0, as the comment is not indented at all.
///
/// Given:
/// ```python
/// if True:
/// pass
/// # comment
/// # comment
/// ```
///
/// Both comments would be marked as indented at 4 spaces, as the indentation of the first comment
/// is used for the second comment.
///
/// This logic avoids pathological cases like:
/// ```python
/// try:
/// if True:
/// if True:
/// pass
///
/// # a
/// # b
/// # c
/// except Exception:
/// pass
/// ```
///
/// If we don't use the minimum indentation of any preceding comments, we would mark `# b` as
/// indented to the same depth as `pass`, which could in turn lead to us treating it as a trailing
/// comment of `pass`, despite there being a comment between them that "resets" the indentation.
fn own_line_comment_indentation(
preceding: AnyNodeRef,
comment: &DecoratedComment,
locator: &Locator,
) -> TextSize {
let tokenizer = SimpleTokenizer::new(
locator.contents(),
TextRange::new(locator.full_line_end(preceding.end()), comment.end()),
);

tokenizer
.filter_map(|token| {
if token.kind() == SimpleTokenKind::Comment {
indentation_at_offset(token.start(), locator).map(TextLen::text_len)
} else {
None
}
})
.min()
.unwrap_or_default()
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, is there an easier way to access the comments that we care about here, rather than re-lexing?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could pass in the in-building comment map, but that would break isolation quite a bit

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we wouldn't even the lexer here, just the first non-whitespace position, i'm not sure though if manual is better here than the simple tokenizer

}

/// Attaches comments for the positional-only parameters separator `/` or the keywords-only
/// parameters separator `*` as dangling comments to the enclosing [`Parameters`] node.
///
Expand Down
Expand Up @@ -214,6 +214,56 @@ else:
pass


# Regression test for: https://github.com/astral-sh/ruff/issues/7602
if True:
if True:
if True:
pass

#a
#b
#c
else:
pass

if True:
if True:
if True:
pass
# b

# a
# c

else:
pass

# Same indent

if True:
if True:
if True:
pass

#a
#b
#c
else:
pass

if True:
if True:
if True:
pass

# a
# b
# c

else:
pass


# Regression test for https://github.com/astral-sh/ruff/issues/5337
if parent_body:
if current_body:
Expand Down Expand Up @@ -444,17 +494,68 @@ else:
pass


# Regression test for: https://github.com/astral-sh/ruff/issues/7602
if True:
if True:
if True:
pass

# a
# b
# c

else:
pass

if True:
if True:
if True:
pass
# b

# a
# c
else:
pass

# Same indent

if True:
if True:
if True:
pass

# a
# b
# c

else:
pass

if True:
if True:
if True:
pass

# a
# b
# c

else:
pass


# Regression test for https://github.com/astral-sh/ruff/issues/5337
if parent_body:
if current_body:
child_in_body()
last_child_in_current_body() # may or may not have children on its own
# e
# f
# c
# d
# a
# b
# c
# d
# e
# f
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a good change, IMO -- the previous result was reordering comments.

```


Expand Down