Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ignore overlong pragma comments when enforcing linter line length #7692

Merged
merged 1 commit into from Sep 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
@@ -1,6 +1,8 @@
# TODO: comments starting with one of the configured task-tags sometimes are longer than line-length so that you can easily find them with `git grep`
# TODO(charlie): comments starting with one of the configured task-tags sometimes are longer than line-length so that you can easily find them with `git grep`
# TODO comments starting with one of the configured task-tags sometimes are longer than line-length so that you can easily find them with `git grep`
# TODO comments starting with one of the configured task-tags sometimes are longer than line-length so that you can easily find them with `git grep`
# FIXME: comments starting with one of the configured task-tags sometimes are longer than line-length so that you can easily find them with `git grep`
# FIXME comments starting with one of the configured task-tags sometimes are longer than line-length so that you can easily find them with `git grep`
# FIXME comments starting with one of the configured task-tags sometimes are longer than line-length so that you can easily find them with `git grep`
# FIXME(charlie): comments starting with one of the configured task-tags sometimes are longer than line-length so that you can easily find them with `git grep`
11 changes: 11 additions & 0 deletions crates/ruff_linter/resources/test/fixtures/pycodestyle/E501_3.py
@@ -0,0 +1,11 @@
# OK (88 characters)
"shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:aaa" # type: ignore

# OK (88 characters)
"shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:aaa"# type: ignore

# OK (88 characters)
"shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:aaa" # type: ignore

# Error (89 characters)
"shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:aaaa" # type: ignore
4 changes: 2 additions & 2 deletions crates/ruff_linter/src/checkers/physical_lines.rs
Expand Up @@ -41,7 +41,7 @@ pub(crate) fn check_physical_lines(
.is_some()
{
if enforce_doc_line_too_long {
if let Some(diagnostic) = doc_line_too_long(&line, settings) {
if let Some(diagnostic) = doc_line_too_long(&line, indexer, settings) {
diagnostics.push(diagnostic);
}
}
Expand All @@ -54,7 +54,7 @@ pub(crate) fn check_physical_lines(
}

if enforce_line_too_long {
if let Some(diagnostic) = line_too_long(&line, settings) {
if let Some(diagnostic) = line_too_long(&line, indexer, settings) {
diagnostics.push(diagnostic);
}
}
Expand Down
85 changes: 2 additions & 83 deletions crates/ruff_linter/src/rules/pycodestyle/helpers.rs
@@ -1,13 +1,9 @@
use unicode_width::UnicodeWidthStr;

use ruff_python_ast::node::AnyNodeRef;
use ruff_python_ast::parenthesize::parenthesized_range;
use ruff_python_ast::{CmpOp, Expr};
use ruff_python_trivia::CommentRanges;
use ruff_source_file::{Line, Locator};
use ruff_text_size::{Ranged, TextLen, TextRange};

use crate::line_width::{LineLength, LineWidthBuilder, TabSize};
use ruff_source_file::Locator;
use ruff_text_size::Ranged;

pub(super) fn is_ambiguous_name(name: &str) -> bool {
name == "l" || name == "I" || name == "O"
Expand Down Expand Up @@ -64,80 +60,3 @@ pub(super) fn generate_comparison(

contents
}

pub(super) fn is_overlong(
line: &Line,
limit: LineLength,
ignore_overlong_task_comments: bool,
task_tags: &[String],
tab_size: TabSize,
) -> Option<Overlong> {
// The maximum width of the line is the number of bytes multiplied by the tab size (the
// worst-case scenario is that the line is all tabs). If the maximum width is less than the
// limit, then the line is not overlong.
let max_width = line.len() * tab_size.as_usize();
if max_width < limit.value() as usize {
return None;
}

let mut width = LineWidthBuilder::new(tab_size);
width = width.add_str(line.as_str());
if width <= limit {
return None;
}

let mut chunks = line.split_whitespace();
let (Some(first_chunk), Some(second_chunk)) = (chunks.next(), chunks.next()) else {
// Single word / no printable chars - no way to make the line shorter
return None;
};

if first_chunk == "#" {
if ignore_overlong_task_comments {
let second = second_chunk.trim_end_matches(':');
if task_tags.iter().any(|task_tag| task_tag == second) {
return None;
}
}
}

// Do not enforce the line length for lines that end with a URL, as long as the URL
// begins before the limit.
let last_chunk = chunks.last().unwrap_or(second_chunk);
if last_chunk.contains("://") {
if width.get() - last_chunk.width() <= limit.value() as usize {
return None;
}
}

// Obtain the start offset of the part of the line that exceeds the limit
let mut start_offset = line.start();
let mut start_width = LineWidthBuilder::new(tab_size);
for c in line.chars() {
if start_width < limit {
start_offset += c.text_len();
start_width = start_width.add_char(c);
} else {
break;
}
}
Some(Overlong {
range: TextRange::new(start_offset, line.end()),
width: width.get(),
})
}

pub(super) struct Overlong {
range: TextRange,
width: usize,
}

impl Overlong {
pub(super) const fn range(&self) -> TextRange {
self.range
}

pub(super) const fn width(&self) -> usize {
self.width
}
}
2 changes: 2 additions & 0 deletions crates/ruff_linter/src/rules/pycodestyle/mod.rs
Expand Up @@ -3,6 +3,7 @@ pub(crate) mod rules;
pub mod settings;

pub(crate) mod helpers;
pub(super) mod overlong;

#[cfg(test)]
mod tests {
Expand Down Expand Up @@ -30,6 +31,7 @@ mod tests {
#[test_case(Rule::InvalidEscapeSequence, Path::new("W605_1.py"))]
#[test_case(Rule::InvalidEscapeSequence, Path::new("W605_2.py"))]
#[test_case(Rule::LineTooLong, Path::new("E501.py"))]
#[test_case(Rule::LineTooLong, Path::new("E501_3.py"))]
#[test_case(Rule::MixedSpacesAndTabs, Path::new("E101.py"))]
#[test_case(Rule::ModuleImportNotAtTopOfFile, Path::new("E40.py"))]
#[test_case(Rule::ModuleImportNotAtTopOfFile, Path::new("E402.py"))]
Expand Down
165 changes: 165 additions & 0 deletions crates/ruff_linter/src/rules/pycodestyle/overlong.rs
@@ -0,0 +1,165 @@
use std::ops::Deref;

use unicode_width::UnicodeWidthStr;

use ruff_python_index::Indexer;
use ruff_python_trivia::is_pragma_comment;
use ruff_source_file::Line;
use ruff_text_size::{TextLen, TextRange};

use crate::line_width::{LineLength, LineWidthBuilder, TabSize};

#[derive(Debug)]
pub(super) struct Overlong {
range: TextRange,
width: usize,
}

impl Overlong {
/// Returns an [`Overlong`] if the measured line exceeds the configured line length, or `None`
/// otherwise.
pub(super) fn try_from_line(
line: &Line,
indexer: &Indexer,
limit: LineLength,
task_tags: &[String],
tab_size: TabSize,
) -> Option<Self> {
// The maximum width of the line is the number of bytes multiplied by the tab size (the
// worst-case scenario is that the line is all tabs). If the maximum width is less than the
// limit, then the line is not overlong.
Comment on lines +28 to +30
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's very conservative (but correct). It means only lines with less than 22 characters skip the check below. This optimisation becomes less effective if we increase the default tab_size to 8 (making it only applies to lines with less or equal to 11 characters).

Although I'm not sure what we should do about it. Dividing by two could be an option, considering that it's unlikely that every second character is a tab but it will reduce correctness,

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was already present, just moved. We used to have a much better heuristic, because we assumed each byte was at most one character, but tabs ruin that -- a single byte can be as large as the tab width, sadly.

let max_width = line.len() * tab_size.as_usize();
if max_width < limit.value() as usize {
return None;
}

// Measure the line. If it's already below the limit, exit early.
let width = measure(line.as_str(), tab_size);
if width <= limit {
return None;
}

// Strip trailing comments and re-measure the line, if needed.
let line = StrippedLine::from_line(line, indexer, task_tags);
let width = match &line {
StrippedLine::WithoutPragma(line) => {
let width = measure(line.as_str(), tab_size);
if width <= limit {
return None;
}
width
}
StrippedLine::Unchanged(_) => width,
};

let mut chunks = line.split_whitespace();
let (Some(_), Some(second_chunk)) = (chunks.next(), chunks.next()) else {
// Single word / no printable chars - no way to make the line shorter.
return None;
};

// Do not enforce the line length for lines that end with a URL, as long as the URL
// begins before the limit.
let last_chunk = chunks.last().unwrap_or(second_chunk);
if last_chunk.contains("://") {
if width.get() - last_chunk.width() <= limit.value() as usize {
return None;
}
}

// Obtain the start offset of the part of the line that exceeds the limit.
let mut start_offset = line.start();
let mut start_width = LineWidthBuilder::new(tab_size);
for c in line.chars() {
if start_width < limit {
start_offset += c.text_len();
start_width = start_width.add_char(c);
} else {
break;
}
}

Some(Self {
range: TextRange::new(start_offset, line.end()),
width: width.get(),
})
}

/// Return the range of the overlong portion of the line.
pub(super) const fn range(&self) -> TextRange {
self.range
}

/// Return the measured width of the line, without any trailing pragma comments.
pub(super) const fn width(&self) -> usize {
self.width
}
}

/// A [`Line`] that may have trailing pragma comments stripped.
#[derive(Debug)]
enum StrippedLine<'a> {
/// The [`Line`] was unchanged.
Unchanged(&'a Line<'a>),
/// The [`Line`] was changed such that a trailing pragma comment (e.g., `# type: ignore`) was
/// removed. The stored [`Line`] consists of the portion of the original line that precedes the
/// pragma comment.
WithoutPragma(Line<'a>),
}

impl<'a> StrippedLine<'a> {
/// Strip trailing comments from a [`Line`], if the line ends with a pragma comment (like
/// `# type: ignore`) or, if necessary, a task comment (like `# TODO`).
fn from_line(line: &'a Line<'a>, indexer: &Indexer, task_tags: &[String]) -> Self {
let [comment_range] = indexer.comment_ranges().comments_in_range(line.range()) else {
return Self::Unchanged(line);
};

// Convert from absolute to relative range.
let comment_range = comment_range - line.start();
let comment = &line.as_str()[comment_range];

// Ex) `# type: ignore`
if is_pragma_comment(comment) {
// Remove the pragma from the line.
let prefix = &line.as_str()[..usize::from(comment_range.start())].trim_end();
return Self::WithoutPragma(Line::new(prefix, line.start()));
}

// Ex) `# TODO(charlie): ...`
if !task_tags.is_empty() {
let Some(trimmed) = comment.strip_prefix('#') else {
return Self::Unchanged(line);
};
let trimmed = trimmed.trim_start();
if task_tags
.iter()
.any(|task_tag| trimmed.starts_with(task_tag))
{
// Remove the task tag from the line.
let prefix = &line.as_str()[..usize::from(comment_range.start())].trim_end();
return Self::WithoutPragma(Line::new(prefix, line.start()));
}
}

Self::Unchanged(line)
}
}

impl<'a> Deref for StrippedLine<'a> {
type Target = Line<'a>;

fn deref(&self) -> &Self::Target {
match self {
Self::Unchanged(line) => line,
Self::WithoutPragma(line) => line,
}
}
}

/// Returns the width of a given string, accounting for the tab size.
fn measure(s: &str, tab_size: TabSize) -> LineWidthBuilder {
let mut width = LineWidthBuilder::new(tab_size);
width = width.add_str(s);
width
}
36 changes: 26 additions & 10 deletions crates/ruff_linter/src/rules/pycodestyle/rules/doc_line_too_long.rs
@@ -1,8 +1,9 @@
use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_index::Indexer;
use ruff_source_file::Line;

use crate::rules::pycodestyle::helpers::is_overlong;
use crate::rules::pycodestyle::overlong::Overlong;
use crate::settings::LinterSettings;

/// ## What it does
Expand All @@ -19,10 +20,17 @@ use crate::settings::LinterSettings;
/// of either a standalone comment or a standalone string, like a docstring.
///
/// In the interest of pragmatism, this rule makes a few exceptions when
/// determining whether a line is overlong. Namely, it ignores lines that
/// consist of a single "word" (i.e., without any whitespace between its
/// characters), and lines that end with a URL (as long as the URL starts
/// before the line-length threshold).
/// determining whether a line is overlong. Namely, it:
///
/// 1. Ignores lines that consist of a single "word" (i.e., without any
/// whitespace between its characters).
/// 2. Ignores lines that end with a URL, as long as the URL starts before
/// the line-length threshold.
/// 3. Ignores line that end with a pragma comment (e.g., `# type: ignore`
/// or `# noqa`), as long as the pragma comment starts before the
/// line-length threshold. That is, a line will not be flagged as
/// overlong if a pragma comment _causes_ it to exceed the line length.
/// (This behavior aligns with that of the Ruff formatter.)
///
/// If [`pycodestyle.ignore-overlong-task-comments`] is `true`, this rule will
/// also ignore comments that start with any of the specified [`task-tags`]
Expand Down Expand Up @@ -50,7 +58,7 @@ use crate::settings::LinterSettings;
///
/// [PEP 8]: https://peps.python.org/pep-0008/#maximum-line-length
#[violation]
pub struct DocLineTooLong(pub usize, pub usize);
pub struct DocLineTooLong(usize, usize);

impl Violation for DocLineTooLong {
#[derive_message_formats]
Expand All @@ -61,16 +69,24 @@ impl Violation for DocLineTooLong {
}

/// W505
pub(crate) fn doc_line_too_long(line: &Line, settings: &LinterSettings) -> Option<Diagnostic> {
pub(crate) fn doc_line_too_long(
line: &Line,
indexer: &Indexer,
settings: &LinterSettings,
) -> Option<Diagnostic> {
let Some(limit) = settings.pycodestyle.max_doc_length else {
return None;
};

is_overlong(
Overlong::try_from_line(
line,
indexer,
limit,
settings.pycodestyle.ignore_overlong_task_comments,
&settings.task_tags,
if settings.pycodestyle.ignore_overlong_task_comments {
&settings.task_tags
} else {
&[]
},
settings.tab_size,
)
.map(|overlong| {
Expand Down