Skip to content

Commit

Permalink
Ignore overlong pragma comments when enforcing linter line length
Browse files Browse the repository at this point in the history
  • Loading branch information
charliermarsh committed Sep 29, 2023
1 parent b528006 commit 4ee4758
Show file tree
Hide file tree
Showing 11 changed files with 298 additions and 134 deletions.
@@ -1,6 +1,8 @@
# TODO: comments starting with one of the configured task-tags sometimes are longer than line-length so that you can easily find them with `git grep`
# TODO(charlie): comments starting with one of the configured task-tags sometimes are longer than line-length so that you can easily find them with `git grep`
# TODO comments starting with one of the configured task-tags sometimes are longer than line-length so that you can easily find them with `git grep`
# TODO comments starting with one of the configured task-tags sometimes are longer than line-length so that you can easily find them with `git grep`
# FIXME: comments starting with one of the configured task-tags sometimes are longer than line-length so that you can easily find them with `git grep`
# FIXME comments starting with one of the configured task-tags sometimes are longer than line-length so that you can easily find them with `git grep`
# FIXME comments starting with one of the configured task-tags sometimes are longer than line-length so that you can easily find them with `git grep`
# FIXME(charlie): comments starting with one of the configured task-tags sometimes are longer than line-length so that you can easily find them with `git grep`
11 changes: 11 additions & 0 deletions crates/ruff_linter/resources/test/fixtures/pycodestyle/E501_3.py
@@ -0,0 +1,11 @@
# OK (88 characters)
"shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:aaa" # type: ignore

# OK (88 characters)
"shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:aaa"# type: ignore

# OK (88 characters)
"shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:aaa" # type: ignore

# Error (89 characters)
"shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:aaaa" # type: ignore
4 changes: 2 additions & 2 deletions crates/ruff_linter/src/checkers/physical_lines.rs
Expand Up @@ -41,7 +41,7 @@ pub(crate) fn check_physical_lines(
.is_some()
{
if enforce_doc_line_too_long {
if let Some(diagnostic) = doc_line_too_long(&line, settings) {
if let Some(diagnostic) = doc_line_too_long(&line, indexer, settings) {
diagnostics.push(diagnostic);
}
}
Expand All @@ -54,7 +54,7 @@ pub(crate) fn check_physical_lines(
}

if enforce_line_too_long {
if let Some(diagnostic) = line_too_long(&line, settings) {
if let Some(diagnostic) = line_too_long(&line, indexer, settings) {
diagnostics.push(diagnostic);
}
}
Expand Down
85 changes: 2 additions & 83 deletions crates/ruff_linter/src/rules/pycodestyle/helpers.rs
@@ -1,13 +1,9 @@
use unicode_width::UnicodeWidthStr;

use ruff_python_ast::node::AnyNodeRef;
use ruff_python_ast::parenthesize::parenthesized_range;
use ruff_python_ast::{CmpOp, Expr};
use ruff_python_trivia::CommentRanges;
use ruff_source_file::{Line, Locator};
use ruff_text_size::{Ranged, TextLen, TextRange};

use crate::line_width::{LineLength, LineWidthBuilder, TabSize};
use ruff_source_file::Locator;
use ruff_text_size::Ranged;

pub(super) fn is_ambiguous_name(name: &str) -> bool {
name == "l" || name == "I" || name == "O"
Expand Down Expand Up @@ -64,80 +60,3 @@ pub(super) fn generate_comparison(

contents
}

pub(super) fn is_overlong(
line: &Line,
limit: LineLength,
ignore_overlong_task_comments: bool,
task_tags: &[String],
tab_size: TabSize,
) -> Option<Overlong> {
// The maximum width of the line is the number of bytes multiplied by the tab size (the
// worst-case scenario is that the line is all tabs). If the maximum width is less than the
// limit, then the line is not overlong.
let max_width = line.len() * tab_size.as_usize();
if max_width < limit.value() as usize {
return None;
}

let mut width = LineWidthBuilder::new(tab_size);
width = width.add_str(line.as_str());
if width <= limit {
return None;
}

let mut chunks = line.split_whitespace();
let (Some(first_chunk), Some(second_chunk)) = (chunks.next(), chunks.next()) else {
// Single word / no printable chars - no way to make the line shorter
return None;
};

if first_chunk == "#" {
if ignore_overlong_task_comments {
let second = second_chunk.trim_end_matches(':');
if task_tags.iter().any(|task_tag| task_tag == second) {
return None;
}
}
}

// Do not enforce the line length for lines that end with a URL, as long as the URL
// begins before the limit.
let last_chunk = chunks.last().unwrap_or(second_chunk);
if last_chunk.contains("://") {
if width.get() - last_chunk.width() <= limit.value() as usize {
return None;
}
}

// Obtain the start offset of the part of the line that exceeds the limit
let mut start_offset = line.start();
let mut start_width = LineWidthBuilder::new(tab_size);
for c in line.chars() {
if start_width < limit {
start_offset += c.text_len();
start_width = start_width.add_char(c);
} else {
break;
}
}
Some(Overlong {
range: TextRange::new(start_offset, line.end()),
width: width.get(),
})
}

pub(super) struct Overlong {
range: TextRange,
width: usize,
}

impl Overlong {
pub(super) const fn range(&self) -> TextRange {
self.range
}

pub(super) const fn width(&self) -> usize {
self.width
}
}
2 changes: 2 additions & 0 deletions crates/ruff_linter/src/rules/pycodestyle/mod.rs
Expand Up @@ -3,6 +3,7 @@ pub(crate) mod rules;
pub mod settings;

pub(crate) mod helpers;
pub(super) mod overlong;

#[cfg(test)]
mod tests {
Expand Down Expand Up @@ -30,6 +31,7 @@ mod tests {
#[test_case(Rule::InvalidEscapeSequence, Path::new("W605_1.py"))]
#[test_case(Rule::InvalidEscapeSequence, Path::new("W605_2.py"))]
#[test_case(Rule::LineTooLong, Path::new("E501.py"))]
#[test_case(Rule::LineTooLong, Path::new("E501_3.py"))]
#[test_case(Rule::MixedSpacesAndTabs, Path::new("E101.py"))]
#[test_case(Rule::ModuleImportNotAtTopOfFile, Path::new("E40.py"))]
#[test_case(Rule::ModuleImportNotAtTopOfFile, Path::new("E402.py"))]
Expand Down
167 changes: 167 additions & 0 deletions crates/ruff_linter/src/rules/pycodestyle/overlong.rs
@@ -0,0 +1,167 @@
use std::ops::Deref;

use unicode_width::UnicodeWidthStr;

use ruff_python_index::Indexer;
use ruff_python_trivia::is_pragma_comment;
use ruff_source_file::Line;
use ruff_text_size::{TextLen, TextRange};

use crate::line_width::{LineLength, LineWidthBuilder, TabSize};

#[derive(Debug)]
pub(super) struct Overlong {
range: TextRange,
width: usize,
}

impl Overlong {
/// Returns an [`Overlong`] if the measured line exceeds the configured line length, or `None`
/// otherwise.
pub(super) fn try_from_line(
line: &Line,
indexer: &Indexer,
limit: LineLength,
task_tags: &[String],
tab_size: TabSize,
) -> Option<Self> {
// The maximum width of the line is the number of bytes multiplied by the tab size (the
// worst-case scenario is that the line is all tabs). If the maximum width is less than the
// limit, then the line is not overlong.
let max_width = line.len() * tab_size.as_usize();
if max_width < limit.value() as usize {
return None;
}

// Measure the line. If it's already below the limit, exit early.
let width = measure(line.as_str(), tab_size);
if width <= limit {
return None;
}

// Strip trailing pragma comments and, if necessary, re-measure the line.
let line = StrippedLine::from_line(line, indexer, task_tags);

// Re-measure the line, if needed.
let width = match &line {
StrippedLine::WithoutPragma(line) => {
let width = measure(line.as_str(), tab_size);
if width <= limit {
return None;
}
width
}
StrippedLine::Unchanged(_) => width,
};

let mut chunks = line.split_whitespace();
let (Some(_), Some(second_chunk)) = (chunks.next(), chunks.next()) else {
// Single word / no printable chars - no way to make the line shorter.
return None;
};

// Do not enforce the line length for lines that end with a URL, as long as the URL
// begins before the limit.
let last_chunk = chunks.last().unwrap_or(second_chunk);
if last_chunk.contains("://") {
if width.get() - last_chunk.width() <= limit.value() as usize {
return None;
}
}

// Obtain the start offset of the part of the line that exceeds the limit.
let mut start_offset = line.start();
let mut start_width = LineWidthBuilder::new(tab_size);
for c in line.chars() {
if start_width < limit {
start_offset += c.text_len();
start_width = start_width.add_char(c);
} else {
break;
}
}

Some(Self {
range: TextRange::new(start_offset, line.end()),
width: width.get(),
})
}

/// Return the range of the overlong portion of the line.
pub(super) const fn range(&self) -> TextRange {
self.range
}

/// Return the measured width of the line, without any trailing pragma comments.
pub(super) const fn width(&self) -> usize {
self.width
}
}

/// A [`Line`] that may have trailing pragma comments stripped.
#[derive(Debug)]
enum StrippedLine<'a> {
/// The [`Line`] was unchanged.
Unchanged(&'a Line<'a>),
/// The [`Line`] was changed such that a trailing pragma comment (e.g., `# type: ignore`) was
/// removed. The stored [`Line`] consists of the portion of the original line that precedes the
/// pragma comment.
WithoutPragma(Line<'a>),
}

impl<'a> StrippedLine<'a> {
/// Strip trailing comments from a [`Line`], if the line ends with a pragma comment (like
/// `# type: ignore`) or, if necessary, a task comment (like `# TODO`).
fn from_line(line: &'a Line<'a>, indexer: &Indexer, task_tags: &[String]) -> Self {
let [comment_range] = indexer.comment_ranges().comments_in_range(line.range()) else {
return Self::Unchanged(line);
};

// Convert from absolute to relative range.
let comment_range = comment_range - line.start();
let comment = &line.as_str()[comment_range];

// Ex) `# type: ignore`
if is_pragma_comment(comment) {
// Remove the pragma from the line.
let prefix = &line.as_str()[..usize::from(comment_range.start())].trim_end();
return Self::WithoutPragma(Line::new(prefix, line.start()));
}

// Ex) `# TODO(charlie): ...`
if !task_tags.is_empty() {
let Some(trimmed) = comment.strip_prefix('#') else {
return Self::Unchanged(line);
};
let trimmed = trimmed.trim_start();
if task_tags
.iter()
.any(|task_tag| trimmed.starts_with(task_tag))
{
// Remove the task tag from the line.
let prefix = &line.as_str()[..usize::from(comment_range.start())].trim_end();
return Self::WithoutPragma(Line::new(prefix, line.start()));
}
}

Self::Unchanged(line)
}
}

impl<'a> Deref for StrippedLine<'a> {
type Target = Line<'a>;

fn deref(&self) -> &Self::Target {
match self {
Self::Unchanged(line) => line,
Self::WithoutPragma(line) => line,
}
}
}

/// Returns the width of a given string, accounting for the tab size.
fn measure(s: &str, tab_size: TabSize) -> LineWidthBuilder {
let mut width = LineWidthBuilder::new(tab_size);
width = width.add_str(s);
width
}
36 changes: 26 additions & 10 deletions crates/ruff_linter/src/rules/pycodestyle/rules/doc_line_too_long.rs
@@ -1,8 +1,9 @@
use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_index::Indexer;
use ruff_source_file::Line;

use crate::rules::pycodestyle::helpers::is_overlong;
use crate::rules::pycodestyle::overlong::Overlong;
use crate::settings::LinterSettings;

/// ## What it does
Expand All @@ -19,10 +20,17 @@ use crate::settings::LinterSettings;
/// of either a standalone comment or a standalone string, like a docstring.
///
/// In the interest of pragmatism, this rule makes a few exceptions when
/// determining whether a line is overlong. Namely, it ignores lines that
/// consist of a single "word" (i.e., without any whitespace between its
/// characters), and lines that end with a URL (as long as the URL starts
/// before the line-length threshold).
/// determining whether a line is overlong. Namely, it:
///
/// 1. Ignores lines that consist of a single "word" (i.e., without any
/// whitespace between its characters).
/// 2. Ignores lines that end with a URL, as long as the URL starts before
/// the line-length threshold.
/// 3. Ignores line that end with a pragma comment (e.g., `# type: ignore`
/// or `# noqa`), as long as the pragma comment starts before the
/// line-length threshold. That is, a line will not be flagged as
/// overlong if a pragma comment _causes_ it to exceed the line length.
/// (This behavior aligns with that of the Ruff formatter.)
///
/// If [`pycodestyle.ignore-overlong-task-comments`] is `true`, this rule will
/// also ignore comments that start with any of the specified [`task-tags`]
Expand Down Expand Up @@ -50,7 +58,7 @@ use crate::settings::LinterSettings;
///
/// [PEP 8]: https://peps.python.org/pep-0008/#maximum-line-length
#[violation]
pub struct DocLineTooLong(pub usize, pub usize);
pub struct DocLineTooLong(usize, usize);

impl Violation for DocLineTooLong {
#[derive_message_formats]
Expand All @@ -61,16 +69,24 @@ impl Violation for DocLineTooLong {
}

/// W505
pub(crate) fn doc_line_too_long(line: &Line, settings: &LinterSettings) -> Option<Diagnostic> {
pub(crate) fn doc_line_too_long(
line: &Line,
indexer: &Indexer,
settings: &LinterSettings,
) -> Option<Diagnostic> {
let Some(limit) = settings.pycodestyle.max_doc_length else {
return None;
};

is_overlong(
Overlong::try_from_line(
line,
indexer,
limit,
settings.pycodestyle.ignore_overlong_task_comments,
&settings.task_tags,
if settings.pycodestyle.ignore_overlong_task_comments {
&settings.task_tags
} else {
&[]
},
settings.tab_size,
)
.map(|overlong| {
Expand Down

0 comments on commit 4ee4758

Please sign in to comment.