astral-sh · charliermarsh · Sep 29, 2023 · Sep 28, 2023 · MichaReiser · Sep 29, 2023
diff --git a/crates/ruff_linter/resources/test/fixtures/pycodestyle/E501_1.py b/crates/ruff_linter/resources/test/fixtures/pycodestyle/E501_1.py
@@ -1,6 +1,8 @@
 # TODO: comments starting with one of the configured task-tags sometimes are longer than line-length so that you can easily find them with `git grep`
+# TODO(charlie): comments starting with one of the configured task-tags sometimes are longer than line-length so that you can easily find them with `git grep`
 # TODO comments starting with one of the configured task-tags sometimes are longer than line-length so that you can easily find them with `git grep`
 #     TODO    comments starting with one of the configured task-tags sometimes are longer than line-length so that you can easily find them with `git grep`
 # FIXME: comments starting with one of the configured task-tags sometimes are longer than line-length so that you can easily find them with `git grep`
 # FIXME comments starting with one of the configured task-tags sometimes are longer than line-length so that you can easily find them with `git grep`
 #     FIXME    comments starting with one of the configured task-tags sometimes are longer than line-length so that you can easily find them with `git grep`
+# FIXME(charlie): comments starting with one of the configured task-tags sometimes are longer than line-length so that you can easily find them with `git grep`
diff --git a/crates/ruff_linter/resources/test/fixtures/pycodestyle/E501_3.py b/crates/ruff_linter/resources/test/fixtures/pycodestyle/E501_3.py
@@ -0,0 +1,11 @@
+# OK (88 characters)
+"shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:aaa"  # type: ignore
+
+# OK (88 characters)
+"shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:aaa"# type: ignore
+
+# OK (88 characters)
+"shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:aaa"    # type: ignore
+
+# Error (89 characters)
+"shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:aaaa"  # type: ignore
diff --git a/crates/ruff_linter/src/checkers/physical_lines.rs b/crates/ruff_linter/src/checkers/physical_lines.rs
@@ -41,7 +41,7 @@ pub(crate) fn check_physical_lines(
             .is_some()
         {
             if enforce_doc_line_too_long {
-                if let Some(diagnostic) = doc_line_too_long(&line, settings) {
+                if let Some(diagnostic) = doc_line_too_long(&line, indexer, settings) {
                     diagnostics.push(diagnostic);
                 }
             }
@@ -54,7 +54,7 @@ pub(crate) fn check_physical_lines(
         }
 
         if enforce_line_too_long {
-            if let Some(diagnostic) = line_too_long(&line, settings) {
+            if let Some(diagnostic) = line_too_long(&line, indexer, settings) {
                 diagnostics.push(diagnostic);
             }
         }

diff --git a/crates/ruff_linter/src/rules/pycodestyle/helpers.rs b/crates/ruff_linter/src/rules/pycodestyle/helpers.rs
@@ -1,13 +1,9 @@
-use unicode_width::UnicodeWidthStr;
-
 use ruff_python_ast::node::AnyNodeRef;
 use ruff_python_ast::parenthesize::parenthesized_range;
 use ruff_python_ast::{CmpOp, Expr};
 use ruff_python_trivia::CommentRanges;
-use ruff_source_file::{Line, Locator};
-use ruff_text_size::{Ranged, TextLen, TextRange};
-
-use crate::line_width::{LineLength, LineWidthBuilder, TabSize};
+use ruff_source_file::Locator;
+use ruff_text_size::Ranged;
 
 pub(super) fn is_ambiguous_name(name: &str) -> bool {
     name == "l" || name == "I" || name == "O"
@@ -64,80 +60,3 @@ pub(super) fn generate_comparison(
 
     contents
 }
-
-pub(super) fn is_overlong(
-    line: &Line,
-    limit: LineLength,
-    ignore_overlong_task_comments: bool,
-    task_tags: &[String],
-    tab_size: TabSize,
-) -> Option<Overlong> {
-    // The maximum width of the line is the number of bytes multiplied by the tab size (the
-    // worst-case scenario is that the line is all tabs). If the maximum width is less than the
-    // limit, then the line is not overlong.
-    let max_width = line.len() * tab_size.as_usize();
-    if max_width < limit.value() as usize {
-        return None;
-    }
-
-    let mut width = LineWidthBuilder::new(tab_size);
-    width = width.add_str(line.as_str());
-    if width <= limit {
-        return None;
-    }
-
-    let mut chunks = line.split_whitespace();
-    let (Some(first_chunk), Some(second_chunk)) = (chunks.next(), chunks.next()) else {
-        // Single word / no printable chars - no way to make the line shorter
-        return None;
-    };
-
-    if first_chunk == "#" {
-        if ignore_overlong_task_comments {
-            let second = second_chunk.trim_end_matches(':');
-            if task_tags.iter().any(|task_tag| task_tag == second) {
-                return None;
-            }
-        }
-    }
-
-    // Do not enforce the line length for lines that end with a URL, as long as the URL
-    // begins before the limit.
-    let last_chunk = chunks.last().unwrap_or(second_chunk);
-    if last_chunk.contains("://") {
-        if width.get() - last_chunk.width() <= limit.value() as usize {
-            return None;
-        }
-    }
-
-    // Obtain the start offset of the part of the line that exceeds the limit
-    let mut start_offset = line.start();
-    let mut start_width = LineWidthBuilder::new(tab_size);
-    for c in line.chars() {
-        if start_width < limit {
-            start_offset += c.text_len();
-            start_width = start_width.add_char(c);
-        } else {
-            break;
-        }
-    }
-    Some(Overlong {
-        range: TextRange::new(start_offset, line.end()),
-        width: width.get(),
-    })
-}
-
-pub(super) struct Overlong {
-    range: TextRange,
-    width: usize,
-}
-
-impl Overlong {
-    pub(super) const fn range(&self) -> TextRange {
-        self.range
-    }
-
-    pub(super) const fn width(&self) -> usize {
-        self.width
-    }
-}
diff --git a/crates/ruff_linter/src/rules/pycodestyle/mod.rs b/crates/ruff_linter/src/rules/pycodestyle/mod.rs
@@ -3,6 +3,7 @@ pub(crate) mod rules;
 pub mod settings;
 
 pub(crate) mod helpers;
+pub(super) mod overlong;
 
 #[cfg(test)]
 mod tests {
@@ -30,6 +31,7 @@ mod tests {
     #[test_case(Rule::InvalidEscapeSequence, Path::new("W605_1.py"))]
     #[test_case(Rule::InvalidEscapeSequence, Path::new("W605_2.py"))]
     #[test_case(Rule::LineTooLong, Path::new("E501.py"))]
+    #[test_case(Rule::LineTooLong, Path::new("E501_3.py"))]
     #[test_case(Rule::MixedSpacesAndTabs, Path::new("E101.py"))]
     #[test_case(Rule::ModuleImportNotAtTopOfFile, Path::new("E40.py"))]
     #[test_case(Rule::ModuleImportNotAtTopOfFile, Path::new("E402.py"))]

diff --git a/crates/ruff_linter/src/rules/pycodestyle/overlong.rs b/crates/ruff_linter/src/rules/pycodestyle/overlong.rs
@@ -0,0 +1,165 @@
+use std::ops::Deref;
+
+use unicode_width::UnicodeWidthStr;
+
+use ruff_python_index::Indexer;
+use ruff_python_trivia::is_pragma_comment;
+use ruff_source_file::Line;
+use ruff_text_size::{TextLen, TextRange};
+
+use crate::line_width::{LineLength, LineWidthBuilder, TabSize};
+
+#[derive(Debug)]
+pub(super) struct Overlong {
+    range: TextRange,
+    width: usize,
+}
+
+impl Overlong {
+    /// Returns an [`Overlong`] if the measured line exceeds the configured line length, or `None`
+    /// otherwise.
+    pub(super) fn try_from_line(
+        line: &Line,
+        indexer: &Indexer,
+        limit: LineLength,
+        task_tags: &[String],
+        tab_size: TabSize,
+    ) -> Option<Self> {
+        // The maximum width of the line is the number of bytes multiplied by the tab size (the
+        // worst-case scenario is that the line is all tabs). If the maximum width is less than the
+        // limit, then the line is not overlong.
+        let max_width = line.len() * tab_size.as_usize();
+        if max_width < limit.value() as usize {
+            return None;
+        }
+
+        // Measure the line. If it's already below the limit, exit early.
+        let width = measure(line.as_str(), tab_size);
+        if width <= limit {
+            return None;
+        }
+
+        // Strip trailing comments and re-measure the line, if needed.
+        let line = StrippedLine::from_line(line, indexer, task_tags);
+        let width = match &line {
+            StrippedLine::WithoutPragma(line) => {
+                let width = measure(line.as_str(), tab_size);
+                if width <= limit {
+                    return None;
+                }
+                width
+            }
+            StrippedLine::Unchanged(_) => width,
+        };
+
+        let mut chunks = line.split_whitespace();
+        let (Some(_), Some(second_chunk)) = (chunks.next(), chunks.next()) else {
+            // Single word / no printable chars - no way to make the line shorter.
+            return None;
+        };
+
+        // Do not enforce the line length for lines that end with a URL, as long as the URL
+        // begins before the limit.
+        let last_chunk = chunks.last().unwrap_or(second_chunk);
+        if last_chunk.contains("://") {
+            if width.get() - last_chunk.width() <= limit.value() as usize {
+                return None;
+            }
+        }
+
+        // Obtain the start offset of the part of the line that exceeds the limit.
+        let mut start_offset = line.start();
+        let mut start_width = LineWidthBuilder::new(tab_size);
+        for c in line.chars() {
+            if start_width < limit {
+                start_offset += c.text_len();
+                start_width = start_width.add_char(c);
+            } else {
+                break;
+            }
+        }
+
+        Some(Self {
+            range: TextRange::new(start_offset, line.end()),
+            width: width.get(),
+        })
+    }
+
+    /// Return the range of the overlong portion of the line.
+    pub(super) const fn range(&self) -> TextRange {
+        self.range
+    }
+
+    /// Return the measured width of the line, without any trailing pragma comments.
+    pub(super) const fn width(&self) -> usize {
+        self.width
+    }
+}
+
+/// A [`Line`] that may have trailing pragma comments stripped.
+#[derive(Debug)]
+enum StrippedLine<'a> {
+    /// The [`Line`] was unchanged.
+    Unchanged(&'a Line<'a>),
+    /// The [`Line`] was changed such that a trailing pragma comment (e.g., `# type: ignore`) was
+    /// removed. The stored [`Line`] consists of the portion of the original line that precedes the
+    /// pragma comment.
+    WithoutPragma(Line<'a>),
+}
+
+impl<'a> StrippedLine<'a> {
+    /// Strip trailing comments from a [`Line`], if the line ends with a pragma comment (like
+    /// `# type: ignore`) or, if necessary, a task comment (like `# TODO`).
+    fn from_line(line: &'a Line<'a>, indexer: &Indexer, task_tags: &[String]) -> Self {
+        let [comment_range] = indexer.comment_ranges().comments_in_range(line.range()) else {
+            return Self::Unchanged(line);
+        };
+
+        // Convert from absolute to relative range.
+        let comment_range = comment_range - line.start();
+        let comment = &line.as_str()[comment_range];
+
+        // Ex) `# type: ignore`
+        if is_pragma_comment(comment) {
+            // Remove the pragma from the line.
+            let prefix = &line.as_str()[..usize::from(comment_range.start())].trim_end();
+            return Self::WithoutPragma(Line::new(prefix, line.start()));
+        }
+
+        // Ex) `# TODO(charlie): ...`
+        if !task_tags.is_empty() {
+            let Some(trimmed) = comment.strip_prefix('#') else {
+                return Self::Unchanged(line);
+            };
+            let trimmed = trimmed.trim_start();
+            if task_tags
+                .iter()
+                .any(|task_tag| trimmed.starts_with(task_tag))
+            {
+                // Remove the task tag from the line.
+                let prefix = &line.as_str()[..usize::from(comment_range.start())].trim_end();
+                return Self::WithoutPragma(Line::new(prefix, line.start()));
+            }
+        }
+
+        Self::Unchanged(line)
+    }
+}
+
+impl<'a> Deref for StrippedLine<'a> {
+    type Target = Line<'a>;
+
+    fn deref(&self) -> &Self::Target {
+        match self {
+            Self::Unchanged(line) => line,
+            Self::WithoutPragma(line) => line,
+        }
+    }
+}
+
+/// Returns the width of a given string, accounting for the tab size.
+fn measure(s: &str, tab_size: TabSize) -> LineWidthBuilder {
+    let mut width = LineWidthBuilder::new(tab_size);
+    width = width.add_str(s);
+    width
+}
diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/doc_line_too_long.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/doc_line_too_long.rs
@@ -1,8 +1,9 @@
 use ruff_diagnostics::{Diagnostic, Violation};
 use ruff_macros::{derive_message_formats, violation};
+use ruff_python_index::Indexer;
 use ruff_source_file::Line;
 
-use crate::rules::pycodestyle::helpers::is_overlong;
+use crate::rules::pycodestyle::overlong::Overlong;
 use crate::settings::LinterSettings;
 
 /// ## What it does
@@ -19,10 +20,17 @@ use crate::settings::LinterSettings;
 /// of either a standalone comment or a standalone string, like a docstring.
 ///
 /// In the interest of pragmatism, this rule makes a few exceptions when
-/// determining whether a line is overlong. Namely, it ignores lines that
-/// consist of a single "word" (i.e., without any whitespace between its
-/// characters), and lines that end with a URL (as long as the URL starts
-/// before the line-length threshold).
+/// determining whether a line is overlong. Namely, it:
+///
+/// 1. Ignores lines that consist of a single "word" (i.e., without any
+///    whitespace between its characters).
+/// 2. Ignores lines that end with a URL, as long as the URL starts before
+///    the line-length threshold.
+/// 3. Ignores line that end with a pragma comment (e.g., `# type: ignore`
+///    or `# noqa`), as long as the pragma comment starts before the
+///    line-length threshold. That is, a line will not be flagged as
+///    overlong if a pragma comment _causes_ it to exceed the line length.
+///    (This behavior aligns with that of the Ruff formatter.)
 ///
 /// If [`pycodestyle.ignore-overlong-task-comments`] is `true`, this rule will
 /// also ignore comments that start with any of the specified [`task-tags`]
@@ -50,7 +58,7 @@ use crate::settings::LinterSettings;
 ///
 /// [PEP 8]: https://peps.python.org/pep-0008/#maximum-line-length
 #[violation]
-pub struct DocLineTooLong(pub usize, pub usize);
+pub struct DocLineTooLong(usize, usize);
 
 impl Violation for DocLineTooLong {
     #[derive_message_formats]
@@ -61,16 +69,24 @@ impl Violation for DocLineTooLong {
 }
 
 /// W505
-pub(crate) fn doc_line_too_long(line: &Line, settings: &LinterSettings) -> Option<Diagnostic> {
+pub(crate) fn doc_line_too_long(
+    line: &Line,
+    indexer: &Indexer,
+    settings: &LinterSettings,
+) -> Option<Diagnostic> {
     let Some(limit) = settings.pycodestyle.max_doc_length else {
         return None;
     };
 
-    is_overlong(
+    Overlong::try_from_line(
         line,
+        indexer,
         limit,
-        settings.pycodestyle.ignore_overlong_task_comments,
-        &settings.task_tags,
+        if settings.pycodestyle.ignore_overlong_task_comments {
+            &settings.task_tags
+        } else {
+            &[]
+        },
         settings.tab_size,
     )
     .map(|overlong| {