From bdf285225db4c658c380698948e1aaa63183e012 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Mon, 2 Oct 2023 10:44:18 -0400 Subject: [PATCH] Enable formatting for Jupyter notebooks (#7749) ## Summary This PR enables `ruff format` to format Jupyter notebooks. Most of the work is contained in a new `format_source` method that formats a generic `SourceKind`, then returns `Some(transformed)` if the source required formatting, or `None` otherwise. Closes https://github.com/astral-sh/ruff/issues/7598. ## Test Plan Ran `cat foo.py | cargo run -p ruff_cli -- format --stdin-filename Untitled.ipynb`; verified that the console showed a reasonable error: ```console warning: Failed to read notebook Untitled.ipynb: Expected a Jupyter Notebook, which must be internally stored as JSON, but this file isn't valid JSON: EOF while parsing a value at line 1 column 0 ``` Ran `cat Untitled.ipynb | cargo run -p ruff_cli -- format --stdin-filename Untitled.ipynb`; verified that the JSON output contained formatted source code. 
--- crates/ruff_cli/src/commands/add_noqa.rs | 6 +- crates/ruff_cli/src/commands/format.rs | 172 ++++++++++++++++--- crates/ruff_cli/src/commands/format_stdin.rs | 60 ++++--- crates/ruff_cli/src/diagnostics.rs | 134 +++++---------- crates/ruff_cli/tests/integration_test.rs | 129 ++++++++++++++ crates/ruff_diagnostics/src/source_map.rs | 23 ++- crates/ruff_linter/src/source_kind.rs | 56 +++++- 7 files changed, 419 insertions(+), 161 deletions(-) diff --git a/crates/ruff_cli/src/commands/add_noqa.rs b/crates/ruff_cli/src/commands/add_noqa.rs index 513f4ba2487ca..1fd8f5425f979 100644 --- a/crates/ruff_cli/src/commands/add_noqa.rs +++ b/crates/ruff_cli/src/commands/add_noqa.rs @@ -7,12 +7,12 @@ use log::{debug, error}; use rayon::prelude::*; use ruff_linter::linter::add_noqa_to_path; +use ruff_linter::source_kind::SourceKind; use ruff_linter::warn_user_once; use ruff_python_ast::{PySourceType, SourceType}; use ruff_workspace::resolver::{python_files_in_path, PyprojectConfig}; use crate::args::CliOverrides; -use crate::diagnostics::LintSource; /// Add `noqa` directives to a collection of files. 
pub(crate) fn add_noqa( @@ -57,8 +57,8 @@ pub(crate) fn add_noqa( .and_then(|parent| package_roots.get(parent)) .and_then(|package| *package); let settings = resolver.resolve(path, pyproject_config); - let LintSource(source_kind) = match LintSource::try_from_path(path, source_type) { - Ok(Some(source)) => source, + let source_kind = match SourceKind::from_path(path, source_type) { + Ok(Some(source_kind)) => source_kind, Ok(None) => return None, Err(e) => { error!("Failed to extract source from {}: {e}", path.display()); diff --git a/crates/ruff_cli/src/commands/format.rs b/crates/ruff_cli/src/commands/format.rs index 826e0c7a2eb43..42776e8773043 100644 --- a/crates/ruff_cli/src/commands/format.rs +++ b/crates/ruff_cli/src/commands/format.rs @@ -1,21 +1,25 @@ use std::fmt::{Display, Formatter}; -use std::io; +use std::fs::File; use std::path::{Path, PathBuf}; use std::time::Instant; use anyhow::Result; use colored::Colorize; +use itertools::Itertools; use log::error; use rayon::iter::Either::{Left, Right}; use rayon::iter::{IntoParallelIterator, ParallelIterator}; use thiserror::Error; use tracing::debug; +use ruff_diagnostics::SourceMap; use ruff_linter::fs; use ruff_linter::logging::LogLevel; +use ruff_linter::source_kind::{SourceError, SourceKind}; use ruff_linter::warn_user_once; use ruff_python_ast::{PySourceType, SourceType}; use ruff_python_formatter::{format_module_source, FormatModuleError}; +use ruff_text_size::{TextLen, TextRange, TextSize}; use ruff_workspace::resolver::python_files_in_path; use ruff_workspace::FormatterSettings; @@ -64,10 +68,7 @@ pub(crate) fn format( Ok(entry) => { let path = entry.path(); - let SourceType::Python( - source_type @ (PySourceType::Python | PySourceType::Stub), - ) = SourceType::from(path) - else { + let SourceType::Python(source_type) = SourceType::from(path) else { // Ignore any non-Python files. 
return None; }; @@ -145,24 +146,149 @@ fn format_path( source_type: PySourceType, mode: FormatMode, ) -> Result<FormatCommandResult, FormatCommandError> { - let unformatted = std::fs::read_to_string(path) - .map_err(|err| FormatCommandError::Read(Some(path.to_path_buf()), err))?; + // Extract the sources from the file. + let source_kind = match SourceKind::from_path(path, source_type) { + Ok(Some(source_kind)) => source_kind, + Ok(None) => return Ok(FormatCommandResult::Unchanged), + Err(err) => { + return Err(FormatCommandError::Read(Some(path.to_path_buf()), err)); + } + }; - let options = settings.to_format_options(source_type, &unformatted); - debug!("Formatting {} with {:?}", path.display(), options); + // Format the source. + match format_source(source_kind, source_type, Some(path), settings)? { + FormattedSource::Formatted(formatted) => { + if mode.is_write() { + let mut writer = File::create(path).map_err(|err| { + FormatCommandError::Write(Some(path.to_path_buf()), err.into()) + })?; + formatted + .write(&mut writer) + .map_err(|err| FormatCommandError::Write(Some(path.to_path_buf()), err))?; + } + Ok(FormatCommandResult::Formatted) + } + FormattedSource::Unchanged(_) => Ok(FormatCommandResult::Unchanged), + } +} - let formatted = format_module_source(&unformatted, options) - .map_err(|err| FormatCommandError::FormatModule(Some(path.to_path_buf()), err))?; +#[derive(Debug)] +pub(crate) enum FormattedSource { + /// The source was formatted, and the [`SourceKind`] contains the transformed source code. + Formatted(SourceKind), + /// The source was unchanged, and the [`SourceKind`] contains the original source code.
+ Unchanged(SourceKind), +} - let formatted = formatted.as_code(); - if formatted.len() == unformatted.len() && formatted == unformatted { - Ok(FormatCommandResult::Unchanged) - } else { - if mode.is_write() { - std::fs::write(path, formatted.as_bytes()) - .map_err(|err| FormatCommandError::Write(Some(path.to_path_buf()), err))?; +impl From<FormattedSource> for FormatCommandResult { + fn from(value: FormattedSource) -> Self { + match value { + FormattedSource::Formatted(_) => FormatCommandResult::Formatted, + FormattedSource::Unchanged(_) => FormatCommandResult::Unchanged, + } + } +} + +impl FormattedSource { + pub(crate) fn source_kind(&self) -> &SourceKind { + match self { + FormattedSource::Formatted(source_kind) => source_kind, + FormattedSource::Unchanged(source_kind) => source_kind, + } + } +} + +/// Format a [`SourceKind`], returning the transformed [`SourceKind`], or `None` if the source was +/// unchanged. +pub(crate) fn format_source( + source_kind: SourceKind, + source_type: PySourceType, + path: Option<&Path>, + settings: &FormatterSettings, +) -> Result<FormattedSource, FormatCommandError> { + match source_kind { + SourceKind::Python(unformatted) => { + let options = settings.to_format_options(source_type, &unformatted); + + let formatted = format_module_source(&unformatted, options) + .map_err(|err| FormatCommandError::Format(path.map(Path::to_path_buf), err))?; + + let formatted = formatted.into_code(); + if formatted.len() == unformatted.len() && formatted == *unformatted { + Ok(FormattedSource::Unchanged(SourceKind::Python(unformatted))) + } else { + Ok(FormattedSource::Formatted(SourceKind::Python(formatted))) + } + } + SourceKind::IpyNotebook(notebook) => { + if !notebook.is_python_notebook() { + return Ok(FormattedSource::Unchanged(SourceKind::IpyNotebook( + notebook, + ))); + } + + let options = settings.to_format_options(source_type, notebook.source_code()); + + let mut output: Option<String> = None; + let mut last: Option<TextSize> = None; + let mut source_map = SourceMap::default(); + + // Format each cell
individually. + for (start, end) in notebook.cell_offsets().iter().tuple_windows::<(_, _)>() { + let range = TextRange::new(*start, *end); + let unformatted = &notebook.source_code()[range]; + + // Format the cell. + let formatted = format_module_source(unformatted, options.clone()) + .map_err(|err| FormatCommandError::Format(path.map(Path::to_path_buf), err))?; + + // If the cell is unchanged, skip it. + let formatted = formatted.as_code(); + if formatted.len() == unformatted.len() && formatted == unformatted { + continue; + } + + // If this is the first newly-formatted cell, initialize the output. + let output = output + .get_or_insert_with(|| String::with_capacity(notebook.source_code().len())); + + // Add all contents from `last` to the current cell. + let slice = &notebook.source_code() + [TextRange::new(last.unwrap_or_default(), range.start())]; + output.push_str(slice); + + // Add the start source marker for the cell. + source_map.push_marker(*start, output.text_len()); + + // Add the cell itself. + output.push_str(formatted); + + // Add the end source marker for the added cell. + source_map.push_marker(*end, output.text_len()); + + // Track that the cell was formatted. + last = Some(*end); + } + + // If the file was unchanged, return `None`. + let (Some(mut output), Some(last)) = (output, last) else { + return Ok(FormattedSource::Unchanged(SourceKind::IpyNotebook( + notebook, + ))); + }; + + // Add the remaining content. + let slice = &notebook.source_code()[usize::from(last)..]; + output.push_str(slice); + + // Update the notebook.
+ let mut notebook = notebook.clone(); + notebook.update(&source_map, output); + + Ok(FormattedSource::Formatted(SourceKind::IpyNotebook( + notebook, + ))) } - Ok(FormatCommandResult::Formatted) } } @@ -244,10 +370,10 @@ impl Display for FormatResultSummary { #[derive(Error, Debug)] pub(crate) enum FormatCommandError { Ignore(#[from] ignore::Error), - Read(Option<PathBuf>, io::Error), - Write(Option<PathBuf>, io::Error), - FormatModule(Option<PathBuf>, FormatModuleError), Panic(Option<PathBuf>, PanicError), + Read(Option<PathBuf>, SourceError), + Format(Option<PathBuf>, FormatModuleError), + Write(Option<PathBuf>, SourceError), } impl Display for FormatCommandError { @@ -300,7 +426,7 @@ impl Display for FormatCommandError { write!(f, "{}{} {err}", "Failed to write".bold(), ":".bold()) } } - Self::FormatModule(path, err) => { + Self::Format(path, err) => { if let Some(path) = path { write!( f, diff --git a/crates/ruff_cli/src/commands/format_stdin.rs b/crates/ruff_cli/src/commands/format_stdin.rs index 1efb502a78437..a790754d2003b 100644 --- a/crates/ruff_cli/src/commands/format_stdin.rs +++ b/crates/ruff_cli/src/commands/format_stdin.rs @@ -1,16 +1,16 @@ -use std::io::{stdout, Write}; +use std::io::stdout; use std::path::Path; use anyhow::Result; use log::warn; -use ruff_python_ast::PySourceType; -use ruff_python_formatter::format_module_source; +use ruff_linter::source_kind::SourceKind; +use ruff_python_ast::{PySourceType, SourceType}; use ruff_workspace::resolver::python_file_at_path; use ruff_workspace::FormatterSettings; use crate::args::{CliOverrides, FormatArguments}; -use crate::commands::format::{FormatCommandError, FormatCommandResult, FormatMode}; +use crate::commands::format::{format_source, FormatCommandError, FormatCommandResult, FormatMode}; use crate::resolve::resolve; use crate::stdin::read_from_stdin; use crate::ExitStatus; @@ -35,10 +35,19 @@ pub(crate) fn format_stdin(cli: &FormatArguments, overrides: &CliOverrides) -> R } } - // Format the file.
let path = cli.stdin_filename.as_deref(); - match format_source(path, &pyproject_config.settings.formatter, mode) { + let SourceType::Python(source_type) = path.map(SourceType::from).unwrap_or_default() else { + return Ok(ExitStatus::Success); + }; + + // Format the file. + match format_source_code( + path, + &pyproject_config.settings.formatter, + source_type, + mode, + ) { Ok(result) => match mode { FormatMode::Write => Ok(ExitStatus::Success), FormatMode::Check => { @@ -57,32 +66,35 @@ pub(crate) fn format_stdin(cli: &FormatArguments, overrides: &CliOverrides) -> R } /// Format source code read from `stdin`. -fn format_source( +fn format_source_code( path: Option<&Path>, settings: &FormatterSettings, + source_type: PySourceType, mode: FormatMode, ) -> Result<FormatCommandResult, FormatCommandError> { - let unformatted = read_from_stdin() - .map_err(|err| FormatCommandError::Read(path.map(Path::to_path_buf), err))?; + // Read the source from stdin. + let source_code = read_from_stdin() + .map_err(|err| FormatCommandError::Read(path.map(Path::to_path_buf), err.into()))?; - let options = settings.to_format_options( - path.map(PySourceType::from).unwrap_or_default(), - &unformatted, - ); + let source_kind = match SourceKind::from_source_code(source_code, source_type) { + Ok(Some(source_kind)) => source_kind, + Ok(None) => return Ok(FormatCommandResult::Unchanged), + Err(err) => { + return Err(FormatCommandError::Read(path.map(Path::to_path_buf), err)); + } + }; - let formatted = format_module_source(&unformatted, options) - .map_err(|err| FormatCommandError::FormatModule(path.map(Path::to_path_buf), err))?; - let formatted = formatted.as_code(); + // Format the source. + let formatted = format_source(source_kind, source_type, path, settings)?; + // Write to stdout regardless of whether the source was formatted.
if mode.is_write() { - stdout() - .lock() - .write_all(formatted.as_bytes()) + let mut writer = stdout().lock(); + formatted + .source_kind() + .write(&mut writer) .map_err(|err| FormatCommandError::Write(path.map(Path::to_path_buf), err))?; } - if formatted.len() == unformatted.len() && formatted == unformatted { - Ok(FormatCommandResult::Unchanged) - } else { - Ok(FormatCommandResult::Formatted) - } + + Ok(FormatCommandResult::from(formatted)) } diff --git a/crates/ruff_cli/src/diagnostics.rs b/crates/ruff_cli/src/diagnostics.rs index fc49b0bfe648b..d3dea9f56aeba 100644 --- a/crates/ruff_cli/src/diagnostics.rs +++ b/crates/ruff_cli/src/diagnostics.rs @@ -14,7 +14,6 @@ use filetime::FileTime; use log::{debug, error, warn}; use rustc_hash::FxHashMap; use similar::TextDiff; -use thiserror::Error; use ruff_diagnostics::Diagnostic; use ruff_linter::linter::{lint_fix, lint_only, FixTable, FixerResult, LinterResult}; @@ -23,12 +22,12 @@ use ruff_linter::message::Message; use ruff_linter::pyproject_toml::lint_pyproject_toml; use ruff_linter::registry::AsRule; use ruff_linter::settings::{flags, LinterSettings}; -use ruff_linter::source_kind::SourceKind; +use ruff_linter::source_kind::{SourceError, SourceKind}; use ruff_linter::{fs, IOError, SyntaxError}; use ruff_macros::CacheKey; use ruff_notebook::{Cell, Notebook, NotebookError, NotebookIndex}; use ruff_python_ast::imports::ImportMap; -use ruff_python_ast::{PySourceType, SourceType, TomlSourceType}; +use ruff_python_ast::{SourceType, TomlSourceType}; use ruff_source_file::{LineIndex, SourceCode, SourceFileBuilder}; use ruff_text_size::{TextRange, TextSize}; use ruff_workspace::Settings; @@ -82,15 +81,38 @@ impl Diagnostics { } } - /// Generate [`Diagnostics`] based on a [`SourceExtractionError`]. + /// Generate [`Diagnostics`] based on a [`SourceError`]. 
pub(crate) fn from_source_error( - err: &SourceExtractionError, + err: &SourceError, path: Option<&Path>, settings: &LinterSettings, ) -> Self { - let diagnostic = Diagnostic::from(err); + let diagnostic = match err { + // IO errors. + SourceError::Io(_) + | SourceError::Notebook(NotebookError::Io(_) | NotebookError::Json(_)) => { + Diagnostic::new( + IOError { + message: err.to_string(), + }, + TextRange::default(), + ) + } + // Syntax errors. + SourceError::Notebook( + NotebookError::InvalidJson(_) + | NotebookError::InvalidSchema(_) + | NotebookError::InvalidFormat(_), + ) => Diagnostic::new( + SyntaxError { + message: err.to_string(), + }, + TextRange::default(), + ), + }; + if settings.rules.enabled(diagnostic.kind.rule()) { - let name = path.map_or_else(|| "-".into(), std::path::Path::to_string_lossy); + let name = path.map_or_else(|| "-".into(), Path::to_string_lossy); let dummy = SourceFileBuilder::new(name, "").finish(); Self::new( vec![Message::from_diagnostic( @@ -183,13 +205,12 @@ pub(crate) fn lint_path( .iter_enabled() .any(|rule_code| rule_code.lint_source().is_pyproject_toml()) { - let contents = - match std::fs::read_to_string(path).map_err(SourceExtractionError::Io) { - Ok(contents) => contents, - Err(err) => { - return Ok(Diagnostics::from_source_error(&err, Some(path), settings)); - } - }; + let contents = match std::fs::read_to_string(path).map_err(SourceError::from) { + Ok(contents) => contents, + Err(err) => { + return Ok(Diagnostics::from_source_error(&err, Some(path), settings)); + } + }; let source_file = SourceFileBuilder::new(path.to_string_lossy(), contents).finish(); lint_pyproject_toml(source_file, settings) } else { @@ -205,8 +226,8 @@ pub(crate) fn lint_path( }; // Extract the sources from the file. 
- let LintSource(source_kind) = match LintSource::try_from_path(path, source_type) { - Ok(Some(sources)) => sources, + let source_kind = match SourceKind::from_path(path, source_type) { + Ok(Some(source_kind)) => source_kind, Ok(None) => return Ok(Diagnostics::default()), Err(err) => { return Ok(Diagnostics::from_source_error(&err, Some(path), settings)); @@ -371,8 +392,8 @@ pub(crate) fn lint_stdin( }; // Extract the sources from the file. - let LintSource(source_kind) = match LintSource::try_from_source_code(contents, source_type) { - Ok(Some(sources)) => sources, + let source_kind = match SourceKind::from_source_code(contents, source_type) { + Ok(Some(source_kind)) => source_kind, Ok(None) => return Ok(Diagnostics::default()), Err(err) => { return Ok(Diagnostics::from_source_error(&err, path, &settings.linter)); @@ -487,80 +508,3 @@ pub(crate) fn lint_stdin( notebook_indexes, }) } - -#[derive(Debug)] -pub(crate) struct LintSource(pub(crate) SourceKind); - -impl LintSource { - /// Extract the lint [`LintSource`] from the given file path. - pub(crate) fn try_from_path( - path: &Path, - source_type: PySourceType, - ) -> Result<Option<LintSource>, SourceExtractionError> { - if source_type.is_ipynb() { - let notebook = Notebook::from_path(path)?; - Ok(notebook - .is_python_notebook() - .then_some(LintSource(SourceKind::IpyNotebook(notebook)))) - } else { - // This is tested by ruff_cli integration test `unreadable_file` - let contents = std::fs::read_to_string(path)?; - Ok(Some(LintSource(SourceKind::Python(contents)))) - } - } - - /// Extract the lint [`LintSource`] from the raw string contents, optionally accompanied by a - /// file path indicating the path to the file from which the contents were read. If provided, - /// the file path should be used for diagnostics, but not for reading the file from disk.
- pub(crate) fn try_from_source_code( - source_code: String, - source_type: PySourceType, - ) -> Result<Option<LintSource>, SourceExtractionError> { - if source_type.is_ipynb() { - let notebook = Notebook::from_source_code(&source_code)?; - Ok(notebook - .is_python_notebook() - .then_some(LintSource(SourceKind::IpyNotebook(notebook)))) - } else { - Ok(Some(LintSource(SourceKind::Python(source_code)))) - } - } -} - -#[derive(Error, Debug)] -pub(crate) enum SourceExtractionError { - /// The extraction failed due to an [`io::Error`]. - #[error(transparent)] - Io(#[from] io::Error), - /// The extraction failed due to a [`NotebookError`]. - #[error(transparent)] - Notebook(#[from] NotebookError), -} - -impl From<&SourceExtractionError> for Diagnostic { - fn from(err: &SourceExtractionError) -> Self { - match err { - // IO errors. - SourceExtractionError::Io(_) - | SourceExtractionError::Notebook(NotebookError::Io(_) | NotebookError::Json(_)) => { - Diagnostic::new( - IOError { - message: err.to_string(), - }, - TextRange::default(), - ) - } - // Syntax errors.
- SourceExtractionError::Notebook( - NotebookError::InvalidJson(_) - | NotebookError::InvalidSchema(_) - | NotebookError::InvalidFormat(_), - ) => Diagnostic::new( - SyntaxError { - message: err.to_string(), - }, - TextRange::default(), - ), - } - } -} diff --git a/crates/ruff_cli/tests/integration_test.rs b/crates/ruff_cli/tests/integration_test.rs index 94fa4926f1d85..a47a974d8175b 100644 --- a/crates/ruff_cli/tests/integration_test.rs +++ b/crates/ruff_cli/tests/integration_test.rs @@ -357,6 +357,135 @@ fn stdin_fix_when_no_issues_should_still_print_contents() { "###); } +#[test] +fn stdin_format_jupyter() { + let args = ["format", "--stdin-filename", "Jupyter.ipynb", "--isolated"]; + assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME)) + .args(args) + .pass_stdin(r#"{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "dccc687c-96e2-4604-b957-a8a89b5bec06", + "metadata": {}, + "outputs": [], + "source": [ + "x=1" + ] + }, + { + "cell_type": "markdown", + "id": "19e1b029-f516-4662-a9b9-623b93edac1a", + "metadata": {}, + "source": [ + "Foo" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cdce7b92-b0fb-4c02-86f6-e233b26fa84f", + "metadata": {}, + "outputs": [], + "source": [ + "def func():\n", + " pass\n", + "print(1)\n", + "import os" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} +"#), @r###" + success: true + exit_code: 0 + ----- stdout ----- + { + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "dccc687c-96e2-4604-b957-a8a89b5bec06", + "metadata": {}, + "outputs": [], + "source": [ + "x = 1" + ] 
+ }, + { + "cell_type": "markdown", + "id": "19e1b029-f516-4662-a9b9-623b93edac1a", + "metadata": {}, + "source": [ + "Foo" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cdce7b92-b0fb-4c02-86f6-e233b26fa84f", + "metadata": {}, + "outputs": [], + "source": [ + "def func():\n", + " pass\n", + "\n", + "\n", + "print(1)\n", + "import os" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 + } + + ----- stderr ----- + warning: `ruff format` is a work-in-progress, subject to change at any time, and intended only for experimentation. + "###); +} + #[test] fn show_source() { let args = ["--show-source"]; diff --git a/crates/ruff_diagnostics/src/source_map.rs b/crates/ruff_diagnostics/src/source_map.rs index 161496eadc50d..5a3e2e8c6f9a9 100644 --- a/crates/ruff_diagnostics/src/source_map.rs +++ b/crates/ruff_diagnostics/src/source_map.rs @@ -46,10 +46,7 @@ impl SourceMap { /// The `output_length` is the length of the transformed string before the /// edit is applied. pub fn push_start_marker(&mut self, edit: &Edit, output_length: TextSize) { - self.0.push(SourceMarker { - source: edit.start(), - dest: output_length, - }); + self.push_marker(edit.start(), output_length); } /// Push the end marker for an [`Edit`]. @@ -58,16 +55,18 @@ impl SourceMap { /// edit has been applied. 
pub fn push_end_marker(&mut self, edit: &Edit, output_length: TextSize) { if edit.is_insertion() { - self.0.push(SourceMarker { - source: edit.start(), - dest: output_length, - }); + self.push_marker(edit.start(), output_length); } else { // Deletion or replacement - self.0.push(SourceMarker { - source: edit.end(), - dest: output_length, - }); + self.push_marker(edit.end(), output_length); } } + + /// Push a new marker to the sourcemap. + pub fn push_marker(&mut self, offset: TextSize, output_length: TextSize) { + self.0.push(SourceMarker { + source: offset, + dest: output_length, + }); + } } diff --git a/crates/ruff_linter/src/source_kind.rs b/crates/ruff_linter/src/source_kind.rs index b065f3e7c8d33..76982451d33bd 100644 --- a/crates/ruff_linter/src/source_kind.rs +++ b/crates/ruff_linter/src/source_kind.rs @@ -1,9 +1,13 @@ +use std::io; use std::io::Write; +use std::path::Path; use anyhow::Result; +use thiserror::Error; use ruff_diagnostics::SourceMap; -use ruff_notebook::Notebook; +use ruff_notebook::{Notebook, NotebookError}; +use ruff_python_ast::PySourceType; #[derive(Clone, Debug, PartialEq, is_macro::Is)] pub enum SourceKind { @@ -34,13 +38,57 @@ impl SourceKind { } } + /// Read the [`SourceKind`] from the given path. Returns `None` if the source is not a Python + /// source file. + pub fn from_path(path: &Path, source_type: PySourceType) -> Result<Option<Self>, SourceError> { + if source_type.is_ipynb() { + let notebook = Notebook::from_path(path)?; + Ok(notebook + .is_python_notebook() + .then_some(Self::IpyNotebook(notebook))) + } else { + let contents = std::fs::read_to_string(path)?; + Ok(Some(Self::Python(contents))) + } + } + + /// Read the [`SourceKind`] from the given source code. Returns `None` if the source is not + /// Python source code.
+ pub fn from_source_code( + source_code: String, + source_type: PySourceType, + ) -> Result<Option<Self>, SourceError> { + if source_type.is_ipynb() { + let notebook = Notebook::from_source_code(&source_code)?; + Ok(notebook + .is_python_notebook() + .then_some(Self::IpyNotebook(notebook))) + } else { + Ok(Some(Self::Python(source_code))) + } + } + /// Write the transformed source file to the given writer. /// /// For Jupyter notebooks, this will write out the notebook as JSON. - pub fn write(&self, writer: &mut dyn Write) -> Result<()> { + pub fn write(&self, writer: &mut dyn Write) -> Result<(), SourceError> { match self { - SourceKind::Python(source) => writer.write_all(source.as_bytes()).map_err(Into::into), - SourceKind::IpyNotebook(notebook) => notebook.write(writer).map_err(Into::into), + SourceKind::Python(source) => { + writer.write_all(source.as_bytes())?; + Ok(()) + } + SourceKind::IpyNotebook(notebook) => { + notebook.write(writer)?; + Ok(()) + } } } } + +#[derive(Error, Debug)] +pub enum SourceError { + #[error(transparent)] + Io(#[from] io::Error), + #[error(transparent)] + Notebook(#[from] NotebookError), +}