Skip to content

Commit

Permalink
Enable formatting for Jupyter notebooks (#7749)
Browse files Browse the repository at this point in the history
## Summary

This PR enables `ruff format` to format Jupyter notebooks.

Most of the work is contained in a new `format_source` method that
formats a generic `SourceKind`, then returns `Some(transformed)` if the
source required formatting, or `None` otherwise.

Closes #7598.

## Test Plan

Ran `cat foo.py | cargo run -p ruff_cli -- format --stdin-filename
Untitled.ipynb`; verified that the console showed a reasonable error:

```console
warning: Failed to read notebook Untitled.ipynb: Expected a Jupyter Notebook, which must be internally stored as JSON, but this file isn't valid JSON: EOF while parsing a value at line 1 column 0
```

Ran `cat Untitled.ipynb | cargo run -p ruff_cli -- format
--stdin-filename Untitled.ipynb`; verified that the JSON output
contained formatted source code.
  • Loading branch information
charliermarsh committed Oct 2, 2023
1 parent 0961f00 commit bdf2852
Show file tree
Hide file tree
Showing 7 changed files with 419 additions and 161 deletions.
6 changes: 3 additions & 3 deletions crates/ruff_cli/src/commands/add_noqa.rs
Expand Up @@ -7,12 +7,12 @@ use log::{debug, error};
use rayon::prelude::*;

use ruff_linter::linter::add_noqa_to_path;
use ruff_linter::source_kind::SourceKind;
use ruff_linter::warn_user_once;
use ruff_python_ast::{PySourceType, SourceType};
use ruff_workspace::resolver::{python_files_in_path, PyprojectConfig};

use crate::args::CliOverrides;
use crate::diagnostics::LintSource;

/// Add `noqa` directives to a collection of files.
pub(crate) fn add_noqa(
Expand Down Expand Up @@ -57,8 +57,8 @@ pub(crate) fn add_noqa(
.and_then(|parent| package_roots.get(parent))
.and_then(|package| *package);
let settings = resolver.resolve(path, pyproject_config);
let LintSource(source_kind) = match LintSource::try_from_path(path, source_type) {
Ok(Some(source)) => source,
let source_kind = match SourceKind::from_path(path, source_type) {
Ok(Some(source_kind)) => source_kind,
Ok(None) => return None,
Err(e) => {
error!("Failed to extract source from {}: {e}", path.display());
Expand Down
172 changes: 149 additions & 23 deletions crates/ruff_cli/src/commands/format.rs
@@ -1,21 +1,25 @@
use std::fmt::{Display, Formatter};
use std::io;
use std::fs::File;
use std::path::{Path, PathBuf};
use std::time::Instant;

use anyhow::Result;
use colored::Colorize;
use itertools::Itertools;
use log::error;
use rayon::iter::Either::{Left, Right};
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use thiserror::Error;
use tracing::debug;

use ruff_diagnostics::SourceMap;
use ruff_linter::fs;
use ruff_linter::logging::LogLevel;
use ruff_linter::source_kind::{SourceError, SourceKind};
use ruff_linter::warn_user_once;
use ruff_python_ast::{PySourceType, SourceType};
use ruff_python_formatter::{format_module_source, FormatModuleError};
use ruff_text_size::{TextLen, TextRange, TextSize};
use ruff_workspace::resolver::python_files_in_path;
use ruff_workspace::FormatterSettings;

Expand Down Expand Up @@ -64,10 +68,7 @@ pub(crate) fn format(
Ok(entry) => {
let path = entry.path();

let SourceType::Python(
source_type @ (PySourceType::Python | PySourceType::Stub),
) = SourceType::from(path)
else {
let SourceType::Python(source_type) = SourceType::from(path) else {
// Ignore any non-Python files.
return None;
};
Expand Down Expand Up @@ -145,24 +146,149 @@ fn format_path(
source_type: PySourceType,
mode: FormatMode,
) -> Result<FormatCommandResult, FormatCommandError> {
let unformatted = std::fs::read_to_string(path)
.map_err(|err| FormatCommandError::Read(Some(path.to_path_buf()), err))?;
// Extract the sources from the file.
let source_kind = match SourceKind::from_path(path, source_type) {
Ok(Some(source_kind)) => source_kind,
Ok(None) => return Ok(FormatCommandResult::Unchanged),
Err(err) => {
return Err(FormatCommandError::Read(Some(path.to_path_buf()), err));
}
};

let options = settings.to_format_options(source_type, &unformatted);
debug!("Formatting {} with {:?}", path.display(), options);
// Format the source.
match format_source(source_kind, source_type, Some(path), settings)? {
FormattedSource::Formatted(formatted) => {
if mode.is_write() {
let mut writer = File::create(path).map_err(|err| {
FormatCommandError::Write(Some(path.to_path_buf()), err.into())
})?;
formatted
.write(&mut writer)
.map_err(|err| FormatCommandError::Write(Some(path.to_path_buf()), err))?;
}
Ok(FormatCommandResult::Formatted)
}
FormattedSource::Unchanged(_) => Ok(FormatCommandResult::Unchanged),
}
}

let formatted = format_module_source(&unformatted, options)
.map_err(|err| FormatCommandError::FormatModule(Some(path.to_path_buf()), err))?;
#[derive(Debug)]
pub(crate) enum FormattedSource {
/// The source was formatted, and the [`SourceKind`] contains the transformed source code.
Formatted(SourceKind),
/// The source was unchanged, and the [`SourceKind`] contains the original source code.
Unchanged(SourceKind),
}

let formatted = formatted.as_code();
if formatted.len() == unformatted.len() && formatted == unformatted {
Ok(FormatCommandResult::Unchanged)
} else {
if mode.is_write() {
std::fs::write(path, formatted.as_bytes())
.map_err(|err| FormatCommandError::Write(Some(path.to_path_buf()), err))?;
impl From<FormattedSource> for FormatCommandResult {
fn from(value: FormattedSource) -> Self {
match value {
FormattedSource::Formatted(_) => FormatCommandResult::Formatted,
FormattedSource::Unchanged(_) => FormatCommandResult::Unchanged,
}
}
}

impl FormattedSource {
pub(crate) fn source_kind(&self) -> &SourceKind {
match self {
FormattedSource::Formatted(source_kind) => source_kind,
FormattedSource::Unchanged(source_kind) => source_kind,
}
}
}

/// Format a [`SourceKind`], returning the transformed [`SourceKind`], or `None` if the source was
/// unchanged.
pub(crate) fn format_source(
source_kind: SourceKind,
source_type: PySourceType,
path: Option<&Path>,
settings: &FormatterSettings,
) -> Result<FormattedSource, FormatCommandError> {
match source_kind {
SourceKind::Python(unformatted) => {
let options = settings.to_format_options(source_type, &unformatted);

let formatted = format_module_source(&unformatted, options)
.map_err(|err| FormatCommandError::Format(path.map(Path::to_path_buf), err))?;

let formatted = formatted.into_code();
if formatted.len() == unformatted.len() && formatted == *unformatted {
Ok(FormattedSource::Unchanged(SourceKind::Python(unformatted)))
} else {
Ok(FormattedSource::Formatted(SourceKind::Python(formatted)))
}
}
SourceKind::IpyNotebook(notebook) => {
if !notebook.is_python_notebook() {
return Ok(FormattedSource::Unchanged(SourceKind::IpyNotebook(
notebook,
)));
}

let options = settings.to_format_options(source_type, notebook.source_code());

let mut output: Option<String> = None;
let mut last: Option<TextSize> = None;
let mut source_map = SourceMap::default();

// Format each cell individually.
for (start, end) in notebook.cell_offsets().iter().tuple_windows::<(_, _)>() {
let range = TextRange::new(*start, *end);
let unformatted = &notebook.source_code()[range];

// Format the cell.
let formatted = format_module_source(unformatted, options.clone())
.map_err(|err| FormatCommandError::Format(path.map(Path::to_path_buf), err))?;

// If the cell is unchanged, skip it.
let formatted = formatted.as_code();
if formatted.len() == unformatted.len() && formatted == unformatted {
continue;
}

// If this is the first newly-formatted cell, initialize the output.
let output = output
.get_or_insert_with(|| String::with_capacity(notebook.source_code().len()));

// Add all contents from `last` to the current cell.
let slice = &notebook.source_code()
[TextRange::new(last.unwrap_or_default(), range.start())];
output.push_str(slice);

// Add the start source marker for the cell.
source_map.push_marker(*start, output.text_len());

// Add the cell itself.
output.push_str(formatted);

// Add the end source marker for the added cell.
source_map.push_marker(*end, output.text_len());

// Track that the cell was formatted.
last = Some(*end);
}

// If the file was unchanged, return `None`.
let (Some(mut output), Some(last)) = (output, last) else {
return Ok(FormattedSource::Unchanged(SourceKind::IpyNotebook(
notebook,
)));
};

// Add the remaining content.
let slice = &notebook.source_code()[usize::from(last)..];
output.push_str(slice);

// Update the notebook.
let mut notebook = notebook.clone();
notebook.update(&source_map, output);

Ok(FormattedSource::Formatted(SourceKind::IpyNotebook(
notebook,
)))
}
Ok(FormatCommandResult::Formatted)
}
}

Expand Down Expand Up @@ -244,10 +370,10 @@ impl Display for FormatResultSummary {
#[derive(Error, Debug)]
pub(crate) enum FormatCommandError {
Ignore(#[from] ignore::Error),
Read(Option<PathBuf>, io::Error),
Write(Option<PathBuf>, io::Error),
FormatModule(Option<PathBuf>, FormatModuleError),
Panic(Option<PathBuf>, PanicError),
Read(Option<PathBuf>, SourceError),
Format(Option<PathBuf>, FormatModuleError),
Write(Option<PathBuf>, SourceError),
}

impl Display for FormatCommandError {
Expand Down Expand Up @@ -300,7 +426,7 @@ impl Display for FormatCommandError {
write!(f, "{}{} {err}", "Failed to write".bold(), ":".bold())
}
}
Self::FormatModule(path, err) => {
Self::Format(path, err) => {
if let Some(path) = path {
write!(
f,
Expand Down
60 changes: 36 additions & 24 deletions crates/ruff_cli/src/commands/format_stdin.rs
@@ -1,16 +1,16 @@
use std::io::{stdout, Write};
use std::io::stdout;
use std::path::Path;

use anyhow::Result;
use log::warn;

use ruff_python_ast::PySourceType;
use ruff_python_formatter::format_module_source;
use ruff_linter::source_kind::SourceKind;
use ruff_python_ast::{PySourceType, SourceType};
use ruff_workspace::resolver::python_file_at_path;
use ruff_workspace::FormatterSettings;

use crate::args::{CliOverrides, FormatArguments};
use crate::commands::format::{FormatCommandError, FormatCommandResult, FormatMode};
use crate::commands::format::{format_source, FormatCommandError, FormatCommandResult, FormatMode};
use crate::resolve::resolve;
use crate::stdin::read_from_stdin;
use crate::ExitStatus;
Expand All @@ -35,10 +35,19 @@ pub(crate) fn format_stdin(cli: &FormatArguments, overrides: &CliOverrides) -> R
}
}

// Format the file.
let path = cli.stdin_filename.as_deref();

match format_source(path, &pyproject_config.settings.formatter, mode) {
let SourceType::Python(source_type) = path.map(SourceType::from).unwrap_or_default() else {
return Ok(ExitStatus::Success);
};

// Format the file.
match format_source_code(
path,
&pyproject_config.settings.formatter,
source_type,
mode,
) {
Ok(result) => match mode {
FormatMode::Write => Ok(ExitStatus::Success),
FormatMode::Check => {
Expand All @@ -57,32 +66,35 @@ pub(crate) fn format_stdin(cli: &FormatArguments, overrides: &CliOverrides) -> R
}

/// Format source code read from `stdin`.
fn format_source(
fn format_source_code(
path: Option<&Path>,
settings: &FormatterSettings,
source_type: PySourceType,
mode: FormatMode,
) -> Result<FormatCommandResult, FormatCommandError> {
let unformatted = read_from_stdin()
.map_err(|err| FormatCommandError::Read(path.map(Path::to_path_buf), err))?;
// Read the source from stdin.
let source_code = read_from_stdin()
.map_err(|err| FormatCommandError::Read(path.map(Path::to_path_buf), err.into()))?;

let options = settings.to_format_options(
path.map(PySourceType::from).unwrap_or_default(),
&unformatted,
);
let source_kind = match SourceKind::from_source_code(source_code, source_type) {
Ok(Some(source_kind)) => source_kind,
Ok(None) => return Ok(FormatCommandResult::Unchanged),
Err(err) => {
return Err(FormatCommandError::Read(path.map(Path::to_path_buf), err));
}
};

let formatted = format_module_source(&unformatted, options)
.map_err(|err| FormatCommandError::FormatModule(path.map(Path::to_path_buf), err))?;
let formatted = formatted.as_code();
// Format the source.
let formatted = format_source(source_kind, source_type, path, settings)?;

// Write to stdout regardless of whether the source was formatted.
if mode.is_write() {
stdout()
.lock()
.write_all(formatted.as_bytes())
let mut writer = stdout().lock();
formatted
.source_kind()
.write(&mut writer)
.map_err(|err| FormatCommandError::Write(path.map(Path::to_path_buf), err))?;
}
if formatted.len() == unformatted.len() && formatted == unformatted {
Ok(FormatCommandResult::Unchanged)
} else {
Ok(FormatCommandResult::Formatted)
}

Ok(FormatCommandResult::from(formatted))
}

0 comments on commit bdf2852

Please sign in to comment.