Skip to content

Commit

Permalink
Add cell field to JSON output format (#7664)
Browse files Browse the repository at this point in the history
## Summary

This PR adds a new `cell` field to the JSON output format which
indicates the Notebook cell this diagnostic (and fix) belongs to. It
also updates the location for the diagnostic and fixes as per the
`NotebookIndex`. It will be used in the VSCode extension to display the
diagnostic in the correct cell.

The diagnostic and edit start and end source locations are translated
for the notebook as per the `NotebookIndex`. The end source location for
an edit needs some special handling.

### Edit end location

To understand this, the following context is required:

1. Visible lines in Jupyter Notebook vs JSON array strings: The newline
is part of the string in the JSON format. This means that if there are 3
visible lines in a cell where the last line is empty then the JSON would
contain 2 strings in the source array, both ending with a newline:

**JSON format:**
```json
[
	"# first line\n",
	"# second line\n",
]
```

**Notebook view:**
```python
1 # first line
2 # second line
3
```

2. If an edit needs to remove an entire line including the newline, then
the end location would be the start of the next row.

To remove a statement in the following code:
```python
import os
```

The edit would be:
```
start: row 1, col 1
end: row 2, col 1
```

Now, here's where the problem lies. The notebook index doesn't have any
information for row 2 because it doesn't exists in the actual notebook.
The newline was added by Ruff to concatenate the source code and it's
removed before writing back. But, the edit is computed looking at that
newline.

This means that while translating the end location for an edit belong to
a Notebook, we need to check if both the start and end location belongs
to the same cell. If not, then the end location should be the first
character of the next row and if so, translate that back to the last
character of the previous row. Taking the above example, the translated
location for Notebook would be:
```
start: row 1, col 1
end: row 1, col 10
```

## Test Plan

Add test cases for notebook output in the JSON format and update
existing snapshots.
  • Loading branch information
dhruvmanila committed Oct 13, 2023
1 parent 1e184e6 commit 66179af
Show file tree
Hide file tree
Showing 11 changed files with 256 additions and 19 deletions.
1 change: 1 addition & 0 deletions crates/ruff_cli/tests/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ import os
----- stdout -----
[
{
"cell": null,
"code": "F401",
"end_location": {
"column": 10,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ exit_code: 1
----- stdout -----
[
{
"cell": null,
"code": "F401",
"end_location": {
"column": 10,
Expand Down
100 changes: 88 additions & 12 deletions crates/ruff_linter/src/message/json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ use serde::{Serialize, Serializer};
use serde_json::{json, Value};

use ruff_diagnostics::Edit;
use ruff_source_file::SourceCode;
use ruff_notebook::NotebookIndex;
use ruff_source_file::{OneIndexed, SourceCode, SourceLocation};
use ruff_text_size::Ranged;

use crate::message::{Emitter, EmitterContext, Message};
Expand All @@ -19,16 +20,17 @@ impl Emitter for JsonEmitter {
&mut self,
writer: &mut dyn Write,
messages: &[Message],
_context: &EmitterContext,
context: &EmitterContext,
) -> anyhow::Result<()> {
serde_json::to_writer_pretty(writer, &ExpandedMessages { messages })?;
serde_json::to_writer_pretty(writer, &ExpandedMessages { messages, context })?;

Ok(())
}
}

struct ExpandedMessages<'a> {
messages: &'a [Message],
context: &'a EmitterContext<'a>,
}

impl Serialize for ExpandedMessages<'_> {
Expand All @@ -39,34 +41,48 @@ impl Serialize for ExpandedMessages<'_> {
let mut s = serializer.serialize_seq(Some(self.messages.len()))?;

for message in self.messages {
let value = message_to_json_value(message);
let value = message_to_json_value(message, self.context);
s.serialize_element(&value)?;
}

s.end()
}
}

pub(crate) fn message_to_json_value(message: &Message) -> Value {
pub(crate) fn message_to_json_value(message: &Message, context: &EmitterContext) -> Value {
let source_code = message.file.to_source_code();
let notebook_index = context.notebook_index(message.filename());

let fix = message.fix.as_ref().map(|fix| {
json!({
"applicability": fix.applicability(),
"message": message.kind.suggestion.as_deref(),
"edits": &ExpandedEdits { edits: fix.edits(), source_code: &source_code },
"edits": &ExpandedEdits { edits: fix.edits(), source_code: &source_code, notebook_index },
})
});

let start_location = source_code.source_location(message.start());
let end_location = source_code.source_location(message.end());
let noqa_location = source_code.source_location(message.noqa_offset);
let mut start_location = source_code.source_location(message.start());
let mut end_location = source_code.source_location(message.end());
let mut noqa_location = source_code.source_location(message.noqa_offset);
let mut notebook_cell_index = None;

if let Some(notebook_index) = notebook_index {
notebook_cell_index = Some(
notebook_index
.cell(start_location.row)
.unwrap_or(OneIndexed::MIN),
);
start_location = notebook_index.translate_location(&start_location);
end_location = notebook_index.translate_location(&end_location);
noqa_location = notebook_index.translate_location(&noqa_location);
}

json!({
"code": message.kind.rule().noqa_code().to_string(),
"url": message.kind.rule().url(),
"message": message.kind.body,
"fix": fix,
"cell": notebook_cell_index,
"location": start_location,
"end_location": end_location,
"filename": message.filename(),
Expand All @@ -77,6 +93,7 @@ pub(crate) fn message_to_json_value(message: &Message) -> Value {
struct ExpandedEdits<'a> {
edits: &'a [Edit],
source_code: &'a SourceCode<'a, 'a>,
notebook_index: Option<&'a NotebookIndex>,
}

impl Serialize for ExpandedEdits<'_> {
Expand All @@ -87,10 +104,57 @@ impl Serialize for ExpandedEdits<'_> {
let mut s = serializer.serialize_seq(Some(self.edits.len()))?;

for edit in self.edits {
let mut location = self.source_code.source_location(edit.start());
let mut end_location = self.source_code.source_location(edit.end());

if let Some(notebook_index) = self.notebook_index {
// There exists a newline between each cell's source code in the
// concatenated source code in Ruff. This newline doesn't actually
// exists in the JSON source field.
//
// Now, certain edits may try to remove this newline, which means
// the edit will spill over to the first character of the next cell.
// If it does, we need to translate the end location to the last
// character of the previous cell.
match (
notebook_index.cell(location.row),
notebook_index.cell(end_location.row),
) {
(Some(start_cell), Some(end_cell)) if start_cell != end_cell => {
debug_assert_eq!(end_location.column.get(), 1);

let prev_row = end_location.row.saturating_sub(1);
end_location = SourceLocation {
row: notebook_index.cell_row(prev_row).unwrap_or(OneIndexed::MIN),
column: self
.source_code
.source_location(self.source_code.line_end_exclusive(prev_row))
.column,
};
}
(Some(_), None) => {
debug_assert_eq!(end_location.column.get(), 1);

let prev_row = end_location.row.saturating_sub(1);
end_location = SourceLocation {
row: notebook_index.cell_row(prev_row).unwrap_or(OneIndexed::MIN),
column: self
.source_code
.source_location(self.source_code.line_end_exclusive(prev_row))
.column,
};
}
_ => {
end_location = notebook_index.translate_location(&end_location);
}
}
location = notebook_index.translate_location(&location);
}

let value = json!({
"content": edit.content().unwrap_or_default(),
"location": self.source_code.source_location(edit.start()),
"end_location": self.source_code.source_location(edit.end())
"location": location,
"end_location": end_location
});

s.serialize_element(&value)?;
Expand All @@ -104,7 +168,10 @@ impl Serialize for ExpandedEdits<'_> {
mod tests {
use insta::assert_snapshot;

use crate::message::tests::{capture_emitter_output, create_messages};
use crate::message::tests::{
capture_emitter_notebook_output, capture_emitter_output, create_messages,
create_notebook_messages,
};
use crate::message::JsonEmitter;

#[test]
Expand All @@ -114,4 +181,13 @@ mod tests {

assert_snapshot!(content);
}

#[test]
fn notebook_output() {
let mut emitter = JsonEmitter;
let (messages, notebook_indexes) = create_notebook_messages();
let content = capture_emitter_notebook_output(&mut emitter, &messages, &notebook_indexes);

assert_snapshot!(content);
}
}
18 changes: 15 additions & 3 deletions crates/ruff_linter/src/message/json_lines.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ impl Emitter for JsonLinesEmitter {
&mut self,
writer: &mut dyn Write,
messages: &[Message],
_context: &EmitterContext,
context: &EmitterContext,
) -> anyhow::Result<()> {
let mut w = writer;
for message in messages {
serde_json::to_writer(&mut w, &message_to_json_value(message))?;
serde_json::to_writer(&mut w, &message_to_json_value(message, context))?;
w.write_all(b"\n")?;
}
Ok(())
Expand All @@ -27,7 +27,10 @@ mod tests {
use insta::assert_snapshot;

use crate::message::json_lines::JsonLinesEmitter;
use crate::message::tests::{capture_emitter_output, create_messages};
use crate::message::tests::{
capture_emitter_notebook_output, capture_emitter_output, create_messages,
create_notebook_messages,
};

#[test]
fn output() {
Expand All @@ -36,4 +39,13 @@ mod tests {

assert_snapshot!(content);
}

#[test]
fn notebook_output() {
let mut emitter = JsonLinesEmitter;
let (messages, notebook_indexes) = create_notebook_messages();
let content = capture_emitter_notebook_output(&mut emitter, &messages, &notebook_indexes);

assert_snapshot!(content);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
---
source: crates/ruff_linter/src/message/json.rs
expression: content
---
[
{
"cell": 1,
"code": "F401",
"end_location": {
"column": 10,
"row": 2
},
"filename": "notebook.ipynb",
"fix": {
"applicability": "safe",
"edits": [
{
"content": "",
"end_location": {
"column": 10,
"row": 2
},
"location": {
"column": 1,
"row": 2
}
}
],
"message": "Remove unused import: `os`"
},
"location": {
"column": 8,
"row": 2
},
"message": "`os` imported but unused",
"noqa_row": 2,
"url": "https://docs.astral.sh/ruff/rules/unused-import"
},
{
"cell": 2,
"code": "F401",
"end_location": {
"column": 12,
"row": 2
},
"filename": "notebook.ipynb",
"fix": {
"applicability": "safe",
"edits": [
{
"content": "",
"end_location": {
"column": 1,
"row": 3
},
"location": {
"column": 1,
"row": 2
}
}
],
"message": "Remove unused import: `math`"
},
"location": {
"column": 8,
"row": 2
},
"message": "`math` imported but unused",
"noqa_row": 2,
"url": "https://docs.astral.sh/ruff/rules/unused-import"
},
{
"cell": 3,
"code": "F841",
"end_location": {
"column": 6,
"row": 4
},
"filename": "notebook.ipynb",
"fix": {
"applicability": "unsafe",
"edits": [
{
"content": "",
"end_location": {
"column": 10,
"row": 4
},
"location": {
"column": 1,
"row": 4
}
}
],
"message": "Remove assignment to unused variable `x`"
},
"location": {
"column": 5,
"row": 4
},
"message": "Local variable `x` is assigned to but never used",
"noqa_row": 4,
"url": "https://docs.astral.sh/ruff/rules/unused-variable"
}
]
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ expression: content
---
[
{
"cell": null,
"code": "F401",
"end_location": {
"column": 10,
Expand Down Expand Up @@ -36,6 +37,7 @@ expression: content
"url": "https://docs.astral.sh/ruff/rules/unused-import"
},
{
"cell": null,
"code": "F841",
"end_location": {
"column": 6,
Expand Down Expand Up @@ -68,6 +70,7 @@ expression: content
"url": "https://docs.astral.sh/ruff/rules/unused-variable"
},
{
"cell": null,
"code": "F821",
"end_location": {
"column": 5,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---
source: crates/ruff_linter/src/message/json_lines.rs
expression: content
---
{"cell":1,"code":"F401","end_location":{"column":10,"row":2},"filename":"notebook.ipynb","fix":{"applicability":"safe","edits":[{"content":"","end_location":{"column":10,"row":2},"location":{"column":1,"row":2}}],"message":"Remove unused import: `os`"},"location":{"column":8,"row":2},"message":"`os` imported but unused","noqa_row":2,"url":"https://docs.astral.sh/ruff/rules/unused-import"}
{"cell":2,"code":"F401","end_location":{"column":12,"row":2},"filename":"notebook.ipynb","fix":{"applicability":"safe","edits":[{"content":"","end_location":{"column":1,"row":3},"location":{"column":1,"row":2}}],"message":"Remove unused import: `math`"},"location":{"column":8,"row":2},"message":"`math` imported but unused","noqa_row":2,"url":"https://docs.astral.sh/ruff/rules/unused-import"}
{"cell":3,"code":"F841","end_location":{"column":6,"row":4},"filename":"notebook.ipynb","fix":{"applicability":"unsafe","edits":[{"content":"","end_location":{"column":10,"row":4},"location":{"column":1,"row":4}}],"message":"Remove assignment to unused variable `x`"},"location":{"column":5,"row":4},"message":"Local variable `x` is assigned to but never used","noqa_row":4,"url":"https://docs.astral.sh/ruff/rules/unused-variable"}

Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
source: crates/ruff_linter/src/message/json_lines.rs
expression: content
---
{"code":"F401","end_location":{"column":10,"row":1},"filename":"fib.py","fix":{"applicability":"unsafe","edits":[{"content":"","end_location":{"column":1,"row":2},"location":{"column":1,"row":1}}],"message":"Remove unused import: `os`"},"location":{"column":8,"row":1},"message":"`os` imported but unused","noqa_row":1,"url":"https://docs.astral.sh/ruff/rules/unused-import"}
{"code":"F841","end_location":{"column":6,"row":6},"filename":"fib.py","fix":{"applicability":"unsafe","edits":[{"content":"","end_location":{"column":10,"row":6},"location":{"column":5,"row":6}}],"message":"Remove assignment to unused variable `x`"},"location":{"column":5,"row":6},"message":"Local variable `x` is assigned to but never used","noqa_row":6,"url":"https://docs.astral.sh/ruff/rules/unused-variable"}
{"code":"F821","end_location":{"column":5,"row":1},"filename":"undef.py","fix":null,"location":{"column":4,"row":1},"message":"Undefined name `a`","noqa_row":1,"url":"https://docs.astral.sh/ruff/rules/undefined-name"}
{"cell":null,"code":"F401","end_location":{"column":10,"row":1},"filename":"fib.py","fix":{"applicability":"unsafe","edits":[{"content":"","end_location":{"column":1,"row":2},"location":{"column":1,"row":1}}],"message":"Remove unused import: `os`"},"location":{"column":8,"row":1},"message":"`os` imported but unused","noqa_row":1,"url":"https://docs.astral.sh/ruff/rules/unused-import"}
{"cell":null,"code":"F841","end_location":{"column":6,"row":6},"filename":"fib.py","fix":{"applicability":"unsafe","edits":[{"content":"","end_location":{"column":10,"row":6},"location":{"column":5,"row":6}}],"message":"Remove assignment to unused variable `x`"},"location":{"column":5,"row":6},"message":"Local variable `x` is assigned to but never used","noqa_row":6,"url":"https://docs.astral.sh/ruff/rules/unused-variable"}
{"cell":null,"code":"F821","end_location":{"column":5,"row":1},"filename":"undef.py","fix":null,"location":{"column":4,"row":1},"message":"Undefined name `a`","noqa_row":1,"url":"https://docs.astral.sh/ruff/rules/undefined-name"}

0 comments on commit 66179af

Please sign in to comment.