Skip to content

Commit

Permalink
[refurb] Implement write-whole-file (FURB103) (#10802)
Browse files Browse the repository at this point in the history
## Summary

Implement `write-whole-file` (`FURB103`), part of #1348. This is largely
a copy and paste of `read-whole-file` #7682.

## Test Plan

Text fixture added.

---------

Co-authored-by: Dhruv Manilawala <dhruvmanila@gmail.com>
  • Loading branch information
augustelalande and dhruvmanila committed Apr 11, 2024
1 parent ac14d18 commit ffea1bb
Show file tree
Hide file tree
Showing 11 changed files with 640 additions and 189 deletions.
132 changes: 132 additions & 0 deletions crates/ruff_linter/resources/test/fixtures/refurb/FURB103.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
def foo():
...


def bar(x):
...


# Errors.

# FURB103
with open("file.txt", "w") as f:
f.write("test")

# FURB103
with open("file.txt", "wb") as f:
f.write(foobar)

# FURB103
with open("file.txt", mode="wb") as f:
f.write(b"abc")

# FURB103
with open("file.txt", "w", encoding="utf8") as f:
f.write(foobar)

# FURB103
with open("file.txt", "w", errors="ignore") as f:
f.write(foobar)

# FURB103
with open("file.txt", mode="w") as f:
f.write(foobar)

# FURB103
with open(foo(), "wb") as f:
# The body of `with` is non-trivial, but the recommendation holds.
bar("pre")
f.write(bar())
bar("post")
print("Done")

# FURB103
with open("a.txt", "w") as a, open("b.txt", "wb") as b:
a.write(x)
b.write(y)

# FURB103
with foo() as a, open("file.txt", "w") as b, foo() as c:
# We have other things in here, multiple with items, but the user
# writes a single time to file and that bit they can replace.
bar(a)
b.write(bar(bar(a + x)))
bar(c)


# FURB103
with open("file.txt", "w", newline="\r\n") as f:
f.write(foobar)


# Non-errors.

with open("file.txt", errors="ignore", mode="wb") as f:
# Path.write_bytes() does not support errors
f.write(foobar)

f2 = open("file2.txt", "w")
with open("file.txt", "w") as f:
f2.write(x)

# mode is dynamic
with open("file.txt", foo()) as f:
f.write(x)

# keyword mode is incorrect
with open("file.txt", mode="a+") as f:
f.write(x)

# enables line buffering, not supported in write_text()
with open("file.txt", buffering=1) as f:
f.write(x)

# dont mistake "newline" for "mode"
with open("file.txt", newline="wb") as f:
f.write(x)

# I guess we can possibly also report this case, but the question
# is why the user would put "w+" here in the first place.
with open("file.txt", "w+") as f:
f.write(x)

# Even though we write the whole file, we do other things.
with open("file.txt", "w") as f:
f.write(x)
f.seek(0)
x += f.read(100)

# This shouldn't error, since it could contain unsupported arguments, like `buffering`.
with open(*filename, mode="w") as f:
f.write(x)

# This shouldn't error, since it could contain unsupported arguments, like `buffering`.
with open(**kwargs) as f:
f.write(x)

# This shouldn't error, since it could contain unsupported arguments, like `buffering`.
with open("file.txt", **kwargs) as f:
f.write(x)

# This shouldn't error, since it could contain unsupported arguments, like `buffering`.
with open("file.txt", mode="w", **kwargs) as f:
f.write(x)

# This could error (but doesn't), since it can't contain unsupported arguments, like
# `buffering`.
with open(*filename, mode="w") as f:
f.write(x)

# This could error (but doesn't), since it can't contain unsupported arguments, like
# `buffering`.
with open(*filename, file="file.txt", mode="w") as f:
f.write(x)

# Loops imply multiple writes
with open("file.txt", "w") as f:
while x < 0:
f.write(foobar)

with open("file.txt", "w") as f:
for line in text:
f.write(line)
3 changes: 3 additions & 0 deletions crates/ruff_linter/src/checkers/ast/analyze/statement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1225,6 +1225,9 @@ pub(crate) fn statement(stmt: &Stmt, checker: &mut Checker) {
if checker.enabled(Rule::ReadWholeFile) {
refurb::rules::read_whole_file(checker, with_stmt);
}
if checker.enabled(Rule::WriteWholeFile) {
refurb::rules::write_whole_file(checker, with_stmt);
}
if checker.enabled(Rule::UselessWithLock) {
pylint::rules::useless_with_lock(checker, with_stmt);
}
Expand Down
1 change: 1 addition & 0 deletions crates/ruff_linter/src/codes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1037,6 +1037,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> {

// refurb
(Refurb, "101") => (RuleGroup::Preview, rules::refurb::rules::ReadWholeFile),
(Refurb, "103") => (RuleGroup::Preview, rules::refurb::rules::WriteWholeFile),
(Refurb, "105") => (RuleGroup::Preview, rules::refurb::rules::PrintEmptyString),
(Refurb, "110") => (RuleGroup::Preview, rules::refurb::rules::IfExpInsteadOfOrOperator),
#[allow(deprecated)]
Expand Down
219 changes: 217 additions & 2 deletions crates/ruff_linter/src/rules/refurb/helpers.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use ruff_python_ast as ast;
use ruff_python_ast::{self as ast, Expr};
use ruff_python_codegen::Generator;
use ruff_text_size::TextRange;
use ruff_python_semantic::{BindingId, ResolvedReference, SemanticModel};
use ruff_text_size::{Ranged, TextRange};

/// Format a code snippet to call `name.method()`.
pub(super) fn generate_method_call(name: &str, method: &str, generator: Generator) -> String {
Expand Down Expand Up @@ -61,3 +62,217 @@ pub(super) fn generate_none_identity_comparison(
};
generator.expr(&compare.into())
}

// Helpers for read-whole-file and write-whole-file
#[derive(Debug, Copy, Clone)]
pub(super) enum OpenMode {
/// "r"
ReadText,
/// "rb"
ReadBytes,
/// "w"
WriteText,
/// "wb"
WriteBytes,
}

impl OpenMode {
pub(super) fn pathlib_method(self) -> String {
match self {
OpenMode::ReadText => "read_text".to_string(),
OpenMode::ReadBytes => "read_bytes".to_string(),
OpenMode::WriteText => "write_text".to_string(),
OpenMode::WriteBytes => "write_bytes".to_string(),
}
}
}

/// A grab bag struct that joins together every piece of information we need to track
/// about a file open operation.
#[derive(Debug)]
pub(super) struct FileOpen<'a> {
/// With item where the open happens, we use it for the reporting range.
pub(super) item: &'a ast::WithItem,
/// Filename expression used as the first argument in `open`, we use it in the diagnostic message.
pub(super) filename: &'a Expr,
/// The file open mode.
pub(super) mode: OpenMode,
/// The file open keywords.
pub(super) keywords: Vec<&'a ast::Keyword>,
/// We only check `open` operations whose file handles are used exactly once.
pub(super) reference: &'a ResolvedReference,
}

impl<'a> FileOpen<'a> {
/// Determine whether an expression is a reference to the file handle, by comparing
/// their ranges. If two expressions have the same range, they must be the same expression.
pub(super) fn is_ref(&self, expr: &Expr) -> bool {
expr.range() == self.reference.range()
}
}

/// Find and return all `open` operations in the given `with` statement.
pub(super) fn find_file_opens<'a>(
with: &'a ast::StmtWith,
semantic: &'a SemanticModel<'a>,
read_mode: bool,
) -> Vec<FileOpen<'a>> {
with.items
.iter()
.filter_map(|item| find_file_open(item, with, semantic, read_mode))
.collect()
}

/// Find `open` operation in the given `with` item.
fn find_file_open<'a>(
item: &'a ast::WithItem,
with: &'a ast::StmtWith,
semantic: &'a SemanticModel<'a>,
read_mode: bool,
) -> Option<FileOpen<'a>> {
// We want to match `open(...) as var`.
let ast::ExprCall {
func,
arguments: ast::Arguments { args, keywords, .. },
..
} = item.context_expr.as_call_expr()?;

if func.as_name_expr()?.id != "open" {
return None;
}

let var = item.optional_vars.as_deref()?.as_name_expr()?;

// Ignore calls with `*args` and `**kwargs`. In the exact case of `open(*filename, mode="w")`,
// it could be a match; but in all other cases, the call _could_ contain unsupported keyword
// arguments, like `buffering`.
if args.iter().any(Expr::is_starred_expr)
|| keywords.iter().any(|keyword| keyword.arg.is_none())
{
return None;
}

// Match positional arguments, get filename and mode.
let (filename, pos_mode) = match_open_args(args)?;

// Match keyword arguments, get keyword arguments to forward and possibly mode.
let (keywords, kw_mode) = match_open_keywords(keywords, read_mode)?;

let mode = kw_mode.unwrap_or(pos_mode);

match mode {
OpenMode::ReadText | OpenMode::ReadBytes => {
if !read_mode {
return None;
}
}
OpenMode::WriteText | OpenMode::WriteBytes => {
if read_mode {
return None;
}
}
}

// Path.read_bytes and Path.write_bytes do not support any kwargs.
if matches!(mode, OpenMode::ReadBytes | OpenMode::WriteBytes) && !keywords.is_empty() {
return None;
}

// Now we need to find what is this variable bound to...
let scope = semantic.current_scope();
let bindings: Vec<BindingId> = scope.get_all(var.id.as_str()).collect();

let binding = bindings
.iter()
.map(|x| semantic.binding(*x))
// We might have many bindings with the same name, but we only care
// for the one we are looking at right now.
.find(|binding| binding.range() == var.range())?;

// Since many references can share the same binding, we can limit our attention span
// exclusively to the body of the current `with` statement.
let references: Vec<&ResolvedReference> = binding
.references
.iter()
.map(|id| semantic.reference(*id))
.filter(|reference| with.range().contains_range(reference.range()))
.collect();

// And even with all these restrictions, if the file handle gets used not exactly once,
// it doesn't fit the bill.
let [reference] = references.as_slice() else {
return None;
};

Some(FileOpen {
item,
filename,
mode,
keywords,
reference,
})
}

/// Match positional arguments. Return expression for the file name and open mode.
fn match_open_args(args: &[Expr]) -> Option<(&Expr, OpenMode)> {
match args {
[filename] => Some((filename, OpenMode::ReadText)),
[filename, mode_literal] => match_open_mode(mode_literal).map(|mode| (filename, mode)),
// The third positional argument is `buffering` and the pathlib methods don't support it.
_ => None,
}
}

/// Match keyword arguments. Return keyword arguments to forward and mode.
fn match_open_keywords(
keywords: &[ast::Keyword],
read_mode: bool,
) -> Option<(Vec<&ast::Keyword>, Option<OpenMode>)> {
let mut result: Vec<&ast::Keyword> = vec![];
let mut mode: Option<OpenMode> = None;

for keyword in keywords {
match keyword.arg.as_ref()?.as_str() {
"encoding" | "errors" => result.push(keyword),
// newline is only valid for write_text
"newline" => {
if read_mode {
return None;
}
result.push(keyword);
}

// This might look bizarre, - why do we re-wrap this optional?
//
// The answer is quite simple, in the result of the current function
// mode being `None` is a possible and correct option meaning that there
// was NO "mode" keyword argument.
//
// The result of `match_open_mode` on the other hand is None
// in the cases when the mode is not compatible with `write_text`/`write_bytes`.
//
// So, here we return None from this whole function if the mode
// is incompatible.
"mode" => mode = Some(match_open_mode(&keyword.value)?),

// All other keywords cannot be directly forwarded.
_ => return None,
};
}
Some((result, mode))
}

/// Match open mode to see if it is supported.
fn match_open_mode(mode: &Expr) -> Option<OpenMode> {
let ast::ExprStringLiteral { value, .. } = mode.as_string_literal_expr()?;
if value.is_implicit_concatenated() {
return None;
}
match value.to_str() {
"r" => Some(OpenMode::ReadText),
"rb" => Some(OpenMode::ReadBytes),
"w" => Some(OpenMode::WriteText),
"wb" => Some(OpenMode::WriteBytes),
_ => None,
}
}
1 change: 1 addition & 0 deletions crates/ruff_linter/src/rules/refurb/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ mod tests {
#[test_case(Rule::MetaClassABCMeta, Path::new("FURB180.py"))]
#[test_case(Rule::HashlibDigestHex, Path::new("FURB181.py"))]
#[test_case(Rule::ListReverseCopy, Path::new("FURB187.py"))]
#[test_case(Rule::WriteWholeFile, Path::new("FURB103.py"))]
fn rules(rule_code: Rule, path: &Path) -> Result<()> {
let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());
let diagnostics = test_path(
Expand Down
2 changes: 2 additions & 0 deletions crates/ruff_linter/src/rules/refurb/rules/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ pub(crate) use type_none_comparison::*;
pub(crate) use unnecessary_enumerate::*;
pub(crate) use unnecessary_from_float::*;
pub(crate) use verbose_decimal_constructor::*;
pub(crate) use write_whole_file::*;

mod bit_count;
mod check_and_remove_from_set;
Expand Down Expand Up @@ -53,3 +54,4 @@ mod type_none_comparison;
mod unnecessary_enumerate;
mod unnecessary_from_float;
mod verbose_decimal_constructor;
mod write_whole_file;

0 comments on commit ffea1bb

Please sign in to comment.