Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[refurb] Implement write-whole-file (FURB103) #10802

Merged
merged 6 commits into from Apr 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
132 changes: 132 additions & 0 deletions crates/ruff_linter/resources/test/fixtures/refurb/FURB103.py
@@ -0,0 +1,132 @@
def foo():
...


def bar(x):
...


# Errors.

# FURB103
with open("file.txt", "w") as f:
f.write("test")

# FURB103
with open("file.txt", "wb") as f:
f.write(foobar)

# FURB103
with open("file.txt", mode="wb") as f:
f.write(b"abc")

# FURB103
with open("file.txt", "w", encoding="utf8") as f:
f.write(foobar)

# FURB103
with open("file.txt", "w", errors="ignore") as f:
f.write(foobar)

# FURB103
with open("file.txt", mode="w") as f:
f.write(foobar)

# FURB103
with open(foo(), "wb") as f:
# The body of `with` is non-trivial, but the recommendation holds.
bar("pre")
f.write(bar())
bar("post")
print("Done")

# FURB103
with open("a.txt", "w") as a, open("b.txt", "wb") as b:
a.write(x)
b.write(y)

# FURB103
with foo() as a, open("file.txt", "w") as b, foo() as c:
# We have other things in here, multiple with items, but the user
# writes a single time to file and that bit they can replace.
bar(a)
b.write(bar(bar(a + x)))
bar(c)


# FURB103
with open("file.txt", "w", newline="\r\n") as f:
f.write(foobar)


# Non-errors.

with open("file.txt", errors="ignore", mode="wb") as f:
# Path.write_bytes() does not support errors
f.write(foobar)

f2 = open("file2.txt", "w")
with open("file.txt", "w") as f:
f2.write(x)

# mode is dynamic
with open("file.txt", foo()) as f:
f.write(x)

# keyword mode is incorrect
with open("file.txt", mode="a+") as f:
f.write(x)

# enables line buffering, not supported in write_text()
with open("file.txt", buffering=1) as f:
f.write(x)

# dont mistake "newline" for "mode"
with open("file.txt", newline="wb") as f:
f.write(x)

# I guess we can possibly also report this case, but the question
# is why the user would put "w+" here in the first place.
with open("file.txt", "w+") as f:
f.write(x)

# Even though we write the whole file, we do other things.
with open("file.txt", "w") as f:
f.write(x)
f.seek(0)
x += f.read(100)

# This shouldn't error, since it could contain unsupported arguments, like `buffering`.
with open(*filename, mode="w") as f:
f.write(x)

# This shouldn't error, since it could contain unsupported arguments, like `buffering`.
with open(**kwargs) as f:
f.write(x)

# This shouldn't error, since it could contain unsupported arguments, like `buffering`.
with open("file.txt", **kwargs) as f:
f.write(x)

# This shouldn't error, since it could contain unsupported arguments, like `buffering`.
with open("file.txt", mode="w", **kwargs) as f:
f.write(x)

# This could error (but doesn't), since it can't contain unsupported arguments, like
# `buffering`.
with open(*filename, mode="w") as f:
f.write(x)

# This could error (but doesn't), since it can't contain unsupported arguments, like
# `buffering`.
with open(*filename, file="file.txt", mode="w") as f:
f.write(x)

# Loops imply multiple writes
with open("file.txt", "w") as f:
while x < 0:
f.write(foobar)

with open("file.txt", "w") as f:
for line in text:
f.write(line)
3 changes: 3 additions & 0 deletions crates/ruff_linter/src/checkers/ast/analyze/statement.rs
Expand Up @@ -1225,6 +1225,9 @@ pub(crate) fn statement(stmt: &Stmt, checker: &mut Checker) {
if checker.enabled(Rule::ReadWholeFile) {
refurb::rules::read_whole_file(checker, with_stmt);
}
if checker.enabled(Rule::WriteWholeFile) {
refurb::rules::write_whole_file(checker, with_stmt);
}
if checker.enabled(Rule::UselessWithLock) {
pylint::rules::useless_with_lock(checker, with_stmt);
}
Expand Down
1 change: 1 addition & 0 deletions crates/ruff_linter/src/codes.rs
Expand Up @@ -1037,6 +1037,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> {

// refurb
(Refurb, "101") => (RuleGroup::Preview, rules::refurb::rules::ReadWholeFile),
(Refurb, "103") => (RuleGroup::Preview, rules::refurb::rules::WriteWholeFile),
(Refurb, "105") => (RuleGroup::Preview, rules::refurb::rules::PrintEmptyString),
(Refurb, "110") => (RuleGroup::Preview, rules::refurb::rules::IfExpInsteadOfOrOperator),
#[allow(deprecated)]
Expand Down
219 changes: 217 additions & 2 deletions crates/ruff_linter/src/rules/refurb/helpers.rs
@@ -1,6 +1,7 @@
use ruff_python_ast as ast;
use ruff_python_ast::{self as ast, Expr};
use ruff_python_codegen::Generator;
use ruff_text_size::TextRange;
use ruff_python_semantic::{BindingId, ResolvedReference, SemanticModel};
use ruff_text_size::{Ranged, TextRange};

/// Format a code snippet to call `name.method()`.
pub(super) fn generate_method_call(name: &str, method: &str, generator: Generator) -> String {
Expand Down Expand Up @@ -61,3 +62,217 @@ pub(super) fn generate_none_identity_comparison(
};
generator.expr(&compare.into())
}

// Helpers for read-whole-file and write-whole-file
#[derive(Debug, Copy, Clone)]
pub(super) enum OpenMode {
/// "r"
ReadText,
/// "rb"
ReadBytes,
/// "w"
WriteText,
/// "wb"
WriteBytes,
}

impl OpenMode {
pub(super) fn pathlib_method(self) -> String {
match self {
OpenMode::ReadText => "read_text".to_string(),
OpenMode::ReadBytes => "read_bytes".to_string(),
OpenMode::WriteText => "write_text".to_string(),
OpenMode::WriteBytes => "write_bytes".to_string(),
}
}
}

/// A grab bag struct that joins together every piece of information we need to track
/// about a file open operation.
#[derive(Debug)]
pub(super) struct FileOpen<'a> {
/// With item where the open happens, we use it for the reporting range.
pub(super) item: &'a ast::WithItem,
/// Filename expression used as the first argument in `open`, we use it in the diagnostic message.
pub(super) filename: &'a Expr,
/// The file open mode.
pub(super) mode: OpenMode,
/// The file open keywords.
pub(super) keywords: Vec<&'a ast::Keyword>,
/// We only check `open` operations whose file handles are used exactly once.
pub(super) reference: &'a ResolvedReference,
}

impl<'a> FileOpen<'a> {
/// Determine whether an expression is a reference to the file handle, by comparing
/// their ranges. If two expressions have the same range, they must be the same expression.
pub(super) fn is_ref(&self, expr: &Expr) -> bool {
expr.range() == self.reference.range()
}
}

/// Find and return all `open` operations in the given `with` statement.
pub(super) fn find_file_opens<'a>(
with: &'a ast::StmtWith,
semantic: &'a SemanticModel<'a>,
read_mode: bool,
) -> Vec<FileOpen<'a>> {
with.items
.iter()
.filter_map(|item| find_file_open(item, with, semantic, read_mode))
.collect()
}

/// Find `open` operation in the given `with` item.
fn find_file_open<'a>(
item: &'a ast::WithItem,
with: &'a ast::StmtWith,
semantic: &'a SemanticModel<'a>,
read_mode: bool,
) -> Option<FileOpen<'a>> {
// We want to match `open(...) as var`.
let ast::ExprCall {
func,
arguments: ast::Arguments { args, keywords, .. },
..
} = item.context_expr.as_call_expr()?;

if func.as_name_expr()?.id != "open" {
return None;
}

let var = item.optional_vars.as_deref()?.as_name_expr()?;

// Ignore calls with `*args` and `**kwargs`. In the exact case of `open(*filename, mode="w")`,
// it could be a match; but in all other cases, the call _could_ contain unsupported keyword
// arguments, like `buffering`.
if args.iter().any(Expr::is_starred_expr)
|| keywords.iter().any(|keyword| keyword.arg.is_none())
{
return None;
}

// Match positional arguments, get filename and mode.
let (filename, pos_mode) = match_open_args(args)?;

// Match keyword arguments, get keyword arguments to forward and possibly mode.
let (keywords, kw_mode) = match_open_keywords(keywords, read_mode)?;

let mode = kw_mode.unwrap_or(pos_mode);

match mode {
OpenMode::ReadText | OpenMode::ReadBytes => {
if !read_mode {
return None;
}
}
OpenMode::WriteText | OpenMode::WriteBytes => {
if read_mode {
return None;
}
}
}

// Path.read_bytes and Path.write_bytes do not support any kwargs.
if matches!(mode, OpenMode::ReadBytes | OpenMode::WriteBytes) && !keywords.is_empty() {
return None;
}

// Now we need to find what is this variable bound to...
let scope = semantic.current_scope();
let bindings: Vec<BindingId> = scope.get_all(var.id.as_str()).collect();

let binding = bindings
.iter()
.map(|x| semantic.binding(*x))
// We might have many bindings with the same name, but we only care
// for the one we are looking at right now.
.find(|binding| binding.range() == var.range())?;

// Since many references can share the same binding, we can limit our attention span
// exclusively to the body of the current `with` statement.
let references: Vec<&ResolvedReference> = binding
.references
.iter()
.map(|id| semantic.reference(*id))
.filter(|reference| with.range().contains_range(reference.range()))
.collect();

// And even with all these restrictions, if the file handle gets used not exactly once,
// it doesn't fit the bill.
let [reference] = references.as_slice() else {
return None;
};

Some(FileOpen {
item,
filename,
mode,
keywords,
reference,
})
}

/// Match positional arguments. Return expression for the file name and open mode.
fn match_open_args(args: &[Expr]) -> Option<(&Expr, OpenMode)> {
match args {
[filename] => Some((filename, OpenMode::ReadText)),
[filename, mode_literal] => match_open_mode(mode_literal).map(|mode| (filename, mode)),
// The third positional argument is `buffering` and the pathlib methods don't support it.
_ => None,
}
}

/// Match keyword arguments. Return keyword arguments to forward and mode.
fn match_open_keywords(
keywords: &[ast::Keyword],
read_mode: bool,
) -> Option<(Vec<&ast::Keyword>, Option<OpenMode>)> {
let mut result: Vec<&ast::Keyword> = vec![];
let mut mode: Option<OpenMode> = None;

for keyword in keywords {
match keyword.arg.as_ref()?.as_str() {
"encoding" | "errors" => result.push(keyword),
// newline is only valid for write_text
"newline" => {
if read_mode {
return None;
}
result.push(keyword);
}

// This might look bizarre, - why do we re-wrap this optional?
//
// The answer is quite simple, in the result of the current function
// mode being `None` is a possible and correct option meaning that there
// was NO "mode" keyword argument.
//
// The result of `match_open_mode` on the other hand is None
// in the cases when the mode is not compatible with `write_text`/`write_bytes`.
//
// So, here we return None from this whole function if the mode
// is incompatible.
"mode" => mode = Some(match_open_mode(&keyword.value)?),

// All other keywords cannot be directly forwarded.
_ => return None,
};
}
Some((result, mode))
}

/// Match open mode to see if it is supported.
fn match_open_mode(mode: &Expr) -> Option<OpenMode> {
let ast::ExprStringLiteral { value, .. } = mode.as_string_literal_expr()?;
if value.is_implicit_concatenated() {
return None;
}
match value.to_str() {
"r" => Some(OpenMode::ReadText),
"rb" => Some(OpenMode::ReadBytes),
"w" => Some(OpenMode::WriteText),
"wb" => Some(OpenMode::WriteBytes),
_ => None,
}
}
1 change: 1 addition & 0 deletions crates/ruff_linter/src/rules/refurb/mod.rs
Expand Up @@ -41,6 +41,7 @@ mod tests {
#[test_case(Rule::MetaClassABCMeta, Path::new("FURB180.py"))]
#[test_case(Rule::HashlibDigestHex, Path::new("FURB181.py"))]
#[test_case(Rule::ListReverseCopy, Path::new("FURB187.py"))]
#[test_case(Rule::WriteWholeFile, Path::new("FURB103.py"))]
fn rules(rule_code: Rule, path: &Path) -> Result<()> {
let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());
let diagnostics = test_path(
Expand Down
2 changes: 2 additions & 0 deletions crates/ruff_linter/src/rules/refurb/rules/mod.rs
Expand Up @@ -25,6 +25,7 @@ pub(crate) use type_none_comparison::*;
pub(crate) use unnecessary_enumerate::*;
pub(crate) use unnecessary_from_float::*;
pub(crate) use verbose_decimal_constructor::*;
pub(crate) use write_whole_file::*;

mod bit_count;
mod check_and_remove_from_set;
Expand Down Expand Up @@ -53,3 +54,4 @@ mod type_none_comparison;
mod unnecessary_enumerate;
mod unnecessary_from_float;
mod verbose_decimal_constructor;
mod write_whole_file;