Skip to content

Commit

Permalink
[refurb] New rule to suggest min/max over sorted() (FURB192) (#10868
Browse files Browse the repository at this point in the history
)

## Summary

Fixes #10463

Add `FURB192` which detects violations like this:

```python
# Bad
a = sorted(l)[0]

# Good
a = min(l)
```

There is a caveat that @Skylion007 has pointed out, which is that
violations with `reverse=True` technically aren't compatible with this
change, in the edge case where the unstable behavior is intended. For
example:

```python
from operator import itemgetter
data = [('red', 1), ('blue', 1), ('red', 2), ('blue', 2)]

min(data, key=itemgetter(0))  # ('blue', 1)
sorted(data, key=itemgetter(0))[0]  # ('blue', 1)
sorted(data, key=itemgetter(0), reverse=True)[-1]  # ('blue, 2')
```

This seems like a rare edge case, but I can make the `reverse=True`
fixes unsafe if that's best.

## Test Plan

This is unit tested.

## References

https://github.com/dosisod/refurb/pull/333/files

---------

Co-authored-by: Charlie Marsh <charlie.r.marsh@gmail.com>
  • Loading branch information
ottaviohartman and charliermarsh committed Apr 23, 2024
1 parent 925c7f8 commit 111bbc6
Show file tree
Hide file tree
Showing 8 changed files with 485 additions and 0 deletions.
48 changes: 48 additions & 0 deletions crates/ruff_linter/resources/test/fixtures/refurb/FURB192.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Errors

sorted(l)[0]

sorted(l)[-1]

sorted(l, reverse=False)[-1]

sorted(l, key=lambda x: x)[0]

sorted(l, key=key_fn)[0]

sorted([1, 2, 3])[0]

# Unsafe

sorted(l, key=key_fn, reverse=True)[-1]

sorted(l, reverse=True)[0]

sorted(l, reverse=True)[-1]

# Non-errors

sorted(l, reverse=foo())[0]

sorted(l)[1]

sorted(get_list())[1]

sorted()[0]

sorted(l)[1]

sorted(l)[-2]

b = True

sorted(l, reverse=b)[0]

sorted(l, invalid_kwarg=True)[0]


def sorted():
pass


sorted(l)[0]
3 changes: 3 additions & 0 deletions crates/ruff_linter/src/checkers/ast/analyze/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,9 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) {
if checker.enabled(Rule::PotentialIndexError) {
pylint::rules::potential_index_error(checker, value, slice);
}
if checker.enabled(Rule::SortedMinMax) {
refurb::rules::sorted_min_max(checker, subscript);
}

pandas_vet::rules::subscript(checker, value, expr);
}
Expand Down
1 change: 1 addition & 0 deletions crates/ruff_linter/src/codes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1075,6 +1075,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> {
(Refurb, "180") => (RuleGroup::Preview, rules::refurb::rules::MetaClassABCMeta),
(Refurb, "181") => (RuleGroup::Preview, rules::refurb::rules::HashlibDigestHex),
(Refurb, "187") => (RuleGroup::Preview, rules::refurb::rules::ListReverseCopy),
(Refurb, "192") => (RuleGroup::Preview, rules::refurb::rules::SortedMinMax),

// flake8-logging
(Flake8Logging, "001") => (RuleGroup::Stable, rules::flake8_logging::rules::DirectLoggerInstantiation),
Expand Down
1 change: 1 addition & 0 deletions crates/ruff_linter/src/rules/refurb/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ mod tests {
#[test_case(Rule::HashlibDigestHex, Path::new("FURB181.py"))]
#[test_case(Rule::ListReverseCopy, Path::new("FURB187.py"))]
#[test_case(Rule::WriteWholeFile, Path::new("FURB103.py"))]
#[test_case(Rule::SortedMinMax, Path::new("FURB192.py"))]
fn rules(rule_code: Rule, path: &Path) -> Result<()> {
let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());
let diagnostics = test_path(
Expand Down
2 changes: 2 additions & 0 deletions crates/ruff_linter/src/rules/refurb/rules/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ pub(crate) use reimplemented_starmap::*;
pub(crate) use repeated_append::*;
pub(crate) use single_item_membership_test::*;
pub(crate) use slice_copy::*;
pub(crate) use sorted_min_max::*;
pub(crate) use type_none_comparison::*;
pub(crate) use unnecessary_enumerate::*;
pub(crate) use unnecessary_from_float::*;
Expand Down Expand Up @@ -50,6 +51,7 @@ mod reimplemented_starmap;
mod repeated_append;
mod single_item_membership_test;
mod slice_copy;
mod sorted_min_max;
mod type_none_comparison;
mod unnecessary_enumerate;
mod unnecessary_from_float;
Expand Down
238 changes: 238 additions & 0 deletions crates/ruff_linter/src/rules/refurb/rules/sorted_min_max.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,238 @@
use ruff_diagnostics::Diagnostic;
use ruff_diagnostics::Edit;
use ruff_diagnostics::Fix;
use ruff_diagnostics::FixAvailability;
use ruff_diagnostics::Violation;
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::Number;
use ruff_python_ast::{self as ast, Expr};
use ruff_text_size::Ranged;

use crate::checkers::ast::Checker;

/// ## What it does
/// Checks for uses of `sorted()` to retrieve the minimum or maximum value in
/// a sequence.
///
/// ## Why is this bad?
/// Using `sorted()` to compute the minimum or maximum value in a sequence is
/// inefficient and less readable than using `min()` or `max()` directly.
///
/// ## Example
/// ```python
/// nums = [3, 1, 4, 1, 5]
/// lowest = sorted(nums)[0]
/// highest = sorted(nums)[-1]
/// highest = sorted(nums, reverse=True)[0]
/// ```
///
/// Use instead:
/// ```python
/// nums = [3, 1, 4, 1, 5]
/// lowest = min(nums)
/// highest = max(nums)
/// ```
///
/// ## Fix safety
/// In some cases, migrating to `min` or `max` can lead to a change in behavior,
/// notably when breaking ties.
///
/// As an example, `sorted(data, key=itemgetter(0), reverse=True)[0]` will return
/// the _last_ "minimum" element in the list, if there are multiple elements with
/// the same key. However, `min(data, key=itemgetter(0))` will return the _first_
/// "minimum" element in the list in the same scenario.
///
/// AS such, this rule's fix is marked as unsafe when the `reverse` keyword is used.
///
/// ## References
/// - [Python documentation: `min`](https://docs.python.org/3/library/functions.html#min)
/// - [Python documentation: `max`](https://docs.python.org/3/library/functions.html#max)
#[violation]
pub struct SortedMinMax {
min_max: MinMax,
}

impl Violation for SortedMinMax {
const FIX_AVAILABILITY: FixAvailability = FixAvailability::Sometimes;

#[derive_message_formats]
fn message(&self) -> String {
let Self { min_max } = self;
match min_max {
MinMax::Min => {
format!("Prefer `min` over `sorted()` to compute the minimum value in a sequence")
}
MinMax::Max => {
format!("Prefer `max` over `sorted()` to compute the maximum value in a sequence")
}
}
}

fn fix_title(&self) -> Option<String> {
let Self { min_max } = self;
match min_max {
MinMax::Min => Some("Replace with `min`".to_string()),
MinMax::Max => Some("Replace with `max`".to_string()),
}
}
}

/// FURB192
pub(crate) fn sorted_min_max(checker: &mut Checker, subscript: &ast::ExprSubscript) {
if subscript.ctx.is_store() || subscript.ctx.is_del() {
return;
}

let ast::ExprSubscript { slice, value, .. } = &subscript;

// Early return if index is not unary or a number literal.
if !(slice.is_number_literal_expr() || slice.is_unary_op_expr()) {
return;
}

// Early return if the value is not a call expression.
let Expr::Call(ast::ExprCall {
func, arguments, ..
}) = value.as_ref()
else {
return;
};

// Check if the index is either 0 or -1.
let index = match slice.as_ref() {
// [0]
Expr::NumberLiteral(ast::ExprNumberLiteral {
value: Number::Int(index),
..
}) if *index == 0 => Index::First,

// [-1]
Expr::UnaryOp(ast::ExprUnaryOp {
op: ast::UnaryOp::USub,
operand,
..
}) => {
match operand.as_ref() {
// [-1]
Expr::NumberLiteral(ast::ExprNumberLiteral {
value: Number::Int(index),
..
}) if *index == 1 => Index::Last,
_ => return,
}
}
_ => return,
};

// Check if the value is a call to `sorted()`.
if !checker.semantic().match_builtin_expr(func, "sorted") {
return;
}

// Check if the call to `sorted()` has a single argument.
let [list_expr] = arguments.args.as_ref() else {
return;
};

let mut reverse_keyword = None;
let mut key_keyword_expr = None;

// Check if the call to `sorted()` has the `reverse` and `key` keywords.
for keyword in arguments.keywords.iter() {
// If the call contains `**kwargs`, return.
let Some(arg) = keyword.arg.as_ref() else {
return;
};

match arg.as_str() {
"reverse" => {
reverse_keyword = Some(keyword);
}
"key" => {
key_keyword_expr = Some(keyword);
}
_ => {
// If unexpected keyword is found, return.
return;
}
}
}

let is_reversed = if let Some(reverse_keyword) = reverse_keyword {
match reverse_keyword.value.as_boolean_literal_expr() {
Some(ast::ExprBooleanLiteral { value, .. }) => *value,
// If the value is not a boolean literal, we can't determine if it is reversed.
_ => return,
}
} else {
// No reverse keyword, so it is not reversed.
false
};

// Determine whether the operation is computing a minimum or maximum value.
let min_max = match (index, is_reversed) {
(Index::First, false) => MinMax::Min,
(Index::First, true) => MinMax::Max,
(Index::Last, false) => MinMax::Max,
(Index::Last, true) => MinMax::Min,
};

let mut diagnostic = Diagnostic::new(SortedMinMax { min_max }, subscript.range());

if checker.semantic().has_builtin_binding(min_max.as_str()) {
diagnostic.set_fix({
let replacement = if let Some(key) = key_keyword_expr {
format!(
"{min_max}({}, {})",
checker.locator().slice(list_expr),
checker.locator().slice(key),
)
} else {
format!("{min_max}({})", checker.locator().slice(list_expr))
};

let replacement = Edit::range_replacement(replacement, subscript.range());
if is_reversed {
Fix::unsafe_edit(replacement)
} else {
Fix::safe_edit(replacement)
}
});
}

checker.diagnostics.push(diagnostic);
}

#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum MinMax {
/// E.g., `min(nums)`
Min,
/// E.g., `max(nums)`
Max,
}

impl MinMax {
fn as_str(self) -> &'static str {
match self {
Self::Min => "min",
Self::Max => "max",
}
}
}

impl std::fmt::Display for MinMax {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Min => write!(f, "min"),
Self::Max => write!(f, "max"),
}
}
}

#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum Index {
/// E.g., `sorted(nums)[0]`
First,
/// E.g., `sorted(nums)[-1]`
Last,
}

0 comments on commit 111bbc6

Please sign in to comment.