Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add AIR001: task variable name should be same as task_id arg #4687

Merged
merged 11 commits into from
May 29, 2023
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,7 @@ quality tools, including:
- [pep8-naming](https://pypi.org/project/pep8-naming/)
- [pydocstyle](https://pypi.org/project/pydocstyle/)
- [pygrep-hooks](https://github.com/pre-commit/pygrep-hooks)
- [pylint-airflow](https://pypi.org/project/pylint-airflow/)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I decided to link to the Pylint plugin, since this list refers to tools we've reimplemented, and it felt more appropriate than "Airflow" -- wdyt?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah this makes sense to me! hopefully we get to a point where we're linting for more than just pylint-airflow though 🙂

- [pyupgrade](https://pypi.org/project/pyupgrade/)
- [tryceratops](https://pypi.org/project/tryceratops/)
- [yesqa](https://pypi.org/project/yesqa/)
Expand Down
16 changes: 16 additions & 0 deletions crates/ruff/resources/test/fixtures/airflow/AIR001.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from airflow.operators import PythonOperator


def my_callable():
pass


my_task = PythonOperator(task_id="my_task", callable=my_callable)
my_task_2 = PythonOperator(callable=my_callable, task_id="my_task_2")

incorrect_name = PythonOperator(task_id="my_task")
incorrect_name_2 = PythonOperator(callable=my_callable, task_id="my_task_2")

from my_module import MyClass

incorrect_name = MyClass(task_id="my_task")
19 changes: 12 additions & 7 deletions crates/ruff/src/checkers/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ use crate::noqa::NoqaMapping;
use crate::registry::{AsRule, Rule};
use crate::rules::flake8_builtins::helpers::AnyShadowing;
use crate::rules::{
flake8_2020, flake8_annotations, flake8_async, flake8_bandit, flake8_blind_except,
airflow, flake8_2020, flake8_annotations, flake8_async, flake8_bandit, flake8_blind_except,
flake8_boolean_trap, flake8_bugbear, flake8_builtins, flake8_comprehensions, flake8_datetimez,
flake8_debugger, flake8_django, flake8_errmsg, flake8_future_annotations, flake8_gettext,
flake8_implicit_str_concat, flake8_import_conventions, flake8_logging_format, flake8_pie,
Expand Down Expand Up @@ -1633,27 +1633,23 @@ where
pycodestyle::rules::lambda_assignment(self, target, value, None, stmt);
}
}

if self.enabled(Rule::AssignmentToOsEnviron) {
flake8_bugbear::rules::assignment_to_os_environ(self, targets);
}

if self.enabled(Rule::HardcodedPasswordString) {
if let Some(diagnostic) =
flake8_bandit::rules::assign_hardcoded_password_string(value, targets)
{
self.diagnostics.push(diagnostic);
}
}

if self.enabled(Rule::GlobalStatement) {
for target in targets.iter() {
if let Expr::Name(ast::ExprName { id, .. }) = target {
pylint::rules::global_statement(self, id);
}
}
}

if self.enabled(Rule::UselessMetaclassType) {
pyupgrade::rules::useless_metaclass_type(self, stmt, value, targets);
}
Expand All @@ -1670,13 +1666,22 @@ where
if self.enabled(Rule::UnpackedListComprehension) {
pyupgrade::rules::unpacked_list_comprehension(self, targets, value);
}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Unrelated, but the newlines here felt inconsistent, so just removed.)

if self.enabled(Rule::PandasDfVariableName) {
if let Some(diagnostic) = pandas_vet::rules::assignment_to_df(targets) {
self.diagnostics.push(diagnostic);
}
}

if self
.settings
.rules
.enabled(Rule::AirflowVariableNameTaskIdMismatch)
{
if let Some(diagnostic) =
airflow::rules::variable_name_task_id(self, targets, value)
{
self.diagnostics.push(diagnostic);
}
}
if self.is_stub {
if self.any_enabled(&[
Rule::UnprefixedTypeParam,
Expand Down
3 changes: 3 additions & 0 deletions crates/ruff/src/codes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -748,6 +748,9 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> {
(Flake8Todos, "006") => (RuleGroup::Unspecified, Rule::InvalidTodoCapitalization),
(Flake8Todos, "007") => (RuleGroup::Unspecified, Rule::MissingSpaceAfterTodoColon),

// airflow
(Airflow, "001") => (RuleGroup::Unspecified, Rule::AirflowVariableNameTaskIdMismatch),

_ => return None,
})
}
5 changes: 5 additions & 0 deletions crates/ruff/src/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -667,6 +667,8 @@ ruff_macros::register_rules!(
rules::flake8_todos::rules::MissingTodoDescription,
rules::flake8_todos::rules::InvalidTodoCapitalization,
rules::flake8_todos::rules::MissingSpaceAfterTodoColon,
// airflow
rules::airflow::rules::AirflowVariableNameTaskIdMismatch,
);

pub trait AsRule {
Expand Down Expand Up @@ -837,6 +839,9 @@ pub enum Linter {
/// NumPy-specific rules
#[prefix = "NPY"]
Numpy,
/// [Airflow](https://pypi.org/project/apache-airflow/)
#[prefix = "AIR"]
Airflow,
/// Ruff-specific rules
#[prefix = "RUF"]
Ruff,
Expand Down
25 changes: 25 additions & 0 deletions crates/ruff/src/rules/airflow/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
//! Airflow-specific rules.
pub(crate) mod rules;

#[cfg(test)]
mod tests {
use std::path::Path;

use anyhow::Result;
use test_case::test_case;

use crate::registry::Rule;
use crate::test::test_path;
use crate::{assert_messages, settings};

#[test_case(Rule::AirflowVariableNameTaskIdMismatch, Path::new("AIR001.py"); "AIR001")]
fn rules(rule_code: Rule, path: &Path) -> Result<()> {
let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());
let diagnostics = test_path(
Path::new("airflow").join(path).as_path(),
&settings::Settings::for_rule(rule_code),
)?;
assert_messages!(snapshot, diagnostics);
Ok(())
}
}
3 changes: 3 additions & 0 deletions crates/ruff/src/rules/airflow/rules/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
mod task_variable_name;

pub(crate) use task_variable_name::{variable_name_task_id, AirflowVariableNameTaskIdMismatch};
102 changes: 102 additions & 0 deletions crates/ruff/src/rules/airflow/rules/task_variable_name.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
use rustpython_parser::ast;
use rustpython_parser::ast::{Expr, Ranged};

use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::prelude::Constant;

use crate::checkers::ast::Checker;

/// ## What it does
/// Checks that the task variable name matches the `task_id` value for
/// Airflow Operators.
///
/// ## Why is this bad?
/// When initializing an Airflow Operator, for consistency, the variable
/// name should match the `task_id` value. This makes it easier to
/// follow the flow of the DAG.
///
/// ## Example
/// ```python
/// from airflow.operators import PythonOperator
///
///
/// incorrect_name = PythonOperator(task_id="my_task")
/// ```
///
/// Use instead:
/// ```python
/// from airflow.operators import PythonOperator
///
///
/// my_task = PythonOperator(task_id="my_task")
/// ```
#[violation]
pub struct AirflowVariableNameTaskIdMismatch {
task_id: String,
}

impl Violation for AirflowVariableNameTaskIdMismatch {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I renamed this to prefix with "Airflow" to match the "Django" and "NumPy" conventions.

#[derive_message_formats]
fn message(&self) -> String {
let AirflowVariableNameTaskIdMismatch { task_id } = self;
format!("Task variable name should match the `task_id`: \"{task_id}\"")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Added the task_id itself to the message.)

}
}

/// AIR001
pub(crate) fn variable_name_task_id(
checker: &mut Checker,
targets: &[Expr],
value: &Expr,
) -> Option<Diagnostic> {
// If we have more than one target, we can't do anything.
if targets.len() != 1 {
return None;
}

let target = &targets[0];
let Expr::Name(ast::ExprName { id, .. }) = target else {
return None;
};

// If the value is not a call, we can't do anything.
let Expr::Call(ast::ExprCall { func, keywords, .. }) = value else {
return None;
};

// If the function doesn't come from Airflow, we can't do anything.
if !checker
.semantic_model()
.resolve_call_path(func)
.map_or(false, |call_path| matches!(call_path[0], "airflow"))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Modified this to use resolve_call_path, which will resolve to the fully qualified path -- so it'll do the right thing in cases like:

import airflow

foo = airflow.PythonOperator(...)

import airflow as Foo

foo = Foo.PythonOperator(...)

{
return None;
}

// If the call doesn't have a `task_id` keyword argument, we can't do anything.
let keyword = keywords
.iter()
.find(|keyword| keyword.arg.as_ref().map_or(false, |arg| arg == "task_id"))?;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can use ? to tighten this up a little.


// If the keyword argument is not a string, we can't do anything.
let task_id = match &keyword.value {
Expr::Constant(constant) => match &constant.value {
Constant::Str(value) => value,
_ => return None,
},
_ => return None,
};

// If the target name is the same as the task_id, no violation.
if id == task_id {
return None;
}

Some(Diagnostic::new(
AirflowVariableNameTaskIdMismatch {
task_id: task_id.to_string(),
},
target.range(),
))
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
---
source: crates/ruff/src/rules/airflow/mod.rs
---
AIR001.py:11:1: AIR001 Task variable name should match the `task_id`: "my_task"
|
11 | my_task_2 = PythonOperator(callable=my_callable, task_id="my_task_2")
12 |
13 | incorrect_name = PythonOperator(task_id="my_task")
| ^^^^^^^^^^^^^^ AIR001
14 | incorrect_name_2 = PythonOperator(callable=my_callable, task_id="my_task_2")
|

AIR001.py:12:1: AIR001 Task variable name should match the `task_id`: "my_task_2"
|
12 | incorrect_name = PythonOperator(task_id="my_task")
13 | incorrect_name_2 = PythonOperator(callable=my_callable, task_id="my_task_2")
| ^^^^^^^^^^^^^^^^ AIR001
14 |
15 | from my_module import MyClass
|


1 change: 1 addition & 0 deletions crates/ruff/src/rules/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#![allow(clippy::useless_format)]
pub mod airflow;
pub mod eradicate;
pub mod flake8_2020;
pub mod flake8_annotations;
Expand Down
4 changes: 4 additions & 0 deletions ruff.schema.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.