Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More stability checker options #5299

Merged
merged 6 commits into from Jun 22, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
51 changes: 44 additions & 7 deletions crates/ruff_dev/src/check_formatter_stability.rs
Expand Up @@ -42,6 +42,9 @@ pub(crate) struct Args {
/// Print only the first error and exit, `-x` is same as pytest
#[arg(long, short = 'x')]
pub(crate) exit_first_error: bool,
/// Checks each project inside a directory
#[arg(long)]
pub(crate) multi_project: bool,
}

/// Generate ourself a `try_parse_from` impl for `CheckArgs`. This is a strange way to use clap but
Expand All @@ -54,6 +57,33 @@ struct WrapperArgs {
}

pub(crate) fn main(args: &Args) -> anyhow::Result<ExitCode> {
let all_success = if args.multi_project {
let mut all_success = true;
for dir in args.files[0].read_dir()? {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why only the first path?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that should have been only my dummy, fixed

let dir = dir?;
println!("Starting {}", dir.path().display());
let success = check_repo(&Args {
files: vec![dir.path().clone()],
..*args
});
println!("Finished {}: {:?}", dir.path().display(), success);
if !matches!(success, Ok(true)) {
all_success = false;
}
}
all_success
} else {
check_repo(args)?
};
if all_success {
Ok(ExitCode::SUCCESS)
} else {
Ok(ExitCode::FAILURE)
}
}

/// Returns whether the check was successful
pub(crate) fn check_repo(args: &Args) -> anyhow::Result<bool> {
let start = Instant::now();

// Find files to check (or in this case, format twice). Adapted from ruff_cli
Expand All @@ -77,13 +107,20 @@ pub(crate) fn main(args: &Args) -> anyhow::Result<ExitCode> {
let (paths, _resolver) = python_files_in_path(&cli.files, &pyproject_config, &overrides)?;
assert!(!paths.is_empty(), "no python files in {:?}", cli.files);

let mut formatted_counter = 0;
let errors = paths
.into_iter()
.map(|dir_entry| {
// Doesn't make sense to recover here in this test script
let file = dir_entry
.expect("Iterating the files in the repository failed")
.into_path();
dir_entry.expect("Iterating the files in the repository failed")
})
.filter(|dir_entry| {
// For some reason it does not filter in the beginning
dir_entry.file_name() != "pyproject.toml"
})
.map(|dir_entry| {
let file = dir_entry.path().to_path_buf();
formatted_counter += 1;
// Handle panics (mostly in `debug_assert!`)
let result = match catch_unwind(|| check_file(&file)) {
Ok(result) => result,
Expand Down Expand Up @@ -166,20 +203,20 @@ Formatted twice:
}

if args.exit_first_error {
return Ok(ExitCode::FAILURE);
return Ok(false);
}
}
let duration = start.elapsed();
println!(
"Formatting {} files twice took {:.2}s",
cli.files.len(),
formatted_counter,
duration.as_secs_f32()
);

if any_errors {
Ok(ExitCode::FAILURE)
Ok(false)
} else {
Ok(ExitCode::SUCCESS)
Ok(true)
}
}

Expand Down
32 changes: 22 additions & 10 deletions scripts/check_ecosystem.py
Expand Up @@ -44,11 +44,11 @@ class Repository(NamedTuple):
async def clone(self: Self, checkout_dir: Path) -> AsyncIterator[Path]:
"""Shallow clone this repository to a temporary directory."""
if checkout_dir.exists():
logger.debug(f"Reusing {self.org}/{self.repo}")
logger.debug(f"Reusing {self.org}:{self.repo}")
yield Path(checkout_dir)
return

logger.debug(f"Cloning {self.org}/{self.repo}")
logger.debug(f"Cloning {self.org}:{self.repo}")
git_command = [
"git",
"clone",
Expand Down Expand Up @@ -177,18 +177,17 @@ async def compare(
"""Check a specific repository against two versions of ruff."""
removed, added = set(), set()

# Allows to keep the checkouts locations
# By the default, the git clone are transient, but if the user provides a
# directory for permanent storage we keep it there
if checkouts:
checkout_parent = checkouts.joinpath(repo.org)
# Don't create the repodir itself, we need that for checking for existing
# clones
checkout_parent.mkdir(exist_ok=True, parents=True)
location_context = nullcontext(checkout_parent)
location_context = nullcontext(checkouts)
else:
location_context = tempfile.TemporaryDirectory()

with location_context as checkout_parent:
checkout_dir = Path(checkout_parent).joinpath(repo.repo)
assert ":" not in repo.org
assert ":" not in repo.repo
checkout_dir = Path(checkout_parent).joinpath(f"{repo.org}:{repo.repo}")
async with repo.clone(checkout_dir) as path:
try:
async with asyncio.TaskGroup() as tg:
Expand Down Expand Up @@ -284,8 +283,19 @@ async def main(

logger.debug(f"Checking {len(repositories)} projects")

# https://stackoverflow.com/a/61478547/3549270
# Otherwise doing 3k repositories can take >8GB RAM
semaphore = asyncio.Semaphore(50)

async def limited_parallelism(coroutine): # noqa: ANN
async with semaphore:
return await coroutine

results = await asyncio.gather(
*[compare(ruff1, ruff2, repo, checkouts) for repo in repositories.values()],
*[
limited_parallelism(compare(ruff1, ruff2, repo, checkouts))
for repo in repositories.values()
],
return_exceptions=True,
)

Expand Down Expand Up @@ -433,6 +443,8 @@ async def main(
logging.basicConfig(level=logging.INFO)

loop = asyncio.get_event_loop()
if args.checkouts:
args.checkouts.mkdir(exist_ok=True, parents=True)
main_task = asyncio.ensure_future(
main(
ruff1=args.ruff1,
Expand Down