diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index c654e23302c38..ce09a0c08dc41 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -79,6 +79,10 @@ jobs: env: # Setting RUSTDOCFLAGS because `cargo doc --check` isn't yet implemented (https://github.com/rust-lang/cargo/issues/10025). RUSTDOCFLAGS: "-D warnings" + - uses: actions/upload-artifact@v3 + with: + name: ruff + path: target/debug/ruff scripts: name: "test scripts" @@ -103,3 +107,39 @@ jobs: - uses: crate-ci/typos@master with: files: . + + ecosystem: + name: "ecosystem" + runs-on: ubuntu-latest + needs: cargo-test + # Only runs on pull requests, since that is the only we way we can find the base version for comparison. + if: github.event_name == 'pull_request' + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: "3.11" + - uses: actions/download-artifact@v3 + id: ruff-target + with: + name: ruff + path: target/debug + - uses: dawidd6/action-download-artifact@v2 + with: + name: ruff + branch: ${{ github.event.pull_request.base_ref }} + check_artifacts: true + - name: Run ecosystem check + run: | + # Make executable, since artifact download doesn't preserve this + chmod +x ruff ${{ steps.ruff-target.outputs.download-path }}/ruff + + scripts/check_ecosystem.py ruff ${{ steps.ruff-target.outputs.download-path }}/ruff | tee ecosystem-result + + echo ${{ github.event.number }} > pr-number + - uses: actions/upload-artifact@v3 + with: + name: ecosystem-result + path: | + ecosystem-result + pr-number \ No newline at end of file diff --git a/.github/workflows/ecosystem-comment.yaml b/.github/workflows/ecosystem-comment.yaml new file mode 100644 index 0000000000000..fdee4472302b3 --- /dev/null +++ b/.github/workflows/ecosystem-comment.yaml @@ -0,0 +1,31 @@ +on: + workflow_run: + workflows: [CI] + types: [completed] + +permissions: + pull-requests: write + +jobs: + comment: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: dawidd6/action-download-artifact@v2 + id: download-result + with: + name: ecosystem-result + workflow: ci.yaml + run_id: ${{ github.event.workflow_run.id }} + if_no_artifact_found: ignore + - if: steps.download-result.outputs.found_artifact + id: result + run: | + echo "pr-number=$(> $GITHUB_OUTPUT + - name: Create comment + if: steps.download-result.outputs.found_artifact + uses: thollander/actions-comment-pull-request@v2 + with: + pr_number: ${{ steps.result.outputs.pr-number }} + filePath: ecosystem-result + comment_tag: ecosystem-results \ No newline at end of file diff --git a/scripts/check_ecosystem.py b/scripts/check_ecosystem.py new file mode 100755 index 0000000000000..9035e4751817b --- /dev/null +++ b/scripts/check_ecosystem.py @@ -0,0 +1,204 @@ +#!/usr/bin/env python3 +"""Check two versions of ruff against a corpus of open-source code. + +Example usage: + + scripts/check_ecosystem.py +""" + +# ruff: noqa: T201 + +import argparse +import asyncio +import difflib +import heapq +import re +import tempfile +import textwrap +from asyncio.subprocess import PIPE, create_subprocess_exec +from contextlib import asynccontextmanager +from pathlib import Path +from typing import TYPE_CHECKING, NamedTuple, Self + +if TYPE_CHECKING: + from collections.abc import AsyncIterator, Iterator, Sequence + + +class Repository(NamedTuple): + """A GitHub repository at a specific ref.""" + + org: str + repo: str + ref: str + + @asynccontextmanager + async def clone(self: Self) -> "AsyncIterator[Path]": + """Shallow clone this repository to a temporary directory.""" + with tempfile.TemporaryDirectory() as tmpdir: + process = await create_subprocess_exec( + "git", + "clone", + "--config", + "advice.detachedHead=false", + "--quiet", + "--depth", + "1", + "--no-tags", + "--branch", + self.ref, + f"https://github.com/{self.org}/{self.repo}", + tmpdir, + ) + + await process.wait() + + yield Path(tmpdir) + + +REPOSITORIES = { + "zulip": Repository("zulip", "zulip", "main"), + "bokeh": Repository("bokeh", "bokeh", "branch-3.2"), +} + +SUMMARY_LINE_RE = re.compile(r"^Found \d+ error.*$") + + +class RuffError(Exception): + """An error reported by ruff.""" + + +async def check(*, ruff: Path, path: Path) -> "Sequence[str]": + """Run the given ruff binary against the specified path.""" + proc = await create_subprocess_exec( + ruff.absolute(), + "check", + "--no-cache", + "--exit-zero", + ".", + stdout=PIPE, + stderr=PIPE, + cwd=path, + ) + + result, err = await proc.communicate() + + if proc.returncode != 0: + raise RuffError(err.decode("utf8")) + + lines = [ + line + for line in result.decode("utf8").splitlines() + if not SUMMARY_LINE_RE.match(line) + ] + + return sorted(lines) + + +class Diff(NamedTuple): + """A diff between two runs of ruff.""" + + removed: set[str] + added: set[str] + + def __bool__(self: Self) -> bool: + """Return true if this diff is non-empty.""" + return bool(self.removed or self.added) + + def __iter__(self: Self) -> "Iterator[str]": + """Iterate through the changed lines in diff format.""" + for line in heapq.merge(self.removed, self.added): + if line in self.removed: + yield f"- {line}" + else: + yield f"+ {line}" + + +async def compare(ruff1: Path, ruff2: Path, repo: Repository) -> Diff | None: + """Check a specific repository against two versions of ruff.""" + removed, added = set(), set() + + async with repo.clone() as path: + try: + async with asyncio.TaskGroup() as tg: + check1 = tg.create_task(check(ruff=ruff1, path=path)) + check2 = tg.create_task(check(ruff=ruff2, path=path)) + except ExceptionGroup as e: + raise e.exceptions[0] from e + + for line in difflib.ndiff(check1.result(), check2.result()): + if line.startswith("- "): + removed.add(line[2:]) + elif line.startswith("+ "): + added.add(line[2:]) + + return Diff(removed, added) + + +async def main(*, ruff1: Path, ruff2: Path) -> None: + """Check two versions of ruff against a corpus of open-source code.""" + results = await asyncio.gather( + *[compare(ruff1, ruff2, repo) for repo in REPOSITORIES.values()], + return_exceptions=True, + ) + + diffs = { name: result for name, result in zip(REPOSITORIES, results, strict=True) } + + total_removed = total_added = 0 + errors = 0 + + for diff in diffs.values(): + if isinstance(diff, Exception): + errors += 1 + else: + total_removed += len(diff.removed) + total_added += len(diff.added) + + if total_removed == 0 and total_added == 0 and errors == 0: + print("\u2705 ecosystem check detected no changes.") + else: + changes = f"(+{total_added}, -{total_removed}, {errors} error(s))" + + print(f"\u2139\ufe0f ecosystem check **detected changes**. {changes}") + print() + + for name, diff in diffs.items(): + print(f"
{name}") + print("

") + print() + + if isinstance(diff, Exception): + print("```") + print(str(diff)) + print("```") + elif diff: + diff_str = "\n".join(f" {line}" for line in diff) + + print("```diff") + print(diff_str) + print("```") + else: + print("_No changes detected_.") + + print() + print("

") + print("
") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Check two versions of ruff against a corpus of open-source code.", + epilog="scripts/check_ecosystem.py ", + ) + + parser.add_argument( + "ruff1", + type=Path, + ) + parser.add_argument( + "ruff2", + type=Path, + ) + + args = parser.parse_args() + + asyncio.run(main(ruff1=args.ruff1, ruff2=args.ruff2))