astral-sh · charliermarsh · Mar 10, 2023 · Mar 7, 2023 · Mar 10, 2023 · Mar 10, 2023
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -79,6 +79,10 @@ jobs:
         env:
           # Setting RUSTDOCFLAGS because `cargo doc --check` isn't yet implemented (https://github.com/rust-lang/cargo/issues/10025).
           RUSTDOCFLAGS: "-D warnings"
+      - uses: actions/upload-artifact@v3
+        with:
+          name: ruff
+          path: target/debug/ruff
 
 
   cargo-test-wasm:
@@ -123,3 +127,39 @@ jobs:
       - uses: crate-ci/typos@master
         with:
           files: .
+
+  ecosystem:
+    name: "ecosystem"
+    runs-on: ubuntu-latest
+    needs: cargo-test
+    # Only runs on pull requests, since that is the only we way we can find the base version for comparison.
+    if: github.event_name == 'pull_request'
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
+        with:
+          python-version: "3.11"
+      - uses: actions/download-artifact@v3
+        id: ruff-target
+        with:
+          name: ruff
+          path: target/debug
+      - uses: dawidd6/action-download-artifact@v2
+        with:
+          name: ruff
+          branch: ${{ github.event.pull_request.base_ref }}
+          check_artifacts: true
+      - name: Run ecosystem check
+        run: |
+          # Make executable, since artifact download doesn't preserve this
+          chmod +x ruff ${{ steps.ruff-target.outputs.download-path }}/ruff
+
+          scripts/check_ecosystem.py ruff ${{ steps.ruff-target.outputs.download-path }}/ruff | tee ecosystem-result
+
+          echo ${{ github.event.number }} > pr-number
+      - uses: actions/upload-artifact@v3
+        with:
+          name: ecosystem-result
+          path: |
+            ecosystem-result
+            pr-number
diff --git a/.github/workflows/ecosystem-comment.yaml b/.github/workflows/ecosystem-comment.yaml
@@ -0,0 +1,31 @@
+on:
+  workflow_run:
+    workflows: [CI]
+    types: [completed]
+
+permissions:
+  pull-requests: write
+
+jobs:
+  comment:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: dawidd6/action-download-artifact@v2
+        id: download-result
+        with:
+          name: ecosystem-result
+          workflow: ci.yaml
+          run_id: ${{ github.event.workflow_run.id }}
+          if_no_artifact_found: ignore
+      - if: steps.download-result.outputs.found_artifact
+        id: result
+        run: |
+          echo "pr-number=$(<pr-number)" >> $GITHUB_OUTPUT
+      - name: Create comment
+        if: steps.download-result.outputs.found_artifact
+        uses: thollander/actions-comment-pull-request@v2
+        with:
+          pr_number: ${{ steps.result.outputs.pr-number }}
+          filePath: ecosystem-result
+          comment_tag: ecosystem-results
diff --git a/scripts/check_ecosystem.py b/scripts/check_ecosystem.py
@@ -0,0 +1,208 @@
+#!/usr/bin/env python3
+"""Check two versions of ruff against a corpus of open-source code.
+
+Example usage:
+
+    scripts/check_ecosystem.py <path/to/ruff1> <path/to/ruff2>
+"""
+
+# ruff: noqa: T201
+
+import argparse
+import asyncio
+import difflib
+import heapq
+import re
+import tempfile
+from asyncio.subprocess import PIPE, create_subprocess_exec
+from contextlib import asynccontextmanager
+from pathlib import Path
+from typing import TYPE_CHECKING, NamedTuple, Self
+
+if TYPE_CHECKING:
+    from collections.abc import AsyncIterator, Iterator, Sequence
+
+
+class Repository(NamedTuple):
+    """A GitHub repository at a specific ref."""
+
+    org: str
+    repo: str
+    ref: str
+
+    @asynccontextmanager
+    async def clone(self: Self) -> "AsyncIterator[Path]":
+        """Shallow clone this repository to a temporary directory."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            process = await create_subprocess_exec(
+                "git",
+                "clone",
+                "--config",
+                "advice.detachedHead=false",
+                "--quiet",
+                "--depth",
+                "1",
+                "--no-tags",
+                "--branch",
+                self.ref,
+                f"https://github.com/{self.org}/{self.repo}",
+                tmpdir,
+            )
+
+            await process.wait()
+
+            yield Path(tmpdir)
+
+
+REPOSITORIES = {
+    "zulip": Repository("zulip", "zulip", "main"),
+    "bokeh": Repository("bokeh", "bokeh", "branch-3.2"),
+    "scikit-build": Repository("scikit-build", "scikit-build", "main"),
+    "airflow": Repository("apache", "airflow", "main"),
+}
+
+SUMMARY_LINE_RE = re.compile(r"^(Found \d+ error.*)|(.*potentially fixable with.*)$")
+
+
+class RuffError(Exception):
+    """An error reported by ruff."""
+
+
+async def check(*, ruff: Path, path: Path) -> "Sequence[str]":
+    """Run the given ruff binary against the specified path."""
+    proc = await create_subprocess_exec(
+        ruff.absolute(),
+        "check",
+        "--no-cache",
+        "--exit-zero",
+        "--isolated",
+        "--select",
+        "ALL",
+        ".",
+        stdout=PIPE,
+        stderr=PIPE,
+        cwd=path,
+    )
+
+    result, err = await proc.communicate()
+
+    if proc.returncode != 0:
+        raise RuffError(err.decode("utf8"))
+
+    lines = [
+        line
+        for line in result.decode("utf8").splitlines()
+        if not SUMMARY_LINE_RE.match(line)
+    ]
+
+    return sorted(lines)
+
+
+class Diff(NamedTuple):
+    """A diff between two runs of ruff."""
+
+    removed: set[str]
+    added: set[str]
+
+    def __bool__(self: Self) -> bool:
+        """Return true if this diff is non-empty."""
+        return bool(self.removed or self.added)
+
+    def __iter__(self: Self) -> "Iterator[str]":
+        """Iterate through the changed lines in diff format."""
+        for line in heapq.merge(sorted(self.removed), sorted(self.added)):
+            if line in self.removed:
+                yield f"- {line}"
+            else:
+                yield f"+ {line}"
+
+
+async def compare(ruff1: Path, ruff2: Path, repo: Repository) -> Diff | None:
+    """Check a specific repository against two versions of ruff."""
+    removed, added = set(), set()
+
+    async with repo.clone() as path:
+        try:
+            async with asyncio.TaskGroup() as tg:
+                check1 = tg.create_task(check(ruff=ruff1, path=path))
+                check2 = tg.create_task(check(ruff=ruff2, path=path))
+        except ExceptionGroup as e:
+            raise e.exceptions[0] from e
+
+        for line in difflib.ndiff(check1.result(), check2.result()):
+            if line.startswith("- "):
+                removed.add(line[2:])
+            elif line.startswith("+ "):
+                added.add(line[2:])
+
+    return Diff(removed, added)
+
+
+async def main(*, ruff1: Path, ruff2: Path) -> None:
+    """Check two versions of ruff against a corpus of open-source code."""
+    results = await asyncio.gather(
+        *[compare(ruff1, ruff2, repo) for repo in REPOSITORIES.values()],
+        return_exceptions=True,
+    )
+
+    diffs = {name: result for name, result in zip(REPOSITORIES, results, strict=True)}
+
+    total_removed = total_added = 0
+    errors = 0
+
+    for diff in diffs.values():
+        if isinstance(diff, Exception):
+            errors += 1
+        else:
+            total_removed += len(diff.removed)
+            total_added += len(diff.added)
+
+    if total_removed == 0 and total_added == 0 and errors == 0:
+        print("\u2705 ecosystem check detected no changes.")
+    else:
+        changes = f"(+{total_added}, -{total_removed}, {errors} error(s))"
+
+        print(f"\u2139\ufe0f ecosystem check **detected changes**. {changes}")
+        print()
+
+        for name, diff in diffs.items():
+            print(f"<details><summary>{name}</summary>")
+            print("<p>")
+            print()
+
+            if isinstance(diff, Exception):
+                print("```")
+                print(str(diff))
+                print("```")
+            elif diff:
+                diff_str = "\n".join(diff)
+
+                print("```diff")
+                print(diff_str)
+                print("```")
+            else:
+                print("_No changes detected_.")
+
+            print()
+            print("</p>")
+            print("</details>")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Check two versions of ruff against a corpus of open-source code.",
+        epilog="scripts/check_ecosystem.py <path/to/ruff1> <path/to/ruff2>",
+    )
+
+    parser.add_argument(
+        "ruff1",
+        type=Path,
+    )
+    parser.add_argument(
+        "ruff2",
+        type=Path,
+    )
+
+    args = parser.parse_args()
+
+    asyncio.run(main(ruff1=args.ruff1, ruff2=args.ruff2))