Skip to content

Commit

Permalink
Use Matchit to Resolve Per-File Settings (#11111)
Browse files Browse the repository at this point in the history
## Summary

Continuation of #9444.

> When the formatter is fully cached, it turns out we actually spend
meaningful time mapping from file to `Settings` (since we use a
hierarchical approach to settings). Using `matchit` rather than
`BTreeMap` improves fully-cached performance by anywhere from 2-5%
depending on the project, and since these are all implementation details
of `Resolver`, it's minimally invasive.

`matchit` supports escaping routing characters so this change should now
be fully compatible.

## Test Plan

On my machine I'm seeing a ~3% improvement with this change.

```
hyperfine --warmup 20 -i "./target/release/main format ../airflow" "./target/release/ruff format ../airflow"
Benchmark 1: ./target/release/main format ../airflow
  Time (mean ± σ):      58.1 ms ±   1.4 ms    [User: 63.1 ms, System: 66.5 ms]
  Range (min … max):    56.1 ms …  62.9 ms    49 runs
 
Benchmark 2: ./target/release/ruff format ../airflow
  Time (mean ± σ):      56.6 ms ±   1.5 ms    [User: 57.8 ms, System: 67.7 ms]
  Range (min … max):    54.1 ms …  63.0 ms    51 runs
 
Summary
  ./target/release/ruff format ../airflow ran
    1.03 ± 0.04 times faster than ./target/release/main format ../airflow
```
  • Loading branch information
ibraheemdev committed Apr 24, 2024
1 parent 37af6e6 commit 1c8849f
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 19 deletions.
14 changes: 14 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,15 @@ libcst = { version = "1.1.0", default-features = false }
log = { version = "0.4.17" }
lsp-server = { version = "0.7.6" }
lsp-types = { version = "0.95.0", features = ["proposed"] }
matchit = { version = "0.8.1" }
memchr = { version = "2.7.1" }
mimalloc = { version = "0.1.39" }
natord = { version = "1.0.9" }
notify = { version = "6.1.1" }
num_cpus = { version = "1.16.0" }
once_cell = { version = "1.19.0" }
path-absolutize = { version = "3.1.1" }
path-slash = { version = "0.2.1" }
pathdiff = { version = "0.2.1" }
pep440_rs = { version = "0.6.0", features = ["serde"] }
pretty_assertions = "1.3.0"
Expand Down
2 changes: 2 additions & 0 deletions crates/ruff_workspace/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,11 @@ ignore = { workspace = true }
is-macro = { workspace = true }
itertools = { workspace = true }
log = { workspace = true }
matchit = { workspace = true }
glob = { workspace = true }
globset = { workspace = true }
path-absolutize = { workspace = true }
path-slash = { workspace = true }
pep440_rs = { workspace = true, features = ["serde"] }
regex = { workspace = true }
rustc-hash = { workspace = true }
Expand Down
54 changes: 35 additions & 19 deletions crates/ruff_workspace/src/resolver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
//! filesystem.

use std::cmp::Ordering;
use std::collections::BTreeMap;
use std::ffi::OsStr;
use std::path::{Path, PathBuf};
use std::sync::RwLock;
Expand All @@ -13,7 +12,9 @@ use globset::{Candidate, GlobSet};
use ignore::{WalkBuilder, WalkState};
use itertools::Itertools;
use log::debug;
use matchit::{InsertError, Match, Router};
use path_absolutize::path_dedot;
use path_slash::PathExt;
use rustc_hash::{FxHashMap, FxHashSet};

use ruff_linter::fs;
Expand Down Expand Up @@ -86,29 +87,32 @@ pub enum Relativity {
}

impl Relativity {
pub fn resolve(self, path: &Path) -> PathBuf {
pub fn resolve(self, path: &Path) -> &Path {
match self {
Relativity::Parent => path
.parent()
.expect("Expected pyproject.toml file to be in parent directory")
.to_path_buf(),
Relativity::Cwd => path_dedot::CWD.clone(),
.expect("Expected pyproject.toml file to be in parent directory"),
Relativity::Cwd => &path_dedot::CWD,
}
}
}

#[derive(Debug)]
pub struct Resolver<'a> {
pyproject_config: &'a PyprojectConfig,
settings: BTreeMap<PathBuf, Settings>,
/// All [`Settings`] that have been added to the resolver.
settings: Vec<Settings>,
/// A router from path to index into the `settings` vector.
router: Router<usize>,
}

impl<'a> Resolver<'a> {
/// Create a new [`Resolver`] for the given [`PyprojectConfig`].
pub fn new(pyproject_config: &'a PyprojectConfig) -> Self {
Self {
pyproject_config,
settings: BTreeMap::new(),
settings: Vec::new(),
router: Router::new(),
}
}

Expand Down Expand Up @@ -140,19 +144,31 @@ impl<'a> Resolver<'a> {
}

/// Add a resolved [`Settings`] under a given [`PathBuf`] scope.
fn add(&mut self, path: PathBuf, settings: Settings) {
self.settings.insert(path, settings);
fn add(&mut self, path: &Path, settings: Settings) {
self.settings.push(settings);

// normalize the path to use `/` separators and escape the '{' and '}' characters,
// which matchit uses for routing parameters
let path = path.to_slash_lossy().replace('{', "{{").replace('}', "}}");

match self
.router
.insert(format!("{path}/{{*filepath}}"), self.settings.len() - 1)
{
Ok(()) => {}
Err(InsertError::Conflict { .. }) => {}
Err(_) => unreachable!("file paths are escaped before being inserted in the router"),
}
}

/// Return the appropriate [`Settings`] for a given [`Path`].
pub fn resolve(&self, path: &Path) -> &Settings {
match self.pyproject_config.strategy {
PyprojectDiscoveryStrategy::Fixed => &self.pyproject_config.settings,
PyprojectDiscoveryStrategy::Hierarchical => self
.settings
.iter()
.rev()
.find_map(|(root, settings)| path.starts_with(root).then_some(settings))
.router
.at(path.to_slash_lossy().as_ref())
.map(|Match { value, .. }| &self.settings[*value])
.unwrap_or(&self.pyproject_config.settings),
}
}
Expand Down Expand Up @@ -196,7 +212,7 @@ impl<'a> Resolver<'a> {

/// Return an iterator over the resolved [`Settings`] in this [`Resolver`].
pub fn settings(&self) -> impl Iterator<Item = &Settings> {
std::iter::once(&self.pyproject_config.settings).chain(self.settings.values())
std::iter::once(&self.pyproject_config.settings).chain(self.settings.iter())
}
}

Expand Down Expand Up @@ -257,7 +273,7 @@ fn resolve_configuration(
let options = pyproject::load_options(&path)?;

let project_root = relativity.resolve(&path);
let configuration = Configuration::from_options(options, Some(&path), &project_root)?;
let configuration = Configuration::from_options(options, Some(&path), project_root)?;

// If extending, continue to collect.
next = configuration.extend.as_ref().map(|extend| {
Expand Down Expand Up @@ -285,14 +301,14 @@ fn resolve_configuration(

/// Extract the project root (scope) and [`Settings`] from a given
/// `pyproject.toml`.
fn resolve_scoped_settings(
pyproject: &Path,
fn resolve_scoped_settings<'a>(
pyproject: &'a Path,
relativity: Relativity,
transformer: &dyn ConfigurationTransformer,
) -> Result<(PathBuf, Settings)> {
) -> Result<(&'a Path, Settings)> {
let configuration = resolve_configuration(pyproject, relativity, transformer)?;
let project_root = relativity.resolve(pyproject);
let settings = configuration.into_settings(&project_root)?;
let settings = configuration.into_settings(project_root)?;
Ok((project_root, settings))
}

Expand Down

0 comments on commit 1c8849f

Please sign in to comment.