Skip to content

Commit 1c8849f

Browse files
authored
Use Matchit to Resolve Per-File Settings (#11111)
## Summary

Continuation of #9444.

> When the formatter is fully cached, it turns out we actually spend meaningful time mapping from file to `Settings` (since we use a hierarchical approach to settings). Using `matchit` rather than `BTreeMap` improves fully-cached performance by anywhere from 2-5% depending on the project, and since these are all implementation details of `Resolver`, it's minimally invasive.

`matchit` supports escaping routing characters so this change should now be fully compatible.

## Test Plan

On my machine I'm seeing a ~3% improvement with this change.

```
hyperfine --warmup 20 -i "./target/release/main format ../airflow" "./target/release/ruff format ../airflow"
Benchmark 1: ./target/release/main format ../airflow
  Time (mean ± σ): 58.1 ms ± 1.4 ms [User: 63.1 ms, System: 66.5 ms]
  Range (min … max): 56.1 ms … 62.9 ms 49 runs

Benchmark 2: ./target/release/ruff format ../airflow
  Time (mean ± σ): 56.6 ms ± 1.5 ms [User: 57.8 ms, System: 67.7 ms]
  Range (min … max): 54.1 ms … 63.0 ms 51 runs

Summary
  ./target/release/ruff format ../airflow ran
    1.03 ± 0.04 times faster than ./target/release/main format ../airflow
```
1 parent 37af6e6 commit 1c8849f

File tree

4 files changed

+53
-19
lines changed

4 files changed

+53
-19
lines changed

Cargo.lock

+14
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+2
Original file line numberDiff line numberDiff line change
@@ -58,13 +58,15 @@ libcst = { version = "1.1.0", default-features = false }
5858
log = { version = "0.4.17" }
5959
lsp-server = { version = "0.7.6" }
6060
lsp-types = { version = "0.95.0", features = ["proposed"] }
61+
matchit = { version = "0.8.1" }
6162
memchr = { version = "2.7.1" }
6263
mimalloc = { version = "0.1.39" }
6364
natord = { version = "1.0.9" }
6465
notify = { version = "6.1.1" }
6566
num_cpus = { version = "1.16.0" }
6667
once_cell = { version = "1.19.0" }
6768
path-absolutize = { version = "3.1.1" }
69+
path-slash = { version = "0.2.1" }
6870
pathdiff = { version = "0.2.1" }
6971
pep440_rs = { version = "0.6.0", features = ["serde"] }
7072
pretty_assertions = "1.3.0"

crates/ruff_workspace/Cargo.toml

+2
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,11 @@ ignore = { workspace = true }
2828
is-macro = { workspace = true }
2929
itertools = { workspace = true }
3030
log = { workspace = true }
31+
matchit = { workspace = true }
3132
glob = { workspace = true }
3233
globset = { workspace = true }
3334
path-absolutize = { workspace = true }
35+
path-slash = { workspace = true }
3436
pep440_rs = { workspace = true, features = ["serde"] }
3537
regex = { workspace = true }
3638
rustc-hash = { workspace = true }

crates/ruff_workspace/src/resolver.rs

+35-19
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
//! filesystem.
33
44
use std::cmp::Ordering;
5-
use std::collections::BTreeMap;
65
use std::ffi::OsStr;
76
use std::path::{Path, PathBuf};
87
use std::sync::RwLock;
@@ -13,7 +12,9 @@ use globset::{Candidate, GlobSet};
1312
use ignore::{WalkBuilder, WalkState};
1413
use itertools::Itertools;
1514
use log::debug;
15+
use matchit::{InsertError, Match, Router};
1616
use path_absolutize::path_dedot;
17+
use path_slash::PathExt;
1718
use rustc_hash::{FxHashMap, FxHashSet};
1819

1920
use ruff_linter::fs;
@@ -86,29 +87,32 @@ pub enum Relativity {
8687
}
8788

8889
impl Relativity {
89-
pub fn resolve(self, path: &Path) -> PathBuf {
90+
pub fn resolve(self, path: &Path) -> &Path {
9091
match self {
9192
Relativity::Parent => path
9293
.parent()
93-
.expect("Expected pyproject.toml file to be in parent directory")
94-
.to_path_buf(),
95-
Relativity::Cwd => path_dedot::CWD.clone(),
94+
.expect("Expected pyproject.toml file to be in parent directory"),
95+
Relativity::Cwd => &path_dedot::CWD,
9696
}
9797
}
9898
}
9999

100100
#[derive(Debug)]
101101
pub struct Resolver<'a> {
102102
pyproject_config: &'a PyprojectConfig,
103-
settings: BTreeMap<PathBuf, Settings>,
103+
/// All [`Settings`] that have been added to the resolver.
104+
settings: Vec<Settings>,
105+
/// A router from path to index into the `settings` vector.
106+
router: Router<usize>,
104107
}
105108

106109
impl<'a> Resolver<'a> {
107110
/// Create a new [`Resolver`] for the given [`PyprojectConfig`].
108111
pub fn new(pyproject_config: &'a PyprojectConfig) -> Self {
109112
Self {
110113
pyproject_config,
111-
settings: BTreeMap::new(),
114+
settings: Vec::new(),
115+
router: Router::new(),
112116
}
113117
}
114118

@@ -140,19 +144,31 @@ impl<'a> Resolver<'a> {
140144
}
141145

142146
/// Add a resolved [`Settings`] under a given [`Path`] scope.
143-
fn add(&mut self, path: PathBuf, settings: Settings) {
144-
self.settings.insert(path, settings);
147+
fn add(&mut self, path: &Path, settings: Settings) {
148+
self.settings.push(settings);
149+
150+
// normalize the path to use `/` separators and escape the '{' and '}' characters,
151+
// which matchit uses for routing parameters
152+
let path = path.to_slash_lossy().replace('{', "{{").replace('}', "}}");
153+
154+
match self
155+
.router
156+
.insert(format!("{path}/{{*filepath}}"), self.settings.len() - 1)
157+
{
158+
Ok(()) => {}
159+
Err(InsertError::Conflict { .. }) => {}
160+
Err(_) => unreachable!("file paths are escaped before being inserted in the router"),
161+
}
145162
}
146163

147164
/// Return the appropriate [`Settings`] for a given [`Path`].
148165
pub fn resolve(&self, path: &Path) -> &Settings {
149166
match self.pyproject_config.strategy {
150167
PyprojectDiscoveryStrategy::Fixed => &self.pyproject_config.settings,
151168
PyprojectDiscoveryStrategy::Hierarchical => self
152-
.settings
153-
.iter()
154-
.rev()
155-
.find_map(|(root, settings)| path.starts_with(root).then_some(settings))
169+
.router
170+
.at(path.to_slash_lossy().as_ref())
171+
.map(|Match { value, .. }| &self.settings[*value])
156172
.unwrap_or(&self.pyproject_config.settings),
157173
}
158174
}
@@ -196,7 +212,7 @@ impl<'a> Resolver<'a> {
196212

197213
/// Return an iterator over the resolved [`Settings`] in this [`Resolver`].
198214
pub fn settings(&self) -> impl Iterator<Item = &Settings> {
199-
std::iter::once(&self.pyproject_config.settings).chain(self.settings.values())
215+
std::iter::once(&self.pyproject_config.settings).chain(self.settings.iter())
200216
}
201217
}
202218

@@ -257,7 +273,7 @@ fn resolve_configuration(
257273
let options = pyproject::load_options(&path)?;
258274

259275
let project_root = relativity.resolve(&path);
260-
let configuration = Configuration::from_options(options, Some(&path), &project_root)?;
276+
let configuration = Configuration::from_options(options, Some(&path), project_root)?;
261277

262278
// If extending, continue to collect.
263279
next = configuration.extend.as_ref().map(|extend| {
@@ -285,14 +301,14 @@ fn resolve_configuration(
285301

286302
/// Extract the project root (scope) and [`Settings`] from a given
287303
/// `pyproject.toml`.
288-
fn resolve_scoped_settings(
289-
pyproject: &Path,
304+
fn resolve_scoped_settings<'a>(
305+
pyproject: &'a Path,
290306
relativity: Relativity,
291307
transformer: &dyn ConfigurationTransformer,
292-
) -> Result<(PathBuf, Settings)> {
308+
) -> Result<(&'a Path, Settings)> {
293309
let configuration = resolve_configuration(pyproject, relativity, transformer)?;
294310
let project_root = relativity.resolve(pyproject);
295-
let settings = configuration.into_settings(&project_root)?;
311+
let settings = configuration.into_settings(project_root)?;
296312
Ok((project_root, settings))
297313
}
298314

0 commit comments

Comments
 (0)