Skip to content

Commit

Permalink
perf: parse and walk globs in parallel (#7244)
Browse files Browse the repository at this point in the history
### Description

This PR brings parallelism to 2 aspects of our globwalk behavior:
 - parsing the input globs
 - walking each of the input globs

This will primarily provide a speedup for tasks which have a large
number of input globs, but from quick spot checks I don't think this
negatively impacts tasks with few (2-3) inputs.

### Testing Instructions
Run lint against our examples (this is a no-op; we only calculate the task
hash for this task): `turbo lint --filter=turborepo-examples`.
Main:
<img width="1610" alt="Screenshot 2024-02-02 at 3 33 40 PM"
src="https://github.com/vercel/turbo/assets/4131117/dc468c8c-89de-4a51-b6cf-ebeb5416fdbd">
<img width="1829" alt="Screenshot 2024-02-02 at 3 48 38 PM"
src="https://github.com/vercel/turbo/assets/4131117/4d42dc73-e99f-41b3-86eb-dc3b4e93fccd">
This PR:
<img width="1602" alt="Screenshot 2024-02-02 at 3 33 53 PM"
src="https://github.com/vercel/turbo/assets/4131117/cbbf7d52-56a1-4b2d-a925-91879162b1b8">
<img width="1324" alt="Screenshot 2024-02-02 at 3 48 52 PM"
src="https://github.com/vercel/turbo/assets/4131117/e227f104-a5fd-4643-b05d-808bdfb51357">



Closes TURBO-2234
  • Loading branch information
chris-olszewski committed Feb 14, 2024
1 parent e55f011 commit 65dba57
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 15 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/turborepo-globwalk/Cargo.toml
Expand Up @@ -14,6 +14,7 @@ camino = { workspace = true }
itertools.workspace = true
path-clean = "1.0.1"
path-slash = "0.2.1"
rayon = "1"
regex.workspace = true
thiserror.workspace = true
tracing = "0.1.37"
Expand Down
32 changes: 17 additions & 15 deletions crates/turborepo-globwalk/src/lib.rs
Expand Up @@ -13,6 +13,7 @@ use camino::Utf8PathBuf;
use itertools::Itertools;
use path_clean::PathClean;
use path_slash::PathExt;
use rayon::prelude::*;
use regex::Regex;
use turbopath::{AbsoluteSystemPath, AbsoluteSystemPathBuf, PathError};
use wax::{walk::FileIterator, BuildError, Glob};
Expand Down Expand Up @@ -308,37 +309,38 @@ pub fn globwalk_internal(
let (base_path_new, include_paths, exclude_paths) =
preprocess_paths_and_globs(base_path, include, exclude)?;

let ex_patterns = exclude_paths
let ex_patterns: Vec<_> = exclude_paths
.into_iter()
.map(glob_with_contextual_error)
.collect::<Result<_, _>>()?;

include_paths
.into_iter()
let include_patterns = include_paths
.into_par_iter()
.map(glob_with_contextual_error)
.map_ok(|glob| walk_glob(walk_type, &base_path_new, &ex_patterns, glob))
// flat map to bring the results in the vec to the same level as the potential outer err
// this is the same as a flat_map_ok
.flat_map(|s| s.unwrap_or_else(|e| vec![Err(e)]))
.collect::<Result<Vec<_>, _>>()?;

include_patterns
.into_par_iter()
// Use flat_map_iter as we only want parallelism for walking the globs and not iterating
// over the results.
// See https://docs.rs/rayon/latest/rayon/iter/trait.ParallelIterator.html#method.flat_map_iter
.flat_map_iter(|glob| walk_glob(walk_type, &base_path_new, ex_patterns.clone(), glob))
.collect()
}

#[tracing::instrument(skip(ex_patterns), fields(glob=glob.to_string().as_str()))]
fn walk_glob(
walk_type: WalkType,
base_path_new: &PathBuf,
ex_patterns: &Vec<Glob>,
base_path_new: &Path,
ex_patterns: Vec<Glob>,
glob: Glob,
) -> Vec<Result<AbsoluteSystemPathBuf, WalkError>> {
glob.walk(&base_path_new)
.not(ex_patterns.clone())
glob.walk(base_path_new)
.not(ex_patterns)
.unwrap_or_else(|e| {
// Per docs, only fails if exclusion list is too large, since we're using
// pre-compiled globs
panic!(
"Failed to compile exclusion globs: {:?}: {}",
ex_patterns, e,
)
panic!("Failed to compile exclusion globs: {}", e,)
})
.filter_map(|entry| visit_file(walk_type, entry))
.collect::<Vec<_>>()
Expand Down

0 comments on commit 65dba57

Please sign in to comment.