From 7227e94ce5f661355a1547a1838284bb7ad5b815 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Fri, 4 Aug 2023 14:05:54 -0400 Subject: [PATCH] globset: use non-capture groups in regex transform We currently implement globs by converting them to regexes, and in doing so, sometimes use grouping. In all but one case, we used non-capturing groups. But for alternations, we used capturing groups, which was likely just an oversight. We don't make use of capture groups at all, and while they usually don't have any overhead, they lead to weird cases like this one: https://github.com/rust-lang/regex/issues/1059 That particular issue is also a bug in the regex crate itself, which is fixed in https://github.com/rust-lang/regex/pull/1062. Note though that the bug fix in the regex crate is still required: with this patch to globset alone, memory usage is reduced (by about half in rust-lang/regex#1059) but does not return to where it was prior to the regex 1.9 release. --- crates/globset/src/glob.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/globset/src/glob.rs b/crates/globset/src/glob.rs index cda39cab1..d19c70ed2 100644 --- a/crates/globset/src/glob.rs +++ b/crates/globset/src/glob.rs @@ -736,7 +736,7 @@ impl Tokens { // It is possible to have an empty set in which case the // resulting alternation '()' would be an error. if !parts.is_empty() { - re.push('('); + re.push_str("(?:"); re.push_str(&parts.join("|")); re.push(')'); } @@ -1276,6 +1276,7 @@ mod tests { toregex!(re32, "/a**", r"^/a.*.*$"); toregex!(re33, "/**a", r"^/.*.*a$"); toregex!(re34, "/a**b", r"^/a.*.*b$"); + toregex!(re35, "{a,b}", r"^(?:b|a)$"); matches!(match1, "a", "a"); matches!(match2, "a*b", "a_b");