From ffabaa9cbb715ef127063870ab4edc9907479f32 Mon Sep 17 00:00:00 2001 From: Leachim <32847549+Licheam@users.noreply.github.com> Date: Thu, 12 Oct 2023 22:30:34 +0800 Subject: [PATCH 1/2] Fixes #1103, #1102 Fix the `push` function in #1051 --- regex-syntax/src/hir/interval.rs | 44 +++++++++++++++----------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/regex-syntax/src/hir/interval.rs b/regex-syntax/src/hir/interval.rs index e3051bf31..de614d39f 100644 --- a/regex-syntax/src/hir/interval.rs +++ b/regex-syntax/src/hir/interval.rs @@ -94,32 +94,30 @@ impl IntervalSet { // Find the first range that is not greater than the new interval. // This is the first range that could possibly be unioned with the // new interval. - let mut drain_end = self.ranges.len(); - while drain_end > 0 - && self.ranges[drain_end - 1].lower() > interval.upper() - && !self.ranges[drain_end - 1].is_contiguous(&interval) - { - drain_end -= 1; - } - - // Try to union the new interval with old intervals backwards. - if drain_end > 0 && self.ranges[drain_end - 1].is_contiguous(&interval) - { - self.ranges[drain_end - 1] = - self.ranges[drain_end - 1].union(&interval).unwrap(); - for i in (0..drain_end - 1).rev() { - if let Some(union) = - self.ranges[drain_end - 1].union(&self.ranges[i]) - { - self.ranges[drain_end - 1] = union; - } else { - self.ranges.drain(i + 1..drain_end - 1); - break; + for i in 0..self.ranges.len() { + if self.ranges[i].is_contiguous(&interval) { + self.ranges[i] = self.ranges[i].union(&interval).unwrap(); + // Try to union the new interval with all subsequent ranges. + // When it's no longer possible to union, remove the remaining + // ranges and return. + for j in i + 1..self.ranges.len() { + if let Some(union) = self.ranges[i].union(&self.ranges[j]) + { + self.ranges[i] = union; + } else { + self.ranges.drain(i + 1..j); + return; + } } + self.ranges.drain(i + 1..); + return; + } else if self.ranges[i].lower() > interval.upper() { + self.ranges.insert(i, interval); + return; } - } else { - self.ranges.insert(drain_end, interval); } + + self.ranges.push(interval); } /// Return an iterator over all intervals in this set. From 2ef1906b790eacc7578f080aeb8eba3b48c67881 Mon Sep 17 00:00:00 2001 From: Leachim <32847549+Licheam@users.noreply.github.com> Date: Thu, 12 Oct 2023 22:43:32 +0800 Subject: [PATCH 2/2] add all the new test cases --- ...case-minimized-ast_fuzz_regex-4596093180313600 | Bin 0 -> 329 bytes regex-syntax/Cargo.toml | 2 +- testdata/regression.toml | 13 +++++++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 fuzz/regressions/clusterfuzz-testcase-minimized-ast_fuzz_regex-4596093180313600 diff --git a/fuzz/regressions/clusterfuzz-testcase-minimized-ast_fuzz_regex-4596093180313600 b/fuzz/regressions/clusterfuzz-testcase-minimized-ast_fuzz_regex-4596093180313600 new file mode 100644 index 0000000000000000000000000000000000000000..711817e4ed98c89f3eac4def9acfbea0451dbbf3 GIT binary patch literal 329 zcmeybps-MZMDQQ1{yzjLfGLOx{~"] license = "MIT OR Apache-2.0" repository = "https://github.com/rust-lang/regex/tree/master/regex-syntax" diff --git a/testdata/regression.toml b/testdata/regression.toml index 09b2b1d1c..2954c9118 100644 --- a/testdata/regression.toml +++ b/testdata/regression.toml @@ -800,3 +800,16 @@ name = "non-prefix-literal-quit-state" regex = '.+\b\n' haystack = "β77\n" matches = [[0, 5]] + +# This is a regression test for some errant HIR interval set operations that +# were made in the regex-syntax 0.8.0 release and then reverted in 0.8.1. The +# issue here is that the HIR produced from the regex had out-of-order ranges. +# +# See: https://github.com/rust-lang/regex/issues/1103 +# Ref: https://github.com/rust-lang/regex/pull/1051 +# Ref: https://github.com/rust-lang/regex/pull/1102 +[[test]] +name = "hir-optimization-out-of-order-class" +regex = '^[[:alnum:]./-]+$' +haystack = "a-b" +matches = [[0, 3]]