diff --git a/fuzz/regressions/clusterfuzz-testcase-minimized-ast_fuzz_regex-4596093180313600 b/fuzz/regressions/clusterfuzz-testcase-minimized-ast_fuzz_regex-4596093180313600 new file mode 100644 index 000000000..711817e4e Binary files /dev/null and b/fuzz/regressions/clusterfuzz-testcase-minimized-ast_fuzz_regex-4596093180313600 differ diff --git a/regex-syntax/Cargo.toml b/regex-syntax/Cargo.toml index f14298299..b0ba658b8 100644 --- a/regex-syntax/Cargo.toml +++ b/regex-syntax/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "regex-syntax" -version = "0.8.0" #:version +version = "0.8.1" #:version authors = ["The Rust Project Developers", "Andrew Gallant "] license = "MIT OR Apache-2.0" repository = "https://github.com/rust-lang/regex/tree/master/regex-syntax" diff --git a/regex-syntax/src/hir/interval.rs b/regex-syntax/src/hir/interval.rs index e3051bf31..de614d39f 100644 --- a/regex-syntax/src/hir/interval.rs +++ b/regex-syntax/src/hir/interval.rs @@ -94,32 +94,30 @@ impl IntervalSet { // Find the first range that is not greater than the new interval. // This is the first range that could possibly be unioned with the // new interval. - let mut drain_end = self.ranges.len(); - while drain_end > 0 - && self.ranges[drain_end - 1].lower() > interval.upper() - && !self.ranges[drain_end - 1].is_contiguous(&interval) - { - drain_end -= 1; - } - - // Try to union the new interval with old intervals backwards. - if drain_end > 0 && self.ranges[drain_end - 1].is_contiguous(&interval) - { - self.ranges[drain_end - 1] = - self.ranges[drain_end - 1].union(&interval).unwrap(); - for i in (0..drain_end - 1).rev() { - if let Some(union) = - self.ranges[drain_end - 1].union(&self.ranges[i]) - { - self.ranges[drain_end - 1] = union; - } else { - self.ranges.drain(i + 1..drain_end - 1); - break; + for i in 0..self.ranges.len() { + if self.ranges[i].is_contiguous(&interval) { + self.ranges[i] = self.ranges[i].union(&interval).unwrap(); + // Try to union the new interval with all subsequent ranges. + // When it's no longer possible to union, remove the remaining + // ranges and return. + for j in i + 1..self.ranges.len() { + if let Some(union) = self.ranges[i].union(&self.ranges[j]) + { + self.ranges[i] = union; + } else { + self.ranges.drain(i + 1..j); + return; + } } + self.ranges.drain(i + 1..); + return; + } else if self.ranges[i].lower() > interval.upper() { + self.ranges.insert(i, interval); + return; } - } else { - self.ranges.insert(drain_end, interval); } + + self.ranges.push(interval); } /// Return an iterator over all intervals in this set. diff --git a/testdata/regression.toml b/testdata/regression.toml index 09b2b1d1c..2954c9118 100644 --- a/testdata/regression.toml +++ b/testdata/regression.toml @@ -800,3 +800,16 @@ name = "non-prefix-literal-quit-state" regex = '.+\b\n' haystack = "β77\n" matches = [[0, 5]] + +# This is a regression test for some errant HIR interval set operations that +# were made in the regex-syntax 0.8.0 release and then reverted in 0.8.1. The +# issue here is that the HIR produced from the regex had out-of-order ranges. +# +# See: https://github.com/rust-lang/regex/issues/1103 +# Ref: https://github.com/rust-lang/regex/pull/1051 +# Ref: https://github.com/rust-lang/regex/pull/1102 +[[test]] +name = "hir-optimization-out-of-order-class" +regex = '^[[:alnum:]./-]+$' +haystack = "a-b" +matches = [[0, 3]]