diff --git a/regex-automata/src/nfa/thompson/compiler.rs b/regex-automata/src/nfa/thompson/compiler.rs index e6b1c9122..668bca87c 100644 --- a/regex-automata/src/nfa/thompson/compiler.rs +++ b/regex-automata/src/nfa/thompson/compiler.rs @@ -961,10 +961,12 @@ impl Compiler { // for all matches. When an unanchored prefix is not added, then the // NFA's anchored and unanchored start states are equivalent. let all_anchored = exprs.iter().all(|e| { - e.borrow() - .properties() - .look_set_prefix() - .contains(hir::Look::Start) + let props = e.borrow().properties(); + if self.config.get_reverse() { + props.look_set_suffix().contains(hir::Look::End) + } else { + props.look_set_prefix().contains(hir::Look::Start) + } }); let anchored = !self.config.get_unanchored_prefix() || all_anchored; let unanchored_prefix = if anchored { @@ -1928,6 +1930,11 @@ mod tests { State::Sparse(SparseTransitions { transitions }) } + fn s_look(look: Look, next: usize) -> State { + let next = sid(next); + State::Look { look, next } + } + fn s_bin_union(alt1: usize, alt2: usize) -> State { State::BinaryUnion { alt1: sid(alt1), alt2: sid(alt2) } } @@ -1978,6 +1985,80 @@ mod tests { ); } + #[test] + fn compile_no_unanchored_prefix_with_start_anchor() { + let nfa = NFA::compiler() + .configure(NFA::config().which_captures(WhichCaptures::None)) + .build(r"^a") + .unwrap(); + assert_eq!( + nfa.states(), + &[s_look(Look::Start, 1), s_byte(b'a', 2), s_match(0)] + ); + } + + #[test] + fn compile_yes_unanchored_prefix_with_end_anchor() { + let nfa = NFA::compiler() + .configure(NFA::config().which_captures(WhichCaptures::None)) + .build(r"a$") + .unwrap(); + assert_eq!( + nfa.states(), + &[ + s_bin_union(2, 1), + s_range(0, 255, 0), + s_byte(b'a', 3), + s_look(Look::End, 4), + s_match(0), + ] + ); + } + + #[test] + fn compile_yes_reverse_unanchored_prefix_with_start_anchor() { + let nfa = NFA::compiler() + .configure( + NFA::config() + .reverse(true) + .which_captures(WhichCaptures::None), + ) + .build(r"^a") + .unwrap(); + assert_eq!( + nfa.states(), + &[ + s_bin_union(2, 1), + s_range(0, 255, 0), + s_byte(b'a', 3), + // Anchors get flipped in a reverse automaton. + s_look(Look::End, 4), + s_match(0), + ], + ); + } + + #[test] + fn compile_no_reverse_unanchored_prefix_with_end_anchor() { + let nfa = NFA::compiler() + .configure( + NFA::config() + .reverse(true) + .which_captures(WhichCaptures::None), + ) + .build(r"a$") + .unwrap(); + assert_eq!( + nfa.states(), + &[ + // Anchors get flipped in a reverse automaton. + s_look(Look::Start, 1), + s_byte(b'a', 2), + s_match(0), + ], + ); + } + #[test] fn compile_empty() { assert_eq!(build("").states(), &[s_match(0),]);