Skip to content

Commit

Permalink
automata: add 'is_match' as its own path to meta regex internals
Browse files Browse the repository at this point in the history
I originally prided myself on not having a dedicated `is_match` routine
on the meta regex engine's internal `Strategy` trait, and actually spent
a fair amount of attention ensuring that `is_match` and `find` always
returned the same results. That is, `is_match` returns true if and only
if `find` returns a match.

But the fix in the previous commits for #1059 means that a `PikeVM` and
a `BoundedBacktracker` can be used to run a search with an NFA that has
no capture states. Since both engines are implemented to only track
offsets via those capture states, it follows that the only thing that
can be returned in such cases is whether a match occurs (and if so,
which pattern matched). That in turn means that `is_match` can return
`true` while `find` can return `None` for the same search. This is
because the latter returns `None` even when a match is found but there
are no capture states to record the offsets of the match.

This in theory could be resolved by adding APIs to the `PikeVM` and the
`BoundedBacktracker` that return a `HalfMatch` without depending on any
capture states at all. Then `is_match` could be implemented in terms of
those APIs. That is probably the right path, but it's pretty gnarly to
do without breaking changes and I don't want to do any breaking changes
right now.

So instead, we just add a special path to the meta regex engine for
`is_match` and permit some cases to have different results between
`is_match` and `find`. Sigh.
  • Loading branch information
BurntSushi committed Aug 5, 2023
1 parent 930770b commit e003cae
Show file tree
Hide file tree
Showing 3 changed files with 158 additions and 3 deletions.
9 changes: 8 additions & 1 deletion regex-automata/src/meta/regex.rs
Expand Up @@ -529,7 +529,14 @@ impl Regex {
#[inline]
pub fn is_match<'h, I: Into<Input<'h>>>(&self, input: I) -> bool {
// Convert the caller's value into an `Input` and turn on its `earliest`
// option, since only the existence of a match is needed here.
let input = input.into().earliest(true);
// NOTE(review): the next line looks like the pre-change body that this
// commit replaces (the hunk reports exactly one deletion) — confirm it is
// not part of the new implementation.
self.search_half(&input).is_some()
// Answer `false` right away, before taking anything from the pool, when
// `info` reports that this search is impossible.
if self.imp.info.is_impossible(&input) {
return false;
}
// Take a guard from the pool and hand it to the strategy's dedicated
// `is_match` path, which only produces a yes/no answer.
let mut guard = self.pool.get();
let result = self.imp.strat.is_match(&mut guard, &input);
// See 'Regex::search' for why we put the guard back explicitly.
PoolGuard::put(guard);
result
}

/// Executes a leftmost search and returns the first match that is found,
Expand Down
120 changes: 118 additions & 2 deletions regex-automata/src/meta/strategy.rs
Expand Up @@ -58,6 +58,8 @@ pub(super) trait Strategy:
input: &Input<'_>,
) -> Option<HalfMatch>;

/// Returns true if and only if this strategy finds a match for `input`.
///
/// This is its own entry point rather than a wrapper around a half
/// search: engines running an NFA without capture states can report that
/// a match exists without being able to report its offsets, so this may
/// return `true` in cases where an offset-reporting search returns `None`.
fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool;

fn search_slots(
&self,
cache: &mut Cache,
Expand Down Expand Up @@ -399,6 +401,10 @@ impl<P: PrefilterI> Strategy for Pre<P> {
self.search(cache, input).map(|m| HalfMatch::new(m.pattern(), m.end()))
}

/// Reports whether `search` finds any match for `input`.
///
/// The match itself is discarded; only its presence is returned.
fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
    let found = self.search(cache, input);
    found.is_some()
}

fn search_slots(
&self,
cache: &mut Cache,
Expand Down Expand Up @@ -623,6 +629,29 @@ impl Core {
}
}

fn is_match_nofail(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
if let Some(ref e) = self.onepass.get(input) {
trace!(
"using OnePass for is-match search at {:?}",
input.get_span()
);
e.search_slots(&mut cache.onepass, input, &mut []).is_some()
} else if let Some(ref e) = self.backtrack.get(input) {
trace!(
"using BoundedBacktracker for is-match search at {:?}",
input.get_span()
);
e.is_match(&mut cache.backtrack, input)
} else {
trace!(
"using PikeVM for is-match search at {:?}",
input.get_span()
);
let e = self.pikevm.get();
e.is_match(&mut cache.pikevm, input)
}
}

/// Returns true when the caller supplied more slots than the NFA's group
/// info counts as implicit slots.
fn is_capture_search_needed(&self, slots_len: usize) -> bool {
    let implicit_len = self.nfa.group_info().implicit_slot_len();
    implicit_len < slots_len
}
Expand Down Expand Up @@ -703,7 +732,7 @@ impl Strategy for Core {
// The main difference with 'search' is that if we're using a DFA, we
// can use a single forward scan without needing to run the reverse
// DFA.
return if let Some(e) = self.dfa.get(input) {
if let Some(e) = self.dfa.get(input) {
trace!("using full DFA for half search at {:?}", input.get_span());
match e.try_search_half_fwd(input) {
Ok(x) => x,
Expand All @@ -723,7 +752,38 @@ impl Strategy for Core {
}
} else {
self.search_half_nofail(cache, input)
};
}
}

/// Reports whether `input` matches, trying the full DFA first, then the
/// lazy DFA, and otherwise deferring to `is_match_nofail`.
///
/// Either DFA may return an error from its forward half search; when that
/// happens the error is only traced and the answer comes from
/// `is_match_nofail` instead.
#[cfg_attr(feature = "perf-inline", inline(always))]
fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
    if let Some(dfa) = self.dfa.get(input) {
        trace!(
            "using full DFA for is-match search at {:?}",
            input.get_span()
        );
        return match dfa.try_search_half_fwd(input) {
            Ok(half) => half.is_some(),
            Err(_err) => {
                trace!("full DFA half search failed: {}", _err);
                self.is_match_nofail(cache, input)
            }
        };
    }
    if let Some(lazy) = self.hybrid.get(input) {
        trace!(
            "using lazy DFA for is-match search at {:?}",
            input.get_span()
        );
        return match lazy.try_search_half_fwd(&mut cache.hybrid, input) {
            Ok(half) => half.is_some(),
            Err(_err) => {
                trace!("lazy DFA half search failed: {}", _err);
                self.is_match_nofail(cache, input)
            }
        };
    }
    // Neither DFA was offered for this input.
    self.is_match_nofail(cache, input)
}

#[cfg_attr(feature = "perf-inline", inline(always))]
Expand Down Expand Up @@ -983,6 +1043,21 @@ impl Strategy for ReverseAnchored {
}
}

/// Reports whether `input` matches, using the anchored reverse half
/// search when the caller did not request an anchored search.
///
/// Anchored inputs are handed to the core strategy unchanged. If the
/// reverse search returns an error, the error is traced and the core's
/// `is_match_nofail` supplies the answer.
#[cfg_attr(feature = "perf-inline", inline(always))]
fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
    if input.get_anchored().is_anchored() {
        return self.core.is_match(cache, input);
    }
    let outcome = self.try_search_half_anchored_rev(cache, input);
    match outcome {
        Ok(half) => half.is_some(),
        Err(_err) => {
            trace!("fast reverse anchored search failed: {}", _err);
            self.core.is_match_nofail(cache, input)
        }
    }
}

#[cfg_attr(feature = "perf-inline", inline(always))]
fn search_slots(
&self,
Expand Down Expand Up @@ -1335,6 +1410,28 @@ impl Strategy for ReverseSuffix {
}
}

/// Reports whether `input` matches, using `try_search_half_start` when
/// the caller did not request an anchored search.
///
/// Anchored inputs are handed to the core strategy unchanged. Both kinds
/// of `RetryError` are traced (with distinct messages) and then answered
/// by the core's `is_match_nofail`.
#[cfg_attr(feature = "perf-inline", inline(always))]
fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
    if input.get_anchored().is_anchored() {
        return self.core.is_match(cache, input);
    }
    let outcome = self.try_search_half_start(cache, input);
    match outcome {
        Ok(half) => half.is_some(),
        Err(RetryError::Quadratic(_err)) => {
            trace!("reverse suffix half optimization failed: {}", _err);
            self.core.is_match_nofail(cache, input)
        }
        Err(RetryError::Fail(_err)) => {
            trace!(
                "reverse suffix reverse fast half search failed: {}",
                _err
            );
            self.core.is_match_nofail(cache, input)
        }
    }
}

#[cfg_attr(feature = "perf-inline", inline(always))]
fn search_slots(
&self,
Expand Down Expand Up @@ -1717,6 +1814,25 @@ impl Strategy for ReverseInner {
}
}

/// Reports whether `input` matches, using `try_search_full` when the
/// caller did not request an anchored search.
///
/// Anchored inputs are handed to the core strategy unchanged. Both kinds
/// of `RetryError` are traced (with distinct messages) and then answered
/// by the core's `is_match_nofail`.
#[cfg_attr(feature = "perf-inline", inline(always))]
fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
    if input.get_anchored().is_anchored() {
        return self.core.is_match(cache, input);
    }
    let outcome = self.try_search_full(cache, input);
    match outcome {
        Ok(found) => found.is_some(),
        Err(RetryError::Quadratic(_err)) => {
            trace!("reverse inner half optimization failed: {}", _err);
            self.core.is_match_nofail(cache, input)
        }
        Err(RetryError::Fail(_err)) => {
            trace!("reverse inner fast half search failed: {}", _err);
            self.core.is_match_nofail(cache, input)
        }
    }
}

#[cfg_attr(feature = "perf-inline", inline(always))]
fn search_slots(
&self,
Expand Down
32 changes: 32 additions & 0 deletions regex-automata/src/meta/wrappers.rs
Expand Up @@ -87,6 +87,15 @@ impl PikeVMEngine {
Ok(PikeVMEngine(engine))
}

/// Forwards an is-match query to the wrapped engine.
///
/// Panics if the wrapped cache slot is `None`. The input is cloned
/// because the wrapped engine's `is_match` is given it by value.
#[cfg_attr(feature = "perf-inline", inline(always))]
pub(crate) fn is_match(
    &self,
    cache: &mut PikeVMCache,
    input: &Input<'_>,
) -> bool {
    let vm_cache = cache.0.as_mut().unwrap();
    let owned_input = input.clone();
    self.0.is_match(vm_cache, owned_input)
}

#[cfg_attr(feature = "perf-inline", inline(always))]
pub(crate) fn search_slots(
&self,
Expand Down Expand Up @@ -212,6 +221,29 @@ impl BoundedBacktrackerEngine {
}
}

/// Forwards an is-match query to the wrapped backtracker.
///
/// With the `nfa-backtrack` feature enabled this unwraps the result of
/// the fallible `try_is_match`; without it the body is `unreachable!()`.
#[cfg_attr(feature = "perf-inline", inline(always))]
pub(crate) fn is_match(
    &self,
    cache: &mut BoundedBacktrackerCache,
    input: &Input<'_>,
) -> bool {
    #[cfg(feature = "nfa-backtrack")]
    {
        let bt_cache = cache.0.as_mut().unwrap();
        let outcome = self.0.try_is_match(bt_cache, input.clone());
        // Unwrapping is OK: access to this engine is only permitted when
        // the haystack is known to be short enough for the backtracker
        // to run without reporting an error.
        outcome.unwrap()
    }
    #[cfg(not(feature = "nfa-backtrack"))]
    {
        // This engine is never constructed when the requisite features
        // aren't enabled, so this point cannot be reached.
        unreachable!()
    }
}

#[cfg_attr(feature = "perf-inline", inline(always))]
pub(crate) fn search_slots(
&self,
Expand Down

0 comments on commit e003cae

Please sign in to comment.