diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml
index a7eec2c81..c4346abb8 100644
--- a/fuzz/Cargo.toml
+++ b/fuzz/Cargo.toml
@@ -67,6 +67,30 @@ path = "fuzz_targets/ast_fuzz_regex.rs"
 name = "ast_fuzz_match_bytes"
 path = "fuzz_targets/ast_fuzz_match_bytes.rs"
 
+[[bin]]
+name = "ast_diff_default"
+path = "fuzz_targets/ast_diff_default.rs"
+
+[[bin]]
+name = "ast_diff_dense_dfa"
+path = "fuzz_targets/ast_diff_dense_dfa.rs"
+
+[[bin]]
+name = "ast_diff_onepass_dfa"
+path = "fuzz_targets/ast_diff_onepass_dfa.rs"
+
+[[bin]]
+name = "ast_diff_sparse_dfa"
+path = "fuzz_targets/ast_diff_sparse_dfa.rs"
+
+[[bin]]
+name = "ast_diff_hybrid"
+path = "fuzz_targets/ast_diff_hybrid.rs"
+
+[[bin]]
+name = "ast_diff_nfas"
+path = "fuzz_targets/ast_diff_nfas.rs"
+
 [profile.release]
 opt-level = 3
 debug = true
diff --git a/fuzz/fuzz_targets/ast_diff_default.rs b/fuzz/fuzz_targets/ast_diff_default.rs
new file mode 100644
index 000000000..ed58c3507
--- /dev/null
+++ b/fuzz/fuzz_targets/ast_diff_default.rs
@@ -0,0 +1,81 @@
+#![no_main]
+
+use {
+    libfuzzer_sys::{fuzz_target, Corpus},
+    regex::RegexBuilder,
+    regex_automata::nfa::thompson::{pikevm::PikeVM as NfaRegex, NFA},
+    regex_syntax::ast::Ast,
+};
+
+/// Fuzzer input: an arbitrary regex AST plus a haystack to search.
+#[derive(Eq, PartialEq, arbitrary::Arbitrary)]
+struct FuzzData {
+    ast: Ast,
+    haystack: String,
+}
+
+// Show the AST in its printed pattern form so a failing input reads as a
+// regex rather than as a syntax tree.
+impl std::fmt::Debug for FuzzData {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let mut builder = f.debug_struct("FuzzData");
+        builder.field("ast", &self.ast.to_string());
+        builder.field("haystack", &self.haystack);
+        builder.finish()
+    }
+}
+
+/// Checks the top-level `regex::Regex` against a PikeVM built from the same
+/// pattern, panicking if the two disagree on any result.
+///
+/// Returns `Corpus::Reject` if either engine cannot be built from the
+/// pattern, and `Corpus::Keep` once every comparison has passed.
+fn do_fuzz(data: FuzzData) -> Corpus {
+    let _ = env_logger::try_init();
+
+    let pattern = data.ast.to_string();
+    let config = NFA::config().nfa_size_limit(Some(1 << 20));
+    let Ok(nfa) = NFA::compiler().configure(config).build(&pattern) else {
+        return Corpus::Reject;
+    };
+    let Ok(baseline) = NfaRegex::new_from_nfa(nfa) else {
+        return Corpus::Reject;
+    };
+    let mut cache = baseline.create_cache();
+
+    let Ok(re) = RegexBuilder::new(&pattern).size_limit(1 << 20).build() else {
+        return Corpus::Reject;
+    };
+
+    assert_eq!(
+        re.is_match(&data.haystack),
+        baseline.is_match(&mut cache, &data.haystack)
+    );
+    let found1 = re.find(&data.haystack);
+    let found2 = baseline.find(&mut cache, &data.haystack);
+    if let Some(found1) = found1 {
+        let found2 = found2.expect("Found in target, but not in baseline!");
+        assert_eq!(found1.start(), found2.start());
+        assert_eq!(found1.end(), found2.end());
+    }
+    // Capture groups are only compared when the target reports a match.
+    if let Some(captures) = re.captures(&data.haystack) {
+        let mut baseline_captures = baseline.create_captures();
+
+        baseline.captures(&mut cache, &data.haystack, &mut baseline_captures);
+        drop(cache);
+        assert_eq!(captures.len(), baseline_captures.group_len());
+        for (c1, c2) in captures.iter().zip(baseline_captures.iter()) {
+            if let Some(c1) = c1 {
+                let c2 = c2.expect("Matched in target, but not baseline!");
+                assert_eq!(c1.start(), c2.start);
+                assert_eq!(c1.end(), c2.end);
+            } else {
+                assert!(c2.is_none(), "Matched in baseline, but not target!");
+            }
+        }
+    }
+    Corpus::Keep
+}
+
+fuzz_target!(|data: FuzzData| -> Corpus { do_fuzz(data) });
diff --git a/fuzz/fuzz_targets/ast_diff_dense_dfa.rs b/fuzz/fuzz_targets/ast_diff_dense_dfa.rs
new file mode 100644
index 000000000..cf83b503e
--- /dev/null
+++ b/fuzz/fuzz_targets/ast_diff_dense_dfa.rs
@@ -0,0 +1,75 @@
+#![no_main]
+
+use regex_automata::Input;
+use {
+    libfuzzer_sys::{fuzz_target, Corpus},
+    regex_automata::{
+        dfa::{dense::DFA, regex::Builder as RegexBuilder},
+        nfa::thompson::{pikevm::PikeVM as NfaRegex, NFA},
+    },
+    regex_syntax::ast::Ast,
+};
+
+/// Fuzzer input: an arbitrary regex AST plus a haystack to search.
+#[derive(Eq, PartialEq, arbitrary::Arbitrary)]
+struct FuzzData {
+    ast: Ast,
+    haystack: String,
+}
+
+// Show the AST in its printed pattern form so a failing input reads as a
+// regex rather than as a syntax tree.
+impl std::fmt::Debug for FuzzData {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let mut builder = f.debug_struct("FuzzData");
+        builder.field("ast", &self.ast.to_string());
+        builder.field("haystack", &self.haystack);
+        builder.finish()
+    }
+}
+
+/// Checks a dense-DFA regex against a PikeVM built from the same pattern,
+/// panicking if the two disagree on whether or where the haystack matches.
+///
+/// Returns `Corpus::Reject` if either engine cannot be built from the
+/// pattern, and `Corpus::Keep` once every comparison has passed.
+fn do_fuzz(data: FuzzData) -> Corpus {
+    let _ = env_logger::try_init();
+
+    let pattern = data.ast.to_string();
+    let config = NFA::config().nfa_size_limit(Some(1 << 20));
+    let Ok(nfa) = NFA::compiler().configure(config).build(&pattern) else {
+        return Corpus::Reject;
+    };
+    let Ok(baseline) = NfaRegex::new_from_nfa(nfa) else {
+        return Corpus::Reject;
+    };
+    let mut cache = baseline.create_cache();
+
+    let config = DFA::config().dfa_size_limit(Some(1 << 20));
+    let Ok(re) = RegexBuilder::new().dense(config).build(&pattern) else {
+        return Corpus::Reject;
+    };
+
+    // Search the haystack (not the pattern) so that both engines are given
+    // the same input. A failed search yields nothing to compare.
+    if let Ok(maybe_match) = re.try_search(&Input::new(&data.haystack)) {
+        assert_eq!(
+            maybe_match.is_some(),
+            baseline.is_match(&mut cache, &data.haystack)
+        );
+        let found2 = baseline.find(&mut cache, &data.haystack);
+        if let Some(found1) = maybe_match {
+            let found2 =
+                found2.expect("Found in target, but not in baseline!");
+            assert_eq!(found1.start(), found2.start());
+            assert_eq!(found1.end(), found2.end());
+        }
+    }
+
+    // no captures
+
+    Corpus::Keep
+}
+
+fuzz_target!(|data: FuzzData| -> Corpus { do_fuzz(data) });
diff --git a/fuzz/fuzz_targets/ast_diff_hybrid.rs b/fuzz/fuzz_targets/ast_diff_hybrid.rs
new file mode 100644
index 000000000..0e2016d93
--- /dev/null
+++ b/fuzz/fuzz_targets/ast_diff_hybrid.rs
@@ -0,0 +1,78 @@
+#![no_main]
+
+use regex_automata::Input;
+use {
+    libfuzzer_sys::{fuzz_target, Corpus},
+    regex_automata::{
+        hybrid::{dfa::DFA, regex::Builder as RegexBuilder},
+        nfa::thompson::{pikevm::PikeVM as NfaRegex, NFA},
+    },
+    regex_syntax::ast::Ast,
+};
+
+/// Fuzzer input: an arbitrary regex AST plus a haystack to search.
+#[derive(Eq, PartialEq, arbitrary::Arbitrary)]
+struct FuzzData {
+    ast: Ast,
+    haystack: String,
+}
+
+// Show the AST in its printed pattern form so a failing input reads as a
+// regex rather than as a syntax tree.
+impl std::fmt::Debug for FuzzData {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let mut builder = f.debug_struct("FuzzData");
+        builder.field("ast", &self.ast.to_string());
+        builder.field("haystack", &self.haystack);
+        builder.finish()
+    }
+}
+
+/// Checks a hybrid (lazy DFA) regex against a PikeVM built from the same
+/// pattern, panicking if they disagree on whether or where it matches.
+///
+/// Returns `Corpus::Reject` if either engine cannot be built from the
+/// pattern, and `Corpus::Keep` once every comparison has passed.
+fn do_fuzz(data: FuzzData) -> Corpus {
+    let _ = env_logger::try_init();
+
+    let pattern = data.ast.to_string();
+    let config = NFA::config().nfa_size_limit(Some(1 << 20));
+    let Ok(nfa) = NFA::compiler().configure(config).build(&pattern) else {
+        return Corpus::Reject;
+    };
+    let Ok(baseline) = NfaRegex::new_from_nfa(nfa) else {
+        return Corpus::Reject;
+    };
+    let mut cache = baseline.create_cache();
+
+    let config = DFA::config().cache_capacity(1 << 20);
+    let Ok(re) = RegexBuilder::new().dfa(config).build(&pattern) else {
+        return Corpus::Reject;
+    };
+    let mut hybrid_cache = re.create_cache();
+
+    // Search the haystack (not the pattern) so that both engines are given
+    // the same input. A failed search yields nothing to compare.
+    if let Ok(maybe_match) =
+        re.try_search(&mut hybrid_cache, &Input::new(&data.haystack))
+    {
+        assert_eq!(
+            maybe_match.is_some(),
+            baseline.is_match(&mut cache, &data.haystack)
+        );
+        let found2 = baseline.find(&mut cache, &data.haystack);
+        if let Some(found1) = maybe_match {
+            let found2 =
+                found2.expect("Found in target, but not in baseline!");
+            assert_eq!(found1.start(), found2.start());
+            assert_eq!(found1.end(), found2.end());
+        }
+    }
+
+    // no captures
+
+    Corpus::Keep
+}
+
+fuzz_target!(|data: FuzzData| -> Corpus { do_fuzz(data) });
diff --git a/fuzz/fuzz_targets/ast_diff_nfas.rs b/fuzz/fuzz_targets/ast_diff_nfas.rs
new file mode 100644
index 000000000..294c68ba9
--- /dev/null
+++ b/fuzz/fuzz_targets/ast_diff_nfas.rs
@@ -0,0 +1,109 @@
+#![no_main]
+
+use {
+    libfuzzer_sys::{fuzz_target, Corpus},
+    regex_automata::nfa::thompson::{
+        backtrack::BoundedBacktracker, pikevm::PikeVM, NFA,
+    },
+    regex_syntax::ast::Ast,
+};
+
+/// Fuzzer input: an arbitrary regex AST plus a haystack to search.
+#[derive(Eq, PartialEq, arbitrary::Arbitrary)]
+struct FuzzData {
+    ast: Ast,
+    haystack: String,
+}
+
+// Show the AST in its printed pattern form so a failing input reads as a
+// regex rather than as a syntax tree.
+impl std::fmt::Debug for FuzzData {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let mut builder = f.debug_struct("FuzzData");
+        builder.field("ast", &self.ast.to_string());
+        builder.field("haystack", &self.haystack);
+        builder.finish()
+    }
+}
+
+/// Checks the bounded backtracker against a PikeVM built from the same NFA,
+/// panicking if the two disagree on any result.
+///
+/// Returns `Corpus::Reject` if the NFA or either engine cannot be built,
+/// and `Corpus::Keep` once every comparison has passed.
+fn do_fuzz(data: FuzzData) -> Corpus {
+    let _ = env_logger::try_init();
+
+    let pattern = data.ast.to_string();
+    let config = NFA::config().nfa_size_limit(Some(1 << 20));
+    let Ok(nfa) = NFA::compiler().configure(config).build(&pattern) else {
+        return Corpus::Reject;
+    };
+    let Ok(baseline) = PikeVM::new_from_nfa(nfa.clone()) else {
+        return Corpus::Reject;
+    };
+    let mut baseline_cache = baseline.create_cache();
+
+    let Ok(backtracker) = BoundedBacktracker::new_from_nfa(nfa) else {
+        return Corpus::Reject;
+    };
+    let mut backtracker_cache = backtracker.create_cache();
+
+    // The backtracker's `try_*` searches are fallible; a comparison is
+    // skipped whenever its search returns an error.
+    if let Ok(backtracked) =
+        backtracker.try_is_match(&mut backtracker_cache, &data.haystack)
+    {
+        assert_eq!(
+            backtracked,
+            baseline.is_match(&mut baseline_cache, &data.haystack)
+        );
+    }
+    if let Ok(found1) =
+        backtracker.try_find(&mut backtracker_cache, &data.haystack)
+    {
+        let found2 = baseline.find(&mut baseline_cache, &data.haystack);
+        if let Some(found1) = found1 {
+            let found2 =
+                found2.expect("Found in target, but not in baseline!");
+            assert_eq!(found1.start(), found2.start());
+            assert_eq!(found1.end(), found2.end());
+        }
+    }
+    let mut backtracker_captures = backtracker.create_captures();
+    if backtracker
+        .try_captures(
+            &mut backtracker_cache,
+            &data.haystack,
+            &mut backtracker_captures,
+        )
+        .is_ok()
+    {
+        let mut baseline_captures = baseline.create_captures();
+
+        baseline.captures(
+            &mut baseline_cache,
+            &data.haystack,
+            &mut baseline_captures,
+        );
+        drop(baseline_cache);
+        assert_eq!(
+            backtracker_captures.group_len(),
+            baseline_captures.group_len()
+        );
+        for (c1, c2) in
+            backtracker_captures.iter().zip(baseline_captures.iter())
+        {
+            if let Some(c1) = c1 {
+                let c2 = c2.expect("Matched in target, but not baseline!");
+                assert_eq!(c1.start, c2.start);
+                assert_eq!(c1.end, c2.end);
+            } else {
+                assert!(c2.is_none(), "Matched in baseline, but not target!");
+            }
+        }
+    }
+    Corpus::Keep
+}
+
+fuzz_target!(|data: FuzzData| -> Corpus { do_fuzz(data) });
diff --git a/fuzz/fuzz_targets/ast_diff_onepass_dfa.rs b/fuzz/fuzz_targets/ast_diff_onepass_dfa.rs
new file mode 100644
index 000000000..f6c81d4d5
--- /dev/null
+++ b/fuzz/fuzz_targets/ast_diff_onepass_dfa.rs
@@ -0,0 +1,100 @@
+#![no_main]
+
+use {
+    libfuzzer_sys::{fuzz_target, Corpus},
+    regex_automata::{
+        dfa::{onepass::Builder as RegexBuilder, onepass::DFA},
+        nfa::thompson::{pikevm::PikeVM as NfaRegex, NFA},
+        Anchored, Input,
+    },
+    regex_syntax::ast::Ast,
+};
+
+/// Fuzzer input: an arbitrary regex AST plus a haystack to search.
+#[derive(Eq, PartialEq, arbitrary::Arbitrary)]
+struct FuzzData {
+    ast: Ast,
+    haystack: String,
+}
+
+// Show the AST in its printed pattern form so a failing input reads as a
+// regex rather than as a syntax tree.
+impl std::fmt::Debug for FuzzData {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let mut builder = f.debug_struct("FuzzData");
+        builder.field("ast", &self.ast.to_string());
+        builder.field("haystack", &self.haystack);
+        builder.finish()
+    }
+}
+
+/// Checks a one-pass DFA against a PikeVM built from the same pattern,
+/// panicking if the two disagree on any result of an anchored search.
+///
+/// Returns `Corpus::Reject` if either engine cannot be built from the
+/// pattern, and `Corpus::Keep` once every comparison has passed.
+fn do_fuzz(data: FuzzData) -> Corpus {
+    let _ = env_logger::try_init();
+
+    let pattern = data.ast.to_string();
+    let config = NFA::config().nfa_size_limit(Some(1 << 20));
+    let Ok(nfa) = NFA::compiler().configure(config).build(&pattern) else {
+        return Corpus::Reject;
+    };
+    let Ok(baseline) = NfaRegex::new_from_nfa(nfa) else {
+        return Corpus::Reject;
+    };
+    let mut baseline_cache = baseline.create_cache();
+
+    let config = DFA::config().size_limit(Some(1 << 20));
+    let Ok(onepass) = RegexBuilder::new().configure(config).build(&pattern)
+    else {
+        return Corpus::Reject;
+    };
+    let mut onepass_cache = onepass.create_cache();
+
+    // A one-pass DFA only runs anchored searches, so the baseline has to be
+    // anchored as well; otherwise the two answer different questions.
+    let input = Input::new(&data.haystack).anchored(Anchored::Yes);
+
+    assert_eq!(
+        onepass.is_match(&mut onepass_cache, input.clone()),
+        baseline.is_match(&mut baseline_cache, input.clone())
+    );
+    let found1 = onepass.find(&mut onepass_cache, input.clone());
+    let found2 = baseline.find(&mut baseline_cache, input.clone());
+    if let Some(found1) = found1 {
+        let found2 = found2.expect("Found in target, but not in baseline!");
+        assert_eq!(found1.start(), found2.start());
+        assert_eq!(found1.end(), found2.end());
+    }
+
+    let mut onepass_captures = onepass.create_captures();
+    onepass.captures(
+        &mut onepass_cache,
+        input.clone(),
+        &mut onepass_captures,
+    );
+
+    let mut baseline_captures = baseline.create_captures();
+    baseline.captures(
+        &mut baseline_cache,
+        input,
+        &mut baseline_captures,
+    );
+    drop(baseline_cache);
+    assert_eq!(onepass_captures.group_len(), baseline_captures.group_len());
+    for (c1, c2) in onepass_captures.iter().zip(baseline_captures.iter()) {
+        if let Some(c1) = c1 {
+            let c2 = c2.expect("Matched in target, but not baseline!");
+            assert_eq!(c1.start, c2.start);
+            assert_eq!(c1.end, c2.end);
+        } else {
+            assert!(c2.is_none(), "Matched in baseline, but not target!");
+        }
+    }
+
+    Corpus::Keep
+}
+
+fuzz_target!(|data: FuzzData| -> Corpus { do_fuzz(data) });
diff --git a/fuzz/fuzz_targets/ast_diff_sparse_dfa.rs b/fuzz/fuzz_targets/ast_diff_sparse_dfa.rs
new file mode 100644
index 000000000..a0fb25a12
--- /dev/null
+++ b/fuzz/fuzz_targets/ast_diff_sparse_dfa.rs
@@ -0,0 +1,82 @@
+#![no_main]
+
+use regex_automata::Input;
+use {
+    libfuzzer_sys::{fuzz_target, Corpus},
+    regex_automata::{
+        dfa::{dense::DFA, regex::Builder as RegexBuilder},
+        nfa::thompson::{pikevm::PikeVM as NfaRegex, NFA},
+    },
+    regex_syntax::ast::Ast,
+};
+
+/// Fuzzer input: an arbitrary regex AST plus a haystack to search.
+#[derive(Eq, PartialEq, arbitrary::Arbitrary)]
+struct FuzzData {
+    ast: Ast,
+    haystack: String,
+}
+
+// Show the AST in its printed pattern form so a failing input reads as a
+// regex rather than as a syntax tree.
+impl std::fmt::Debug for FuzzData {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let mut builder = f.debug_struct("FuzzData");
+        builder.field("ast", &self.ast.to_string());
+        builder.field("haystack", &self.haystack);
+        builder.finish()
+    }
+}
+
+/// Checks a sparse-DFA regex against a PikeVM built from the same pattern,
+/// panicking if the two disagree on whether or where the haystack matches.
+///
+/// Returns `Corpus::Reject` if any engine cannot be built from the
+/// pattern, and `Corpus::Keep` once every comparison has passed.
+fn do_fuzz(data: FuzzData) -> Corpus {
+    let _ = env_logger::try_init();
+
+    let pattern = data.ast.to_string();
+    let config = NFA::config().nfa_size_limit(Some(1 << 20));
+    let Ok(nfa) = NFA::compiler().configure(config).build(&pattern) else {
+        return Corpus::Reject;
+    };
+    let Ok(baseline) = NfaRegex::new_from_nfa(nfa) else {
+        return Corpus::Reject;
+    };
+    let mut cache = baseline.create_cache();
+
+    let config = DFA::config().dfa_size_limit(Some(1 << 20));
+    let Ok(dense_re) = RegexBuilder::new().dense(config).build(&pattern) else {
+        return Corpus::Reject;
+    };
+    // The sparse regex is assembled from the converted forward/reverse DFAs.
+    let (Ok(fwd), Ok(rev)) =
+        (dense_re.forward().to_sparse(), dense_re.reverse().to_sparse())
+    else {
+        return Corpus::Reject;
+    };
+    let re = RegexBuilder::new().build_from_dfas(fwd, rev);
+
+    // Search the haystack (not the pattern) so that both engines are given
+    // the same input. A failed search yields nothing to compare.
+    if let Ok(maybe_match) = re.try_search(&Input::new(&data.haystack)) {
+        assert_eq!(
+            maybe_match.is_some(),
+            baseline.is_match(&mut cache, &data.haystack)
+        );
+        let found2 = baseline.find(&mut cache, &data.haystack);
+        if let Some(found1) = maybe_match {
+            let found2 =
+                found2.expect("Found in target, but not in baseline!");
+            assert_eq!(found1.start(), found2.start());
+            assert_eq!(found1.end(), found2.end());
+        }
+    }
+
+    // no captures
+
+    Corpus::Keep
+}
+
+fuzz_target!(|data: FuzzData| -> Corpus { do_fuzz(data) });
diff --git a/fuzz/oss-fuzz-build.sh b/fuzz/oss-fuzz-build.sh
index 81f619dcb..27ef8875d 100755
--- a/fuzz/oss-fuzz-build.sh
+++ b/fuzz/oss-fuzz-build.sh
@@ -3,16 +3,8 @@
 cd $SRC/regex
 cargo fuzz build -O --debug-assertions
 
-targets=(
-  fuzz_regex_match
-  fuzz_regex_lite_match
-  fuzz_regex_automata_deserialize_dense_dfa
-  fuzz_regex_automata_deserialize_sparse_dfa
-  ast_roundtrip
-  ast_fuzz_match
-  ast_fuzz_regex
-  ast_fuzz_match_bytes
-)
+# Word-split the output of `cargo fuzz list` into an array of target names.
+targets=( $(cargo fuzz list) )
 for target in "${targets[@]}"; do
   cp "fuzz/target/x86_64-unknown-linux-gnu/release/${target}" "${OUT}/"
   if [[ "$target" == ast_* ]]; then