From 62769657a627cad3f81dcb4f123ecd6338729e06 Mon Sep 17 00:00:00 2001 From: Addison Crump Date: Sat, 15 Jul 2023 16:51:47 +0200 Subject: [PATCH 01/10] begin work on differential fuzzers --- fuzz/Cargo.toml | 4 ++ fuzz/fuzz_targets/ast_diff_default.rs | 65 +++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 fuzz/fuzz_targets/ast_diff_default.rs diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index a7eec2c81..bd9041c90 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -67,6 +67,10 @@ path = "fuzz_targets/ast_fuzz_regex.rs" name = "ast_fuzz_match_bytes" path = "fuzz_targets/ast_fuzz_match_bytes.rs" +[[bin]] +name = "ast_diff_default" +path = "fuzz_targets/ast_diff_default.rs" + [profile.release] opt-level = 3 debug = true diff --git a/fuzz/fuzz_targets/ast_diff_default.rs b/fuzz/fuzz_targets/ast_diff_default.rs new file mode 100644 index 000000000..0f33ccf07 --- /dev/null +++ b/fuzz/fuzz_targets/ast_diff_default.rs @@ -0,0 +1,65 @@ +#![no_main] + +use { + libfuzzer_sys::{fuzz_target, Corpus}, + regex::RegexBuilder, + regex_automata::nfa::thompson::pikevm::PikeVM as NfaRegex, + regex_syntax::ast::Ast, +}; + +#[derive(Eq, PartialEq, arbitrary::Arbitrary)] +struct FuzzData { + ast: Ast, + haystack: String, +} + +impl std::fmt::Debug for FuzzData { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut builder = f.debug_struct("FuzzData"); + builder.field("ast", &format!("{}", self.ast)); + builder.field("haystack", &self.haystack); + builder.finish() + } +} + +fuzz_target!(|data: FuzzData| -> Corpus { + let _ = env_logger::try_init(); + + let pattern = format!("{}", data.ast); + let Ok(re) = RegexBuilder::new(&pattern).size_limit(1<<20).build() else { + return Corpus::Reject; + }; + let Ok(baseline) = NfaRegex::new(&pattern) else { + return Corpus::Reject; // should we error here? 
+ }; + let mut cache = baseline.create_cache(); + + assert_eq!( + re.is_match(&data.haystack), + baseline.is_match(&mut cache, &data.haystack) + ); + let found1 = re.find(&data.haystack); + let found2 = baseline.find(&mut cache, &data.haystack); + if let Some(found1) = found1 { + let found2 = found2.expect("Found in target, but not in baseline!"); + assert_eq!(found1.start(), found2.start()); + assert_eq!(found1.end(), found2.end()); + } + if let Some(captures) = re.captures(&data.haystack) { + let mut baseline_captures = baseline.create_captures(); + + baseline.captures(&mut cache, &data.haystack, &mut baseline_captures); + drop(cache); + assert_eq!(captures.len(), baseline_captures.group_len()); + for (c1, c2) in captures.iter().zip(baseline_captures.iter()) { + if let Some(c1) = c1 { + let c2 = c2.expect("Matched in target, but not baseline!"); + assert_eq!(c1.start(), c2.start); + assert_eq!(c1.end(), c2.end); + } else { + assert!(!c2.is_some(), "Matched in baseline, but not target!"); + } + } + } + Corpus::Keep +}); From b5c53cb1c5beec57a4c47fe6853770df2cca86e8 Mon Sep 17 00:00:00 2001 From: Addison Crump Date: Sat, 15 Jul 2023 16:55:30 +0200 Subject: [PATCH 02/10] add to oss-fuzz build --- fuzz/oss-fuzz-build.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/fuzz/oss-fuzz-build.sh b/fuzz/oss-fuzz-build.sh index 81f619dcb..8af78dd0e 100755 --- a/fuzz/oss-fuzz-build.sh +++ b/fuzz/oss-fuzz-build.sh @@ -12,6 +12,7 @@ targets=( ast_fuzz_match ast_fuzz_regex ast_fuzz_match_bytes + ast_diff_default ) for target in "${targets[@]}"; do cp "fuzz/target/x86_64-unknown-linux-gnu/release/${target}" "${OUT}/" From c1dcf3e6226e2e529a0328bbe8a98e117dcf2b5b Mon Sep 17 00:00:00 2001 From: Addison Crump Date: Sat, 15 Jul 2023 17:11:36 +0200 Subject: [PATCH 03/10] utilise nfa size limits to prevent oom --- fuzz/fuzz_targets/ast_diff_default.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fuzz/fuzz_targets/ast_diff_default.rs 
b/fuzz/fuzz_targets/ast_diff_default.rs index 0f33ccf07..40e5ff51b 100644 --- a/fuzz/fuzz_targets/ast_diff_default.rs +++ b/fuzz/fuzz_targets/ast_diff_default.rs @@ -3,7 +3,7 @@ use { libfuzzer_sys::{fuzz_target, Corpus}, regex::RegexBuilder, - regex_automata::nfa::thompson::pikevm::PikeVM as NfaRegex, + regex_automata::nfa::thompson::{pikevm::PikeVM as NfaRegex, NFA}, regex_syntax::ast::Ast, }; @@ -26,11 +26,15 @@ fuzz_target!(|data: FuzzData| -> Corpus { let _ = env_logger::try_init(); let pattern = format!("{}", data.ast); - let Ok(re) = RegexBuilder::new(&pattern).size_limit(1<<20).build() else { + let Ok(re) = RegexBuilder::new(&pattern).size_limit(1 << 20).build() else { return Corpus::Reject; }; - let Ok(baseline) = NfaRegex::new(&pattern) else { - return Corpus::Reject; // should we error here? + let config = NFA::config().nfa_size_limit(Some(1 << 20)); + let Ok(nfa) = NFA::compiler().configure(config).build(&pattern) else { + return Corpus::Reject; + }; + let Ok(baseline) = NfaRegex::new_from_nfa(nfa) else { + return Corpus::Reject; }; let mut cache = baseline.create_cache(); From 1be61a5643374c6dcf2eae06f5f2b5e3d697b9d8 Mon Sep 17 00:00:00 2001 From: Addison Crump Date: Sun, 16 Jul 2023 21:51:37 +0200 Subject: [PATCH 04/10] add diff_nfas, function out do_fuzz so clippy can do its thing --- fuzz/Cargo.toml | 4 ++ fuzz/fuzz_targets/ast_diff_default.rs | 8 ++- fuzz/fuzz_targets/ast_diff_nfas.rs | 94 +++++++++++++++++++++++++++ fuzz/oss-fuzz-build.sh | 12 +--- 4 files changed, 104 insertions(+), 14 deletions(-) create mode 100644 fuzz/fuzz_targets/ast_diff_nfas.rs diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index bd9041c90..211e5c0c3 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -71,6 +71,10 @@ path = "fuzz_targets/ast_fuzz_match_bytes.rs" name = "ast_diff_default" path = "fuzz_targets/ast_diff_default.rs" +[[bin]] +name = "ast_diff_nfas" +path = "fuzz_targets/ast_diff_nfas.rs" + [profile.release] opt-level = 3 debug = true diff --git 
a/fuzz/fuzz_targets/ast_diff_default.rs b/fuzz/fuzz_targets/ast_diff_default.rs index 40e5ff51b..cabf4fbcc 100644 --- a/fuzz/fuzz_targets/ast_diff_default.rs +++ b/fuzz/fuzz_targets/ast_diff_default.rs @@ -22,7 +22,7 @@ impl std::fmt::Debug for FuzzData { } } -fuzz_target!(|data: FuzzData| -> Corpus { +fn do_fuzz(data: FuzzData) -> Corpus { let _ = env_logger::try_init(); let pattern = format!("{}", data.ast); @@ -61,9 +61,11 @@ fuzz_target!(|data: FuzzData| -> Corpus { assert_eq!(c1.start(), c2.start); assert_eq!(c1.end(), c2.end); } else { - assert!(!c2.is_some(), "Matched in baseline, but not target!"); + assert!(c2.is_none(), "Matched in baseline, but not target!"); } } } Corpus::Keep -}); +} + +fuzz_target!(|data: FuzzData| -> Corpus { do_fuzz(data) }); diff --git a/fuzz/fuzz_targets/ast_diff_nfas.rs b/fuzz/fuzz_targets/ast_diff_nfas.rs new file mode 100644 index 000000000..0443a87a3 --- /dev/null +++ b/fuzz/fuzz_targets/ast_diff_nfas.rs @@ -0,0 +1,94 @@ +#![no_main] + +use { + libfuzzer_sys::{fuzz_target, Corpus}, + regex_automata::nfa::thompson::{ + backtrack::BoundedBacktracker, pikevm::PikeVM, NFA, + }, + regex_syntax::ast::Ast, +}; + +#[derive(Eq, PartialEq, arbitrary::Arbitrary)] +struct FuzzData { + ast: Ast, + haystack: String, +} + +impl std::fmt::Debug for FuzzData { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut builder = f.debug_struct("FuzzData"); + builder.field("ast", &format!("{}", self.ast)); + builder.field("haystack", &self.haystack); + builder.finish() + } +} + +fn do_fuzz(data: FuzzData) -> Corpus { + let _ = env_logger::try_init(); + + let pattern = format!("{}", data.ast); + let config = NFA::config().nfa_size_limit(Some(1 << 20)); + let Ok(nfa) = NFA::compiler().configure(config).build(&pattern) else { + return Corpus::Reject; + }; + let Ok(backtracker) = BoundedBacktracker::new_from_nfa(nfa.clone()) else { + return Corpus::Reject; }; + let mut backtracker_cache = backtracker.create_cache(); + let 
Ok(baseline) = PikeVM::new_from_nfa(nfa) else { + return Corpus::Reject; + }; + let mut baseline_cache = baseline.create_cache(); + + if let Ok(backtracked) = + backtracker.try_is_match(&mut backtracker_cache, &data.haystack) + { + assert_eq!( + backtracked, + baseline.is_match(&mut baseline_cache, &data.haystack) + ); + } + if let Ok(found1) = + backtracker.try_find(&mut backtracker_cache, &data.haystack) + { + let found2 = baseline.find(&mut baseline_cache, &data.haystack); + if let Some(found1) = found1 { + let found2 = + found2.expect("Found in target, but not in baseline!"); + assert_eq!(found1.start(), found2.start()); + assert_eq!(found1.end(), found2.end()); + } + } + let mut backtracker_captures = backtracker.create_captures(); + if let Ok(()) = backtracker.try_captures( + &mut backtracker_cache, + &data.haystack, + &mut backtracker_captures, + ) { + let mut baseline_captures = baseline.create_captures(); + + baseline.captures( + &mut baseline_cache, + &data.haystack, + &mut baseline_captures, + ); + drop(baseline_cache); + assert_eq!( + backtracker_captures.group_len(), + baseline_captures.group_len() + ); + for (c1, c2) in + backtracker_captures.iter().zip(baseline_captures.iter()) + { + if let Some(c1) = c1 { + let c2 = c2.expect("Matched in target, but not baseline!"); + assert_eq!(c1.start, c2.start); + assert_eq!(c1.end, c2.end); + } else { + assert!(c2.is_none(), "Matched in baseline, but not target!"); + } + } + } + Corpus::Keep +} + +fuzz_target!(|data: FuzzData| -> Corpus { do_fuzz(data) }); diff --git a/fuzz/oss-fuzz-build.sh b/fuzz/oss-fuzz-build.sh index 8af78dd0e..27ef8875d 100755 --- a/fuzz/oss-fuzz-build.sh +++ b/fuzz/oss-fuzz-build.sh @@ -3,17 +3,7 @@ cd $SRC/regex cargo fuzz build -O --debug-assertions -targets=( - fuzz_regex_match - fuzz_regex_lite_match - fuzz_regex_automata_deserialize_dense_dfa - fuzz_regex_automata_deserialize_sparse_dfa - ast_roundtrip - ast_fuzz_match - ast_fuzz_regex - ast_fuzz_match_bytes - ast_diff_default -) 
+targets=($(cargo fuzz list)) for target in "${targets[@]}"; do cp "fuzz/target/x86_64-unknown-linux-gnu/release/${target}" "${OUT}/" if [[ "$target" == ast_* ]]; then From eb2dbaa568b14f032c63a84bc4a9aac4e8582c7d Mon Sep 17 00:00:00 2001 From: Addison Crump Date: Sun, 16 Jul 2023 22:08:49 +0200 Subject: [PATCH 05/10] add dense dfa --- fuzz/Cargo.toml | 4 ++ fuzz/fuzz_targets/ast_diff_default.rs | 7 +-- fuzz/fuzz_targets/ast_diff_dense_dfa.rs | 62 +++++++++++++++++++++++++ fuzz/fuzz_targets/ast_diff_nfas.rs | 10 ++-- 4 files changed, 76 insertions(+), 7 deletions(-) create mode 100644 fuzz/fuzz_targets/ast_diff_dense_dfa.rs diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 211e5c0c3..d65fac5bf 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -71,6 +71,10 @@ path = "fuzz_targets/ast_fuzz_match_bytes.rs" name = "ast_diff_default" path = "fuzz_targets/ast_diff_default.rs" +[[bin]] +name = "ast_diff_dense_dfa" +path = "fuzz_targets/ast_diff_dense_dfa.rs" + [[bin]] name = "ast_diff_nfas" path = "fuzz_targets/ast_diff_nfas.rs" diff --git a/fuzz/fuzz_targets/ast_diff_default.rs b/fuzz/fuzz_targets/ast_diff_default.rs index cabf4fbcc..ed58c3507 100644 --- a/fuzz/fuzz_targets/ast_diff_default.rs +++ b/fuzz/fuzz_targets/ast_diff_default.rs @@ -26,9 +26,6 @@ fn do_fuzz(data: FuzzData) -> Corpus { let _ = env_logger::try_init(); let pattern = format!("{}", data.ast); - let Ok(re) = RegexBuilder::new(&pattern).size_limit(1 << 20).build() else { - return Corpus::Reject; - }; let config = NFA::config().nfa_size_limit(Some(1 << 20)); let Ok(nfa) = NFA::compiler().configure(config).build(&pattern) else { return Corpus::Reject; @@ -38,6 +35,10 @@ fn do_fuzz(data: FuzzData) -> Corpus { }; let mut cache = baseline.create_cache(); + let Ok(re) = RegexBuilder::new(&pattern).size_limit(1 << 20).build() else { + return Corpus::Reject; + }; + assert_eq!( re.is_match(&data.haystack), baseline.is_match(&mut cache, &data.haystack) diff --git a/fuzz/fuzz_targets/ast_diff_dense_dfa.rs 
b/fuzz/fuzz_targets/ast_diff_dense_dfa.rs new file mode 100644 index 000000000..2a073386a --- /dev/null +++ b/fuzz/fuzz_targets/ast_diff_dense_dfa.rs @@ -0,0 +1,62 @@ +#![no_main] + +use { + libfuzzer_sys::{fuzz_target, Corpus}, + regex_automata::{ + dfa::{dense::DFA, regex::Builder as RegexBuilder}, + nfa::thompson::{pikevm::PikeVM as NfaRegex, NFA}, + }, + regex_syntax::ast::Ast, +}; + +#[derive(Eq, PartialEq, arbitrary::Arbitrary)] +struct FuzzData { + ast: Ast, + haystack: String, +} + +impl std::fmt::Debug for FuzzData { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut builder = f.debug_struct("FuzzData"); + builder.field("ast", &format!("{}", self.ast)); + builder.field("haystack", &self.haystack); + builder.finish() + } +} + +fn do_fuzz(data: FuzzData) -> Corpus { + let _ = env_logger::try_init(); + + let pattern = format!("{}", data.ast); + let config = NFA::config().nfa_size_limit(Some(1 << 20)); + let Ok(nfa) = NFA::compiler().configure(config).build(&pattern) else { + return Corpus::Reject; + }; + let Ok(baseline) = NfaRegex::new_from_nfa(nfa) else { + return Corpus::Reject; + }; + let mut cache = baseline.create_cache(); + + let config = DFA::config().dfa_size_limit(Some(1 << 20)); + let Ok(re) = RegexBuilder::new().dense(config).build(&pattern) else { + return Corpus::Reject; + }; + + assert_eq!( + re.is_match(&data.haystack), + baseline.is_match(&mut cache, &data.haystack) + ); + let found1 = re.find(&data.haystack); + let found2 = baseline.find(&mut cache, &data.haystack); + if let Some(found1) = found1 { + let found2 = found2.expect("Found in target, but not in baseline!"); + assert_eq!(found1.start(), found2.start()); + assert_eq!(found1.end(), found2.end()); + } + + // no captures + + Corpus::Keep +} + +fuzz_target!(|data: FuzzData| -> Corpus { do_fuzz(data) }); diff --git a/fuzz/fuzz_targets/ast_diff_nfas.rs b/fuzz/fuzz_targets/ast_diff_nfas.rs index 0443a87a3..6e2670b86 100644 --- 
a/fuzz/fuzz_targets/ast_diff_nfas.rs +++ b/fuzz/fuzz_targets/ast_diff_nfas.rs @@ -31,14 +31,16 @@ fn do_fuzz(data: FuzzData) -> Corpus { let Ok(nfa) = NFA::compiler().configure(config).build(&pattern) else { return Corpus::Reject; }; - let Ok(backtracker) = BoundedBacktracker::new_from_nfa(nfa.clone()) else { - return Corpus::Reject; }; - let mut backtracker_cache = backtracker.create_cache(); - let Ok(baseline) = PikeVM::new_from_nfa(nfa) else { + let Ok(baseline) = PikeVM::new_from_nfa(nfa.clone()) else { return Corpus::Reject; }; let mut baseline_cache = baseline.create_cache(); + let Ok(backtracker) = BoundedBacktracker::new_from_nfa(nfa) else { + return Corpus::Reject; + }; + let mut backtracker_cache = backtracker.create_cache(); + if let Ok(backtracked) = backtracker.try_is_match(&mut backtracker_cache, &data.haystack) { From d8603f5ad3718aa8e753c897dd975b2c168edf68 Mon Sep 17 00:00:00 2001 From: Addison Crump Date: Sun, 16 Jul 2023 22:14:26 +0200 Subject: [PATCH 06/10] add sparse dfa --- fuzz/Cargo.toml | 4 ++ fuzz/fuzz_targets/ast_diff_sparse_dfa.rs | 66 ++++++++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 fuzz/fuzz_targets/ast_diff_sparse_dfa.rs diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index d65fac5bf..a4f2ad6a4 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -75,6 +75,10 @@ path = "fuzz_targets/ast_diff_default.rs" name = "ast_diff_dense_dfa" path = "fuzz_targets/ast_diff_dense_dfa.rs" +[[bin]] +name = "ast_diff_sparse_dfa" +path = "fuzz_targets/ast_diff_sparse_dfa.rs" + [[bin]] name = "ast_diff_nfas" path = "fuzz_targets/ast_diff_nfas.rs" diff --git a/fuzz/fuzz_targets/ast_diff_sparse_dfa.rs b/fuzz/fuzz_targets/ast_diff_sparse_dfa.rs new file mode 100644 index 000000000..4e560f685 --- /dev/null +++ b/fuzz/fuzz_targets/ast_diff_sparse_dfa.rs @@ -0,0 +1,66 @@ +#![no_main] + +use { + libfuzzer_sys::{fuzz_target, Corpus}, + regex_automata::{ + dfa::{dense::DFA, regex::Builder as RegexBuilder}, + 
nfa::thompson::{pikevm::PikeVM as NfaRegex, NFA}, + }, + regex_syntax::ast::Ast, +}; + +#[derive(Eq, PartialEq, arbitrary::Arbitrary)] +struct FuzzData { + ast: Ast, + haystack: String, +} + +impl std::fmt::Debug for FuzzData { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut builder = f.debug_struct("FuzzData"); + builder.field("ast", &format!("{}", self.ast)); + builder.field("haystack", &self.haystack); + builder.finish() + } +} + +fn do_fuzz(data: FuzzData) -> Corpus { + let _ = env_logger::try_init(); + + let pattern = format!("{}", data.ast); + let config = NFA::config().nfa_size_limit(Some(1 << 20)); + let Ok(nfa) = NFA::compiler().configure(config).build(&pattern) else { + return Corpus::Reject; + }; + let Ok(baseline) = NfaRegex::new_from_nfa(nfa) else { + return Corpus::Reject; + }; + let mut cache = baseline.create_cache(); + + let config = DFA::config().dfa_size_limit(Some(1 << 20)); + let Ok(dense_re) = RegexBuilder::new().dense(config).build(&pattern) else { + return Corpus::Reject; + }; + let (Ok(fwd), Ok(rev)) = (dense_re.forward().to_sparse(), dense_re.reverse().to_sparse()) else { + return Corpus::Reject; + }; + let re = RegexBuilder::new().build_from_dfas(fwd, rev); + + assert_eq!( + re.is_match(&data.haystack), + baseline.is_match(&mut cache, &data.haystack) + ); + let found1 = re.find(&data.haystack); + let found2 = baseline.find(&mut cache, &data.haystack); + if let Some(found1) = found1 { + let found2 = found2.expect("Found in target, but not in baseline!"); + assert_eq!(found1.start(), found2.start()); + assert_eq!(found1.end(), found2.end()); + } + + // no captures + + Corpus::Keep +} + +fuzz_target!(|data: FuzzData| -> Corpus { do_fuzz(data) }); From 308448d283ba188062a13aeaf4fe2cebbe672254 Mon Sep 17 00:00:00 2001 From: Addison Crump Date: Sun, 16 Jul 2023 22:20:44 +0200 Subject: [PATCH 07/10] onepass diff --- fuzz/Cargo.toml | 4 ++ fuzz/fuzz_targets/ast_diff_onepass_dfa.rs | 82 +++++++++++++++++++++++ 2 
files changed, 86 insertions(+) create mode 100644 fuzz/fuzz_targets/ast_diff_onepass_dfa.rs diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index a4f2ad6a4..30a47aa8c 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -75,6 +75,10 @@ path = "fuzz_targets/ast_diff_default.rs" name = "ast_diff_dense_dfa" path = "fuzz_targets/ast_diff_dense_dfa.rs" +[[bin]] +name = "ast_diff_onepass_dfa" +path = "fuzz_targets/ast_diff_onepass_dfa.rs" + [[bin]] name = "ast_diff_sparse_dfa" path = "fuzz_targets/ast_diff_sparse_dfa.rs" diff --git a/fuzz/fuzz_targets/ast_diff_onepass_dfa.rs b/fuzz/fuzz_targets/ast_diff_onepass_dfa.rs new file mode 100644 index 000000000..631f9d0a7 --- /dev/null +++ b/fuzz/fuzz_targets/ast_diff_onepass_dfa.rs @@ -0,0 +1,82 @@ +#![no_main] + +use { + libfuzzer_sys::{fuzz_target, Corpus}, + regex_automata::{ + dfa::{onepass::Builder as RegexBuilder, onepass::DFA}, + nfa::thompson::{pikevm::PikeVM as NfaRegex, NFA}, + }, + regex_syntax::ast::Ast, +}; + +#[derive(Eq, PartialEq, arbitrary::Arbitrary)] +struct FuzzData { + ast: Ast, + haystack: String, +} + +impl std::fmt::Debug for FuzzData { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut builder = f.debug_struct("FuzzData"); + builder.field("ast", &format!("{}", self.ast)); + builder.field("haystack", &self.haystack); + builder.finish() + } +} + +fn do_fuzz(data: FuzzData) -> Corpus { + let _ = env_logger::try_init(); + + let pattern = format!("{}", data.ast); + let config = NFA::config().nfa_size_limit(Some(1 << 20)); + let Ok(nfa) = NFA::compiler().configure(config).build(&pattern) else { + return Corpus::Reject; + }; + let Ok(baseline) = NfaRegex::new_from_nfa(nfa) else { + return Corpus::Reject; + }; + let mut baseline_cache = baseline.create_cache(); + + let config = DFA::config().size_limit(Some(1 << 20)); + let Ok(re) = RegexBuilder::new().configure(config).build(&pattern) else { + return Corpus::Reject; + }; + let mut onepass_cache = re.create_cache(); + + 
assert_eq!( + re.is_match(&mut onepass_cache, &data.haystack), + baseline.is_match(&mut baseline_cache, &data.haystack) + ); + let found1 = re.find(&mut onepass_cache, &data.haystack); + let found2 = baseline.find(&mut baseline_cache, &data.haystack); + if let Some(found1) = found1 { + let found2 = found2.expect("Found in target, but not in baseline!"); + assert_eq!(found1.start(), found2.start()); + assert_eq!(found1.end(), found2.end()); + } + + let mut onepass_captures = re.create_captures(); + re.captures(&mut onepass_cache, &data.haystack, &mut onepass_captures); + + let mut baseline_captures = baseline.create_captures(); + baseline.captures( + &mut baseline_cache, + &data.haystack, + &mut baseline_captures, + ); + drop(baseline_cache); + assert_eq!(onepass_captures.group_len(), baseline_captures.group_len()); + for (c1, c2) in onepass_captures.iter().zip(baseline_captures.iter()) { + if let Some(c1) = c1 { + let c2 = c2.expect("Matched in target, but not baseline!"); + assert_eq!(c1.start, c2.start); + assert_eq!(c1.end, c2.end); + } else { + assert!(c2.is_none(), "Matched in baseline, but not target!"); + } + } + + Corpus::Keep +} + +fuzz_target!(|data: FuzzData| -> Corpus { do_fuzz(data) }); From 5b9ab60da9b1704dca671b2eefbc3ef5b07565f4 Mon Sep 17 00:00:00 2001 From: Addison Crump Date: Sun, 16 Jul 2023 22:21:13 +0200 Subject: [PATCH 08/10] rename for consistency --- fuzz/fuzz_targets/ast_diff_onepass_dfa.rs | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/fuzz/fuzz_targets/ast_diff_onepass_dfa.rs b/fuzz/fuzz_targets/ast_diff_onepass_dfa.rs index 631f9d0a7..f6c81d4d5 100644 --- a/fuzz/fuzz_targets/ast_diff_onepass_dfa.rs +++ b/fuzz/fuzz_targets/ast_diff_onepass_dfa.rs @@ -38,16 +38,16 @@ fn do_fuzz(data: FuzzData) -> Corpus { let mut baseline_cache = baseline.create_cache(); let config = DFA::config().size_limit(Some(1 << 20)); - let Ok(re) = RegexBuilder::new().configure(config).build(&pattern) else { + let Ok(onepass) = 
RegexBuilder::new().configure(config).build(&pattern) else { return Corpus::Reject; }; - let mut onepass_cache = re.create_cache(); + let mut onepass_cache = onepass.create_cache(); assert_eq!( - re.is_match(&mut onepass_cache, &data.haystack), + onepass.is_match(&mut onepass_cache, &data.haystack), baseline.is_match(&mut baseline_cache, &data.haystack) ); - let found1 = re.find(&mut onepass_cache, &data.haystack); + let found1 = onepass.find(&mut onepass_cache, &data.haystack); let found2 = baseline.find(&mut baseline_cache, &data.haystack); if let Some(found1) = found1 { let found2 = found2.expect("Found in target, but not in baseline!"); @@ -55,8 +55,12 @@ fn do_fuzz(data: FuzzData) -> Corpus { assert_eq!(found1.end(), found2.end()); } - let mut onepass_captures = re.create_captures(); - re.captures(&mut onepass_cache, &data.haystack, &mut onepass_captures); + let mut onepass_captures = onepass.create_captures(); + onepass.captures( + &mut onepass_cache, + &data.haystack, + &mut onepass_captures, + ); let mut baseline_captures = baseline.create_captures(); baseline.captures( From 2a5d8aa5d1235658fbf74bfc902f666a2e9fb26c Mon Sep 17 00:00:00 2001 From: Addison Crump Date: Sun, 16 Jul 2023 22:31:37 +0200 Subject: [PATCH 09/10] hybrid diff --- fuzz/Cargo.toml | 4 ++ fuzz/fuzz_targets/ast_diff_hybrid.rs | 63 ++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 fuzz/fuzz_targets/ast_diff_hybrid.rs diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 30a47aa8c..c4346abb8 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -83,6 +83,10 @@ path = "fuzz_targets/ast_diff_onepass_dfa.rs" name = "ast_diff_sparse_dfa" path = "fuzz_targets/ast_diff_sparse_dfa.rs" +[[bin]] +name = "ast_diff_hybrid" +path = "fuzz_targets/ast_diff_hybrid.rs" + [[bin]] name = "ast_diff_nfas" path = "fuzz_targets/ast_diff_nfas.rs" diff --git a/fuzz/fuzz_targets/ast_diff_hybrid.rs b/fuzz/fuzz_targets/ast_diff_hybrid.rs new file mode 100644 index 000000000..700c50f9c 
--- /dev/null +++ b/fuzz/fuzz_targets/ast_diff_hybrid.rs @@ -0,0 +1,63 @@ +#![no_main] + +use { + libfuzzer_sys::{fuzz_target, Corpus}, + regex_automata::{ + hybrid::{dfa::DFA, regex::Builder as RegexBuilder}, + nfa::thompson::{pikevm::PikeVM as NfaRegex, NFA}, + }, + regex_syntax::ast::Ast, +}; + +#[derive(Eq, PartialEq, arbitrary::Arbitrary)] +struct FuzzData { + ast: Ast, + haystack: String, +} + +impl std::fmt::Debug for FuzzData { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut builder = f.debug_struct("FuzzData"); + builder.field("ast", &format!("{}", self.ast)); + builder.field("haystack", &self.haystack); + builder.finish() + } +} + +fn do_fuzz(data: FuzzData) -> Corpus { + let _ = env_logger::try_init(); + + let pattern = format!("{}", data.ast); + let config = NFA::config().nfa_size_limit(Some(1 << 20)); + let Ok(nfa) = NFA::compiler().configure(config).build(&pattern) else { + return Corpus::Reject; + }; + let Ok(baseline) = NfaRegex::new_from_nfa(nfa) else { + return Corpus::Reject; + }; + let mut cache = baseline.create_cache(); + + let config = DFA::config().cache_capacity(1 << 20); + let Ok(re) = RegexBuilder::new().dfa(config).build(&pattern) else { + return Corpus::Reject; + }; + let mut hybrid_cache = re.create_cache(); + + assert_eq!( + re.is_match(&mut hybrid_cache, &data.haystack), + baseline.is_match(&mut cache, &data.haystack) + ); + let found1 = re.find(&mut hybrid_cache, &data.haystack); + let found2 = baseline.find(&mut cache, &data.haystack); + if let Some(found1) = found1 { + let found2 = found2.expect("Found in target, but not in baseline!"); + assert_eq!(found1.start(), found2.start()); + assert_eq!(found1.end(), found2.end()); + } + + // no captures + + Corpus::Keep +} + +fuzz_target!(|data: FuzzData| -> Corpus { do_fuzz(data) }); From 5c0e0876f42983da193c619d1bc0b09fe1fad336 Mon Sep 17 00:00:00 2001 From: Addison Crump Date: Sun, 16 Jul 2023 22:37:53 +0200 Subject: [PATCH 10/10] panic safety --- 
fuzz/fuzz_targets/ast_diff_dense_dfa.rs | 23 ++++++++++++---------- fuzz/fuzz_targets/ast_diff_hybrid.rs | 25 ++++++++++++++---------- fuzz/fuzz_targets/ast_diff_nfas.rs | 13 +++++++----- fuzz/fuzz_targets/ast_diff_sparse_dfa.rs | 23 ++++++++++++---------- 4 files changed, 49 insertions(+), 35 deletions(-) diff --git a/fuzz/fuzz_targets/ast_diff_dense_dfa.rs b/fuzz/fuzz_targets/ast_diff_dense_dfa.rs index 2a073386a..cf83b503e 100644 --- a/fuzz/fuzz_targets/ast_diff_dense_dfa.rs +++ b/fuzz/fuzz_targets/ast_diff_dense_dfa.rs @@ -1,5 +1,6 @@ #![no_main] +use regex_automata::Input; use { libfuzzer_sys::{fuzz_target, Corpus}, regex_automata::{ @@ -42,16 +43,18 @@ fn do_fuzz(data: FuzzData) -> Corpus { return Corpus::Reject; }; - assert_eq!( - re.is_match(&data.haystack), - baseline.is_match(&mut cache, &data.haystack) - ); - let found1 = re.find(&data.haystack); - let found2 = baseline.find(&mut cache, &data.haystack); - if let Some(found1) = found1 { - let found2 = found2.expect("Found in target, but not in baseline!"); - assert_eq!(found1.start(), found2.start()); - assert_eq!(found1.end(), found2.end()); + if let Ok(maybe_match) = re.try_search(&Input::new(&data.haystack)) { + assert_eq!( + maybe_match.is_some(), + baseline.is_match(&mut cache, &data.haystack) + ); + let found2 = baseline.find(&mut cache, &data.haystack); + if let Some(found1) = maybe_match { + let found2 = + found2.expect("Found in target, but not in baseline!"); + assert_eq!(found1.start(), found2.start()); + assert_eq!(found1.end(), found2.end()); + } } // no captures diff --git a/fuzz/fuzz_targets/ast_diff_hybrid.rs b/fuzz/fuzz_targets/ast_diff_hybrid.rs index 700c50f9c..0e2016d93 100644 --- a/fuzz/fuzz_targets/ast_diff_hybrid.rs +++ b/fuzz/fuzz_targets/ast_diff_hybrid.rs @@ -1,5 +1,6 @@ #![no_main] +use regex_automata::Input; use { libfuzzer_sys::{fuzz_target, Corpus}, regex_automata::{ @@ -43,16 +44,20 @@ fn do_fuzz(data: FuzzData) -> Corpus { }; let mut hybrid_cache = re.create_cache(); - 
assert_eq!( - re.is_match(&mut hybrid_cache, &data.haystack), - baseline.is_match(&mut cache, &data.haystack) - ); - let found1 = re.find(&mut hybrid_cache, &data.haystack); - let found2 = baseline.find(&mut cache, &data.haystack); - if let Some(found1) = found1 { - let found2 = found2.expect("Found in target, but not in baseline!"); - assert_eq!(found1.start(), found2.start()); - assert_eq!(found1.end(), found2.end()); + if let Ok(maybe_match) = + re.try_search(&mut hybrid_cache, &Input::new(&data.haystack)) + { + assert_eq!( + maybe_match.is_some(), + baseline.is_match(&mut cache, &data.haystack) + ); + let found2 = baseline.find(&mut cache, &data.haystack); + if let Some(found1) = maybe_match { + let found2 = + found2.expect("Found in target, but not in baseline!"); + assert_eq!(found1.start(), found2.start()); + assert_eq!(found1.end(), found2.end()); + } } // no captures diff --git a/fuzz/fuzz_targets/ast_diff_nfas.rs b/fuzz/fuzz_targets/ast_diff_nfas.rs index 6e2670b86..294c68ba9 100644 --- a/fuzz/fuzz_targets/ast_diff_nfas.rs +++ b/fuzz/fuzz_targets/ast_diff_nfas.rs @@ -61,11 +61,14 @@ fn do_fuzz(data: FuzzData) -> Corpus { } } let mut backtracker_captures = backtracker.create_captures(); - if let Ok(()) = backtracker.try_captures( - &mut backtracker_cache, - &data.haystack, - &mut backtracker_captures, - ) { + if backtracker + .try_captures( + &mut backtracker_cache, + &data.haystack, + &mut backtracker_captures, + ) + .is_ok() + { let mut baseline_captures = baseline.create_captures(); baseline.captures( diff --git a/fuzz/fuzz_targets/ast_diff_sparse_dfa.rs b/fuzz/fuzz_targets/ast_diff_sparse_dfa.rs index 4e560f685..a0fb25a12 100644 --- a/fuzz/fuzz_targets/ast_diff_sparse_dfa.rs +++ b/fuzz/fuzz_targets/ast_diff_sparse_dfa.rs @@ -1,5 +1,6 @@ #![no_main] +use regex_automata::Input; use { libfuzzer_sys::{fuzz_target, Corpus}, regex_automata::{ @@ -46,16 +47,18 @@ fn do_fuzz(data: FuzzData) -> Corpus { }; let re = RegexBuilder::new().build_from_dfas(fwd, rev); - 
assert_eq!( - re.is_match(&data.haystack), - baseline.is_match(&mut cache, &data.haystack) - ); - let found1 = re.find(&data.haystack); - let found2 = baseline.find(&mut cache, &data.haystack); - if let Some(found1) = found1 { - let found2 = found2.expect("Found in target, but not in baseline!"); - assert_eq!(found1.start(), found2.start()); - assert_eq!(found1.end(), found2.end()); + if let Ok(maybe_match) = re.try_search(&Input::new(&data.haystack)) { + assert_eq!( + maybe_match.is_some(), + baseline.is_match(&mut cache, &data.haystack) + ); + let found2 = baseline.find(&mut cache, &data.haystack); + if let Some(found1) = maybe_match { + let found2 = + found2.expect("Found in target, but not in baseline!"); + assert_eq!(found1.start(), found2.start()); + assert_eq!(found1.end(), found2.end()); + } } // no captures