From 0873b6f05f36edbc10a6fc98ab3b9a8563fee457 Mon Sep 17 00:00:00 2001
From: Addison Crump
Date: Sat, 15 Jul 2023 16:51:47 +0200
Subject: [PATCH] begin work on differential fuzzers

---
 fuzz/Cargo.toml                       |  4 ++
 fuzz/fuzz_targets/ast_diff_default.rs | 86 +++++++++++++++++++++++++++
 2 files changed, 90 insertions(+)
 create mode 100644 fuzz/fuzz_targets/ast_diff_default.rs

diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml
index a7eec2c81..bd9041c90 100644
--- a/fuzz/Cargo.toml
+++ b/fuzz/Cargo.toml
@@ -67,6 +67,10 @@ path = "fuzz_targets/ast_fuzz_regex.rs"
 name = "ast_fuzz_match_bytes"
 path = "fuzz_targets/ast_fuzz_match_bytes.rs"
 
+[[bin]]
+name = "ast_diff_default"
+path = "fuzz_targets/ast_diff_default.rs"
+
 [profile.release]
 opt-level = 3
 debug = true
diff --git a/fuzz/fuzz_targets/ast_diff_default.rs b/fuzz/fuzz_targets/ast_diff_default.rs
new file mode 100644
index 000000000..0f33ccf07
--- /dev/null
+++ b/fuzz/fuzz_targets/ast_diff_default.rs
@@ -0,0 +1,86 @@
+#![no_main]
+
+use {
+    libfuzzer_sys::{fuzz_target, Corpus},
+    regex::RegexBuilder,
+    regex_automata::nfa::thompson::pikevm::PikeVM as NfaRegex,
+    regex_syntax::ast::Ast,
+};
+
+/// Fuzzer input: a regex AST plus the haystack to search with it.
+#[derive(Eq, PartialEq, arbitrary::Arbitrary)]
+struct FuzzData {
+    /// Structured pattern; it is printed back to a string before compiling.
+    ast: Ast,
+    /// Text that both engines search.
+    haystack: String,
+}
+
+// Manual impl so `ast` is reported through its `Display` output.
+impl std::fmt::Debug for FuzzData {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("FuzzData")
+            .field("ast", &self.ast.to_string())
+            .field("haystack", &self.haystack)
+            .finish()
+    }
+}
+
+// Differential target: the top-level `regex` engine and the PikeVM must agree
+// on `is_match`, on the overall match span and on every capture group span.
+fuzz_target!(|data: FuzzData| -> Corpus {
+    let _ = env_logger::try_init();
+
+    let pattern = data.ast.to_string();
+    let Ok(re) = RegexBuilder::new(&pattern).size_limit(1 << 20).build() else {
+        return Corpus::Reject;
+    };
+    let Ok(baseline) = NfaRegex::new(&pattern) else {
+        return Corpus::Reject; // should we error here?
+    };
+    let mut cache = baseline.create_cache();
+
+    assert_eq!(
+        re.is_match(&data.haystack),
+        baseline.is_match(&mut cache, &data.haystack)
+    );
+
+    // Compare in both directions: a match seen by only one engine is a bug
+    // no matter which engine reported it.
+    let found1 = re.find(&data.haystack);
+    let found2 = baseline.find(&mut cache, &data.haystack);
+    match (found1, found2) {
+        (Some(found1), Some(found2)) => {
+            assert_eq!(found1.start(), found2.start());
+            assert_eq!(found1.end(), found2.end());
+        }
+        (Some(_), None) => panic!("Found in target, but not in baseline!"),
+        (None, Some(_)) => panic!("Found in baseline, but not in target!"),
+        (None, None) => {}
+    }
+
+    // Always run the baseline so a target that reports no captures is still
+    // checked against it.
+    let mut baseline_captures = baseline.create_captures();
+    baseline.captures(&mut cache, &data.haystack, &mut baseline_captures);
+    let Some(captures) = re.captures(&data.haystack) else {
+        assert!(
+            !baseline_captures.is_match(),
+            "Captured in baseline, but not target!"
+        );
+        return Corpus::Keep;
+    };
+    assert_eq!(captures.len(), baseline_captures.group_len());
+    for (c1, c2) in captures.iter().zip(baseline_captures.iter()) {
+        match (c1, c2) {
+            (Some(c1), Some(c2)) => {
+                assert_eq!(c1.start(), c2.start);
+                assert_eq!(c1.end(), c2.end);
+            }
+            (Some(_), None) => panic!("Matched in target, but not baseline!"),
+            (None, Some(_)) => panic!("Matched in baseline, but not target!"),
+            (None, None) => {}
+        }
+    }
+    Corpus::Keep
+});