Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create differential fuzzers and add them to OSS-Fuzz #1044

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
24 changes: 24 additions & 0 deletions fuzz/Cargo.toml
Expand Up @@ -67,6 +67,30 @@ path = "fuzz_targets/ast_fuzz_regex.rs"
name = "ast_fuzz_match_bytes"
path = "fuzz_targets/ast_fuzz_match_bytes.rs"

# Fuzz-target binaries for the `ast_diff_*` family. Each `[[bin]]` table maps
# a target name to its source file under `fuzz_targets/`.
[[bin]]
name = "ast_diff_default"
path = "fuzz_targets/ast_diff_default.rs"

[[bin]]
name = "ast_diff_dense_dfa"
path = "fuzz_targets/ast_diff_dense_dfa.rs"

[[bin]]
name = "ast_diff_onepass_dfa"
path = "fuzz_targets/ast_diff_onepass_dfa.rs"

[[bin]]
name = "ast_diff_sparse_dfa"
path = "fuzz_targets/ast_diff_sparse_dfa.rs"

[[bin]]
name = "ast_diff_hybrid"
path = "fuzz_targets/ast_diff_hybrid.rs"

[[bin]]
name = "ast_diff_nfas"
path = "fuzz_targets/ast_diff_nfas.rs"

[profile.release]
opt-level = 3
debug = true
Expand Down
72 changes: 72 additions & 0 deletions fuzz/fuzz_targets/ast_diff_default.rs
@@ -0,0 +1,72 @@
#![no_main]

use {
libfuzzer_sys::{fuzz_target, Corpus},
regex::RegexBuilder,
regex_automata::nfa::thompson::{pikevm::PikeVM as NfaRegex, NFA},
regex_syntax::ast::Ast,
};

/// Structured fuzzer input: a regex AST plus the text to search.
///
/// Both fields are produced by the derived `arbitrary::Arbitrary` impl.
#[derive(Eq, PartialEq, arbitrary::Arbitrary)]
struct FuzzData {
/// Regex syntax tree; `do_fuzz` renders it to a pattern string via `Display`.
ast: Ast,
/// Text that both the target regex and the baseline engine are run against.
haystack: String,
}

// Hand-written `Debug`: the `ast` field is shown through its `Display`
// rendering (the pattern text) rather than through `Ast`'s own `Debug` output.
impl std::fmt::Debug for FuzzData {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let rendered_ast = self.ast.to_string();
        f.debug_struct("FuzzData")
            .field("ast", &rendered_ast)
            .field("haystack", &self.haystack)
            .finish()
    }
}

/// Differentially tests the top-level `regex` crate against a PikeVM baseline.
///
/// The AST is rendered to a pattern and compiled by both engines with a
/// `1 << 20` size limit. If either build fails the input is rejected.
/// Otherwise `is_match`, `find` and (when the target matches) every capture
/// group are compared, panicking on any disagreement.
///
/// Returns `Corpus::Reject` when the pattern cannot be built by both engines
/// and `Corpus::Keep` once the comparisons have run.
fn do_fuzz(data: FuzzData) -> Corpus {
    let _ = env_logger::try_init();

    let haystack = data.haystack.as_str();
    let pattern = data.ast.to_string();

    // Baseline: Thompson NFA executed by the PikeVM.
    let nfa_config = NFA::config().nfa_size_limit(Some(1 << 20));
    let nfa = match NFA::compiler().configure(nfa_config).build(&pattern) {
        Ok(nfa) => nfa,
        Err(_) => return Corpus::Reject,
    };
    let baseline = match NfaRegex::new_from_nfa(nfa) {
        Ok(vm) => vm,
        Err(_) => return Corpus::Reject,
    };
    let mut cache = baseline.create_cache();

    // Target: the default `regex::Regex`.
    let re = match RegexBuilder::new(&pattern).size_limit(1 << 20).build() {
        Ok(re) => re,
        Err(_) => return Corpus::Reject,
    };

    // 1. Match / no-match verdicts must agree.
    let target_matched = re.is_match(haystack);
    let baseline_matched = baseline.is_match(&mut cache, haystack);
    assert_eq!(target_matched, baseline_matched);

    // 2. Overall match bounds must agree whenever the target finds a match.
    let target_found = re.find(haystack);
    let baseline_found = baseline.find(&mut cache, haystack);
    if let Some(target_span) = target_found {
        let baseline_span =
            baseline_found.expect("Found in target, but not in baseline!");
        assert_eq!(target_span.start(), baseline_span.start());
        assert_eq!(target_span.end(), baseline_span.end());
    }

    // 3. Capture groups are only compared when the target reports a match.
    let Some(target_caps) = re.captures(haystack) else {
        return Corpus::Keep;
    };
    let mut baseline_caps = baseline.create_captures();
    baseline.captures(&mut cache, haystack, &mut baseline_caps);
    drop(cache);

    assert_eq!(target_caps.len(), baseline_caps.group_len());
    for pair in target_caps.iter().zip(baseline_caps.iter()) {
        match pair {
            (Some(target_group), baseline_group) => {
                let baseline_group = baseline_group
                    .expect("Matched in target, but not baseline!");
                assert_eq!(target_group.start(), baseline_group.start);
                assert_eq!(target_group.end(), baseline_group.end);
            }
            (None, baseline_group) => {
                assert!(
                    baseline_group.is_none(),
                    "Matched in baseline, but not target!"
                );
            }
        }
    }

    Corpus::Keep
}

// libFuzzer entry point: each generated `FuzzData` is handed to `do_fuzz`,
// whose `Corpus` result is returned to the fuzzer.
fuzz_target!(|data: FuzzData| -> Corpus { do_fuzz(data) });
65 changes: 65 additions & 0 deletions fuzz/fuzz_targets/ast_diff_dense_dfa.rs
@@ -0,0 +1,65 @@
#![no_main]

use regex_automata::Input;
use {
libfuzzer_sys::{fuzz_target, Corpus},
regex_automata::{
dfa::{dense::DFA, regex::Builder as RegexBuilder},
nfa::thompson::{pikevm::PikeVM as NfaRegex, NFA},
},
regex_syntax::ast::Ast,
};

/// Structured fuzzer input: a regex AST plus the text to search.
///
/// Both fields are produced by the derived `arbitrary::Arbitrary` impl.
#[derive(Eq, PartialEq, arbitrary::Arbitrary)]
struct FuzzData {
/// Regex syntax tree; `do_fuzz` renders it to a pattern string via `Display`.
ast: Ast,
/// Text the engines under comparison are meant to search.
haystack: String,
}

// Hand-written `Debug`: the `ast` field is shown through its `Display`
// rendering (the pattern text) rather than through `Ast`'s own `Debug` output.
impl std::fmt::Debug for FuzzData {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let rendered_ast = self.ast.to_string();
        f.debug_struct("FuzzData")
            .field("ast", &rendered_ast)
            .field("haystack", &self.haystack)
            .finish()
    }
}

/// Differentially tests the dense-DFA regex against a PikeVM baseline.
///
/// The AST is rendered to a pattern and compiled by both engines with a
/// `1 << 20` size limit. If either build fails the input is rejected.
/// When the DFA search completes without error, its match verdict and match
/// bounds on `data.haystack` must agree with the baseline's; any
/// disagreement panics.
///
/// Returns `Corpus::Reject` when the pattern cannot be built by both engines
/// and `Corpus::Keep` otherwise.
fn do_fuzz(data: FuzzData) -> Corpus {
    let _ = env_logger::try_init();

    let pattern = data.ast.to_string();

    // Baseline: Thompson NFA executed by the PikeVM.
    let config = NFA::config().nfa_size_limit(Some(1 << 20));
    let Ok(nfa) = NFA::compiler().configure(config).build(&pattern) else {
        return Corpus::Reject;
    };
    let Ok(baseline) = NfaRegex::new_from_nfa(nfa) else {
        return Corpus::Reject;
    };
    let mut cache = baseline.create_cache();

    // Target: regex backed by dense DFAs.
    let config = DFA::config().dfa_size_limit(Some(1 << 20));
    let Ok(re) = RegexBuilder::new().dense(config).build(&pattern) else {
        return Corpus::Reject;
    };

    // Search the haystack, the same input the baseline is given. Searching
    // any other text (such as the pattern source) would compare results for
    // two different inputs. A search error yields no verdict, so the
    // comparison is skipped in that case.
    if let Ok(maybe_match) = re.try_search(&Input::new(&data.haystack)) {
        assert_eq!(
            maybe_match.is_some(),
            baseline.is_match(&mut cache, &data.haystack)
        );
        let found2 = baseline.find(&mut cache, &data.haystack);
        if let Some(found1) = maybe_match {
            let found2 =
                found2.expect("Found in target, but not in baseline!");
            assert_eq!(found1.start(), found2.start());
            assert_eq!(found1.end(), found2.end());
        }
    }

    // Capture groups are not compared for this target; only the overall
    // match is checked.

    Corpus::Keep
}

// libFuzzer entry point: each generated `FuzzData` is handed to `do_fuzz`,
// whose `Corpus` result is returned to the fuzzer.
fuzz_target!(|data: FuzzData| -> Corpus { do_fuzz(data) });
68 changes: 68 additions & 0 deletions fuzz/fuzz_targets/ast_diff_hybrid.rs
@@ -0,0 +1,68 @@
#![no_main]

use regex_automata::Input;
use {
libfuzzer_sys::{fuzz_target, Corpus},
regex_automata::{
hybrid::{dfa::DFA, regex::Builder as RegexBuilder},
nfa::thompson::{pikevm::PikeVM as NfaRegex, NFA},
},
regex_syntax::ast::Ast,
};

/// Structured fuzzer input: a regex AST plus the text to search.
///
/// Both fields are produced by the derived `arbitrary::Arbitrary` impl.
#[derive(Eq, PartialEq, arbitrary::Arbitrary)]
struct FuzzData {
/// Regex syntax tree; `do_fuzz` renders it to a pattern string via `Display`.
ast: Ast,
/// Text the engines under comparison are meant to search.
haystack: String,
}

// Hand-written `Debug`: the `ast` field is shown through its `Display`
// rendering (the pattern text) rather than through `Ast`'s own `Debug` output.
impl std::fmt::Debug for FuzzData {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let rendered_ast = self.ast.to_string();
        f.debug_struct("FuzzData")
            .field("ast", &rendered_ast)
            .field("haystack", &self.haystack)
            .finish()
    }
}

/// Differentially tests the hybrid (lazy DFA) regex against a PikeVM baseline.
///
/// The AST is rendered to a pattern and compiled by both engines; the NFA is
/// capped at `1 << 20` and the lazy DFA is configured with a `1 << 20` cache
/// capacity. If either build fails the input is rejected. When the hybrid
/// search completes without error, its match verdict and match bounds on
/// `data.haystack` must agree with the baseline's; any disagreement panics.
///
/// Returns `Corpus::Reject` when the pattern cannot be built by both engines
/// and `Corpus::Keep` otherwise.
fn do_fuzz(data: FuzzData) -> Corpus {
    let _ = env_logger::try_init();

    let pattern = data.ast.to_string();

    // Baseline: Thompson NFA executed by the PikeVM.
    let config = NFA::config().nfa_size_limit(Some(1 << 20));
    let Ok(nfa) = NFA::compiler().configure(config).build(&pattern) else {
        return Corpus::Reject;
    };
    let Ok(baseline) = NfaRegex::new_from_nfa(nfa) else {
        return Corpus::Reject;
    };
    let mut cache = baseline.create_cache();

    // Target: regex backed by lazily built DFAs.
    let config = DFA::config().cache_capacity(1 << 20);
    let Ok(re) = RegexBuilder::new().dfa(config).build(&pattern) else {
        return Corpus::Reject;
    };
    let mut hybrid_cache = re.create_cache();

    // Search the haystack, the same input the baseline is given. Searching
    // any other text (such as the pattern source) would compare results for
    // two different inputs. A search error yields no verdict, so the
    // comparison is skipped in that case.
    if let Ok(maybe_match) =
        re.try_search(&mut hybrid_cache, &Input::new(&data.haystack))
    {
        assert_eq!(
            maybe_match.is_some(),
            baseline.is_match(&mut cache, &data.haystack)
        );
        let found2 = baseline.find(&mut cache, &data.haystack);
        if let Some(found1) = maybe_match {
            let found2 =
                found2.expect("Found in target, but not in baseline!");
            assert_eq!(found1.start(), found2.start());
            assert_eq!(found1.end(), found2.end());
        }
    }

    // Capture groups are not compared for this target; only the overall
    // match is checked.

    Corpus::Keep
}

// libFuzzer entry point: each generated `FuzzData` is handed to `do_fuzz`,
// whose `Corpus` result is returned to the fuzzer.
fuzz_target!(|data: FuzzData| -> Corpus { do_fuzz(data) });
99 changes: 99 additions & 0 deletions fuzz/fuzz_targets/ast_diff_nfas.rs
@@ -0,0 +1,99 @@
#![no_main]

use {
libfuzzer_sys::{fuzz_target, Corpus},
regex_automata::nfa::thompson::{
backtrack::BoundedBacktracker, pikevm::PikeVM, NFA,
},
regex_syntax::ast::Ast,
};

/// Structured fuzzer input: a regex AST plus the text to search.
///
/// Both fields are produced by the derived `arbitrary::Arbitrary` impl.
#[derive(Eq, PartialEq, arbitrary::Arbitrary)]
struct FuzzData {
/// Regex syntax tree; `do_fuzz` renders it to a pattern string via `Display`.
ast: Ast,
/// Text that both NFA engines are run against.
haystack: String,
}

// Hand-written `Debug`: the `ast` field is shown through its `Display`
// rendering (the pattern text) rather than through `Ast`'s own `Debug` output.
impl std::fmt::Debug for FuzzData {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let rendered_ast = self.ast.to_string();
        f.debug_struct("FuzzData")
            .field("ast", &rendered_ast)
            .field("haystack", &self.haystack)
            .finish()
    }
}

/// Differentially tests the bounded backtracker against the PikeVM.
///
/// Both engines are built from the same Thompson NFA (size-limited to
/// `1 << 20`). If any build step fails the input is rejected. Each
/// backtracker operation (`try_is_match`, `try_find`, `try_captures`) is
/// compared with the PikeVM only when it returns `Ok`; a backtracker error
/// skips that comparison. Any disagreement panics.
///
/// Returns `Corpus::Reject` when an engine cannot be built and
/// `Corpus::Keep` once the comparisons have run.
fn do_fuzz(data: FuzzData) -> Corpus {
    let _ = env_logger::try_init();

    let haystack = data.haystack.as_str();
    let pattern = data.ast.to_string();

    // One NFA shared by both engines.
    let nfa_config = NFA::config().nfa_size_limit(Some(1 << 20));
    let nfa = match NFA::compiler().configure(nfa_config).build(&pattern) {
        Ok(nfa) => nfa,
        Err(_) => return Corpus::Reject,
    };

    // Baseline engine.
    let baseline = match PikeVM::new_from_nfa(nfa.clone()) {
        Ok(vm) => vm,
        Err(_) => return Corpus::Reject,
    };
    let mut baseline_cache = baseline.create_cache();

    // Engine under test.
    let backtracker = match BoundedBacktracker::new_from_nfa(nfa) {
        Ok(bt) => bt,
        Err(_) => return Corpus::Reject,
    };
    let mut backtracker_cache = backtracker.create_cache();

    // 1. Match / no-match verdicts.
    match backtracker.try_is_match(&mut backtracker_cache, haystack) {
        Ok(target_matched) => {
            let baseline_matched =
                baseline.is_match(&mut baseline_cache, haystack);
            assert_eq!(target_matched, baseline_matched);
        }
        Err(_) => {}
    }

    // 2. Overall match bounds.
    match backtracker.try_find(&mut backtracker_cache, haystack) {
        Ok(target_found) => {
            let baseline_found = baseline.find(&mut baseline_cache, haystack);
            if let Some(target_span) = target_found {
                let baseline_span = baseline_found
                    .expect("Found in target, but not in baseline!");
                assert_eq!(target_span.start(), baseline_span.start());
                assert_eq!(target_span.end(), baseline_span.end());
            }
        }
        Err(_) => {}
    }

    // 3. Per-group capture spans.
    let mut backtracker_captures = backtracker.create_captures();
    let captures_result = backtracker.try_captures(
        &mut backtracker_cache,
        haystack,
        &mut backtracker_captures,
    );
    if captures_result.is_ok() {
        let mut baseline_captures = baseline.create_captures();
        baseline.captures(
            &mut baseline_cache,
            haystack,
            &mut baseline_captures,
        );
        drop(baseline_cache);

        assert_eq!(
            backtracker_captures.group_len(),
            baseline_captures.group_len()
        );
        let paired_groups =
            backtracker_captures.iter().zip(baseline_captures.iter());
        for pair in paired_groups {
            match pair {
                (Some(target_group), baseline_group) => {
                    let baseline_group = baseline_group
                        .expect("Matched in target, but not baseline!");
                    assert_eq!(target_group.start, baseline_group.start);
                    assert_eq!(target_group.end, baseline_group.end);
                }
                (None, baseline_group) => {
                    assert!(
                        baseline_group.is_none(),
                        "Matched in baseline, but not target!"
                    );
                }
            }
        }
    }

    Corpus::Keep
}

// libFuzzer entry point: each generated `FuzzData` is handed to `do_fuzz`,
// whose `Corpus` result is returned to the fuzzer.
fuzz_target!(|data: FuzzData| -> Corpus { do_fuzz(data) });