DumpOnDiskStage in fuzzbench_text to dump the grimoire inputs as bytes for the fuzzbench measurers (#869)

* FuzzbenchDumpStage in fuzzbench_text

* fix

* DumpOnDiskStage

* clippy

* removed duplicated code from example fuzzer

* shorthand to move OwnedSlice into vec

* clippy

* fiz

* fix missing semicolon

Co-authored-by: Dominik Maier <domenukk@gmail.com>
Co-authored-by: Dominik Maier <dmnk@google.com>
This commit is contained in:
Andrea Fioraldi 2022-11-11 08:38:48 -08:00 committed by GitHub
parent e340d35674
commit fe459f6fa5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 261 additions and 52 deletions

View File

@ -27,6 +27,7 @@ clap = { version = "4.0", features = ["default"] }
nix = "0.25" nix = "0.25"
mimalloc = { version = "*", default-features = false } mimalloc = { version = "*", default-features = false }
content_inspector = "0.2.4" content_inspector = "0.2.4"
serde = { version = "1.0", default-features = false, features = ["alloc"] } # serialization lib
[lib] [lib]
name = "fuzzbench" name = "fuzzbench"

View File

@ -1,5 +1,5 @@
pub mod libafl_cc; pub mod libafl_cc;
fn main() { fn main() {
libafl_cc::main() libafl_cc::main();
} }

View File

@ -47,8 +47,8 @@ use libafl::{
powersched::PowerSchedule, IndexesLenTimeMinimizerScheduler, StdWeightedScheduler, powersched::PowerSchedule, IndexesLenTimeMinimizerScheduler, StdWeightedScheduler,
}, },
stages::{ stages::{
calibrate::CalibrationStage, power::StdPowerMutationalStage, GeneralizationStage, calibrate::CalibrationStage, dump::DumpToDiskStage, power::StdPowerMutationalStage,
StdMutationalStage, TracingStage, GeneralizationStage, StdMutationalStage, TracingStage,
}, },
state::{HasCorpus, HasMetadata, StdState}, state::{HasCorpus, HasMetadata, StdState},
Error, Error,
@ -64,6 +64,7 @@ use nix::{self, unistd::dup};
/// The fuzzer main (as `no_mangle` C function) /// The fuzzer main (as `no_mangle` C function)
#[no_mangle] #[no_mangle]
#[allow(clippy::too_many_lines)]
pub fn libafl_main() { pub fn libafl_main() {
// Registry the metadata types used in this fuzzer // Registry the metadata types used in this fuzzer
// Needed only on no_std // Needed only on no_std
@ -127,7 +128,7 @@ pub fn libafl_main() {
); );
if let Some(filenames) = res.get_many::<String>("remaining") { if let Some(filenames) = res.get_many::<String>("remaining") {
let filenames: Vec<&str> = filenames.map(|v| v.as_str()).collect(); let filenames: Vec<&str> = filenames.map(std::string::String::as_str).collect();
if !filenames.is_empty() { if !filenames.is_empty() {
run_testcases(&filenames); run_testcases(&filenames);
return; return;
@ -148,8 +149,11 @@ pub fn libafl_main() {
} }
} }
let mut crashes = out_dir.clone(); let mut crashes = out_dir.clone();
let mut report = out_dir.clone();
crashes.push("crashes"); crashes.push("crashes");
report.push("report");
out_dir.push("queue"); out_dir.push("queue");
drop(fs::create_dir(&report));
let in_dir = PathBuf::from( let in_dir = PathBuf::from(
res.get_one::<String>("in") res.get_one::<String>("in")
@ -174,10 +178,12 @@ pub fn libafl_main() {
); );
if check_if_textual(&in_dir, &tokens) { if check_if_textual(&in_dir, &tokens) {
fuzz_text(out_dir, crashes, in_dir, tokens, logfile, timeout) fuzz_text(
.expect("An error occurred while fuzzing"); out_dir, crashes, &report, &in_dir, tokens, &logfile, timeout,
)
.expect("An error occurred while fuzzing");
} else { } else {
fuzz_binary(out_dir, crashes, in_dir, tokens, logfile, timeout) fuzz_binary(out_dir, crashes, &in_dir, tokens, &logfile, timeout)
.expect("An error occurred while fuzzing"); .expect("An error occurred while fuzzing");
} }
} }
@ -215,7 +221,7 @@ fn count_textual_inputs(dir: &Path) -> (usize, usize) {
} }
fn check_if_textual(seeds_dir: &Path, tokenfile: &Option<PathBuf>) -> bool { fn check_if_textual(seeds_dir: &Path, tokenfile: &Option<PathBuf>) -> bool {
let (found, tot) = count_textual_inputs(&seeds_dir); let (found, tot) = count_textual_inputs(seeds_dir);
let is_text = found * 100 / tot > 90; // 90% of text inputs let is_text = found * 100 / tot > 90; // 90% of text inputs
if let Some(tokenfile) = tokenfile { if let Some(tokenfile) = tokenfile {
let toks = Tokens::from_file(tokenfile).unwrap(); let toks = Tokens::from_file(tokenfile).unwrap();
@ -237,7 +243,7 @@ fn run_testcases(filenames: &[&str]) {
// Call LLVMFUzzerInitialize() if present. // Call LLVMFUzzerInitialize() if present.
let args: Vec<String> = env::args().collect(); let args: Vec<String> = env::args().collect();
if libfuzzer_initialize(&args) == -1 { if libfuzzer_initialize(&args) == -1 {
println!("Warning: LLVMFuzzerInitialize failed with -1") println!("Warning: LLVMFuzzerInitialize failed with -1");
} }
println!( println!(
@ -256,20 +262,16 @@ fn run_testcases(filenames: &[&str]) {
} }
/// The actual fuzzer /// The actual fuzzer
#[allow(clippy::too_many_lines)]
fn fuzz_binary( fn fuzz_binary(
corpus_dir: PathBuf, corpus_dir: PathBuf,
objective_dir: PathBuf, objective_dir: PathBuf,
seed_dir: PathBuf, seed_dir: &PathBuf,
tokenfile: Option<PathBuf>, tokenfile: Option<PathBuf>,
logfile: PathBuf, logfile: &PathBuf,
timeout: Duration, timeout: Duration,
) -> Result<(), Error> { ) -> Result<(), Error> {
let log = RefCell::new( let log = RefCell::new(OpenOptions::new().append(true).create(true).open(logfile)?);
OpenOptions::new()
.append(true)
.create(true)
.open(&logfile)?,
);
#[cfg(unix)] #[cfg(unix)]
let mut stdout_cpy = unsafe { let mut stdout_cpy = unsafe {
@ -357,7 +359,7 @@ fn fuzz_binary(
// Call LLVMFUzzerInitialize() if present. // Call LLVMFUzzerInitialize() if present.
let args: Vec<String> = env::args().collect(); let args: Vec<String> = env::args().collect();
if libfuzzer_initialize(&args) == -1 { if libfuzzer_initialize(&args) == -1 {
println!("Warning: LLVMFuzzerInitialize failed with -1") println!("Warning: LLVMFuzzerInitialize failed with -1");
} }
// Setup a randomic Input2State stage // Setup a randomic Input2State stage
@ -454,12 +456,7 @@ fn fuzz_binary(
dup2(null_fd, io::stderr().as_raw_fd())?; dup2(null_fd, io::stderr().as_raw_fd())?;
} }
// reopen file to make sure we're at the end // reopen file to make sure we're at the end
log.replace( log.replace(OpenOptions::new().append(true).create(true).open(logfile)?);
OpenOptions::new()
.append(true)
.create(true)
.open(&logfile)?,
);
fuzzer.fuzz_loop(&mut stages, &mut executor, &mut state, &mut mgr)?; fuzzer.fuzz_loop(&mut stages, &mut executor, &mut state, &mut mgr)?;
@ -467,21 +464,18 @@ fn fuzz_binary(
Ok(()) Ok(())
} }
/// The actual fuzzer based on Grimoire /// The actual fuzzer based on `Grimoire`
#[allow(clippy::too_many_lines)]
fn fuzz_text( fn fuzz_text(
corpus_dir: PathBuf, corpus_dir: PathBuf,
objective_dir: PathBuf, objective_dir: PathBuf,
seed_dir: PathBuf, report_dir: &Path,
seed_dir: &PathBuf,
tokenfile: Option<PathBuf>, tokenfile: Option<PathBuf>,
logfile: PathBuf, logfile: &PathBuf,
timeout: Duration, timeout: Duration,
) -> Result<(), Error> { ) -> Result<(), Error> {
let log = RefCell::new( let log = RefCell::new(OpenOptions::new().append(true).create(true).open(logfile)?);
OpenOptions::new()
.append(true)
.create(true)
.open(&logfile)?,
);
#[cfg(unix)] #[cfg(unix)]
let mut stdout_cpy = unsafe { let mut stdout_cpy = unsafe {
@ -570,7 +564,7 @@ fn fuzz_text(
// Call LLVMFUzzerInitialize() if present. // Call LLVMFUzzerInitialize() if present.
let args: Vec<String> = env::args().collect(); let args: Vec<String> = env::args().collect();
if libfuzzer_initialize(&args) == -1 { if libfuzzer_initialize(&args) == -1 {
println!("Warning: LLVMFuzzerInitialize failed with -1") println!("Warning: LLVMFuzzerInitialize failed with -1");
} }
// Setup a randomic Input2State stage // Setup a randomic Input2State stage
@ -644,8 +638,23 @@ fn fuzz_text(
timeout * 10, timeout * 10,
)); ));
let fuzzbench = DumpToDiskStage::new(
|input: &GeneralizedInput| input.target_bytes().into(),
&report_dir.join("queue"),
&report_dir.join("crashes"),
)
.unwrap();
// The order of the stages matter! // The order of the stages matter!
let mut stages = tuple_list!(generalization, calibration, tracing, i2s, power, grimoire); let mut stages = tuple_list!(
fuzzbench,
generalization,
calibration,
tracing,
i2s,
power,
grimoire
);
// Read tokens // Read tokens
if state.metadata().get::<Tokens>().is_none() { if state.metadata().get::<Tokens>().is_none() {
@ -682,12 +691,7 @@ fn fuzz_text(
dup2(null_fd, io::stderr().as_raw_fd())?; dup2(null_fd, io::stderr().as_raw_fd())?;
} }
// reopen file to make sure we're at the end // reopen file to make sure we're at the end
log.replace( log.replace(OpenOptions::new().append(true).create(true).open(logfile)?);
OpenOptions::new()
.append(true)
.create(true)
.open(&logfile)?,
);
fuzzer.fuzz_loop(&mut stages, &mut executor, &mut state, &mut mgr)?; fuzzer.fuzz_loop(&mut stages, &mut executor, &mut state, &mut mgr)?;

View File

@ -29,11 +29,7 @@ fn main() {
let monitor = MultiMonitor::new(|s| println!("{}", s)); let monitor = MultiMonitor::new(|s| println!("{}", s));
let cores = Cores::all().expect("unable to get all core id"); let cores = Cores::all().expect("unable to get all core id");
let parent_cpu_id = cores let parent_cpu_id = cores.ids.first().expect("unable to get first core id");
.ids
.first()
.expect("unable to get first core id")
.clone();
// region: fuzzer start function // region: fuzzer start function
let mut run_client = |state: Option<_>, mut restarting_mgr, _core_id: usize| { let mut run_client = |state: Option<_>, mut restarting_mgr, _core_id: usize| {

View File

@ -29,7 +29,7 @@ pub mod shmem;
pub mod staterestore; pub mod staterestore;
pub mod tuples; pub mod tuples;
use alloc::string::String; use alloc::{string::String, vec::Vec};
use core::{iter::Iterator, time}; use core::{iter::Iterator, time};
#[cfg(feature = "std")] #[cfg(feature = "std")]
use std::time::{SystemTime, UNIX_EPOCH}; use std::time::{SystemTime, UNIX_EPOCH};
@ -50,6 +50,54 @@ pub trait AsMutSlice {
fn as_mut_slice(&mut self) -> &mut [Self::Entry]; fn as_mut_slice(&mut self) -> &mut [Self::Entry];
} }
/// A `Vec` exposes all of its elements as a shared slice.
impl<T> AsSlice for Vec<T> {
    type Entry = T;

    fn as_slice(&self) -> &[Self::Entry] {
        &self[..]
    }
}
/// A `Vec` exposes all of its elements as a mutable slice.
impl<T> AsMutSlice for Vec<T> {
    type Entry = T;

    fn as_mut_slice(&mut self) -> &mut [Self::Entry] {
        &mut self[..]
    }
}
/// A shared slice reference hands out the slice it points to.
impl<T> AsSlice for &[T] {
    type Entry = T;

    fn as_slice(&self) -> &[Self::Entry] {
        &self[..]
    }
}
/// A slice is trivially viewable as itself.
impl<T> AsSlice for [T] {
    type Entry = T;

    fn as_slice(&self) -> &[Self::Entry] {
        &self[..]
    }
}
/// A mutable slice reference hands out a reborrow of the slice it points to.
impl<T> AsMutSlice for &mut [T] {
    type Entry = T;

    fn as_mut_slice(&mut self) -> &mut [Self::Entry] {
        &mut self[..]
    }
}
/// A slice is trivially viewable as a mutable slice of itself.
impl<T> AsMutSlice for [T] {
    type Entry = T;

    fn as_mut_slice(&mut self) -> &mut [Self::Entry] {
        &mut self[..]
    }
}
/// Create an `Iterator` from a reference /// Create an `Iterator` from a reference
pub trait AsIter<'it> { pub trait AsIter<'it> {
/// The item type /// The item type
@ -57,7 +105,7 @@ pub trait AsIter<'it> {
/// The iterator type /// The iterator type
type IntoIter: Iterator<Item = &'it Self::Item>; type IntoIter: Iterator<Item = &'it Self::Item>;
/// Create an interator from &self /// Create an iterator from &self
fn as_iter(&'it self) -> Self::IntoIter; fn as_iter(&'it self) -> Self::IntoIter;
} }
@ -68,7 +116,7 @@ pub trait AsIterMut<'it> {
/// The iterator type /// The iterator type
type IntoIter: Iterator<Item = &'it mut Self::Item>; type IntoIter: Iterator<Item = &'it mut Self::Item>;
/// Create an interator from &mut self /// Create an iterator from &mut self
fn as_iter_mut(&'it mut self) -> Self::IntoIter; fn as_iter_mut(&'it mut self) -> Self::IntoIter;
} }

View File

@ -331,6 +331,20 @@ where
} }
} }
/// Create a vector from an [`OwnedSlice`], or return the owned vec.
///
/// # Panics
///
/// Panics with `"Could not own slice!"` if the slice is not in its owned
/// form after calling `into_owned`.
impl<'a, T> From<OwnedSlice<'a, T>> for Vec<T>
where
    T: Clone,
{
    fn from(slice: OwnedSlice<'a, T>) -> Self {
        // NOTE(review): `into_owned` presumably always yields the `Owned`
        // variant, which would make the fallback arm a pure safety net —
        // confirm against the `into_owned` implementation.
        let slice = slice.into_owned();
        match slice.inner {
            OwnedSliceInner::Owned(vec) => vec,
            _ => panic!("Could not own slice!"),
        }
    }
}
/// Wrap a mutable slice and convert to a Vec on serialize. /// Wrap a mutable slice and convert to a Vec on serialize.
/// We use a hidden inner enum so the public API can be safe, /// We use a hidden inner enum so the public API can be safe,
/// unless the user uses the unsafe [`OwnedSliceMut::from_raw_parts_mut`] /// unless the user uses the unsafe [`OwnedSliceMut::from_raw_parts_mut`]
@ -486,6 +500,20 @@ impl<'a, T> From<Vec<T>> for OwnedSliceMut<'a, T> {
} }
} }
/// Turn an [`OwnedSliceMut`] into a plain `Vec`, handing back the vec it owns.
///
/// # Panics
///
/// Panics with `"Could not own slice!"` if the slice is not in its owned
/// form after calling `into_owned`.
impl<'a, T> From<OwnedSliceMut<'a, T>> for Vec<T>
where
    T: Clone,
{
    fn from(slice: OwnedSliceMut<'a, T>) -> Self {
        let owned = slice.into_owned();
        if let OwnedSliceMutInner::Owned(elements) = owned.inner {
            elements
        } else {
            panic!("Could not own slice!")
        }
    }
}
/// Create a new [`OwnedSliceMut`] from a vector reference /// Create a new [`OwnedSliceMut`] from a vector reference
impl<'a, T> From<&'a mut Vec<T>> for OwnedSliceMut<'a, T> { impl<'a, T> From<&'a mut Vec<T>> for OwnedSliceMut<'a, T> {
fn from(vec: &'a mut Vec<T>) -> Self { fn from(vec: &'a mut Vec<T>) -> Self {

128
libafl/src/stages/dump.rs Normal file
View File

@ -0,0 +1,128 @@
//! The [`DumpToDiskStage`] is a stage that dumps the corpus and the solutions to disk to e.g. allow AFL to sync
use alloc::vec::Vec;
use core::{clone::Clone, marker::PhantomData};
use std::{fs, fs::File, io::Write, path::PathBuf};
use serde::{Deserialize, Serialize};
use crate::{
corpus::Corpus,
inputs::UsesInput,
stages::Stage,
state::{HasCorpus, HasMetadata, HasRand, HasSolutions, UsesState},
Error,
};
/// Metadata used to store information about disk dump indexes for names
///
/// The stage stores one instance of this in the state's metadata after every
/// run, so that the next run only dumps entries added since then.
#[derive(Default, Serialize, Deserialize, Clone, Debug)]
pub struct DumpToDiskMetadata {
/// Corpus entry count at the end of the previous dump; the next dump starts at this index.
last_corpus: usize,
/// Solutions entry count at the end of the previous dump; the next dump starts at this index.
last_solution: usize,
}
crate::impl_serdeany!(DumpToDiskMetadata);
/// The [`DumpToDiskStage`] is a stage that dumps the corpus and the solutions to disk
///
/// Each entry is converted to bytes by the `to_bytes` callback and written to a
/// file named `id_<index>` in the matching directory.
#[derive(Debug)]
pub struct DumpToDiskStage<CB, EM, Z> {
/// Directory that receives the dumped solutions.
solutions_dir: PathBuf,
/// Directory that receives the dumped corpus entries.
corpus_dir: PathBuf,
/// Callback that turns an input into the bytes written to disk.
to_bytes: CB,
/// Marker for the otherwise unused `EM` and `Z` type parameters.
phantom: PhantomData<(EM, Z)>,
}
/// The stage works on the same state type as its event manager `EM`.
impl<CB, EM, Z> UsesState for DumpToDiskStage<CB, EM, Z>
where
EM: UsesState,
{
type State = EM::State;
}
impl<CB, E, EM, Z> Stage<E, EM, Z> for DumpToDiskStage<CB, EM, Z>
where
    CB: FnMut(&<Z::State as UsesInput>::Input) -> Vec<u8>,
    EM: UsesState<State = Z::State>,
    E: UsesState<State = Z::State>,
    Z: UsesState,
    Z::State: HasCorpus + HasSolutions + HasRand + HasMetadata,
{
    /// Write every corpus entry and solution added since the previous run to disk.
    ///
    /// Entries are converted with the `to_bytes` callback and stored as
    /// `id_<index>` in the corpus and solutions directories respectively.
    /// Afterwards the current counts are recorded as [`DumpToDiskMetadata`]
    /// so the next run resumes from there.
    ///
    /// # Errors
    ///
    /// Fails if an entry cannot be fetched, its input cannot be loaded, or the
    /// output file cannot be created. Errors while writing the bytes are
    /// ignored.
    #[inline]
    fn perform(
        &mut self,
        _fuzzer: &mut Z,
        _executor: &mut E,
        state: &mut Z::State,
        _manager: &mut EM,
        _corpus_idx: usize,
    ) -> Result<(), Error> {
        // Resume where the previous run stopped; start from zero on the first run.
        let (corpus_start, solutions_start) = state
            .metadata()
            .get::<DumpToDiskMetadata>()
            .map_or((0, 0), |seen| (seen.last_corpus, seen.last_solution));

        let corpus_end = state.corpus().count();
        let solutions_end = state.solutions().count();

        for idx in corpus_start..corpus_end {
            let mut entry = state.corpus().get(idx)?.borrow_mut();
            let bytes = (self.to_bytes)(entry.load_input()?);

            let mut out = File::create(self.corpus_dir.join(format!("id_{idx}")))?;
            // Best effort: a failed write is deliberately not reported.
            drop(out.write_all(&bytes));
        }

        for idx in solutions_start..solutions_end {
            let mut entry = state.solutions().get(idx)?.borrow_mut();
            let bytes = (self.to_bytes)(entry.load_input()?);

            let mut out = File::create(self.solutions_dir.join(format!("id_{idx}")))?;
            // Best effort: a failed write is deliberately not reported.
            drop(out.write_all(&bytes));
        }

        state.add_metadata(DumpToDiskMetadata {
            last_corpus: corpus_end,
            last_solution: solutions_end,
        });

        Ok(())
    }
}
impl<CB, EM, Z> DumpToDiskStage<CB, EM, Z>
where
    CB: FnMut(&<Z::State as UsesInput>::Input) -> Vec<u8>,
    EM: UsesState<State = Z::State>,
    Z: UsesState,
    Z::State: HasCorpus + HasSolutions + HasRand + HasMetadata,
{
    /// Create a new [`DumpToDiskStage`]
    ///
    /// `to_bytes` converts an input into the bytes written to disk.
    /// `corpus_dir` and `solutions_dir` are the output directories; each one
    /// is created if it does not exist yet.
    ///
    /// # Errors
    ///
    /// Returns a file error if either directory cannot be created and does
    /// not already exist as a directory.
    pub fn new<A, B>(to_bytes: CB, corpus_dir: A, solutions_dir: B) -> Result<Self, Error>
    where
        A: Into<PathBuf>,
        B: Into<PathBuf>,
    {
        let corpus_dir = corpus_dir.into();
        if let Err(e) = fs::create_dir(&corpus_dir) {
            // `create_dir` also fails when the path already exists; an
            // existing directory is fine, anything else is a real error.
            if !corpus_dir.is_dir() {
                return Err(Error::file(e));
            }
        }
        let solutions_dir = solutions_dir.into();
        if let Err(e) = fs::create_dir(&solutions_dir) {
            // Check the solutions directory itself, not the corpus directory,
            // so a failed creation is not masked by the corpus dir existing.
            if !solutions_dir.is_dir() {
                return Err(Error::file(e));
            }
        }
        Ok(Self {
            to_bytes,
            solutions_dir,
            corpus_dir,
            phantom: PhantomData,
        })
    }
}

View File

@ -39,10 +39,15 @@ pub use concolic::SimpleConcolicMutationalStage;
#[cfg(feature = "std")] #[cfg(feature = "std")]
pub mod sync; pub mod sync;
#[cfg(feature = "std")]
pub use sync::*;
#[cfg(feature = "std")]
pub mod dump;
use core::{convert::From, marker::PhantomData}; use core::{convert::From, marker::PhantomData};
#[cfg(feature = "std")] #[cfg(feature = "std")]
pub use sync::*; pub use dump::*;
use self::push::PushStage; use self::push::PushStage;
use crate::{ use crate::{

View File

@ -1,5 +1,4 @@
//| The [`MutationalStage`] is the default stage used during fuzzing. //! The [`SyncFromDiskStage`] is a stage that imports inputs from disk for e.g. sync with AFL
//! For the current input, it will perform a range of random mutations, and then run them in the executor.
use core::marker::PhantomData; use core::marker::PhantomData;
use std::{ use std::{