DumpOnDiskStage in fuzzbench_text to dump the grimoire inputs as bytes for the fuzzbench measurers (#869)

* FuzzbenchDumpStage in fuzzbench_text

* fix

* DumpOnDiskStage

* clippy

* removed duplicated code from example fuzzer

* shorthand to move OwnedSlice into vec

* clippy

* fiz

* fix missing semicolon

Co-authored-by: Dominik Maier <domenukk@gmail.com>
Co-authored-by: Dominik Maier <dmnk@google.com>
This commit is contained in:
Andrea Fioraldi 2022-11-11 08:38:48 -08:00 committed by GitHub
parent e340d35674
commit fe459f6fa5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 261 additions and 52 deletions

View File

@ -27,6 +27,7 @@ clap = { version = "4.0", features = ["default"] }
nix = "0.25"
mimalloc = { version = "*", default-features = false }
content_inspector = "0.2.4"
serde = { version = "1.0", default-features = false, features = ["alloc"] } # serialization lib
[lib]
name = "fuzzbench"

View File

@ -1,5 +1,5 @@
pub mod libafl_cc;
fn main() {
libafl_cc::main()
libafl_cc::main();
}

View File

@ -47,8 +47,8 @@ use libafl::{
powersched::PowerSchedule, IndexesLenTimeMinimizerScheduler, StdWeightedScheduler,
},
stages::{
calibrate::CalibrationStage, power::StdPowerMutationalStage, GeneralizationStage,
StdMutationalStage, TracingStage,
calibrate::CalibrationStage, dump::DumpToDiskStage, power::StdPowerMutationalStage,
GeneralizationStage, StdMutationalStage, TracingStage,
},
state::{HasCorpus, HasMetadata, StdState},
Error,
@ -64,6 +64,7 @@ use nix::{self, unistd::dup};
/// The fuzzer main (as `no_mangle` C function)
#[no_mangle]
#[allow(clippy::too_many_lines)]
pub fn libafl_main() {
// Register the metadata types used in this fuzzer
// Needed only on no_std
@ -127,7 +128,7 @@ pub fn libafl_main() {
);
if let Some(filenames) = res.get_many::<String>("remaining") {
let filenames: Vec<&str> = filenames.map(|v| v.as_str()).collect();
let filenames: Vec<&str> = filenames.map(std::string::String::as_str).collect();
if !filenames.is_empty() {
run_testcases(&filenames);
return;
@ -148,8 +149,11 @@ pub fn libafl_main() {
}
}
let mut crashes = out_dir.clone();
let mut report = out_dir.clone();
crashes.push("crashes");
report.push("report");
out_dir.push("queue");
drop(fs::create_dir(&report));
let in_dir = PathBuf::from(
res.get_one::<String>("in")
@ -174,10 +178,12 @@ pub fn libafl_main() {
);
if check_if_textual(&in_dir, &tokens) {
fuzz_text(out_dir, crashes, in_dir, tokens, logfile, timeout)
.expect("An error occurred while fuzzing");
fuzz_text(
out_dir, crashes, &report, &in_dir, tokens, &logfile, timeout,
)
.expect("An error occurred while fuzzing");
} else {
fuzz_binary(out_dir, crashes, in_dir, tokens, logfile, timeout)
fuzz_binary(out_dir, crashes, &in_dir, tokens, &logfile, timeout)
.expect("An error occurred while fuzzing");
}
}
@ -215,7 +221,7 @@ fn count_textual_inputs(dir: &Path) -> (usize, usize) {
}
fn check_if_textual(seeds_dir: &Path, tokenfile: &Option<PathBuf>) -> bool {
let (found, tot) = count_textual_inputs(&seeds_dir);
let (found, tot) = count_textual_inputs(seeds_dir);
let is_text = found * 100 / tot > 90; // 90% of text inputs
if let Some(tokenfile) = tokenfile {
let toks = Tokens::from_file(tokenfile).unwrap();
@ -237,7 +243,7 @@ fn run_testcases(filenames: &[&str]) {
// Call LLVMFUzzerInitialize() if present.
let args: Vec<String> = env::args().collect();
if libfuzzer_initialize(&args) == -1 {
println!("Warning: LLVMFuzzerInitialize failed with -1")
println!("Warning: LLVMFuzzerInitialize failed with -1");
}
println!(
@ -256,20 +262,16 @@ fn run_testcases(filenames: &[&str]) {
}
/// The actual fuzzer
#[allow(clippy::too_many_lines)]
fn fuzz_binary(
corpus_dir: PathBuf,
objective_dir: PathBuf,
seed_dir: PathBuf,
seed_dir: &PathBuf,
tokenfile: Option<PathBuf>,
logfile: PathBuf,
logfile: &PathBuf,
timeout: Duration,
) -> Result<(), Error> {
let log = RefCell::new(
OpenOptions::new()
.append(true)
.create(true)
.open(&logfile)?,
);
let log = RefCell::new(OpenOptions::new().append(true).create(true).open(logfile)?);
#[cfg(unix)]
let mut stdout_cpy = unsafe {
@ -357,7 +359,7 @@ fn fuzz_binary(
// Call LLVMFUzzerInitialize() if present.
let args: Vec<String> = env::args().collect();
if libfuzzer_initialize(&args) == -1 {
println!("Warning: LLVMFuzzerInitialize failed with -1")
println!("Warning: LLVMFuzzerInitialize failed with -1");
}
// Setup a randomic Input2State stage
@ -454,12 +456,7 @@ fn fuzz_binary(
dup2(null_fd, io::stderr().as_raw_fd())?;
}
// reopen file to make sure we're at the end
log.replace(
OpenOptions::new()
.append(true)
.create(true)
.open(&logfile)?,
);
log.replace(OpenOptions::new().append(true).create(true).open(logfile)?);
fuzzer.fuzz_loop(&mut stages, &mut executor, &mut state, &mut mgr)?;
@ -467,21 +464,18 @@ fn fuzz_binary(
Ok(())
}
/// The actual fuzzer based on Grimoire
/// The actual fuzzer based on `Grimoire`
#[allow(clippy::too_many_lines)]
fn fuzz_text(
corpus_dir: PathBuf,
objective_dir: PathBuf,
seed_dir: PathBuf,
report_dir: &Path,
seed_dir: &PathBuf,
tokenfile: Option<PathBuf>,
logfile: PathBuf,
logfile: &PathBuf,
timeout: Duration,
) -> Result<(), Error> {
let log = RefCell::new(
OpenOptions::new()
.append(true)
.create(true)
.open(&logfile)?,
);
let log = RefCell::new(OpenOptions::new().append(true).create(true).open(logfile)?);
#[cfg(unix)]
let mut stdout_cpy = unsafe {
@ -570,7 +564,7 @@ fn fuzz_text(
// Call LLVMFUzzerInitialize() if present.
let args: Vec<String> = env::args().collect();
if libfuzzer_initialize(&args) == -1 {
println!("Warning: LLVMFuzzerInitialize failed with -1")
println!("Warning: LLVMFuzzerInitialize failed with -1");
}
// Setup a randomic Input2State stage
@ -644,8 +638,23 @@ fn fuzz_text(
timeout * 10,
));
let fuzzbench = DumpToDiskStage::new(
|input: &GeneralizedInput| input.target_bytes().into(),
&report_dir.join("queue"),
&report_dir.join("crashes"),
)
.unwrap();
// The order of the stages matters!
let mut stages = tuple_list!(generalization, calibration, tracing, i2s, power, grimoire);
let mut stages = tuple_list!(
fuzzbench,
generalization,
calibration,
tracing,
i2s,
power,
grimoire
);
// Read tokens
if state.metadata().get::<Tokens>().is_none() {
@ -682,12 +691,7 @@ fn fuzz_text(
dup2(null_fd, io::stderr().as_raw_fd())?;
}
// reopen file to make sure we're at the end
log.replace(
OpenOptions::new()
.append(true)
.create(true)
.open(&logfile)?,
);
log.replace(OpenOptions::new().append(true).create(true).open(logfile)?);
fuzzer.fuzz_loop(&mut stages, &mut executor, &mut state, &mut mgr)?;

View File

@ -29,11 +29,7 @@ fn main() {
let monitor = MultiMonitor::new(|s| println!("{}", s));
let cores = Cores::all().expect("unable to get all core id");
let parent_cpu_id = cores
.ids
.first()
.expect("unable to get first core id")
.clone();
let parent_cpu_id = cores.ids.first().expect("unable to get first core id");
// region: fuzzer start function
let mut run_client = |state: Option<_>, mut restarting_mgr, _core_id: usize| {

View File

@ -29,7 +29,7 @@ pub mod shmem;
pub mod staterestore;
pub mod tuples;
use alloc::string::String;
use alloc::{string::String, vec::Vec};
use core::{iter::Iterator, time};
#[cfg(feature = "std")]
use std::time::{SystemTime, UNIX_EPOCH};
@ -50,6 +50,54 @@ pub trait AsMutSlice {
fn as_mut_slice(&mut self) -> &mut [Self::Entry];
}
/// A [`Vec`] exposes all of its elements as a shared slice.
impl<T> AsSlice for Vec<T> {
type Entry = T;
fn as_slice(&self) -> &[Self::Entry] {
self
}
}
/// A [`Vec`] exposes all of its elements as a mutable slice.
impl<T> AsMutSlice for Vec<T> {
type Entry = T;
fn as_mut_slice(&mut self) -> &mut [Self::Entry] {
self
}
}
/// A shared slice reference returns the slice it already points to.
impl<T> AsSlice for &[T] {
type Entry = T;
fn as_slice(&self) -> &[Self::Entry] {
self
}
}
/// An unsized slice is returned as-is.
impl<T> AsSlice for [T] {
type Entry = T;
fn as_slice(&self) -> &[Self::Entry] {
self
}
}
/// A mutable slice reference returns the slice it already points to.
impl<T> AsMutSlice for &mut [T] {
type Entry = T;
fn as_mut_slice(&mut self) -> &mut [Self::Entry] {
self
}
}
/// An unsized slice is returned as-is, mutably.
impl<T> AsMutSlice for [T] {
type Entry = T;
fn as_mut_slice(&mut self) -> &mut [Self::Entry] {
self
}
}
/// Create an `Iterator` from a reference
pub trait AsIter<'it> {
/// The item type
@ -57,7 +105,7 @@ pub trait AsIter<'it> {
/// The iterator type
type IntoIter: Iterator<Item = &'it Self::Item>;
/// Create an interator from &self
/// Create an iterator from &self
fn as_iter(&'it self) -> Self::IntoIter;
}
@ -68,7 +116,7 @@ pub trait AsIterMut<'it> {
/// The iterator type
type IntoIter: Iterator<Item = &'it mut Self::Item>;
/// Create an interator from &mut self
/// Create an iterator from &mut self
fn as_iter_mut(&'it mut self) -> Self::IntoIter;
}

View File

@ -331,6 +331,20 @@ where
}
}
/// Create a vector from an [`OwnedSlice`], or return the owned vec.
impl<'a, T> From<OwnedSlice<'a, T>> for Vec<T>
where
T: Clone,
{
fn from(slice: OwnedSlice<'a, T>) -> Self {
// Convert to the owned representation first, so the match below only has to extract the vec.
let slice = slice.into_owned();
match slice.inner {
OwnedSliceInner::Owned(vec) => vec,
// NOTE(review): presumably unreachable, since `into_owned` should always yield the `Owned` variant — confirm against its implementation.
_ => panic!("Could not own slice!"),
}
}
}
/// Wrap a mutable slice and convert to a Vec on serialize.
/// We use a hidden inner enum so the public API can be safe,
/// unless the user uses the unsafe [`OwnedSliceMut::from_raw_parts_mut`]
@ -486,6 +500,20 @@ impl<'a, T> From<Vec<T>> for OwnedSliceMut<'a, T> {
}
}
/// Create a vector from an [`OwnedSliceMut`], or return the owned vec.
impl<'a, T> From<OwnedSliceMut<'a, T>> for Vec<T>
where
T: Clone,
{
fn from(slice: OwnedSliceMut<'a, T>) -> Self {
// Convert to the owned representation first, so the match below only has to extract the vec.
let slice = slice.into_owned();
match slice.inner {
OwnedSliceMutInner::Owned(vec) => vec,
// NOTE(review): presumably unreachable, since `into_owned` should always yield the `Owned` variant — confirm against its implementation.
_ => panic!("Could not own slice!"),
}
}
}
/// Create a new [`OwnedSliceMut`] from a vector reference
impl<'a, T> From<&'a mut Vec<T>> for OwnedSliceMut<'a, T> {
fn from(vec: &'a mut Vec<T>) -> Self {

128
libafl/src/stages/dump.rs Normal file
View File

@ -0,0 +1,128 @@
//! The [`DumpToDiskStage`] is a stage that dumps the corpus and the solutions to disk to e.g. allow AFL to sync
use alloc::vec::Vec;
use core::{clone::Clone, marker::PhantomData};
use std::{fs, fs::File, io::Write, path::PathBuf};
use serde::{Deserialize, Serialize};
use crate::{
corpus::Corpus,
inputs::UsesInput,
stages::Stage,
state::{HasCorpus, HasMetadata, HasRand, HasSolutions, UsesState},
Error,
};
/// Metadata used to store information about disk dump indexes for names
///
/// [`DumpToDiskStage`] reads this from the state's metadata at the start of each run and
/// writes it back at the end, so that a run only dumps entries added since the previous one.
#[derive(Default, Serialize, Deserialize, Clone, Debug)]
pub struct DumpToDiskMetadata {
/// Corpus count recorded at the end of the last dump; the next dump starts at this index
last_corpus: usize,
/// Solutions count recorded at the end of the last dump; the next dump starts at this index
last_solution: usize,
}
// NOTE(review): presumably registers the type so it can be stored in the state's metadata map — confirm against the `impl_serdeany!` macro docs.
crate::impl_serdeany!(DumpToDiskMetadata);
/// The [`DumpToDiskStage`] is a stage that dumps the corpus and the solutions to disk
///
/// Each entry is converted to bytes with the user-supplied `to_bytes` callback and written
/// to a file named `id_<index>` inside the matching directory.
#[derive(Debug)]
pub struct DumpToDiskStage<CB, EM, Z> {
/// Directory that receives one file per solution
solutions_dir: PathBuf,
/// Directory that receives one file per corpus entry
corpus_dir: PathBuf,
/// Callback turning an input into the bytes written to disk
to_bytes: CB,
/// Ties the otherwise unused `EM` and `Z` type parameters to the struct
phantom: PhantomData<(EM, Z)>,
}
/// The stage uses the same state type as its event manager `EM`.
impl<CB, EM, Z> UsesState for DumpToDiskStage<CB, EM, Z>
where
EM: UsesState,
{
type State = EM::State;
}
impl<CB, E, EM, Z> Stage<E, EM, Z> for DumpToDiskStage<CB, EM, Z>
where
    CB: FnMut(&<Z::State as UsesInput>::Input) -> Vec<u8>,
    EM: UsesState<State = Z::State>,
    E: UsesState<State = Z::State>,
    Z: UsesState,
    Z::State: HasCorpus + HasSolutions + HasRand + HasMetadata,
{
    /// Dump every corpus entry and solution added since the previous run of this stage.
    ///
    /// The indexes to resume from are read from [`DumpToDiskMetadata`] (starting at zero when
    /// no metadata is present yet) and updated once both directories have been processed.
    ///
    /// # Errors
    ///
    /// Returns an error if an entry cannot be fetched, its input cannot be loaded, or the
    /// output file cannot be created. Failures while writing the bytes are ignored.
    #[inline]
    fn perform(
        &mut self,
        _fuzzer: &mut Z,
        _executor: &mut E,
        state: &mut Z::State,
        _manager: &mut EM,
        _corpus_idx: usize,
    ) -> Result<(), Error> {
        // Where did the previous run stop? Missing metadata means nothing was dumped yet.
        let (first_corpus, first_solution) = match state.metadata().get::<DumpToDiskMetadata>() {
            Some(progress) => (progress.last_corpus, progress.last_solution),
            None => (0, 0),
        };

        // Snapshot both sizes up front; they become the resume points for the next run.
        let corpus_end = state.corpus().count();
        let solutions_end = state.solutions().count();

        for idx in first_corpus..corpus_end {
            let mut entry = state.corpus().get(idx)?.borrow_mut();
            let bytes = (self.to_bytes)(entry.load_input()?);
            let mut file = File::create(self.corpus_dir.join(format!("id_{idx}")))?;
            // Best effort: a failed write is deliberately not reported.
            drop(file.write_all(&bytes));
        }

        for idx in first_solution..solutions_end {
            let mut entry = state.solutions().get(idx)?.borrow_mut();
            let bytes = (self.to_bytes)(entry.load_input()?);
            let mut file = File::create(self.solutions_dir.join(format!("id_{idx}")))?;
            // Best effort: a failed write is deliberately not reported.
            drop(file.write_all(&bytes));
        }

        state.add_metadata(DumpToDiskMetadata {
            last_corpus: corpus_end,
            last_solution: solutions_end,
        });

        Ok(())
    }
}
impl<CB, EM, Z> DumpToDiskStage<CB, EM, Z>
where
    CB: FnMut(&<Z::State as UsesInput>::Input) -> Vec<u8>,
    EM: UsesState<State = Z::State>,
    Z: UsesState,
    Z::State: HasCorpus + HasSolutions + HasRand + HasMetadata,
{
    /// Create a new [`DumpToDiskStage`]
    ///
    /// `to_bytes` converts an input into the bytes that get written to disk. `corpus_dir` and
    /// `solutions_dir` are the output directories for corpus entries and solutions; each one
    /// is created if it does not exist yet.
    ///
    /// # Errors
    ///
    /// Returns an error if either directory cannot be created and is not already an
    /// existing directory.
    pub fn new<A, B>(to_bytes: CB, corpus_dir: A, solutions_dir: B) -> Result<Self, Error>
    where
        A: Into<PathBuf>,
        B: Into<PathBuf>,
    {
        let corpus_dir = corpus_dir.into();
        if let Err(e) = fs::create_dir(&corpus_dir) {
            // Creation also fails when the directory already exists, which is fine.
            if !corpus_dir.is_dir() {
                return Err(Error::file(e));
            }
        }
        let solutions_dir = solutions_dir.into();
        if let Err(e) = fs::create_dir(&solutions_dir) {
            // Check the solutions directory itself, not the corpus directory, so a failed
            // creation is not masked by the corpus directory existing.
            if !solutions_dir.is_dir() {
                return Err(Error::file(e));
            }
        }
        Ok(Self {
            to_bytes,
            solutions_dir,
            corpus_dir,
            phantom: PhantomData,
        })
    }
}

View File

@ -39,10 +39,15 @@ pub use concolic::SimpleConcolicMutationalStage;
#[cfg(feature = "std")]
pub mod sync;
#[cfg(feature = "std")]
pub use sync::*;
#[cfg(feature = "std")]
pub mod dump;
use core::{convert::From, marker::PhantomData};
#[cfg(feature = "std")]
pub use sync::*;
pub use dump::*;
use self::push::PushStage;
use crate::{

View File

@ -1,5 +1,4 @@
//| The [`MutationalStage`] is the default stage used during fuzzing.
//! For the current input, it will perform a range of random mutations, and then run them in the executor.
//! The [`SyncFromDiskStage`] is a stage that imports inputs from disk for e.g. sync with AFL
use core::marker::PhantomData;
use std::{