DumpOnDiskStage in fuzzbench_text to dump the grimoire inputs as bytes for the fuzzbench measurers (#869)

* FuzzbenchDumpStage in fuzzbench_text * fix * DumpOnDiskStage * clippy * removed duplicated code from example fuzzer * shorthand to move OwnedSlice into vec * clippy * fiz * fix missing semicolon Co-authored-by: Dominik Maier <domenukk@gmail.com> Co-authored-by: Dominik Maier <dmnk@google.com>
2022-11-11 08:38:48 -08:00 · 2022-11-11 08:38:48 -08:00 · fe459f6fa5
commit fe459f6fa5
parent e340d35674
9 changed files with 261 additions and 52 deletions
--- a/fuzzers/fuzzbench_text/Cargo.toml
+++ b/fuzzers/fuzzbench_text/Cargo.toml
@ -27,6 +27,7 @@ clap = { version = "4.0", features = ["default"] }
 nix = "0.25"
 mimalloc = { version = "*", default-features = false }
 content_inspector = "0.2.4"
+serde = { version = "1.0", default-features = false, features = ["alloc"] } # serialization lib

 [lib]
 name = "fuzzbench"
--- a/fuzzers/fuzzbench_text/src/bin/libafl_cxx.rs
+++ b/fuzzers/fuzzbench_text/src/bin/libafl_cxx.rs
@ -1,5 +1,5 @@
 pub mod libafl_cc;

 fn main() {
-    libafl_cc::main()
+    libafl_cc::main();
 }
--- a/fuzzers/fuzzbench_text/src/lib.rs
+++ b/fuzzers/fuzzbench_text/src/lib.rs
@ -47,8 +47,8 @@ use libafl::{
        powersched::PowerSchedule, IndexesLenTimeMinimizerScheduler, StdWeightedScheduler,
    },
    stages::{
-        calibrate::CalibrationStage, power::StdPowerMutationalStage, GeneralizationStage,
-        StdMutationalStage, TracingStage,
+        calibrate::CalibrationStage, dump::DumpToDiskStage, power::StdPowerMutationalStage,
+        GeneralizationStage, StdMutationalStage, TracingStage,
    },
    state::{HasCorpus, HasMetadata, StdState},
    Error,
@ -64,6 +64,7 @@ use nix::{self, unistd::dup};

 /// The fuzzer main (as `no_mangle` C function)
 #[no_mangle]
+#[allow(clippy::too_many_lines)]
 pub fn libafl_main() {
    // Registry the metadata types used in this fuzzer
    // Needed only on no_std
@ -127,7 +128,7 @@ pub fn libafl_main() {
    );

    if let Some(filenames) = res.get_many::<String>("remaining") {
-        let filenames: Vec<&str> = filenames.map(|v| v.as_str()).collect();
+        let filenames: Vec<&str> = filenames.map(std::string::String::as_str).collect();
        if !filenames.is_empty() {
            run_testcases(&filenames);
            return;
@ -148,8 +149,11 @@ pub fn libafl_main() {
        }
    }
    let mut crashes = out_dir.clone();
+    let mut report = out_dir.clone();
    crashes.push("crashes");
+    report.push("report");
    out_dir.push("queue");
+    drop(fs::create_dir(&report));

    let in_dir = PathBuf::from(
        res.get_one::<String>("in")
@ -174,10 +178,12 @@ pub fn libafl_main() {
    );

    if check_if_textual(&in_dir, &tokens) {
-        fuzz_text(out_dir, crashes, in_dir, tokens, logfile, timeout)
-            .expect("An error occurred while fuzzing");
+        fuzz_text(
+            out_dir, crashes, &report, &in_dir, tokens, &logfile, timeout,
+        )
+        .expect("An error occurred while fuzzing");
    } else {
-        fuzz_binary(out_dir, crashes, in_dir, tokens, logfile, timeout)
+        fuzz_binary(out_dir, crashes, &in_dir, tokens, &logfile, timeout)
            .expect("An error occurred while fuzzing");
    }
 }
@ -215,7 +221,7 @@ fn count_textual_inputs(dir: &Path) -> (usize, usize) {
 }

 fn check_if_textual(seeds_dir: &Path, tokenfile: &Option<PathBuf>) -> bool {
-    let (found, tot) = count_textual_inputs(&seeds_dir);
+    let (found, tot) = count_textual_inputs(seeds_dir);
    let is_text = found * 100 / tot > 90; // 90% of text inputs
    if let Some(tokenfile) = tokenfile {
        let toks = Tokens::from_file(tokenfile).unwrap();
@ -237,7 +243,7 @@ fn run_testcases(filenames: &[&str]) {
    // Call LLVMFUzzerInitialize() if present.
    let args: Vec<String> = env::args().collect();
    if libfuzzer_initialize(&args) == -1 {
-        println!("Warning: LLVMFuzzerInitialize failed with -1")
+        println!("Warning: LLVMFuzzerInitialize failed with -1");
    }

    println!(
@ -256,20 +262,16 @@ fn run_testcases(filenames: &[&str]) {
 }

 /// The actual fuzzer
+#[allow(clippy::too_many_lines)]
 fn fuzz_binary(
    corpus_dir: PathBuf,
    objective_dir: PathBuf,
-    seed_dir: PathBuf,
+    seed_dir: &PathBuf,
    tokenfile: Option<PathBuf>,
-    logfile: PathBuf,
+    logfile: &PathBuf,
    timeout: Duration,
 ) -> Result<(), Error> {
-    let log = RefCell::new(
-        OpenOptions::new()
-            .append(true)
-            .create(true)
-            .open(&logfile)?,
-    );
+    let log = RefCell::new(OpenOptions::new().append(true).create(true).open(logfile)?);

    #[cfg(unix)]
    let mut stdout_cpy = unsafe {
@ -357,7 +359,7 @@ fn fuzz_binary(
    // Call LLVMFUzzerInitialize() if present.
    let args: Vec<String> = env::args().collect();
    if libfuzzer_initialize(&args) == -1 {
-        println!("Warning: LLVMFuzzerInitialize failed with -1")
+        println!("Warning: LLVMFuzzerInitialize failed with -1");
    }

    // Setup a randomic Input2State stage
@ -454,12 +456,7 @@ fn fuzz_binary(
        dup2(null_fd, io::stderr().as_raw_fd())?;
    }
    // reopen file to make sure we're at the end
-    log.replace(
-        OpenOptions::new()
-            .append(true)
-            .create(true)
-            .open(&logfile)?,
-    );
+    log.replace(OpenOptions::new().append(true).create(true).open(logfile)?);

    fuzzer.fuzz_loop(&mut stages, &mut executor, &mut state, &mut mgr)?;

@ -467,21 +464,18 @@ fn fuzz_binary(
    Ok(())
 }

-/// The actual fuzzer based on Grimoire
+/// The actual fuzzer based on `Grimoire`
+#[allow(clippy::too_many_lines)]
 fn fuzz_text(
    corpus_dir: PathBuf,
    objective_dir: PathBuf,
-    seed_dir: PathBuf,
+    report_dir: &Path,
+    seed_dir: &PathBuf,
    tokenfile: Option<PathBuf>,
-    logfile: PathBuf,
+    logfile: &PathBuf,
    timeout: Duration,
 ) -> Result<(), Error> {
-    let log = RefCell::new(
-        OpenOptions::new()
-            .append(true)
-            .create(true)
-            .open(&logfile)?,
-    );
+    let log = RefCell::new(OpenOptions::new().append(true).create(true).open(logfile)?);

    #[cfg(unix)]
    let mut stdout_cpy = unsafe {
@ -570,7 +564,7 @@ fn fuzz_text(
    // Call LLVMFUzzerInitialize() if present.
    let args: Vec<String> = env::args().collect();
    if libfuzzer_initialize(&args) == -1 {
-        println!("Warning: LLVMFuzzerInitialize failed with -1")
+        println!("Warning: LLVMFuzzerInitialize failed with -1");
    }

    // Setup a randomic Input2State stage
@ -644,8 +638,23 @@ fn fuzz_text(
        timeout * 10,
    ));

+    let fuzzbench = DumpToDiskStage::new(
+        |input: &GeneralizedInput| input.target_bytes().into(),
+        &report_dir.join("queue"),
+        &report_dir.join("crashes"),
+    )
+    .unwrap();
+
    // The order of the stages matter!
-    let mut stages = tuple_list!(generalization, calibration, tracing, i2s, power, grimoire);
+    let mut stages = tuple_list!(
+        fuzzbench,
+        generalization,
+        calibration,
+        tracing,
+        i2s,
+        power,
+        grimoire
+    );

    // Read tokens
    if state.metadata().get::<Tokens>().is_none() {
@ -682,12 +691,7 @@ fn fuzz_text(
        dup2(null_fd, io::stderr().as_raw_fd())?;
    }
    // reopen file to make sure we're at the end
-    log.replace(
-        OpenOptions::new()
-            .append(true)
-            .create(true)
-            .open(&logfile)?,
-    );
+    log.replace(OpenOptions::new().append(true).create(true).open(logfile)?);

    fuzzer.fuzz_loop(&mut stages, &mut executor, &mut state, &mut mgr)?;

--- a/fuzzers/nyx_libxml2_parallel/src/main.rs
+++ b/fuzzers/nyx_libxml2_parallel/src/main.rs
@ -29,11 +29,7 @@ fn main() {
    let monitor = MultiMonitor::new(|s| println!("{}", s));

    let cores = Cores::all().expect("unable to get all core id");
-    let parent_cpu_id = cores
-        .ids
-        .first()
-        .expect("unable to get first core id")
-        .clone();
+    let parent_cpu_id = cores.ids.first().expect("unable to get first core id");

    // region: fuzzer start function
    let mut run_client = |state: Option<_>, mut restarting_mgr, _core_id: usize| {
--- a/libafl/src/bolts/mod.rs
+++ b/libafl/src/bolts/mod.rs
@ -29,7 +29,7 @@ pub mod shmem;
 pub mod staterestore;
 pub mod tuples;

-use alloc::string::String;
+use alloc::{string::String, vec::Vec};
 use core::{iter::Iterator, time};
 #[cfg(feature = "std")]
 use std::time::{SystemTime, UNIX_EPOCH};
@ -50,6 +50,54 @@ pub trait AsMutSlice {
    fn as_mut_slice(&mut self) -> &mut [Self::Entry];
 }

+/// A [`Vec`] can be viewed as an immutable slice of its elements.
+impl<T> AsSlice for Vec<T> {
+    type Entry = T;
+
+    // Returning `self` relies on the deref coercion from `&Vec<T>` to `&[T]`.
+    fn as_slice(&self) -> &[Self::Entry] {
+        self
+    }
+}
+
+/// A [`Vec`] can be viewed as a mutable slice of its elements.
+impl<T> AsMutSlice for Vec<T> {
+    type Entry = T;
+
+    // Returning `self` relies on the deref coercion from `&mut Vec<T>` to `&mut [T]`.
+    fn as_mut_slice(&mut self) -> &mut [Self::Entry] {
+        self
+    }
+}
+
+/// A shared slice reference is trivially its own slice.
+impl<T> AsSlice for &[T] {
+    type Entry = T;
+
+    // `self` is `&&[T]` here; it is dereferenced to the inner `&[T]` on return.
+    fn as_slice(&self) -> &[Self::Entry] {
+        self
+    }
+}
+
+/// An unsized slice `[T]` returns itself as the slice.
+impl<T> AsSlice for [T] {
+    type Entry = T;
+
+    fn as_slice(&self) -> &[Self::Entry] {
+        self
+    }
+}
+
+/// A mutable slice reference is trivially its own mutable slice.
+impl<T> AsMutSlice for &mut [T] {
+    type Entry = T;
+
+    // `self` is `&mut &mut [T]` here; it is reborrowed as the inner `&mut [T]` on return.
+    fn as_mut_slice(&mut self) -> &mut [Self::Entry] {
+        self
+    }
+}
+
+/// An unsized slice `[T]` returns itself as the mutable slice.
+impl<T> AsMutSlice for [T] {
+    type Entry = T;
+
+    fn as_mut_slice(&mut self) -> &mut [Self::Entry] {
+        self
+    }
+}
+
 /// Create an `Iterator` from a reference
 pub trait AsIter<'it> {
    /// The item type
@ -57,7 +105,7 @@ pub trait AsIter<'it> {
    /// The iterator type
    type IntoIter: Iterator<Item = &'it Self::Item>;

-    /// Create an interator from &self
+    /// Create an iterator from &self
    fn as_iter(&'it self) -> Self::IntoIter;
 }

@ -68,7 +116,7 @@ pub trait AsIterMut<'it> {
    /// The iterator type
    type IntoIter: Iterator<Item = &'it mut Self::Item>;

-    /// Create an interator from &mut self
+    /// Create an iterator from &mut self
    fn as_iter_mut(&'it mut self) -> Self::IntoIter;
 }

--- a/libafl/src/bolts/ownedref.rs
+++ b/libafl/src/bolts/ownedref.rs
@ -331,6 +331,20 @@ where
    }
 }

+/// Create a vector from an [`OwnedSlice`], or return the owned vec.
+///
+/// The slice is first converted with `into_owned()`, then the inner vector is moved out.
+///
+/// # Panics
+/// Panics with "Could not own slice!" if the inner value is not the `Owned` variant
+/// after `into_owned()`.
+impl<'a, T> From<OwnedSlice<'a, T>> for Vec<T>
+where
+    T: Clone,
+{
+    fn from(slice: OwnedSlice<'a, T>) -> Self {
+        let slice = slice.into_owned();
+        match slice.inner {
+            OwnedSliceInner::Owned(vec) => vec,
+            // NOTE(review): presumably unreachable, since `into_owned()` should always
+            // produce the `Owned` variant — confirm against its implementation.
+            _ => panic!("Could not own slice!"),
+        }
+    }
+}
+
 /// Wrap a mutable slice and convert to a Vec on serialize.
 /// We use a hidden inner enum so the public API can be safe,
 /// unless the user uses the unsafe [`OwnedSliceMut::from_raw_parts_mut`]
@ -486,6 +500,20 @@ impl<'a, T> From<Vec<T>> for OwnedSliceMut<'a, T> {
    }
 }

+/// Create a vector from an [`OwnedSliceMut`], or return the owned vec.
+///
+/// The slice is first converted with `into_owned()`, then the inner vector is moved out.
+///
+/// # Panics
+/// Panics with "Could not own slice!" if the inner value is not the `Owned` variant
+/// after `into_owned()`.
+impl<'a, T> From<OwnedSliceMut<'a, T>> for Vec<T>
+where
+    T: Clone,
+{
+    fn from(slice: OwnedSliceMut<'a, T>) -> Self {
+        let slice = slice.into_owned();
+        match slice.inner {
+            OwnedSliceMutInner::Owned(vec) => vec,
+            // NOTE(review): presumably unreachable, since `into_owned()` should always
+            // produce the `Owned` variant — confirm against its implementation.
+            _ => panic!("Could not own slice!"),
+        }
+    }
+}
+
 /// Create a new [`OwnedSliceMut`] from a vector reference
 impl<'a, T> From<&'a mut Vec<T>> for OwnedSliceMut<'a, T> {
    fn from(vec: &'a mut Vec<T>) -> Self {
--- a/libafl/src/stages/dump.rs
+++ b/libafl/src/stages/dump.rs
@ -0,0 +1,128 @@
+//! The [`DumpToDiskStage`] is a stage that dumps the corpus and the solutions to disk to e.g. allow AFL to sync
+
+use alloc::vec::Vec;
+use core::{clone::Clone, marker::PhantomData};
+use std::{fs, fs::File, io::Write, path::PathBuf};
+
+use serde::{Deserialize, Serialize};
+
+use crate::{
+    corpus::Corpus,
+    inputs::UsesInput,
+    stages::Stage,
+    state::{HasCorpus, HasMetadata, HasRand, HasSolutions, UsesState},
+    Error,
+};
+
+/// Metadata recording how far the [`DumpToDiskStage`] has already dumped the corpus
+/// and the solutions, so that each entry index is only written once.
+#[derive(Default, Serialize, Deserialize, Clone, Debug)]
+pub struct DumpToDiskMetadata {
+    /// Corpus count seen by the previous dump; the next dump starts at this index.
+    last_corpus: usize,
+    /// Solutions count seen by the previous dump; the next dump starts at this index.
+    last_solution: usize,
+}
+
+crate::impl_serdeany!(DumpToDiskMetadata);
+
+/// The [`DumpToDiskStage`] is a stage that dumps the corpus and the solutions to disk
+///
+/// Each input is converted to bytes with the user-supplied `to_bytes` callback
+/// and written to a file named `id_<index>` in the matching directory.
+#[derive(Debug)]
+pub struct DumpToDiskStage<CB, EM, Z> {
+    /// Directory the solution files are written to
+    solutions_dir: PathBuf,
+    /// Directory the corpus files are written to
+    corpus_dir: PathBuf,
+    /// Callback turning an input into the bytes that get written to disk
+    to_bytes: CB,
+    /// Marker for the otherwise unused `EM` and `Z` type parameters
+    phantom: PhantomData<(EM, Z)>,
+}
+
+/// The stage operates on the same state type as its event manager `EM`.
+impl<CB, EM, Z> UsesState for DumpToDiskStage<CB, EM, Z>
+where
+    EM: UsesState,
+{
+    type State = EM::State;
+}
+
+/// Stage implementation: on every call, write the corpus entries and solutions
+/// that were added since the previous call to their respective directories.
+impl<CB, E, EM, Z> Stage<E, EM, Z> for DumpToDiskStage<CB, EM, Z>
+where
+    CB: FnMut(&<Z::State as UsesInput>::Input) -> Vec<u8>,
+    EM: UsesState<State = Z::State>,
+    E: UsesState<State = Z::State>,
+    Z: UsesState,
+    Z::State: HasCorpus + HasSolutions + HasRand + HasMetadata,
+{
+    /// Dump all not-yet-dumped corpus entries and solutions to disk.
+    ///
+    /// The fuzzer, executor, manager and current corpus index are not used.
+    ///
+    /// # Errors
+    /// Returns an error if a testcase cannot be fetched, its input cannot be loaded,
+    /// or an output file cannot be created. Errors while writing the bytes are ignored.
+    #[inline]
+    fn perform(
+        &mut self,
+        _fuzzer: &mut Z,
+        _executor: &mut E,
+        state: &mut Z::State,
+        _manager: &mut EM,
+        _corpus_idx: usize,
+    ) -> Result<(), Error> {
+        // Copy the progress markers out of the state; without stored metadata
+        // the defaults (zero) are used, so everything gets dumped.
+        let meta = state
+            .metadata()
+            .get::<DumpToDiskMetadata>()
+            .map_or_else(DumpToDiskMetadata::default, Clone::clone);
+
+        let corpus_count = state.corpus().count();
+        let solutions_count = state.solutions().count();
+
+        // Only the entries with an index at or after the last recorded count are written.
+        for i in meta.last_corpus..corpus_count {
+            let mut testcase = state.corpus().get(i)?.borrow_mut();
+            let input = testcase.load_input()?;
+            let bytes = (self.to_bytes)(input);
+
+            // The file name is derived from the corpus index.
+            let fname = self.corpus_dir.join(format!("id_{i}"));
+            let mut f = File::create(fname)?;
+            // NOTE(review): the write result is discarded, so a failed or partial write
+            // goes unnoticed and the entry is still counted as dumped.
+            drop(f.write_all(&bytes));
+        }
+
+        // Same procedure for the solutions, written to the solutions directory.
+        for i in meta.last_solution..solutions_count {
+            let mut testcase = state.solutions().get(i)?.borrow_mut();
+            let input = testcase.load_input()?;
+            let bytes = (self.to_bytes)(input);
+
+            let fname = self.solutions_dir.join(format!("id_{i}"));
+            let mut f = File::create(fname)?;
+            // NOTE(review): write errors are ignored here as well.
+            drop(f.write_all(&bytes));
+        }
+
+        // Remember the counts so the next call starts after the entries handled above.
+        // Presumably this replaces any previously stored `DumpToDiskMetadata` — confirm
+        // against `add_metadata`.
+        state.add_metadata(DumpToDiskMetadata {
+            last_corpus: corpus_count,
+            last_solution: solutions_count,
+        });
+
+        Ok(())
+    }
+}
+
+impl<CB, EM, Z> DumpToDiskStage<CB, EM, Z>
+where
+    CB: FnMut(&<Z::State as UsesInput>::Input) -> Vec<u8>,
+    EM: UsesState<State = Z::State>,
+    Z: UsesState,
+    Z::State: HasCorpus + HasSolutions + HasRand + HasMetadata,
+{
+    /// Create a new [`DumpToDiskStage`]
+    ///
+    /// `to_bytes` converts an input into the bytes to write, `corpus_dir` receives the
+    /// dumped corpus entries and `solutions_dir` receives the dumped solutions.
+    /// Both directories are created if they do not exist yet.
+    ///
+    /// # Errors
+    /// Returns a file error if one of the directories cannot be created and
+    /// does not already exist as a directory.
+    pub fn new<A, B>(to_bytes: CB, corpus_dir: A, solutions_dir: B) -> Result<Self, Error>
+    where
+        A: Into<PathBuf>,
+        B: Into<PathBuf>,
+    {
+        let corpus_dir = corpus_dir.into();
+        if let Err(e) = fs::create_dir(&corpus_dir) {
+            // A creation failure is fine as long as the directory is already there.
+            if !corpus_dir.is_dir() {
+                return Err(Error::file(e));
+            }
+        }
+        let solutions_dir = solutions_dir.into();
+        if let Err(e) = fs::create_dir(&solutions_dir) {
+            // The check must look at the solutions directory itself: an existing
+            // corpus directory says nothing about whether this one is usable.
+            if !solutions_dir.is_dir() {
+                return Err(Error::file(e));
+            }
+        }
+        Ok(Self {
+            to_bytes,
+            solutions_dir,
+            corpus_dir,
+            phantom: PhantomData,
+        })
+    }
+}
--- a/libafl/src/stages/mod.rs
+++ b/libafl/src/stages/mod.rs
@ -39,10 +39,15 @@ pub use concolic::SimpleConcolicMutationalStage;

 #[cfg(feature = "std")]
 pub mod sync;
+#[cfg(feature = "std")]
+pub use sync::*;
+
+#[cfg(feature = "std")]
+pub mod dump;
 use core::{convert::From, marker::PhantomData};

 #[cfg(feature = "std")]
-pub use sync::*;
+pub use dump::*;

 use self::push::PushStage;
 use crate::{
--- a/libafl/src/stages/sync.rs
+++ b/libafl/src/stages/sync.rs
@ -1,5 +1,4 @@
-//| The [`MutationalStage`] is the default stage used during fuzzing.
-//! For the current input, it will perform a range of random mutations, and then run them in the executor.
+//! The [`SyncFromDiskStage`] is a stage that imports inputs from disk for e.g. sync with AFL

 use core::marker::PhantomData;
 use std::{