diff --git a/fuzzers/fuzzbench_text/Cargo.toml b/fuzzers/fuzzbench_text/Cargo.toml index c46df9c300..0c526f3a2e 100644 --- a/fuzzers/fuzzbench_text/Cargo.toml +++ b/fuzzers/fuzzbench_text/Cargo.toml @@ -27,6 +27,7 @@ clap = { version = "4.0", features = ["default"] } nix = "0.25" mimalloc = { version = "*", default-features = false } content_inspector = "0.2.4" +serde = { version = "1.0", default-features = false, features = ["alloc"] } # serialization lib [lib] name = "fuzzbench" diff --git a/fuzzers/fuzzbench_text/src/bin/libafl_cxx.rs b/fuzzers/fuzzbench_text/src/bin/libafl_cxx.rs index ce786239b0..dabd22971a 100644 --- a/fuzzers/fuzzbench_text/src/bin/libafl_cxx.rs +++ b/fuzzers/fuzzbench_text/src/bin/libafl_cxx.rs @@ -1,5 +1,5 @@ pub mod libafl_cc; fn main() { - libafl_cc::main() + libafl_cc::main(); } diff --git a/fuzzers/fuzzbench_text/src/lib.rs b/fuzzers/fuzzbench_text/src/lib.rs index 35abacbf59..8df1e7e8f3 100644 --- a/fuzzers/fuzzbench_text/src/lib.rs +++ b/fuzzers/fuzzbench_text/src/lib.rs @@ -47,8 +47,8 @@ use libafl::{ powersched::PowerSchedule, IndexesLenTimeMinimizerScheduler, StdWeightedScheduler, }, stages::{ - calibrate::CalibrationStage, power::StdPowerMutationalStage, GeneralizationStage, - StdMutationalStage, TracingStage, + calibrate::CalibrationStage, dump::DumpToDiskStage, power::StdPowerMutationalStage, + GeneralizationStage, StdMutationalStage, TracingStage, }, state::{HasCorpus, HasMetadata, StdState}, Error, @@ -64,6 +64,7 @@ use nix::{self, unistd::dup}; /// The fuzzer main (as `no_mangle` C function) #[no_mangle] +#[allow(clippy::too_many_lines)] pub fn libafl_main() { // Registry the metadata types used in this fuzzer // Needed only on no_std @@ -127,7 +128,7 @@ pub fn libafl_main() { ); if let Some(filenames) = res.get_many::("remaining") { - let filenames: Vec<&str> = filenames.map(|v| v.as_str()).collect(); + let filenames: Vec<&str> = filenames.map(std::string::String::as_str).collect(); if !filenames.is_empty() { 
run_testcases(&filenames); return; @@ -148,8 +149,11 @@ pub fn libafl_main() { } } let mut crashes = out_dir.clone(); + let mut report = out_dir.clone(); crashes.push("crashes"); + report.push("report"); out_dir.push("queue"); + drop(fs::create_dir(&report)); let in_dir = PathBuf::from( res.get_one::("in") @@ -174,10 +178,12 @@ pub fn libafl_main() { ); if check_if_textual(&in_dir, &tokens) { - fuzz_text(out_dir, crashes, in_dir, tokens, logfile, timeout) - .expect("An error occurred while fuzzing"); + fuzz_text( + out_dir, crashes, &report, &in_dir, tokens, &logfile, timeout, + ) + .expect("An error occurred while fuzzing"); } else { - fuzz_binary(out_dir, crashes, in_dir, tokens, logfile, timeout) + fuzz_binary(out_dir, crashes, &in_dir, tokens, &logfile, timeout) .expect("An error occurred while fuzzing"); } } @@ -215,7 +221,7 @@ fn count_textual_inputs(dir: &Path) -> (usize, usize) { } fn check_if_textual(seeds_dir: &Path, tokenfile: &Option) -> bool { - let (found, tot) = count_textual_inputs(&seeds_dir); + let (found, tot) = count_textual_inputs(seeds_dir); let is_text = found * 100 / tot > 90; // 90% of text inputs if let Some(tokenfile) = tokenfile { let toks = Tokens::from_file(tokenfile).unwrap(); @@ -237,7 +243,7 @@ fn run_testcases(filenames: &[&str]) { // Call LLVMFUzzerInitialize() if present. 
let args: Vec = env::args().collect(); if libfuzzer_initialize(&args) == -1 { - println!("Warning: LLVMFuzzerInitialize failed with -1") + println!("Warning: LLVMFuzzerInitialize failed with -1"); } println!( @@ -256,20 +262,16 @@ fn run_testcases(filenames: &[&str]) { } /// The actual fuzzer +#[allow(clippy::too_many_lines)] fn fuzz_binary( corpus_dir: PathBuf, objective_dir: PathBuf, - seed_dir: PathBuf, + seed_dir: &PathBuf, tokenfile: Option, - logfile: PathBuf, + logfile: &PathBuf, timeout: Duration, ) -> Result<(), Error> { - let log = RefCell::new( - OpenOptions::new() - .append(true) - .create(true) - .open(&logfile)?, - ); + let log = RefCell::new(OpenOptions::new().append(true).create(true).open(logfile)?); #[cfg(unix)] let mut stdout_cpy = unsafe { @@ -357,7 +359,7 @@ fn fuzz_binary( // Call LLVMFUzzerInitialize() if present. let args: Vec = env::args().collect(); if libfuzzer_initialize(&args) == -1 { - println!("Warning: LLVMFuzzerInitialize failed with -1") + println!("Warning: LLVMFuzzerInitialize failed with -1"); } // Setup a randomic Input2State stage @@ -454,12 +456,7 @@ fn fuzz_binary( dup2(null_fd, io::stderr().as_raw_fd())?; } // reopen file to make sure we're at the end - log.replace( - OpenOptions::new() - .append(true) - .create(true) - .open(&logfile)?, - ); + log.replace(OpenOptions::new().append(true).create(true).open(logfile)?); fuzzer.fuzz_loop(&mut stages, &mut executor, &mut state, &mut mgr)?; @@ -467,21 +464,18 @@ fn fuzz_binary( Ok(()) } -/// The actual fuzzer based on Grimoire +/// The actual fuzzer based on `Grimoire` +#[allow(clippy::too_many_lines)] fn fuzz_text( corpus_dir: PathBuf, objective_dir: PathBuf, - seed_dir: PathBuf, + report_dir: &Path, + seed_dir: &PathBuf, tokenfile: Option, - logfile: PathBuf, + logfile: &PathBuf, timeout: Duration, ) -> Result<(), Error> { - let log = RefCell::new( - OpenOptions::new() - .append(true) - .create(true) - .open(&logfile)?, - ); + let log = 
RefCell::new(OpenOptions::new().append(true).create(true).open(logfile)?); #[cfg(unix)] let mut stdout_cpy = unsafe { @@ -570,7 +564,7 @@ fn fuzz_text( // Call LLVMFUzzerInitialize() if present. let args: Vec = env::args().collect(); if libfuzzer_initialize(&args) == -1 { - println!("Warning: LLVMFuzzerInitialize failed with -1") + println!("Warning: LLVMFuzzerInitialize failed with -1"); } // Setup a randomic Input2State stage @@ -644,8 +638,23 @@ fn fuzz_text( timeout * 10, )); + let fuzzbench = DumpToDiskStage::new( + |input: &GeneralizedInput| input.target_bytes().into(), + &report_dir.join("queue"), + &report_dir.join("crashes"), + ) + .unwrap(); + // The order of the stages matter! - let mut stages = tuple_list!(generalization, calibration, tracing, i2s, power, grimoire); + let mut stages = tuple_list!( + fuzzbench, + generalization, + calibration, + tracing, + i2s, + power, + grimoire + ); // Read tokens if state.metadata().get::().is_none() { @@ -682,12 +691,7 @@ fn fuzz_text( dup2(null_fd, io::stderr().as_raw_fd())?; } // reopen file to make sure we're at the end - log.replace( - OpenOptions::new() - .append(true) - .create(true) - .open(&logfile)?, - ); + log.replace(OpenOptions::new().append(true).create(true).open(logfile)?); fuzzer.fuzz_loop(&mut stages, &mut executor, &mut state, &mut mgr)?; diff --git a/fuzzers/nyx_libxml2_parallel/src/main.rs b/fuzzers/nyx_libxml2_parallel/src/main.rs index fddd15bedf..8629e3eea8 100644 --- a/fuzzers/nyx_libxml2_parallel/src/main.rs +++ b/fuzzers/nyx_libxml2_parallel/src/main.rs @@ -29,11 +29,7 @@ fn main() { let monitor = MultiMonitor::new(|s| println!("{}", s)); let cores = Cores::all().expect("unable to get all core id"); - let parent_cpu_id = cores - .ids - .first() - .expect("unable to get first core id") - .clone(); + let parent_cpu_id = cores.ids.first().expect("unable to get first core id"); // region: fuzzer start function let mut run_client = |state: Option<_>, mut restarting_mgr, _core_id: usize| { diff 
--git a/libafl/src/bolts/mod.rs b/libafl/src/bolts/mod.rs
index c8b1eb86ab..b584d374bc 100644
--- a/libafl/src/bolts/mod.rs
+++ b/libafl/src/bolts/mod.rs
@@ -29,7 +29,7 @@ pub mod shmem;
 pub mod staterestore;
 pub mod tuples;
 
-use alloc::string::String;
+use alloc::{string::String, vec::Vec};
 use core::{iter::Iterator, time};
 #[cfg(feature = "std")]
 use std::time::{SystemTime, UNIX_EPOCH};
@@ -50,6 +50,54 @@ pub trait AsMutSlice {
     fn as_mut_slice(&mut self) -> &mut [Self::Entry];
 }
 
+impl<T> AsSlice for Vec<T> {
+    type Entry = T;
+
+    fn as_slice(&self) -> &[Self::Entry] {
+        self
+    }
+}
+
+impl<T> AsMutSlice for Vec<T> {
+    type Entry = T;
+
+    fn as_mut_slice(&mut self) -> &mut [Self::Entry] {
+        self
+    }
+}
+
+impl<T> AsSlice for &[T] {
+    type Entry = T;
+
+    fn as_slice(&self) -> &[Self::Entry] {
+        self
+    }
+}
+
+impl<T> AsSlice for [T] {
+    type Entry = T;
+
+    fn as_slice(&self) -> &[Self::Entry] {
+        self
+    }
+}
+
+impl<T> AsMutSlice for &mut [T] {
+    type Entry = T;
+
+    fn as_mut_slice(&mut self) -> &mut [Self::Entry] {
+        self
+    }
+}
+
+impl<T> AsMutSlice for [T] {
+    type Entry = T;
+
+    fn as_mut_slice(&mut self) -> &mut [Self::Entry] {
+        self
+    }
+}
+
 /// Create an `Iterator` from a reference
 pub trait AsIter<'it> {
     /// The item type
@@ -57,7 +105,7 @@ pub trait AsIter<'it> {
     /// The iterator type
     type IntoIter: Iterator;
 
-    /// Create an interator from &self
+    /// Create an iterator from &self
     fn as_iter(&'it self) -> Self::IntoIter;
 }
 
@@ -68,7 +116,7 @@ pub trait AsIterMut<'it> {
     /// The iterator type
     type IntoIter: Iterator;
 
-    /// Create an interator from &mut self
+    /// Create an iterator from &mut self
     fn as_iter_mut(&'it mut self) -> Self::IntoIter;
 }
 
diff --git a/libafl/src/bolts/ownedref.rs b/libafl/src/bolts/ownedref.rs
index bdaf095184..c6a90acf3a 100644
--- a/libafl/src/bolts/ownedref.rs
+++ b/libafl/src/bolts/ownedref.rs
@@ -331,6 +331,20 @@ where
     }
 }
 
+/// Create a vector from an [`OwnedSlice`], or return the owned vec.
+impl<'a, T> From<OwnedSlice<'a, T>> for Vec<T>
+where
+    T: Clone,
+{
+    fn from(slice: OwnedSlice<'a, T>) -> Self {
+        let slice = slice.into_owned();
+        match slice.inner {
+            OwnedSliceInner::Owned(vec) => vec,
+            _ => panic!("Could not own slice!"),
+        }
+    }
+}
+
 /// Wrap a mutable slice and convert to a Vec on serialize.
 /// We use a hidden inner enum so the public API can be safe,
 /// unless the user uses the unsafe [`OwnedSliceMut::from_raw_parts_mut`]
@@ -486,6 +500,20 @@ impl<'a, T> From> for OwnedSliceMut<'a, T> {
     }
 }
 
+/// Create a vector from an [`OwnedSliceMut`], or return the owned vec.
+impl<'a, T> From<OwnedSliceMut<'a, T>> for Vec<T>
+where
+    T: Clone,
+{
+    fn from(slice: OwnedSliceMut<'a, T>) -> Self {
+        let slice = slice.into_owned();
+        match slice.inner {
+            OwnedSliceMutInner::Owned(vec) => vec,
+            _ => panic!("Could not own slice!"),
+        }
+    }
+}
+
 /// Create a new [`OwnedSliceMut`] from a vector reference
 impl<'a, T> From<&'a mut Vec<T>> for OwnedSliceMut<'a, T> {
     fn from(vec: &'a mut Vec<T>) -> Self {
diff --git a/libafl/src/stages/dump.rs b/libafl/src/stages/dump.rs
new file mode 100644
index 0000000000..194c611998
--- /dev/null
+++ b/libafl/src/stages/dump.rs
@@ -0,0 +1,138 @@
+//! The [`DumpToDiskStage`] is a stage that dumps the corpus and the solutions to disk to e.g. allow AFL to sync
+
+use alloc::vec::Vec;
+use core::{clone::Clone, marker::PhantomData};
+use std::{fs, fs::File, io::Write, path::PathBuf};
+
+use serde::{Deserialize, Serialize};
+
+use crate::{
+    corpus::Corpus,
+    inputs::UsesInput,
+    stages::Stage,
+    state::{HasCorpus, HasMetadata, HasRand, HasSolutions, UsesState},
+    Error,
+};
+
+/// Metadata used to store information about disk dump indexes for names
+#[derive(Default, Serialize, Deserialize, Clone, Debug)]
+pub struct DumpToDiskMetadata {
+    last_corpus: usize,
+    last_solution: usize,
+}
+
+crate::impl_serdeany!(DumpToDiskMetadata);
+
+/// The [`DumpToDiskStage`] is a stage that dumps the corpus and the solutions to disk
+#[derive(Debug)]
+pub struct DumpToDiskStage<CB, EM, Z> {
+    solutions_dir: PathBuf,
+    corpus_dir: PathBuf,
+    to_bytes: CB,
+    phantom: PhantomData<(EM, Z)>,
+}
+
+impl<CB, EM, Z> UsesState for DumpToDiskStage<CB, EM, Z>
+where
+    EM: UsesState,
+{
+    type State = EM::State;
+}
+
+impl<CB, E, EM, Z> Stage<E, EM, Z> for DumpToDiskStage<CB, EM, Z>
+where
+    CB: FnMut(&<Z::State as UsesInput>::Input) -> Vec<u8>,
+    EM: UsesState<State = Z::State>,
+    E: UsesState<State = Z::State>,
+    Z: UsesState,
+    Z::State: HasCorpus + HasSolutions + HasRand + HasMetadata,
+{
+    /// Write every corpus entry and solution added since the previous run to disk
+    /// as `id_{index}` files, then record the new counts in [`DumpToDiskMetadata`].
+    #[inline]
+    fn perform(
+        &mut self,
+        _fuzzer: &mut Z,
+        _executor: &mut E,
+        state: &mut Z::State,
+        _manager: &mut EM,
+        _corpus_idx: usize,
+    ) -> Result<(), Error> {
+        // Resume from the indexes stored by the previous run (zero on the first run).
+        let meta = state
+            .metadata()
+            .get::<DumpToDiskMetadata>()
+            .map_or_else(DumpToDiskMetadata::default, Clone::clone);
+
+        let corpus_count = state.corpus().count();
+        let solutions_count = state.solutions().count();
+
+        for i in meta.last_corpus..corpus_count {
+            let mut testcase = state.corpus().get(i)?.borrow_mut();
+            let input = testcase.load_input()?;
+            let bytes = (self.to_bytes)(input);
+
+            let fname = self.corpus_dir.join(format!("id_{i}"));
+            let mut f = File::create(fname)?;
+            // The result of the write is discarded.
+            drop(f.write_all(&bytes));
+        }
+
+        for i in meta.last_solution..solutions_count {
+            let mut testcase = state.solutions().get(i)?.borrow_mut();
+            let input = testcase.load_input()?;
+            let bytes = (self.to_bytes)(input);
+
+            let fname = self.solutions_dir.join(format!("id_{i}"));
+            let mut f = File::create(fname)?;
+            // The result of the write is discarded.
+            drop(f.write_all(&bytes));
+        }
+
+        state.add_metadata(DumpToDiskMetadata {
+            last_corpus: corpus_count,
+            last_solution: solutions_count,
+        });
+
+        Ok(())
+    }
+}
+
+impl<CB, EM, Z> DumpToDiskStage<CB, EM, Z>
+where
+    CB: FnMut(&<Z::State as UsesInput>::Input) -> Vec<u8>,
+    EM: UsesState<State = Z::State>,
+    Z: UsesState,
+    Z::State: HasCorpus + HasSolutions + HasRand + HasMetadata,
+{
+    /// Create a new [`DumpToDiskStage`]
+    ///
+    /// # Errors
+    /// Returns an error if either directory cannot be created and does not already exist.
+    pub fn new<A, B>(to_bytes: CB, corpus_dir: A, solutions_dir: B) -> Result<Self, Error>
+    where
+        A: Into<PathBuf>,
+        B: Into<PathBuf>,
+    {
+        let corpus_dir = corpus_dir.into();
+        if let Err(e) = fs::create_dir(&corpus_dir) {
+            // A directory that already exists is fine.
+            if !corpus_dir.is_dir() {
+                return Err(Error::file(e));
+            }
+        }
+        let solutions_dir = solutions_dir.into();
+        if let Err(e) = fs::create_dir(&solutions_dir) {
+            // Check the directory that just failed, not `corpus_dir`.
+            if !solutions_dir.is_dir() {
+                return Err(Error::file(e));
+            }
+        }
+        Ok(Self {
+            to_bytes,
+            solutions_dir,
+            corpus_dir,
+            phantom: PhantomData,
+        })
+    }
+}
diff --git a/libafl/src/stages/mod.rs b/libafl/src/stages/mod.rs
index 5189a8a04f..848c0d9cc7 100644
--- a/libafl/src/stages/mod.rs
+++ b/libafl/src/stages/mod.rs
@@ -39,10 +39,15 @@ pub use concolic::SimpleConcolicMutationalStage;
 
 #[cfg(feature = "std")]
 pub mod sync;
+#[cfg(feature = "std")]
+pub use sync::*;
+
+#[cfg(feature = "std")]
+pub mod dump;
 use core::{convert::From, marker::PhantomData};
 
 #[cfg(feature = "std")]
-pub use sync::*;
+pub use dump::*;
 
 use self::push::PushStage;
 use crate::{
diff --git a/libafl/src/stages/sync.rs b/libafl/src/stages/sync.rs
index 567b1daf7b..10ac9853a9 100644
--- a/libafl/src/stages/sync.rs
+++ b/libafl/src/stages/sync.rs
@@ -1,5 +1,4 @@
-//| The [`MutationalStage`] is the default stage used during fuzzing.
-//! For the current input, it will perform a range of random mutations, and then run them in the executor.
+//! The [`SyncFromDiskStage`] is a stage that imports inputs from disk for e.g. sync with AFL
 
 use core::marker::PhantomData;
 use std::{