From d6fe67c3c828b94b02e9b3606af05372481e28db Mon Sep 17 00:00:00 2001 From: Aarnav Date: Fri, 8 Mar 2024 08:56:08 +0700 Subject: [PATCH] Introduce multicore loading for the initial seed corpus (#1905) * introduce multicore load initial corpus * update fuzzers/libfuzzer_libpng_norestart to use multicore corpus loading * run clippy * use CoreId and Cores in state if std * misc. typos * adapt multicore load initial inputs to allow resumable corpus loading in case of crashes or timeouts during corpus loading. * add std feature flag to multicore_inputs_processed * fix doc comment * run fmt for example fuzzer --------- Co-authored-by: aarnav Co-authored-by: Romain Malmain Co-authored-by: Dongjia "toka" Zhang --- fuzzers/libfuzzer_libpng_norestart/src/lib.rs | 11 +- libafl/src/state/mod.rs | 181 ++++++++++++++---- 2 files changed, 156 insertions(+), 36 deletions(-) diff --git a/fuzzers/libfuzzer_libpng_norestart/src/lib.rs b/fuzzers/libfuzzer_libpng_norestart/src/lib.rs index a9de4ae99b..bf868ec2fc 100644 --- a/fuzzers/libfuzzer_libpng_norestart/src/lib.rs +++ b/fuzzers/libfuzzer_libpng_norestart/src/lib.rs @@ -152,7 +152,7 @@ pub extern "C" fn libafl_main() { let mut run_client = |state: Option<_>, mut restarting_mgr: LlmpRestartingEventManager<_, _>, - _core_id| { + core_id| { // Create an observation channel using the coverage map let edges_observer = HitcountsMapObserver::new(unsafe { std_edges_map_observer("edges") }); @@ -240,7 +240,14 @@ pub extern "C" fn libafl_main() { // In case the corpus is empty (on first run), reset if state.must_load_initial_inputs() { state - .load_initial_inputs(&mut fuzzer, &mut executor, &mut restarting_mgr, &opt.input) + .load_initial_inputs_multicore( + &mut fuzzer, + &mut executor, + &mut restarting_mgr, + &opt.input, + &core_id, + &cores, + ) .unwrap_or_else(|_| panic!("Failed to load initial corpus at {:?}", &opt.input)); println!("We imported {} inputs from disk.", state.corpus().count()); } diff --git a/libafl/src/state/mod.rs 
b/libafl/src/state/mod.rs index 9cc387e68d..2f37cbda82 100644 --- a/libafl/src/state/mod.rs +++ b/libafl/src/state/mod.rs @@ -13,6 +13,8 @@ use std::{ path::{Path, PathBuf}, }; +#[cfg(feature = "std")] +use libafl_bolts::core_affinity::{CoreId, Cores}; use libafl_bolts::{ rands::{Rand, StdRand}, serdeany::{NamedSerdeAnyMap, SerdeAny, SerdeAnyMap}, @@ -325,6 +327,10 @@ pub struct StdState { #[cfg(feature = "std")] /// Remaining initial inputs to load, if any dont_reenter: Option>, + #[cfg(feature = "std")] + /// If inputs have been processed for multicore loading + /// relevant only for `load_initial_inputs_multicore` + multicore_inputs_processed: Option, /// The last time we reported progress (if available/used). /// This information is used by fuzzer `maybe_report_progress`. last_report_time: Option, @@ -642,22 +648,14 @@ where } } - /// Loads initial inputs from the passed-in `in_dirs`. - /// If `forced` is true, will add all testcases, no matter what. - fn load_initial_inputs_custom( - &mut self, - fuzzer: &mut Z, - executor: &mut E, - manager: &mut EM, - in_dirs: &[PathBuf], - forced: bool, - loader: &mut dyn FnMut(&mut Z, &mut Self, &Path) -> Result, - ) -> Result<(), Error> - where - E: UsesState, - EM: EventFirer, - Z: Evaluator, - { + /// Resets the state of initial files. + fn reset_initial_files_state(&mut self) { + self.remaining_initial_files = None; + self.dont_reenter = None; + } + + /// Sets canonical paths for provided inputs + fn canonicalize_input_dirs(&mut self, in_dirs: &[PathBuf]) -> Result<(), Error> { if let Some(remaining) = self.remaining_initial_files.as_ref() { // everything was loaded if remaining.is_empty() { @@ -673,8 +671,7 @@ where self.dont_reenter = Some(files.clone()); self.remaining_initial_files = Some(files); } - - self.continue_loading_initial_inputs_custom(fuzzer, executor, manager, forced, loader) + Ok(()) } /// Loads initial inputs from the passed-in `in_dirs`. 
@@ -705,7 +702,32 @@ where self.continue_loading_initial_inputs_custom(fuzzer, executor, manager, forced, loader) } - + fn load_file( + &mut self, + path: &PathBuf, + manager: &mut EM, + fuzzer: &mut Z, + executor: &mut E, + forced: bool, + loader: &mut dyn FnMut(&mut Z, &mut Self, &Path) -> Result, + ) -> Result<(), Error> + where + E: UsesState, + EM: EventFirer, + Z: Evaluator, + { + log::info!("Loading file {:?} ...", &path); + let input = loader(fuzzer, self, path)?; + if forced { + let _: CorpusId = fuzzer.add_input(self, executor, manager, input)?; + } else { + let (res, _) = fuzzer.evaluate_input(self, executor, manager, input)?; + if res == ExecuteInputResult::None { + log::warn!("File {:?} was not interesting, skipped.", &path); + } + } + Ok(()) + } /// Loads initial inputs from the passed-in `in_dirs`. /// If `forced` is true, will add all testcases, no matter what. /// This method takes a list of files. @@ -725,16 +747,7 @@ where loop { match self.next_file() { Ok(path) => { - log::info!("Loading file {:?} ...", &path); - let input = loader(fuzzer, self, &path)?; - if forced { - let _: CorpusId = fuzzer.add_input(self, executor, manager, input)?; - } else { - let (res, _) = fuzzer.evaluate_input(self, executor, manager, input)?; - if res == ExecuteInputResult::None { - log::warn!("File {:?} was not interesting, skipped.", &path); - } - } + self.load_file(&path, manager, fuzzer, executor, forced, loader)?; } Err(Error::IteratorEnd(_, _)) => break, Err(e) => return Err(e), @@ -793,16 +806,15 @@ where EM: EventFirer, Z: Evaluator, { - self.load_initial_inputs_custom( + self.canonicalize_input_dirs(in_dirs)?; + self.continue_loading_initial_inputs_custom( fuzzer, executor, manager, - in_dirs, true, &mut |_, _, path| I::from_file(path), ) } - /// Loads initial inputs from the passed-in `in_dirs`. /// If `forced` is true, will add all testcases, no matter what. /// This method takes a list of files, instead of folders. 
@@ -841,15 +853,114 @@ where EM: EventFirer, Z: Evaluator, { - self.load_initial_inputs_custom( + self.canonicalize_input_dirs(in_dirs)?; + self.continue_loading_initial_inputs_custom( fuzzer, executor, manager, - in_dirs, false, &mut |_, _, path| I::from_file(path), ) } + + fn calculate_corpus_size(&mut self) -> Result { + let mut count: usize = 0; + loop { + match self.next_file() { + Ok(_) => { + count = count.saturating_add(1); + } + Err(Error::IteratorEnd(_, _)) => break, + Err(e) => return Err(e), + } + } + Ok(count) + } + /// Loads initial inputs from the passed-in `in_dirs` + /// in a multicore fashion. Divides the corpus in chunks spread across cores. + pub fn load_initial_inputs_multicore( + &mut self, + fuzzer: &mut Z, + executor: &mut E, + manager: &mut EM, + in_dirs: &[PathBuf], + core_id: &CoreId, + cores: &Cores, + ) -> Result<(), Error> + where + E: UsesState, + EM: EventFirer, + Z: Evaluator, + { + if self.multicore_inputs_processed.unwrap_or(false) { + self.continue_loading_initial_inputs_custom( + fuzzer, + executor, + manager, + false, + &mut |_, _, path| I::from_file(path), + )?; + } else { + self.canonicalize_input_dirs(in_dirs)?; + let corpus_size = self.calculate_corpus_size()?; + log::info!( + "{} total_corpus_size, {} cores", + corpus_size, + cores.ids.len() + ); + self.reset_initial_files_state(); + self.canonicalize_input_dirs(in_dirs)?; + if cores.ids.len() > corpus_size { + log::info!( + "low intial corpus count ({}), no parallelism required.", + corpus_size + ); + } else { + let core_index = cores + .ids + .iter() + .enumerate() + .find(|(_, c)| *c == core_id) + .unwrap_or_else(|| panic!("core id {} not in cores list", core_id.0)) + .0; + let chunk_size = corpus_size.saturating_div(cores.ids.len()); + let mut skip = core_index.saturating_mul(chunk_size); + let mut inputs_todo = chunk_size; + let mut collected_inputs = Vec::new(); + log::info!( + "core = {}, core_index = {}, chunk_size = {}, skip = {}", + core_id.0, 
core_index, + chunk_size, + skip + ); + loop { + match self.next_file() { + Ok(path) => { + if skip != 0 { + skip = skip.saturating_sub(1); + continue; + } + if inputs_todo == 0 { + break; + } + collected_inputs.push(path); + inputs_todo = inputs_todo.saturating_sub(1); + } + Err(Error::IteratorEnd(_, _)) => break, + Err(e) => { + return Err(e); + } + } + } + self.remaining_initial_files = Some(collected_inputs); + } + self.multicore_inputs_processed = Some(true); + return self + .load_initial_inputs_multicore(fuzzer, executor, manager, in_dirs, core_id, cores); + } + Ok(()) + } } impl StdState @@ -969,6 +1080,8 @@ where stage_depth: 0, stage_idx_stack: Vec::new(), phantom: PhantomData, + #[cfg(feature = "std")] + multicore_inputs_processed: None, }; feedback.init_state(&mut state)?; objective.init_state(&mut state)?;