Introduce multicore loading for the initial seed corpus (#1905)

* introduce multicore loading of the initial corpus

* update fuzzers/libfuzzer_libpng_norestart to use multicore corpus loading

* run clippy

* use CoreId and Cores in state if std

* misc. typos

* adapt multicore load initial inputs to allow resumable corpus loading
in case of crashes or timeouts during corpus loading.

* add std feature flag to multicore_inputs_processed

* fix doc comment

* run fmt for example fuzzer

---------

Co-authored-by: aarnav <aarnav@srlabs.de>
Co-authored-by: Romain Malmain <romain.malmain@pm.me>
Co-authored-by: Dongjia "toka" Zhang <tokazerkje@outlook.com>
This commit is contained in:
Aarnav 2024-03-08 08:56:08 +07:00 committed by GitHub
parent 1b9f4ea29c
commit d6fe67c3c8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 156 additions and 36 deletions

View File

@ -152,7 +152,7 @@ pub extern "C" fn libafl_main() {
let mut run_client = |state: Option<_>, let mut run_client = |state: Option<_>,
mut restarting_mgr: LlmpRestartingEventManager<_, _>, mut restarting_mgr: LlmpRestartingEventManager<_, _>,
_core_id| { core_id| {
// Create an observation channel using the coverage map // Create an observation channel using the coverage map
let edges_observer = HitcountsMapObserver::new(unsafe { std_edges_map_observer("edges") }); let edges_observer = HitcountsMapObserver::new(unsafe { std_edges_map_observer("edges") });
@ -240,7 +240,14 @@ pub extern "C" fn libafl_main() {
// In case the corpus is empty (on first run), reset // In case the corpus is empty (on first run), reset
if state.must_load_initial_inputs() { if state.must_load_initial_inputs() {
state state
.load_initial_inputs(&mut fuzzer, &mut executor, &mut restarting_mgr, &opt.input) .load_initial_inputs_multicore(
&mut fuzzer,
&mut executor,
&mut restarting_mgr,
&opt.input,
&core_id,
&cores,
)
.unwrap_or_else(|_| panic!("Failed to load initial corpus at {:?}", &opt.input)); .unwrap_or_else(|_| panic!("Failed to load initial corpus at {:?}", &opt.input));
println!("We imported {} inputs from disk.", state.corpus().count()); println!("We imported {} inputs from disk.", state.corpus().count());
} }

View File

@ -13,6 +13,8 @@ use std::{
path::{Path, PathBuf}, path::{Path, PathBuf},
}; };
#[cfg(feature = "std")]
use libafl_bolts::core_affinity::{CoreId, Cores};
use libafl_bolts::{ use libafl_bolts::{
rands::{Rand, StdRand}, rands::{Rand, StdRand},
serdeany::{NamedSerdeAnyMap, SerdeAny, SerdeAnyMap}, serdeany::{NamedSerdeAnyMap, SerdeAny, SerdeAnyMap},
@ -325,6 +327,10 @@ pub struct StdState<I, C, R, SC> {
#[cfg(feature = "std")] #[cfg(feature = "std")]
/// Remaining initial inputs to load, if any /// Remaining initial inputs to load, if any
dont_reenter: Option<Vec<PathBuf>>, dont_reenter: Option<Vec<PathBuf>>,
#[cfg(feature = "std")]
/// If inputs have been processed for multicore loading
/// relevant only for `load_initial_inputs_multicore`
multicore_inputs_processed: Option<bool>,
/// The last time we reported progress (if available/used). /// The last time we reported progress (if available/used).
/// This information is used by fuzzer `maybe_report_progress`. /// This information is used by fuzzer `maybe_report_progress`.
last_report_time: Option<Duration>, last_report_time: Option<Duration>,
@ -642,22 +648,14 @@ where
} }
} }
/// Loads initial inputs from the passed-in `in_dirs`. /// Resets the state of initial files.
/// If `forced` is true, will add all testcases, no matter what. fn reset_initial_files_state(&mut self) {
fn load_initial_inputs_custom<E, EM, Z>( self.remaining_initial_files = None;
&mut self, self.dont_reenter = None;
fuzzer: &mut Z, }
executor: &mut E,
manager: &mut EM, /// Sets canonical paths for provided inputs
in_dirs: &[PathBuf], fn canonicalize_input_dirs(&mut self, in_dirs: &[PathBuf]) -> Result<(), Error> {
forced: bool,
loader: &mut dyn FnMut(&mut Z, &mut Self, &Path) -> Result<I, Error>,
) -> Result<(), Error>
where
E: UsesState<State = Self>,
EM: EventFirer<State = Self>,
Z: Evaluator<E, EM, State = Self>,
{
if let Some(remaining) = self.remaining_initial_files.as_ref() { if let Some(remaining) = self.remaining_initial_files.as_ref() {
// everything was loaded // everything was loaded
if remaining.is_empty() { if remaining.is_empty() {
@ -673,8 +671,7 @@ where
self.dont_reenter = Some(files.clone()); self.dont_reenter = Some(files.clone());
self.remaining_initial_files = Some(files); self.remaining_initial_files = Some(files);
} }
Ok(())
self.continue_loading_initial_inputs_custom(fuzzer, executor, manager, forced, loader)
} }
/// Loads initial inputs from the passed-in `in_dirs`. /// Loads initial inputs from the passed-in `in_dirs`.
@ -705,7 +702,32 @@ where
self.continue_loading_initial_inputs_custom(fuzzer, executor, manager, forced, loader) self.continue_loading_initial_inputs_custom(fuzzer, executor, manager, forced, loader)
} }
/// Loads a single initial input from `path` and hands it to the fuzzer.
///
/// The input is produced by calling `loader` with the fuzzer, this state and `path`.
/// If `forced` is true, the input is added via `fuzzer.add_input`. Otherwise it is
/// passed to `fuzzer.evaluate_input`, and a warning is logged when the result is
/// `ExecuteInputResult::None`.
///
/// # Errors
/// Returns any error produced by `loader`, `add_input` or `evaluate_input`.
fn load_file<E, EM, Z>(
    &mut self,
    // Borrowed as `&Path` rather than `&PathBuf`; callers holding a `PathBuf` can still
    // pass `&path_buf` thanks to deref coercion.
    path: &Path,
    manager: &mut EM,
    fuzzer: &mut Z,
    executor: &mut E,
    forced: bool,
    loader: &mut dyn FnMut(&mut Z, &mut Self, &Path) -> Result<I, Error>,
) -> Result<(), Error>
where
    E: UsesState<State = Self>,
    EM: EventFirer<State = Self>,
    Z: Evaluator<E, EM, State = Self>,
{
    log::info!("Loading file {:?} ...", &path);
    let input = loader(fuzzer, self, path)?;
    if forced {
        // Forced loading: add the input without looking at the evaluation outcome.
        let _: CorpusId = fuzzer.add_input(self, executor, manager, input)?;
    } else {
        let (res, _) = fuzzer.evaluate_input(self, executor, manager, input)?;
        if res == ExecuteInputResult::None {
            log::warn!("File {:?} was not interesting, skipped.", &path);
        }
    }
    Ok(())
}
/// Loads initial inputs from the passed-in `in_dirs`. /// Loads initial inputs from the passed-in `in_dirs`.
/// If `forced` is true, will add all testcases, no matter what. /// If `forced` is true, will add all testcases, no matter what.
/// This method takes a list of files. /// This method takes a list of files.
@ -725,16 +747,7 @@ where
loop { loop {
match self.next_file() { match self.next_file() {
Ok(path) => { Ok(path) => {
log::info!("Loading file {:?} ...", &path); self.load_file(&path, manager, fuzzer, executor, forced, loader)?;
let input = loader(fuzzer, self, &path)?;
if forced {
let _: CorpusId = fuzzer.add_input(self, executor, manager, input)?;
} else {
let (res, _) = fuzzer.evaluate_input(self, executor, manager, input)?;
if res == ExecuteInputResult::None {
log::warn!("File {:?} was not interesting, skipped.", &path);
}
}
} }
Err(Error::IteratorEnd(_, _)) => break, Err(Error::IteratorEnd(_, _)) => break,
Err(e) => return Err(e), Err(e) => return Err(e),
@ -793,16 +806,15 @@ where
EM: EventFirer<State = Self>, EM: EventFirer<State = Self>,
Z: Evaluator<E, EM, State = Self>, Z: Evaluator<E, EM, State = Self>,
{ {
self.load_initial_inputs_custom( self.canonicalize_input_dirs(in_dirs)?;
self.continue_loading_initial_inputs_custom(
fuzzer, fuzzer,
executor, executor,
manager, manager,
in_dirs,
true, true,
&mut |_, _, path| I::from_file(path), &mut |_, _, path| I::from_file(path),
) )
} }
/// Loads initial inputs from the passed-in `in_dirs`. /// Loads initial inputs from the passed-in `in_dirs`.
/// If `forced` is true, will add all testcases, no matter what. /// If `forced` is true, will add all testcases, no matter what.
/// This method takes a list of files, instead of folders. /// This method takes a list of files, instead of folders.
@ -841,15 +853,114 @@ where
EM: EventFirer<State = Self>, EM: EventFirer<State = Self>,
Z: Evaluator<E, EM, State = Self>, Z: Evaluator<E, EM, State = Self>,
{ {
self.load_initial_inputs_custom( self.canonicalize_input_dirs(in_dirs)?;
self.continue_loading_initial_inputs_custom(
fuzzer, fuzzer,
executor, executor,
manager, manager,
in_dirs,
false, false,
&mut |_, _, path| I::from_file(path), &mut |_, _, path| I::from_file(path),
) )
} }
fn calculate_corpus_size(&mut self) -> Result<usize, Error> {
let mut count: usize = 0;
loop {
match self.next_file() {
Ok(_) => {
count = count.saturating_add(1);
}
Err(Error::IteratorEnd(_, _)) => break,
Err(e) => return Err(e),
}
}
Ok(count)
}
/// Loads the initial inputs found under the passed-in `in_dirs`, dividing the corpus
/// into chunks that are spread across the given `cores`.
///
/// On the first call the corpus is enumerated once to learn its size. If there are more
/// cores than files, no splitting happens and this state keeps the full file list.
/// Otherwise only the chunk belonging to `core_id` (selected by its position in
/// `cores.ids`) is kept as this state's remaining initial files. The corpus is split as
/// evenly as possible: when it does not divide evenly, the first
/// `corpus_size % cores.ids.len()` cores take one extra file each, so no file is left
/// unassigned.
///
/// Once the chunk has been computed, `multicore_inputs_processed` is set. A later call
/// on the same state (e.g. when resuming after a crash or timeout during loading) skips
/// the enumeration and simply continues loading the files that are still pending.
///
/// NOTE(review): the split assumes `next_file` enumerates the files in the same order
/// for every core — confirm.
///
/// # Errors
/// Returns any error raised while canonicalizing `in_dirs`, enumerating files or
/// loading an input.
///
/// # Panics
/// Panics if the corpus is split and `core_id` is not contained in `cores.ids`.
pub fn load_initial_inputs_multicore<E, EM, Z>(
    &mut self,
    fuzzer: &mut Z,
    executor: &mut E,
    manager: &mut EM,
    in_dirs: &[PathBuf],
    core_id: &CoreId,
    cores: &Cores,
) -> Result<(), Error>
where
    E: UsesState<State = Self>,
    EM: EventFirer<State = Self>,
    Z: Evaluator<E, EM, State = Self>,
{
    if !self.multicore_inputs_processed.unwrap_or(false) {
        // First pass: walk the whole corpus only to count it.
        self.canonicalize_input_dirs(in_dirs)?;
        let corpus_size = self.calculate_corpus_size()?;
        log::info!(
            "{} total_corpus_size, {} cores",
            corpus_size,
            cores.ids.len()
        );

        // Counting went through `next_file`, so rebuild the initial-file state from scratch.
        self.reset_initial_files_state();
        self.canonicalize_input_dirs(in_dirs)?;

        let num_cores = cores.ids.len();
        if num_cores > corpus_size {
            log::info!(
                "low intial corpus count ({}), no parallelism required.",
                corpus_size
            );
        } else {
            let core_index = cores
                .ids
                .iter()
                .position(|c| c == core_id)
                .unwrap_or_else(|| panic!("core id {} not in cores list", core_id.0));

            // `num_cores` is non-zero here: `position` would have panicked on an empty list.
            let base_chunk = corpus_size.saturating_div(num_cores);
            // Files left over after an even split; the first `remainder` cores take one each.
            let remainder = corpus_size.saturating_sub(base_chunk.saturating_mul(num_cores));
            let chunk_size = base_chunk.saturating_add(usize::from(core_index < remainder));
            let mut skip = core_index
                .saturating_mul(base_chunk)
                .saturating_add(core_index.min(remainder));

            log::info!(
                "core = {}, core_index = {}, chunk_size = {}, skip = {}",
                core_id.0,
                core_index,
                chunk_size,
                skip
            );

            let mut collected_inputs = Vec::with_capacity(chunk_size);
            while collected_inputs.len() < chunk_size {
                match self.next_file() {
                    // Files ahead of this core's chunk belong to lower-indexed cores.
                    Ok(_) if skip != 0 => skip = skip.saturating_sub(1),
                    Ok(path) => collected_inputs.push(path),
                    Err(Error::IteratorEnd(_, _)) => break,
                    Err(e) => return Err(e),
                }
            }
            self.remaining_initial_files = Some(collected_inputs);
        }
        self.multicore_inputs_processed = Some(true);
    }

    // Load (or, on a resumed call, keep loading) whatever is still pending for this core.
    self.continue_loading_initial_inputs_custom(
        fuzzer,
        executor,
        manager,
        false,
        &mut |_, _, path| I::from_file(path),
    )
}
} }
impl<C, I, R, SC> StdState<I, C, R, SC> impl<C, I, R, SC> StdState<I, C, R, SC>
@ -969,6 +1080,8 @@ where
stage_depth: 0, stage_depth: 0,
stage_idx_stack: Vec::new(), stage_idx_stack: Vec::new(),
phantom: PhantomData, phantom: PhantomData,
#[cfg(feature = "std")]
multicore_inputs_processed: None,
}; };
feedback.init_state(&mut state)?; feedback.init_state(&mut state)?;
objective.init_state(&mut state)?; objective.init_state(&mut state)?;