Introduce multicore loading for the initial seed corpus (#1905)
* introduce multicore load initial corpus * update fuzzers/libfuzzer_libpng_norestart to use multicore corpus loading * run clippy * use CoreId and Cores in state if std * misc. typos * adapt multicore load initial inputs to allow resumable corpus loading in case of crashes or timeouts during corpus loading. * add std feature flag to multicore_inputs_processed * fix doc comment * run fmt for example fuzzer --------- Co-authored-by: aarnav <aarnav@srlabs.de> Co-authored-by: Romain Malmain <romain.malmain@pm.me> Co-authored-by: Dongjia "toka" Zhang <tokazerkje@outlook.com>
This commit is contained in:
parent
1b9f4ea29c
commit
d6fe67c3c8
@ -152,7 +152,7 @@ pub extern "C" fn libafl_main() {
|
|||||||
|
|
||||||
let mut run_client = |state: Option<_>,
|
let mut run_client = |state: Option<_>,
|
||||||
mut restarting_mgr: LlmpRestartingEventManager<_, _>,
|
mut restarting_mgr: LlmpRestartingEventManager<_, _>,
|
||||||
_core_id| {
|
core_id| {
|
||||||
// Create an observation channel using the coverage map
|
// Create an observation channel using the coverage map
|
||||||
let edges_observer = HitcountsMapObserver::new(unsafe { std_edges_map_observer("edges") });
|
let edges_observer = HitcountsMapObserver::new(unsafe { std_edges_map_observer("edges") });
|
||||||
|
|
||||||
@ -240,7 +240,14 @@ pub extern "C" fn libafl_main() {
|
|||||||
// In case the corpus is empty (on first run), reset
|
// In case the corpus is empty (on first run), reset
|
||||||
if state.must_load_initial_inputs() {
|
if state.must_load_initial_inputs() {
|
||||||
state
|
state
|
||||||
.load_initial_inputs(&mut fuzzer, &mut executor, &mut restarting_mgr, &opt.input)
|
.load_initial_inputs_multicore(
|
||||||
|
&mut fuzzer,
|
||||||
|
&mut executor,
|
||||||
|
&mut restarting_mgr,
|
||||||
|
&opt.input,
|
||||||
|
&core_id,
|
||||||
|
&cores,
|
||||||
|
)
|
||||||
.unwrap_or_else(|_| panic!("Failed to load initial corpus at {:?}", &opt.input));
|
.unwrap_or_else(|_| panic!("Failed to load initial corpus at {:?}", &opt.input));
|
||||||
println!("We imported {} inputs from disk.", state.corpus().count());
|
println!("We imported {} inputs from disk.", state.corpus().count());
|
||||||
}
|
}
|
||||||
|
@ -13,6 +13,8 @@ use std::{
|
|||||||
path::{Path, PathBuf},
|
path::{Path, PathBuf},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#[cfg(feature = "std")]
|
||||||
|
use libafl_bolts::core_affinity::{CoreId, Cores};
|
||||||
use libafl_bolts::{
|
use libafl_bolts::{
|
||||||
rands::{Rand, StdRand},
|
rands::{Rand, StdRand},
|
||||||
serdeany::{NamedSerdeAnyMap, SerdeAny, SerdeAnyMap},
|
serdeany::{NamedSerdeAnyMap, SerdeAny, SerdeAnyMap},
|
||||||
@ -325,6 +327,10 @@ pub struct StdState<I, C, R, SC> {
|
|||||||
#[cfg(feature = "std")]
|
#[cfg(feature = "std")]
|
||||||
/// Remaining initial inputs to load, if any
|
/// Remaining initial inputs to load, if any
|
||||||
dont_reenter: Option<Vec<PathBuf>>,
|
dont_reenter: Option<Vec<PathBuf>>,
|
||||||
|
#[cfg(feature = "std")]
|
||||||
|
/// Whether inputs have already been processed for multicore loading;
|
||||||
|
/// relevant only for `load_initial_inputs_multicore`
|
||||||
|
multicore_inputs_processed: Option<bool>,
|
||||||
/// The last time we reported progress (if available/used).
|
/// The last time we reported progress (if available/used).
|
||||||
/// This information is used by fuzzer `maybe_report_progress`.
|
/// This information is used by fuzzer `maybe_report_progress`.
|
||||||
last_report_time: Option<Duration>,
|
last_report_time: Option<Duration>,
|
||||||
@ -642,22 +648,14 @@ where
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Loads initial inputs from the passed-in `in_dirs`.
|
/// Resets the state of initial files.
|
||||||
/// If `forced` is true, will add all testcases, no matter what.
|
fn reset_initial_files_state(&mut self) {
|
||||||
fn load_initial_inputs_custom<E, EM, Z>(
|
self.remaining_initial_files = None;
|
||||||
&mut self,
|
self.dont_reenter = None;
|
||||||
fuzzer: &mut Z,
|
}
|
||||||
executor: &mut E,
|
|
||||||
manager: &mut EM,
|
/// Sets canonical paths for provided inputs
|
||||||
in_dirs: &[PathBuf],
|
fn canonicalize_input_dirs(&mut self, in_dirs: &[PathBuf]) -> Result<(), Error> {
|
||||||
forced: bool,
|
|
||||||
loader: &mut dyn FnMut(&mut Z, &mut Self, &Path) -> Result<I, Error>,
|
|
||||||
) -> Result<(), Error>
|
|
||||||
where
|
|
||||||
E: UsesState<State = Self>,
|
|
||||||
EM: EventFirer<State = Self>,
|
|
||||||
Z: Evaluator<E, EM, State = Self>,
|
|
||||||
{
|
|
||||||
if let Some(remaining) = self.remaining_initial_files.as_ref() {
|
if let Some(remaining) = self.remaining_initial_files.as_ref() {
|
||||||
// everything was loaded
|
// everything was loaded
|
||||||
if remaining.is_empty() {
|
if remaining.is_empty() {
|
||||||
@ -673,8 +671,7 @@ where
|
|||||||
self.dont_reenter = Some(files.clone());
|
self.dont_reenter = Some(files.clone());
|
||||||
self.remaining_initial_files = Some(files);
|
self.remaining_initial_files = Some(files);
|
||||||
}
|
}
|
||||||
|
Ok(())
|
||||||
self.continue_loading_initial_inputs_custom(fuzzer, executor, manager, forced, loader)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Loads initial inputs from the passed-in `in_dirs`.
|
/// Loads initial inputs from the passed-in `in_dirs`.
|
||||||
@ -705,7 +702,32 @@ where
|
|||||||
|
|
||||||
self.continue_loading_initial_inputs_custom(fuzzer, executor, manager, forced, loader)
|
self.continue_loading_initial_inputs_custom(fuzzer, executor, manager, forced, loader)
|
||||||
}
|
}
|
||||||
|
fn load_file<E, EM, Z>(
|
||||||
|
&mut self,
|
||||||
|
path: &PathBuf,
|
||||||
|
manager: &mut EM,
|
||||||
|
fuzzer: &mut Z,
|
||||||
|
executor: &mut E,
|
||||||
|
forced: bool,
|
||||||
|
loader: &mut dyn FnMut(&mut Z, &mut Self, &Path) -> Result<I, Error>,
|
||||||
|
) -> Result<(), Error>
|
||||||
|
where
|
||||||
|
E: UsesState<State = Self>,
|
||||||
|
EM: EventFirer<State = Self>,
|
||||||
|
Z: Evaluator<E, EM, State = Self>,
|
||||||
|
{
|
||||||
|
log::info!("Loading file {:?} ...", &path);
|
||||||
|
let input = loader(fuzzer, self, path)?;
|
||||||
|
if forced {
|
||||||
|
let _: CorpusId = fuzzer.add_input(self, executor, manager, input)?;
|
||||||
|
} else {
|
||||||
|
let (res, _) = fuzzer.evaluate_input(self, executor, manager, input)?;
|
||||||
|
if res == ExecuteInputResult::None {
|
||||||
|
log::warn!("File {:?} was not interesting, skipped.", &path);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
/// Loads initial inputs from the passed-in `in_dirs`.
|
/// Loads initial inputs from the passed-in `in_dirs`.
|
||||||
/// If `forced` is true, will add all testcases, no matter what.
|
/// If `forced` is true, will add all testcases, no matter what.
|
||||||
/// This method takes a list of files.
|
/// This method takes a list of files.
|
||||||
@ -725,16 +747,7 @@ where
|
|||||||
loop {
|
loop {
|
||||||
match self.next_file() {
|
match self.next_file() {
|
||||||
Ok(path) => {
|
Ok(path) => {
|
||||||
log::info!("Loading file {:?} ...", &path);
|
self.load_file(&path, manager, fuzzer, executor, forced, loader)?;
|
||||||
let input = loader(fuzzer, self, &path)?;
|
|
||||||
if forced {
|
|
||||||
let _: CorpusId = fuzzer.add_input(self, executor, manager, input)?;
|
|
||||||
} else {
|
|
||||||
let (res, _) = fuzzer.evaluate_input(self, executor, manager, input)?;
|
|
||||||
if res == ExecuteInputResult::None {
|
|
||||||
log::warn!("File {:?} was not interesting, skipped.", &path);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
Err(Error::IteratorEnd(_, _)) => break,
|
Err(Error::IteratorEnd(_, _)) => break,
|
||||||
Err(e) => return Err(e),
|
Err(e) => return Err(e),
|
||||||
@ -793,16 +806,15 @@ where
|
|||||||
EM: EventFirer<State = Self>,
|
EM: EventFirer<State = Self>,
|
||||||
Z: Evaluator<E, EM, State = Self>,
|
Z: Evaluator<E, EM, State = Self>,
|
||||||
{
|
{
|
||||||
self.load_initial_inputs_custom(
|
self.canonicalize_input_dirs(in_dirs)?;
|
||||||
|
self.continue_loading_initial_inputs_custom(
|
||||||
fuzzer,
|
fuzzer,
|
||||||
executor,
|
executor,
|
||||||
manager,
|
manager,
|
||||||
in_dirs,
|
|
||||||
true,
|
true,
|
||||||
&mut |_, _, path| I::from_file(path),
|
&mut |_, _, path| I::from_file(path),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Loads initial inputs from the passed-in `in_dirs`.
|
/// Loads initial inputs from the passed-in `in_dirs`.
|
||||||
/// If `forced` is true, will add all testcases, no matter what.
|
/// If `forced` is true, will add all testcases, no matter what.
|
||||||
/// This method takes a list of files, instead of folders.
|
/// This method takes a list of files, instead of folders.
|
||||||
@ -841,15 +853,114 @@ where
|
|||||||
EM: EventFirer<State = Self>,
|
EM: EventFirer<State = Self>,
|
||||||
Z: Evaluator<E, EM, State = Self>,
|
Z: Evaluator<E, EM, State = Self>,
|
||||||
{
|
{
|
||||||
self.load_initial_inputs_custom(
|
self.canonicalize_input_dirs(in_dirs)?;
|
||||||
|
self.continue_loading_initial_inputs_custom(
|
||||||
fuzzer,
|
fuzzer,
|
||||||
executor,
|
executor,
|
||||||
manager,
|
manager,
|
||||||
in_dirs,
|
|
||||||
false,
|
false,
|
||||||
&mut |_, _, path| I::from_file(path),
|
&mut |_, _, path| I::from_file(path),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn calculate_corpus_size(&mut self) -> Result<usize, Error> {
|
||||||
|
let mut count: usize = 0;
|
||||||
|
loop {
|
||||||
|
match self.next_file() {
|
||||||
|
Ok(_) => {
|
||||||
|
count = count.saturating_add(1);
|
||||||
|
}
|
||||||
|
Err(Error::IteratorEnd(_, _)) => break,
|
||||||
|
Err(e) => return Err(e),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(count)
|
||||||
|
}
|
||||||
|
/// Loads initial inputs by dividing the from the passed-in `in_dirs`
|
||||||
|
/// in a multicore fashion. Divides the corpus in chunks spread across cores.
|
||||||
|
pub fn load_initial_inputs_multicore<E, EM, Z>(
|
||||||
|
&mut self,
|
||||||
|
fuzzer: &mut Z,
|
||||||
|
executor: &mut E,
|
||||||
|
manager: &mut EM,
|
||||||
|
in_dirs: &[PathBuf],
|
||||||
|
core_id: &CoreId,
|
||||||
|
cores: &Cores,
|
||||||
|
) -> Result<(), Error>
|
||||||
|
where
|
||||||
|
E: UsesState<State = Self>,
|
||||||
|
EM: EventFirer<State = Self>,
|
||||||
|
Z: Evaluator<E, EM, State = Self>,
|
||||||
|
{
|
||||||
|
if self.multicore_inputs_processed.unwrap_or(false) {
|
||||||
|
self.continue_loading_initial_inputs_custom(
|
||||||
|
fuzzer,
|
||||||
|
executor,
|
||||||
|
manager,
|
||||||
|
false,
|
||||||
|
&mut |_, _, path| I::from_file(path),
|
||||||
|
)?;
|
||||||
|
} else {
|
||||||
|
self.canonicalize_input_dirs(in_dirs)?;
|
||||||
|
let corpus_size = self.calculate_corpus_size()?;
|
||||||
|
log::info!(
|
||||||
|
"{} total_corpus_size, {} cores",
|
||||||
|
corpus_size,
|
||||||
|
cores.ids.len()
|
||||||
|
);
|
||||||
|
self.reset_initial_files_state();
|
||||||
|
self.canonicalize_input_dirs(in_dirs)?;
|
||||||
|
if cores.ids.len() > corpus_size {
|
||||||
|
log::info!(
|
||||||
|
"low intial corpus count ({}), no parallelism required.",
|
||||||
|
corpus_size
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
let core_index = cores
|
||||||
|
.ids
|
||||||
|
.iter()
|
||||||
|
.enumerate()
|
||||||
|
.find(|(_, c)| *c == core_id)
|
||||||
|
.unwrap_or_else(|| panic!("core id {} not in cores list", core_id.0))
|
||||||
|
.0;
|
||||||
|
let chunk_size = corpus_size.saturating_div(cores.ids.len());
|
||||||
|
let mut skip = core_index.saturating_mul(chunk_size);
|
||||||
|
let mut inputs_todo = chunk_size;
|
||||||
|
let mut collected_inputs = Vec::new();
|
||||||
|
log::info!(
|
||||||
|
"core = {}, core_index = {}, chunk_size = {}, skip = {}",
|
||||||
|
core_id.0,
|
||||||
|
core_index,
|
||||||
|
chunk_size,
|
||||||
|
skip
|
||||||
|
);
|
||||||
|
loop {
|
||||||
|
match self.next_file() {
|
||||||
|
Ok(path) => {
|
||||||
|
if skip != 0 {
|
||||||
|
skip = skip.saturating_sub(1);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if inputs_todo == 0 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
collected_inputs.push(path);
|
||||||
|
inputs_todo = inputs_todo.saturating_sub(1);
|
||||||
|
}
|
||||||
|
Err(Error::IteratorEnd(_, _)) => break,
|
||||||
|
Err(e) => {
|
||||||
|
return Err(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
self.remaining_initial_files = Some(collected_inputs);
|
||||||
|
}
|
||||||
|
self.multicore_inputs_processed = Some(true);
|
||||||
|
return self
|
||||||
|
.load_initial_inputs_multicore(fuzzer, executor, manager, in_dirs, core_id, cores);
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<C, I, R, SC> StdState<I, C, R, SC>
|
impl<C, I, R, SC> StdState<I, C, R, SC>
|
||||||
@ -969,6 +1080,8 @@ where
|
|||||||
stage_depth: 0,
|
stage_depth: 0,
|
||||||
stage_idx_stack: Vec::new(),
|
stage_idx_stack: Vec::new(),
|
||||||
phantom: PhantomData,
|
phantom: PhantomData,
|
||||||
|
#[cfg(feature = "std")]
|
||||||
|
multicore_inputs_processed: None,
|
||||||
};
|
};
|
||||||
feedback.init_state(&mut state)?;
|
feedback.init_state(&mut state)?;
|
||||||
objective.init_state(&mut state)?;
|
objective.init_state(&mut state)?;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user