diff --git a/fuzzers/baby_fuzzer_minimizing/src/main.rs b/fuzzers/baby_fuzzer_minimizing/src/main.rs index c909e3e34b..4cec8054af 100644 --- a/fuzzers/baby_fuzzer_minimizing/src/main.rs +++ b/fuzzers/baby_fuzzer_minimizing/src/main.rs @@ -136,7 +136,13 @@ pub fn main() -> Result<(), Error> { let mut executor = InProcessExecutor::new(&mut harness, (), &mut fuzzer, &mut state, &mut mgr)?; state.load_initial_inputs_forced(&mut fuzzer, &mut executor, &mut mgr, &[solution_dir])?; - stages.perform_all(&mut fuzzer, &mut executor, &mut state, &mut mgr, 0)?; + stages.perform_all( + &mut fuzzer, + &mut executor, + &mut state, + &mut mgr, + CorpusId::from(0usize), + )?; Ok(()) } diff --git a/fuzzers/libfuzzer_libpng_launcher/src/lib.rs b/fuzzers/libfuzzer_libpng_launcher/src/lib.rs index fc7d9daed9..b34e30789b 100644 --- a/fuzzers/libfuzzer_libpng_launcher/src/lib.rs +++ b/fuzzers/libfuzzer_libpng_launcher/src/lib.rs @@ -32,7 +32,7 @@ use libafl::{ scheduled::{havoc_mutations, tokens_mutations, StdScheduledMutator}, token_mutations::Tokens, }, - observers::{HitcountsMapObserver, StdMapObserver, TimeObserver}, + observers::{HitcountsMapObserver, TimeObserver}, schedulers::{IndexesLenTimeMinimizerScheduler, QueueScheduler}, stages::mutational::StdMutationalStage, state::{HasCorpus, HasMetadata, StdState}, diff --git a/libafl/Cargo.toml b/libafl/Cargo.toml index eb8b465024..c6eaa1af5b 100644 --- a/libafl/Cargo.toml +++ b/libafl/Cargo.toml @@ -29,6 +29,7 @@ frida_cli = ["cli"] afl_exec_sec = [] # calculate exec/sec like AFL errors_backtrace = ["backtrace"] cmin = ["z3"] # corpus minimisation +corpus_btreemap = [] # features hiding dependencies licensed under GPL gpl = [] diff --git a/libafl/src/corpus/cached.rs b/libafl/src/corpus/cached.rs index 51be87c462..135311ba5e 100644 --- a/libafl/src/corpus/cached.rs +++ b/libafl/src/corpus/cached.rs @@ -9,7 +9,7 @@ use serde::{Deserialize, Serialize}; use crate::{ corpus::{ ondisk::{OnDiskCorpus, OnDiskMetadataFormat}, - Corpus, Testcase, + Corpus, CorpusId, Testcase, }, inputs::{Input, UsesInput}, Error, @@ -24,7 +24,7 @@ where I: Input, { inner: OnDiskCorpus, - cached_indexes: RefCell>, + cached_indexes: RefCell>, cache_max_len: usize, } @@ -47,30 +47,28 @@ where /// Add an entry to the corpus and return its index #[inline] - fn add(&mut self, testcase: Testcase) -> Result { + fn add(&mut self, testcase: Testcase) -> Result { self.inner.add(testcase) } /// Replaces the testcase at the given idx #[inline] - fn replace(&mut self, idx: usize, testcase: Testcase) -> Result, Error> { + fn replace(&mut self, idx: CorpusId, testcase: Testcase) -> Result, Error> { // TODO finish self.inner.replace(idx, testcase) } /// Removes an entry from the corpus, returning it if it was present. #[inline] - fn remove(&mut self, idx: usize) -> Result>, Error> { + fn remove(&mut self, idx: CorpusId) -> Result, Error> { let testcase = self.inner.remove(idx)?; - if testcase.is_some() { - self.cached_indexes.borrow_mut().retain(|e| *e != idx); - } + self.cached_indexes.borrow_mut().retain(|e| *e != idx); Ok(testcase) } /// Get by id #[inline] - fn get(&self, idx: usize) -> Result<&RefCell>, Error> { + fn get(&self, idx: CorpusId) -> Result<&RefCell>, Error> { let testcase = { self.inner.get(idx)? }; if testcase.borrow().input().is_none() { let _ = testcase.borrow_mut().load_input()?; @@ -94,15 +92,40 @@ where /// Current testcase scheduled #[inline] - fn current(&self) -> &Option { + fn current(&self) -> &Option { self.inner.current() } /// Current testcase scheduled (mutable) #[inline] - fn current_mut(&mut self) -> &mut Option { + fn current_mut(&mut self) -> &mut Option { self.inner.current_mut() } + + #[inline] + fn next(&self, idx: CorpusId) -> Option { + self.inner.next(idx) + } + + #[inline] + fn prev(&self, idx: CorpusId) -> Option { + self.inner.prev(idx) + } + + #[inline] + fn first(&self) -> Option { + self.inner.first() + } + + #[inline] + fn last(&self) -> Option { + self.inner.last() + } + + #[inline] + fn nth(&self, nth: usize) -> CorpusId { + self.inner.nth(nth) + } } impl CachedOnDiskCorpus diff --git a/libafl/src/corpus/inmemory.rs b/libafl/src/corpus/inmemory.rs index 0640274d9c..69093766d7 100644 --- a/libafl/src/corpus/inmemory.rs +++ b/libafl/src/corpus/inmemory.rs @@ -6,11 +6,284 @@ use core::cell::RefCell; use serde::{Deserialize, Serialize}; use crate::{ - corpus::{Corpus, Testcase}, + corpus::{Corpus, CorpusId, Testcase}, inputs::{Input, UsesInput}, Error, }; +/// Keep track of the stored `Testcase` and the siblings ids (insertion order) +#[cfg(not(feature = "corpus_btreemap"))] +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(bound = "I: serde::de::DeserializeOwned")] +pub struct TestcaseStorageItem +where + I: Input, +{ + /// The stored testcase + pub testcase: RefCell>, + /// Previously inserted id + pub prev: Option, + /// Following inserted id + pub next: Option, +} + +#[cfg(not(feature = "corpus_btreemap"))] +/// The map type in which testcases are stored (enable the feature `corpus_btreemap` to use a `BTreeMap` instead of `HashMap`) +pub type TestcaseStorageMap = hashbrown::HashMap>; + +#[cfg(feature = "corpus_btreemap")] +/// The map type in which testcases are stored (disable the feature `corpus_btreemap` to use a `HashMap` instead of `BTreeMap`) +pub type TestcaseStorageMap = + alloc::collections::btree_map::BTreeMap>>; + +/// Storage map for the testcases (used in `Corpus` implementations) with an incremental index +#[derive(Default, Serialize, Deserialize, Clone, Debug)] +#[serde(bound = "I: serde::de::DeserializeOwned")] +pub struct TestcaseStorage +where + I: Input, +{ + /// The map in which testcases are stored + pub map: TestcaseStorageMap, + /// The keys in order (use `Vec::binary_search`) + pub keys: Vec, + /// The progressive idx + progressive_idx: usize, + /// First inserted idx + #[cfg(not(feature = "corpus_btreemap"))] + first_idx: Option, + /// Last inserted idx + #[cfg(not(feature = "corpus_btreemap"))] + last_idx: Option, +} + +impl UsesInput for TestcaseStorage +where + I: Input, +{ + type Input = I; +} + +impl TestcaseStorage +where + I: Input, +{ + /// Insert a key in the keys set + fn insert_key(&mut self, id: CorpusId) { + if let Err(idx) = self.keys.binary_search(&id) { + self.keys.insert(idx, id); + } + } + + /// Remove a key from the keys set + fn remove_key(&mut self, id: CorpusId) { + if let Ok(idx) = self.keys.binary_search(&id) { + self.keys.remove(idx); + } + } + + /// Insert a testcase assigning a `CorpusId` to it + #[cfg(not(feature = "corpus_btreemap"))] + pub fn insert(&mut self, testcase: RefCell>) -> CorpusId { + let idx = CorpusId::from(self.progressive_idx); + self.progressive_idx += 1; + let prev = if let Some(last_idx) = self.last_idx { + self.map.get_mut(&last_idx).unwrap().next = Some(idx); + Some(last_idx) + } else { + None + }; + if self.first_idx.is_none() { + self.first_idx = Some(idx); + } + self.last_idx = Some(idx); + self.insert_key(idx); + self.map.insert( + idx, + TestcaseStorageItem { + testcase, + prev, + next: None, + }, + ); + idx + } + + /// Insert a testcase assigning a `CorpusId` to it + #[cfg(feature = "corpus_btreemap")] + pub fn insert(&mut self, testcase: RefCell>) -> CorpusId { + let idx = CorpusId::from(self.progressive_idx); + self.progressive_idx += 1; + self.insert_key(idx); + self.map.insert(idx, testcase); + idx + } + + /// Replace a testcase given a `CorpusId` + #[cfg(not(feature = "corpus_btreemap"))] + pub fn replace(&mut self, idx: CorpusId, testcase: Testcase) -> Option> { + if let Some(entry) = self.map.get_mut(&idx) { + Some(entry.testcase.replace(testcase)) + } else { + None + } + } + + /// Replace a testcase given a `CorpusId` + #[cfg(feature = "corpus_btreemap")] + pub fn replace(&mut self, idx: CorpusId, testcase: Testcase) -> Option> { + self.map.get_mut(&idx).map(|entry| entry.replace(testcase)) + } + + /// Remove a testcase given a `CorpusId` + #[cfg(not(feature = "corpus_btreemap"))] + pub fn remove(&mut self, idx: CorpusId) -> Option>> { + if let Some(item) = self.map.remove(&idx) { + self.remove_key(idx); + if let Some(prev) = item.prev { + self.map.get_mut(&prev).unwrap().next = item.next; + } else { + // first elem + self.first_idx = item.next; + } + if let Some(next) = item.next { + self.map.get_mut(&next).unwrap().prev = item.prev; + } else { + // last elem + self.last_idx = item.prev; + } + Some(item.testcase) + } else { + None + } + } + + /// Remove a testcase given a `CorpusId` + #[cfg(feature = "corpus_btreemap")] + pub fn remove(&mut self, idx: CorpusId) -> Option>> { + self.remove_key(idx); + self.map.remove(&idx) + } + + /// Get a testcase given a `CorpusId` + #[cfg(not(feature = "corpus_btreemap"))] + #[must_use] + pub fn get(&self, idx: CorpusId) -> Option<&RefCell>> { + self.map.get(&idx).as_ref().map(|x| &x.testcase) + } + + /// Get a testcase given a `CorpusId` + #[cfg(feature = "corpus_btreemap")] + #[must_use] + pub fn get(&self, idx: CorpusId) -> Option<&RefCell>> { + self.map.get(&idx) + } + + /// Get the next id given a `CorpusId` (creation order) + #[cfg(not(feature = "corpus_btreemap"))] + #[must_use] + fn next(&self, idx: CorpusId) -> Option { + if let Some(item) = self.map.get(&idx) { + item.next + } else { + None + } + } + + /// Get the next id given a `CorpusId` (creation order) + #[cfg(feature = "corpus_btreemap")] + #[must_use] + fn next(&self, idx: CorpusId) -> Option { + // TODO see if using self.keys is faster + let mut range = self + .map + .range((core::ops::Bound::Included(idx), core::ops::Bound::Unbounded)); + if let Some((this_id, _)) = range.next() { + if idx != *this_id { + return None; + } + } + if let Some((next_id, _)) = range.next() { + Some(*next_id) + } else { + None + } + } + + /// Get the previous id given a `CorpusId` (creation order) + #[cfg(not(feature = "corpus_btreemap"))] + #[must_use] + fn prev(&self, idx: CorpusId) -> Option { + if let Some(item) = self.map.get(&idx) { + item.prev + } else { + None + } + } + + /// Get the previous id given a `CorpusId` (creation order) + #[cfg(feature = "corpus_btreemap")] + #[must_use] + fn prev(&self, idx: CorpusId) -> Option { + // TODO see if using self.keys is faster + let mut range = self + .map + .range((core::ops::Bound::Unbounded, core::ops::Bound::Included(idx))); + if let Some((this_id, _)) = range.next_back() { + if idx != *this_id { + return None; + } + } + if let Some((prev_id, _)) = range.next_back() { + Some(*prev_id) + } else { + None + } + } + + /// Get the first created id + #[cfg(not(feature = "corpus_btreemap"))] + #[must_use] + fn first(&self) -> Option { + self.first_idx + } + + /// Get the first created id + #[cfg(feature = "corpus_btreemap")] + #[must_use] + fn first(&self) -> Option { + self.map.iter().next().map(|x| *x.0) + } + + /// Get the last created id + #[cfg(not(feature = "corpus_btreemap"))] + #[must_use] + fn last(&self) -> Option { + self.last_idx + } + + /// Get the last created id + #[cfg(feature = "corpus_btreemap")] + #[must_use] + fn last(&self) -> Option { + self.map.iter().next_back().map(|x| *x.0) + } + + /// Create new `TestcaseStorage` + #[must_use] + pub fn new() -> Self { + Self { + map: TestcaseStorageMap::default(), + keys: vec![], + progressive_idx: 0, + #[cfg(not(feature = "corpus_btreemap"))] + first_idx: None, + #[cfg(not(feature = "corpus_btreemap"))] + last_idx: None, + } + } +} + /// A corpus handling all in memory. #[derive(Default, Serialize, Deserialize, Clone, Debug)] #[serde(bound = "I: serde::de::DeserializeOwned")] @@ -18,8 +291,8 @@ pub struct InMemoryCorpus where I: Input, { - entries: Vec>>, - current: Option, + storage: TestcaseStorage, + current: Option, } impl UsesInput for InMemoryCorpus @@ -36,52 +309,76 @@ where /// Returns the number of elements #[inline] fn count(&self) -> usize { - self.entries.len() + self.storage.map.len() } /// Add an entry to the corpus and return its index #[inline] - fn add(&mut self, testcase: Testcase) -> Result { - self.entries.push(RefCell::new(testcase)); - Ok(self.entries.len() - 1) + fn add(&mut self, testcase: Testcase) -> Result { + Ok(self.storage.insert(RefCell::new(testcase))) } /// Replaces the testcase at the given idx #[inline] - fn replace(&mut self, idx: usize, testcase: Testcase) -> Result, Error> { - if idx >= self.entries.len() { - return Err(Error::key_not_found(format!("Index {idx} out of bounds"))); - } - Ok(self.entries[idx].replace(testcase)) + fn replace(&mut self, idx: CorpusId, testcase: Testcase) -> Result, Error> { + self.storage + .replace(idx, testcase) + .ok_or_else(|| Error::key_not_found(format!("Index {idx} not found"))) } /// Removes an entry from the corpus, returning it if it was present. #[inline] - fn remove(&mut self, idx: usize) -> Result>, Error> { - if idx >= self.entries.len() { - Ok(None) - } else { - Ok(Some(self.entries.remove(idx).into_inner())) - } + fn remove(&mut self, idx: CorpusId) -> Result, Error> { + self.storage + .remove(idx) + .map(|x| x.take()) + .ok_or_else(|| Error::key_not_found(format!("Index {idx} not found"))) } /// Get by id #[inline] - fn get(&self, idx: usize) -> Result<&RefCell>, Error> { - Ok(&self.entries[idx]) + fn get(&self, idx: CorpusId) -> Result<&RefCell>, Error> { + self.storage + .get(idx) + .ok_or_else(|| Error::key_not_found(format!("Index {idx} not found"))) } /// Current testcase scheduled #[inline] - fn current(&self) -> &Option { + fn current(&self) -> &Option { &self.current } /// Current testcase scheduled (mutable) #[inline] - fn current_mut(&mut self) -> &mut Option { + fn current_mut(&mut self) -> &mut Option { &mut self.current } + + #[inline] + fn next(&self, idx: CorpusId) -> Option { + self.storage.next(idx) + } + + #[inline] + fn prev(&self, idx: CorpusId) -> Option { + self.storage.prev(idx) + } + + #[inline] + fn first(&self) -> Option { + self.storage.first() + } + + #[inline] + fn last(&self) -> Option { + self.storage.last() + } + + #[inline] + fn nth(&self, nth: usize) -> CorpusId { + self.storage.keys[nth] + } } impl InMemoryCorpus @@ -93,7 +390,7 @@ where #[must_use] pub fn new() -> Self { Self { - entries: vec![], + storage: TestcaseStorage::new(), current: None, } } diff --git a/libafl/src/corpus/minimizer.rs b/libafl/src/corpus/minimizer.rs index 577dea31bd..a0063e4946 100644 --- a/libafl/src/corpus/minimizer.rs +++ b/libafl/src/corpus/minimizer.rs @@ -111,7 +111,8 @@ where let mut seed_exprs = HashMap::new(); let mut cov_map = HashMap::new(); - for idx in 0..state.corpus().count() { + let mut cur_id = state.corpus().first(); + while let Some(idx) = cur_id { let (weight, input) = { let mut testcase = state.corpus().get(idx)?.borrow_mut(); let weight = TS::compute(&mut *testcase, state)? @@ -151,6 +152,8 @@ where // Keep track of that seed's index and weight seed_exprs.insert(seed_expr, (idx, weight)); + + cur_id = state.corpus().next(idx); } for (_, cov) in cov_map { @@ -191,7 +194,9 @@ where let removed = state.corpus_mut().remove(idx)?; // scheduler needs to know we've removed the input, or it will continue to try // to use now-missing inputs - fuzzer.scheduler_mut().on_remove(state, idx, &removed)?; + fuzzer + .scheduler_mut() + .on_remove(state, idx, &Some(removed))?; } Ok(()) } else { diff --git a/libafl/src/corpus/mod.rs b/libafl/src/corpus/mod.rs index dadbec6953..4116128e5e 100644 --- a/libafl/src/corpus/mod.rs +++ b/libafl/src/corpus/mod.rs @@ -18,15 +18,49 @@ pub use cached::CachedOnDiskCorpus; #[cfg(feature = "cmin")] pub mod minimizer; -use core::cell::RefCell; +use core::{cell::RefCell, fmt}; #[cfg(feature = "cmin")] pub use minimizer::*; +use serde::{Deserialize, Serialize}; use crate::{inputs::UsesInput, Error}; +/// An abstraction for the index that identify a testcase in the corpus +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)] +#[repr(transparent)] +pub struct CorpusId(pub(crate) usize); + +impl fmt::Display for CorpusId { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl From for CorpusId { + fn from(id: usize) -> Self { + Self(id) + } +} + +impl From for CorpusId { + fn from(id: u64) -> Self { + Self(id as usize) + } +} + +/// Utility macro to call `Corpus::random_id` +#[macro_export] +macro_rules! random_corpus_id { + ($corpus:expr, $rand:expr) => {{ + let cnt = $corpus.count() as u64; + let nth = $rand.below(cnt) as usize; + $corpus.nth(nth) + }}; +} + /// Corpus with all current testcases -pub trait Corpus: UsesInput + serde::Serialize + for<'de> serde::Deserialize<'de> { +pub trait Corpus: UsesInput + Serialize + for<'de> Deserialize<'de> { /// Returns the number of elements fn count(&self) -> usize; @@ -36,26 +70,95 @@ pub trait Corpus: UsesInput + serde::Serialize + for<'de> serde::Deserialize<'de } /// Add an entry to the corpus and return its index - fn add(&mut self, testcase: Testcase) -> Result; + fn add(&mut self, testcase: Testcase) -> Result; /// Replaces the testcase at the given idx, returning the existing. fn replace( &mut self, - idx: usize, + idx: CorpusId, testcase: Testcase, ) -> Result, Error>; /// Removes an entry from the corpus, returning it if it was present. - fn remove(&mut self, idx: usize) -> Result>, Error>; + fn remove(&mut self, id: CorpusId) -> Result, Error>; /// Get by id - fn get(&self, idx: usize) -> Result<&RefCell>, Error>; + fn get(&self, id: CorpusId) -> Result<&RefCell>, Error>; /// Current testcase scheduled - fn current(&self) -> &Option; + fn current(&self) -> &Option; /// Current testcase scheduled (mutable) - fn current_mut(&mut self) -> &mut Option; + fn current_mut(&mut self) -> &mut Option; + + /// Get the next corpus id + fn next(&self, id: CorpusId) -> Option; + + /// Get the prev corpus id + fn prev(&self, id: CorpusId) -> Option; + + /// Get the first inserted corpus id + fn first(&self) -> Option; + + /// Get the last inserted corpus id + fn last(&self) -> Option; + + /// An iterator over very active corpus id + fn ids(&self) -> CorpusIdIterator<'_, Self> { + CorpusIdIterator { + corpus: self, + cur: self.first(), + cur_back: self.last(), + } + } + + /// Get the nth corpus id + fn nth(&self, nth: usize) -> CorpusId { + self.ids() + .nth(nth) + .expect("Failed to get the {nth} CorpusId") + } +} + +/// `Iterator` over the ids of a `Corpus` +#[derive(Debug)] +pub struct CorpusIdIterator<'a, C> +where + C: Corpus, +{ + corpus: &'a C, + cur: Option, + cur_back: Option, +} + +impl<'a, C> Iterator for CorpusIdIterator<'a, C> +where + C: Corpus, +{ + type Item = CorpusId; + + fn next(&mut self) -> Option { + if let Some(cur) = self.cur { + self.cur = self.corpus.next(cur); + Some(cur) + } else { + None + } + } +} + +impl<'a, C> DoubleEndedIterator for CorpusIdIterator<'a, C> +where + C: Corpus, +{ + fn next_back(&mut self) -> Option { + if let Some(cur_back) = self.cur_back { + self.cur_back = self.corpus.prev(cur_back); + Some(cur_back) + } else { + None + } + } } /// `Corpus` Python bindings @@ -71,7 +174,7 @@ pub mod pybind { corpus::{ cached::pybind::PythonCachedOnDiskCorpus, inmemory::pybind::PythonInMemoryCorpus, ondisk::pybind::PythonOnDiskCorpus, testcase::pybind::PythonTestcaseWrapper, Corpus, - Testcase, + CorpusId, Testcase, }, inputs::{BytesInput, UsesInput}, Error, @@ -157,13 +260,13 @@ pub mod pybind { #[pyo3(name = "current")] fn pycurrent(&self) -> Option { - *self.current() + self.current().map(|x| x.0) } #[pyo3(name = "get")] fn pyget(&self, idx: usize) -> PythonTestcaseWrapper { let t: &mut Testcase = unwrap_me!(self.wrapper, c, { - c.get(idx) + c.get(CorpusId::from(idx)) .map(|v| unsafe { v.as_ptr().as_mut().unwrap() }) .expect("PythonCorpus::get failed") }); @@ -182,26 +285,26 @@ pub mod pybind { } #[inline] - fn add(&mut self, testcase: Testcase) -> Result { + fn add(&mut self, testcase: Testcase) -> Result { unwrap_me_mut!(self.wrapper, c, { c.add(testcase) }) } #[inline] fn replace( &mut self, - idx: usize, + idx: CorpusId, testcase: Testcase, ) -> Result, Error> { unwrap_me_mut!(self.wrapper, c, { c.replace(idx, testcase) }) } #[inline] - fn remove(&mut self, idx: usize) -> Result>, Error> { + fn remove(&mut self, idx: CorpusId) -> Result, Error> { unwrap_me_mut!(self.wrapper, c, { c.remove(idx) }) } #[inline] - fn get(&self, idx: usize) -> Result<&RefCell>, Error> { + fn get(&self, idx: CorpusId) -> Result<&RefCell>, Error> { let ptr = unwrap_me!(self.wrapper, c, { c.get(idx) .map(|v| v as *const RefCell>) @@ -210,16 +313,49 @@ pub mod pybind { } #[inline] - fn current(&self) -> &Option { - let ptr = unwrap_me!(self.wrapper, c, { c.current() as *const Option }); + fn current(&self) -> &Option { + let ptr = unwrap_me!(self.wrapper, c, { c.current() as *const Option }); unsafe { ptr.as_ref().unwrap() } } #[inline] - fn current_mut(&mut self) -> &mut Option { - let ptr = unwrap_me_mut!(self.wrapper, c, { c.current_mut() as *mut Option }); + fn current_mut(&mut self) -> &mut Option { + let ptr = unwrap_me_mut!(self.wrapper, c, { + c.current_mut() as *mut Option + }); unsafe { ptr.as_mut().unwrap() } } + + fn next(&self, idx: CorpusId) -> Option { + unwrap_me!(self.wrapper, c, { c.next(idx) }) + } + + fn prev(&self, idx: CorpusId) -> Option { + unwrap_me!(self.wrapper, c, { c.prev(idx) }) + } + + fn first(&self) -> Option { + unwrap_me!(self.wrapper, c, { c.first() }) + } + + fn last(&self) -> Option { + unwrap_me!(self.wrapper, c, { c.last() }) + } + + /*fn ids<'a>(&'a self) -> CorpusIdIterator<'a, Self> { + CorpusIdIterator { + corpus: self, + cur: self.first(), + cur_back: self.last(), + } + } + + fn random_id(&self, next_random: u64) -> CorpusId { + let nth = (next_random as usize) % self.count(); + self.ids() + .nth(nth) + .expect("Failed to get a random CorpusId") + }*/ } /// Register the classes to the python module diff --git a/libafl/src/corpus/ondisk.rs b/libafl/src/corpus/ondisk.rs index bc031e1aaa..6712ce858d 100644 --- a/libafl/src/corpus/ondisk.rs +++ b/libafl/src/corpus/ondisk.rs @@ -1,6 +1,5 @@ //! The ondisk corpus stores unused testcases to disk. -use alloc::vec::Vec; use core::{cell::RefCell, time::Duration}; #[cfg(feature = "std")] use std::{fs, fs::File, io::Write}; @@ -13,7 +12,7 @@ use serde::{Deserialize, Serialize}; use crate::{ bolts::serdeany::SerdeAnyMap, - corpus::{Corpus, Testcase}, + corpus::{Corpus, CorpusId, InMemoryCorpus, Testcase}, inputs::{Input, UsesInput}, state::HasMetadata, Error, @@ -48,8 +47,7 @@ pub struct OnDiskCorpus where I: Input, { - entries: Vec>>, - current: Option, + inner: InMemoryCorpus, dir_path: PathBuf, meta_format: Option, } @@ -68,57 +66,75 @@ where /// Returns the number of elements #[inline] fn count(&self) -> usize { - self.entries.len() + self.inner.count() } /// Add an entry to the corpus and return its index #[inline] - fn add(&mut self, mut testcase: Testcase) -> Result { - self.save_testcase(&mut testcase)?; - self.entries.push(RefCell::new(testcase)); - Ok(self.entries.len() - 1) + fn add(&mut self, testcase: Testcase) -> Result { + let idx = self.inner.add(testcase)?; + self.save_testcase(&mut self.get(idx).unwrap().borrow_mut(), idx)?; + Ok(idx) } /// Replaces the testcase at the given idx #[inline] - fn replace(&mut self, idx: usize, mut testcase: Testcase) -> Result, Error> { - if idx >= self.entries.len() { - return Err(Error::key_not_found(format!("Index {idx} out of bounds"))); - } - self.save_testcase(&mut testcase)?; - let previous = self.entries[idx].replace(testcase); - self.remove_testcase(&previous)?; - Ok(previous) + fn replace(&mut self, idx: CorpusId, testcase: Testcase) -> Result, Error> { + let entry = self.inner.replace(idx, testcase)?; + self.remove_testcase(&entry)?; + self.save_testcase(&mut self.get(idx).unwrap().borrow_mut(), idx)?; + Ok(entry) } /// Removes an entry from the corpus, returning it if it was present. #[inline] - fn remove(&mut self, idx: usize) -> Result>, Error> { - if idx >= self.entries.len() { - Ok(None) - } else { - let prev = self.entries.remove(idx).into_inner(); - self.remove_testcase(&prev)?; - Ok(Some(prev)) - } + fn remove(&mut self, idx: CorpusId) -> Result, Error> { + let entry = self.inner.remove(idx)?; + self.remove_testcase(&entry)?; + Ok(entry) } /// Get by id #[inline] - fn get(&self, idx: usize) -> Result<&RefCell>, Error> { - Ok(&self.entries[idx]) + fn get(&self, idx: CorpusId) -> Result<&RefCell>, Error> { + self.inner.get(idx) } /// Current testcase scheduled #[inline] - fn current(&self) -> &Option { - &self.current + fn current(&self) -> &Option { + self.inner.current() } /// Current testcase scheduled (mutable) #[inline] - fn current_mut(&mut self) -> &mut Option { - &mut self.current + fn current_mut(&mut self) -> &mut Option { + self.inner.current_mut() + } + + #[inline] + fn next(&self, idx: CorpusId) -> Option { + self.inner.next(idx) + } + + #[inline] + fn prev(&self, idx: CorpusId) -> Option { + self.inner.prev(idx) + } + + #[inline] + fn first(&self) -> Option { + self.inner.first() + } + + #[inline] + fn last(&self) -> Option { + self.inner.last() + } + + #[inline] + fn nth(&self, nth: usize) -> CorpusId { + self.inner.nth(nth) } } @@ -135,8 +151,7 @@ where fn new(dir_path: PathBuf) -> Result, Error> { fs::create_dir_all(&dir_path)?; Ok(OnDiskCorpus { - entries: vec![], - current: None, + inner: InMemoryCorpus::new(), dir_path, meta_format: None, }) @@ -152,21 +167,16 @@ where ) -> Result { fs::create_dir_all(&dir_path)?; Ok(Self { - entries: vec![], - current: None, + inner: InMemoryCorpus::new(), dir_path, meta_format, }) } - fn save_testcase(&mut self, testcase: &mut Testcase) -> Result<(), Error> { + fn save_testcase(&self, testcase: &mut Testcase, idx: CorpusId) -> Result<(), Error> { if testcase.filename().is_none() { // TODO walk entry metadata to ask for pieces of filename (e.g. :havoc in AFL) - let file_orig = testcase - .input() - .as_ref() - .unwrap() - .generate_name(self.entries.len()); + let file_orig = testcase.input().as_ref().unwrap().generate_name(idx.0); let mut file = file_orig.clone(); let mut ctr = 2; @@ -224,7 +234,7 @@ where Ok(()) } - fn remove_testcase(&mut self, testcase: &Testcase) -> Result<(), Error> { + fn remove_testcase(&self, testcase: &Testcase) -> Result<(), Error> { if let Some(filename) = testcase.filename() { fs::remove_file(filename)?; } diff --git a/libafl/src/fuzzer/mod.rs b/libafl/src/fuzzer/mod.rs index 7784bf8604..c5a59d106c 100644 --- a/libafl/src/fuzzer/mod.rs +++ b/libafl/src/fuzzer/mod.rs @@ -13,7 +13,7 @@ use crate::monitors::PerfFeature; use crate::state::NopState; use crate::{ bolts::current_time, - corpus::{Corpus, Testcase}, + corpus::{Corpus, CorpusId, Testcase}, events::{Event, EventConfig, EventFirer, EventProcessor, ProgressReporter}, executors::{Executor, ExitKind, HasObservers}, feedbacks::Feedback, @@ -83,7 +83,7 @@ pub trait ExecutionProcessor: UsesState { observers: &OT, exit_kind: &ExitKind, send_events: bool, - ) -> Result<(ExecuteInputResult, Option), Error> + ) -> Result<(ExecuteInputResult, Option), Error> where EM: EventFirer; } @@ -100,7 +100,7 @@ pub trait EvaluatorObservers: UsesState + Sized { manager: &mut EM, input: ::Input, send_events: bool, - ) -> Result<(ExecuteInputResult, Option), Error> + ) -> Result<(ExecuteInputResult, Option), Error> where E: Executor + HasObservers, EM: EventFirer; @@ -120,7 +120,7 @@ where executor: &mut E, manager: &mut EM, input: ::Input, - ) -> Result<(ExecuteInputResult, Option), Error> { + ) -> Result<(ExecuteInputResult, Option), Error> { self.evaluate_input_events(state, executor, manager, input, true) } @@ -134,7 +134,7 @@ where manager: &mut EM, input: ::Input, send_events: bool, - ) -> Result<(ExecuteInputResult, Option), Error>; + ) -> Result<(ExecuteInputResult, Option), Error>; /// Runs the input and triggers observers and feedback. /// Adds an input, to the corpus even if it's not considered `interesting` by the `feedback`. @@ -146,7 +146,7 @@ where executor: &mut E, manager: &mut EM, input: ::Input, - ) -> Result; + ) -> Result; } /// The main fuzzer trait. @@ -172,7 +172,7 @@ where executor: &mut E, state: &mut EM::State, manager: &mut EM, - ) -> Result; + ) -> Result; /// Fuzz forever (or until stopped) fn fuzz_loop( @@ -181,7 +181,7 @@ where executor: &mut E, state: &mut EM::State, manager: &mut EM, - ) -> Result { + ) -> Result { let mut last = current_time(); let monitor_timeout = STATS_TIMEOUT_DEFAULT; loop { @@ -206,19 +206,19 @@ where state: &mut EM::State, manager: &mut EM, iters: u64, - ) -> Result { + ) -> Result { if iters == 0 { return Err(Error::illegal_argument( "Cannot fuzz for 0 iterations!".to_string(), )); } - let mut ret = 0; + let mut ret = None; let mut last = current_time(); let monitor_timeout = STATS_TIMEOUT_DEFAULT; for _ in 0..iters { - ret = self.fuzz_one(stages, executor, state, manager)?; + ret = Some(self.fuzz_one(stages, executor, state, manager)?); last = manager.maybe_report_progress(state, last, monitor_timeout)?; } @@ -227,7 +227,7 @@ where // But as the state may grow to a few megabytes, // for now we won' and the user has to do it (unless we find a way to do this on `Drop`). - Ok(ret) + Ok(ret.unwrap()) } } @@ -338,7 +338,7 @@ where observers: &OT, exit_kind: &ExitKind, send_events: bool, - ) -> Result<(ExecuteInputResult, Option), Error> + ) -> Result<(ExecuteInputResult, Option), Error> where EM: EventFirer, { @@ -451,7 +451,7 @@ where manager: &mut EM, input: ::Input, send_events: bool, - ) -> Result<(ExecuteInputResult, Option), Error> + ) -> Result<(ExecuteInputResult, Option), Error> where E: Executor + HasObservers, EM: EventFirer, @@ -481,7 +481,7 @@ where manager: &mut EM, input: ::Input, send_events: bool, - ) -> Result<(ExecuteInputResult, Option), Error> { + ) -> Result<(ExecuteInputResult, Option), Error> { self.evaluate_input_with_observers(state, executor, manager, input, send_events) } @@ -492,7 +492,7 @@ where executor: &mut E, manager: &mut EM, input: ::Input, - ) -> Result { + ) -> Result { let exit_kind = self.execute_input(state, executor, manager, &input)?; let observers = executor.observers(); // Always consider this to be "interesting" @@ -543,7 +543,7 @@ where executor: &mut E, state: &mut CS::State, manager: &mut EM, - ) -> Result { + ) -> Result { // Init timer for scheduler #[cfg(feature = "introspection")] state.introspection_monitor_mut().start_timer(); @@ -717,7 +717,7 @@ where _executor: &mut E, _state: &mut EM::State, _manager: &mut EM, - ) -> Result { + ) -> Result { unimplemented!() } } @@ -805,6 +805,7 @@ pub mod pybind { BytesInput::new(input), ) .expect("Failed to add input") + .0 } fn fuzz_loop( diff --git a/libafl/src/mutators/encoded_mutations.rs b/libafl/src/mutators/encoded_mutations.rs index 74429ae5f7..9273ae38d8 100644 --- a/libafl/src/mutators/encoded_mutations.rs +++ b/libafl/src/mutators/encoded_mutations.rs @@ -14,6 +14,7 @@ use crate::{ mutations::{buffer_copy, buffer_self_copy, ARITH_MAX}, MutationResult, Mutator, Named, }, + random_corpus_id, state::{HasCorpus, HasMaxSize, HasRand}, Error, }; @@ -320,8 +321,7 @@ where let size = input.codes().len(); // We don't want to use the testcase we're already using for splicing - let count = state.corpus().count(); - let idx = state.rand_mut().below(count as u64) as usize; + let idx = random_corpus_id!(state.corpus(), state.rand_mut()); if let Some(cur) = state.corpus().current() { if idx == *cur { return Ok(MutationResult::Skipped); @@ -397,8 +397,7 @@ where } // We don't want to use the testcase we're already using for splicing - let count = state.corpus().count(); - let idx = state.rand_mut().below(count as u64) as usize; + let idx = random_corpus_id!(state.corpus(), state.rand_mut()); if let Some(cur) = state.corpus().current() { if idx == *cur { return Ok(MutationResult::Skipped); diff --git a/libafl/src/mutators/gramatron.rs b/libafl/src/mutators/gramatron.rs index 01ec4fa289..a1898dcbec 100644 --- a/libafl/src/mutators/gramatron.rs +++ b/libafl/src/mutators/gramatron.rs @@ -12,6 +12,7 @@ use crate::{ generators::GramatronGenerator, inputs::{GramatronInput, Terminal, UsesInput}, mutators::{MutationResult, Mutator}, + random_corpus_id, state::{HasCorpus, HasMetadata, HasRand}, Error, }; @@ -110,8 +111,7 @@ where return Ok(MutationResult::Skipped); } - let count = state.corpus().count(); - let idx = state.rand_mut().below(count as u64) as usize; + let idx = random_corpus_id!(state.corpus(), state.rand_mut()); let insert_at = state.rand_mut().below(input.terminals().len() as u64) as usize; diff --git a/libafl/src/mutators/mod.rs b/libafl/src/mutators/mod.rs index 6c71bad390..964c36641a 100644 --- a/libafl/src/mutators/mod.rs +++ b/libafl/src/mutators/mod.rs @@ -24,6 +24,7 @@ pub use nautilus::*; use crate::{ bolts::tuples::{HasConstLen, Named}, + corpus::CorpusId, inputs::UsesInput, Error, }; @@ -61,7 +62,7 @@ where &mut self, _state: &mut S, _stage_idx: i32, - _corpus_idx: Option, + _corpus_idx: Option, ) -> Result<(), Error> { Ok(()) } @@ -85,7 +86,7 @@ where &mut self, state: &mut S, stage_idx: i32, - corpus_idx: Option, + corpus_idx: Option, ) -> Result<(), Error>; /// Gets the [`Mutator`] at the given index and runs the `mutate` function on it. @@ -103,7 +104,7 @@ where index: usize, state: &mut S, stage_idx: i32, - corpus_idx: Option, + corpus_idx: Option, ) -> Result<(), Error>; } @@ -124,7 +125,7 @@ where &mut self, _state: &mut S, _stage_idx: i32, - _corpus_idx: Option, + _corpus_idx: Option, ) -> Result<(), Error> { Ok(()) } @@ -144,7 +145,7 @@ where _index: usize, _state: &mut S, _stage_idx: i32, - _corpus_idx: Option, + _corpus_idx: Option, ) -> Result<(), Error> { Ok(()) } @@ -174,7 +175,7 @@ where &mut self, state: &mut S, stage_idx: i32, - corpus_idx: Option, + corpus_idx: Option, ) -> Result<(), Error> { self.0.post_exec(state, stage_idx, corpus_idx)?; self.1.post_exec_all(state, stage_idx, corpus_idx) @@ -199,7 +200,7 @@ where index: usize, state: &mut S, stage_idx: i32, - corpus_idx: Option, + corpus_idx: Option, ) -> Result<(), Error> { if index == 0 { self.0.post_exec(state, stage_idx, corpus_idx) @@ -218,6 +219,7 @@ pub mod pybind { use super::{MutationResult, Mutator}; use crate::{ + corpus::CorpusId, inputs::{BytesInput, HasBytesVec}, mutators::scheduled::pybind::PythonStdHavocMutator, state::pybind::{PythonStdState, PythonStdStateWrapper}, @@ -263,13 +265,17 @@ pub mod pybind { &mut self, state: &mut PythonStdState, stage_idx: i32, - corpus_idx: Option, + corpus_idx: Option, ) -> Result<(), Error> { Python::with_gil(|py| -> PyResult<()> { self.inner.call_method1( py, "post_exec", - (PythonStdStateWrapper::wrap(state), stage_idx, corpus_idx), + ( + PythonStdStateWrapper::wrap(state), + stage_idx, + corpus_idx.map(|x| x.0), + ), )?; Ok(()) })?; @@ -345,7 +351,7 @@ pub mod pybind { &mut self, state: &mut PythonStdState, stage_idx: i32, - corpus_idx: Option, + corpus_idx: Option, ) -> Result<(), Error> { unwrap_me_mut!(self.wrapper, m, { m.post_exec(state, stage_idx, corpus_idx) diff --git a/libafl/src/mutators/mopt_mutator.rs b/libafl/src/mutators/mopt_mutator.rs index 0dfc803a25..62d39bb5c4 100644 --- a/libafl/src/mutators/mopt_mutator.rs +++ b/libafl/src/mutators/mopt_mutator.rs @@ -9,7 +9,7 @@ use serde::{Deserialize, Serialize}; use crate::{ bolts::rands::{Rand, StdRand}, - corpus::Corpus, + corpus::{Corpus, CorpusId}, mutators::{ComposedByMutations, MutationResult, Mutator, MutatorsTuple, ScheduledMutator}, state::{HasCorpus, HasMetadata, HasRand, HasSolutions}, Error, @@ -407,7 +407,7 @@ where &mut self, state: &mut S, _stage_idx: i32, - _corpus_idx: Option, + _corpus_idx: Option, ) -> Result<(), Error> { let before = self.finds_before; let after = state.corpus().count() + state.solutions().count(); diff --git a/libafl/src/mutators/mutations.rs b/libafl/src/mutators/mutations.rs index 3a2a612df4..48ae5ce8f4 100644 --- a/libafl/src/mutators/mutations.rs +++ b/libafl/src/mutators/mutations.rs @@ -11,6 +11,7 @@ use crate::{ corpus::Corpus, inputs::{HasBytesVec, UsesInput}, mutators::{MutationResult, Mutator}, + random_corpus_id, state::{HasCorpus, HasMaxSize, HasRand}, Error, }; @@ -912,8 +913,8 @@ where let size = input.bytes().len(); // We don't want to use the testcase we're already using for splicing - let count = state.corpus().count(); - let idx = state.rand_mut().below(count as u64) as usize; + let idx = random_corpus_id!(state.corpus(), state.rand_mut()); + if let Some(cur) = state.corpus().current() { if idx == *cur { return Ok(MutationResult::Skipped); @@ -990,8 +991,7 @@ where } // We don't want to use the testcase we're already using for splicing - let count = state.corpus().count(); - let idx = state.rand_mut().below(count as u64) as usize; + let idx = random_corpus_id!(state.corpus(), state.rand_mut()); if let Some(cur) = state.corpus().current() { if idx == *cur { return Ok(MutationResult::Skipped); @@ -1069,8 +1069,7 @@ where _stage_idx: i32, ) -> Result { // We don't want to use the testcase we're already using for splicing - let count = state.corpus().count(); - let idx = state.rand_mut().below(count as u64) as usize; + let idx = random_corpus_id!(state.corpus(), state.rand_mut()); if let Some(cur) = state.corpus().current() { if idx == *cur { return Ok(MutationResult::Skipped); diff --git a/libafl/src/mutators/scheduled.rs b/libafl/src/mutators/scheduled.rs index e60318132a..edc483f505 100644 --- a/libafl/src/mutators/scheduled.rs +++ b/libafl/src/mutators/scheduled.rs @@ -15,7 +15,7 @@ use crate::{ tuples::{tuple_list, tuple_list_type, NamedTuple}, AsMutSlice, AsSlice, }, - corpus::Corpus, + corpus::{Corpus, CorpusId}, inputs::UsesInput, mutators::{MutationResult, Mutator, MutatorsTuple}, state::{HasCorpus, HasMetadata, HasRand, State}, @@ -321,7 +321,7 @@ where &mut self, state: &mut S, _stage_idx: i32, - corpus_idx: Option, + corpus_idx: Option, ) -> Result<(), Error> { if let Some(idx) = corpus_idx { let mut testcase = (*state.corpus_mut().get(idx)?).borrow_mut(); @@ -441,7 +441,9 @@ mod tests { .add(Testcase::new(vec![b'd', b'e', b'f'].into())) .unwrap(); - let testcase = corpus.get(0).expect("Corpus did not contain entries"); + let testcase = corpus + .get(corpus.first().unwrap()) + .expect("Corpus did not contain entries"); let mut input = testcase.borrow_mut().load_input().unwrap().clone(); let mut feedback = ConstFeedback::new(false); @@ -481,7 +483,9 @@ mod tests { .add(Testcase::new(vec![b'd', b'e', b'f'].into())) .unwrap(); - let testcase = corpus.get(0).expect("Corpus did not contain entries"); + let testcase = corpus + .get(corpus.first().unwrap()) + .expect("Corpus did not contain entries"); let mut input = testcase.borrow_mut().load_input().unwrap().clone(); let input_prior = input.clone(); diff --git a/libafl/src/schedulers/accounting.rs b/libafl/src/schedulers/accounting.rs index 04b4fbde9a..643175348b 100644 --- a/libafl/src/schedulers/accounting.rs +++ b/libafl/src/schedulers/accounting.rs @@ -8,7 +8,7 @@ use serde::{Deserialize, Serialize}; use crate::{ bolts::{rands::Rand, AsMutSlice, AsSlice, HasLen, HasRefCnt}, - corpus::{Corpus, Testcase}, + corpus::{Corpus, CorpusId, Testcase}, feedbacks::MapIndexesMetadata, inputs::UsesInput, schedulers::{ @@ -74,7 +74,7 @@ impl AccountingIndexesMetadata { #[derive(Debug, Serialize, Deserialize)] pub struct TopAccountingMetadata { /// map index -> corpus index - pub map: HashMap, + pub map: HashMap, /// If changed sicne the previous add to the corpus pub changed: bool, /// The max accounting seen so far @@ -125,7 +125,7 @@ where CS::State: HasCorpus + HasMetadata + HasRand + Debug, ::Input: HasLen, { - fn on_add(&self, state: &mut Self::State, idx: usize) -> Result<(), Error> { + fn on_add(&self, state: &mut Self::State, idx: CorpusId) -> Result<(), Error> { self.update_accounting_score(state, idx)?; self.inner.on_add(state, idx) } @@ -133,7 +133,7 @@ where fn on_replace( &self, state: &mut Self::State, - idx: usize, + idx: CorpusId, testcase: &Testcase<::Input>, ) -> Result<(), Error> { self.inner.on_replace(state, idx, testcase) @@ -142,13 +142,13 @@ where fn on_remove( &self, state: &mut Self::State, - idx: usize, + idx: CorpusId, testcase: &Option::Input>>, ) -> Result<(), Error> { self.inner.on_remove(state, idx, testcase) } - fn next(&self, state: &mut Self::State) -> Result { + fn next(&self, state: &mut Self::State) -> Result { if state .metadata() .get::() @@ -183,7 +183,11 @@ where /// Update the `Corpus` score #[allow(clippy::unused_self)] #[allow(clippy::cast_possible_wrap)] - pub fn update_accounting_score(&self, state: &mut CS::State, idx: usize) -> Result<(), Error> { + pub fn update_accounting_score( + &self, + state: &mut CS::State, + idx: CorpusId, + ) -> Result<(), Error> { let mut indexes = vec![]; let mut new_favoreds = vec![]; { diff --git a/libafl/src/schedulers/minimizer.rs b/libafl/src/schedulers/minimizer.rs index 76d381dc56..87c21c3738 100644 --- a/libafl/src/schedulers/minimizer.rs +++ b/libafl/src/schedulers/minimizer.rs @@ -9,7 +9,7 @@ use serde::{Deserialize, Serialize}; use crate::{ bolts::{rands::Rand, serdeany::SerdeAny, AsSlice, HasRefCnt}, - corpus::{Corpus, Testcase}, + corpus::{Corpus, CorpusId, Testcase}, feedbacks::MapIndexesMetadata, inputs::UsesInput, schedulers::{LenTimeMulTestcaseScore, Scheduler, TestcaseScore}, @@ -30,7 +30,7 @@ crate::impl_serdeany!(IsFavoredMetadata); #[derive(Debug, Serialize, Deserialize)] pub struct TopRatedsMetadata { /// map index -> corpus index - pub map: HashMap, + pub map: HashMap, } crate::impl_serdeany!(TopRatedsMetadata); @@ -46,7 +46,7 @@ impl TopRatedsMetadata { /// Getter for map #[must_use] - pub fn map(&self) -> &HashMap { + pub fn map(&self) -> &HashMap { &self.map } } @@ -82,7 +82,7 @@ where CS::State: HasCorpus + HasMetadata + HasRand, { /// Add an entry to the corpus and return its index - fn on_add(&self, state: &mut CS::State, idx: usize) -> Result<(), Error> { + fn on_add(&self, state: &mut CS::State, idx: CorpusId) -> Result<(), Error> { self.update_score(state, idx)?; self.base.on_add(state, idx) } @@ -91,7 +91,7 @@ where fn on_replace( &self, state: &mut CS::State, - idx: usize, + idx: CorpusId, testcase: &Testcase<::Input>, ) -> Result<(), Error> { self.update_score(state, idx)?; @@ -102,7 +102,7 @@ where fn on_remove( &self, state: &mut CS::State, - idx: usize, + idx: CorpusId, testcase: &Option::Input>>, ) -> Result<(), Error> { self.base.on_remove(state, idx, testcase)?; @@ -112,19 +112,13 @@ where .drain_filter(|_, other_idx| *other_idx == idx) .map(|(entry, _)| entry) .collect::>(); - meta.map - .values_mut() - .filter(|other_idx| **other_idx > idx) - .for_each(|other_idx| { - *other_idx -= 1; - }); entries } else { return Ok(()); }; entries.sort_unstable(); // this should already be sorted, but just in case let mut map = HashMap::new(); - for i in 0..state.corpus().count() { + for i in state.corpus().ids() { let mut old = state.corpus().get(i)?.borrow_mut(); let factor = F::compute(&mut *old, state)?; if let Some(old_map) = old.metadata_mut().get_mut::() { @@ -169,7 +163,7 @@ where } /// Gets the next entry - fn next(&self, state: &mut CS::State) -> Result { + fn next(&self, state: &mut CS::State) -> Result { self.cull(state)?; let mut idx = self.base.next(state)?; while { @@ -197,7 +191,7 @@ where /// Update the `Corpus` score using the `MinimizerScheduler` #[allow(clippy::unused_self)] #[allow(clippy::cast_possible_wrap)] - pub fn update_score(&self, state: &mut CS::State, idx: usize) -> Result<(), Error> { + pub fn update_score(&self, state: &mut CS::State, idx: CorpusId) -> Result<(), Error> { // Create a new top rated meta if not existing if state.metadata().get::().is_none() { state.add_metadata(TopRatedsMetadata::new()); diff --git a/libafl/src/schedulers/mod.rs b/libafl/src/schedulers/mod.rs index 9d3cdfd8f3..2a0a96ea99 100644 --- a/libafl/src/schedulers/mod.rs +++ b/libafl/src/schedulers/mod.rs @@ -1,39 +1,39 @@ //! Schedule the access to the Corpus. -pub mod queue; +use alloc::borrow::ToOwned; use core::marker::PhantomData; +pub mod testcase_score; +pub use testcase_score::{LenTimeMulTestcaseScore, TestcaseScore}; + +pub mod queue; pub use queue::QueueScheduler; +pub mod minimizer; +pub use minimizer::{ + IndexesLenTimeMinimizerScheduler, LenTimeMinimizerScheduler, MinimizerScheduler, +}; + +pub mod powersched; +pub use powersched::PowerQueueScheduler; + pub mod probabilistic_sampling; pub use probabilistic_sampling::ProbabilitySamplingScheduler; pub mod accounting; pub use accounting::CoverageAccountingScheduler; -pub mod testcase_score; -pub use testcase_score::{LenTimeMulTestcaseScore, TestcaseScore}; - -pub mod minimizer; -pub use minimizer::{ - IndexesLenTimeMinimizerScheduler, LenTimeMinimizerScheduler, MinimizerScheduler, -}; - pub mod weighted; pub use weighted::{StdWeightedScheduler, WeightedScheduler}; -pub mod powersched; -use alloc::borrow::ToOwned; - -pub use powersched::PowerQueueScheduler; - pub mod tuneable; pub use tuneable::*; use crate::{ bolts::rands::Rand, - corpus::{Corpus, Testcase}, + corpus::{Corpus, CorpusId, Testcase}, inputs::UsesInput, + random_corpus_id, state::{HasCorpus, HasRand, UsesState}, Error, }; @@ -42,7 +42,7 @@ use crate::{ /// It has hooks to corpus add/replace/remove to allow complex scheduling algorithms to collect data. pub trait Scheduler: UsesState { /// Added an entry to the corpus at the given index - fn on_add(&self, _state: &mut Self::State, _idx: usize) -> Result<(), Error> { + fn on_add(&self, _state: &mut Self::State, _idx: CorpusId) -> Result<(), Error> { Ok(()) } @@ -50,7 +50,7 @@ pub trait Scheduler: UsesState { fn on_replace( &self, _state: &mut Self::State, - _idx: usize, + _idx: CorpusId, _prev: &Testcase<::Input>, ) -> Result<(), Error> { Ok(()) @@ -60,14 +60,14 @@ pub trait Scheduler: UsesState { fn on_remove( &self, _state: &mut Self::State, - _idx: usize, + _idx: CorpusId, _testcase: &Option::Input>>, ) -> Result<(), Error> { Ok(()) } /// Gets the next entry - fn next(&self, state: &mut Self::State) -> Result; + fn next(&self, state: &mut Self::State) -> Result; } /// Feed the fuzzer simply with a random testcase on request @@ -88,12 +88,11 @@ where S: HasCorpus + HasRand, { /// Gets the next entry at random - fn next(&self, state: &mut Self::State) -> Result { + fn next(&self, state: &mut Self::State) -> Result { if state.corpus().count() == 0 { Err(Error::empty("No entries in corpus".to_owned())) } else { - let len = state.corpus().count(); - let id = state.rand_mut().below(len as u64) as usize; + let id = random_corpus_id!(state.corpus(), state.rand_mut()); *state.corpus_mut().current_mut() = Some(id); Ok(id) } diff --git a/libafl/src/schedulers/powersched.rs b/libafl/src/schedulers/powersched.rs index 6278d92811..fca5719aa9 100644 --- a/libafl/src/schedulers/powersched.rs +++ b/libafl/src/schedulers/powersched.rs @@ -9,7 +9,7 @@ use core::{marker::PhantomData, time::Duration}; use serde::{Deserialize, Serialize}; use crate::{ - corpus::{Corpus, SchedulerTestcaseMetaData}, + corpus::{Corpus, CorpusId, SchedulerTestcaseMetaData}, inputs::UsesInput, schedulers::Scheduler, state::{HasCorpus, HasMetadata, UsesState}, @@ -180,7 +180,7 @@ where S: HasCorpus + HasMetadata, { /// Add an entry to the corpus and return its index - fn on_add(&self, state: &mut Self::State, idx: usize) -> Result<(), Error> { + fn on_add(&self, state: &mut Self::State, idx: CorpusId) -> Result<(), Error> { if !state.has_metadata::() { state.add_metadata::(SchedulerMetadata::new(Some(self.strat))); } @@ -211,13 +211,15 @@ where Ok(()) } - fn next(&self, state: &mut Self::State) -> Result { + fn next(&self, state: &mut Self::State) -> Result { if state.corpus().count() == 0 { Err(Error::empty(String::from("No entries in corpus"))) } else { let id = match state.corpus().current() { Some(cur) => { - if *cur + 1 >= state.corpus().count() { + if let Some(next) = state.corpus().next(*cur) { + next + } else { let psmeta = state .metadata_mut() .get_mut::() @@ -225,12 +227,10 @@ where Error::key_not_found("SchedulerMetadata not found".to_string()) })?; psmeta.set_queue_cycles(psmeta.queue_cycles() + 1); - 0 - } else { - *cur + 1 + state.corpus().first().unwrap() } } - None => 0, + None => state.corpus().first().unwrap(), }; *state.corpus_mut().current_mut() = Some(id); diff --git a/libafl/src/schedulers/probabilistic_sampling.rs b/libafl/src/schedulers/probabilistic_sampling.rs index fd17758a9c..aaacc9f847 100644 --- a/libafl/src/schedulers/probabilistic_sampling.rs +++ b/libafl/src/schedulers/probabilistic_sampling.rs @@ -9,7 +9,7 @@ use serde::{Deserialize, Serialize}; use crate::{ bolts::rands::Rand, - corpus::Corpus, + corpus::{Corpus, CorpusId}, inputs::UsesInput, schedulers::{Scheduler, TestcaseScore}, state::{HasCorpus, HasMetadata, HasRand, UsesState}, @@ -29,7 +29,7 @@ where #[derive(Debug, Serialize, Deserialize)] pub struct ProbabilityMetadata { /// corpus index -> probability - pub map: HashMap, + pub map: HashMap, /// total probability of all items in the map pub total_probability: f64, } @@ -69,7 +69,7 @@ where /// Calculate the score and store in `ProbabilityMetadata` #[allow(clippy::cast_precision_loss)] #[allow(clippy::unused_self)] - pub fn store_probability(&self, state: &mut S, idx: usize) -> Result<(), Error> { + pub fn store_probability(&self, state: &mut S, idx: CorpusId) -> Result<(), Error> { let factor = F::compute(&mut *state.corpus().get(idx)?.borrow_mut(), state)?; if factor == 0.0 { return Err(Error::illegal_state( @@ -99,7 +99,7 @@ where F: TestcaseScore, S: HasCorpus + HasMetadata + HasRand, { - fn on_add(&self, state: &mut Self::State, idx: usize) -> Result<(), Error> { + fn on_add(&self, state: &mut Self::State, idx: CorpusId) -> Result<(), Error> { if state.metadata().get::().is_none() { state.add_metadata(ProbabilityMetadata::new()); } @@ -108,7 +108,7 @@ where /// Gets the next entry #[allow(clippy::cast_precision_loss)] - fn next(&self, state: &mut Self::State) -> Result { + fn next(&self, state: &mut Self::State) -> Result { if state.corpus().count() == 0 { Err(Error::empty(String::from("No entries in corpus"))) } else { diff --git a/libafl/src/schedulers/queue.rs b/libafl/src/schedulers/queue.rs index 4f0cd0fb81..f0190c0b93 100644 --- a/libafl/src/schedulers/queue.rs +++ b/libafl/src/schedulers/queue.rs @@ -4,7 +4,7 @@ use alloc::borrow::ToOwned; use core::marker::PhantomData; use crate::{ - corpus::Corpus, + corpus::{Corpus, CorpusId}, inputs::UsesInput, schedulers::Scheduler, state::{HasCorpus, UsesState}, @@ -29,20 +29,16 @@ where S: HasCorpus, { /// Gets the next entry in the queue - fn next(&self, state: &mut Self::State) -> Result { + fn next(&self, state: &mut Self::State) -> Result { if state.corpus().count() == 0 { Err(Error::empty("No entries in corpus".to_owned())) } else { - let id = match state.corpus().current() { - Some(cur) => { - if *cur + 1 >= state.corpus().count() { - 0 - } else { - *cur + 1 - } - } - None => 0, - }; + let id = state + .corpus() + .current() + .map(|id| state.corpus().next(id)) + .flatten() + .unwrap_or_else(|| state.corpus().first().unwrap()); *state.corpus_mut().current_mut() = Some(id); Ok(id) } diff --git a/libafl/src/schedulers/testcase_score.rs b/libafl/src/schedulers/testcase_score.rs index f9c21b262f..2593cd9be6 100644 --- a/libafl/src/schedulers/testcase_score.rs +++ b/libafl/src/schedulers/testcase_score.rs @@ -77,7 +77,7 @@ where let mut n_paths = 0; let mut v = 0.0; let cur_index = state.corpus().current().unwrap(); - for idx in 0..corpus.count() { + for idx in corpus.ids() { let n_fuzz_entry = if cur_index == idx { entry .metadata() diff --git a/libafl/src/schedulers/tuneable.rs b/libafl/src/schedulers/tuneable.rs index e2cd11b35e..cc3a0a1fa5 100644 --- a/libafl/src/schedulers/tuneable.rs +++ b/libafl/src/schedulers/tuneable.rs @@ -8,7 +8,7 @@ use core::marker::PhantomData; use serde::{Deserialize, Serialize}; use crate::{ - corpus::Corpus, + corpus::{Corpus, CorpusId}, impl_serdeany, inputs::UsesInput, schedulers::Scheduler, @@ -18,7 +18,7 @@ use crate::{ #[derive(Default, Clone, Copy, Eq, PartialEq, Debug, Serialize, Deserialize)] struct TuneableSchedulerMetadata { - next: Option, + next: Option, } impl_serdeany!(TuneableSchedulerMetadata); @@ -57,12 +57,12 @@ where } /// Sets the next corpus id to be used - pub fn set_next(state: &mut S, next: usize) { + pub fn set_next(state: &mut S, next: CorpusId) { Self::metadata_mut(state).next = Some(next); } /// Gets the next set corpus id - pub fn get_next(state: &S) -> Option { + pub fn get_next(state: &S) -> Option { Self::metadata(state).next } @@ -73,8 +73,11 @@ where } /// Gets the current corpus entry id - pub fn get_current(state: &S) -> usize { - state.corpus().current().unwrap_or_default() + pub fn get_current(state: &S) -> CorpusId { + state + .corpus() + .current() + .unwrap_or_else(|| state.corpus().first().expect("Empty corpus")) } } @@ -90,17 +93,17 @@ where S: HasCorpus + HasMetadata, { /// Gets the next entry in the queue - fn next(&self, state: &mut Self::State) -> Result { + fn next(&self, state: &mut Self::State) -> Result { if state.corpus().count() == 0 { return Err(Error::empty("No entries in corpus".to_owned())); } let id = if let Some(next) = Self::get_next(state) { // next was set next - } else if Self::get_current(state) + 1 >= state.corpus().count() { - 0 + } else if let Some(next) = state.corpus().next(Self::get_current(state)) { + next } else { - Self::get_current(state) + 1 + state.corpus().first().unwrap() }; *state.corpus_mut().current_mut() = Some(id); Ok(id) diff --git a/libafl/src/schedulers/weighted.rs b/libafl/src/schedulers/weighted.rs index 4efed1dd96..9a5586704f 100644 --- a/libafl/src/schedulers/weighted.rs +++ b/libafl/src/schedulers/weighted.rs @@ -1,18 +1,17 @@ //! The queue corpus scheduler with weighted queue item selection from aflpp (`https://github.com/AFLplusplus/AFLplusplus/blob/1d4f1e48797c064ee71441ba555b29fc3f467983/src/afl-fuzz-queue.c#L32`) //! This queue corpus scheduler needs calibration stage. -use alloc::{ - string::{String, ToString}, - vec::Vec, -}; +use alloc::string::{String, ToString}; use core::marker::PhantomData; +use hashbrown::HashMap; use serde::{Deserialize, Serialize}; use crate::{ bolts::rands::Rand, - corpus::{Corpus, SchedulerTestcaseMetaData, Testcase}, + corpus::{Corpus, CorpusId, SchedulerTestcaseMetaData, Testcase}, inputs::UsesInput, + random_corpus_id, schedulers::{ powersched::{PowerSchedule, SchedulerMetadata}, testcase_score::{CorpusWeightTestcaseScore, TestcaseScore}, @@ -29,9 +28,9 @@ pub struct WeightedScheduleMetadata { /// The fuzzer execution spent in the current cycles runs_in_current_cycle: usize, /// Alias table for weighted queue entry selection - alias_table: Vec, + alias_table: HashMap, /// Probability for which queue entry is selected - alias_probability: Vec, + alias_probability: HashMap, } impl Default for WeightedScheduleMetadata { @@ -46,8 +45,8 @@ impl WeightedScheduleMetadata { pub fn new() -> Self { Self { runs_in_current_cycle: 0, - alias_table: vec![0], - alias_probability: vec![0.0], + alias_table: HashMap::default(), + alias_probability: HashMap::default(), } } @@ -64,23 +63,23 @@ impl WeightedScheduleMetadata { /// The getter for `alias_table` #[must_use] - pub fn alias_table(&self) -> &[usize] { + pub fn alias_table(&self) -> &HashMap { &self.alias_table } /// The setter for `alias_table` - pub fn set_alias_table(&mut self, table: Vec) { + pub fn set_alias_table(&mut self, table: HashMap) { self.alias_table = table; } /// The getter for `alias_probability` #[must_use] - pub fn alias_probability(&self) -> &[f64] { + pub fn alias_probability(&self) -> &HashMap { &self.alias_probability } /// The setter for `alias_probability` - pub fn set_alias_probability(&mut self, probability: Vec) { + pub fn set_alias_probability(&mut self, probability: HashMap) { self.alias_probability = probability; } } @@ -137,25 +136,25 @@ where pub fn create_alias_table(&self, state: &mut S) -> Result<(), Error> { let n = state.corpus().count(); - let mut alias_table: Vec = vec![0; n]; - let mut alias_probability: Vec = vec![0.0; n]; - let mut weights: Vec = vec![0.0; n]; + let mut alias_table: HashMap = HashMap::default(); + let mut alias_probability: HashMap = HashMap::default(); + let mut weights: HashMap = HashMap::default(); - let mut p_arr: Vec = vec![0.0; n]; - let mut s_arr: Vec = vec![0; n]; - let mut l_arr: Vec = vec![0; n]; + let mut p_arr: HashMap = HashMap::default(); + let mut s_arr: HashMap = HashMap::default(); + let mut l_arr: HashMap = HashMap::default(); let mut sum: f64 = 0.0; - for (i, item) in weights.iter_mut().enumerate().take(n) { + for i in state.corpus().ids() { let mut testcase = state.corpus().get(i)?.borrow_mut(); let weight = F::compute(&mut *testcase, state)?; - *item = weight; + weights.insert(i, weight); sum += weight; } - for i in 0..n { - p_arr[i] = weights[i] * (n as f64) / sum; + for (i, w) in weights.iter() { + p_arr.insert(*i, w * (n as f64) / sum); } // # of items in queue S @@ -164,12 +163,12 @@ where // # of items in queue L let mut n_l = 0; // Divide P into two queues, S and L - for s in (0..n).rev() { - if p_arr[s] < 1.0 { - s_arr[n_s] = s; + for s in state.corpus().ids().rev() { + if *p_arr.get(&s).unwrap() < 1.0 { + s_arr.insert(n_s, s); n_s += 1; } else { - l_arr[n_l] = s; + l_arr.insert(n_l, s); n_l += 1; } } @@ -177,30 +176,30 @@ where while n_s > 0 && n_l > 0 { n_s -= 1; n_l -= 1; - let a = s_arr[n_s]; - let g = l_arr[n_l]; + let a = *s_arr.get(&n_s).unwrap(); + let g = *l_arr.get(&n_l).unwrap(); - alias_probability[a] = p_arr[a]; - alias_table[a] = g; - p_arr[g] = p_arr[g] + p_arr[a] - 1.0; + alias_probability.insert(a, *p_arr.get(&a).unwrap()); + alias_table.insert(a, g); + *p_arr.get_mut(&g).unwrap() += p_arr.get(&a).unwrap() - 1.0; - if p_arr[g] < 1.0 { - s_arr[n_s] = g; + if *p_arr.get(&g).unwrap() < 1.0 { + *s_arr.get_mut(&n_s).unwrap() = g; n_s += 1; } else { - l_arr[n_l] = g; + *l_arr.get_mut(&n_l).unwrap() = g; n_l += 1; } } while n_l > 0 { n_l -= 1; - alias_probability[l_arr[n_l]] = 1.0; + alias_probability.insert(*l_arr.get(&n_l).unwrap(), 1.0); } while n_s > 0 { n_s -= 1; - alias_probability[s_arr[n_s]] = 1.0; + alias_probability.insert(*s_arr.get(&n_s).unwrap(), 1.0); } let wsmeta = state @@ -230,7 +229,7 @@ where S: HasCorpus + HasMetadata + HasRand, { /// Add an entry to the corpus and return its index - fn on_add(&self, state: &mut S, idx: usize) -> Result<(), Error> { + fn on_add(&self, state: &mut S, idx: CorpusId) -> Result<(), Error> { if !state.has_metadata::() { state.add_metadata(SchedulerMetadata::new(self.strat)); } @@ -271,7 +270,7 @@ where fn on_replace( &self, state: &mut S, - idx: usize, + idx: CorpusId, _testcase: &Testcase, ) -> Result<(), Error> { // Recreate the alias table @@ -281,7 +280,7 @@ where fn on_remove( &self, state: &mut S, - _idx: usize, + _idx: CorpusId, _testcase: &Option>, ) -> Result<(), Error> { // Recreate the alias table @@ -290,12 +289,13 @@ where } #[allow(clippy::similar_names, clippy::cast_precision_loss)] - fn next(&self, state: &mut S) -> Result { - if state.corpus().count() == 0 { + fn next(&self, state: &mut S) -> Result { + let corpus_counts = state.corpus().count(); + if corpus_counts == 0 { Err(Error::empty(String::from("No entries in corpus"))) } else { - let corpus_counts = state.corpus().count(); - let s = state.rand_mut().below(corpus_counts as u64) as usize; + let s = random_corpus_id!(state.corpus(), state.rand_mut()); + // Choose a random value between 0.000000000 and 1.000000000 let probability = state.rand_mut().between(0, 1000000000) as f64 / 1000000000_f64; @@ -308,16 +308,17 @@ where let current_cycles = wsmeta.runs_in_current_cycle(); + // TODO deal with corpus_counts decreasing due to removals if current_cycles >= corpus_counts { wsmeta.set_runs_current_cycle(0); } else { wsmeta.set_runs_current_cycle(current_cycles + 1); } - let idx = if probability < wsmeta.alias_probability()[s] { + let idx = if probability < *wsmeta.alias_probability().get(&s).unwrap() { s } else { - wsmeta.alias_table()[s] + *wsmeta.alias_table().get(&s).unwrap() }; // Update depth diff --git a/libafl/src/stages/calibrate.rs b/libafl/src/stages/calibrate.rs index ba093c6c3b..3a9fbc9c86 100644 --- a/libafl/src/stages/calibrate.rs +++ b/libafl/src/stages/calibrate.rs @@ -12,7 +12,7 @@ use serde::{Deserialize, Serialize}; use crate::{ bolts::{current_time, tuples::Named, AsIter}, - corpus::{Corpus, SchedulerTestcaseMetaData}, + corpus::{Corpus, CorpusId, SchedulerTestcaseMetaData}, events::{EventFirer, LogSeverity}, executors::{Executor, ExitKind, HasObservers}, feedbacks::{ @@ -103,7 +103,7 @@ where executor: &mut E, state: &mut E::State, mgr: &mut EM, - corpus_idx: usize, + corpus_idx: CorpusId, ) -> Result<(), Error> { // Run this stage only once for each corpus entry if state.corpus().get(corpus_idx)?.borrow_mut().fuzz_level() > 0 { diff --git a/libafl/src/stages/concolic.rs b/libafl/src/stages/concolic.rs index 3332c52752..c6564153e7 100644 --- a/libafl/src/stages/concolic.rs +++ b/libafl/src/stages/concolic.rs @@ -9,7 +9,7 @@ use core::marker::PhantomData; use super::{Stage, TracingStage}; use crate::{ - corpus::Corpus, + corpus::{Corpus, CorpusId}, executors::{Executor, HasObservers}, observers::concolic::ConcolicObserver, state::{HasClientPerfMonitor, HasCorpus, HasExecutions, HasMetadata}, @@ -45,7 +45,7 @@ where executor: &mut E, state: &mut TE::State, manager: &mut EM, - corpus_idx: usize, + corpus_idx: CorpusId, ) -> Result<(), Error> { self.inner .perform(fuzzer, executor, state, manager, corpus_idx)?; @@ -361,7 +361,7 @@ where executor: &mut E, state: &mut Z::State, manager: &mut EM, - corpus_idx: usize, + corpus_idx: CorpusId, ) -> Result<(), Error> { start_timer!(state); let testcase = state.corpus().get(corpus_idx)?.clone(); diff --git a/libafl/src/stages/dump.rs b/libafl/src/stages/dump.rs index 194c611998..585533fddb 100644 --- a/libafl/src/stages/dump.rs +++ b/libafl/src/stages/dump.rs @@ -7,7 +7,7 @@ use std::{fs, fs::File, io::Write, path::PathBuf}; use serde::{Deserialize, Serialize}; use crate::{ - corpus::Corpus, + corpus::{Corpus, CorpusId}, inputs::UsesInput, stages::Stage, state::{HasCorpus, HasMetadata, HasRand, HasSolutions, UsesState}, @@ -17,8 +17,8 @@ use crate::{ /// Metadata used to store information about disk dump indexes for names #[derive(Default, Serialize, Deserialize, Clone, Debug)] pub struct DumpToDiskMetadata { - last_corpus: usize, - last_solution: usize, + last_corpus: Option, + last_solution: Option, } crate::impl_serdeany!(DumpToDiskMetadata); @@ -54,17 +54,19 @@ where _executor: &mut E, state: &mut Z::State, _manager: &mut EM, - _corpus_idx: usize, + _corpus_idx: CorpusId, ) -> Result<(), Error> { - let meta = state - .metadata() - .get::() - .map_or_else(DumpToDiskMetadata::default, Clone::clone); + let (mut corpus_idx, mut solutions_idx) = + if let Some(meta) = state.metadata().get::() { + ( + meta.last_corpus.and_then(|x| state.corpus().next(x)), + meta.last_solution.and_then(|x| state.solutions().next(x)), + ) + } else { + (state.corpus().first(), state.solutions().first()) + }; - let corpus_count = state.corpus().count(); - let solutions_count = state.solutions().count(); - - for i in meta.last_corpus..corpus_count { + while let Some(i) = corpus_idx { let mut testcase = state.corpus().get(i)?.borrow_mut(); let input = testcase.load_input()?; let bytes = (self.to_bytes)(input); @@ -72,9 +74,11 @@ where let fname = self.corpus_dir.join(format!("id_{i}")); let mut f = File::create(fname)?; drop(f.write_all(&bytes)); + + corpus_idx = state.corpus().next(i); } - for i in meta.last_solution..solutions_count { + while let Some(i) = solutions_idx { let mut testcase = state.solutions().get(i)?.borrow_mut(); let input = testcase.load_input()?; let bytes = (self.to_bytes)(input); @@ -82,11 +86,13 @@ where let fname = self.solutions_dir.join(format!("id_{i}")); let mut f = File::create(fname)?; drop(f.write_all(&bytes)); + + solutions_idx = state.solutions().next(i); } state.add_metadata(DumpToDiskMetadata { - last_corpus: corpus_count, - last_solution: solutions_count, + last_corpus: state.corpus().last(), + last_solution: state.solutions().last(), }); Ok(()) diff --git a/libafl/src/stages/generalization.rs b/libafl/src/stages/generalization.rs index d8e389227c..0e94d8338e 100644 --- a/libafl/src/stages/generalization.rs +++ b/libafl/src/stages/generalization.rs @@ -13,7 +13,7 @@ use serde::{Deserialize, Serialize}; use crate::monitors::PerfFeature; use crate::{ bolts::AsSlice, - corpus::Corpus, + corpus::{Corpus, CorpusId}, executors::{Executor, HasObservers}, feedbacks::map::MapNoveltiesMetadata, inputs::{GeneralizedInput, GeneralizedItem, HasBytesVec, UsesInput}, @@ -31,7 +31,7 @@ const MAX_GENERALIZED_LEN: usize = 8192; #[derive(Debug, Default, Serialize, Deserialize)] pub struct GeneralizedIndexesMetadata { /// The set of indexes - pub indexes: HashSet, + pub indexes: HashSet, } crate::impl_serdeany!(GeneralizedIndexesMetadata); @@ -95,7 +95,7 @@ where executor: &mut E, state: &mut E::State, manager: &mut EM, - corpus_idx: usize, + corpus_idx: CorpusId, ) -> Result<(), Error> { if state .metadata() diff --git a/libafl/src/stages/mod.rs b/libafl/src/stages/mod.rs index ab50cef54f..f05b8443f7 100644 --- a/libafl/src/stages/mod.rs +++ b/libafl/src/stages/mod.rs @@ -54,6 +54,7 @@ pub use dump::*; use self::push::PushStage; use crate::{ + corpus::CorpusId, events::{EventFirer, EventRestarter, HasEventManagerId, ProgressReporter}, executors::{Executor, HasObservers}, inputs::UsesInput, @@ -78,7 +79,7 @@ where executor: &mut E, state: &mut Self::State, manager: &mut EM, - corpus_idx: usize, + corpus_idx: CorpusId, ) -> Result<(), Error>; } @@ -97,7 +98,7 @@ where executor: &mut E, state: &mut S, manager: &mut EM, - corpus_idx: usize, + corpus_idx: CorpusId, ) -> Result<(), Error>; } @@ -114,7 +115,7 @@ where _: &mut E, _: &mut S, _: &mut EM, - _: usize, + _: CorpusId, ) -> Result<(), Error> { Ok(()) } @@ -134,7 +135,7 @@ where executor: &mut E, state: &mut Head::State, manager: &mut EM, - corpus_idx: usize, + corpus_idx: CorpusId, ) -> Result<(), Error> { // Perform the current stage self.0 @@ -150,7 +151,7 @@ where #[derive(Debug)] pub struct ClosureStage where - CB: FnMut(&mut Z, &mut E, &mut E::State, &mut EM, usize) -> Result<(), Error>, + CB: FnMut(&mut Z, &mut E, &mut E::State, &mut EM, CorpusId) -> Result<(), Error>, E: UsesState, { closure: CB, @@ -159,7 +160,7 @@ where impl UsesState for ClosureStage where - CB: FnMut(&mut Z, &mut E, &mut E::State, &mut EM, usize) -> Result<(), Error>, + CB: FnMut(&mut Z, &mut E, &mut E::State, &mut EM, CorpusId) -> Result<(), Error>, E: UsesState, { type State = E::State; @@ -167,7 +168,7 @@ where impl Stage for ClosureStage where - CB: FnMut(&mut Z, &mut E, &mut E::State, &mut EM, usize) -> Result<(), Error>, + CB: FnMut(&mut Z, &mut E, &mut E::State, &mut EM, CorpusId) -> Result<(), Error>, E: UsesState, EM: UsesState, Z: UsesState, @@ -178,7 +179,7 @@ where executor: &mut E, state: &mut E::State, manager: &mut EM, - corpus_idx: usize, + corpus_idx: CorpusId, ) -> Result<(), Error> { (self.closure)(fuzzer, executor, state, manager, corpus_idx) } @@ -187,7 +188,7 @@ where /// A stage that takes a closure impl ClosureStage where - CB: FnMut(&mut Z, &mut E, &mut E::State, &mut EM, usize) -> Result<(), Error>, + CB: FnMut(&mut Z, &mut E, &mut E::State, &mut EM, CorpusId) -> Result<(), Error>, E: UsesState, { /// Create a new [`ClosureStage`] @@ -202,7 +203,7 @@ where impl From for ClosureStage where - CB: FnMut(&mut Z, &mut E, &mut E::State, &mut EM, usize) -> Result<(), Error>, + CB: FnMut(&mut Z, &mut E, &mut E::State, &mut EM, CorpusId) -> Result<(), Error>, E: UsesState, { #[must_use] @@ -260,7 +261,7 @@ where executor: &mut E, state: &mut CS::State, event_mgr: &mut EM, - corpus_idx: usize, + corpus_idx: CorpusId, ) -> Result<(), Error> { let push_stage = &mut self.push_stage; @@ -365,7 +366,7 @@ where executor: &mut E, state: &mut ST::State, manager: &mut EM, - corpus_idx: usize, + corpus_idx: CorpusId, ) -> Result<(), Error> { let condition = &mut self.condition; if condition(state) == SkippableStageDecision::Perform { @@ -386,6 +387,7 @@ pub mod pybind { use pyo3::prelude::*; use crate::{ + corpus::CorpusId, events::pybind::PythonEventManager, executors::pybind::PythonExecutor, fuzzer::pybind::{PythonStdFuzzer, PythonStdFuzzerWrapper}, @@ -421,7 +423,7 @@ pub mod pybind { executor: &mut PythonExecutor, state: &mut PythonStdState, manager: &mut PythonEventManager, - corpus_idx: usize, + corpus_idx: CorpusId, ) -> Result<(), Error> { Python::with_gil(|py| -> PyResult<()> { self.inner.call_method1( @@ -432,7 +434,7 @@ pub mod pybind { executor.clone(), PythonStdStateWrapper::wrap(state), manager.clone(), - corpus_idx, + corpus_idx.0, ), )?; Ok(()) @@ -510,7 +512,7 @@ pub mod pybind { executor: &mut PythonExecutor, state: &mut PythonStdState, manager: &mut PythonEventManager, - corpus_idx: usize, + corpus_idx: CorpusId, ) -> Result<(), Error> { unwrap_me_mut!(self.wrapper, s, { s.perform(fuzzer, executor, state, manager, corpus_idx) @@ -549,7 +551,7 @@ pub mod pybind { executor: &mut PythonExecutor, state: &mut PythonStdState, manager: &mut PythonEventManager, - corpus_idx: usize, + corpus_idx: CorpusId, ) -> Result<(), Error> { for s in &mut self.list { s.perform(fuzzer, executor, state, manager, corpus_idx)?; diff --git a/libafl/src/stages/mutational.rs b/libafl/src/stages/mutational.rs index 57a2ae6e7f..016d3626cb 100644 --- a/libafl/src/stages/mutational.rs +++ b/libafl/src/stages/mutational.rs @@ -7,7 +7,7 @@ use core::marker::PhantomData; use crate::monitors::PerfFeature; use crate::{ bolts::rands::Rand, - corpus::Corpus, + corpus::{Corpus, CorpusId}, fuzzer::Evaluator, mark_feature_time, mutators::Mutator, @@ -37,7 +37,7 @@ where fn mutator_mut(&mut self) -> &mut M; /// Gets the number of iterations this mutator should run for. - fn iterations(&self, state: &mut Z::State, corpus_idx: usize) -> Result; + fn iterations(&self, state: &mut Z::State, corpus_idx: CorpusId) -> Result; /// Runs this (mutational) stage for the given testcase #[allow(clippy::cast_possible_wrap)] // more than i32 stages on 32 bit system - highly unlikely... @@ -47,7 +47,7 @@ where executor: &mut E, state: &mut Z::State, manager: &mut EM, - corpus_idx: usize, + corpus_idx: CorpusId, ) -> Result<(), Error> { let num = self.iterations(state, corpus_idx)?; @@ -109,7 +109,7 @@ where } /// Gets the number of iterations as a random number - fn iterations(&self, state: &mut Z::State, _corpus_idx: usize) -> Result { + fn iterations(&self, state: &mut Z::State, _corpus_idx: CorpusId) -> Result { Ok(1 + state.rand_mut().below(DEFAULT_MUTATIONAL_MAX_ITERATIONS)) } } @@ -141,7 +141,7 @@ where executor: &mut E, state: &mut Z::State, manager: &mut EM, - corpus_idx: usize, + corpus_idx: CorpusId, ) -> Result<(), Error> { let ret = self.perform_mutational(fuzzer, executor, state, manager, corpus_idx); diff --git a/libafl/src/stages/owned.rs b/libafl/src/stages/owned.rs index 11e335bf1c..5983234b55 100644 --- a/libafl/src/stages/owned.rs +++ b/libafl/src/stages/owned.rs @@ -4,6 +4,7 @@ use alloc::{boxed::Box, vec::Vec}; use crate::{ bolts::anymap::AsAny, + corpus::CorpusId, stages::{Stage, StagesTuple}, state::UsesState, Error, @@ -42,7 +43,7 @@ where executor: &mut E, state: &mut E::State, manager: &mut EM, - corpus_idx: usize, + corpus_idx: CorpusId, ) -> Result<(), Error> { for s in &mut self.list { s.perform(fuzzer, executor, state, manager, corpus_idx)?; diff --git a/libafl/src/stages/power.rs b/libafl/src/stages/power.rs index e94bb208e5..70719cfa31 100644 --- a/libafl/src/stages/power.rs +++ b/libafl/src/stages/power.rs @@ -5,7 +5,7 @@ use core::{fmt::Debug, marker::PhantomData}; use crate::{ bolts::tuples::MatchName, - corpus::{Corpus, SchedulerTestcaseMetaData}, + corpus::{Corpus, CorpusId, SchedulerTestcaseMetaData}, executors::{Executor, HasObservers}, fuzzer::Evaluator, mutators::Mutator, @@ -58,7 +58,7 @@ where /// Gets the number of iterations as a random number #[allow(clippy::cast_sign_loss)] - fn iterations(&self, state: &mut E::State, corpus_idx: usize) -> Result { + fn iterations(&self, state: &mut E::State, corpus_idx: CorpusId) -> Result { // Update handicap let mut testcase = state.corpus().get(corpus_idx)?.borrow_mut(); let score = F::compute(&mut *testcase, state)? as u64; @@ -73,7 +73,7 @@ where executor: &mut E, state: &mut E::State, manager: &mut EM, - corpus_idx: usize, + corpus_idx: CorpusId, ) -> Result<(), Error> { let num = self.iterations(state, corpus_idx)?; @@ -143,7 +143,7 @@ where executor: &mut E, state: &mut E::State, manager: &mut EM, - corpus_idx: usize, + corpus_idx: CorpusId, ) -> Result<(), Error> { let ret = self.perform_mutational(fuzzer, executor, state, manager, corpus_idx); ret diff --git a/libafl/src/stages/push/mod.rs b/libafl/src/stages/push/mod.rs index 71ed01a0b2..bb0ce56030 100644 --- a/libafl/src/stages/push/mod.rs +++ b/libafl/src/stages/push/mod.rs @@ -17,6 +17,7 @@ pub use mutational::StdMutationalPushStage; use crate::{ bolts::current_time, + corpus::CorpusId, events::{EventFirer, EventRestarter, HasEventManagerId, ProgressReporter}, executors::ExitKind, inputs::UsesInput, @@ -100,7 +101,7 @@ where pub errored: bool, /// The corpus index we're currently working on - pub current_corpus_idx: Option, + pub current_corpus_idx: Option, /// The input we just ran pub current_input: Option<::Input>, // Todo: Get rid of copy @@ -196,7 +197,7 @@ where fn push_stage_helper_mut(&mut self) -> &mut PushStageHelper; /// Set the current corpus index this stage works on - fn set_current_corpus_idx(&mut self, corpus_idx: usize) { + fn set_current_corpus_idx(&mut self, corpus_idx: CorpusId) { self.push_stage_helper_mut().current_corpus_idx = Some(corpus_idx); } diff --git a/libafl/src/stages/push/mutational.rs b/libafl/src/stages/push/mutational.rs index 92c0dded2e..a0b6d8f7d9 100644 --- a/libafl/src/stages/push/mutational.rs +++ b/libafl/src/stages/push/mutational.rs @@ -12,7 +12,7 @@ use super::{PushStage, PushStageHelper, PushStageSharedState}; use crate::monitors::PerfFeature; use crate::{ bolts::rands::Rand, - corpus::Corpus, + corpus::{Corpus, CorpusId}, events::{EventFirer, EventRestarter, HasEventManagerId, ProgressReporter}, executors::ExitKind, inputs::UsesInput, @@ -48,7 +48,7 @@ where + EvaluatorObservers + HasScheduler, { - current_corpus_idx: Option, + current_corpus_idx: Option, testcases_to_do: usize, testcases_done: usize, @@ -72,12 +72,12 @@ where { /// Gets the number of iterations as a random number #[allow(clippy::unused_self, clippy::unnecessary_wraps)] // TODO: we should put this function into a trait later - fn iterations(&self, state: &mut CS::State, _corpus_idx: usize) -> Result { + fn iterations(&self, state: &mut CS::State, _corpus_idx: CorpusId) -> Result { Ok(1 + state.rand_mut().below(DEFAULT_MUTATIONAL_MAX_ITERATIONS) as usize) } /// Sets the current corpus index - pub fn set_current_corpus_idx(&mut self, current_corpus_idx: usize) { + pub fn set_current_corpus_idx(&mut self, current_corpus_idx: CorpusId) { self.current_corpus_idx = Some(current_corpus_idx); } } @@ -175,7 +175,7 @@ where start_timer!(state); self.mutator - .post_exec(state, self.stage_idx, Some(self.testcases_done))?; + .post_exec(state, self.stage_idx, self.current_corpus_idx)?; mark_feature_time!(state, PerfFeature::MutatePostExec); self.testcases_done += 1; diff --git a/libafl/src/stages/sync.rs b/libafl/src/stages/sync.rs index 10ac9853a9..1d6ff49d95 100644 --- a/libafl/src/stages/sync.rs +++ b/libafl/src/stages/sync.rs @@ -10,6 +10,7 @@ use std::{ use serde::{Deserialize, Serialize}; use crate::{ + corpus::CorpusId, fuzzer::Evaluator, inputs::{Input, UsesInput}, stages::Stage, @@ -64,7 +65,7 @@ where executor: &mut E, state: &mut Z::State, manager: &mut EM, - _corpus_idx: usize, + _corpus_idx: CorpusId, ) -> Result<(), Error> { let last = state .metadata() diff --git a/libafl/src/stages/tmin.rs b/libafl/src/stages/tmin.rs index 0e956b2ecb..e3faa7826e 100644 --- a/libafl/src/stages/tmin.rs +++ b/libafl/src/stages/tmin.rs @@ -13,7 +13,7 @@ use ahash::AHasher; use crate::monitors::PerfFeature; use crate::{ bolts::{tuples::Named, HasLen}, - corpus::{Corpus, Testcase}, + corpus::{Corpus, CorpusId, Testcase}, events::EventFirer, executors::{Executor, ExitKind, HasObservers}, feedbacks::{Feedback, FeedbackFactory, HasObserverName}, @@ -55,7 +55,7 @@ where fn mutator_mut(&mut self) -> &mut M; /// Gets the number of iterations this mutator should run for. - fn iterations(&self, state: &mut CS::State, corpus_idx: usize) -> Result; + fn iterations(&self, state: &mut CS::State, corpus_idx: CorpusId) -> Result; /// Runs this (mutational) stage for new objectives #[allow(clippy::cast_possible_wrap)] // more than i32 stages on 32 bit system - highly unlikely... @@ -65,7 +65,7 @@ where executor: &mut E, state: &mut CS::State, manager: &mut EM, - base_corpus_idx: usize, + base_corpus_idx: CorpusId, ) -> Result<(), Error> { let orig_max_size = state.max_size(); // basically copy-pasted from mutational.rs @@ -207,7 +207,7 @@ where executor: &mut E, state: &mut CS::State, manager: &mut EM, - corpus_idx: usize, + corpus_idx: CorpusId, ) -> Result<(), Error> { self.perform_minification(fuzzer, executor, state, manager, corpus_idx)?; @@ -262,7 +262,7 @@ where } /// Gets the number of iterations from a fixed number of runs - fn iterations(&self, _state: &mut CS::State, _corpus_idx: usize) -> Result { + fn iterations(&self, _state: &mut CS::State, _corpus_idx: CorpusId) -> Result { Ok(self.runs) } } diff --git a/libafl/src/stages/tracing.rs b/libafl/src/stages/tracing.rs index 725354048d..af5fcf4480 100644 --- a/libafl/src/stages/tracing.rs +++ b/libafl/src/stages/tracing.rs @@ -5,7 +5,7 @@ use core::{fmt::Debug, marker::PhantomData}; #[cfg(feature = "introspection")] use crate::monitors::PerfFeature; use crate::{ - corpus::Corpus, + corpus::{Corpus, CorpusId}, executors::{Executor, HasObservers, ShadowExecutor}, mark_feature_time, observers::ObserversTuple, @@ -45,7 +45,7 @@ where _executor: &mut E, state: &mut TE::State, manager: &mut EM, - corpus_idx: usize, + corpus_idx: CorpusId, ) -> Result<(), Error> { start_timer!(state); let input = state @@ -124,7 +124,7 @@ where executor: &mut ShadowExecutor, state: &mut E::State, manager: &mut EM, - corpus_idx: usize, + corpus_idx: CorpusId, ) -> Result<(), Error> { start_timer!(state); let input = state diff --git a/libafl/src/stages/tuneable.rs b/libafl/src/stages/tuneable.rs index ff68fea670..efdce30084 100644 --- a/libafl/src/stages/tuneable.rs +++ b/libafl/src/stages/tuneable.rs @@ -6,6 +6,7 @@ use serde::{Deserialize, Serialize}; use crate::{ bolts::rands::Rand, + corpus::CorpusId, impl_serdeany, mutators::Mutator, stages::{mutational::DEFAULT_MUTATIONAL_MAX_ITERATIONS, MutationalStage, Stage}, @@ -78,7 +79,7 @@ where /// Gets the number of iterations as a random number #[allow(clippy::cast_possible_truncation)] - fn iterations(&self, state: &mut Z::State, _corpus_idx: usize) -> Result { + fn iterations(&self, state: &mut Z::State, _corpus_idx: CorpusId) -> Result { Ok(if let Some(iters) = get_iters(state)? { iters } else { @@ -115,7 +116,7 @@ where executor: &mut E, state: &mut Z::State, manager: &mut EM, - corpus_idx: usize, + corpus_idx: CorpusId, ) -> Result<(), Error> { let ret = self.perform_mutational(fuzzer, executor, state, manager, corpus_idx);