diff --git a/libafl/src/corpus/accounting.rs b/libafl/src/corpus/accounting.rs
index 4ab473054d..21ccb0a1fa 100644
--- a/libafl/src/corpus/accounting.rs
+++ b/libafl/src/corpus/accounting.rs
@@ -3,11 +3,8 @@
 use crate::{
     bolts::{rands::Rand, AsMutSlice, AsSlice, HasLen, HasRefCnt},
     corpus::{
-        minimizer::{
-            IsFavoredMetadata, LenTimeMulFavFactor, MinimizerCorpusScheduler,
-            DEFAULT_SKIP_NON_FAVORED_PROB,
-        },
-        Corpus, CorpusScheduler, Testcase,
+        minimizer::{IsFavoredMetadata, MinimizerCorpusScheduler, DEFAULT_SKIP_NON_FAVORED_PROB},
+        Corpus, CorpusScheduler, LenTimeMulFavFactor, Testcase,
     },
     feedbacks::MapIndexesMetadata,
     inputs::Input,
diff --git a/libafl/src/corpus/fav_factor.rs b/libafl/src/corpus/fav_factor.rs
new file mode 100644
index 0000000000..d7adcb097f
--- /dev/null
+++ b/libafl/src/corpus/fav_factor.rs
@@ -0,0 +1,39 @@
+//! The `FavFactor` is an evaluator providing scores of corpus items.
+
+use crate::{bolts::HasLen, corpus::Testcase, inputs::Input, Error};
+
+use core::marker::PhantomData;
+
+/// Compute the favor factor of a [`Testcase`]. Lower is better.
+pub trait FavFactor<I>
+where
+    I: Input,
+{
+    /// Computes the favor factor of a [`Testcase`]. Lower is better.
+    fn compute(entry: &mut Testcase<I>) -> Result<u64, Error>;
+}
+
+/// Multiply the testcase size with the execution time.
+/// This favors small and quick testcases.
+#[derive(Debug, Clone)]
+pub struct LenTimeMulFavFactor<I>
+where
+    I: Input + HasLen,
+{
+    phantom: PhantomData<I>,
+}
+
+impl<I> FavFactor<I> for LenTimeMulFavFactor<I>
+where
+    I: Input + HasLen,
+{
+    /// Multiplies the execution time in whole milliseconds (`1` if none is recorded)
+    /// by `entry.cached_len()`. The product saturates at `u64::MAX` rather than
+    /// overflowing, and is `0` whenever either operand is `0`.
+    fn compute(entry: &mut Testcase<I>) -> Result<u64, Error> {
+        // TODO maybe enforce entry.exec_time().is_some()
+        let millis = entry.exec_time().map_or(1, |d| d.as_millis()) as u64;
+        let len = entry.cached_len()? as u64;
+        Ok(millis.saturating_mul(len))
+    }
+}
diff --git a/libafl/src/corpus/minimizer.rs b/libafl/src/corpus/minimizer.rs
index d78ea9597e..01718bca64 100644
--- a/libafl/src/corpus/minimizer.rs
+++ b/libafl/src/corpus/minimizer.rs
@@ -2,8 +2,8 @@
 // with testcases only from a subset of the total corpus.
 
 use crate::{
-    bolts::{rands::Rand, serdeany::SerdeAny, AsSlice, HasLen, HasRefCnt},
-    corpus::{Corpus, CorpusScheduler, Testcase},
+    bolts::{rands::Rand, serdeany::SerdeAny, AsSlice, HasRefCnt},
+    corpus::{Corpus, CorpusScheduler, FavFactor, LenTimeMulFavFactor, Testcase},
     feedbacks::MapIndexesMetadata,
     inputs::Input,
     state::{HasCorpus, HasMetadata, HasRand},
@@ -48,35 +48,6 @@ impl Default for TopRatedsMetadata {
     }
 }
 
-/// Compute the favor factor of a [`Testcase`]. Lower is better.
-pub trait FavFactor<I>
-where
-    I: Input,
-{
-    /// Computes the favor factor of a [`Testcase`]. Lower is better.
-    fn compute(testcase: &mut Testcase<I>) -> Result<u64, Error>;
-}
-
-/// Multiply the testcase size with the execution time.
-/// This favors small and quick testcases.
-#[derive(Debug, Clone)]
-pub struct LenTimeMulFavFactor<I>
-where
-    I: Input + HasLen,
-{
-    phantom: PhantomData<I>,
-}
-
-impl<I> FavFactor<I> for LenTimeMulFavFactor<I>
-where
-    I: Input + HasLen,
-{
-    fn compute(entry: &mut Testcase<I>) -> Result<u64, Error> {
-        // TODO maybe enforce entry.exec_time().is_some()
-        Ok(entry.exec_time().map_or(1, |d| d.as_millis()) as u64 * entry.cached_len()? as u64)
-    }
-}
-
 /// The [`MinimizerCorpusScheduler`] employs a genetic algorithm to compute a subset of the
 /// corpus that exercise all the requested features (e.g. all the coverage seen so far)
 /// prioritizing [`Testcase`]`s` using [`FavFactor`]
diff --git a/libafl/src/corpus/mod.rs b/libafl/src/corpus/mod.rs
index a495f393ad..de14872949 100644
--- a/libafl/src/corpus/mod.rs
+++ b/libafl/src/corpus/mod.rs
@@ -19,14 +19,19 @@ pub use cached::CachedOnDiskCorpus;
 pub mod queue;
 pub use queue::QueueCorpusScheduler;
 
+pub mod probabilistic_sampling;
+pub use probabilistic_sampling::ProbabilitySamplingCorpusScheduler;
+
 pub mod accounting;
 pub use accounting::*;
 
+pub mod fav_factor;
+pub use fav_factor::{FavFactor, LenTimeMulFavFactor};
+
 pub mod minimizer;
 pub use minimizer::{
-    FavFactor, IndexesLenTimeMinimizerCorpusScheduler, IsFavoredMetadata,
-    LenTimeMinimizerCorpusScheduler, LenTimeMulFavFactor, MinimizerCorpusScheduler,
-    TopRatedsMetadata,
+    IndexesLenTimeMinimizerCorpusScheduler, IsFavoredMetadata, LenTimeMinimizerCorpusScheduler,
+    MinimizerCorpusScheduler, TopRatedsMetadata,
 };
 
 pub mod powersched;
diff --git a/libafl/src/corpus/probabilistic_sampling.rs b/libafl/src/corpus/probabilistic_sampling.rs
new file mode 100644
index 0000000000..b172c77cbd
--- /dev/null
+++ b/libafl/src/corpus/probabilistic_sampling.rs
@@ -0,0 +1,227 @@
+//! Probabilistic sampling scheduler is a corpus scheduler that feeds the fuzzer
+//! with items sampled from the corpus.
+
+use crate::{
+    bolts::rands::Rand,
+    corpus::{Corpus, CorpusScheduler, FavFactor},
+    inputs::Input,
+    state::{HasCorpus, HasMetadata, HasRand},
+    Error,
+};
+use alloc::string::String;
+use core::marker::PhantomData;
+use hashbrown::HashMap;
+use serde::{Deserialize, Serialize};
+
+/// Conduct weighted random sampling over all corpus elements: an element is picked
+/// with a probability proportional to the inverse of its [`FavFactor`] score.
+#[derive(Debug, Clone)]
+pub struct ProbabilitySamplingCorpusScheduler<I, S, F>
+where
+    I: Input,
+    S: HasCorpus<I> + HasMetadata + HasRand,
+    F: FavFactor<I>,
+{
+    phantom: PhantomData<(I, S, F)>,
+}
+
+/// A state metadata holding a map of probability of corpus elements.
+#[derive(Debug, Serialize, Deserialize)]
+pub struct ProbabilityMetadata {
+    /// corpus index -> probability
+    pub map: HashMap<usize, f64>,
+    /// total probability of all items in the map
+    pub total_probability: f64,
+}
+
+crate::impl_serdeany!(ProbabilityMetadata);
+
+impl ProbabilityMetadata {
+    /// Creates a new [`struct@ProbabilityMetadata`]
+    #[must_use]
+    pub fn new() -> Self {
+        Self {
+            map: HashMap::default(),
+            total_probability: 0.0,
+        }
+    }
+}
+
+impl Default for ProbabilityMetadata {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl<I, S, F> ProbabilitySamplingCorpusScheduler<I, S, F>
+where
+    I: Input,
+    S: HasCorpus<I> + HasMetadata + HasRand,
+    F: FavFactor<I>,
+{
+    /// Creates a new [`struct@ProbabilitySamplingCorpusScheduler`]
+    #[must_use]
+    pub fn new() -> Self {
+        Self {
+            phantom: PhantomData,
+        }
+    }
+
+    /// Calculate the score and store in `ProbabilityMetadata`
+    ///
+    /// Scoring an index that already has an entry replaces that entry, so
+    /// `total_probability` stays equal to the sum of the stored values.
+    ///
+    /// # Errors
+    /// Returns [`Error::IllegalState`] if the computed factor is `0` or if no
+    /// `ProbabilityMetadata` has been added to the state yet.
+    #[allow(clippy::cast_precision_loss)]
+    #[allow(clippy::unused_self)]
+    pub fn store_probability(&self, state: &mut S, idx: usize) -> Result<(), Error> {
+        let factor = F::compute(&mut *state.corpus().get(idx)?.borrow_mut())?;
+        if factor == 0 {
+            return Err(Error::IllegalState(
+                "Infinity probability calculated for probabilistic sampling scheduler".into(),
+            ));
+        }
+        let meta = state
+            .metadata_mut()
+            .get_mut::<ProbabilityMetadata>()
+            .ok_or_else(|| {
+                Error::IllegalState("ProbabilityMetadata is missing from the state".into())
+            })?;
+        let prob = 1.0 / (factor as f64);
+        // `insert` hands back the value it replaced; drop that share from the total.
+        if let Some(previous) = meta.map.insert(idx, prob) {
+            meta.total_probability -= previous;
+        }
+        meta.total_probability += prob;
+        Ok(())
+    }
+}
+
+impl<I, S, F> CorpusScheduler<I, S> for ProbabilitySamplingCorpusScheduler<I, S, F>
+where
+    I: Input,
+    S: HasCorpus<I> + HasMetadata + HasRand,
+    F: FavFactor<I>,
+{
+    fn on_add(&self, state: &mut S, idx: usize) -> Result<(), Error> {
+        if state.metadata().get::<ProbabilityMetadata>().is_none() {
+            state.add_metadata(ProbabilityMetadata::new());
+        }
+        self.store_probability(state, idx)
+    }
+
+    /// Gets the next entry
+    #[allow(clippy::cast_precision_loss)]
+    fn next(&self, state: &mut S) -> Result<usize, Error> {
+        // The draw is taken in steps of 2^-53. Coarser steps (such as whole percents)
+        // can only ever land on that many distinct entries of a larger corpus.
+        const RESOLUTION: u64 = 1 << 53;
+
+        if state.corpus().count() == 0 {
+            return Err(Error::Empty(String::from("No entries in corpus")));
+        }
+        let rand_prob = (state.rand_mut().below(RESOLUTION) as f64) / (RESOLUTION as f64);
+        let meta = state
+            .metadata()
+            .get::<ProbabilityMetadata>()
+            .ok_or_else(|| {
+                Error::IllegalState("ProbabilityMetadata is missing from the state".into())
+            })?;
+        let threshold = meta.total_probability * rand_prob;
+        let mut k: f64 = 0.0;
+        let mut last = None;
+        for (idx, prob) in &meta.map {
+            k += prob;
+            if k >= threshold {
+                return Ok(*idx);
+            }
+            last = Some(*idx);
+        }
+        // Rounding can leave the running sum just short of the threshold; fall back to
+        // the last entry visited, or report an error if nothing has been scored.
+        last.ok_or_else(|| Error::Empty(String::from("No entries in probability map")))
+    }
+}
+
+impl<I, S, F> Default for ProbabilitySamplingCorpusScheduler<I, S, F>
+where
+    I: Input,
+    S: HasCorpus<I> + HasMetadata + HasRand,
+    F: FavFactor<I>,
+{
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+#[cfg(test)]
+#[cfg(feature = "std")]
+mod tests {
+    use core::borrow::BorrowMut;
+
+    use crate::{
+        bolts::rands::StdRand,
+        corpus::{
+            Corpus, CorpusScheduler, FavFactor, InMemoryCorpus, ProbabilitySamplingCorpusScheduler,
+            Testcase,
+        },
+        inputs::{bytes::BytesInput, Input},
+        state::StdState,
+        Error,
+    };
+    use core::marker::PhantomData;
+
+    const FACTOR: u64 = 1337;
+
+    #[derive(Debug, Clone)]
+    pub struct UniformDistribution<I>
+    where
+        I: Input,
+    {
+        phantom: PhantomData<I>,
+    }
+
+    impl<I> FavFactor<I> for UniformDistribution<I>
+    where
+        I: Input,
+    {
+        fn compute(_: &mut Testcase<I>) -> Result<u64, Error> {
+            Ok(FACTOR)
+        }
+    }
+
+    pub type UniformProbabilitySamplingCorpusScheduler<I, S> =
+        ProbabilitySamplingCorpusScheduler<I, S, UniformDistribution<I>>;
+
+    #[test]
+    fn test_prob_sampling() {
+        let rand = StdRand::with_seed(12);
+
+        let scheduler = UniformProbabilitySamplingCorpusScheduler::new();
+
+        let mut corpus = InMemoryCorpus::new();
+        let t1 = Testcase::with_filename(BytesInput::new(vec![0_u8; 4]), "1".into());
+        let t2 = Testcase::with_filename(BytesInput::new(vec![1_u8; 4]), "2".into());
+
+        let idx1 = corpus.add(t1).unwrap();
+        let idx2 = corpus.add(t2).unwrap();
+
+        let mut state = StdState::new(rand, corpus, InMemoryCorpus::new(), ());
+        scheduler.on_add(state.borrow_mut(), idx1).unwrap();
+        scheduler.on_add(state.borrow_mut(), idx2).unwrap();
+
+        // Both entries carry the same weight, so the chance that 64 draws miss one of
+        // them is 2^-63: each index must be returned at least once.
+        let (mut seen1, mut seen2) = (false, false);
+        for _ in 0..64 {
+            let next_idx = scheduler.next(&mut state).unwrap();
+            assert!(next_idx == idx1 || next_idx == idx2);
+            seen1 |= next_idx == idx1;
+            seen2 |= next_idx == idx2;
+        }
+        assert!(seen1 && seen2);
+    }
+}