Add probabilistic sampling corpus scheduler (#544)

* Add probabilistic sampling corpus scheduler

* Linting

* Fix ToOwned error

* Move the if-statement that checks for `ProbabilityMetadata` existence and revert the powersched removal

* Use `Error::IllegalState` instead of `Error::DivByZero`
This commit is contained in:
Chaofan Shou 2022-02-24 01:19:38 -08:00 committed by GitHub
parent 679eadcc50
commit c4fb92a1a4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 243 additions and 39 deletions

View File

@ -3,11 +3,8 @@
use crate::{
bolts::{rands::Rand, AsMutSlice, AsSlice, HasLen, HasRefCnt},
corpus::{
minimizer::{
IsFavoredMetadata, LenTimeMulFavFactor, MinimizerCorpusScheduler,
DEFAULT_SKIP_NON_FAVORED_PROB,
},
Corpus, CorpusScheduler, Testcase,
minimizer::{IsFavoredMetadata, MinimizerCorpusScheduler, DEFAULT_SKIP_NON_FAVORED_PROB},
Corpus, CorpusScheduler, LenTimeMulFavFactor, Testcase,
},
feedbacks::MapIndexesMetadata,
inputs::Input,

View File

@ -0,0 +1,34 @@
//! The `FavFactor` is an evaluator providing scores of corpus items.
use crate::{bolts::HasLen, corpus::Testcase, inputs::Input, Error};
use core::marker::PhantomData;
/// Compute the favor factor of a [`Testcase`]. Lower is better.
pub trait FavFactor<I>
where
I: Input,
{
/// Computes the favor factor of a [`Testcase`]. Lower is better.
fn compute(entry: &mut Testcase<I>) -> Result<u64, Error>;
}
/// Scores a testcase as its size multiplied by its execution time,
/// which favors testcases that are both small and quick.
#[derive(Clone, Debug)]
pub struct LenTimeMulFavFactor<I>
where
    I: Input + HasLen,
{
    // Zero-sized marker tying the evaluator to its input type.
    phantom: PhantomData<I>,
}
impl<I> FavFactor<I> for LenTimeMulFavFactor<I>
where
    I: Input + HasLen,
{
    /// Returns the execution time in milliseconds multiplied by the cached input length.
    /// A testcase without a recorded execution time is counted as `1` millisecond.
    ///
    /// # Errors
    /// Propagates the error returned by `cached_len()` on the testcase.
    fn compute(entry: &mut Testcase<I>) -> Result<u64, Error> {
        // TODO maybe enforce entry.exec_time().is_some()
        let exec_millis = entry.exec_time().map_or(1, |time| time.as_millis()) as u64;
        let input_len = entry.cached_len()? as u64;
        Ok(exec_millis * input_len)
    }
}

View File

@ -2,8 +2,8 @@
// with testcases only from a subset of the total corpus.
use crate::{
bolts::{rands::Rand, serdeany::SerdeAny, AsSlice, HasLen, HasRefCnt},
corpus::{Corpus, CorpusScheduler, Testcase},
bolts::{rands::Rand, serdeany::SerdeAny, AsSlice, HasRefCnt},
corpus::{Corpus, CorpusScheduler, FavFactor, LenTimeMulFavFactor, Testcase},
feedbacks::MapIndexesMetadata,
inputs::Input,
state::{HasCorpus, HasMetadata, HasRand},
@ -48,35 +48,6 @@ impl Default for TopRatedsMetadata {
}
}
/// Compute the favor factor of a [`Testcase`]. Lower is better.
pub trait FavFactor<I>
where
I: Input,
{
/// Computes the favor factor of a [`Testcase`]. Lower is better.
fn compute(testcase: &mut Testcase<I>) -> Result<u64, Error>;
}
/// Favor factor defined as testcase size times execution time.
/// Small and quick testcases receive the lowest (best) scores.
#[derive(Clone, Debug)]
pub struct LenTimeMulFavFactor<I>
where
    I: Input + HasLen,
{
    // Zero-sized marker for the input type.
    phantom: PhantomData<I>,
}
impl<I> FavFactor<I> for LenTimeMulFavFactor<I>
where
    I: Input + HasLen,
{
    /// Multiplies the execution time (milliseconds, `1` when none is recorded)
    /// with the cached length of the testcase input.
    ///
    /// # Errors
    /// Propagates the error returned by `cached_len()` on the testcase.
    fn compute(entry: &mut Testcase<I>) -> Result<u64, Error> {
        // TODO maybe enforce entry.exec_time().is_some()
        let millis = entry.exec_time().map_or(1, |duration| duration.as_millis()) as u64;
        let len = entry.cached_len()? as u64;
        Ok(millis * len)
    }
}
/// The [`MinimizerCorpusScheduler`] employs a genetic algorithm to compute a subset of the
/// corpus that exercise all the requested features (e.g. all the coverage seen so far)
/// prioritizing [`Testcase`]`s` using [`FavFactor`]

View File

@ -19,14 +19,19 @@ pub use cached::CachedOnDiskCorpus;
pub mod queue;
pub use queue::QueueCorpusScheduler;
pub mod probabilistic_sampling;
pub use probabilistic_sampling::ProbabilitySamplingCorpusScheduler;
pub mod accounting;
pub use accounting::*;
pub mod fav_factor;
pub use fav_factor::{FavFactor, LenTimeMulFavFactor};
pub mod minimizer;
pub use minimizer::{
FavFactor, IndexesLenTimeMinimizerCorpusScheduler, IsFavoredMetadata,
LenTimeMinimizerCorpusScheduler, LenTimeMulFavFactor, MinimizerCorpusScheduler,
TopRatedsMetadata,
IndexesLenTimeMinimizerCorpusScheduler, IsFavoredMetadata, LenTimeMinimizerCorpusScheduler,
MinimizerCorpusScheduler, TopRatedsMetadata,
};
pub mod powersched;

View File

@ -0,0 +1,197 @@
//! The probabilistic sampling scheduler is a corpus scheduler that feeds the fuzzer
//! with items sampled from the corpus.
use crate::{
bolts::rands::Rand,
corpus::{Corpus, CorpusScheduler, FavFactor},
inputs::Input,
state::{HasCorpus, HasMetadata, HasRand},
Error,
};
use alloc::string::String;
use core::marker::PhantomData;
use hashbrown::HashMap;
use serde::{Deserialize, Serialize};
/// Corpus scheduler that conducts probabilistic (weighted) sampling over all corpus
/// elements, using the [`FavFactor`] `F` to score each of them.
#[derive(Clone, Debug)]
pub struct ProbabilitySamplingCorpusScheduler<I, S, F>
where
    I: Input,
    S: HasCorpus<I> + HasMetadata + HasRand,
    F: FavFactor<I>,
{
    // The scheduler itself is stateless; the marker only binds the generic parameters.
    phantom: PhantomData<(I, S, F)>,
}
/// State metadata that records the sampling probability of each corpus element.
#[derive(Serialize, Deserialize, Debug)]
pub struct ProbabilityMetadata {
    /// Maps a corpus index to the probability stored for that entry.
    pub map: HashMap<usize, f64>,
    /// Sum of the probabilities of all items in the map.
    pub total_probability: f64,
}

// Registers the type so that it can be stored in the state's metadata.
crate::impl_serdeany!(ProbabilityMetadata);
impl ProbabilityMetadata {
    /// Builds an empty [`struct@ProbabilityMetadata`]: no entries and a total
    /// probability of zero.
    #[must_use]
    pub fn new() -> Self {
        let map = HashMap::default();
        let total_probability = 0.0;
        Self {
            map,
            total_probability,
        }
    }
}
impl Default for ProbabilityMetadata {
fn default() -> Self {
Self::new()
}
}
impl<I, S, F> ProbabilitySamplingCorpusScheduler<I, S, F>
where
I: Input,
S: HasCorpus<I> + HasMetadata + HasRand,
F: FavFactor<I>,
{
/// Creates a new [`struct@ProbabilitySamplingCorpusScheduler`]
#[must_use]
pub fn new() -> Self {
Self {
phantom: PhantomData,
}
}
/// Calculate the score and store in `ProbabilityMetadata`
#[allow(clippy::cast_precision_loss)]
#[allow(clippy::unused_self)]
pub fn store_probability(&self, state: &mut S, idx: usize) -> Result<(), Error> {
let factor = F::compute(&mut *state.corpus().get(idx)?.borrow_mut())?;
if factor == 0 {
return Err(Error::IllegalState(
"Infinity probability calculated for probabilistic sampling scheduler".into(),
));
}
let meta = state
.metadata_mut()
.get_mut::<ProbabilityMetadata>()
.unwrap();
let prob = 1.0 / (factor as f64);
meta.map.insert(idx, prob);
meta.total_probability += prob;
Ok(())
}
}
impl<I, S, F> CorpusScheduler<I, S> for ProbabilitySamplingCorpusScheduler<I, S, F>
where
I: Input,
S: HasCorpus<I> + HasMetadata + HasRand,
F: FavFactor<I>,
{
fn on_add(&self, state: &mut S, idx: usize) -> Result<(), Error> {
if state.metadata().get::<ProbabilityMetadata>().is_none() {
state.add_metadata(ProbabilityMetadata::new());
}
self.store_probability(state, idx)
}
/// Gets the next entry
#[allow(clippy::cast_precision_loss)]
fn next(&self, state: &mut S) -> Result<usize, Error> {
if state.corpus().count() == 0 {
Err(Error::Empty(String::from("No entries in corpus")))
} else {
let rand_prob: f64 = (state.rand_mut().below(100) as f64) / 100.0;
let meta = state.metadata().get::<ProbabilityMetadata>().unwrap();
let threshold = meta.total_probability * rand_prob;
let mut k: f64 = 0.0;
for (idx, prob) in meta.map.iter() {
k += prob;
if k >= threshold {
return Ok(*idx);
}
}
Ok(*meta.map.keys().last().unwrap())
}
}
}
impl<I, S, F> Default for ProbabilitySamplingCorpusScheduler<I, S, F>
where
I: Input,
S: HasCorpus<I> + HasMetadata + HasRand,
F: FavFactor<I>,
{
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
#[cfg(feature = "std")]
mod tests {
    use crate::{
        bolts::rands::StdRand,
        corpus::{
            Corpus, CorpusScheduler, FavFactor, InMemoryCorpus, ProbabilitySamplingCorpusScheduler,
            Testcase,
        },
        inputs::{bytes::BytesInput, Input},
        state::StdState,
        Error,
    };
    use core::marker::PhantomData;

    /// Score handed out to every testcase by [`UniformDistribution`].
    const FACTOR: u64 = 1337;

    /// A [`FavFactor`] that gives every testcase the same score, so all corpus
    /// entries are stored with the same probability.
    #[derive(Debug, Clone)]
    pub struct UniformDistribution<I>
    where
        I: Input,
    {
        phantom: PhantomData<I>,
    }

    impl<I> FavFactor<I> for UniformDistribution<I>
    where
        I: Input,
    {
        fn compute(_: &mut Testcase<I>) -> Result<u64, Error> {
            Ok(FACTOR)
        }
    }

    /// Probabilistic sampling scheduler driven by [`UniformDistribution`].
    pub type UniformProbabilitySamplingCorpusScheduler<I, S> =
        ProbabilitySamplingCorpusScheduler<I, S, UniformDistribution<I>>;

    #[test]
    fn test_prob_sampling() {
        // the first 3 probabilities will be .69, .86, .44
        let rand = StdRand::with_seed(12);
        let scheduler = UniformProbabilitySamplingCorpusScheduler::new();

        // Two entries with identical scores.
        let mut corpus = InMemoryCorpus::new();
        let first = corpus
            .add(Testcase::with_filename(
                BytesInput::new(vec![0_u8; 4]),
                "1".into(),
            ))
            .unwrap();
        let second = corpus
            .add(Testcase::with_filename(
                BytesInput::new(vec![1_u8; 4]),
                "2".into(),
            ))
            .unwrap();

        let mut state = StdState::new(rand, corpus, InMemoryCorpus::new(), ());
        scheduler.on_add(&mut state, first).unwrap();
        scheduler.on_add(&mut state, second).unwrap();

        // Three consecutive draws: the first two must agree, the third must differ.
        let pick_a = scheduler.next(&mut state).unwrap();
        let pick_b = scheduler.next(&mut state).unwrap();
        let pick_c = scheduler.next(&mut state).unwrap();
        assert_eq!(pick_a, pick_b);
        assert_ne!(pick_a, pick_c);
    }
}