Add probabilistic sampling corpus scheduler (#544)
* Add probabilistic sampling corpus scheduler * Linting * Fix ToOwned error * Move if-stmt of checking `ProbabilityMetadata` existence and revert powersched removal * Use `Error::IllegalState` instead of `Error::DivByZero`
This commit is contained in:
parent
679eadcc50
commit
c4fb92a1a4
@ -3,11 +3,8 @@
|
|||||||
use crate::{
|
use crate::{
|
||||||
bolts::{rands::Rand, AsMutSlice, AsSlice, HasLen, HasRefCnt},
|
bolts::{rands::Rand, AsMutSlice, AsSlice, HasLen, HasRefCnt},
|
||||||
corpus::{
|
corpus::{
|
||||||
minimizer::{
|
minimizer::{IsFavoredMetadata, MinimizerCorpusScheduler, DEFAULT_SKIP_NON_FAVORED_PROB},
|
||||||
IsFavoredMetadata, LenTimeMulFavFactor, MinimizerCorpusScheduler,
|
Corpus, CorpusScheduler, LenTimeMulFavFactor, Testcase,
|
||||||
DEFAULT_SKIP_NON_FAVORED_PROB,
|
|
||||||
},
|
|
||||||
Corpus, CorpusScheduler, Testcase,
|
|
||||||
},
|
},
|
||||||
feedbacks::MapIndexesMetadata,
|
feedbacks::MapIndexesMetadata,
|
||||||
inputs::Input,
|
inputs::Input,
|
||||||
|
34
libafl/src/corpus/fav_factor.rs
Normal file
34
libafl/src/corpus/fav_factor.rs
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
//! The `FavFactor` is an evaluator providing scores of corpus items.
|
||||||
|
|
||||||
|
use crate::{bolts::HasLen, corpus::Testcase, inputs::Input, Error};
|
||||||
|
|
||||||
|
use core::marker::PhantomData;
|
||||||
|
|
||||||
|
/// Compute the favor factor of a [`Testcase`]. Lower is better.
|
||||||
|
pub trait FavFactor<I>
|
||||||
|
where
|
||||||
|
I: Input,
|
||||||
|
{
|
||||||
|
/// Computes the favor factor of a [`Testcase`]. Lower is better.
|
||||||
|
fn compute(entry: &mut Testcase<I>) -> Result<u64, Error>;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Multiply the testcase size with the execution time.
|
||||||
|
/// This favors small and quick testcases.
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct LenTimeMulFavFactor<I>
|
||||||
|
where
|
||||||
|
I: Input + HasLen,
|
||||||
|
{
|
||||||
|
phantom: PhantomData<I>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<I> FavFactor<I> for LenTimeMulFavFactor<I>
|
||||||
|
where
|
||||||
|
I: Input + HasLen,
|
||||||
|
{
|
||||||
|
fn compute(entry: &mut Testcase<I>) -> Result<u64, Error> {
|
||||||
|
// TODO maybe enforce entry.exec_time().is_some()
|
||||||
|
Ok(entry.exec_time().map_or(1, |d| d.as_millis()) as u64 * entry.cached_len()? as u64)
|
||||||
|
}
|
||||||
|
}
|
@ -2,8 +2,8 @@
|
|||||||
// with testcases only from a subset of the total corpus.
|
// with testcases only from a subset of the total corpus.
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
bolts::{rands::Rand, serdeany::SerdeAny, AsSlice, HasLen, HasRefCnt},
|
bolts::{rands::Rand, serdeany::SerdeAny, AsSlice, HasRefCnt},
|
||||||
corpus::{Corpus, CorpusScheduler, Testcase},
|
corpus::{Corpus, CorpusScheduler, FavFactor, LenTimeMulFavFactor, Testcase},
|
||||||
feedbacks::MapIndexesMetadata,
|
feedbacks::MapIndexesMetadata,
|
||||||
inputs::Input,
|
inputs::Input,
|
||||||
state::{HasCorpus, HasMetadata, HasRand},
|
state::{HasCorpus, HasMetadata, HasRand},
|
||||||
@ -48,35 +48,6 @@ impl Default for TopRatedsMetadata {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Compute the favor factor of a [`Testcase`]. Lower is better.
|
|
||||||
pub trait FavFactor<I>
|
|
||||||
where
|
|
||||||
I: Input,
|
|
||||||
{
|
|
||||||
/// Computes the favor factor of a [`Testcase`]. Lower is better.
|
|
||||||
fn compute(testcase: &mut Testcase<I>) -> Result<u64, Error>;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Multiply the testcase size with the execution time.
|
|
||||||
/// This favors small and quick testcases.
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub struct LenTimeMulFavFactor<I>
|
|
||||||
where
|
|
||||||
I: Input + HasLen,
|
|
||||||
{
|
|
||||||
phantom: PhantomData<I>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<I> FavFactor<I> for LenTimeMulFavFactor<I>
|
|
||||||
where
|
|
||||||
I: Input + HasLen,
|
|
||||||
{
|
|
||||||
fn compute(entry: &mut Testcase<I>) -> Result<u64, Error> {
|
|
||||||
// TODO maybe enforce entry.exec_time().is_some()
|
|
||||||
Ok(entry.exec_time().map_or(1, |d| d.as_millis()) as u64 * entry.cached_len()? as u64)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// The [`MinimizerCorpusScheduler`] employs a genetic algorithm to compute a subset of the
|
/// The [`MinimizerCorpusScheduler`] employs a genetic algorithm to compute a subset of the
|
||||||
/// corpus that exercise all the requested features (e.g. all the coverage seen so far)
|
/// corpus that exercise all the requested features (e.g. all the coverage seen so far)
|
||||||
/// prioritizing [`Testcase`]`s` using [`FavFactor`]
|
/// prioritizing [`Testcase`]`s` using [`FavFactor`]
|
||||||
|
@ -19,14 +19,19 @@ pub use cached::CachedOnDiskCorpus;
|
|||||||
pub mod queue;
|
pub mod queue;
|
||||||
pub use queue::QueueCorpusScheduler;
|
pub use queue::QueueCorpusScheduler;
|
||||||
|
|
||||||
|
pub mod probabilistic_sampling;
|
||||||
|
pub use probabilistic_sampling::ProbabilitySamplingCorpusScheduler;
|
||||||
|
|
||||||
pub mod accounting;
|
pub mod accounting;
|
||||||
pub use accounting::*;
|
pub use accounting::*;
|
||||||
|
|
||||||
|
pub mod fav_factor;
|
||||||
|
pub use fav_factor::{FavFactor, LenTimeMulFavFactor};
|
||||||
|
|
||||||
pub mod minimizer;
|
pub mod minimizer;
|
||||||
pub use minimizer::{
|
pub use minimizer::{
|
||||||
FavFactor, IndexesLenTimeMinimizerCorpusScheduler, IsFavoredMetadata,
|
IndexesLenTimeMinimizerCorpusScheduler, IsFavoredMetadata, LenTimeMinimizerCorpusScheduler,
|
||||||
LenTimeMinimizerCorpusScheduler, LenTimeMulFavFactor, MinimizerCorpusScheduler,
|
MinimizerCorpusScheduler, TopRatedsMetadata,
|
||||||
TopRatedsMetadata,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
pub mod powersched;
|
pub mod powersched;
|
||||||
|
197
libafl/src/corpus/probabilistic_sampling.rs
Normal file
197
libafl/src/corpus/probabilistic_sampling.rs
Normal file
@ -0,0 +1,197 @@
|
|||||||
|
//! The probabilistic sampling scheduler is a corpus scheduler that feeds the fuzzer
|
||||||
|
//! with items sampled from the corpus.
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
bolts::rands::Rand,
|
||||||
|
corpus::{Corpus, CorpusScheduler, FavFactor},
|
||||||
|
inputs::Input,
|
||||||
|
state::{HasCorpus, HasMetadata, HasRand},
|
||||||
|
Error,
|
||||||
|
};
|
||||||
|
use alloc::string::String;
|
||||||
|
use core::marker::PhantomData;
|
||||||
|
use hashbrown::HashMap;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
/// Conduct reservoir sampling (probabilistic sampling) over all corpus elements.
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct ProbabilitySamplingCorpusScheduler<I, S, F>
|
||||||
|
where
|
||||||
|
I: Input,
|
||||||
|
S: HasCorpus<I> + HasMetadata + HasRand,
|
||||||
|
F: FavFactor<I>,
|
||||||
|
{
|
||||||
|
phantom: PhantomData<(I, S, F)>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A state metadata holding a map of probability of corpus elements.
|
||||||
|
#[derive(Debug, Serialize, Deserialize)]
|
||||||
|
pub struct ProbabilityMetadata {
|
||||||
|
/// corpus index -> probability
|
||||||
|
pub map: HashMap<usize, f64>,
|
||||||
|
/// total probability of all items in the map
|
||||||
|
pub total_probability: f64,
|
||||||
|
}
|
||||||
|
|
||||||
|
crate::impl_serdeany!(ProbabilityMetadata);
|
||||||
|
|
||||||
|
impl ProbabilityMetadata {
|
||||||
|
/// Creates a new [`struct@ProbabilityMetadata`]
|
||||||
|
#[must_use]
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
map: HashMap::default(),
|
||||||
|
total_probability: 0.0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for ProbabilityMetadata {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self::new()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<I, S, F> ProbabilitySamplingCorpusScheduler<I, S, F>
|
||||||
|
where
|
||||||
|
I: Input,
|
||||||
|
S: HasCorpus<I> + HasMetadata + HasRand,
|
||||||
|
F: FavFactor<I>,
|
||||||
|
{
|
||||||
|
/// Creates a new [`struct@ProbabilitySamplingCorpusScheduler`]
|
||||||
|
#[must_use]
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
phantom: PhantomData,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Calculate the score and store in `ProbabilityMetadata`
|
||||||
|
#[allow(clippy::cast_precision_loss)]
|
||||||
|
#[allow(clippy::unused_self)]
|
||||||
|
pub fn store_probability(&self, state: &mut S, idx: usize) -> Result<(), Error> {
|
||||||
|
let factor = F::compute(&mut *state.corpus().get(idx)?.borrow_mut())?;
|
||||||
|
if factor == 0 {
|
||||||
|
return Err(Error::IllegalState(
|
||||||
|
"Infinity probability calculated for probabilistic sampling scheduler".into(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
let meta = state
|
||||||
|
.metadata_mut()
|
||||||
|
.get_mut::<ProbabilityMetadata>()
|
||||||
|
.unwrap();
|
||||||
|
let prob = 1.0 / (factor as f64);
|
||||||
|
meta.map.insert(idx, prob);
|
||||||
|
meta.total_probability += prob;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<I, S, F> CorpusScheduler<I, S> for ProbabilitySamplingCorpusScheduler<I, S, F>
|
||||||
|
where
|
||||||
|
I: Input,
|
||||||
|
S: HasCorpus<I> + HasMetadata + HasRand,
|
||||||
|
F: FavFactor<I>,
|
||||||
|
{
|
||||||
|
fn on_add(&self, state: &mut S, idx: usize) -> Result<(), Error> {
|
||||||
|
if state.metadata().get::<ProbabilityMetadata>().is_none() {
|
||||||
|
state.add_metadata(ProbabilityMetadata::new());
|
||||||
|
}
|
||||||
|
self.store_probability(state, idx)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Gets the next entry
|
||||||
|
#[allow(clippy::cast_precision_loss)]
|
||||||
|
fn next(&self, state: &mut S) -> Result<usize, Error> {
|
||||||
|
if state.corpus().count() == 0 {
|
||||||
|
Err(Error::Empty(String::from("No entries in corpus")))
|
||||||
|
} else {
|
||||||
|
let rand_prob: f64 = (state.rand_mut().below(100) as f64) / 100.0;
|
||||||
|
let meta = state.metadata().get::<ProbabilityMetadata>().unwrap();
|
||||||
|
let threshold = meta.total_probability * rand_prob;
|
||||||
|
let mut k: f64 = 0.0;
|
||||||
|
for (idx, prob) in meta.map.iter() {
|
||||||
|
k += prob;
|
||||||
|
if k >= threshold {
|
||||||
|
return Ok(*idx);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(*meta.map.keys().last().unwrap())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<I, S, F> Default for ProbabilitySamplingCorpusScheduler<I, S, F>
|
||||||
|
where
|
||||||
|
I: Input,
|
||||||
|
S: HasCorpus<I> + HasMetadata + HasRand,
|
||||||
|
F: FavFactor<I>,
|
||||||
|
{
|
||||||
|
fn default() -> Self {
|
||||||
|
Self::new()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
#[cfg(feature = "std")]
mod tests {
    use crate::{
        bolts::rands::StdRand,
        corpus::{
            Corpus, CorpusScheduler, FavFactor, InMemoryCorpus, ProbabilitySamplingCorpusScheduler,
            Testcase,
        },
        inputs::{bytes::BytesInput, Input},
        state::StdState,
        Error,
    };
    use core::marker::PhantomData;

    /// The constant score handed to every testcase.
    const FACTOR: u64 = 1337;

    /// A [`FavFactor`] that gives every testcase the same score, so all corpus
    /// entries end up with equal sampling probability.
    #[derive(Debug, Clone)]
    pub struct UniformDistribution<I>
    where
        I: Input,
    {
        phantom: PhantomData<I>,
    }

    impl<I> FavFactor<I> for UniformDistribution<I>
    where
        I: Input,
    {
        fn compute(_: &mut Testcase<I>) -> Result<u64, Error> {
            Ok(FACTOR)
        }
    }

    /// The scheduler under test, parameterized with the uniform score.
    pub type UniformProbabilitySamplingCorpusScheduler<I, S> =
        ProbabilitySamplingCorpusScheduler<I, S, UniformDistribution<I>>;

    #[test]
    fn test_prob_sampling() {
        // the first 3 probabilities will be .69, .86, .44
        let seeded_rand = StdRand::with_seed(12);

        let sched = UniformProbabilitySamplingCorpusScheduler::new();

        let mut in_memory = InMemoryCorpus::new();
        let first_case = Testcase::with_filename(BytesInput::new(vec![0_u8; 4]), "1".into());
        let second_case = Testcase::with_filename(BytesInput::new(vec![1_u8; 4]), "2".into());

        let first_idx = in_memory.add(first_case).unwrap();
        let second_idx = in_memory.add(second_case).unwrap();

        let mut state = StdState::new(seeded_rand, in_memory, InMemoryCorpus::new(), ());
        sched.on_add(&mut state, first_idx).unwrap();
        sched.on_add(&mut state, second_idx).unwrap();

        // With two equally weighted entries, draws above one half select the second
        // visited entry and draws below it select the first.
        let pick_a = sched.next(&mut state).unwrap();
        let pick_b = sched.next(&mut state).unwrap();
        let pick_c = sched.next(&mut state).unwrap();
        assert_eq!(pick_a, pick_b);
        assert_ne!(pick_a, pick_c);
    }
}
|
Loading…
x
Reference in New Issue
Block a user