Add probabilistic sampling corpus scheduler (#544)
* Add probabilistic sampling corpus scheduler
* Linting
* Fix ToOwned error
* Move if-stmt of checking `ProbabilityMetadata` existence and revert powersched removal
* Use `Error::IllegalState` instead of `Error::DivByZero`
This commit is contained in:
parent
679eadcc50
commit
c4fb92a1a4
@ -3,11 +3,8 @@
|
||||
use crate::{
|
||||
bolts::{rands::Rand, AsMutSlice, AsSlice, HasLen, HasRefCnt},
|
||||
corpus::{
|
||||
minimizer::{
|
||||
IsFavoredMetadata, LenTimeMulFavFactor, MinimizerCorpusScheduler,
|
||||
DEFAULT_SKIP_NON_FAVORED_PROB,
|
||||
},
|
||||
Corpus, CorpusScheduler, Testcase,
|
||||
minimizer::{IsFavoredMetadata, MinimizerCorpusScheduler, DEFAULT_SKIP_NON_FAVORED_PROB},
|
||||
Corpus, CorpusScheduler, LenTimeMulFavFactor, Testcase,
|
||||
},
|
||||
feedbacks::MapIndexesMetadata,
|
||||
inputs::Input,
|
||||
|
34
libafl/src/corpus/fav_factor.rs
Normal file
34
libafl/src/corpus/fav_factor.rs
Normal file
@ -0,0 +1,34 @@
|
||||
//! The `FavFactor` is an evaluator providing scores of corpus items.
|
||||
|
||||
use crate::{bolts::HasLen, corpus::Testcase, inputs::Input, Error};
|
||||
|
||||
use core::marker::PhantomData;
|
||||
|
||||
/// Compute the favor factor of a [`Testcase`]. Lower is better.
|
||||
pub trait FavFactor<I>
|
||||
where
|
||||
I: Input,
|
||||
{
|
||||
/// Computes the favor factor of a [`Testcase`]. Lower is better.
|
||||
fn compute(entry: &mut Testcase<I>) -> Result<u64, Error>;
|
||||
}
|
||||
|
||||
/// Multiply the testcase size with the execution time.
|
||||
/// This favors small and quick testcases.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct LenTimeMulFavFactor<I>
|
||||
where
|
||||
I: Input + HasLen,
|
||||
{
|
||||
phantom: PhantomData<I>,
|
||||
}
|
||||
|
||||
impl<I> FavFactor<I> for LenTimeMulFavFactor<I>
|
||||
where
|
||||
I: Input + HasLen,
|
||||
{
|
||||
fn compute(entry: &mut Testcase<I>) -> Result<u64, Error> {
|
||||
// TODO maybe enforce entry.exec_time().is_some()
|
||||
Ok(entry.exec_time().map_or(1, |d| d.as_millis()) as u64 * entry.cached_len()? as u64)
|
||||
}
|
||||
}
|
@ -2,8 +2,8 @@
|
||||
// with testcases only from a subset of the total corpus.
|
||||
|
||||
use crate::{
|
||||
bolts::{rands::Rand, serdeany::SerdeAny, AsSlice, HasLen, HasRefCnt},
|
||||
corpus::{Corpus, CorpusScheduler, Testcase},
|
||||
bolts::{rands::Rand, serdeany::SerdeAny, AsSlice, HasRefCnt},
|
||||
corpus::{Corpus, CorpusScheduler, FavFactor, LenTimeMulFavFactor, Testcase},
|
||||
feedbacks::MapIndexesMetadata,
|
||||
inputs::Input,
|
||||
state::{HasCorpus, HasMetadata, HasRand},
|
||||
@ -48,35 +48,6 @@ impl Default for TopRatedsMetadata {
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute the favor factor of a [`Testcase`]. Lower is better.
|
||||
pub trait FavFactor<I>
|
||||
where
|
||||
I: Input,
|
||||
{
|
||||
/// Computes the favor factor of a [`Testcase`]. Lower is better.
|
||||
fn compute(testcase: &mut Testcase<I>) -> Result<u64, Error>;
|
||||
}
|
||||
|
||||
/// Multiply the testcase size with the execution time.
|
||||
/// This favors small and quick testcases.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct LenTimeMulFavFactor<I>
|
||||
where
|
||||
I: Input + HasLen,
|
||||
{
|
||||
phantom: PhantomData<I>,
|
||||
}
|
||||
|
||||
impl<I> FavFactor<I> for LenTimeMulFavFactor<I>
|
||||
where
|
||||
I: Input + HasLen,
|
||||
{
|
||||
fn compute(entry: &mut Testcase<I>) -> Result<u64, Error> {
|
||||
// TODO maybe enforce entry.exec_time().is_some()
|
||||
Ok(entry.exec_time().map_or(1, |d| d.as_millis()) as u64 * entry.cached_len()? as u64)
|
||||
}
|
||||
}
|
||||
|
||||
/// The [`MinimizerCorpusScheduler`] employs a genetic algorithm to compute a subset of the
|
||||
/// corpus that exercise all the requested features (e.g. all the coverage seen so far)
|
||||
/// prioritizing [`Testcase`]`s` using [`FavFactor`]
|
||||
|
@ -19,14 +19,19 @@ pub use cached::CachedOnDiskCorpus;
|
||||
pub mod queue;
|
||||
pub use queue::QueueCorpusScheduler;
|
||||
|
||||
pub mod probabilistic_sampling;
|
||||
pub use probabilistic_sampling::ProbabilitySamplingCorpusScheduler;
|
||||
|
||||
pub mod accounting;
|
||||
pub use accounting::*;
|
||||
|
||||
pub mod fav_factor;
|
||||
pub use fav_factor::{FavFactor, LenTimeMulFavFactor};
|
||||
|
||||
pub mod minimizer;
|
||||
pub use minimizer::{
|
||||
FavFactor, IndexesLenTimeMinimizerCorpusScheduler, IsFavoredMetadata,
|
||||
LenTimeMinimizerCorpusScheduler, LenTimeMulFavFactor, MinimizerCorpusScheduler,
|
||||
TopRatedsMetadata,
|
||||
IndexesLenTimeMinimizerCorpusScheduler, IsFavoredMetadata, LenTimeMinimizerCorpusScheduler,
|
||||
MinimizerCorpusScheduler, TopRatedsMetadata,
|
||||
};
|
||||
|
||||
pub mod powersched;
|
||||
|
197
libafl/src/corpus/probabilistic_sampling.rs
Normal file
197
libafl/src/corpus/probabilistic_sampling.rs
Normal file
@ -0,0 +1,197 @@
|
||||
//! The probabilistic sampling scheduler is a corpus scheduler that feeds the fuzzer
//! with items sampled from the corpus.
|
||||
|
||||
use crate::{
|
||||
bolts::rands::Rand,
|
||||
corpus::{Corpus, CorpusScheduler, FavFactor},
|
||||
inputs::Input,
|
||||
state::{HasCorpus, HasMetadata, HasRand},
|
||||
Error,
|
||||
};
|
||||
use alloc::string::String;
|
||||
use core::marker::PhantomData;
|
||||
use hashbrown::HashMap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Conduct reservoir sampling (probabilistic sampling) over all corpus elements.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ProbabilitySamplingCorpusScheduler<I, S, F>
|
||||
where
|
||||
I: Input,
|
||||
S: HasCorpus<I> + HasMetadata + HasRand,
|
||||
F: FavFactor<I>,
|
||||
{
|
||||
phantom: PhantomData<(I, S, F)>,
|
||||
}
|
||||
|
||||
/// A state metadata holding a map of probability of corpus elements.
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct ProbabilityMetadata {
|
||||
/// corpus index -> probability
|
||||
pub map: HashMap<usize, f64>,
|
||||
/// total probability of all items in the map
|
||||
pub total_probability: f64,
|
||||
}
|
||||
|
||||
crate::impl_serdeany!(ProbabilityMetadata);
|
||||
|
||||
impl ProbabilityMetadata {
|
||||
/// Creates a new [`struct@ProbabilityMetadata`]
|
||||
#[must_use]
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
map: HashMap::default(),
|
||||
total_probability: 0.0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for ProbabilityMetadata {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl<I, S, F> ProbabilitySamplingCorpusScheduler<I, S, F>
|
||||
where
|
||||
I: Input,
|
||||
S: HasCorpus<I> + HasMetadata + HasRand,
|
||||
F: FavFactor<I>,
|
||||
{
|
||||
/// Creates a new [`struct@ProbabilitySamplingCorpusScheduler`]
|
||||
#[must_use]
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
/// Calculate the score and store in `ProbabilityMetadata`
|
||||
#[allow(clippy::cast_precision_loss)]
|
||||
#[allow(clippy::unused_self)]
|
||||
pub fn store_probability(&self, state: &mut S, idx: usize) -> Result<(), Error> {
|
||||
let factor = F::compute(&mut *state.corpus().get(idx)?.borrow_mut())?;
|
||||
if factor == 0 {
|
||||
return Err(Error::IllegalState(
|
||||
"Infinity probability calculated for probabilistic sampling scheduler".into(),
|
||||
));
|
||||
}
|
||||
let meta = state
|
||||
.metadata_mut()
|
||||
.get_mut::<ProbabilityMetadata>()
|
||||
.unwrap();
|
||||
let prob = 1.0 / (factor as f64);
|
||||
meta.map.insert(idx, prob);
|
||||
meta.total_probability += prob;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl<I, S, F> CorpusScheduler<I, S> for ProbabilitySamplingCorpusScheduler<I, S, F>
|
||||
where
|
||||
I: Input,
|
||||
S: HasCorpus<I> + HasMetadata + HasRand,
|
||||
F: FavFactor<I>,
|
||||
{
|
||||
fn on_add(&self, state: &mut S, idx: usize) -> Result<(), Error> {
|
||||
if state.metadata().get::<ProbabilityMetadata>().is_none() {
|
||||
state.add_metadata(ProbabilityMetadata::new());
|
||||
}
|
||||
self.store_probability(state, idx)
|
||||
}
|
||||
|
||||
/// Gets the next entry
|
||||
#[allow(clippy::cast_precision_loss)]
|
||||
fn next(&self, state: &mut S) -> Result<usize, Error> {
|
||||
if state.corpus().count() == 0 {
|
||||
Err(Error::Empty(String::from("No entries in corpus")))
|
||||
} else {
|
||||
let rand_prob: f64 = (state.rand_mut().below(100) as f64) / 100.0;
|
||||
let meta = state.metadata().get::<ProbabilityMetadata>().unwrap();
|
||||
let threshold = meta.total_probability * rand_prob;
|
||||
let mut k: f64 = 0.0;
|
||||
for (idx, prob) in meta.map.iter() {
|
||||
k += prob;
|
||||
if k >= threshold {
|
||||
return Ok(*idx);
|
||||
}
|
||||
}
|
||||
Ok(*meta.map.keys().last().unwrap())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<I, S, F> Default for ProbabilitySamplingCorpusScheduler<I, S, F>
|
||||
where
|
||||
I: Input,
|
||||
S: HasCorpus<I> + HasMetadata + HasRand,
|
||||
F: FavFactor<I>,
|
||||
{
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
#[cfg(feature = "std")]
mod tests {
    use core::marker::PhantomData;

    use crate::{
        bolts::rands::StdRand,
        corpus::{
            Corpus, CorpusScheduler, FavFactor, InMemoryCorpus, ProbabilitySamplingCorpusScheduler,
            Testcase,
        },
        inputs::{bytes::BytesInput, Input},
        state::StdState,
        Error,
    };

    /// Factor handed out for every testcase by [`UniformDistribution`].
    const FACTOR: u64 = 1337;

    /// A [`FavFactor`] that scores every testcase identically, so all corpus entries end
    /// up with the same probability.
    #[derive(Debug, Clone)]
    pub struct UniformDistribution<I: Input> {
        phantom: PhantomData<I>,
    }

    impl<I: Input> FavFactor<I> for UniformDistribution<I> {
        fn compute(_: &mut Testcase<I>) -> Result<u64, Error> {
            Ok(FACTOR)
        }
    }

    /// The sampling scheduler driven by [`UniformDistribution`].
    pub type UniformProbabilitySamplingCorpusScheduler<I, S> =
        ProbabilitySamplingCorpusScheduler<I, S, UniformDistribution<I>>;

    #[test]
    fn test_prob_sampling() {
        // the first 3 probabilities will be .69, .86, .44
        let rand = StdRand::with_seed(12);

        let scheduler = UniformProbabilitySamplingCorpusScheduler::new();

        let mut corpus = InMemoryCorpus::new();
        let first_case = Testcase::with_filename(BytesInput::new(vec![0_u8; 4]), "1".into());
        let second_case = Testcase::with_filename(BytesInput::new(vec![1_u8; 4]), "2".into());

        let first_idx = corpus.add(first_case).unwrap();
        let second_idx = corpus.add(second_case).unwrap();

        let mut state = StdState::new(rand, corpus, InMemoryCorpus::new(), ());
        scheduler.on_add(&mut state, first_idx).unwrap();
        scheduler.on_add(&mut state, second_idx).unwrap();

        // With two equally weighted entries the running sum is half the total after the
        // first visited entry, so draws above .5 select the other entry.
        let pick_a = scheduler.next(&mut state).unwrap();
        let pick_b = scheduler.next(&mut state).unwrap();
        let pick_c = scheduler.next(&mut state).unwrap();
        assert_eq!(pick_a, pick_b);
        assert_ne!(pick_a, pick_c);
    }
}
|
Loading…
x
Reference in New Issue
Block a user