Introduce MutationId, Tuneable fixes (#1022)

* Add simpler APIs for TunableStage

* Make API usable

* Add TuneableScheduledMutator APIs

* Introduce MutationId

* More API

* Cleanup

* add sampling-based mutation scheduling

* reduce precision for sampling

* clippy
This commit is contained in:
Dominik Maier 2023-01-30 18:04:42 +01:00 committed by GitHub
parent b927fc9b06
commit 33ddce2cea
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 179 additions and 50 deletions

View File

@ -141,7 +141,7 @@ pub fn main() -> Result<(), Error> {
&mut executor, &mut executor,
&mut state, &mut state,
&mut mgr, &mut mgr,
CorpusId::from(0usize), CorpusId::from(0_usize),
)?; )?;
Ok(()) Ok(())

View File

@ -1190,8 +1190,7 @@ mod windows_exception_handler {
.ExceptionAddress as usize; .ExceptionAddress as usize;
eprintln!( eprintln!(
"We crashed at addr 0x{:x}, but are not in the target... Bug in the fuzzer? Exiting.", "We crashed at addr 0x{crash_addr:x}, but are not in the target... Bug in the fuzzer? Exiting."
crash_addr
); );
} }
#[cfg(feature = "std")] #[cfg(feature = "std")]

View File

@ -1,10 +1,13 @@
//! Mutators mutate input during fuzzing. //! Mutators mutate input during fuzzing.
pub mod scheduled; pub mod scheduled;
use core::fmt;
pub use scheduled::*; pub use scheduled::*;
pub mod mutations; pub mod mutations;
pub use mutations::*; pub use mutations::*;
pub mod token_mutations; pub mod token_mutations;
use serde::{Deserialize, Serialize};
pub use token_mutations::*; pub use token_mutations::*;
pub mod encoded_mutations; pub mod encoded_mutations;
pub use encoded_mutations::*; pub use encoded_mutations::*;
@ -31,6 +34,37 @@ use crate::{
// TODO mutator stats method that produces something that can be sent with the NewTestcase event // TODO mutator stats method that produces something that can be sent with the NewTestcase event
// We can use it to report which mutations generated the testcase in the broker logs // We can use it to report which mutations generated the testcase in the broker logs
/// The index of a mutation in the mutations tuple.
///
/// A newtype around `usize`; `#[repr(transparent)]` gives it the same layout as the wrapped
/// index. The inner value is `pub(crate)`, so code outside this crate builds one through the
/// `From` impls for integer types.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
#[repr(transparent)]
pub struct MutationId(pub(crate) usize);
impl fmt::Display for MutationId {
    /// Writes the id as `MutationId(<index>)`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_fmt(format_args!("MutationId({})", self.0))
    }
}
impl From<usize> for MutationId {
fn from(value: usize) -> Self {
MutationId(value)
}
}
impl From<u64> for MutationId {
    /// Converts a `u64` index into a [`MutationId`].
    ///
    /// On targets where `usize` is narrower than 64 bits the `as` cast keeps only the low
    /// bits. Debug builds assert that the value fits, in the same way the `From<i32>` impl
    /// asserts that its input is not negative.
    // The truncating cast is intentional; out-of-range input is caught by the debug assertion.
    #[allow(clippy::cast_possible_truncation)]
    fn from(value: u64) -> Self {
        debug_assert!(
            <usize as core::convert::TryFrom<u64>>::try_from(value).is_ok(),
            "MutationId: u64 value does not fit into usize"
        );
        MutationId(value as usize)
    }
}
impl From<i32> for MutationId {
#[allow(clippy::cast_sign_loss)]
fn from(value: i32) -> Self {
debug_assert!(value >= 0);
MutationId(value as usize)
}
}
/// The result of a mutation. /// The result of a mutation.
/// If the mutation got skipped, the target /// If the mutation got skipped, the target
/// will not be executed with the returned input. /// will not be executed with the returned input.
@ -85,7 +119,7 @@ pub trait MutatorsTuple<I, S>: HasConstLen {
/// Gets the [`Mutator`] at the given index and runs the `mutate` function on it. /// Gets the [`Mutator`] at the given index and runs the `mutate` function on it.
fn get_and_mutate( fn get_and_mutate(
&mut self, &mut self,
index: usize, index: MutationId,
state: &mut S, state: &mut S,
input: &mut I, input: &mut I,
stage_idx: i32, stage_idx: i32,
@ -122,7 +156,7 @@ impl<I, S> MutatorsTuple<I, S> for () {
fn get_and_mutate( fn get_and_mutate(
&mut self, &mut self,
_index: usize, _index: MutationId,
_state: &mut S, _state: &mut S,
_input: &mut I, _input: &mut I,
_stage_idx: i32, _stage_idx: i32,
@ -172,15 +206,16 @@ where
fn get_and_mutate( fn get_and_mutate(
&mut self, &mut self,
index: usize, index: MutationId,
state: &mut S, state: &mut S,
input: &mut I, input: &mut I,
stage_idx: i32, stage_idx: i32,
) -> Result<MutationResult, Error> { ) -> Result<MutationResult, Error> {
if index == 0 { if index.0 == 0 {
self.0.mutate(state, input, stage_idx) self.0.mutate(state, input, stage_idx)
} else { } else {
self.1.get_and_mutate(index - 1, state, input, stage_idx) self.1
.get_and_mutate((index.0 - 1).into(), state, input, stage_idx)
} }
} }

View File

@ -7,6 +7,7 @@ use core::{
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use super::MutationId;
use crate::{ use crate::{
bolts::rands::{Rand, StdRand}, bolts::rands::{Rand, StdRand},
corpus::{Corpus, CorpusId}, corpus::{Corpus, CorpusId},
@ -310,7 +311,7 @@ impl MOpt {
/// This function is used to decide the operator that we want to apply next /// This function is used to decide the operator that we want to apply next
/// see <https://github.com/puppet-meteor/MOpt-AFL/blob/master/MOpt/afl-fuzz.c#L397> /// see <https://github.com/puppet-meteor/MOpt-AFL/blob/master/MOpt/afl-fuzz.c#L397>
#[allow(clippy::cast_precision_loss)] #[allow(clippy::cast_precision_loss)]
pub fn select_algorithm(&mut self) -> Result<usize, Error> { pub fn select_algorithm(&mut self) -> Result<MutationId, Error> {
let mut res = 0; let mut res = 0;
let mut sentry = 0; let mut sentry = 0;
@ -341,7 +342,7 @@ impl MOpt {
"MOpt: Error in select_algorithm".to_string(), "MOpt: Error in select_algorithm".to_string(),
)); ));
} }
Ok(res) Ok(res.into())
} }
} }
@ -570,7 +571,7 @@ where
.metadata_mut() .metadata_mut()
.get_mut::<MOpt>() .get_mut::<MOpt>()
.unwrap() .unwrap()
.core_operator_cycles_v2[idx] += 1; .core_operator_cycles_v2[idx.0] += 1;
} }
Ok(r) Ok(r)
} }
@ -606,7 +607,7 @@ where
.metadata_mut() .metadata_mut()
.get_mut::<MOpt>() .get_mut::<MOpt>()
.unwrap() .unwrap()
.pilot_operator_cycles_v2[swarm_now][idx] += 1; .pilot_operator_cycles_v2[swarm_now][idx.0] += 1;
} }
Ok(r) Ok(r)
@ -643,7 +644,7 @@ where
/// Get the next mutation to apply /// Get the next mutation to apply
#[inline] #[inline]
fn schedule(&self, state: &mut S, _: &I) -> usize { fn schedule(&self, state: &mut S, _: &I) -> MutationId {
state state
.metadata_mut() .metadata_mut()
.get_mut::<MOpt>() .get_mut::<MOpt>()

View File

@ -1249,7 +1249,7 @@ mod tests {
for input in &inputs { for input in &inputs {
let mut mutant = input.clone(); let mut mutant = input.clone();
match mutations match mutations
.get_and_mutate(idx, &mut state, &mut mutant, 0) .get_and_mutate(idx.into(), &mut state, &mut mutant, 0)
.unwrap() .unwrap()
{ {
MutationResult::Mutated => new_testcases.push(mutant), MutationResult::Mutated => new_testcases.push(mutant),

View File

@ -8,6 +8,7 @@ use core::{
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use super::MutationId;
pub use crate::mutators::{mutations::*, token_mutations::*}; pub use crate::mutators::{mutations::*, token_mutations::*};
use crate::{ use crate::{
bolts::{ bolts::{
@ -74,7 +75,7 @@ where
fn iterations(&self, state: &mut S, input: &I) -> u64; fn iterations(&self, state: &mut S, input: &I) -> u64;
/// Get the next mutation to apply /// Get the next mutation to apply
fn schedule(&self, state: &mut S, input: &I) -> usize; fn schedule(&self, state: &mut S, input: &I) -> MutationId;
/// New default implementation for mutate. /// New default implementation for mutate.
/// Implementations must forward mutate() to this method /// Implementations must forward mutate() to this method
@ -170,9 +171,9 @@ where
} }
/// Get the next mutation to apply /// Get the next mutation to apply
fn schedule(&self, state: &mut S, _: &I) -> usize { fn schedule(&self, state: &mut S, _: &I) -> MutationId {
debug_assert!(!self.mutations().is_empty()); debug_assert!(!self.mutations().is_empty());
state.rand_mut().below(self.mutations().len() as u64) as usize state.rand_mut().below(self.mutations().len() as u64).into()
} }
} }
@ -279,7 +280,7 @@ where
SM: ScheduledMutator<I, MT, S>, SM: ScheduledMutator<I, MT, S>,
{ {
scheduled: SM, scheduled: SM,
mutation_log: Vec<usize>, mutation_log: Vec<MutationId>,
phantom: PhantomData<(I, MT, S)>, phantom: PhantomData<(I, MT, S)>,
} }
@ -324,7 +325,7 @@ where
let mut testcase = (*state.corpus_mut().get(idx)?).borrow_mut(); let mut testcase = (*state.corpus_mut().get(idx)?).borrow_mut();
let mut log = Vec::<String>::new(); let mut log = Vec::<String>::new();
while let Some(idx) = self.mutation_log.pop() { while let Some(idx) = self.mutation_log.pop() {
let name = String::from(self.scheduled.mutations().name(idx).unwrap()); // TODO maybe return an Error on None let name = String::from(self.scheduled.mutations().name(idx.0).unwrap()); // TODO maybe return an Error on None
log.push(name); log.push(name);
} }
let meta = LogMutationMetadata::new(log); let meta = LogMutationMetadata::new(log);
@ -365,11 +366,12 @@ where
} }
/// Get the next mutation to apply /// Get the next mutation to apply
fn schedule(&self, state: &mut S, _: &I) -> usize { fn schedule(&self, state: &mut S, _: &I) -> MutationId {
debug_assert!(!self.scheduled.mutations().is_empty()); debug_assert!(!self.scheduled.mutations().is_empty());
state state
.rand_mut() .rand_mut()
.below(self.scheduled.mutations().len() as u64) as usize .below(self.scheduled.mutations().len() as u64)
.into()
} }
fn scheduled_mutate( fn scheduled_mutate(

View File

@ -14,24 +14,42 @@ pub use crate::mutators::{mutations::*, token_mutations::*};
use crate::{ use crate::{
bolts::rands::Rand, bolts::rands::Rand,
impl_serdeany, impl_serdeany,
mutators::{ComposedByMutations, MutationResult, Mutator, MutatorsTuple, ScheduledMutator}, mutators::{
ComposedByMutations, MutationId, MutationResult, Mutator, MutatorsTuple, ScheduledMutator,
},
state::{HasMetadata, HasRand}, state::{HasMetadata, HasRand},
Error, Error,
}; };
/// Metadata in the state, that controls the behavior of the [`TuneableScheduledMutator`] at runtime /// Metadata in the state, that controls the behavior of the [`TuneableScheduledMutator`] at runtime
#[derive(Default, Clone, Eq, PartialEq, Debug, Serialize, Deserialize)] #[derive(Clone, PartialEq, Debug, Serialize, Deserialize)]
pub struct TuneableScheduledMutatorMetadata { pub struct TuneableScheduledMutatorMetadata {
/// The offsets of mutators to run, in order. Clear to fall back to random. /// The offsets of mutators to run, in order. Clear to fall back to random,
pub next: Vec<usize>, /// or use `mutation_probabilities`
pub mutation_ids: Vec<MutationId>,
/// The next index to read from in the `next` vec /// The next index to read from in the `mutation_ids` vec
pub next_idx: usize, pub next_id: MutationId,
/// The cumulative probability distribution for each mutation.
/// Will not be used when `mutation_ids` are set.
/// Clear to fall back to random.
pub mutation_probabilities_cumulative: Vec<f32>,
/// The count of total mutations to perform. /// The count of total mutations to perform.
/// If `next` is of length `10`, and this number is `20`, /// If `mutation_ids` is of length `10`, and this number is `20`,
/// the mutations will be iterated through twice. /// the mutations will be iterated through twice.
pub iters: Option<u64>, pub iters: Option<u64>,
} }
impl Default for TuneableScheduledMutatorMetadata {
fn default() -> Self {
Self {
mutation_ids: Vec::default(),
next_id: 0.into(),
mutation_probabilities_cumulative: Vec::default(),
iters: None,
}
}
}
impl TuneableScheduledMutatorMetadata { impl TuneableScheduledMutatorMetadata {
/// Gets the stored metadata, used to alter the [`TuneableScheduledMutator`] behavior /// Gets the stored metadata, used to alter the [`TuneableScheduledMutator`] behavior
pub fn get<S: HasMetadata>(state: &S) -> Result<&Self, Error> { pub fn get<S: HasMetadata>(state: &S) -> Result<&Self, Error> {
@ -89,9 +107,9 @@ where
&mut self, &mut self,
state: &mut S, state: &mut S,
input: &mut I, input: &mut I,
stage_idx: i32, stage_id: i32,
) -> Result<MutationResult, Error> { ) -> Result<MutationResult, Error> {
self.scheduled_mutate(state, input, stage_idx) self.scheduled_mutate(state, input, stage_id)
} }
} }
@ -120,7 +138,7 @@ where
{ {
/// Compute the number of iterations used to apply stacked mutations /// Compute the number of iterations used to apply stacked mutations
fn iterations(&self, state: &mut S, _: &I) -> u64 { fn iterations(&self, state: &mut S, _: &I) -> u64 {
if let Some(iters) = Self::get_iters(state) { if let Some(iters) = TuneableScheduledMutator::get_iters(state) {
iters iters
} else { } else {
// fall back to random // fall back to random
@ -129,26 +147,41 @@ where
} }
/// Get the next mutation to apply /// Get the next mutation to apply
fn schedule(&self, state: &mut S, _: &I) -> usize { fn schedule(&self, state: &mut S, _: &I) -> MutationId {
debug_assert!(!self.mutations().is_empty()); debug_assert!(!self.mutations().is_empty());
// Assumption: we can not reach this code path without previously adding this metadatum. // Assumption: we can not reach this code path without previously adding this metadatum.
let metadata = TuneableScheduledMutatorMetadata::get_mut(state).unwrap(); let metadata = TuneableScheduledMutatorMetadata::get_mut(state).unwrap();
#[allow(clippy::cast_possible_truncation)] #[allow(clippy::cast_possible_truncation)]
if metadata.next.is_empty() { if metadata.mutation_ids.is_empty() {
// fall back to random if no entries in the vec if metadata.mutation_probabilities_cumulative.is_empty() {
state.rand_mut().below(self.mutations().len() as u64) as usize // fall back to random if no entries in either vec, the scheduling is not tuned.
return state.rand_mut().below(self.mutations().len() as u64).into();
}
} else { } else {
let ret = metadata.next[metadata.next_idx]; // using pre-set ids.
metadata.next_idx += 1_usize; let ret = metadata.mutation_ids[metadata.next_id.0];
if metadata.next_idx >= metadata.next.len() { metadata.next_id.0 += 1_usize;
metadata.next_idx = 0; if metadata.next_id.0 >= metadata.mutation_ids.len() {
metadata.next_id = 0.into();
} }
debug_assert!( debug_assert!(
self.mutations().len() > ret, self.mutations().len() > ret.0,
"TuneableScheduler: next vec may not contain idx larger than number of mutations!" "TuneableScheduler: next vec may not contain id larger than number of mutations!"
); );
ret return ret;
} }
// We will sample using the mutation probabilities.
// Doing this outside of the original if branch to make the borrow checker happy.
#[allow(clippy::cast_precision_loss)]
let coin = state.rand_mut().next() as f32 / u64::MAX as f32;
let metadata = TuneableScheduledMutatorMetadata::get_mut(state).unwrap();
metadata
.mutation_probabilities_cumulative
.iter()
.position(|i| *i >= coin)
.unwrap()
.into()
} }
} }
@ -168,7 +201,12 @@ where
phantom: PhantomData, phantom: PhantomData,
} }
} }
}
impl<S> TuneableScheduledMutator<(), (), S>
where
S: HasRand + HasMetadata,
{
fn metadata_mut(state: &mut S) -> &mut TuneableScheduledMutatorMetadata { fn metadata_mut(state: &mut S) -> &mut TuneableScheduledMutatorMetadata {
state state
.metadata_mut() .metadata_mut()
@ -185,7 +223,7 @@ where
/// Sets the next iterations count, i.e., how many times to mutate the input /// Sets the next iterations count, i.e., how many times to mutate the input
/// ///
/// Using `set_next_and_iter` to set multiple values at the same time /// Using `set_mutation_ids_and_iters` to set multiple values at the same time
/// will be faster than setting them individually /// will be faster than setting them individually
/// as it internally only needs a single metadata lookup /// as it internally only needs a single metadata lookup
pub fn set_iters(state: &mut S, iters: u64) { pub fn set_iters(state: &mut S, iters: u64) {
@ -193,17 +231,59 @@ where
metadata.iters = Some(iters); metadata.iters = Some(iters);
} }
/// Gets the set iterations /// Gets the set amount of iterations
pub fn get_iters(state: &S) -> Option<u64> { pub fn get_iters(state: &S) -> Option<u64> {
let metadata = Self::metadata(state); let metadata = Self::metadata(state);
metadata.iters metadata.iters
} }
/// Sets the mutation ids
pub fn set_mutation_ids(state: &mut S, mutations: Vec<MutationId>) {
let metadata = TuneableScheduledMutatorMetadata::get_mut(state).unwrap();
metadata.mutation_ids = mutations;
metadata.next_id = 0.into();
}
/// Sets the mutation probabilities.
///
/// The `Vec` should ideally contain one value per [`MutationId`]. The values are stored as a
/// running (cumulative) sum so that sampling later only has to find the first entry that
/// reaches the drawn value.
/// Setting the probabilities will remove the value set through `set_mutation_ids`.
///
/// If the running total ends slightly below `1.0` (within a small rounding tolerance), the
/// trailing entries that carry that total are set to exactly `1.0`. Without this, a sampled
/// value above the total would match no entry at all. Totals further away from `1.0` are
/// stored unchanged.
///
/// # Panics
/// Panics if the [`TuneableScheduledMutatorMetadata`] cannot be fetched from `state`.
pub fn set_mutation_probabilities(state: &mut S, mutation_probabilities: Vec<f32>) {
    // Heuristic bound on how far below 1.0 the total may land purely through f32 rounding.
    const ROUNDING_TOLERANCE: f32 = 1e-4;

    // Precalculate the cumulative probability to be faster when sampling later.
    let mut cumulative = mutation_probabilities;
    let mut running_total = 0.0_f32;
    for entry in &mut cumulative {
        running_total += *entry;
        *entry = running_total;
    }

    if let Some(&total) = cumulative.last() {
        if total < 1.0 && 1.0 - total <= ROUNDING_TOLERANCE {
            // Assuming non-negative probabilities, the entries not below `total` are exactly
            // the tail that equals it (the last real step plus any zero-probability entries).
            for entry in cumulative.iter_mut().rev() {
                if *entry < total {
                    break;
                }
                *entry = 1.0;
            }
        }
    }

    let metadata = TuneableScheduledMutatorMetadata::get_mut(state).unwrap();
    metadata.mutation_ids.clear();
    metadata.next_id = 0.into();
    metadata.mutation_probabilities_cumulative = cumulative;
}
/// Sets the mutation ids and the iterations count together, using a single metadata lookup.
///
/// The read position into the ids is rewound to the start.
///
/// # Panics
/// Panics if the [`TuneableScheduledMutatorMetadata`] cannot be fetched from `state`.
pub fn set_mutation_ids_and_iters(state: &mut S, mutations: Vec<MutationId>, iters: u64) {
    let tuning = TuneableScheduledMutatorMetadata::get_mut(state).unwrap();
    tuning.iters = Some(iters);
    tuning.next_id = 0.into();
    tuning.mutation_ids = mutations;
}
/// Appends a single mutation id after the ids that are already set.
///
/// The current read position is left untouched.
///
/// # Panics
/// Panics if the [`TuneableScheduledMutatorMetadata`] cannot be fetched from `state`.
pub fn push_mutation_id(state: &mut S, mutation_id: MutationId) {
    TuneableScheduledMutatorMetadata::get_mut(state)
        .unwrap()
        .mutation_ids
        .push(mutation_id);
}
/// Resets this to a randomic mutational stage /// Resets this to a randomic mutational stage
pub fn reset(state: &mut S) { pub fn reset(state: &mut S) {
let metadata = Self::metadata_mut(state); let metadata = Self::metadata_mut(state);
metadata.next.clear(); metadata.mutation_ids.clear();
metadata.next_idx = 0; metadata.next_id = 0.into();
metadata.iters = None; metadata.iters = None;
} }
} }
@ -231,10 +311,10 @@ mod test {
let tuneable = TuneableScheduledMutator::new(&mut state, mutators); let tuneable = TuneableScheduledMutator::new(&mut state, mutators);
let input = BytesInput::new(vec![42]); let input = BytesInput::new(vec![42]);
let metadata = TuneableScheduledMutatorMetadata::get_mut(&mut state).unwrap(); let metadata = TuneableScheduledMutatorMetadata::get_mut(&mut state).unwrap();
metadata.next.push(1); metadata.mutation_ids.push(1.into());
metadata.next.push(2); metadata.mutation_ids.push(2.into());
assert_eq!(tuneable.schedule(&mut state, &input), 1); assert_eq!(tuneable.schedule(&mut state, &input), 1.into());
assert_eq!(tuneable.schedule(&mut state, &input), 2); assert_eq!(tuneable.schedule(&mut state, &input), 2.into());
assert_eq!(tuneable.schedule(&mut state, &input), 1); assert_eq!(tuneable.schedule(&mut state, &input), 1.into());
} }
} }

View File

@ -148,6 +148,18 @@ where
} }
} }
impl TuneableMutationalStage<(), (), (), (), ()> {
/// Set the number of iterations to be used by this mutational stage
pub fn set_iters<S: HasMetadata>(state: &mut S, iters: u64) -> Result<(), Error> {
set_iters(state, iters)
}
/// Get the set iterations
pub fn iters<S: HasMetadata>(state: &S) -> Result<Option<u64>, Error> {
get_iters(state)
}
}
impl<E, EM, I, M, Z> TuneableMutationalStage<E, EM, I, M, Z> impl<E, EM, I, M, Z> TuneableMutationalStage<E, EM, I, M, Z>
where where
E: UsesState<State = Z::State>, E: UsesState<State = Z::State>,