From 33ddce2ceaecb23fbf7f4626003a2322292ddeb9 Mon Sep 17 00:00:00 2001 From: Dominik Maier Date: Mon, 30 Jan 2023 18:04:42 +0100 Subject: [PATCH] Introduce MutatorId, Tuneable fixes (#1022) * Add simpler APIs for TunableStage * Make API usable * Add TunableScheduledMutator APIs * Introduce MutatorId * More API * Cleanup * add sampling-based mutation scheduling * reduce precision for sampling * clippy --- fuzzers/baby_fuzzer_minimizing/src/main.rs | 2 +- libafl/src/executors/inprocess.rs | 3 +- libafl/src/mutators/mod.rs | 45 ++++++- libafl/src/mutators/mopt_mutator.rs | 11 +- libafl/src/mutators/mutations.rs | 2 +- libafl/src/mutators/scheduled.rs | 16 +-- libafl/src/mutators/tuneable.rs | 138 ++++++++++++++++----- libafl/src/stages/tuneable.rs | 12 ++ 8 files changed, 179 insertions(+), 50 deletions(-) diff --git a/fuzzers/baby_fuzzer_minimizing/src/main.rs b/fuzzers/baby_fuzzer_minimizing/src/main.rs index 4cec8054af..c3142ea9e6 100644 --- a/fuzzers/baby_fuzzer_minimizing/src/main.rs +++ b/fuzzers/baby_fuzzer_minimizing/src/main.rs @@ -141,7 +141,7 @@ pub fn main() -> Result<(), Error> { &mut executor, &mut state, &mut mgr, - CorpusId::from(0usize), + CorpusId::from(0_usize), )?; Ok(()) diff --git a/libafl/src/executors/inprocess.rs b/libafl/src/executors/inprocess.rs index 3fb359c0f6..41d48f0aef 100644 --- a/libafl/src/executors/inprocess.rs +++ b/libafl/src/executors/inprocess.rs @@ -1190,8 +1190,7 @@ mod windows_exception_handler { .ExceptionAddress as usize; eprintln!( - "We crashed at addr 0x{:x}, but are not in the target... Bug in the fuzzer? Exiting.", - crash_addr + "We crashed at addr 0x{crash_addr:x}, but are not in the target... Bug in the fuzzer? Exiting." ); } #[cfg(feature = "std")] diff --git a/libafl/src/mutators/mod.rs b/libafl/src/mutators/mod.rs index 4143fb2ec1..e9052ad938 100644 --- a/libafl/src/mutators/mod.rs +++ b/libafl/src/mutators/mod.rs @@ -1,10 +1,13 @@ //! Mutators mutate input during fuzzing. pub mod scheduled; +use core::fmt; + pub use scheduled::*; pub mod mutations; pub use mutations::*; pub mod token_mutations; +use serde::{Deserialize, Serialize}; pub use token_mutations::*; pub mod encoded_mutations; pub use encoded_mutations::*; @@ -31,6 +34,37 @@ use crate::{ // TODO mutator stats method that produces something that can be sent with the NewTestcase event // We can use it to report which mutations generated the testcase in the broker logs +/// The index of a mutation in the mutations tuple +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)] +#[repr(transparent)] +pub struct MutationId(pub(crate) usize); + +impl fmt::Display for MutationId { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "MutationId({})", self.0) + } +} + +impl From for MutationId { + fn from(value: usize) -> Self { + MutationId(value) + } +} + +impl From for MutationId { + fn from(value: u64) -> Self { + MutationId(value as usize) + } +} + +impl From for MutationId { + #[allow(clippy::cast_sign_loss)] + fn from(value: i32) -> Self { + debug_assert!(value >= 0); + MutationId(value as usize) + } +} + /// The result of a mutation. /// If the mutation got skipped, the target /// will not be executed with the returned input. @@ -85,7 +119,7 @@ pub trait MutatorsTuple: HasConstLen { /// Gets the [`Mutator`] at the given index and runs the `mutate` function on it. fn get_and_mutate( &mut self, - index: usize, + index: MutationId, state: &mut S, input: &mut I, stage_idx: i32, @@ -122,7 +156,7 @@ impl MutatorsTuple for () { fn get_and_mutate( &mut self, - _index: usize, + _index: MutationId, _state: &mut S, _input: &mut I, _stage_idx: i32, @@ -172,15 +206,16 @@ where fn get_and_mutate( &mut self, - index: usize, + index: MutationId, state: &mut S, input: &mut I, stage_idx: i32, ) -> Result { - if index == 0 { + if index.0 == 0 { self.0.mutate(state, input, stage_idx) } else { - self.1.get_and_mutate(index - 1, state, input, stage_idx) + self.1 + .get_and_mutate((index.0 - 1).into(), state, input, stage_idx) } } diff --git a/libafl/src/mutators/mopt_mutator.rs b/libafl/src/mutators/mopt_mutator.rs index 06cea8fae8..2b418fff9a 100644 --- a/libafl/src/mutators/mopt_mutator.rs +++ b/libafl/src/mutators/mopt_mutator.rs @@ -7,6 +7,7 @@ use core::{ use serde::{Deserialize, Serialize}; +use super::MutationId; use crate::{ bolts::rands::{Rand, StdRand}, corpus::{Corpus, CorpusId}, @@ -310,7 +311,7 @@ impl MOpt { /// This function is used to decide the operator that we want to apply next /// see #[allow(clippy::cast_precision_loss)] - pub fn select_algorithm(&mut self) -> Result { + pub fn select_algorithm(&mut self) -> Result { let mut res = 0; let mut sentry = 0; @@ -341,7 +342,7 @@ impl MOpt { "MOpt: Error in select_algorithm".to_string(), )); } - Ok(res) + Ok(res.into()) } } @@ -570,7 +571,7 @@ where .metadata_mut() .get_mut::() .unwrap() - .core_operator_cycles_v2[idx] += 1; + .core_operator_cycles_v2[idx.0] += 1; } Ok(r) } @@ -606,7 +607,7 @@ where .metadata_mut() .get_mut::() .unwrap() - .pilot_operator_cycles_v2[swarm_now][idx] += 1; + .pilot_operator_cycles_v2[swarm_now][idx.0] += 1; } Ok(r) @@ -643,7 +644,7 @@ where /// Get the next mutation to apply #[inline] - fn schedule(&self, state: &mut S, _: &I) -> usize { + fn schedule(&self, state: &mut S, _: &I) -> MutationId { state .metadata_mut() .get_mut::() diff --git a/libafl/src/mutators/mutations.rs b/libafl/src/mutators/mutations.rs index 33085b1d90..56fd8399ed 100644 --- a/libafl/src/mutators/mutations.rs +++ b/libafl/src/mutators/mutations.rs @@ -1249,7 +1249,7 @@ mod tests { for input in &inputs { let mut mutant = input.clone(); match mutations - .get_and_mutate(idx, &mut state, &mut mutant, 0) + .get_and_mutate(idx.into(), &mut state, &mut mutant, 0) .unwrap() { MutationResult::Mutated => new_testcases.push(mutant), diff --git a/libafl/src/mutators/scheduled.rs b/libafl/src/mutators/scheduled.rs index 7e9ca0b67b..00a50f8be1 100644 --- a/libafl/src/mutators/scheduled.rs +++ b/libafl/src/mutators/scheduled.rs @@ -8,6 +8,7 @@ use core::{ use serde::{Deserialize, Serialize}; +use super::MutationId; pub use crate::mutators::{mutations::*, token_mutations::*}; use crate::{ bolts::{ @@ -74,7 +75,7 @@ where fn iterations(&self, state: &mut S, input: &I) -> u64; /// Get the next mutation to apply - fn schedule(&self, state: &mut S, input: &I) -> usize; + fn schedule(&self, state: &mut S, input: &I) -> MutationId; /// New default implementation for mutate. /// Implementations must forward mutate() to this method @@ -170,9 +171,9 @@ where } /// Get the next mutation to apply - fn schedule(&self, state: &mut S, _: &I) -> usize { + fn schedule(&self, state: &mut S, _: &I) -> MutationId { debug_assert!(!self.mutations().is_empty()); - state.rand_mut().below(self.mutations().len() as u64) as usize + state.rand_mut().below(self.mutations().len() as u64).into() } } @@ -279,7 +280,7 @@ where SM: ScheduledMutator, { scheduled: SM, - mutation_log: Vec, + mutation_log: Vec, phantom: PhantomData<(I, MT, S)>, } @@ -324,7 +325,7 @@ where let mut testcase = (*state.corpus_mut().get(idx)?).borrow_mut(); let mut log = Vec::::new(); while let Some(idx) = self.mutation_log.pop() { - let name = String::from(self.scheduled.mutations().name(idx).unwrap()); // TODO maybe return an Error on None + let name = String::from(self.scheduled.mutations().name(idx.0).unwrap()); // TODO maybe return an Error on None log.push(name); } let meta = LogMutationMetadata::new(log); @@ -365,11 +366,12 @@ where } /// Get the next mutation to apply - fn schedule(&self, state: &mut S, _: &I) -> usize { + fn schedule(&self, state: &mut S, _: &I) -> MutationId { debug_assert!(!self.scheduled.mutations().is_empty()); state .rand_mut() - .below(self.scheduled.mutations().len() as u64) as usize + .below(self.scheduled.mutations().len() as u64) + .into() } fn scheduled_mutate( diff --git a/libafl/src/mutators/tuneable.rs b/libafl/src/mutators/tuneable.rs index efcfabe687..72043c4b43 100644 --- a/libafl/src/mutators/tuneable.rs +++ b/libafl/src/mutators/tuneable.rs @@ -14,24 +14,42 @@ pub use crate::mutators::{mutations::*, token_mutations::*}; use crate::{ bolts::rands::Rand, impl_serdeany, - mutators::{ComposedByMutations, MutationResult, Mutator, MutatorsTuple, ScheduledMutator}, + mutators::{ + ComposedByMutations, MutationId, MutationResult, Mutator, MutatorsTuple, ScheduledMutator, + }, state::{HasMetadata, HasRand}, Error, }; /// Metadata in the state, that controls the behavior of the [`TuneableScheduledMutator`] at runtime -#[derive(Default, Clone, Eq, PartialEq, Debug, Serialize, Deserialize)] +#[derive(Clone, PartialEq, Debug, Serialize, Deserialize)] pub struct TuneableScheduledMutatorMetadata { - /// The offsets of mutators to run, in order. Clear to fall back to random. - pub next: Vec, + /// The offsets of mutators to run, in order. Clear to fall back to random, + /// or use `mutation_probabilities` + pub mutation_ids: Vec, /// The next index to read from in the `next` vec - pub next_idx: usize, + pub next_id: MutationId, + /// The cumulative probability distribution for each mutation. + /// Will not be used when `mutation_ids` are set. + /// Clear to fall back to random. + pub mutation_probabilities_cumulative: Vec, /// The count of total mutations to perform. - /// If `next` is of length `10`, and this number is `20`, + /// If `mutation_ids` is of length `10`, and this number is `20`, /// the mutations will be iterated through twice. pub iters: Option, } +impl Default for TuneableScheduledMutatorMetadata { + fn default() -> Self { + Self { + mutation_ids: Vec::default(), + next_id: 0.into(), + mutation_probabilities_cumulative: Vec::default(), + iters: None, + } + } +} + impl TuneableScheduledMutatorMetadata { /// Gets the stored metadata, used to alter the [`TuneableScheduledMutator`] behavior pub fn get(state: &S) -> Result<&Self, Error> { @@ -89,9 +107,9 @@ where &mut self, state: &mut S, input: &mut I, - stage_idx: i32, + stage_id: i32, ) -> Result { - self.scheduled_mutate(state, input, stage_idx) + self.scheduled_mutate(state, input, stage_id) } } @@ -120,7 +138,7 @@ where { /// Compute the number of iterations used to apply stacked mutations fn iterations(&self, state: &mut S, _: &I) -> u64 { - if let Some(iters) = Self::get_iters(state) { + if let Some(iters) = TuneableScheduledMutator::get_iters(state) { iters } else { // fall back to random @@ -129,26 +147,41 @@ where } /// Get the next mutation to apply - fn schedule(&self, state: &mut S, _: &I) -> usize { + fn schedule(&self, state: &mut S, _: &I) -> MutationId { debug_assert!(!self.mutations().is_empty()); // Assumption: we can not reach this code path without previously adding this metadatum. let metadata = TuneableScheduledMutatorMetadata::get_mut(state).unwrap(); #[allow(clippy::cast_possible_truncation)] - if metadata.next.is_empty() { - // fall back to random if no entries in the vec - state.rand_mut().below(self.mutations().len() as u64) as usize + if metadata.mutation_ids.is_empty() { + if metadata.mutation_probabilities_cumulative.is_empty() { + // fall back to random if no entries in either vec, the scheduling is not tuned. + return state.rand_mut().below(self.mutations().len() as u64).into(); + } } else { - let ret = metadata.next[metadata.next_idx]; - metadata.next_idx += 1_usize; - if metadata.next_idx >= metadata.next.len() { - metadata.next_idx = 0; + // using pre-set ids. + let ret = metadata.mutation_ids[metadata.next_id.0]; + metadata.next_id.0 += 1_usize; + if metadata.next_id.0 >= metadata.mutation_ids.len() { + metadata.next_id = 0.into(); } debug_assert!( - self.mutations().len() > ret, - "TuneableScheduler: next vec may not contain idx larger than number of mutations!" + self.mutations().len() > ret.0, + "TuneableScheduler: next vec may not contain id larger than number of mutations!" ); - ret + return ret; } + + // We will sample using the mutation probabilities. + // Doing this outside of the original if branch to make the borrow checker happy. + #[allow(clippy::cast_precision_loss)] + let coin = state.rand_mut().next() as f32 / u64::MAX as f32; + let metadata = TuneableScheduledMutatorMetadata::get_mut(state).unwrap(); + metadata + .mutation_probabilities_cumulative + .iter() + .position(|i| *i >= coin) + .unwrap() + .into() } } @@ -168,7 +201,12 @@ where phantom: PhantomData, } } +} +impl TuneableScheduledMutator<(), (), S> +where + S: HasRand + HasMetadata, +{ fn metadata_mut(state: &mut S) -> &mut TuneableScheduledMutatorMetadata { state .metadata_mut() @@ -185,7 +223,7 @@ where /// Sets the next iterations count, i.e., how many times to mutate the input /// - /// Using `set_next_and_iter` to set multiple values at the same time + /// Using `set_mutation_ids_and_iter` to set multiple values at the same time /// will be faster than setting them individually /// as it internally only needs a single metadata lookup pub fn set_iters(state: &mut S, iters: u64) { @@ -193,17 +231,59 @@ where metadata.iters = Some(iters); } - /// Gets the set iterations + /// Gets the set amount of iterations pub fn get_iters(state: &S) -> Option { let metadata = Self::metadata(state); metadata.iters } + /// Sets the mutation ids + pub fn set_mutation_ids(state: &mut S, mutations: Vec) { + let metadata = TuneableScheduledMutatorMetadata::get_mut(state).unwrap(); + metadata.mutation_ids = mutations; + metadata.next_id = 0.into(); + } + + /// Sets the mutation probabilities. + /// The `Vec` should ideally contain one value per [`MutationId`]. + /// Setting the probabilities will remove the value set through `set_mutation_ids`. + pub fn set_mutation_probabilities(state: &mut S, mutation_probabilities: Vec) { + let metadata = TuneableScheduledMutatorMetadata::get_mut(state).unwrap(); + metadata.mutation_ids.clear(); + metadata.next_id = 0.into(); + + // we precalculate the cumulative probability to be faster when sampling later. + let mut mutation_probabilities_cumulative = mutation_probabilities; + let mut acc = 0.0; + + for probability in &mut mutation_probabilities_cumulative { + let l = *probability; + *probability += acc; + acc += l; + } + + metadata.mutation_probabilities_cumulative = mutation_probabilities_cumulative; + } + + /// mutation ids and iterations + pub fn set_mutation_ids_and_iters(state: &mut S, mutations: Vec, iters: u64) { + let metadata = TuneableScheduledMutatorMetadata::get_mut(state).unwrap(); + metadata.mutation_ids = mutations; + metadata.next_id = 0.into(); + metadata.iters = Some(iters); + } + + /// Appends a mutation id to the end of the mutations + pub fn push_mutation_id(state: &mut S, mutation_id: MutationId) { + let metadata = TuneableScheduledMutatorMetadata::get_mut(state).unwrap(); + metadata.mutation_ids.push(mutation_id); + } + /// Resets this to a randomic mutational stage pub fn reset(state: &mut S) { let metadata = Self::metadata_mut(state); - metadata.next.clear(); - metadata.next_idx = 0; + metadata.mutation_ids.clear(); + metadata.next_id = 0.into(); metadata.iters = None; } } @@ -231,10 +311,10 @@ mod test { let tuneable = TuneableScheduledMutator::new(&mut state, mutators); let input = BytesInput::new(vec![42]); let metadata = TuneableScheduledMutatorMetadata::get_mut(&mut state).unwrap(); - metadata.next.push(1); - metadata.next.push(2); - assert_eq!(tuneable.schedule(&mut state, &input), 1); - assert_eq!(tuneable.schedule(&mut state, &input), 2); - assert_eq!(tuneable.schedule(&mut state, &input), 1); + metadata.mutation_ids.push(1.into()); + metadata.mutation_ids.push(2.into()); + assert_eq!(tuneable.schedule(&mut state, &input), 1.into()); + assert_eq!(tuneable.schedule(&mut state, &input), 2.into()); + assert_eq!(tuneable.schedule(&mut state, &input), 1.into()); } } diff --git a/libafl/src/stages/tuneable.rs b/libafl/src/stages/tuneable.rs index 1e955750c5..9b46933a14 100644 --- a/libafl/src/stages/tuneable.rs +++ b/libafl/src/stages/tuneable.rs @@ -148,6 +148,18 @@ where } } +impl TuneableMutationalStage<(), (), (), (), ()> { + /// Set the number of iterations to be used by this mutational stage + pub fn set_iters(state: &mut S, iters: u64) -> Result<(), Error> { + set_iters(state, iters) + } + + /// Get the set iterations + pub fn iters(state: &S) -> Result, Error> { + get_iters(state) + } +} + impl TuneableMutationalStage where E: UsesState,