Fix probabilities in TuneableScheduledMutator (#1440)

This commit fixes some issues regarding the `TuneableScheduledMutator`, which had an edge case for drawing probabilities. The user is supposed to provide a vector with probabilities that have to add up to 1.0, but due to floating-point errors, the number can be sligthly off. If the sum ends up being slow, there's a chance that we draw a number that is bigger than it (for example, 1.0), and that would not be present in the cumulative distribution vectors (either for iterations or mutations). The issue is fixed by setting the last value of the cumulative distribution to 1.0. This commits adds some validations in the function that calculates the cumulative distribution function, making sure that the numbers add up to 1, and that they are all between 0 and 1. The API is now changed so the functions can now return an error instead of crashing. The commit also adds some tests, and fixes the `reset()` function.
2023-08-22 19:37:22 -04:00 · 2023-08-22 19:37:22 -04:00 · 174b852e0a
commit 174b852e0a
parent 389c7c6554
1 changed files with 163 additions and 31 deletions
--- a/libafl/src/mutators/tuneable.rs
+++ b/libafl/src/mutators/tuneable.rs
@ -165,13 +165,16 @@ where
            // Doing this outside of the original if branch to make the borrow checker happy.
            #[allow(clippy::cast_precision_loss)]
            let coin = state.rand_mut().next() as f32 / u64::MAX as f32;
+            debug_assert!(coin <= 1.0_f32);

            let metadata = TuneableScheduledMutatorMetadata::get_mut(state).unwrap();
-            1 << (1 + metadata
+            let power = metadata
                .iter_probabilities_pow_cumulative
                .iter()
                .position(|i| *i >= coin)
-                .unwrap())
+                .unwrap();
+
+            1 << (1 + power)
        }
    }

@ -180,13 +183,9 @@ where
        debug_assert!(!self.mutations().is_empty());
        // Assumption: we can not reach this code path without previously adding this metadatum.
        let metadata = TuneableScheduledMutatorMetadata::get_mut(state).unwrap();
+
        #[allow(clippy::cast_possible_truncation)]
-        if metadata.mutation_ids.is_empty() {
-            if metadata.mutation_probabilities_cumulative.is_empty() {
-                // fall back to random if no entries in either vec, the scheduling is not tuned.
-                return state.rand_mut().below(self.mutations().len() as u64).into();
-            }
-        } else {
+        if !metadata.mutation_ids.is_empty() {
            // using pre-set ids.
            let ret = metadata.mutation_ids[metadata.next_id.0];
            metadata.next_id.0 += 1_usize;
@ -200,17 +199,32 @@ where
            return ret;
        }

+        if !metadata.mutation_probabilities_cumulative.is_empty() {
            // We will sample using the mutation probabilities.
            // Doing this outside of the original if branch to make the borrow checker happy.
            #[allow(clippy::cast_precision_loss)]
            let coin = state.rand_mut().next() as f32 / u64::MAX as f32;
+            debug_assert!(coin <= 1.0_f32);
+
            let metadata = TuneableScheduledMutatorMetadata::get_mut(state).unwrap();
-        metadata
+            debug_assert_eq!(
+                self.mutations.len(),
+                metadata.mutation_probabilities_cumulative.len(),
+                "TuneableScheduler: mutation probabilities do not match with number of mutations"
+            );
+
+            let mutation_id = metadata
                .mutation_probabilities_cumulative
                .iter()
                .position(|i| *i >= coin)
                .unwrap()
-            .into()
+                .into();
+
+            return mutation_id;
+        }
+
+        // fall back to random if no entries in either vec, the scheduling is not tuned.
+        state.rand_mut().below(self.mutations().len() as u64).into()
    }
 }

@ -233,6 +247,36 @@ where
    }
 }

+// Returns the cumulative distribution function for a discrete distribution.
+fn calculate_cumulative_distribution(probabilities: Vec<f32>) -> Result<Vec<f32>, Error> {
+    if probabilities.is_empty() {
+        return Err(Error::illegal_argument("empty list of probabilities"));
+    }
+
+    if !probabilities.iter().all(|&p| (0.0..=1.0).contains(&p)) {
+        return Err(Error::illegal_argument(format!(
+            "invalid probability distribution: {probabilities:?}"
+        )));
+    }
+
+    let mut cumulative = probabilities;
+    calculate_cumulative_sum_in_place(&mut cumulative);
+
+    // The cumulative sum should be roughly equal to 1.
+    let last = cumulative.last_mut().unwrap();
+    if num_traits::abs(*last - 1.0_f32) > 1.0e-4 {
+        return Err(Error::illegal_argument(format!(
+            "sum of probabilities ({}) is not 1",
+            *last
+        )));
+    }
+
+    // Clamp the end of the vector to account for floating point errors.
+    *last = 1.0_f32;
+
+    Ok(cumulative)
+}
+
 impl<S> TuneableScheduledMutator<(), (), S>
 where
    S: HasRand + HasMetadata,
@ -266,20 +310,28 @@ where
    /// i.e., how many times the mutation is likely to get mutated.
    ///
    /// So, setting the `iter_probabilities` to `vec![0.1, 0.7, 0.2]`
-    /// would apply 2^one mutation with the likelihood of 10%, 2^2 mutations with the
+    /// would apply 2^1 mutation with the likelihood of 10%, 2^2 mutations with the
    /// a probability of 70% (0.7), and 2^3 mutations with the likelihood of 20%.
    /// These will be applied for each call of this `mutate` function.
    ///
    /// Setting this function will unset everything previously set in `set_iters`.
-    pub fn set_iter_probabilities_pow(state: &mut S, iter_probabilities_pow: Vec<f32>) {
+    pub fn set_iter_probabilities_pow(
+        state: &mut S,
+        iter_probabilities_pow: Vec<f32>,
+    ) -> Result<(), Error> {
+        if iter_probabilities_pow.len() >= 32 {
+            return Err(Error::illegal_argument(
+                "Cannot stack more than 2^32 mutations",
+            ));
+        }
        let metadata = Self::metadata_mut(state);
        metadata.iters = None;

        // we precalculate the cumulative probability to be faster when sampling later.
-        let mut iter_probabilities_pow_cumulative = iter_probabilities_pow;
-        calculate_cumulative_sum_in_place(&mut iter_probabilities_pow_cumulative);
+        metadata.iter_probabilities_pow_cumulative =
+            calculate_cumulative_distribution(iter_probabilities_pow)?;

-        metadata.iter_probabilities_pow_cumulative = iter_probabilities_pow_cumulative;
+        Ok(())
    }

    /// Gets the set amount of iterations
@ -296,18 +348,21 @@ where
    }

    /// Sets the mutation probabilities.
-    /// The `Vec` should ideally contain one value per [`MutationId`].
+    /// The `Vec` contains a probability per [`MutationId`]: between 0 and 1, and they have to add
+    /// up to 1.
    /// Setting the probabilities will remove the value set through `set_mutation_ids`.
-    pub fn set_mutation_probabilities(state: &mut S, mutation_probabilities: Vec<f32>) {
+    pub fn set_mutation_probabilities(
+        state: &mut S,
+        mutation_probabilities: Vec<f32>,
+    ) -> Result<(), Error> {
        let metadata = TuneableScheduledMutatorMetadata::get_mut(state).unwrap();
        metadata.mutation_ids.clear();
        metadata.next_id = 0.into();

        // we precalculate the cumulative probability to be faster when sampling later.
-        let mut mutation_probabilities_cumulative = mutation_probabilities;
-        calculate_cumulative_sum_in_place(&mut mutation_probabilities_cumulative);
-
-        metadata.mutation_probabilities_cumulative = mutation_probabilities_cumulative;
+        metadata.mutation_probabilities_cumulative =
+            calculate_cumulative_distribution(mutation_probabilities)?;
+        Ok(())
    }

    /// mutation ids and iterations
@ -330,6 +385,8 @@ where
        metadata.mutation_ids.clear();
        metadata.next_id = 0.into();
        metadata.iters = None;
+        metadata.mutation_probabilities_cumulative.clear();
+        metadata.iter_probabilities_pow_cumulative.clear();
    }
 }

@ -338,7 +395,8 @@ mod test {
    use libafl_bolts::tuples::tuple_list;

    use super::{
-        BitFlipMutator, ByteDecMutator, TuneableScheduledMutator, TuneableScheduledMutatorMetadata,
+        calculate_cumulative_distribution, BitFlipMutator, ByteDecMutator,
+        TuneableScheduledMutator, TuneableScheduledMutatorMetadata,
    };
    use crate::{
        inputs::BytesInput,
@ -363,4 +421,78 @@ mod test {
        assert_eq!(tuneable.schedule(&mut state, &input), 2.into());
        assert_eq!(tuneable.schedule(&mut state, &input), 1.into());
    }
+
+    #[test]
+    fn get_cdf_fails_with_invalid_probs() {
+        // Distribution has to sum up to 1.
+        assert!(calculate_cumulative_distribution(vec![]).is_err());
+        assert!(calculate_cumulative_distribution(vec![0.0]).is_err());
+        assert!(calculate_cumulative_distribution(vec![0.9]).is_err());
+        assert!(calculate_cumulative_distribution(vec![0.9, 0.9]).is_err());
+        assert!(calculate_cumulative_distribution(vec![f32::NAN]).is_err());
+        assert!(calculate_cumulative_distribution(vec![f32::INFINITY]).is_err());
+        assert!(calculate_cumulative_distribution(vec![f32::NEG_INFINITY]).is_err());
+
+        // Elements have to be between 0 and 1
+        assert!(calculate_cumulative_distribution(vec![-0.5, 0.5, 0.5]).is_err());
+
+        assert!(calculate_cumulative_distribution(vec![1.0]).is_ok());
+        assert!(calculate_cumulative_distribution(vec![0.0, 1.0]).is_ok());
+        assert!(calculate_cumulative_distribution(vec![0.0, 1.0, 0.0]).is_ok());
+        assert!(calculate_cumulative_distribution(vec![0.5, 0.5]).is_ok());
+        assert!(calculate_cumulative_distribution(vec![0.2; 5]).is_ok());
+    }
+
+    #[test]
+    fn test_mutation_distribution() {
+        let mut state: NopState<BytesInput> = NopState::new();
+        let mutators = tuple_list!(
+            BitFlipMutator::new(),
+            ByteDecMutator::new(),
+            ByteRandMutator::new()
+        );
+        let tuneable = TuneableScheduledMutator::new(&mut state, mutators);
+        let input = BytesInput::new(vec![42]);
+
+        // Basic tests over the probability distribution.
+        assert!(
+            TuneableScheduledMutator::set_mutation_probabilities(&mut state, vec![0.0]).is_err()
+        );
+        assert!(
+            TuneableScheduledMutator::set_mutation_probabilities(&mut state, vec![1.0; 3]).is_err()
+        );
+        assert!(TuneableScheduledMutator::set_mutation_probabilities(
+            &mut state,
+            vec![-1.0, 1.0, 1.0]
+        )
+        .is_err());
+        assert!(TuneableScheduledMutator::set_mutation_probabilities(&mut state, vec![]).is_err());
+
+        assert!(TuneableScheduledMutator::set_mutation_probabilities(
+            &mut state,
+            vec![0.0, 0.0, 1.0]
+        )
+        .is_ok());
+        assert_eq!(tuneable.schedule(&mut state, &input), 2.into());
+        assert!(TuneableScheduledMutator::set_mutation_probabilities(
+            &mut state,
+            vec![0.0, 1.0, 0.0]
+        )
+        .is_ok());
+        assert_eq!(tuneable.schedule(&mut state, &input), 1.into());
+        assert!(TuneableScheduledMutator::set_mutation_probabilities(
+            &mut state,
+            vec![1.0, 0.0, 0.0]
+        )
+        .is_ok());
+        assert_eq!(tuneable.schedule(&mut state, &input), 0.into());
+
+        // We should not choose a mutation with p=0.
+        assert!(TuneableScheduledMutator::set_mutation_probabilities(
+            &mut state,
+            vec![0.5, 0.0, 0.5]
+        )
+        .is_ok());
+        assert!(tuneable.schedule(&mut state, &input) != 1.into());
+    }
 }