From 174b852e0ae8644df0679066f24e8a7957ad9413 Mon Sep 17 00:00:00 2001
From: Marco Vanotti <mvanotti@users.noreply.github.com>
Date: Tue, 22 Aug 2023 19:37:22 -0400
Subject: [PATCH] Fix probabilities in TuneableScheduledMutator (#1440)

This commit fixes some issues regarding the `TuneableScheduledMutator`,
which had an edge case for drawing probabilities.

The user is supposed to provide a vector with probabilities that have to
add up to 1.0, but due to floating-point errors, the number can be
sligthly off.

If the sum ends up being slow, there's a chance that we draw a number
that is bigger than it (for example, 1.0), and that would not be present
in the cumulative distribution vectors (either for iterations or
mutations).

The issue is fixed by setting the last value of the cumulative
distribution to 1.0.

This commits adds some validations in the function that calculates the
cumulative distribution function, making sure that the numbers add up to
1, and that they are all between 0 and 1.

The API is now changed so the functions can now return an error instead
of crashing.

The commit also adds some tests, and fixes the `reset()` function.
---
 libafl/src/mutators/tuneable.rs | 194 +++++++++++++++++++++++++++-----
 1 file changed, 163 insertions(+), 31 deletions(-)

diff --git a/libafl/src/mutators/tuneable.rs b/libafl/src/mutators/tuneable.rs
index b861441b47..36bec3fac2 100644
--- a/libafl/src/mutators/tuneable.rs
+++ b/libafl/src/mutators/tuneable.rs
@@ -165,13 +165,16 @@ where
             // Doing this outside of the original if branch to make the borrow checker happy.
             #[allow(clippy::cast_precision_loss)]
             let coin = state.rand_mut().next() as f32 / u64::MAX as f32;
+            debug_assert!(coin <= 1.0_f32);
 
             let metadata = TuneableScheduledMutatorMetadata::get_mut(state).unwrap();
-            1 << (1 + metadata
+            let power = metadata
                 .iter_probabilities_pow_cumulative
                 .iter()
                 .position(|i| *i >= coin)
-                .unwrap())
+                .unwrap();
+
+            1 << (1 + power)
         }
     }
 
@@ -180,13 +183,9 @@ where
         debug_assert!(!self.mutations().is_empty());
         // Assumption: we can not reach this code path without previously adding this metadatum.
         let metadata = TuneableScheduledMutatorMetadata::get_mut(state).unwrap();
+
         #[allow(clippy::cast_possible_truncation)]
-        if metadata.mutation_ids.is_empty() {
-            if metadata.mutation_probabilities_cumulative.is_empty() {
-                // fall back to random if no entries in either vec, the scheduling is not tuned.
-                return state.rand_mut().below(self.mutations().len() as u64).into();
-            }
-        } else {
+        if !metadata.mutation_ids.is_empty() {
             // using pre-set ids.
             let ret = metadata.mutation_ids[metadata.next_id.0];
             metadata.next_id.0 += 1_usize;
@@ -200,17 +199,32 @@ where
             return ret;
         }
 
-        // We will sample using the mutation probabilities.
-        // Doing this outside of the original if branch to make the borrow checker happy.
-        #[allow(clippy::cast_precision_loss)]
-        let coin = state.rand_mut().next() as f32 / u64::MAX as f32;
-        let metadata = TuneableScheduledMutatorMetadata::get_mut(state).unwrap();
-        metadata
-            .mutation_probabilities_cumulative
-            .iter()
-            .position(|i| *i >= coin)
-            .unwrap()
-            .into()
+        if !metadata.mutation_probabilities_cumulative.is_empty() {
+            // We will sample using the mutation probabilities.
+            // Doing this outside of the original if branch to make the borrow checker happy.
+            #[allow(clippy::cast_precision_loss)]
+            let coin = state.rand_mut().next() as f32 / u64::MAX as f32;
+            debug_assert!(coin <= 1.0_f32);
+
+            let metadata = TuneableScheduledMutatorMetadata::get_mut(state).unwrap();
+            debug_assert_eq!(
+                self.mutations.len(),
+                metadata.mutation_probabilities_cumulative.len(),
+                "TuneableScheduler: mutation probabilities do not match with number of mutations"
+            );
+
+            let mutation_id = metadata
+                .mutation_probabilities_cumulative
+                .iter()
+                .position(|i| *i >= coin)
+                .unwrap()
+                .into();
+
+            return mutation_id;
+        }
+
+        // fall back to random if no entries in either vec, the scheduling is not tuned.
+        state.rand_mut().below(self.mutations().len() as u64).into()
     }
 }
 
@@ -233,6 +247,36 @@ where
     }
 }
 
+// Returns the cumulative distribution function for a discrete distribution.
+fn calculate_cumulative_distribution(probabilities: Vec<f32>) -> Result<Vec<f32>, Error> {
+    if probabilities.is_empty() {
+        return Err(Error::illegal_argument("empty list of probabilities"));
+    }
+
+    if !probabilities.iter().all(|&p| (0.0..=1.0).contains(&p)) {
+        return Err(Error::illegal_argument(format!(
+            "invalid probability distribution: {probabilities:?}"
+        )));
+    }
+
+    let mut cumulative = probabilities;
+    calculate_cumulative_sum_in_place(&mut cumulative);
+
+    // The cumulative sum should be roughly equal to 1.
+    let last = cumulative.last_mut().unwrap();
+    if num_traits::abs(*last - 1.0_f32) > 1.0e-4 {
+        return Err(Error::illegal_argument(format!(
+            "sum of probabilities ({}) is not 1",
+            *last
+        )));
+    }
+
+    // Clamp the end of the vector to account for floating point errors.
+    *last = 1.0_f32;
+
+    Ok(cumulative)
+}
+
 impl<S> TuneableScheduledMutator<(), (), S>
 where
     S: HasRand + HasMetadata,
@@ -266,20 +310,28 @@ where
     /// i.e., how many times the mutation is likely to get mutated.
     ///
     /// So, setting the `iter_probabilities` to `vec![0.1, 0.7, 0.2]`
-    /// would apply 2^one mutation with the likelihood of 10%, 2^2 mutations with the
+    /// would apply 2^1 mutation with the likelihood of 10%, 2^2 mutations with the
     /// a probability of 70% (0.7), and 2^3 mutations with the likelihood of 20%.
     /// These will be applied for each call of this `mutate` function.
     ///
     /// Setting this function will unset everything previously set in `set_iters`.
-    pub fn set_iter_probabilities_pow(state: &mut S, iter_probabilities_pow: Vec<f32>) {
+    pub fn set_iter_probabilities_pow(
+        state: &mut S,
+        iter_probabilities_pow: Vec<f32>,
+    ) -> Result<(), Error> {
+        if iter_probabilities_pow.len() >= 32 {
+            return Err(Error::illegal_argument(
+                "Cannot stack more than 2^32 mutations",
+            ));
+        }
         let metadata = Self::metadata_mut(state);
         metadata.iters = None;
 
         // we precalculate the cumulative probability to be faster when sampling later.
-        let mut iter_probabilities_pow_cumulative = iter_probabilities_pow;
-        calculate_cumulative_sum_in_place(&mut iter_probabilities_pow_cumulative);
+        metadata.iter_probabilities_pow_cumulative =
+            calculate_cumulative_distribution(iter_probabilities_pow)?;
 
-        metadata.iter_probabilities_pow_cumulative = iter_probabilities_pow_cumulative;
+        Ok(())
     }
 
     /// Gets the set amount of iterations
@@ -296,18 +348,21 @@ where
     }
 
     /// Sets the mutation probabilities.
-    /// The `Vec` should ideally contain one value per [`MutationId`].
+    /// The `Vec` contains a probability per [`MutationId`]: between 0 and 1, and they have to add
+    /// up to 1.
     /// Setting the probabilities will remove the value set through `set_mutation_ids`.
-    pub fn set_mutation_probabilities(state: &mut S, mutation_probabilities: Vec<f32>) {
+    pub fn set_mutation_probabilities(
+        state: &mut S,
+        mutation_probabilities: Vec<f32>,
+    ) -> Result<(), Error> {
         let metadata = TuneableScheduledMutatorMetadata::get_mut(state).unwrap();
         metadata.mutation_ids.clear();
         metadata.next_id = 0.into();
 
         // we precalculate the cumulative probability to be faster when sampling later.
-        let mut mutation_probabilities_cumulative = mutation_probabilities;
-        calculate_cumulative_sum_in_place(&mut mutation_probabilities_cumulative);
-
-        metadata.mutation_probabilities_cumulative = mutation_probabilities_cumulative;
+        metadata.mutation_probabilities_cumulative =
+            calculate_cumulative_distribution(mutation_probabilities)?;
+        Ok(())
     }
 
     /// mutation ids and iterations
@@ -330,6 +385,8 @@ where
         metadata.mutation_ids.clear();
         metadata.next_id = 0.into();
         metadata.iters = None;
+        metadata.mutation_probabilities_cumulative.clear();
+        metadata.iter_probabilities_pow_cumulative.clear();
     }
 }
 
@@ -338,7 +395,8 @@ mod test {
     use libafl_bolts::tuples::tuple_list;
 
     use super::{
-        BitFlipMutator, ByteDecMutator, TuneableScheduledMutator, TuneableScheduledMutatorMetadata,
+        calculate_cumulative_distribution, BitFlipMutator, ByteDecMutator,
+        TuneableScheduledMutator, TuneableScheduledMutatorMetadata,
     };
     use crate::{
         inputs::BytesInput,
@@ -363,4 +421,78 @@ mod test {
         assert_eq!(tuneable.schedule(&mut state, &input), 2.into());
         assert_eq!(tuneable.schedule(&mut state, &input), 1.into());
     }
+
+    #[test]
+    fn get_cdf_fails_with_invalid_probs() {
+        // Distribution has to sum up to 1.
+        assert!(calculate_cumulative_distribution(vec![]).is_err());
+        assert!(calculate_cumulative_distribution(vec![0.0]).is_err());
+        assert!(calculate_cumulative_distribution(vec![0.9]).is_err());
+        assert!(calculate_cumulative_distribution(vec![0.9, 0.9]).is_err());
+        assert!(calculate_cumulative_distribution(vec![f32::NAN]).is_err());
+        assert!(calculate_cumulative_distribution(vec![f32::INFINITY]).is_err());
+        assert!(calculate_cumulative_distribution(vec![f32::NEG_INFINITY]).is_err());
+
+        // Elements have to be between 0 and 1
+        assert!(calculate_cumulative_distribution(vec![-0.5, 0.5, 0.5]).is_err());
+
+        assert!(calculate_cumulative_distribution(vec![1.0]).is_ok());
+        assert!(calculate_cumulative_distribution(vec![0.0, 1.0]).is_ok());
+        assert!(calculate_cumulative_distribution(vec![0.0, 1.0, 0.0]).is_ok());
+        assert!(calculate_cumulative_distribution(vec![0.5, 0.5]).is_ok());
+        assert!(calculate_cumulative_distribution(vec![0.2; 5]).is_ok());
+    }
+
+    #[test]
+    fn test_mutation_distribution() {
+        let mut state: NopState<BytesInput> = NopState::new();
+        let mutators = tuple_list!(
+            BitFlipMutator::new(),
+            ByteDecMutator::new(),
+            ByteRandMutator::new()
+        );
+        let tuneable = TuneableScheduledMutator::new(&mut state, mutators);
+        let input = BytesInput::new(vec![42]);
+
+        // Basic tests over the probability distribution.
+        assert!(
+            TuneableScheduledMutator::set_mutation_probabilities(&mut state, vec![0.0]).is_err()
+        );
+        assert!(
+            TuneableScheduledMutator::set_mutation_probabilities(&mut state, vec![1.0; 3]).is_err()
+        );
+        assert!(TuneableScheduledMutator::set_mutation_probabilities(
+            &mut state,
+            vec![-1.0, 1.0, 1.0]
+        )
+        .is_err());
+        assert!(TuneableScheduledMutator::set_mutation_probabilities(&mut state, vec![]).is_err());
+
+        assert!(TuneableScheduledMutator::set_mutation_probabilities(
+            &mut state,
+            vec![0.0, 0.0, 1.0]
+        )
+        .is_ok());
+        assert_eq!(tuneable.schedule(&mut state, &input), 2.into());
+        assert!(TuneableScheduledMutator::set_mutation_probabilities(
+            &mut state,
+            vec![0.0, 1.0, 0.0]
+        )
+        .is_ok());
+        assert_eq!(tuneable.schedule(&mut state, &input), 1.into());
+        assert!(TuneableScheduledMutator::set_mutation_probabilities(
+            &mut state,
+            vec![1.0, 0.0, 0.0]
+        )
+        .is_ok());
+        assert_eq!(tuneable.schedule(&mut state, &input), 0.into());
+
+        // We should not choose a mutation with p=0.
+        assert!(TuneableScheduledMutator::set_mutation_probabilities(
+            &mut state,
+            vec![0.5, 0.0, 0.5]
+        )
+        .is_ok());
+        assert!(tuneable.schedule(&mut state, &input) != 1.into());
+    }
 }