Token level fuzzing (#274)

* EncodedInput * some encoded mutations * Encoded parse using comments and strings regexes * working js example * better mutator * clippy
2021-08-27 09:32:22 +02:00 · 2021-08-27 09:32:22 +02:00 · 14d1f63e56
commit 14d1f63e56
parent 45dd940532
14 changed files with 1210 additions and 20 deletions
--- a/fuzzers/baby_fuzzer_js/.gitignore
+++ b/fuzzers/baby_fuzzer_js/.gitignore
@ -0,0 +1 @@
+libpng-*
--- a/fuzzers/baby_fuzzer_js/Cargo.toml
+++ b/fuzzers/baby_fuzzer_js/Cargo.toml
@ -0,0 +1,22 @@
+[package]
+name = "baby_fuzzer"
+version = "0.6.0"
+authors = ["Andrea Fioraldi <andreafioraldi@gmail.com>", "Dominik Maier <domenukk@gmail.com>"]
+edition = "2018"
+
+[features]
+default = ["std"]
+std = []
+
+[profile.dev]
+panic = "abort"
+
+[profile.release]
+panic = "abort"
+lto = true
+codegen-units = 1
+opt-level = 3
+debug = true
+
+[dependencies]
+libafl = { path = "../../libafl/" }
--- a/fuzzers/baby_fuzzer_js/README.md
+++ b/fuzzers/baby_fuzzer_js/README.md
@ -0,0 +1,8 @@
+# Baby fuzzer
+
+This is a minimalistic example of how to create a LibAFL-based fuzzer.
+
+It runs on a single core until a crash occurs and then exits.
+
+The tested program is a simple Rust function without any instrumentation.
+For real fuzzing, you will want to add some sort of coverage or other feedback.
--- a/fuzzers/baby_fuzzer_js/corpus/new
+++ b/fuzzers/baby_fuzzer_js/corpus/new
@ -0,0 +1,4 @@
+fn pippo(v) { return "hello world " + v; }
+var a = 666;
+name = "scozzo" + a;
+pippo(name);
--- a/fuzzers/baby_fuzzer_js/src/main.rs
+++ b/fuzzers/baby_fuzzer_js/src/main.rs
@ -0,0 +1,132 @@
+use std::io::Read;
+use std::{fs, path::PathBuf};
+
+#[cfg(windows)]
+use std::ptr::write_volatile;
+
+use libafl::{
+    bolts::{current_nanos, rands::StdRand, tuples::tuple_list},
+    corpus::{InMemoryCorpus, OnDiskCorpus, QueueCorpusScheduler},
+    events::SimpleEventManager,
+    executors::{inprocess::InProcessExecutor, ExitKind},
+    feedbacks::{CrashFeedback, MapFeedbackState, MaxMapFeedback},
+    fuzzer::{Evaluator, Fuzzer, StdFuzzer},
+    inputs::{EncodedInput, InputDecoder, InputEncoder, NaiveTokenizer, TokenInputEncoderDecoder},
+    mutators::{encoded_mutations::encoded_mutations, scheduled::StdScheduledMutator},
+    observers::StdMapObserver,
+    stages::mutational::StdMutationalStage,
+    state::StdState,
+    stats::SimpleStats,
+};
+
+/// Coverage map with explicit assignments due to the lack of instrumentation
+static mut SIGNALS: [u8; 16] = [0; 16];
+/*
+/// Assign a signal to the signals map
+fn signals_set(idx: usize) {
+    unsafe { SIGNALS[idx] = 1 };
+}
+*/
+
+#[allow(clippy::similar_names)]
+/// Entry point of the token-level baby fuzzer.
+///
+/// Encodes every non-empty file found in `./corpus` into an [`EncodedInput`], then fuzzes a
+/// harness that decodes each mutated input back to text and prints it.
+///
+/// # Panics
+/// Panics if the corpus directory or one of its files cannot be read, if a seed cannot be
+/// encoded, or if any part of the fuzzer setup fails.
+pub fn main() {
+    let mut tokenizer = NaiveTokenizer::default();
+    let mut encoder_decoder = TokenInputEncoderDecoder::new();
+    let mut initial_inputs = vec![];
+    let mut decoded_bytes = vec![];
+
+    // Tokenize and encode every non-empty regular file of the seed corpus
+    for entry in fs::read_dir("./corpus").expect("Failed to read the ./corpus directory") {
+        let path = entry
+            .expect("Failed to read an entry of the ./corpus directory")
+            .path();
+        // Entries whose metadata cannot be read are skipped
+        let attr = match fs::metadata(&path) {
+            Ok(attr) => attr,
+            Err(_) => continue,
+        };
+
+        if attr.is_file() && attr.len() > 0 {
+            println!("Loading file {:?} ...", &path);
+            let mut file = fs::File::open(path).expect("Failed to open corpus file");
+            let mut buffer = vec![];
+            file.read_to_end(&mut buffer)
+                .expect("Failed to read corpus file");
+            let input = encoder_decoder
+                .encode(&buffer, &mut tokenizer)
+                .expect("encoding failed");
+            initial_inputs.push(input);
+        }
+    }
+
+    // The closure that we want to fuzz: it decodes the input back to text and prints it
+    let mut harness = |input: &EncodedInput| {
+        decoded_bytes.clear();
+        encoder_decoder.decode(input, &mut decoded_bytes).unwrap();
+        // The decoder only emits tokens taken from `String`s, but there is no need for
+        // `unsafe` here: the lossy conversion is a no-op on valid UTF-8.
+        println!("{}", String::from_utf8_lossy(&decoded_bytes));
+        ExitKind::Ok
+    };
+
+    // Create an observation channel using the signals map
+    let observer = StdMapObserver::new("signals", unsafe { &mut SIGNALS });
+
+    // The state of the edges feedback.
+    let feedback_state = MapFeedbackState::with_observer(&observer);
+
+    // Feedback to rate the interestingness of an input
+    let feedback = MaxMapFeedback::new(&feedback_state, &observer);
+
+    // A feedback to choose if an input is a solution or not
+    let objective = CrashFeedback::new();
+
+    // create a State from scratch
+    let mut state = StdState::new(
+        // RNG
+        StdRand::with_seed(current_nanos()),
+        // Corpus that will be evolved, we keep it in memory for performance
+        InMemoryCorpus::new(),
+        // Corpus in which we store solutions (crashes in this example),
+        // on disk so the user can get them after stopping the fuzzer
+        OnDiskCorpus::new(PathBuf::from("./crashes")).unwrap(),
+        // States of the feedbacks.
+        // They are the data related to the feedbacks that you want to persist in the State.
+        tuple_list!(feedback_state),
+    );
+
+    // The Stats trait defines how the fuzzer stats are reported to the user
+    let stats = SimpleStats::new(|s| println!("{}", s));
+
+    // The event manager handles the various events generated during the fuzzing loop
+    // such as the notification of the addition of a new item to the corpus
+    let mut mgr = SimpleEventManager::new(stats);
+
+    // A queue policy to get testcases from the corpus
+    let scheduler = QueueCorpusScheduler::new();
+
+    // A fuzzer with feedbacks and a corpus scheduler
+    let mut fuzzer = StdFuzzer::new(scheduler, feedback, objective);
+
+    // Create the executor for an in-process function with just one observer
+    let mut executor = InProcessExecutor::new(
+        &mut harness,
+        tuple_list!(observer),
+        &mut fuzzer,
+        &mut state,
+        &mut mgr,
+    )
+    .expect("Failed to create the Executor");
+
+    // Setup a mutational stage with a scheduled mutator over the encoded (token-level) mutations
+    let mutator = StdScheduledMutator::with_max_iterations(encoded_mutations(), 2);
+    let mut stages = tuple_list!(StdMutationalStage::new(mutator));
+
+    println!("Decoder {:?} ...", &encoder_decoder);
+
+    // Seed the corpus with the encoded initial inputs
+    for input in initial_inputs {
+        fuzzer
+            .add_input(&mut state, &mut executor, &mut mgr, input)
+            .unwrap();
+    }
+
+    fuzzer
+        .fuzz_loop(&mut stages, &mut executor, &mut state, &mut mgr)
+        .expect("Error in the fuzzing loop");
+}
--- a/libafl/Cargo.toml
+++ b/libafl/Cargo.toml
@ -38,7 +38,7 @@ harness = false

 [features]
 default = ["std", "anymap_debug", "derive", "llmp_compression"]
-std = ["serde_json", "hostname", "core_affinity", "nix", "serde/std", "bincode", "wait-timeout"] # print, env, launcher ... support
+std = ["serde_json", "hostname", "core_affinity", "nix", "serde/std", "bincode", "wait-timeout", "regex"] # print, env, launcher ... support
 anymap_debug = ["serde_json"] # uses serde_json to Debug the anymap trait. Disable for smaller footprint.
 derive = ["libafl_derive"] # provide derive(SerdeAny) macro.
 rand_trait = ["rand_core"] # If set, libafl's rand implementations will implement `rand::Rng`
@ -77,6 +77,7 @@ hostname = { version = "^0.3", optional = true } # Is there really no gethostnam
 rand = { version = "0.8.1", optional = true } #
 rand_core = { version = "0.6.2", optional = true } # This dependency allows us to export our RomuRand as rand::Rng.
 nix = { version = "0.20.0", optional = true }
+regex = { version = "1", optional = true }
 libm = "0.2.1"

 wait-timeout = { version = "0.2", optional = true } # used by CommandExecutor to wait for child process
--- a/libafl/src/bolts/llmp.rs
+++ b/libafl/src/bolts/llmp.rs
@ -1899,11 +1899,8 @@ where
            }

            #[cfg(not(feature = "std"))]
-            match sleep_time {
-                Some(_) => {
-                    panic!("Cannot sleep on no_std platform");
-                }
-                None => (),
+            if let Some(time) = sleep_time {
+                panic!("Cannot sleep on no_std platform (requested {:?})", time);
            }
        }
        self.llmp_out
--- a/libafl/src/inputs/encoded.rs
+++ b/libafl/src/inputs/encoded.rs
@ -0,0 +1,260 @@
+//! The `EncodedInput` is the "normal" input, a list of codes, that can be sent directly to the client
+//! (As opposed to other, more abstract, inputs, like a Grammar-Based AST Input)
+
+use ahash::AHasher;
+use core::hash::Hasher;
+
+use alloc::{borrow::ToOwned, rc::Rc, string::String, vec::Vec};
+#[cfg(feature = "std")]
+use core::str::from_utf8;
+use core::{cell::RefCell, convert::From};
+use hashbrown::HashMap;
+#[cfg(feature = "std")]
+use regex::Regex;
+use serde::{Deserialize, Serialize};
+
+use crate::{
+    inputs::{HasLen, Input},
+    Error,
+};
+
+/// Trait to encode raw bytes to an [`EncodedInput`] with the help of a [`Tokenizer`]
+pub trait InputEncoder<T>
+where
+    T: Tokenizer,
+{
+    /// Encode `bytes` to an [`EncodedInput`], using `tokenizer` to split them
+    fn encode(&mut self, bytes: &[u8], tokenizer: &mut T) -> Result<EncodedInput, Error>;
+}
+
+/// Trait to decode an [`EncodedInput`] back to bytes
+pub trait InputDecoder {
+    /// Decode `input`, writing the resulting bytes to `bytes`
+    fn decode(&self, input: &EncodedInput, bytes: &mut Vec<u8>) -> Result<(), Error>;
+}
+
+/// Trait to split bytes into a list of string tokens
+pub trait Tokenizer {
+    /// Split `bytes` into tokens
+    fn tokenize(&self, bytes: &[u8]) -> Result<Vec<String>, Error>;
+}
+
+/// Encoder and decoder that assigns a numeric code to each distinct token it sees
+#[derive(Clone, Debug)]
+pub struct TokenInputEncoderDecoder {
+    /// Maps each known token to its code
+    token_table: HashMap<String, u32>,
+    /// Maps each code back to its token
+    id_table: HashMap<u32, String>,
+    /// The code that will be assigned to the next unknown token
+    next_id: u32,
+}
+
+impl<T> InputEncoder<T> for TokenInputEncoderDecoder
+where
+    T: Tokenizer,
+{
+    /// Tokenizes `bytes` and maps every token to its code.
+    /// Tokens seen for the first time are registered in both tables with the next free code.
+    fn encode(&mut self, bytes: &[u8], tokenizer: &mut T) -> Result<EncodedInput, Error> {
+        let tokens = tokenizer.tokenize(bytes)?;
+        let mut codes = Vec::with_capacity(tokens.len());
+        for tok in tokens {
+            let code = match self.token_table.get(&tok) {
+                Some(&known) => known,
+                None => {
+                    // Unknown token: register it under a fresh code
+                    let fresh = self.next_id;
+                    self.token_table.insert(tok.clone(), fresh);
+                    self.id_table.insert(fresh, tok);
+                    self.next_id += 1;
+                    fresh
+                }
+            };
+            codes.push(code);
+        }
+        Ok(EncodedInput::new(codes))
+    }
+}
+
+impl InputDecoder for TokenInputEncoderDecoder {
+    /// Appends the token of every code in `input` to `bytes`, each followed by a single space.
+    ///
+    /// Codes are wrapped modulo the number of assigned codes (`next_id`), so out-of-range
+    /// codes still decode to a known token.
+    ///
+    /// # Errors
+    /// Returns [`Error::IllegalState`] if a code cannot be resolved. This includes decoding a
+    /// non-empty input while the table is still empty, which previously panicked with a
+    /// division by zero.
+    fn decode(&self, input: &EncodedInput, bytes: &mut Vec<u8>) -> Result<(), Error> {
+        for &id in input.codes() {
+            // `checked_rem` yields `None` for an empty table instead of panicking on `% 0`
+            let tok = id
+                .checked_rem(self.next_id)
+                .and_then(|wrapped| self.id_table.get(&wrapped))
+                .ok_or_else(|| {
+                    Error::IllegalState(format!("Id {} not in the decoder table", id))
+                })?;
+            bytes.extend_from_slice(tok.as_bytes());
+            bytes.push(b' ');
+        }
+        Ok(())
+    }
+}
+
+impl TokenInputEncoderDecoder {
+    /// Creates a new [`TokenInputEncoderDecoder`] with empty tables; the first code assigned is 0
+    #[must_use]
+    pub fn new() -> Self {
+        Self {
+            token_table: HashMap::default(),
+            id_table: HashMap::default(),
+            next_id: 0,
+        }
+    }
+}
+
+impl Default for TokenInputEncoderDecoder {
+    /// Same as [`TokenInputEncoderDecoder::new`]
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+/// A regex-based [`Tokenizer`] that recognizes comments, string literals and identifiers
+#[cfg(feature = "std")]
+pub struct NaiveTokenizer {
+    /// Matches identifiers; text between two matches becomes a token of its own
+    ident_re: Regex,
+    /// Matches comments, which are removed before tokenizing
+    comment_re: Regex,
+    /// Matches string literals, each of which is kept as a single token
+    string_re: Regex,
+}
+
+#[cfg(feature = "std")]
+impl NaiveTokenizer {
+    /// Creates a new [`NaiveTokenizer`] from the regexes matching identifiers, comments and
+    /// string literals
+    #[must_use]
+    pub fn new(ident_re: Regex, comment_re: Regex, string_re: Regex) -> Self {
+        Self {
+            ident_re,
+            comment_re,
+            string_re,
+        }
+    }
+}
+
+#[cfg(feature = "std")]
+impl Default for NaiveTokenizer {
+    /// Creates a tokenizer with regexes for C-like identifiers, C++ style comments and
+    /// single- or double-quoted string literals with backslash escapes.
+    fn default() -> Self {
+        Self {
+            // Generic identifier regex
+            ident_re: Regex::new("[A-Za-z0-9_$]+").unwrap(),
+            // C++ style comments: a `/* ... */` block is matched lazily, so it may contain
+            // `*` and newlines; a `// ...` comment stops at the end of its line instead of
+            // swallowing the code that follows it.
+            comment_re: Regex::new(r"(?s:/\*.*?\*/)|(//[^\n]*)").unwrap(),
+            // " and ' string regex: a backslash always consumes the following char, so an
+            // escaped quote does not terminate the literal.
+            string_re: Regex::new(r#""(?s:\\.|[^"\\])*"|'(?s:\\.|[^'\\])*'"#).unwrap(),
+        }
+    }
+}
+
+#[cfg(feature = "std")]
+impl Tokenizer for NaiveTokenizer {
+    /// Splits `bytes` into string literals, identifiers and the text between identifiers.
+    ///
+    /// Comments are removed first. This happens before string literals are located, so text
+    /// matching the comment regex inside a string literal is removed as well.
+    /// Whitespace outside of string literals only separates tokens and is discarded.
+    ///
+    /// # Errors
+    /// Returns [`Error::IllegalArgument`] if `bytes` is not valid UTF-8.
+    fn tokenize(&self, bytes: &[u8]) -> Result<Vec<String>, Error> {
+        /// Tokenizes a chunk without string literals: every whitespace-separated word is
+        /// split into identifiers and the non-identifier runs around them.
+        fn push_code_tokens(ident_re: &Regex, chunk: &str, tokens: &mut Vec<String>) {
+            for word in chunk.split_whitespace() {
+                let mut ident_prev = 0;
+                for ident_match in ident_re.find_iter(word) {
+                    if ident_match.start() > ident_prev {
+                        tokens.push(word[ident_prev..ident_match.start()].to_owned());
+                    }
+                    tokens.push(ident_match.as_str().to_owned());
+                    ident_prev = ident_match.end();
+                }
+                if ident_prev < word.len() {
+                    tokens.push(word[ident_prev..].to_owned());
+                }
+            }
+        }
+
+        let text =
+            from_utf8(bytes).map_err(|_| Error::IllegalArgument("Invalid UTF-8".to_owned()))?;
+        // `replace_all` only allocates if a comment was actually removed
+        let stripped = self.comment_re.replace_all(text, "");
+        let code: &str = &stripped;
+
+        let mut tokens = vec![];
+        let mut str_prev = 0;
+        for str_match in self.string_re.find_iter(code) {
+            // Everything between the previous string literal and this one is plain code
+            push_code_tokens(&self.ident_re, &code[str_prev..str_match.start()], &mut tokens);
+            tokens.push(str_match.as_str().to_owned());
+            str_prev = str_match.end();
+        }
+        // Trailing code after the last string literal
+        push_code_tokens(&self.ident_re, &code[str_prev..], &mut tokens);
+        Ok(tokens)
+    }
+}
+
+/// An input made of a list of `u32` codes
+#[derive(Serialize, Deserialize, Clone, Debug, Default, PartialEq, Eq)]
+pub struct EncodedInput {
+    /// The input representation as list of codes
+    codes: Vec<u32>,
+}
+
+impl Input for EncodedInput {
+    /// Derives the name from a hash over all codes, printed as 16 hex digits
+    #[must_use]
+    fn generate_name(&self, _idx: usize) -> String {
+        let mut hasher = AHasher::new_with_keys(0, 0);
+        self.codes
+            .iter()
+            .for_each(|code| hasher.write(&code.to_le_bytes()));
+        let digest = hasher.finish();
+        format!("{:016x}", digest)
+    }
+}
+
+/// Wraps an [`EncodedInput`] into an `Rc<RefCell<_>>`
+impl From<EncodedInput> for Rc<RefCell<EncodedInput>> {
+    fn from(input: EncodedInput) -> Self {
+        Rc::new(RefCell::new(input))
+    }
+}
+
+impl HasLen for EncodedInput {
+    /// The length of this input, counted in codes
+    #[inline]
+    fn len(&self) -> usize {
+        self.codes.len()
+    }
+}
+
+impl From<Vec<u32>> for EncodedInput {
+    /// Creates an [`EncodedInput`] that takes ownership of `codes`
+    #[must_use]
+    fn from(codes: Vec<u32>) -> Self {
+        Self::new(codes)
+    }
+}
+
+impl From<&[u32]> for EncodedInput {
+    /// Creates an [`EncodedInput`] holding a copy of `codes`
+    #[must_use]
+    fn from(codes: &[u32]) -> Self {
+        let owned: Vec<u32> = codes.to_vec();
+        Self::new(owned)
+    }
+}
+
+impl EncodedInput {
+    /// Creates a new codes input using the given codes
+    #[must_use]
+    pub fn new(codes: Vec<u32>) -> Self {
+        Self { codes }
+    }
+
+    /// The codes of this input, as a read-only slice
+    #[must_use]
+    pub fn codes(&self) -> &[u32] {
+        &self.codes
+    }
+
+    /// The codes of this input, as a mutable [`Vec`] that can also be resized
+    #[must_use]
+    pub fn codes_mut(&mut self) -> &mut Vec<u32> {
+        &mut self.codes
+    }
+}
+
+#[cfg(feature = "std")]
+#[cfg(test)]
+mod tests {
+    use crate::inputs::encoded::{
+        InputDecoder, InputEncoder, NaiveTokenizer, TokenInputEncoderDecoder,
+    };
+    use core::str::from_utf8;
+
+    #[test]
+    fn test_input() {
+        let mut t = NaiveTokenizer::default();
+        let mut ed = TokenInputEncoderDecoder::new();
+        let input = ed
+            .encode("/* test */a = 'pippo baudo'; b=c+a\n".as_bytes(), &mut t)
+            .unwrap();
+        let mut bytes = vec![];
+        ed.decode(&input, &mut bytes).unwrap();
+        assert_eq!(
+            from_utf8(&bytes).unwrap(),
+            "a = 'pippo baudo' ; b = c + a ".to_owned()
+        );
+    }
+}
--- a/libafl/src/inputs/mod.rs
+++ b/libafl/src/inputs/mod.rs
@ -3,6 +3,9 @@
 pub mod bytes;
 pub use bytes::BytesInput;

+pub mod encoded;
+pub use encoded::*;
+
 use alloc::{
    string::{String, ToString},
    vec::Vec,
--- a/libafl/src/mutators/encoded_mutations.rs
+++ b/libafl/src/mutators/encoded_mutations.rs
@ -0,0 +1,655 @@
+use alloc::vec::Vec;
+use core::{
+    cmp::{max, min},
+    marker::PhantomData,
+};
+
+use crate::{
+    bolts::{
+        rands::Rand,
+        tuples::{tuple_list, tuple_list_type},
+    },
+    corpus::Corpus,
+    inputs::EncodedInput,
+    mutators::{
+        mutations::{buffer_copy, buffer_self_copy, ARITH_MAX},
+        MutationResult, Mutator, Named,
+    },
+    state::{HasCorpus, HasMaxSize, HasRand},
+    Error,
+};
+
+/// Set a code in the input as a random value
+#[derive(Default)]
+pub struct EncodedRandMutator<R, S>
+where
+    S: HasRand<R>,
+    R: Rand,
+{
+    phantom: PhantomData<(R, S)>,
+}
+
+impl<R, S> Mutator<EncodedInput, S> for EncodedRandMutator<R, S>
+where
+    S: HasRand<R>,
+    R: Rand,
+{
+    /// Overwrites one randomly chosen code with a random value.
+    /// Empty inputs are skipped.
+    fn mutate(
+        &mut self,
+        state: &mut S,
+        input: &mut EncodedInput,
+        _stage_idx: i32,
+    ) -> Result<MutationResult, Error> {
+        if input.codes().is_empty() {
+            return Ok(MutationResult::Skipped);
+        }
+
+        let slot = state.rand_mut().choose(input.codes_mut());
+        // The cast keeps the low 32 bits of the random value
+        *slot = state.rand_mut().next() as u32;
+        Ok(MutationResult::Mutated)
+    }
+}
+
+impl<R, S> Named for EncodedRandMutator<R, S>
+where
+    S: HasRand<R>,
+    R: Rand,
+{
+    fn name(&self) -> &str {
+        "EncodedRandMutator"
+    }
+}
+
+impl<R, S> EncodedRandMutator<R, S>
+where
+    S: HasRand<R>,
+    R: Rand,
+{
+    /// Creates a new [`EncodedRandMutator`].
+    #[must_use]
+    pub fn new() -> Self {
+        Self {
+            phantom: PhantomData,
+        }
+    }
+}
+
+/// Increment a random code in the input
+#[derive(Default)]
+pub struct EncodedIncMutator<R, S>
+where
+    S: HasRand<R>,
+    R: Rand,
+{
+    phantom: PhantomData<(R, S)>,
+}
+
+impl<R, S> Mutator<EncodedInput, S> for EncodedIncMutator<R, S>
+where
+    S: HasRand<R>,
+    R: Rand,
+{
+    /// Increments one randomly chosen code by one, wrapping around on overflow.
+    /// Empty inputs are skipped.
+    fn mutate(
+        &mut self,
+        state: &mut S,
+        input: &mut EncodedInput,
+        _stage_idx: i32,
+    ) -> Result<MutationResult, Error> {
+        if input.codes().is_empty() {
+            return Ok(MutationResult::Skipped);
+        }
+
+        let slot = state.rand_mut().choose(input.codes_mut());
+        *slot = slot.wrapping_add(1);
+        Ok(MutationResult::Mutated)
+    }
+}
+
+impl<R, S> Named for EncodedIncMutator<R, S>
+where
+    S: HasRand<R>,
+    R: Rand,
+{
+    fn name(&self) -> &str {
+        "EncodedIncMutator"
+    }
+}
+
+impl<R, S> EncodedIncMutator<R, S>
+where
+    S: HasRand<R>,
+    R: Rand,
+{
+    /// Creates a new [`EncodedIncMutator`].
+    #[must_use]
+    pub fn new() -> Self {
+        Self {
+            phantom: PhantomData,
+        }
+    }
+}
+
+/// Decrement a random code in the input
+#[derive(Default)]
+pub struct EncodedDecMutator<R, S>
+where
+    S: HasRand<R>,
+    R: Rand,
+{
+    phantom: PhantomData<(R, S)>,
+}
+
+impl<R, S> Mutator<EncodedInput, S> for EncodedDecMutator<R, S>
+where
+    S: HasRand<R>,
+    R: Rand,
+{
+    /// Decrements one randomly chosen code by one, wrapping around on underflow.
+    /// Empty inputs are skipped.
+    fn mutate(
+        &mut self,
+        state: &mut S,
+        input: &mut EncodedInput,
+        _stage_idx: i32,
+    ) -> Result<MutationResult, Error> {
+        if input.codes().is_empty() {
+            return Ok(MutationResult::Skipped);
+        }
+
+        let slot = state.rand_mut().choose(input.codes_mut());
+        *slot = slot.wrapping_sub(1);
+        Ok(MutationResult::Mutated)
+    }
+}
+
+impl<R, S> Named for EncodedDecMutator<R, S>
+where
+    S: HasRand<R>,
+    R: Rand,
+{
+    fn name(&self) -> &str {
+        "EncodedDecMutator"
+    }
+}
+
+impl<R, S> EncodedDecMutator<R, S>
+where
+    S: HasRand<R>,
+    R: Rand,
+{
+    /// Creates a new [`EncodedDecMutator`].
+    #[must_use]
+    pub fn new() -> Self {
+        Self {
+            phantom: PhantomData,
+        }
+    }
+}
+
+/// Adds or subtracts a random value up to `ARITH_MAX` to a random place in the codes [`Vec`].
+#[derive(Default)]
+pub struct EncodedAddMutator<R, S>
+where
+    S: HasRand<R>,
+    R: Rand,
+{
+    phantom: PhantomData<(R, S)>,
+}
+
+impl<R, S> Mutator<EncodedInput, S> for EncodedAddMutator<R, S>
+where
+    S: HasRand<R>,
+    R: Rand,
+{
+    /// Adds or subtracts (chosen at random) a random amount in `1..=ARITH_MAX` to one
+    /// randomly chosen code, wrapping around on overflow. Empty inputs are skipped.
+    fn mutate(
+        &mut self,
+        state: &mut S,
+        input: &mut EncodedInput,
+        _stage_idx: i32,
+    ) -> Result<MutationResult, Error> {
+        if input.codes().is_empty() {
+            return Ok(MutationResult::Skipped);
+        }
+
+        let slot = state.rand_mut().choose(input.codes_mut());
+        let delta = 1 + state.rand_mut().below(ARITH_MAX) as u32;
+        let add = state.rand_mut().below(2) == 0;
+        *slot = if add {
+            slot.wrapping_add(delta)
+        } else {
+            slot.wrapping_sub(delta)
+        };
+        Ok(MutationResult::Mutated)
+    }
+}
+
+impl<R, S> Named for EncodedAddMutator<R, S>
+where
+    S: HasRand<R>,
+    R: Rand,
+{
+    fn name(&self) -> &str {
+        "EncodedAddMutator"
+    }
+}
+
+impl<R, S> EncodedAddMutator<R, S>
+where
+    S: HasRand<R>,
+    R: Rand,
+{
+    /// Creates a new [`EncodedAddMutator`].
+    #[must_use]
+    pub fn new() -> Self {
+        Self {
+            phantom: PhantomData,
+        }
+    }
+}
+
+/// Codes delete mutation for encoded inputs
+#[derive(Default)]
+pub struct EncodedDeleteMutator<R, S>
+where
+    S: HasRand<R>,
+    R: Rand,
+{
+    phantom: PhantomData<(R, S)>,
+}
+
+impl<R, S> Mutator<EncodedInput, S> for EncodedDeleteMutator<R, S>
+where
+    S: HasRand<R>,
+    R: Rand,
+{
+    /// Removes a random range of codes from the input.
+    ///
+    /// Inputs with two codes or fewer are skipped, and so is the mutation when the randomly
+    /// chosen range is empty, since the input would be left unchanged.
+    fn mutate(
+        &mut self,
+        state: &mut S,
+        input: &mut EncodedInput,
+        _stage_idx: i32,
+    ) -> Result<MutationResult, Error> {
+        let size = input.codes().len();
+        if size <= 2 {
+            return Ok(MutationResult::Skipped);
+        }
+
+        let off = state.rand_mut().below(size as u64) as usize;
+        let len = state.rand_mut().below((size - off) as u64) as usize;
+        if len == 0 {
+            // Nothing would be removed: do not report an unchanged input as mutated
+            return Ok(MutationResult::Skipped);
+        }
+        input.codes_mut().drain(off..off + len);
+
+        Ok(MutationResult::Mutated)
+    }
+}
+
+impl<R, S> Named for EncodedDeleteMutator<R, S>
+where
+    S: HasRand<R>,
+    R: Rand,
+{
+    fn name(&self) -> &str {
+        "EncodedDeleteMutator"
+    }
+}
+
+impl<R, S> EncodedDeleteMutator<R, S>
+where
+    S: HasRand<R>,
+    R: Rand,
+{
+    /// Creates a new [`EncodedDeleteMutator`].
+    #[must_use]
+    pub fn new() -> Self {
+        Self {
+            phantom: PhantomData,
+        }
+    }
+}
+
+/// Insert mutation for encoded inputs
+#[derive(Default)]
+pub struct EncodedInsertCopyMutator<R, S>
+where
+    S: HasRand<R> + HasMaxSize,
+    R: Rand,
+{
+    tmp_buf: Vec<u32>,
+    phantom: PhantomData<(R, S)>,
+}
+
+impl<R, S> Mutator<EncodedInput, S> for EncodedInsertCopyMutator<R, S>
+where
+    S: HasRand<R> + HasMaxSize,
+    R: Rand,
+{
+    /// Inserts a copy of a random chunk of the input (at most 16 codes) at a random offset.
+    ///
+    /// Empty inputs are skipped. The chunk is shortened so the input does not grow beyond
+    /// the maximum size of the state; if there is no room left, the mutation is skipped.
+    fn mutate(
+        &mut self,
+        state: &mut S,
+        input: &mut EncodedInput,
+        _stage_idx: i32,
+    ) -> Result<MutationResult, Error> {
+        let max_size = state.max_size();
+        let size = input.codes().len();
+        if size == 0 {
+            return Ok(MutationResult::Skipped);
+        }
+
+        let off = state.rand_mut().below((size + 1) as u64) as usize;
+        let mut len = 1 + state.rand_mut().below(min(16, size as u64)) as usize;
+
+        // Clamp the chunk to the room that is left, or give up if there is none
+        if size + len > max_size {
+            if max_size <= size {
+                return Ok(MutationResult::Skipped);
+            }
+            len = max_size - size;
+        }
+
+        // `len` never exceeds `size`, so the subtraction cannot underflow
+        let from = match size - len {
+            0 => 0,
+            span => state.rand_mut().below(span as u64) as usize,
+        };
+
+        // Save the chunk first, as shifting the tail may overwrite its source
+        input.codes_mut().resize(size + len, 0);
+        self.tmp_buf.resize(len, 0);
+        buffer_copy(&mut self.tmp_buf, input.codes(), from, 0, len);
+
+        // Shift the tail to make room, then drop the saved chunk into the gap
+        buffer_self_copy(input.codes_mut(), off, off + len, size - off);
+        buffer_copy(input.codes_mut(), &self.tmp_buf, 0, off, len);
+
+        Ok(MutationResult::Mutated)
+    }
+}
+
+impl<R, S> Named for EncodedInsertCopyMutator<R, S>
+where
+    S: HasRand<R> + HasMaxSize,
+    R: Rand,
+{
+    fn name(&self) -> &str {
+        "EncodedInsertCopyMutator"
+    }
+}
+
+impl<R, S> EncodedInsertCopyMutator<R, S>
+where
+    S: HasRand<R> + HasMaxSize,
+    R: Rand,
+{
+    /// Creates a new [`EncodedInsertCopyMutator`].
+    #[must_use]
+    pub fn new() -> Self {
+        Self {
+            tmp_buf: vec![],
+            phantom: PhantomData,
+        }
+    }
+}
+
+/// Codes copy mutation for encoded inputs
+#[derive(Default)]
+pub struct EncodedCopyMutator<R, S>
+where
+    S: HasRand<R>,
+    R: Rand,
+{
+    phantom: PhantomData<(R, S)>,
+}
+
+impl<R, S> Mutator<EncodedInput, S> for EncodedCopyMutator<R, S>
+where
+    S: HasRand<R>,
+    R: Rand,
+{
+    /// Copies a random chunk of the input over another random position of the same input,
+    /// without changing its length. Inputs with fewer than two codes are skipped.
+    fn mutate(
+        &mut self,
+        state: &mut S,
+        input: &mut EncodedInput,
+        _stage_idx: i32,
+    ) -> Result<MutationResult, Error> {
+        let size = input.codes().len();
+        if size <= 1 {
+            return Ok(MutationResult::Skipped);
+        }
+
+        let from = state.rand_mut().below(size as u64) as usize;
+        let to = state.rand_mut().below(size as u64) as usize;
+        // The chunk must fit behind both the source and the destination offset
+        let room = size - max(from, to);
+        let len = 1 + state.rand_mut().below(room as u64) as usize;
+
+        buffer_self_copy(input.codes_mut(), from, to, len);
+
+        Ok(MutationResult::Mutated)
+    }
+}
+
+impl<R, S> Named for EncodedCopyMutator<R, S>
+where
+    S: HasRand<R>,
+    R: Rand,
+{
+    fn name(&self) -> &str {
+        "EncodedCopyMutator"
+    }
+}
+
+impl<R, S> EncodedCopyMutator<R, S>
+where
+    S: HasRand<R>,
+    R: Rand,
+{
+    /// Creates a new [`EncodedCopyMutator`].
+    #[must_use]
+    pub fn new() -> Self {
+        Self {
+            phantom: PhantomData,
+        }
+    }
+}
+
+/// Crossover insert mutation for encoded inputs
+#[derive(Default)]
+pub struct EncodedCrossoverInsertMutator<C, R, S>
+where
+    C: Corpus<EncodedInput>,
+    R: Rand,
+    S: HasRand<R> + HasCorpus<C, EncodedInput> + HasMaxSize,
+{
+    phantom: PhantomData<(C, R, S)>,
+}
+
+impl<C, R, S> Mutator<EncodedInput, S> for EncodedCrossoverInsertMutator<C, R, S>
+where
+    C: Corpus<EncodedInput>,
+    R: Rand,
+    S: HasRand<R> + HasCorpus<C, EncodedInput> + HasMaxSize,
+{
+    /// Inserts a random chunk of a random other corpus entry at a random offset of the input.
+    ///
+    /// The mutation is skipped if the picked entry is the current one, if it has fewer than
+    /// two codes, or if the input cannot grow without exceeding the maximum size of the state.
+    fn mutate(
+        &mut self,
+        state: &mut S,
+        input: &mut EncodedInput,
+        _stage_idx: i32,
+    ) -> Result<MutationResult, Error> {
+        let size = input.codes().len();
+
+        // We don't want to use the testcase we're already using for splicing
+        let corpus_len = state.corpus().count();
+        let other_idx = state.rand_mut().below(corpus_len as u64) as usize;
+        if matches!(state.corpus().current(), Some(cur) if other_idx == *cur) {
+            return Ok(MutationResult::Skipped);
+        }
+
+        // Load the other input once to learn its size; the borrow ends with this statement
+        let other_size = state
+            .corpus()
+            .get(other_idx)?
+            .borrow_mut()
+            .load_input()?
+            .codes()
+            .len();
+        if other_size < 2 {
+            return Ok(MutationResult::Skipped);
+        }
+
+        let max_size = state.max_size();
+        let from = state.rand_mut().below(other_size as u64) as usize;
+        let to = state.rand_mut().below(size as u64) as usize;
+        let mut len = 1 + state.rand_mut().below((other_size - from) as u64) as usize;
+
+        let mut other_testcase = state.corpus().get(other_idx)?.borrow_mut();
+        let other = other_testcase.load_input()?;
+
+        // Clamp the chunk to the room that is left, or give up if there is none
+        if size + len > max_size {
+            if max_size <= size {
+                return Ok(MutationResult::Skipped);
+            }
+            len = max_size - size;
+        }
+
+        // Grow the input, shift its tail to make room, then copy the foreign chunk in
+        input.codes_mut().resize(size + len, 0);
+        buffer_self_copy(input.codes_mut(), to, to + len, size - to);
+        buffer_copy(input.codes_mut(), other.codes(), from, to, len);
+
+        Ok(MutationResult::Mutated)
+    }
+}
+
+impl<C, R, S> Named for EncodedCrossoverInsertMutator<C, R, S>
+where
+    C: Corpus<EncodedInput>,
+    R: Rand,
+    S: HasRand<R> + HasCorpus<C, EncodedInput> + HasMaxSize,
+{
+    fn name(&self) -> &str {
+        "EncodedCrossoverInsertMutator"
+    }
+}
+
+impl<C, R, S> EncodedCrossoverInsertMutator<C, R, S>
+where
+    C: Corpus<EncodedInput>,
+    R: Rand,
+    S: HasRand<R> + HasCorpus<C, EncodedInput> + HasMaxSize,
+{
+    /// Creates a new [`EncodedCrossoverInsertMutator`].
+    #[must_use]
+    pub fn new() -> Self {
+        Self {
+            phantom: PhantomData,
+        }
+    }
+}
+
+/// Crossover replace mutation for encoded inputs
+#[derive(Default)]
+pub struct EncodedCrossoverReplaceMutator<C, R, S>
+where
+    C: Corpus<EncodedInput>,
+    R: Rand,
+    S: HasRand<R> + HasCorpus<C, EncodedInput>,
+{
+    phantom: PhantomData<(C, R, S)>,
+}
+
+impl<C, R, S> Mutator<EncodedInput, S> for EncodedCrossoverReplaceMutator<C, R, S>
+where
+    C: Corpus<EncodedInput>,
+    R: Rand,
+    S: HasRand<R> + HasCorpus<C, EncodedInput>,
+{
+    /// Overwrites a random range of the input with a random chunk of a random other corpus
+    /// entry, without changing the length of the input.
+    ///
+    /// The mutation is skipped if the input is empty, if the picked entry is the current one
+    /// or has fewer than two codes, or if the randomly chosen chunk is empty (the input
+    /// would be left unchanged).
+    fn mutate(
+        &mut self,
+        state: &mut S,
+        input: &mut EncodedInput,
+        _stage_idx: i32,
+    ) -> Result<MutationResult, Error> {
+        let size = input.codes().len();
+        if size == 0 {
+            return Ok(MutationResult::Skipped);
+        }
+
+        // We don't want to use the testcase we're already using for splicing
+        let count = state.corpus().count();
+        let idx = state.rand_mut().below(count as u64) as usize;
+        if let Some(cur) = state.corpus().current() {
+            if idx == *cur {
+                return Ok(MutationResult::Skipped);
+            }
+        }
+
+        // Load the other input once to learn its size; the borrow ends with this statement
+        let other_size = state
+            .corpus()
+            .get(idx)?
+            .borrow_mut()
+            .load_input()?
+            .codes()
+            .len();
+        if other_size < 2 {
+            return Ok(MutationResult::Skipped);
+        }
+
+        let from = state.rand_mut().below(other_size as u64) as usize;
+        let len = state.rand_mut().below(min(other_size - from, size) as u64) as usize;
+        if len == 0 {
+            // Nothing would be replaced: do not report an unchanged input as mutated
+            return Ok(MutationResult::Skipped);
+        }
+        let to = state.rand_mut().below((size - len) as u64) as usize;
+
+        let mut other_testcase = state.corpus().get(idx)?.borrow_mut();
+        let other = other_testcase.load_input()?;
+
+        buffer_copy(input.codes_mut(), other.codes(), from, to, len);
+
+        Ok(MutationResult::Mutated)
+    }
+}
+
+impl<C, R, S> Named for EncodedCrossoverReplaceMutator<C, R, S>
+where
+    C: Corpus<EncodedInput>,
+    R: Rand,
+    S: HasRand<R> + HasCorpus<C, EncodedInput>,
+{
+    fn name(&self) -> &str {
+        "EncodedCrossoverReplaceMutator"
+    }
+}
+
+impl<C, R, S> EncodedCrossoverReplaceMutator<C, R, S>
+where
+    C: Corpus<EncodedInput>,
+    R: Rand,
+    S: HasRand<R> + HasCorpus<C, EncodedInput>,
+{
+    /// Creates a new [`EncodedCrossoverReplaceMutator`].
+    #[must_use]
+    pub fn new() -> Self {
+        Self {
+            phantom: PhantomData,
+        }
+    }
+}
+
+/// Get the mutations that compose the encoded mutator.
+///
+/// Returns a tuple list holding one freshly created instance of every mutator defined in
+/// this module, in the order in which they are declared.
+#[must_use]
+pub fn encoded_mutations<C, R, S>() -> tuple_list_type!(
+       EncodedRandMutator<R, S>,
+       EncodedIncMutator<R, S>,
+       EncodedDecMutator<R, S>,
+       EncodedAddMutator<R, S>,
+       EncodedDeleteMutator<R, S>,
+       EncodedInsertCopyMutator<R, S>,
+       EncodedCopyMutator<R, S>,
+       EncodedCrossoverInsertMutator<C, R, S>,
+       EncodedCrossoverReplaceMutator<C, R, S>,
+   )
+where
+    S: HasRand<R> + HasCorpus<C, EncodedInput> + HasMaxSize,
+    C: Corpus<EncodedInput>,
+    R: Rand,
+{
+    tuple_list!(
+        EncodedRandMutator::new(),
+        EncodedIncMutator::new(),
+        EncodedDecMutator::new(),
+        EncodedAddMutator::new(),
+        EncodedDeleteMutator::new(),
+        EncodedInsertCopyMutator::new(),
+        EncodedCopyMutator::new(),
+        EncodedCrossoverInsertMutator::new(),
+        EncodedCrossoverReplaceMutator::new(),
+    )
+}
--- a/libafl/src/mutators/mod.rs
+++ b/libafl/src/mutators/mod.rs
@ -6,6 +6,8 @@ pub mod mutations;
 pub use mutations::*;
 pub mod token_mutations;
 pub use token_mutations::*;
+pub mod encoded_mutations;
+pub use encoded_mutations::*;
 pub mod mopt_mutator;
 pub use mopt_mutator::*;

--- a/libafl/src/mutators/mutations.rs
+++ b/libafl/src/mutators/mutations.rs
@ -19,7 +19,7 @@ use core::{

 /// Mem move in the own vec
 #[inline]
-pub fn buffer_self_copy(data: &mut [u8], from: usize, to: usize, len: usize) {
+pub fn buffer_self_copy<T>(data: &mut [T], from: usize, to: usize, len: usize) {
    debug_assert!(!data.is_empty());
    debug_assert!(from + len <= data.len());
    debug_assert!(to + len <= data.len());
@ -33,7 +33,7 @@ pub fn buffer_self_copy(data: &mut [u8], from: usize, to: usize, len: usize) {

 /// Mem move between vecs
 #[inline]
-pub fn buffer_copy(dst: &mut [u8], src: &[u8], from: usize, to: usize, len: usize) {
+pub fn buffer_copy<T>(dst: &mut [T], src: &[T], from: usize, to: usize, len: usize) {
    debug_assert!(!dst.is_empty());
    debug_assert!(!src.is_empty());
    debug_assert!(from + len <= src.len());
@ -51,21 +51,21 @@ pub fn buffer_copy(dst: &mut [u8], src: &[u8], from: usize, to: usize, len: usiz
 /// The compiler does the heavy lifting.
 /// see <https://stackoverflow.com/a/51732799/1345238/>
 #[inline]
-fn buffer_set(data: &mut [u8], from: usize, len: usize, val: u8) {
+pub fn buffer_set<T: Clone>(data: &mut [T], from: usize, len: usize, val: T) {
    debug_assert!(from + len <= data.len());
    for p in &mut data[from..(from + len)] {
-        *p = val;
+        *p = val.clone(); // T is only bound by Clone, so every slot receives its own clone of val
    }
 }

 /// The max value that will be added or subtracted during add mutations
-const ARITH_MAX: u64 = 35;
+pub const ARITH_MAX: u64 = 35;

-const INTERESTING_8: [i8; 9] = [-128, -1, 0, 1, 16, 32, 64, 100, 127];
-const INTERESTING_16: [i16; 19] = [
+pub const INTERESTING_8: [i8; 9] = [-128, -1, 0, 1, 16, 32, 64, 100, 127];
+pub const INTERESTING_16: [i16; 19] = [
    -128, -1, 0, 1, 16, 32, 64, 100, 127, -32768, -129, 128, 255, 256, 512, 1000, 1024, 4096, 32767,
 ];
-const INTERESTING_32: [i32; 27] = [
+pub const INTERESTING_32: [i32; 27] = [
    -128,
    -1,
    0,
@ -1082,6 +1082,90 @@ where
    }
 }

+/// Bytes insert-copy mutation for inputs with a bytes vector: grows the input by inserting a copy of one of its own chunks at a random offset
+#[derive(Default)]
+pub struct BytesInsertCopyMutator<I, R, S>
+where
+    I: Input + HasBytesVec,
+    S: HasRand<R> + HasMaxSize,
+    R: Rand,
+{
+    tmp_buf: Vec<u8>, // scratch space holding the chunk while the input tail is shifted; resized on every mutate call
+    phantom: PhantomData<(I, R, S)>,
+}
+
+impl<I, R, S> Mutator<I, S> for BytesInsertCopyMutator<I, R, S>
+where
+    I: Input + HasBytesVec,
+    S: HasRand<R> + HasMaxSize,
+    R: Rand,
+{
+    fn mutate(
+        &mut self,
+        state: &mut S,
+        input: &mut I,
+        _stage_idx: i32,
+    ) -> Result<MutationResult, Error> {
+        let max_size = state.max_size();
+        let size = input.bytes().len();
+        if size == 0 {
+            return Ok(MutationResult::Skipped); // an empty input has no chunk to copy
+        }
+        let off = state.rand_mut().below((size + 1) as u64) as usize; // offset at which the copied chunk will be inserted
+        let mut len = 1 + state.rand_mut().below(min(16, size as u64)) as usize; // chunk length: 1 plus a random draw bounded by min(16, size)
+        // Shrink the chunk to the room left under max_size, or skip when no room is left.
+        if size + len > max_size {
+            if max_size > size {
+                len = max_size - size;
+            } else {
+                return Ok(MutationResult::Skipped);
+            }
+        }
+        // Pick where the chunk is read from; when size == len the range size - len is 0, so that case is pinned to 0.
+        let from = if size == len {
+            0
+        } else {
+            state.rand_mut().below((size - len) as u64) as usize
+        };
+        // Grow the input by len zero bytes, then stash the chunk in tmp_buf before the tail is shifted.
+        input.bytes_mut().resize(size + len, 0);
+        self.tmp_buf.resize(len, 0);
+        buffer_copy(&mut self.tmp_buf, input.bytes(), from, 0, len);
+        // Move the tail [off, size) right by len, then write the stashed chunk into the gap at off.
+        buffer_self_copy(input.bytes_mut(), off, off + len, size - off);
+        buffer_copy(input.bytes_mut(), &self.tmp_buf, 0, off, len);
+
+        Ok(MutationResult::Mutated)
+    }
+}
+
+impl<I, R, S> Named for BytesInsertCopyMutator<I, R, S>
+where
+    I: Input + HasBytesVec,
+    S: HasRand<R> + HasMaxSize,
+    R: Rand,
+{
+    fn name(&self) -> &str {
+        "BytesInsertCopyMutator" // fixed label, same text as the type name
+    }
+}
+
+impl<I, R, S> BytesInsertCopyMutator<I, R, S>
+where
+    I: Input + HasBytesVec,
+    S: HasRand<R> + HasMaxSize,
+    R: Rand,
+{
+    /// Creates a new [`BytesInsertCopyMutator`] whose scratch buffer starts out empty.
+    #[must_use]
+    pub fn new() -> Self {
+        Self {
+            tmp_buf: vec![], // sized on demand by the resize call in mutate
+            phantom: PhantomData,
+        }
+    }
+}
+
 /// Bytes swap mutation for inputs with a bytes vector
 #[derive(Default)]
 pub struct BytesSwapMutator<I, R, S>
@ -1198,7 +1282,7 @@ where
        let max_size = state.max_size();
        let from = state.rand_mut().below(other_size as u64) as usize;
        let to = state.rand_mut().below(size as u64) as usize;
-        let mut len = state.rand_mut().below((other_size - from) as u64) as usize;
+        let mut len = 1 + state.rand_mut().below((other_size - from) as u64) as usize;

        let mut other_testcase = state.corpus().get(idx)?.borrow_mut();
        let other = other_testcase.load_input()?;
@ -1273,6 +1357,9 @@ where
        _stage_idx: i32,
    ) -> Result<MutationResult, Error> {
        let size = input.bytes().len();
+        if size == 0 {
+            return Ok(MutationResult::Skipped);
+        }

        // We don't want to use the testcase we're already using for splicing
        let count = state.corpus().count();
--- a/libafl/src/mutators/scheduled.rs
+++ b/libafl/src/mutators/scheduled.rs
@ -103,6 +103,7 @@ where
    S: HasRand<R>,
 {
    mutations: MT,
+    max_iterations: u64,
    phantom: PhantomData<(I, R, S)>,
 }

@ -170,7 +171,7 @@ where
 {
    /// Compute the number of iterations used to apply stacked mutations
    fn iterations(&self, state: &mut S, _: &I) -> u64 {
-        1 << (1 + state.rand_mut().below(6))
+        1 << (1 + state.rand_mut().below(self.max_iterations)) // a power of two: the shift exponent is 1 plus a draw from below(max_iterations)
    }

    /// Get the next mutation to apply
@ -191,6 +192,16 @@ where
    pub fn new(mutations: MT) -> Self {
        StdScheduledMutator {
            mutations,
+            max_iterations: 6, // same bound that iterations() previously hard-coded as below(6)
+            phantom: PhantomData,
+        }
+    }
+
+    /// Create a new [`StdScheduledMutator`] instance specifying mutations and `max_iterations`, the bound passed to `below` when `iterations` draws the shift exponent for the number of stacked mutations
+    pub fn with_max_iterations(mutations: MT, max_iterations: u64) -> Self {
+        StdScheduledMutator {
+            mutations,
+            max_iterations, // stored unchecked; NOTE(review): the shift in iterations() presumably overflows u64 for bounds above 63 — confirm
            phantom: PhantomData,
        }
    }
@ -222,6 +233,7 @@ pub fn havoc_mutations<C, I, R, S>() -> tuple_list_type!(
       BytesSetMutator<I, R, S>,
       BytesRandSetMutator<I, R, S>,
       BytesCopyMutator<I, R, S>,
+       BytesInsertCopyMutator<I, R, S>,
       BytesSwapMutator<I, R, S>,
       CrossoverInsertMutator<C, I, R, S>,
       CrossoverReplaceMutator<C, I, R, S>,
@ -256,6 +268,7 @@ where
        BytesSetMutator::new(),
        BytesRandSetMutator::new(),
        BytesCopyMutator::new(),
+        BytesInsertCopyMutator::new(),
        BytesSwapMutator::new(),
        CrossoverInsertMutator::new(),
        CrossoverReplaceMutator::new(),
--- a/libafl/src/state/mod.rs
+++ b/libafl/src/state/mod.rs
@ -364,7 +364,7 @@ where
        manager: &mut EM,
        in_dir: &Path,
        forced: bool,
-        loader: &dyn Fn(&mut Z, &mut Self, &Path) -> Result<I, Error>,
+        loader: &mut dyn FnMut(&mut Z, &mut Self, &Path) -> Result<I, Error>,
    ) -> Result<(), Error>
    where
        Z: Evaluator<E, EM, I, Self>,
@ -414,9 +414,14 @@ where
        EM: EventFirer<I, Self>,
    {
        for in_dir in in_dirs {
-            self.load_from_directory(fuzzer, executor, manager, in_dir, forced, &|_, _, path| {
-                I::from_file(&path)
-            })?;
+            self.load_from_directory(
+                fuzzer,
+                executor,
+                manager,
+                in_dir,
+                forced,
+                &mut |_, _, path| I::from_file(&path),
+            )?;
        }
        manager.fire(
            self,