Colorization stage (#1039)

* type_replace * separate * more * heap * comment * f * fix * clp * need rev * comment * ColorizationTracingStage * get_raw_map_hash_run * process_execution * metadat * unused TE * resolve type errors * remove colorizationtracingstage * Finally compiles * clp * fmt * a few debug println * revert * fix
2023-02-13 15:02:19 +09:00 · 2023-02-13 15:02:19 +09:00 · bd2de16b4e
commit bd2de16b4e
parent a74e5da268
3 changed files with 423 additions and 0 deletions
--- a/libafl/src/stages/colorization.rs
+++ b/libafl/src/stages/colorization.rs
@ -0,0 +1,415 @@
+//! The colorization stage from colorization() in afl++
+use alloc::{
+    collections::binary_heap::BinaryHeap,
+    string::{String, ToString},
+    vec::Vec,
+};
+use core::{cmp::Ordering, fmt::Debug, marker::PhantomData, ops::Range};
+
+use serde::{Deserialize, Serialize};
+
+use crate::{
+    bolts::{rands::Rand, tuples::MatchName},
+    corpus::{Corpus, CorpusId},
+    events::EventFirer,
+    executors::{Executor, HasObservers},
+    inputs::HasBytesVec,
+    mutators::mutations::buffer_copy,
+    observers::{MapObserver, ObserversTuple},
+    stages::Stage,
+    state::{HasCorpus, HasMetadata, HasRand, UsesState},
+    Error,
+};
+
+/// A candidate range ordered by its length, so that a max-heap pops the longest range first.
+///
+/// Only the length takes part in the ordering: two ranges of the same length compare as
+/// [`Ordering::Equal`] even when they cover different positions, whereas `==` (derived)
+/// compares the actual bounds.
+#[derive(Debug, PartialEq, Eq)]
+struct Bigger(Range<usize>);
+
+impl Ord for Bigger {
+    fn cmp(&self, other: &Self) -> Ordering {
+        let (mine, theirs) = (self.0.len(), other.0.len());
+        mine.cmp(&theirs)
+    }
+}
+
+impl PartialOrd for Bigger {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        // Delegate to `Ord` so that the two orderings can never drift apart.
+        Some(self.cmp(other))
+    }
+}
+
+/// A range ordered by *descending* start offset: the smaller `start` is, the greater the value.
+///
+/// The comparison is reversed on purpose, so that a max-heap yields the earliest range first.
+/// Only `start` takes part in the ordering, whereas `==` (derived) compares both bounds.
+#[derive(Debug, PartialEq, Eq)]
+struct Earlier(Range<usize>);
+
+impl Ord for Earlier {
+    fn cmp(&self, other: &Self) -> Ordering {
+        // Reversed: an earlier start has to rank higher.
+        self.0.start.cmp(&other.0.start).reverse()
+    }
+}
+
+impl PartialOrd for Earlier {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        // Delegate to `Ord` so that the two orderings can never drift apart.
+        Some(self.cmp(other))
+    }
+}
+
+/// The colorization stage.
+///
+/// For the corpus entry it is run on, it searches for byte ranges that can be replaced
+/// without changing the hash of the map observer, and stores them in the state's metadata.
+#[derive(Clone, Debug)]
+pub struct ColorizationStage<EM, O, E, Z> {
+    /// Name under which the map observer of type `O` is looked up in the executor's observers
+    map_observer_name: String,
+    /// Carries the type parameters, none of which is stored in the stage
+    #[allow(clippy::type_complexity)]
+    phantom: PhantomData<(E, EM, O, Z)>,
+}
+
+/// The stage works on the state type of the executor `E`.
+impl<EM, O, E, Z> UsesState for ColorizationStage<EM, O, E, Z>
+where
+    E: UsesState,
+{
+    type State = <E as UsesState>::State;
+}
+
+impl<E, EM, O, Z> Stage<E, EM, Z> for ColorizationStage<EM, O, E, Z>
+where
+    EM: UsesState<State = E::State> + EventFirer,
+    E: HasObservers + Executor<EM, Z>,
+    E::State: HasCorpus + HasMetadata + HasRand,
+    E::Input: HasBytesVec,
+    O: MapObserver,
+    Z: UsesState<State = E::State>,
+{
+    /// Colorizes the corpus entry `corpus_idx`.
+    ///
+    /// All the work is done by `colorize`, which also stores the result in the state.
+    /// The colorized input it returns is not needed here and is dropped.
+    ///
+    /// # Errors
+    /// Forwards any error reported by `colorize`.
+    #[inline]
+    fn perform(
+        &mut self,
+        fuzzer: &mut Z,
+        executor: &mut E,
+        state: &mut E::State,
+        manager: &mut EM,
+        corpus_idx: CorpusId,
+    ) -> Result<(), Error> {
+        let observer_name = self.map_observer_name.as_str();
+        Self::colorize(fuzzer, executor, state, manager, corpus_idx, observer_name)
+            .map(|_colorized| ())
+    }
+}
+
+/// The taint information together with the input it belongs to.
+///
+/// The colorization stage stores here the final input bytes and the merged byte ranges
+/// whose replacement did not change the hash of the map observer.
+#[derive(Debug, Serialize, Deserialize)]
+pub struct TaintMetadata {
+    /// The input bytes the ranges refer to
+    input_vec: Vec<u8>,
+    /// The recorded byte ranges
+    ranges: Vec<Range<usize>>,
+}
+
+impl TaintMetadata {
+    /// Creates the metadata from the input bytes and their ranges
+    #[must_use]
+    pub fn new(input_vec: Vec<u8>, ranges: Vec<Range<usize>>) -> Self {
+        Self { input_vec, ranges }
+    }
+
+    /// Replaces both the stored input and the stored ranges
+    pub fn update(&mut self, input: Vec<u8>, ranges: Vec<Range<usize>>) {
+        *self = Self::new(input, ranges);
+    }
+
+    /// The stored input bytes
+    #[must_use]
+    pub fn input_vec(&self) -> &Vec<u8> {
+        &self.input_vec
+    }
+
+    /// The stored ranges
+    #[must_use]
+    pub fn ranges(&self) -> &Vec<Range<usize>> {
+        &self.ranges
+    }
+}
+
+crate::impl_serdeany!(TaintMetadata);
+
+impl<EM, O, E, Z> ColorizationStage<EM, O, E, Z>
+where
+    EM: UsesState<State = E::State> + EventFirer,
+    O: MapObserver,
+    E: HasObservers + Executor<EM, Z>,
+    E::State: HasCorpus + HasMetadata + HasRand,
+    E::Input: HasBytesVec,
+    Z: UsesState<State = E::State>,
+{
+    /// Colorizes the corpus entry `corpus_idx` and stores the result as [`TaintMetadata`].
+    ///
+    /// A copy of the input is scrambled with `type_replace`. Starting with the whole input,
+    /// the scrambled bytes of a range are copied into the working input and the target is run.
+    /// If the map hash still equals the hash of the unmodified input, the range stays
+    /// replaced and is recorded; otherwise the original bytes are restored and the range is
+    /// split into two halves that are tried later, longest range first.
+    /// The search loop runs at most `2 * input_len` times.
+    ///
+    /// Adjacent recorded ranges are merged, then stored together with the final input bytes
+    /// in the state's [`TaintMetadata`] (updating an existing entry if there is one).
+    ///
+    /// `name` is the name of the map observer whose hash is compared.
+    ///
+    /// Returns the final input, in which every recorded range holds the scrambled bytes.
+    ///
+    /// # Errors
+    /// Fails if the corpus entry cannot be fetched or its input cannot be loaded, or if
+    /// `get_raw_map_hash_run` fails for any of the executions.
+    #[inline]
+    fn colorize(
+        fuzzer: &mut Z,
+        executor: &mut E,
+        state: &mut E::State,
+        manager: &mut EM,
+        corpus_idx: CorpusId,
+        name: &str,
+    ) -> Result<E::Input, Error> {
+        // A failed load is reported to the caller instead of panicking.
+        let mut input = state
+            .corpus()
+            .get(corpus_idx)?
+            .borrow_mut()
+            .load_input()?
+            .clone();
+        // Pristine copy, used to undo a replacement that changed the hash
+        let backup = input.clone();
+        // Scrambled copy, the source of the replacement bytes
+        let mut changed = input.clone();
+
+        // Reference hash of the unmodified input (the run consumes its input, hence the clone)
+        let orig_hash =
+            Self::get_raw_map_hash_run(fuzzer, executor, state, manager, input.clone(), name)?;
+
+        let input_len = changed.bytes().len();
+        Self::type_replace(changed.bytes_mut(), state);
+
+        // Ranges still to be tried; the heap hands out the longest one first
+        let mut ranges = BinaryHeap::new();
+        ranges.push(Bigger(0..input_len));
+
+        // Ranges whose replacement kept the hash; this heap is ordered by start offset
+        let mut ok_ranges = BinaryHeap::new();
+
+        for _ in 0..input_len * 2 {
+            let r = match ranges.pop() {
+                Some(Bigger(range)) => range,
+                None => break,
+            };
+            let copy_len = r.len();
+
+            // Try the scrambled bytes for this range
+            buffer_copy(input.bytes_mut(), changed.bytes(), r.start, r.start, copy_len);
+
+            let changed_hash = Self::get_raw_map_hash_run(
+                fuzzer,
+                executor,
+                state,
+                manager,
+                input.clone(),
+                name,
+            )?;
+
+            if changed_hash == orig_hash {
+                // The replacement did not change the hash: keep it and remember the range
+                ok_ranges.push(Earlier(r));
+            } else {
+                // The hash changed: restore the original bytes ...
+                buffer_copy(input.bytes_mut(), backup.bytes(), r.start, r.start, copy_len);
+
+                // ... and retry with the two halves, unless the range is a single byte
+                if copy_len > 1 {
+                    let mid = r.start + copy_len / 2;
+                    ranges.push(Bigger(r.start..mid));
+                    ranges.push(Bigger(mid..r.end));
+                }
+            }
+        }
+
+        // `into_sorted_vec` is ascending w.r.t. `Earlier`'s reversed ordering, so iterating it
+        // backwards walks the ranges by ascending start offset; touching neighbours are merged.
+        let mut res: Vec<Range<usize>> = Vec::new();
+        for Earlier(range) in ok_ranges.into_sorted_vec().into_iter().rev() {
+            match res.last_mut() {
+                Some(last) if last.end == range.start => last.end = range.end,
+                _ => res.push(range),
+            }
+        }
+
+        let taint_bytes = input.bytes().to_vec();
+        if let Some(meta) = state.metadata_mut().get_mut::<TaintMetadata>() {
+            meta.update(taint_bytes, res);
+        } else {
+            state.add_metadata::<TaintMetadata>(TaintMetadata::new(taint_bytes, res));
+        }
+
+        Ok(input)
+    }
+
+    /// Creates a new [`ColorizationStage`].
+    ///
+    /// Only the name of the given map observer is kept; the observer itself is looked up
+    /// by that name in the executor's observers when the stage runs.
+    #[must_use]
+    pub fn new(map_observer_name: &O) -> Self {
+        let map_observer_name = map_observer_name.name().to_string();
+        Self {
+            map_observer_name,
+            phantom: PhantomData,
+        }
+    }
+
+    /// Runs the target once on `input` and returns the hash of the map observer `name`.
+    ///
+    /// The hash is read after the run but before `post_exec_all` is called, so that it is
+    /// taken from the raw map, before the post-exec hooks (e.g. the one of a hitcounts
+    /// observer) get to process it.
+    ///
+    /// # Errors
+    /// Fails if a pre-exec or post-exec hook or the execution itself fails, or if no
+    /// observer of type `O` is registered under `name`.
+    fn get_raw_map_hash_run(
+        fuzzer: &mut Z,
+        executor: &mut E,
+        state: &mut E::State,
+        manager: &mut EM,
+        input: E::Input,
+        name: &str,
+    ) -> Result<usize, Error> {
+        executor.observers_mut().pre_exec_all(state, &input)?;
+        let exit_kind = executor.run_target(fuzzer, state, manager, &input)?;
+
+        // Must happen before the post-exec hooks below
+        let raw_hash = executor
+            .observers()
+            .match_name::<O>(name)
+            .map(|observer| observer.hash() as usize)
+            .ok_or_else(|| Error::key_not_found("MapObserver not found".to_string()))?;
+
+        executor
+            .observers_mut()
+            .post_exec_all(state, &input, &exit_kind)?;
+
+        Ok(raw_hash)
+    }
+
+    /// Replaces every byte of `bytes`, in place, by a byte of the same character class.
+    ///
+    /// Letters, digits and punctuation are replaced by a random member of their class
+    /// (drawn from `state`'s random generator); a few bytes are swapped with a fixed partner
+    /// (`'0'`/`'1'`, `'+'`/`'/'`, space/tab, `'\r'`/`'\n'`); every other byte is XORed with a
+    /// constant.
+    ///
+    /// NOTE(review): the random draw never yields the first byte of a class, but it can yield
+    /// the byte that is being replaced (e.g. `'B'` may stay `'B'`) - confirm this is intended.
+    fn type_replace(bytes: &mut [u8], state: &mut E::State) {
+        for byte in bytes.iter_mut() {
+            *byte = match *byte {
+                // Hex letters: 'B'..='F' and 'b'..='f'
+                b'A'..=b'F' => b'A' + 1 + state.rand_mut().below(5) as u8,
+                b'a'..=b'f' => b'a' + 1 + state.rand_mut().below(5) as u8,
+                // '0' and '1' are swapped
+                b'0' => b'1',
+                b'1' => b'0',
+                // Other digits: '3'..='9'
+                b'2'..=b'9' => b'2' + 1 + state.rand_mut().below(7) as u8,
+                // Other letters: 'H'..='Z' and 'h'..='z'
+                b'G'..=b'Z' => b'G' + 1 + state.rand_mut().below(19) as u8,
+                b'g'..=b'z' => b'g' + 1 + state.rand_mut().below(19) as u8,
+                // Punctuation, one arm per contiguous block
+                b'!'..=b'*' => b'!' + 1 + state.rand_mut().below(9) as u8,
+                b','..=b'.' => b',' + 1 + state.rand_mut().below(2) as u8,
+                b':'..=b'@' => b':' + 1 + state.rand_mut().below(6) as u8,
+                b'['..=b'`' => b'[' + 1 + state.rand_mut().below(5) as u8,
+                b'{'..=b'~' => b'{' + 1 + state.rand_mut().below(3) as u8,
+                // Fixed swaps
+                b'+' => b'/',
+                b'/' => b'+',
+                b' ' => b'\t',
+                b'\t' => b' ',
+                b'\r' => b'\n',
+                b'\n' => b'\r',
+                0x00 => 0x01,
+                0x01 | 0xff => 0x00,
+                // Everything else: flip the low bits
+                other if other < 32 => other ^ 0x1f,
+                other => other ^ 0x7f,
+            };
+        }
+    }
+}
--- a/libafl/src/stages/mod.rs
+++ b/libafl/src/stages/mod.rs
@ -33,6 +33,9 @@ pub use owned::StagesOwnedList;
 pub mod tuneable;
 pub use tuneable::*;

+pub mod colorization;
+pub use colorization::*;
+
 #[cfg(feature = "std")]
 pub mod concolic;
 #[cfg(feature = "std")]
--- a/libafl/src/stages/tracing.rs
+++ b/libafl/src/stages/tracing.rs
@ -93,6 +93,11 @@ impl<EM, TE, Z> TracingStage<EM, TE, Z> {
    pub fn executor(&self) -> &TE {
        &self.tracer_executor
    }
+
+    /// Gets a mutable reference to the underlying tracer executor
+    pub fn executor_mut(&mut self) -> &mut TE {
+        &mut self.tracer_executor
+    }
 }

 /// A stage that runs the shadow executor using also the shadow observers