From bd2de16b4e1213875bfc0da076263675269527c5 Mon Sep 17 00:00:00 2001 From: "Dongjia \"toka\" Zhang" Date: Mon, 13 Feb 2023 15:02:19 +0900 Subject: [PATCH] Colorization stage (#1039) * type_replace * separate * more * heap * comment * f * fix * clp * need rev * comment * ColorizationTracingStage * get_raw_map_hash_run * process_execution * metadat * unused TE * resolve type errors * remove colorizationtracingstage * Finally compiles * clp * fmt * a few debug println * revert * fix --- libafl/src/stages/colorization.rs | 415 ++++++++++++++++++++++++++++++ libafl/src/stages/mod.rs | 3 + libafl/src/stages/tracing.rs | 5 + 3 files changed, 423 insertions(+) create mode 100644 libafl/src/stages/colorization.rs diff --git a/libafl/src/stages/colorization.rs b/libafl/src/stages/colorization.rs new file mode 100644 index 0000000000..1b6016c57f --- /dev/null +++ b/libafl/src/stages/colorization.rs @@ -0,0 +1,415 @@ +//! The colorization stage from colorization() in afl++ +use alloc::{ + collections::binary_heap::BinaryHeap, + string::{String, ToString}, + vec::Vec, +}; +use core::{cmp::Ordering, fmt::Debug, marker::PhantomData, ops::Range}; + +use serde::{Deserialize, Serialize}; + +use crate::{ + bolts::{rands::Rand, tuples::MatchName}, + corpus::{Corpus, CorpusId}, + events::EventFirer, + executors::{Executor, HasObservers}, + inputs::HasBytesVec, + mutators::mutations::buffer_copy, + observers::{MapObserver, ObserversTuple}, + stages::Stage, + state::{HasCorpus, HasMetadata, HasRand, UsesState}, + Error, +}; + +// Bigger range is better: ordered by range length, so the longest range compares as the greatest +#[derive(Debug, PartialEq, Eq)] +struct Bigger(Range); + +impl PartialOrd for Bigger { + fn partial_cmp(&self, other: &Self) -> Option { + self.0.len().partial_cmp(&other.0.len()) + } +} + +impl Ord for Bigger { + fn cmp(&self, other: &Bigger) -> Ordering { + self.0.len().cmp(&other.0.len()) + } +} + +// Earlier range is better: the comparison on `start` is reversed, so the smallest start compares as the greatest +#[derive(Debug, PartialEq, Eq)] +struct Earlier(Range); + +impl PartialOrd for Earlier { + fn 
partial_cmp(&self, other: &Self) -> Option { + other.0.start.partial_cmp(&self.0.start) + } +} + +impl Ord for Earlier { + fn cmp(&self, other: &Self) -> Ordering { + other.0.start.cmp(&self.0.start) + } +} + +/// The colorization stage: finds byte ranges of a corpus input that can be replaced without changing the map hash +#[derive(Clone, Debug)] +pub struct ColorizationStage { + map_observer_name: String, + #[allow(clippy::type_complexity)] + phantom: PhantomData<(E, EM, O, Z)>, +} + +impl UsesState for ColorizationStage +where + E: UsesState, +{ + type State = E::State; +} + +impl Stage for ColorizationStage +where + EM: UsesState + EventFirer, + E: HasObservers + Executor, + E::State: HasCorpus + HasMetadata + HasRand, + E::Input: HasBytesVec, + O: MapObserver, + Z: UsesState, +{ + #[inline] + #[allow(clippy::let_and_return)] + fn perform( + &mut self, + fuzzer: &mut Z, + executor: &mut E, // runs the original and the colorized inputs + state: &mut E::State, + manager: &mut EM, + corpus_idx: CorpusId, + ) -> Result<(), Error> { + // Colorize the corpus entry; the returned input is dropped, the ranges are kept in the state's `TaintMetadata` + Self::colorize( + fuzzer, + executor, + state, + manager, + corpus_idx, + &self.map_observer_name, + )?; + + Ok(()) + } +} + +#[derive(Debug, Serialize, Deserialize)] +/// Store the taint (byte ranges, as filled in by `ColorizationStage`) and the input bytes +pub struct TaintMetadata { + input_vec: Vec, + ranges: Vec>, +} + +impl TaintMetadata { + #[must_use] + /// Constructor for taint metadata + pub fn new(input_vec: Vec, ranges: Vec>) -> Self { + Self { input_vec, ranges } + } + + /// Set input and ranges, replacing the previously stored ones + pub fn update(&mut self, input: Vec, ranges: Vec>) { + self.input_vec = input; + self.ranges = ranges; + } + + #[must_use] + /// Getter for `input_vec` + pub fn input_vec(&self) -> &Vec { + &self.input_vec + } + + #[must_use] + /// Getter for `ranges` + pub fn ranges(&self) -> &Vec> { + &self.ranges + } +} + +crate::impl_serdeany!(TaintMetadata); + +impl ColorizationStage +where + EM: UsesState + EventFirer, + O: MapObserver, + E: HasObservers + Executor, + E::State: HasCorpus + HasMetadata + HasRand, + E::Input: 
HasBytesVec, + Z: UsesState, +{ + #[inline] + #[allow(clippy::let_and_return)] + fn colorize( + fuzzer: &mut Z, + executor: &mut E, + state: &mut E::State, + manager: &mut EM, + corpus_idx: CorpusId, + name: &str, + ) -> Result { + let mut input = state + .corpus() + .get(corpus_idx)? + .borrow_mut() + .load_input() + .unwrap() + .clone(); + // The backup of the input + let backup = input.clone(); + // This is the buffer we'll randomly mutate during type_replace + let mut changed = input.clone(); + + // input will be consumed so clone it + let consumed_input = input.clone(); + + // First, run orig_input once and get the original hash + let orig_hash = + Self::get_raw_map_hash_run(fuzzer, executor, state, manager, consumed_input, name)?; + let changed_bytes = changed.bytes_mut(); + let input_len = changed_bytes.len(); + + // Binary heap, pop is logN, insert is logN + // We will separate this range into smaller ranges. + // Keep it sorted, we want biggest ones to come first + let mut ranges = BinaryHeap::new(); + ranges.push(Bigger(0..input_len)); + + // This heap contains the smaller ranges. Changes inside them do not affect the coverage. 
+ // Keep it sorted, we want the earliest ones to come first so that it's easier to merge them + let mut ok_ranges = BinaryHeap::new(); + + // Now replace with random values (This is type_replace) + Self::type_replace(changed_bytes, state); + + // println!("Replaced bytes: {:#?}", changed_bytes); + // What we do now is to separate the input into smaller regions + // And for each small region make sure changing the bytes in that region does not affect the coverage + for _ in 0..input_len * 2 { + if let Some(b) = ranges.pop() { + // Let's try the largest one (ranges is sorted) + let r = b.0; + let range_start = r.start; + let range_end = r.end; + let copy_len = r.len(); + buffer_copy( + input.bytes_mut(), + changed.bytes(), + range_start, + range_start, + copy_len, + ); + + let consumed_input = input.clone(); + let changed_hash = Self::get_raw_map_hash_run( + fuzzer, + executor, + state, + manager, + consumed_input, + name, + )?; + + if orig_hash == changed_hash { + // The change in this range is safe! 
+ // println!("this range safe to change: {:#?}", range_start..range_end); + + ok_ranges.push(Earlier(range_start..range_end)); + } else { + // Seems like this range is too big for the original hash to be kept + + // Revert the changes + buffer_copy( + input.bytes_mut(), + backup.bytes(), + range_start, + range_start, + copy_len, + ); + + // Add smaller range + if copy_len > 1 { + // Separate the ranges + ranges.push(Bigger(range_start..(range_start + copy_len / 2))); + ranges.push(Bigger((range_start + copy_len / 2)..range_end)); + } + } + } else { + break; + } + } + + // Now ok_ranges is a list of smaller ranges + // Each of them should be stored into a metadata and we'll use them later in afl++ redqueen + + // let's merge ranges in ok_ranges + let mut res: Vec> = Vec::new(); + for item in ok_ranges.into_sorted_vec().into_iter().rev() { + match res.last_mut() { + Some(last) => { + // Try merge + if last.end == item.0.start { + // The end of the last range in `res` is the start of the new one + // so merge + last.end = item.0.end; + } else { + res.push(item.0); + } + } + None => { + res.push(item.0); + } + } + } + + if let Some(meta) = state.metadata_mut().get_mut::() { + meta.update(input.bytes().to_vec(), res); + + // println!("meta: {:#?}", meta); + } else { + let meta = TaintMetadata::new(input.bytes().to_vec(), res); + state.add_metadata::(meta); + } + + Ok(input) + } + + #[must_use] + /// Creates a new [`ColorizationStage`] + pub fn new(map_observer_name: &O) -> Self { + Self { + map_observer_name: map_observer_name.name().to_string(), + phantom: PhantomData, + } + } + + // Run the target and get the map hash, taken before the observers' post_exec (e.g. hitcounts's) is run + fn get_raw_map_hash_run( + fuzzer: &mut Z, + executor: &mut E, + state: &mut E::State, + manager: &mut EM, + input: E::Input, + name: &str, + ) -> Result { + executor.observers_mut().pre_exec_all(state, &input)?; + + let exit_kind = executor.run_target(fuzzer, state, manager, &input)?; + + let observer = executor + 
.observers() + .match_name::(name) + .ok_or_else(|| Error::key_not_found("MapObserver not found".to_string()))?; + + let hash = observer.hash() as usize; + + executor + .observers_mut() + .post_exec_all(state, &input, &exit_kind)?; + + // let observers = executor.observers(); + // fuzzer.process_execution(state, manager, input, observers, &exit_kind, true)?; + + Ok(hash) + } + + /// Replace bytes in place with other values, following fixed per-byte-range rules (some of them randomized) + #[allow(clippy::needless_range_loop)] + fn type_replace(bytes: &mut [u8], state: &mut E::State) { + let len = bytes.len(); + for idx in 0..len { + let c = match bytes[idx] { + 0x41..=0x46 => { + // 'A' + 1 + rand('F' - 'A') + 0x41 + 1 + state.rand_mut().below(5) as u8 + } + 0x61..=0x66 => { + // 'a' + 1 + rand('f' - 'a') + 0x61 + 1 + state.rand_mut().below(5) as u8 + } + 0x30 => { + // '0' -> '1' + 0x31 + } + 0x31 => { + // '1' -> '0' + 0x30 + } + 0x32..=0x39 => { + // '2' + 1 + rand('9' - '2') + 0x32 + 1 + state.rand_mut().below(7) as u8 + } + 0x47..=0x5a => { + // 'G' + 1 + rand('Z' - 'G') + 0x47 + 1 + state.rand_mut().below(19) as u8 + } + 0x67..=0x7a => { + // 'g' + 1 + rand('z' - 'g') + 0x67 + 1 + state.rand_mut().below(19) as u8 + } + 0x21..=0x2a => { + // '!' + 1 + rand('*' - '!'); + 0x21 + 1 + state.rand_mut().below(9) as u8 + } + 0x2c..=0x2e => { + // ',' + 1 + rand('.' 
- ',') + 0x2c + 1 + state.rand_mut().below(2) as u8 + } + 0x3a..=0x40 => { + // ':' + 1 + rand('@' - ':') + 0x3a + 1 + state.rand_mut().below(6) as u8 + } + 0x5b..=0x60 => { + // '[' + 1 + rand('`' - '[') + 0x5b + 1 + state.rand_mut().below(5) as u8 + } + 0x7b..=0x7e => { + // '{' + 1 + rand('~' - '{') + 0x7b + 1 + state.rand_mut().below(3) as u8 + } + 0x2b => { + // '+' -> '/' + 0x2f + } + 0x2f => { + // '/' -> '+' + 0x2b + } + 0x20 => { + // ' ' -> '\t' + 0x9 + } + 0x9 => { + // '\t' -> ' ' + 0x20 + } + 0xd => { + // '\r' -> '\n' + 0xa + } + 0xa => { + // '\n' -> '\r' + 0xd + } + 0x0 => 0x1, + 0x1 | 0xff => 0x0, + _ => { + if bytes[idx] < 32 { + bytes[idx] ^ 0x1f + } else { + bytes[idx] ^ 0x7f + } + } + }; + + bytes[idx] = c; + } + } +} diff --git a/libafl/src/stages/mod.rs b/libafl/src/stages/mod.rs index f05b8443f7..32f0295213 100644 --- a/libafl/src/stages/mod.rs +++ b/libafl/src/stages/mod.rs @@ -33,6 +33,9 @@ pub use owned::StagesOwnedList; pub mod tuneable; pub use tuneable::*; +pub mod colorization; +pub use colorization::*; + #[cfg(feature = "std")] pub mod concolic; #[cfg(feature = "std")] diff --git a/libafl/src/stages/tracing.rs b/libafl/src/stages/tracing.rs index af5fcf4480..613c79ade1 100644 --- a/libafl/src/stages/tracing.rs +++ b/libafl/src/stages/tracing.rs @@ -93,6 +93,11 @@ impl TracingStage { pub fn executor(&self) -> &TE { &self.tracer_executor } + + /// Gets a mutable reference to the underlying tracer executor + pub fn executor_mut(&mut self) -> &mut TE { + &mut self.tracer_executor + } } /// A stage that runs the shadow executor using also the shadow observers