From 44b798c07ee4cce8d72098ee9e7510989b6fe9e6 Mon Sep 17 00:00:00 2001 From: "Dongjia \"toka\" Zhang" Date: Tue, 14 Mar 2023 01:34:16 +0900 Subject: [PATCH] AFL++ RedQueen (#1087) * step 1 * step 2 * Vec * comment * Observer * tmp * TaintedTracingStage * more * more * more * Idea * more * more * mmmmmore * moremoremore * more * all * clp * comment * core * push temporary debug change * note for myself * working * rename to AFLCmplogTracingStage * rename * revert fuzzers' change --- libafl/src/mutators/token_mutations.rs | 967 ++++++++++++++++++++++++- libafl/src/observers/cmp.rs | 344 ++++++++- libafl/src/stages/colorization.rs | 3 + libafl/src/stages/tracing.rs | 151 +++- 4 files changed, 1444 insertions(+), 21 deletions(-) diff --git a/libafl/src/mutators/token_mutations.rs b/libafl/src/mutators/token_mutations.rs index 75ca8e11a6..1da5794a9b 100644 --- a/libafl/src/mutators/token_mutations.rs +++ b/libafl/src/mutators/token_mutations.rs @@ -24,7 +24,8 @@ use crate::{ bolts::{rands::Rand, AsSlice}, inputs::{HasBytesVec, UsesInput}, mutators::{buffer_self_copy, mutations::buffer_copy, MutationResult, Mutator, Named}, - observers::cmp::{CmpValues, CmpValuesMetadata}, + observers::cmp::{AFLppCmpValuesMetadata, CmpValues, CmpValuesMetadata}, + stages::TaintMetadata, state::{HasMaxSize, HasMetadata, HasRand}, Error, }; @@ -598,6 +599,970 @@ impl I2SRandReplace { } } +const CMP_ATTTRIBUTE_IS_EQUAL: u8 = 1; +const CMP_ATTRIBUTE_IS_GREATER: u8 = 2; +const CMP_ATTRIBUTE_IS_LESSER: u8 = 4; +const CMP_ATTRIBUTE_IS_FP: u8 = 8; +const CMP_ATTRIBUTE_IS_FP_MOD: u8 = 16; +const CMP_ATTRIBUTE_IS_INT_MOD: u8 = 32; +const CMP_ATTRIBUTE_IS_TRANSFORM: u8 = 64; + +/// AFL++ redqueen mutation +#[derive(Debug, Default)] +pub struct AFLppRedQueen { + cmp_start_idx: usize, + cmp_h_start_idx: usize, + cmp_buf_start_idx: usize, + taint_idx: usize, + enable_transform: bool, + enable_arith: bool, +} + +impl AFLppRedQueen { + #[inline] + fn swapa(x: u8) -> u8 { + (x & 0xf8) + ((x & 7) ^ 0x07) 
+ } + + /// Cmplog Pattern Matching + #[allow(clippy::cast_sign_loss)] + #[allow(clippy::too_many_arguments)] + #[allow(clippy::too_many_lines)] + #[allow(clippy::cast_possible_wrap)] + #[allow(clippy::if_not_else)] + #[allow(clippy::cast_precision_loss)] + pub fn cmp_extend_encoding( + &self, + pattern: u64, + repl: u64, + another_pattern: u64, + changed_val: u64, + attr: u8, + another_buf: &[u8], + buf: &mut [u8], // Unlike AFL++ we change the original buf (it's named buf here) + buf_idx: usize, + taint_len: usize, + input_len: usize, + hshape: usize, + ) -> bool { + // TODO: ascii2num (we need check q->is_ascii (in calibration stage(?))) + + // try Transform + if self.enable_transform + && pattern != another_pattern + && repl == changed_val + && attr <= CMP_ATTTRIBUTE_IS_EQUAL + { + // Try to identify transform magic + let mut bytes: usize = match hshape { + 0 => 0, // NEVER happen + 1 => 1, + 2 => 2, + 3 | 4 => 4, + _ => 8, + }; + // prevent reading past the end of the input + bytes = core::cmp::min(bytes, input_len - buf_idx); + + let (b_val, o_b_val, mask): (u64, u64, u64) = match bytes { + 0 => { + (0, 0, 0) // cannot happen + } + 1 => ( + u64::from(buf[buf_idx]), + u64::from(another_buf[buf_idx]), + 0xff, + ), + 2 | 3 => ( + u64::from(u16::from_be_bytes( + buf[buf_idx..buf_idx + 2].try_into().unwrap(), + )), + u64::from(u16::from_be_bytes( + another_buf[buf_idx..buf_idx + 2].try_into().unwrap(), + )), + 0xffff, + ), + 4 | 5 | 6 | 7 => ( + u64::from(u32::from_be_bytes( + buf[buf_idx..buf_idx + 4].try_into().unwrap(), + )), + u64::from(u32::from_be_bytes( + another_buf[buf_idx..buf_idx + 4].try_into().unwrap(), + )), + 0xffff_ffff, + ), + _ => ( + u64::from_be_bytes(buf[buf_idx..buf_idx + 8].try_into().unwrap()), + u64::from_be_bytes(another_buf[buf_idx..buf_idx + 8].try_into().unwrap()), + 0xffff_ffff_ffff_ffff, + ), + }; + + // Try arith (wrapping: operands are raw u64 values, a plain `-` panics on underflow in debug builds) + let diff: i64 = pattern.wrapping_sub(b_val) as i64; + let new_diff: i64 = another_pattern.wrapping_sub(o_b_val) as i64; + + if diff == new_diff && diff != 0
{ + let new_repl: u64 = (repl as i64).wrapping_sub(diff) as u64; + + let ret = self.cmp_extend_encoding( + pattern, + new_repl, + another_pattern, + repl, + CMP_ATTRIBUTE_IS_TRANSFORM, + another_buf, + buf, + buf_idx, + taint_len, + input_len, + hshape, + ); + if ret { + return true; + } + } + + // Try XOR + + // Shadowing + let diff: i64 = (pattern ^ b_val) as i64; + let new_diff: i64 = (another_pattern ^ o_b_val) as i64; + + if diff == new_diff && diff != 0 { + let new_repl: u64 = (repl as i64 ^ diff) as u64; + let ret = self.cmp_extend_encoding( + pattern, + new_repl, + another_pattern, + repl, + CMP_ATTRIBUTE_IS_TRANSFORM, + another_buf, + buf, + buf_idx, + taint_len, + input_len, + hshape, + ); + + if ret { + return true; + } + } + + // Try Lowercase (the target lower-cased the input: setting bit 0x20 on the input bytes yields the pattern) + // Shadowing + let diff = (b_val | (0x2020_2020_2020_2020 & mask)) == (pattern & mask); + + let new_diff = (o_b_val | (0x2020_2020_2020_2020 & mask)) == (another_pattern & mask); + + if new_diff && diff { + let new_repl: u64 = repl & (0x5f5f_5f5f_5f5f_5f5f & mask); + let ret = self.cmp_extend_encoding( + pattern, + new_repl, + another_pattern, + repl, + CMP_ATTRIBUTE_IS_TRANSFORM, + another_buf, + buf, + buf_idx, + taint_len, + input_len, + hshape, + ); + + if ret { + return true; + } + } + + // Try Uppercase (the target upper-cased the input: clearing bit 0x20 on the input bytes yields the pattern) + // Shadowing + let diff = (b_val & (0x5f5f_5f5f_5f5f_5f5f & mask)) == (pattern & mask); + + let o_diff = (o_b_val & (0x5f5f_5f5f_5f5f_5f5f & mask)) == (another_pattern & mask); + + if o_diff && diff { + let new_repl: u64 = repl | (0x2020_2020_2020_2020 & mask); + let ret = self.cmp_extend_encoding( + pattern, + new_repl, + another_pattern, + repl, + CMP_ATTRIBUTE_IS_TRANSFORM, + another_buf, + buf, + buf_idx, + taint_len, + input_len, + hshape, + ); + + if ret { + return true; + } + } + } + + let its_len = core::cmp::min(input_len - buf_idx, taint_len); + + // Try pattern matching + // println!("Pattern match"); + match hshape { + 0 => (), // NEVER HAPPEN, Do nothing + 1 => { + // 1 byte pattern match + let buf_8 = buf[buf_idx];
+ let another_buf_8 = another_buf[buf_idx]; + if buf_8 == pattern as u8 && another_buf_8 == another_pattern as u8 { + buf[buf_idx] = repl as u8; + return true; + } + } + 2 | 3 => { + if its_len >= 2 { + let buf_16 = u16::from_be_bytes(buf[buf_idx..buf_idx + 2].try_into().unwrap()); + let another_buf_16 = + u16::from_be_bytes(another_buf[buf_idx..buf_idx + 2].try_into().unwrap()); + + if buf_16 == pattern as u16 && another_buf_16 == another_pattern as u16 { + buf[buf_idx] = (repl >> 8 & 0xff) as u8; + buf[buf_idx + 1] = (repl & 0xff) as u8; + return true; + } + } + } + 4 | 5 | 6 | 7 => { + if its_len >= 4 { + let buf_32 = u32::from_be_bytes(buf[buf_idx..buf_idx + 4].try_into().unwrap()); + let another_buf_32 = + u32::from_be_bytes(another_buf[buf_idx..buf_idx + 4].try_into().unwrap()); + // println!("buf: {buf_32} {another_buf_32} {pattern} {another_pattern}"); + if buf_32 == pattern as u32 && another_buf_32 == another_pattern as u32 { + // Matched: write the replacement big-endian, the same byte order the comparison above read + buf[buf_idx] = (repl >> 24 & 0xff) as u8; + buf[buf_idx + 1] = (repl >> 16 & 0xff) as u8; + buf[buf_idx + 2] = (repl >> 8 & 0xff) as u8; + buf[buf_idx + 3] = (repl & 0xff) as u8; + + return true; + } + } + } + _ => { + if its_len >= 8 { + let buf_64 = u64::from_be_bytes(buf[buf_idx..buf_idx + 8].try_into().unwrap()); + let another_buf_64 = + u64::from_be_bytes(another_buf[buf_idx..buf_idx + 8].try_into().unwrap()); + + if buf_64 == pattern && another_buf_64 == another_pattern { + buf[buf_idx] = (repl >> 56 & 0xff) as u8; + buf[buf_idx + 1] = (repl >> 48 & 0xff) as u8; + buf[buf_idx + 2] = (repl >> 40 & 0xff) as u8; + buf[buf_idx + 3] = (repl >> 32 & 0xff) as u8; + buf[buf_idx + 4] = (repl >> 24 & 0xff) as u8; + buf[buf_idx + 5] = (repl >> 16 & 0xff) as u8; + buf[buf_idx + 6] = (repl >> 8 & 0xff) as u8; + buf[buf_idx + 7] = (repl & 0xff) as u8; + return true; + } + } + } + } + + // Try arith + if self.enable_arith || attr != CMP_ATTRIBUTE_IS_TRANSFORM { + if (attr & (CMP_ATTRIBUTE_IS_GREATER |
CMP_ATTRIBUTE_IS_LESSER)) == 0 || hshape < 4 { + return false; + } + + // Transform >= to < and <= to > + let attr = if (attr & CMP_ATTTRIBUTE_IS_EQUAL) != 0 + && (attr & (CMP_ATTRIBUTE_IS_GREATER | CMP_ATTRIBUTE_IS_LESSER)) != 0 + { + if attr & CMP_ATTRIBUTE_IS_GREATER != 0 { + attr + 2 + } else { + attr - 2 + } + } else { + attr + }; + + // FP: the logged operand holds the raw bits of the float, so reinterpret it instead of casting numerically + if (CMP_ATTRIBUTE_IS_FP..CMP_ATTRIBUTE_IS_FP_MOD).contains(&attr) { + let repl_new: u64; + + if attr & CMP_ATTRIBUTE_IS_GREATER != 0 { + if hshape == 4 && its_len >= 4 { + let mut g = f32::from_bits(repl as u32); + g += 1.0; + repl_new = u64::from(g.to_bits()); + } else if hshape == 8 && its_len >= 8 { + let mut g = f64::from_bits(repl); + g += 1.0; + repl_new = g.to_bits(); + } else { + return false; + } + + let ret = self.cmp_extend_encoding( + pattern, + repl_new, + another_pattern, + changed_val, + CMP_ATTRIBUTE_IS_FP_MOD, + another_buf, + buf, + buf_idx, + taint_len, + input_len, + hshape, + ); + if ret { + return true; + } + } else { + if hshape == 4 && its_len >= 4 { + let mut g = f32::from_bits(repl as u32); + g -= 1.0; + repl_new = u64::from(g.to_bits()); + } else if hshape == 8 && its_len >= 8 { + let mut g = f64::from_bits(repl); + g -= 1.0; + repl_new = g.to_bits(); + } else { + return false; + } + + let ret = self.cmp_extend_encoding( + pattern, + repl_new, + another_pattern, + changed_val, + CMP_ATTRIBUTE_IS_FP_MOD, + another_buf, + buf, + buf_idx, + taint_len, + input_len, + hshape, + ); + if ret { + return true; + } + } + } else if attr < CMP_ATTRIBUTE_IS_FP { + if attr & CMP_ATTRIBUTE_IS_GREATER != 0 { + let repl_new = repl.wrapping_add(1); + + let ret = self.cmp_extend_encoding( + pattern, + repl_new, + another_pattern, + changed_val, + CMP_ATTRIBUTE_IS_INT_MOD, + another_buf, + buf, + buf_idx, + taint_len, + input_len, + hshape, + ); + + if ret { + return true; + } + } else { + let repl_new = repl.wrapping_sub(1); + + let ret = self.cmp_extend_encoding( + pattern, + repl_new, + another_pattern, + changed_val, + CMP_ATTRIBUTE_IS_INT_MOD, + another_buf, + buf, + buf_idx, + taint_len, + input_len, + hshape, + ); + +
if ret { + return true; + } + } + } else { + return false; + } + } + + false + } + + /// rtn part from AFL++ + #[allow(clippy::too_many_arguments)] + pub fn rtn_extend_encoding( + &self, + pattern: &[u8], + repl: &[u8], + o_pattern: &[u8], + _changed_val: &[u8], + o_buf: &[u8], + buf: &mut [u8], + buf_idx: usize, + taint_len: usize, + input_len: usize, + hshape: usize, + ) -> bool { + let l0 = pattern.len(); + let ol0 = repl.len(); + let l1 = o_pattern.len(); + let ol1 = o_buf.len().saturating_sub(buf_idx); + + let lmin = core::cmp::min(core::cmp::min(l0, ol0), core::cmp::min(l1, ol1)); + let its_len = core::cmp::min( + core::cmp::min(input_len - buf_idx, taint_len), + core::cmp::min(lmin, hshape), + ); + + // TODO: Match before (This: https://github.com/AFLplusplus/AFLplusplus/blob/ea14f3fd40e32234989043a525e3853fcb33c1b6/src/afl-fuzz-redqueen.c#L2047) + let mut copy_len = 0; + for i in 0..its_len { + if pattern[i] != buf[buf_idx + i] && o_pattern[i] != o_buf[buf_idx + i] { + break; + } + copy_len += 1; + } + + if copy_len > 0 { + buffer_copy(buf, repl, 0, buf_idx, copy_len); + true + } else { + false + } + + // TODO: Transform (This: https://github.com/AFLplusplus/AFLplusplus/blob/stable/src/afl-fuzz-redqueen.c#L2089) + // It's hard to implement this naively + // because AFL++ redqueen does not check any pattern, but it calls its_fuzz() instead.
+ // we can't execute the harness inside a mutator + + // Direct matching + } +} + +impl<I, S> Mutator<I, S> for AFLppRedQueen +where + S: UsesInput + HasMetadata + HasRand + HasMaxSize, + I: HasBytesVec, +{ + #[allow(clippy::needless_range_loop)] + #[allow(clippy::too_many_lines)] + fn mutate( + &mut self, + state: &mut S, + input: &mut I, + stage_idx: i32, + ) -> Result<MutationResult, Error> { + // TODO + // add autotokens (https://github.com/AFLplusplus/AFLplusplus/blob/3881ccd0b7520f67fd0b34f010443dc249cbc8f1/src/afl-fuzz-redqueen.c#L1903) + // handle 128-bits logs + + let size = input.bytes().len(); + if size == 0 { + return Ok(MutationResult::Skipped); + } + + let (cmp_len, cmp_meta, taint_meta) = { + let cmp_meta = state.metadata().get::<AFLppCmpValuesMetadata>(); + let taint_meta = state.metadata().get::<TaintMetadata>(); + if cmp_meta.is_none() || taint_meta.is_none() { + return Ok(MutationResult::Skipped); + } + + let cmp_len = cmp_meta.unwrap().headers().len(); + if cmp_len == 0 { + return Ok(MutationResult::Skipped); + } + (cmp_len, cmp_meta.unwrap(), taint_meta.unwrap()) + }; + + // These indices must be saved in this mutator itself! + let (cmp_start_idx, cmp_h_start_idx, cmp_buf_start_idx, mut taint_idx) = if stage_idx == 0 { + (0, 0, 0, 0) + } else { + ( + self.cmp_start_idx, + self.cmp_h_start_idx, + self.cmp_buf_start_idx, + self.taint_idx, + ) + }; + + let orig_cmpvals = cmp_meta.orig_cmpvals(); + let new_cmpvals = cmp_meta.new_cmpvals(); + let headers = cmp_meta.headers(); + let input_len = input.bytes().len(); + let new_bytes = taint_meta.input_vec(); + let orig_bytes = input.bytes_mut(); + // TODO: Swap this. + let taint = taint_meta.ranges(); + // println!("orig: {:#?} new: {:#?}", orig_cmpvals, new_cmpvals); + for cmp_idx in cmp_start_idx..cmp_len { + let (w_idx, header) = headers[cmp_idx]; + + if orig_cmpvals.get(&w_idx).is_none() || new_cmpvals.get(&w_idx).is_none() { + // These two should have the same boolean value + + // so there's nothing interesting at cmp_idx, then just skip!
+ continue; + } + + let orig_val = orig_cmpvals.get(&w_idx).unwrap(); + let new_val = new_cmpvals.get(&w_idx).unwrap(); + + let logged = core::cmp::min(orig_val.len(), new_val.len()); + + for cmp_h_idx in cmp_h_start_idx..logged { + let mut skip_opt = false; + for prev_idx in 0..cmp_h_idx { + if new_val[prev_idx] == new_val[cmp_h_idx] { + skip_opt = true; + } + } + // Opt not in the paper + if skip_opt { + continue; + } + + for cmp_buf_idx in cmp_buf_start_idx..input_len { + let taint_len = match taint.get(taint_idx) { + Some(t) => { + if cmp_buf_idx < t.start { + input_len - cmp_buf_idx + } else { + // if cmp_buf_idx == t.end go to next range + if cmp_buf_idx == t.end { + taint_idx += 1; + } + + // Here cmp_buf_idx >= t.start + t.end - cmp_buf_idx + } + } + None => input_len - cmp_buf_idx, + }; + + let hshape = (header.shape() + 1) as usize; + let mut matched = false; + match (&orig_val[cmp_h_idx], &new_val[cmp_h_idx]) { + (CmpValues::U8(orig), CmpValues::U8(new)) => { + let (orig_v0, orig_v1, new_v0, new_v1) = (orig.0, orig.1, new.0, new.1); + + let attribute = header.attribute() as u8; + if new_v0 != orig_v0 && orig_v0 != orig_v1 { + // Compare v0 against v1 + if self.cmp_extend_encoding( + orig_v0.into(), + orig_v1.into(), + new_v0.into(), + new_v1.into(), + attribute, + new_bytes, + orig_bytes, + cmp_buf_idx, + taint_len, + input_len, + hshape, + ) { + matched = true; + } + + // Swapped + if self.cmp_extend_encoding( + orig_v0.swap_bytes().into(), + orig_v1.swap_bytes().into(), + new_v0.swap_bytes().into(), + new_v1.swap_bytes().into(), + attribute, + new_bytes, + orig_bytes, + cmp_buf_idx, + taint_len, + input_len, + hshape, + ) { + matched = true; + } + } + + if new_v1 != orig_v1 && orig_v0 != orig_v1 { + // Compare v1 against v0 + if self.cmp_extend_encoding( + orig_v1.into(), + orig_v0.into(), + new_v1.into(), + new_v0.into(), + Self::swapa(attribute), + new_bytes, + orig_bytes, + cmp_buf_idx, + taint_len, + input_len, + hshape, + ) { + matched = true; + } 
+ + // Swapped + if self.cmp_extend_encoding( + orig_v1.swap_bytes().into(), + orig_v0.swap_bytes().into(), + new_v1.swap_bytes().into(), + new_v0.swap_bytes().into(), + Self::swapa(attribute), + new_bytes, + orig_bytes, + cmp_buf_idx, + taint_len, + input_len, + hshape, + ) { + matched = true; + } + } + } + (CmpValues::U16(orig), CmpValues::U16(new)) => { + let (orig_v0, orig_v1, new_v0, new_v1) = (orig.0, orig.1, new.0, new.1); + let attribute: u8 = header.attribute() as u8; + if new_v0 != orig_v0 && orig_v0 != orig_v1 { + // Compare v0 against v1 + if self.cmp_extend_encoding( + orig_v0.into(), + orig_v1.into(), + new_v0.into(), + new_v1.into(), + attribute, + new_bytes, + orig_bytes, + cmp_buf_idx, + taint_len, + input_len, + hshape, + ) { + matched = true; + } + + // Swapped + // Compare v0 against v1 + if self.cmp_extend_encoding( + orig_v0.swap_bytes().into(), + orig_v1.swap_bytes().into(), + new_v0.swap_bytes().into(), + new_v1.swap_bytes().into(), + attribute, + new_bytes, + orig_bytes, + cmp_buf_idx, + taint_len, + input_len, + hshape, + ) { + matched = true; + } + } + + if new_v1 != orig_v1 && orig_v0 != orig_v1 { + // Compare v1 against v0 + if self.cmp_extend_encoding( + orig_v1.into(), + orig_v0.into(), + new_v1.into(), + new_v0.into(), + Self::swapa(attribute), + new_bytes, + orig_bytes, + cmp_buf_idx, + taint_len, + input_len, + hshape, + ) { + matched = true; + } + + // Swapped + if self.cmp_extend_encoding( + orig_v1.swap_bytes().into(), + orig_v0.swap_bytes().into(), + new_v1.swap_bytes().into(), + new_v0.swap_bytes().into(), + Self::swapa(attribute), + new_bytes, + orig_bytes, + cmp_buf_idx, + taint_len, + input_len, + hshape, + ) { + matched = true; + } + } + } + (CmpValues::U32(orig), CmpValues::U32(new)) => { + let (orig_v0, orig_v1, new_v0, new_v1) = (orig.0, orig.1, new.0, new.1); + let attribute = header.attribute() as u8; + if new_v0 != orig_v0 && orig_v0 != orig_v1 { + // Compare v0 against v1 + if self.cmp_extend_encoding( + 
orig_v0.into(), + orig_v1.into(), + new_v0.into(), + new_v1.into(), + attribute, + new_bytes, + orig_bytes, + cmp_buf_idx, + taint_len, + input_len, + hshape, + ) { + matched = true; + } + + // swapped + // Compare v0 against v1 + if self.cmp_extend_encoding( + orig_v0.swap_bytes().into(), + orig_v1.swap_bytes().into(), + new_v0.swap_bytes().into(), + new_v1.swap_bytes().into(), + attribute, + new_bytes, + orig_bytes, + cmp_buf_idx, + taint_len, + input_len, + hshape, + ) { + matched = true; + } + } + + if new_v1 != orig_v1 && orig_v0 != orig_v1 { + // Compare v1 against v0 + if self.cmp_extend_encoding( + orig_v1.into(), + orig_v0.into(), + new_v1.into(), + new_v0.into(), + Self::swapa(attribute), + new_bytes, + orig_bytes, + cmp_buf_idx, + taint_len, + input_len, + hshape, + ) { + matched = true; + } + + // Swapped + // Compare v1 against v0 + if self.cmp_extend_encoding( + orig_v1.swap_bytes().into(), + orig_v0.swap_bytes().into(), + new_v1.swap_bytes().into(), + new_v0.swap_bytes().into(), + Self::swapa(attribute), + new_bytes, + orig_bytes, + cmp_buf_idx, + taint_len, + input_len, + hshape, + ) { + matched = true; + } + } + } + (CmpValues::U64(orig), CmpValues::U64(new)) => { + let (orig_v0, orig_v1, new_v0, new_v1) = (orig.0, orig.1, new.0, new.1); + let attribute = header.attribute() as u8; + if new_v0 != orig_v0 && orig_v0 != orig_v1 { + // Compare v0 against v1 + if self.cmp_extend_encoding( + orig_v0, + orig_v1, + new_v0, + new_v1, + attribute, + new_bytes, + orig_bytes, + cmp_buf_idx, + taint_len, + input_len, + hshape, + ) { + matched = true; + } + + // Swapped + // Compare v0 against v1 + if self.cmp_extend_encoding( + orig_v0.swap_bytes(), + orig_v1.swap_bytes(), + new_v0.swap_bytes(), + new_v1.swap_bytes(), + attribute, + new_bytes, + orig_bytes, + cmp_buf_idx, + taint_len, + input_len, + hshape, + ) { + matched = true; + } + } + + if new_v1 != orig_v1 && orig_v0 != orig_v1 { + // Compare v1 against v0 + if self.cmp_extend_encoding( + orig_v1, + 
orig_v0, + new_v1, + new_v0, + Self::swapa(attribute), + new_bytes, + orig_bytes, + cmp_buf_idx, + taint_len, + input_len, + hshape, + ) { + matched = true; + } + + // Swapped + // Compare v1 against v0 + if self.cmp_extend_encoding( + orig_v1.swap_bytes(), + orig_v0.swap_bytes(), + new_v1.swap_bytes(), + new_v0.swap_bytes(), + Self::swapa(attribute), + new_bytes, + orig_bytes, + cmp_buf_idx, + taint_len, + input_len, + hshape, + ) { + matched = true; + } + } + } + (CmpValues::Bytes(orig), CmpValues::Bytes(new)) => { + let (orig_v0, orig_v1, new_v0, new_v1) = + (&orig.0, &orig.1, &new.0, &new.1); + // let attribute = header.attribute() as u8; + if new_v0 != orig_v0 && orig_v0 != orig_v1 { + // Compare v0 against v1 + if self.rtn_extend_encoding( + orig_v0, + orig_v1, + new_v0, + new_v1, + new_bytes, + orig_bytes, + cmp_buf_idx, + taint_len, + input_len, + hshape, + ) { + matched = true; + } + } + + if new_v1 != orig_v1 && orig_v0 != orig_v1 { + // Compare v1 against v0 + if self.rtn_extend_encoding( + orig_v1, + orig_v0, + new_v1, + new_v0, + new_bytes, + orig_bytes, + cmp_buf_idx, + taint_len, + input_len, + hshape, + ) { + matched = true; + } + } + } + (_, _) => { + // It shouldn't have different shape! 
+ } + } + + if matched { + // before returning the result + // save indexes + self.cmp_start_idx = cmp_start_idx; + self.cmp_h_start_idx = cmp_h_start_idx; + self.cmp_buf_start_idx = cmp_buf_start_idx + 1; // next + self.taint_idx = taint_idx; + + return Ok(MutationResult::Mutated); + } + // if no match then go to next round + } + } + } + + Ok(MutationResult::Skipped) + } +} + +impl Named for AFLppRedQueen { + fn name(&self) -> &str { + "AFLppRedQueen" + } +} + +impl AFLppRedQueen { + /// Create a new `AFLppRedQueen` Mutator + #[must_use] + pub fn new() -> Self { + Self { + cmp_start_idx: 0, + cmp_h_start_idx: 0, + cmp_buf_start_idx: 0, + taint_idx: 0, + enable_transform: false, + enable_arith: false, + } + } + + /// Constructor with cmplog options + #[must_use] + pub fn with_cmplog_options(transform: bool, arith: bool) -> Self { + Self { + cmp_start_idx: 0, + cmp_h_start_idx: 0, + cmp_buf_start_idx: 0, + taint_idx: 0, + enable_transform: transform, + enable_arith: arith, + } + } +} + #[cfg(test)] mod tests { #[cfg(feature = "std")] diff --git a/libafl/src/observers/cmp.rs b/libafl/src/observers/cmp.rs index d4794dd88f..32d668ad77 100644 --- a/libafl/src/observers/cmp.rs +++ b/libafl/src/observers/cmp.rs @@ -7,6 +7,7 @@ use alloc::{ use core::{fmt::Debug, marker::PhantomData}; use c2rust_bitfields::BitfieldStruct; +use hashbrown::HashMap; use serde::{de::DeserializeOwned, Deserialize, Deserializer, Serialize, Serializer}; use crate::{ @@ -19,7 +20,7 @@ use crate::{ }; /// Compare values collected during a run -#[derive(Debug, Serialize, Deserialize)] +#[derive(Eq, PartialEq, Debug, Serialize, Deserialize)] pub enum CmpValues { /// Two u8 values U8((u8, u8)), @@ -348,6 +349,263 @@ struct cmp_map { }; */ +/// A [`CmpObserver`] observer for AFL++ redqueen +#[derive(Serialize, Deserialize, Debug)] +pub struct AFLppStdCmpObserver<'a, S> +where + S: UsesInput + HasMetadata, +{ + cmp_map: OwnedRefMut<'a, AFLppCmpMap>, + size: Option>, + name: String, + add_meta: bool, + 
original: bool, + phantom: PhantomData<S>, +} + +impl<'a, S> CmpObserver<AFLppCmpMap, S> for AFLppStdCmpObserver<'a, S> +where + S: UsesInput + Debug + HasMetadata, +{ + /// Get the number of usable cmps (all by default) + fn usable_count(&self) -> usize { + match &self.size { + None => self.cmp_map.as_ref().len(), + Some(o) => *o.as_ref(), + } + } + + fn cmp_map(&self) -> &AFLppCmpMap { + self.cmp_map.as_ref() + } + + fn cmp_map_mut(&mut self) -> &mut AFLppCmpMap { + self.cmp_map.as_mut() + } + + /// Add [`struct@AFLppCmpValuesMetadata`] to the State including the logged values. + /// This routine does a basic loop filtering because loop index cmps are not interesting. + fn add_cmpvalues_meta(&mut self, state: &mut S) + where + S: HasMetadata, + { + #[allow(clippy::option_if_let_else)] // we can't mutate state in a closure + let meta = if let Some(meta) = state.metadata_mut().get_mut::<AFLppCmpValuesMetadata>() { + meta + } else { + state.add_metadata(AFLppCmpValuesMetadata::new()); + state + .metadata_mut() + .get_mut::<AFLppCmpValuesMetadata>() + .unwrap() + }; + + if self.original { + // If this observer is for original input, then we have run the un-mutated input + // Clear orig_cmpvals + meta.orig_cmpvals.clear(); + // Clear headers + meta.headers.clear(); + } else { + // If this observer is for the mutated input + meta.new_cmpvals.clear(); + } + + let count = self.usable_count(); + for i in 0..count { + if self.original { + // Update header + meta.headers.push((i, self.cmp_map().headers[i])); + } + + let execs = self.cmp_map().usable_executions_for(i); + if execs > 0 { + // Recognize loops and discard if needed + if execs > 4 { + let mut increasing_v0 = 0; + let mut increasing_v1 = 0; + let mut decreasing_v0 = 0; + let mut decreasing_v1 = 0; + + let mut last: Option<CmpValues> = None; + for j in 0..execs { + if let Some(val) = self.cmp_map().values_of(i, j) { + if let Some(l) = last.and_then(|x| x.to_u64_tuple()) { + if let Some(v) = val.to_u64_tuple() { + if l.0.wrapping_add(1) == v.0 { + increasing_v0 += 1; + } + if l.1.wrapping_add(1)
== v.1 { + increasing_v1 += 1; + } + if l.0.wrapping_sub(1) == v.0 { + decreasing_v0 += 1; + } + if l.1.wrapping_sub(1) == v.1 { + decreasing_v1 += 1; + } + } + } + last = Some(val); + } + } + // We check for execs-2 because the logged execs may wrap and have something like + // 8 9 10 3 4 5 6 7 + if increasing_v0 >= execs - 2 + || increasing_v1 >= execs - 2 + || decreasing_v0 >= execs - 2 + || decreasing_v1 >= execs - 2 + { + continue; + } + } + + let cmpmap_idx = i; + let mut cmp_values = Vec::new(); + if self.original { + // push into orig_cmpvals + // println!("Adding to orig_cmpvals"); + for j in 0..execs { + if let Some(val) = self.cmp_map().values_of(i, j) { + cmp_values.push(val); + } + } + // println!("idx: {cmpmap_idx} cmp_values: {:#?}", cmp_values); + meta.orig_cmpvals.insert(cmpmap_idx, cmp_values); + } else { + // push into new_cmpvals + // println!("Adding to new_cmpvals"); + for j in 0..execs { + if let Some(val) = self.cmp_map().values_of(i, j) { + cmp_values.push(val); + } + } + // println!("idx: {cmpmap_idx} cmp_values: {:#?}", cmp_values); + meta.new_cmpvals.insert(cmpmap_idx, cmp_values); + } + } + } + } +} + +impl<'a, S> Observer<S> for AFLppStdCmpObserver<'a, S> +where + S: UsesInput + Debug + HasMetadata, +{ + fn pre_exec(&mut self, _state: &mut S, _input: &S::Input) -> Result<(), Error> { + self.cmp_map.as_mut().reset()?; + Ok(()) + } + + fn post_exec( + &mut self, + state: &mut S, + _input: &S::Input, + _exit_kind: &ExitKind, + ) -> Result<(), Error> { + if self.add_meta { + self.add_cmpvalues_meta(state); + } + Ok(()) + } +} + +impl<'a, S> Named for AFLppStdCmpObserver<'a, S> +where + S: UsesInput + HasMetadata, +{ + fn name(&self) -> &str { + &self.name + } +} + +impl<'a, S> AFLppStdCmpObserver<'a, S> +where + S: UsesInput + HasMetadata, +{ + /// Creates a new [`AFLppStdCmpObserver`] with the given name and map.
+ #[must_use] + pub fn new(name: &'static str, map: &'a mut AFLppCmpMap, add_meta: bool) -> Self { + Self { + name: name.to_string(), + size: None, + cmp_map: OwnedRefMut::Ref(map), + add_meta, + original: false, + phantom: PhantomData, + } + } + /// Setter for the flag if the executed input is a mutated one or the original one + pub fn set_original(&mut self, v: bool) { + self.original = v; + } + + /// Creates a new [`AFLppStdCmpObserver`] with the given name, map and reference to variable size. + #[must_use] + pub fn with_size( + name: &'static str, + map: &'a mut AFLppCmpMap, + add_meta: bool, + original: bool, + size: &'a mut usize, + ) -> Self { + Self { + name: name.to_string(), + size: Some(OwnedRefMut::Ref(size)), + cmp_map: OwnedRefMut::Ref(map), + add_meta, + original, + phantom: PhantomData, + } + } +} + +/// A state metadata holding a list of values logged from comparisons. AFL++ RQ version. +#[derive(Debug, Default, Serialize, Deserialize)] +pub struct AFLppCmpValuesMetadata { + /// The first map of [`CmpValues`] retrieved by running the un-mutated input + #[serde(skip)] + pub orig_cmpvals: HashMap<usize, Vec<CmpValues>>, + /// The second map of [`CmpValues`] retrieved by running the mutated input + #[serde(skip)] + pub new_cmpvals: HashMap<usize, Vec<CmpValues>>, + /// The list of logged idx and headers retrieved by running the un-mutated input + #[serde(skip)] + pub headers: Vec<(usize, AFLppCmpHeader)>, +} + +crate::impl_serdeany!(AFLppCmpValuesMetadata); + +impl AFLppCmpValuesMetadata { + /// Constructor for `AFLppCmpValuesMetadata` + #[must_use] + pub fn new() -> Self { + Self { + orig_cmpvals: HashMap::new(), + new_cmpvals: HashMap::new(), + headers: Vec::new(), + } + } + + /// Getter for `orig_cmpvals` + #[must_use] + pub fn orig_cmpvals(&self) -> &HashMap<usize, Vec<CmpValues>> { + &self.orig_cmpvals + } + + /// Getter for `new_cmpvals` + #[must_use] + pub fn new_cmpvals(&self) -> &HashMap<usize, Vec<CmpValues>> { + &self.new_cmpvals + } + + /// Getter for `headers` + #[must_use] + pub fn headers(&self) -> &Vec<(usize, AFLppCmpHeader)> {
+ &self.headers + } +} + /// The AFL++ `CMP_MAP_W` pub const AFL_CMP_MAP_W: usize = 65536; /// The AFL++ `CMP_MAP_H` @@ -363,7 +621,7 @@ pub const AFL_CMP_TYPE_RTN: u32 = 2; /// The AFL++ `cmp_header` struct #[derive(Debug, Copy, Clone, BitfieldStruct)] #[repr(C, packed)] -pub struct AFLCmpHeader { +pub struct AFLppCmpHeader { #[bitfield(name = "hits", ty = "u32", bits = "0..=23")] #[bitfield(name = "id", ty = "u32", bits = "24..=47")] #[bitfield(name = "shape", ty = "u32", bits = "48..=52")] @@ -377,46 +635,98 @@ pub struct AFLCmpHeader { /// The AFL++ `cmp_operands` struct #[derive(Default, Debug, Clone, Copy)] #[repr(C, packed)] -pub struct AFLCmpOperands { +pub struct AFLppCmpOperands { v0: u64, v1: u64, v0_128: u64, v1_128: u64, } +impl AFLppCmpOperands { + #[must_use] + /// 64bit first cmp operand + pub fn v0(&self) -> u64 { + self.v0 + } + + #[must_use] + /// 64bit second cmp operand + pub fn v1(&self) -> u64 { + self.v1 + } + + #[must_use] + /// 128bit first cmp operand + pub fn v0_128(&self) -> u64 { + self.v0_128 + } + + #[must_use] + /// 128bit second cmp operand + pub fn v1_128(&self) -> u64 { + self.v1_128 + } +} + /// The AFL++ `cmpfn_operands` struct #[derive(Default, Debug, Clone, Copy)] #[repr(C, packed)] -pub struct AFLCmpFnOperands { +pub struct AFLppCmpFnOperands { v0: [u8; 31], v0_len: u8, v1: [u8; 31], v1_len: u8, } +impl AFLppCmpFnOperands { + #[must_use] + /// first rtn operand + pub fn v0(&self) -> &[u8; 31] { + &self.v0 + } + + #[must_use] + /// first rtn operand len + pub fn v0_len(&self) -> u8 { + self.v0_len + } + + #[must_use] + /// second rtn operand + pub fn v1(&self) -> &[u8; 31] { + &self.v1 + } + + #[must_use] + /// second rtn operand len + pub fn v1_len(&self) -> u8 { + self.v1_len + } +} + /// A proxy union to avoid casting operands as in AFL++ #[derive(Clone, Copy)] #[repr(C, packed)] -pub union AFLCmpVals { - operands: [[AFLCmpOperands; AFL_CMP_MAP_H]; AFL_CMP_MAP_W], - fn_operands: [[AFLCmpFnOperands; AFL_CMP_MAP_RTN_H];
AFL_CMP_MAP_W], +pub union AFLppCmpVals { + operands: [[AFLppCmpOperands; AFL_CMP_MAP_H]; AFL_CMP_MAP_W], + fn_operands: [[AFLppCmpFnOperands; AFL_CMP_MAP_RTN_H]; AFL_CMP_MAP_W], } -impl Debug for AFLCmpVals { +impl Debug for AFLppCmpVals { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - f.debug_struct("AFLCmpVals").finish_non_exhaustive() + f.debug_struct("AFLppCmpVals").finish_non_exhaustive() } } /// The AFL++ `cmp_map` struct, use with `StdCmpObserver` #[derive(Debug, Clone, Copy)] #[repr(C, packed)] -pub struct AFLCmpMap { - headers: [AFLCmpHeader; AFL_CMP_MAP_W], - vals: AFLCmpVals, +pub struct AFLppCmpMap { + headers: [AFLppCmpHeader; AFL_CMP_MAP_W], + vals: AFLppCmpVals, } -impl Serialize for AFLCmpMap { +impl Serialize for AFLppCmpMap { fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -431,7 +741,7 @@ impl Serialize for AFLCmpMap { } } -impl<'de> Deserialize<'de> for AFLCmpMap { +impl<'de> Deserialize<'de> for AFLppCmpMap { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, @@ -442,7 +752,7 @@ impl<'de> Deserialize<'de> for AFLCmpMap { } } -impl CmpMap for AFLCmpMap { +impl CmpMap for AFLppCmpMap { fn len(&self) -> usize { AFL_CMP_MAP_W } @@ -494,10 +804,10 @@ impl CmpMap for AFLCmpMap { unsafe { Some(CmpValues::Bytes(( self.vals.fn_operands[idx][execution].v0 - [..=(self.headers[idx].shape() as usize)] + [..(self.vals.fn_operands[idx][execution].v0_len as usize)] .to_vec(), self.vals.fn_operands[idx][execution].v1 - [..=(self.headers[idx].shape() as usize)] + [..(self.vals.fn_operands[idx][execution].v1_len as usize)] .to_vec(), ))) } diff --git a/libafl/src/stages/colorization.rs b/libafl/src/stages/colorization.rs index 1b6016c57f..7692a557e2 100644 --- a/libafl/src/stages/colorization.rs +++ b/libafl/src/stages/colorization.rs @@ -171,6 +171,8 @@ where let consumed_input = input.clone(); // First, run orig_input once and get the original hash + + // Idea: No need to do this every time let 
orig_hash = Self::get_raw_map_hash_run(fuzzer, executor, state, manager, consumed_input, name)?; let changed_bytes = changed.bytes_mut(); @@ -186,6 +188,7 @@ where // Keep it sorted, we want the earliest ones to come first so that it's easier to sort them let mut ok_ranges = BinaryHeap::new(); + // println!("Replaced bytes: {:#?}", changed_bytes); // Now replace with random values (This is type_replace) Self::type_replace(changed_bytes, state); diff --git a/libafl/src/stages/tracing.rs b/libafl/src/stages/tracing.rs index 613c79ade1..c061b462af 100644 --- a/libafl/src/stages/tracing.rs +++ b/libafl/src/stages/tracing.rs @@ -1,17 +1,20 @@ //! The tracing stage can trace the target and enrich a testcase with metadata, for example for `CmpLog`. +use alloc::string::{String, ToString}; use core::{fmt::Debug, marker::PhantomData}; #[cfg(feature = "introspection")] use crate::monitors::PerfFeature; use crate::{ + bolts::tuples::MatchName, corpus::{Corpus, CorpusId}, executors::{Executor, HasObservers, ShadowExecutor}, + inputs::{BytesInput, UsesInput}, mark_feature_time, - observers::ObserversTuple, - stages::Stage, + observers::{AFLppStdCmpObserver, ObserversTuple}, + stages::{colorization::TaintMetadata, Stage}, start_timer, - state::{HasClientPerfMonitor, HasCorpus, HasExecutions, State, UsesState}, + state::{HasClientPerfMonitor, HasCorpus, HasExecutions, HasMetadata, State, UsesState}, Error, }; @@ -100,6 +103,148 @@ impl TracingStage { } } +/// Trace with tainted input +#[derive(Clone, Debug)] +pub struct AFLppCmplogTracingStage { + tracer_executor: TE, + cmplog_observer_name: Option, + #[allow(clippy::type_complexity)] + phantom: PhantomData<(EM, TE, Z)>, +} + +impl UsesState for AFLppCmplogTracingStage +where + TE: UsesState, +{ + type State = TE::State; +} + +impl Stage for AFLppCmplogTracingStage +where + E: UsesState, + TE: Executor + HasObservers, + TE::State: HasClientPerfMonitor + + HasExecutions + + HasCorpus + + HasMetadata + + UsesInput, + EM: UsesState, 
+ Z: UsesState, +{ + #[inline] + fn perform( + &mut self, + fuzzer: &mut Z, + _executor: &mut E, + state: &mut TE::State, + manager: &mut EM, + corpus_idx: CorpusId, + ) -> Result<(), Error> { + // First run with the un-mutated input + + let unmutated_input = state + .corpus() + .get(corpus_idx)? + .borrow_mut() + .load_input()? + .clone(); + + if let Some(name) = &self.cmplog_observer_name { + if let Some(ob) = self + .tracer_executor + .observers_mut() + .match_name_mut::>(name) + { + // This is the original input, + // Set it to true + ob.set_original(true); + } + // I can't think of any use of this stage if you don't use AFLppStdCmpObserver + // but do nothing of course + } + + self.tracer_executor + .observers_mut() + .pre_exec_all(state, &unmutated_input)?; + + let exit_kind = + self.tracer_executor + .run_target(fuzzer, state, manager, &unmutated_input)?; + + *state.executions_mut() += 1; + + self.tracer_executor + .observers_mut() + .post_exec_all(state, &unmutated_input, &exit_kind)?; + + // Second run with the mutated input + let mutated_input = match state.metadata().get::() { + Some(meta) => BytesInput::from(meta.input_vec().as_ref()), + None => return Err(Error::unknown("No metadata found")), + }; + + if let Some(name) = &self.cmplog_observer_name { + if let Some(ob) = self + .tracer_executor + .observers_mut() + .match_name_mut::>(name) + { + // This is not the original input, + // Set it to false + ob.set_original(false); + } + // I can't think of any use of this stage if you don't use AFLppStdCmpObserver + // but do nothing of course + } + + self.tracer_executor + .observers_mut() + .pre_exec_all(state, &mutated_input)?; + + let exit_kind = self + .tracer_executor + .run_target(fuzzer, state, manager, &mutated_input)?; + + *state.executions_mut() += 1; + + self.tracer_executor + .observers_mut() + .post_exec_all(state, &mutated_input, &exit_kind)?; + + Ok(()) + } +} + +impl AFLppCmplogTracingStage { + /// Creates a new default stage + pub fn 
new(tracer_executor: TE) -> Self { + Self { + cmplog_observer_name: None, + tracer_executor, + phantom: PhantomData, + } + } + + /// With cmplog observer + pub fn with_cmplog_observer_name(tracer_executor: TE, name: &'static str) -> Self { + Self { + cmplog_observer_name: Some(name.to_string()), + tracer_executor, + phantom: PhantomData, + } + } + + /// Gets the underlying tracer executor + pub fn executor(&self) -> &TE { + &self.tracer_executor + } + + /// Gets the underlying tracer executor (mut) + pub fn executor_mut(&mut self) -> &mut TE { + &mut self.tracer_executor + } +} + /// A stage that runs the shadow executor using also the shadow observers #[derive(Clone, Debug)] pub struct ShadowTracingStage {