From c0894c40e54f5e4920e9431a4d12323e460cbef1 Mon Sep 17 00:00:00 2001
From: lazymio
Date: Thu, 6 Mar 2025 23:04:39 +0800
Subject: [PATCH] Fix implementation of UniqueTrace and add UnclassifiedTrace
 (#3046)

* Fix implementation of UniqueTrace and add UnclassifiedTrace

* Update comments

* Move the implementation to classify_counts

* Only init for unique trace

* The missing inline

* Add a TODO
---
 libafl/src/executors/sand.rs             | 39 +++++++---
 libafl/src/observers/map/hitcount_map.rs | 91 ++++++++++++------------
 2 files changed, 75 insertions(+), 55 deletions(-)

diff --git a/libafl/src/executors/sand.rs b/libafl/src/executors/sand.rs
index 906692643e..ea4bf09979 100644
--- a/libafl/src/executors/sand.rs
+++ b/libafl/src/executors/sand.rs
@@ -13,7 +13,10 @@ use libafl_bolts::{
 };
 
 use super::{Executor, ExecutorsTuple, ExitKind, HasObservers, HasTimeout};
-use crate::{HasNamedMetadata, observers::MapObserver};
+use crate::{
+    HasNamedMetadata,
+    observers::{MapObserver, classify_counts, init_count_class_16},
+};
 
 /// The execution pattern of the [`SANDExecutor`]. The default value used in our paper is
 /// [`SANDExecutionPattern::SimplifiedTrace`] and we by design don't include coverage
@@ -26,7 +29,12 @@ pub enum SANDExecutionPattern {
     #[default]
     SimplifiedTrace,
     /// The unique trace, captures ~99.9% of bug-triggering inputs with more than 50% overhead.
+    /// Only use this pattern if you are really scared of missing any bugs =).
     UniqueTrace,
+    /// The unclassified unique trace; captures even more bug-triggering inputs than the
+    /// unique trace. Not discussed in the paper but internally evaluated; not adopted
+    /// because it incurs too much overhead.
+    UnclassifiedTrace,
 }
 
 /// The core executor implementation. It wraps another executor and a list of extra executors.
@@ -58,7 +66,7 @@ where
         (self.bitmap[idx] >> bidx) & 1
     }
 
-    /// Create a new [`SANDExecutor`]
+    /// Create a new [`SANDExecutor`]; the observer handle is supposed to be a _raw_ edge observer.
     pub fn new(
         executor: E,
         sand_extra_executors: ET,
@@ -66,6 +74,9 @@
         observer_handle: Handle<C>,
         bitmap_size: usize,
         pattern: SANDExecutionPattern,
     ) -> Self {
+        if matches!(pattern, SANDExecutionPattern::UniqueTrace) {
+            init_count_class_16();
+        }
         Self {
             executor,
             sand_executors: sand_extra_executors,
@@ -76,7 +87,8 @@
         }
     }
 
-    /// Create a new [`SANDExecutor`] using paper setup
+    /// Create a new [`SANDExecutor`] using the paper setup; the observer handle is supposed to
+    /// be a _raw_ edge observer.
     pub fn new_paper(executor: E, sand_extra_executors: ET, observer_handle: Handle<C>) -> Self {
         Self::new(
            executor,
@@ -137,13 +149,20 @@ where
         let ot = self.executor.observers();
         let ob = ot.get(&self.ob_ref).unwrap().as_ref();
         let initial = ob.initial();
-        let covs = match self.pattern {
-            SANDExecutionPattern::SimplifiedTrace => ob
-                .as_iter()
-                .map(|x| if *x == initial { 0x1 } else { 0x80 })
-                .collect::<Vec<_>>(),
-            SANDExecutionPattern::UniqueTrace => ob.to_vec(),
-        };
+        let mut covs = ob.to_vec();
+        match self.pattern {
+            SANDExecutionPattern::SimplifiedTrace => {
+                // TODO: SIMD Optimizations
+                for it in &mut covs {
+                    *it = if *it == initial { 0x1 } else { 0x80 };
+                }
+            }
+            SANDExecutionPattern::UniqueTrace => {
+                classify_counts(covs.as_mut_slice());
+            }
+            SANDExecutionPattern::UnclassifiedTrace => {}
+        }
+        // Our paper uses xxh32, but collisions should not be significant for most hashing algorithms.
         let pattern_hash = hash_std(&covs) as usize;
diff --git a/libafl/src/observers/map/hitcount_map.rs b/libafl/src/observers/map/hitcount_map.rs
index 4ac600bb2f..6dc9a22f25 100644
--- a/libafl/src/observers/map/hitcount_map.rs
+++ b/libafl/src/observers/map/hitcount_map.rs
@@ -39,7 +39,7 @@ static COUNT_CLASS_LOOKUP: [u8; 256] = [
 static mut COUNT_CLASS_LOOKUP_16: Vec<u16> = vec![];
 
 /// Initialize the 16-bit hitcounts lookup table
-fn init_count_class_16() {
+pub(crate) fn init_count_class_16() {
     // # Safety
     //
     // Calling this from multiple threads may be racy and hence leak 65k of memory or even create a broken lookup vec.
@@ -62,6 +62,50 @@
     }
 }
 
+/// AFL-style hitcount classification
+#[inline]
+#[expect(clippy::cast_ptr_alignment)]
+pub(crate) fn classify_counts(map: &mut [u8]) {
+    let mut len = map.len();
+    let align_offset = map.as_ptr().align_offset(size_of::<u16>());
+
+    // if len == 1, the next branch will already do this lookup
+    if len > 1 && align_offset != 0 {
+        debug_assert_eq!(
+            align_offset, 1,
+            "Aligning u8 to u16 should always be offset of 1?"
+        );
+        unsafe {
+            *map.get_unchecked_mut(0) =
+                *COUNT_CLASS_LOOKUP.get_unchecked(*map.get_unchecked(0) as usize);
+        }
+        len -= 1;
+    }
+
+    // Fix the last element
+    if (len & 1) != 0 {
+        unsafe {
+            *map.get_unchecked_mut(len - 1) =
+                *COUNT_CLASS_LOOKUP.get_unchecked(*map.get_unchecked(len - 1) as usize);
+        }
+    }
+
+    let cnt = len / 2;
+
+    let map16 =
+        unsafe { slice::from_raw_parts_mut(map.as_mut_ptr().add(align_offset) as *mut u16, cnt) };
+    let count_class_lookup_16 = &raw mut COUNT_CLASS_LOOKUP_16;
+
+    // 2022-07: Adding `enumerate` here increases execution speed/register allocation on x86_64.
+    #[expect(clippy::unused_enumerate_index)]
+    for (_i, item) in map16[0..cnt].iter_mut().enumerate() {
+        unsafe {
+            let count_class_lookup_16 = &mut *count_class_lookup_16;
+            *item = *(*count_class_lookup_16).get_unchecked(*item as usize);
+        }
+    }
+}
+
 /// Map observer with AFL-like hitcounts postprocessing
 ///
 /// [`MapObserver`]s that are not slice-backed, such as `MultiMapObserver`, can use
@@ -95,51 +139,8 @@ where
     }
 
     #[inline]
-    #[expect(clippy::cast_ptr_alignment)]
     fn post_exec(&mut self, state: &mut S, input: &I, exit_kind: &ExitKind) -> Result<(), Error> {
-        let mut map = self.as_slice_mut();
-        let mut len = map.len();
-        let align_offset = map.as_ptr().align_offset(size_of::<u16>());
-
-        // if len == 1, the next branch will already do this lookup
-        if len > 1 && align_offset != 0 {
-            debug_assert_eq!(
-                align_offset, 1,
-                "Aligning u8 to u16 should always be offset of 1?"
-            );
-            unsafe {
-                *map.get_unchecked_mut(0) =
-                    *COUNT_CLASS_LOOKUP.get_unchecked(*map.get_unchecked(0) as usize);
-            }
-            len -= 1;
-        }
-
-        // Fix the last element
-        if (len & 1) != 0 {
-            unsafe {
-                *map.get_unchecked_mut(len - 1) =
-                    *COUNT_CLASS_LOOKUP.get_unchecked(*map.get_unchecked(len - 1) as usize);
-            }
-        }
-
-        let cnt = len / 2;
-
-        let map16 = unsafe {
-            slice::from_raw_parts_mut(map.as_mut_ptr().add(align_offset) as *mut u16, cnt)
-        };
-        let count_class_lookup_16 = &raw mut COUNT_CLASS_LOOKUP_16;
-
-        // 2022-07: Adding `enumerate` here increases execution speed/register allocation on x86_64.
-        #[expect(clippy::unused_enumerate_index)]
-        for (_i, item) in map16[0..cnt].iter_mut().enumerate() {
-            unsafe {
-                let count_class_lookup_16 = &mut *count_class_lookup_16;
-                *item = *(*count_class_lookup_16).get_unchecked(*item as usize);
-            }
-        }
-
-        drop(map);
-
+        classify_counts(&mut self.as_slice_mut());
         self.base.post_exec(state, input, exit_kind)
     }
 }
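
A note for readers of this patch: the three `SANDExecutionPattern` variants differ only in how the raw edge map is normalized before it is hashed. Below is a minimal safe-Rust sketch of that normalization; `Pattern`, `classify_byte`, and `normalize_trace` are illustrative stand-ins for `SANDExecutionPattern`, `COUNT_CLASS_LOOKUP`, and the `match self.pattern` block above, not items from this patch.

    // Local stand-in for `SANDExecutionPattern`.
    #[derive(Clone, Copy)]
    enum Pattern {
        SimplifiedTrace,
        UniqueTrace,
        UnclassifiedTrace,
    }

    /// One entry of AFL's classic power-of-two hitcount buckets,
    /// written out as a match instead of a 256-entry lookup table.
    fn classify_byte(count: u8) -> u8 {
        match count {
            0 => 0,
            1 => 1,
            2 => 2,
            3 => 4,
            4..=7 => 8,
            8..=15 => 16,
            16..=31 => 32,
            32..=127 => 64,
            128..=255 => 128,
        }
    }

    /// Mirrors the `match self.pattern` block in the executor above.
    /// `initial` is the observer's reset value for an untouched map cell.
    fn normalize_trace(covs: &mut [u8], initial: u8, pattern: Pattern) {
        match pattern {
            // Keep only "edge hit / not hit"; cheapest, and the paper default.
            Pattern::SimplifiedTrace => {
                for it in covs.iter_mut() {
                    *it = if *it == initial { 0x1 } else { 0x80 };
                }
            }
            // Bucket hitcounts AFL-style, as `classify_counts` does.
            Pattern::UniqueTrace => {
                for it in covs.iter_mut() {
                    *it = classify_byte(*it);
                }
            }
            // Leave raw hitcounts untouched: most sensitive, most overhead.
            Pattern::UnclassifiedTrace => {}
        }
    }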
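The moved `classify_counts` keeps the existing two-bytes-per-lookup optimization: `init_count_class_16` expands the 256-entry byte table into a 65536-entry `u16` table once, so the hot loop does half as many lookups. A dependency-free sketch of that expansion, reusing the hypothetical `classify_byte` from the previous snippet in place of `COUNT_CLASS_LOOKUP`:

    /// Expand the byte-wise bucketing into a 65536-entry u16 lookup, so two
    /// adjacent counters are classified with a single table access. This is
    /// what `init_count_class_16` precomputes once at startup.
    fn build_lookup_16() -> Vec<u16> {
        let mut table = vec![0u16; 65536];
        for hi in 0..256usize {
            for lo in 0..256usize {
                table[(hi << 8) | lo] = (u16::from(classify_byte(hi as u8)) << 8)
                    | u16::from(classify_byte(lo as u8));
            }
        }
        table
    }

The unsafe pointer work in the patched function exists only to feed this table: it classifies a leading byte separately when the map is misaligned for `u16`, handles an odd trailing byte, and reinterprets the aligned middle as `&mut [u16]`. Because each byte half of the `u16` is classified independently, the result matches the plain byte-wise loop on either endianness.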
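Finally, the pattern hash computed in `sand.rs` is deduplicated through the executor's bitmap (see `(self.bitmap[idx] >> bidx) & 1` in the context above); in the SAND design, only inputs with a previously unseen pattern are re-run on the extra sanitizer executors. A rough sketch of that test-and-set step; `PatternBitmap` is a hypothetical stand-in, and std's `DefaultHasher` replaces `hash_std` from `libafl_bolts` only to keep the snippet self-contained:

    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    /// Stand-in for the executor's pattern bitmap (a plain bitset).
    struct PatternBitmap {
        bitmap: Vec<u8>,
    }

    impl PatternBitmap {
        fn new(bitmap_size: usize) -> Self {
            Self { bitmap: vec![0; bitmap_size] }
        }

        /// Hash the normalized trace, then test-and-set one bit of the bitset.
        /// Returns true if this pattern hash was already recorded, i.e. the
        /// input does not need to be re-run on the sanitizer executors.
        fn seen_before(&mut self, covs: &[u8]) -> bool {
            let mut hasher = DefaultHasher::new();
            covs.hash(&mut hasher);
            let pattern_hash = hasher.finish() as usize % (self.bitmap.len() * 8);
            let (idx, bidx) = (pattern_hash / 8, pattern_hash % 8);
            let seen = (self.bitmap[idx] >> bidx) & 1 == 1;
            self.bitmap[idx] |= 1 << bidx;
            seen
        }
    }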