Fix implementation of UniqueTrace and add UnclassifiedTrace (#3046)

* Fix implementation of UniqueTrace and add UnclassifiedTrace

* Update comments

* Move the implementation to classify_counts

* Only init for unique trace

* The missing inline

* Add a TODO
Author: lazymio, 2025-03-06 23:04:39 +08:00, committed by GitHub
Parent: de2bc166f0
Commit: c0894c40e5
2 changed files with 75 additions and 55 deletions

File 1 of 2:

@@ -13,7 +13,10 @@ use libafl_bolts::{
 };
 
 use super::{Executor, ExecutorsTuple, ExitKind, HasObservers, HasTimeout};
-use crate::{HasNamedMetadata, observers::MapObserver};
+use crate::{
+    HasNamedMetadata,
+    observers::{MapObserver, classify_counts, init_count_class_16},
+};
 
 /// The execution pattern of the [`SANDExecutor`]. The default value used in our paper is
 /// [`SANDExecutionPattern::SimplifiedTrace`] and we by design don't include coverage
@@ -26,7 +29,12 @@ pub enum SANDExecutionPattern {
     #[default]
     SimplifiedTrace,
     /// The unique trace, captures ~99.9% bug-triggering inputs with more than >50% overhead.
+    /// Only use this pattern if you are really scared of missing any bugs =).
     UniqueTrace,
+    /// The unclassified unique trace, captures even more bug-triggering inputs compared to
+    /// the unique trace. Not discussed in the paper but internally evaluated; not adopted
+    /// because it incurs too much overhead.
     UnclassifiedTrace,
 }
 
 /// The core executor implementation. It wraps another executor and a list of extra executors.
@@ -58,7 +66,7 @@ where
         (self.bitmap[idx] >> bidx) & 1
     }
 
-    /// Create a new [`SANDExecutor`]
+    /// Create a new [`SANDExecutor`]; the observer handle is supposed to be a _raw_ edge observer.
     pub fn new(
         executor: E,
         sand_extra_executors: ET,
@@ -66,6 +74,9 @@ where
         bitmap_size: usize,
         pattern: SANDExecutionPattern,
     ) -> Self {
+        if matches!(pattern, SANDExecutionPattern::UniqueTrace) {
+            init_count_class_16();
+        }
         Self {
             executor,
             sand_executors: sand_extra_executors,
@@ -76,7 +87,8 @@ where
         }
     }
 
-    /// Create a new [`SANDExecutor`] using paper setup
+    /// Create a new [`SANDExecutor`] using the paper setup; the observer handle is supposed to
+    /// be a _raw_ edge observer.
     pub fn new_paper(executor: E, sand_extra_executors: ET, observer_handle: Handle<C>) -> Self {
         Self::new(
             executor,
@@ -137,13 +149,20 @@ where
         let ot = self.executor.observers();
         let ob = ot.get(&self.ob_ref).unwrap().as_ref();
         let initial = ob.initial();
-        let covs = match self.pattern {
-            SANDExecutionPattern::SimplifiedTrace => ob
-                .as_iter()
-                .map(|x| if *x == initial { 0x1 } else { 0x80 })
-                .collect::<Vec<_>>(),
-            SANDExecutionPattern::UniqueTrace => ob.to_vec(),
-        };
+        let mut covs = ob.to_vec();
+        match self.pattern {
+            SANDExecutionPattern::SimplifiedTrace => {
+                // TODO: SIMD Optimizations
+                for it in &mut covs {
+                    *it = if *it == initial { 0x1 } else { 0x80 };
+                }
+            }
+            SANDExecutionPattern::UniqueTrace => {
+                classify_counts(covs.as_mut_slice());
+            }
+            SANDExecutionPattern::UnclassifiedTrace => {}
+        }
 
         // Our paper uses xxh32 but it shouldn't have significant collision for most hashing algorithms.
         let pattern_hash = hash_std(&covs) as usize;
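For context, the three patterns differ only in how the raw edge map is post-processed before it is hashed into pattern_hash. Below is a minimal, standalone sketch of that post-processing; the Pattern enum, bucket(), and trace_pattern() names are illustrative rather than LibAFL API, DefaultHasher stands in for hash_std, and the bucket boundaries are assumed to be the usual AFL ones.

use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

/// Illustrative mirror of `SANDExecutionPattern` (not the LibAFL type).
#[derive(Clone, Copy)]
enum Pattern {
    SimplifiedTrace,   // each edge collapsed to hit (0x80) / not hit (0x1)
    UniqueTrace,       // AFL-style bucketed hitcounts
    UnclassifiedTrace, // raw hitcounts, no post-processing
}

/// Illustrative stand-in for the AFL bucket lookup behind `classify_counts`.
fn bucket(count: u8) -> u8 {
    match count {
        0 => 0,
        1 => 1,
        2 => 2,
        3 => 4,
        4..=7 => 8,
        8..=15 => 16,
        16..=31 => 32,
        32..=127 => 64,
        _ => 128,
    }
}

/// Turn a raw edge map into the bytes that get hashed into a pattern hash.
fn trace_pattern(raw: &[u8], initial: u8, pattern: Pattern) -> u64 {
    let mut covs = raw.to_vec();
    match pattern {
        Pattern::SimplifiedTrace => {
            for it in &mut covs {
                *it = if *it == initial { 0x1 } else { 0x80 };
            }
        }
        Pattern::UniqueTrace => {
            for it in &mut covs {
                *it = bucket(*it);
            }
        }
        Pattern::UnclassifiedTrace => {} // keep the raw counters as-is
    }
    // Stand-in hash; the real code uses `hash_std` (the paper used xxh32).
    let mut hasher = DefaultHasher::new();
    covs.hash(&mut hasher);
    hasher.finish()
}

fn main() {
    let raw = [0u8, 1, 3, 9, 200];
    for p in [Pattern::SimplifiedTrace, Pattern::UniqueTrace, Pattern::UnclassifiedTrace] {
        println!("{:#x}", trace_pattern(&raw, 0, p));
    }
}

The tradeoff described in the doc comments follows directly from this: SimplifiedTrace only records whether an edge was hit at all, so changing hitcounts never produces a new pattern; UniqueTrace keys on bucketed hitcounts; UnclassifiedTrace keys on the exact counters, which is why it captures the most inputs and also incurs the most overhead.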

File 2 of 2:

@@ -39,7 +39,7 @@ static COUNT_CLASS_LOOKUP: [u8; 256] = [
 static mut COUNT_CLASS_LOOKUP_16: Vec<u16> = vec![];
 
 /// Initialize the 16-byte hitcounts map
-fn init_count_class_16() {
+pub(crate) fn init_count_class_16() {
     // # Safety
     //
     // Calling this from multiple threads may be racey and hence leak 65k mem or even create a broken lookup vec.
@@ -62,6 +62,50 @@ fn init_count_class_16() {
     }
 }
 
+/// AFL-style classify counts
+#[inline]
+#[expect(clippy::cast_ptr_alignment)]
+pub(crate) fn classify_counts(map: &mut [u8]) {
+    let mut len = map.len();
+    let align_offset = map.as_ptr().align_offset(size_of::<u16>());
+
+    // if len == 1, the next branch will already do this lookup
+    if len > 1 && align_offset != 0 {
+        debug_assert_eq!(
+            align_offset, 1,
+            "Aligning u8 to u16 should always be offset of 1?"
+        );
+        unsafe {
+            *map.get_unchecked_mut(0) =
+                *COUNT_CLASS_LOOKUP.get_unchecked(*map.get_unchecked(0) as usize);
+        }
+        len -= 1;
+    }
+
+    // Fix the last element
+    if (len & 1) != 0 {
+        unsafe {
+            *map.get_unchecked_mut(len - 1) =
+                *COUNT_CLASS_LOOKUP.get_unchecked(*map.get_unchecked(len - 1) as usize);
+        }
+    }
+
+    let cnt = len / 2;
+    let map16 =
+        unsafe { slice::from_raw_parts_mut(map.as_mut_ptr().add(align_offset) as *mut u16, cnt) };
+    let count_class_lookup_16 = &raw mut COUNT_CLASS_LOOKUP_16;
+
+    // 2022-07: Adding `enumerate` here increases execution speed/register allocation on x86_64.
+    #[expect(clippy::unused_enumerate_index)]
+    for (_i, item) in map16[0..cnt].iter_mut().enumerate() {
+        unsafe {
+            let count_class_lookup_16 = &mut *count_class_lookup_16;
+            *item = *(*count_class_lookup_16).get_unchecked(*item as usize);
+        }
+    }
+}
+
 /// Map observer with AFL-like hitcounts postprocessing
 ///
 /// [`MapObserver`]s that are not slice-backed, such as `MultiMapObserver`, can use
@@ -95,51 +139,8 @@ where
     }
 
     #[inline]
-    #[expect(clippy::cast_ptr_alignment)]
     fn post_exec(&mut self, state: &mut S, input: &I, exit_kind: &ExitKind) -> Result<(), Error> {
-        let mut map = self.as_slice_mut();
-        let mut len = map.len();
-        let align_offset = map.as_ptr().align_offset(size_of::<u16>());
-
-        // if len == 1, the next branch will already do this lookup
-        if len > 1 && align_offset != 0 {
-            debug_assert_eq!(
-                align_offset, 1,
-                "Aligning u8 to u16 should always be offset of 1?"
-            );
-            unsafe {
-                *map.get_unchecked_mut(0) =
-                    *COUNT_CLASS_LOOKUP.get_unchecked(*map.get_unchecked(0) as usize);
-            }
-            len -= 1;
-        }
-
-        // Fix the last element
-        if (len & 1) != 0 {
-            unsafe {
-                *map.get_unchecked_mut(len - 1) =
-                    *COUNT_CLASS_LOOKUP.get_unchecked(*map.get_unchecked(len - 1) as usize);
-            }
-        }
-
-        let cnt = len / 2;
-        let map16 = unsafe {
-            slice::from_raw_parts_mut(map.as_mut_ptr().add(align_offset) as *mut u16, cnt)
-        };
-        let count_class_lookup_16 = &raw mut COUNT_CLASS_LOOKUP_16;
-
-        // 2022-07: Adding `enumerate` here increases execution speed/register allocation on x86_64.
-        #[expect(clippy::unused_enumerate_index)]
-        for (_i, item) in map16[0..cnt].iter_mut().enumerate() {
-            unsafe {
-                let count_class_lookup_16 = &mut *count_class_lookup_16;
-                *item = *(*count_class_lookup_16).get_unchecked(*item as usize);
-            }
-        }
-
-        drop(map);
+        classify_counts(&mut self.as_slice_mut());
         self.base.post_exec(state, input, exit_kind)
     }
 }
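The shared classify_counts above is the performance-oriented version of the AFL hitcount bucketing: the alignment prologue and the odd-length fix handle the stray bytes, and the main loop classifies two counters per lookup through the 65k-entry COUNT_CLASS_LOOKUP_16 table. Semantically it should match classifying every byte through the 256-entry table. The sketch below checks that equivalence with safe code; the bucket boundaries are assumed to be the usual AFL ones, and build_lookup8, build_lookup16, and classify_counts_ref are illustrative helpers, not LibAFL functions.

/// Build the usual AFL 8-bit bucket table (illustrative; mirrors COUNT_CLASS_LOOKUP).
fn build_lookup8() -> [u8; 256] {
    let mut table = [0u8; 256];
    for i in 0..256 {
        table[i] = match i {
            0 => 0,
            1 => 1,
            2 => 2,
            3 => 4,
            4..=7 => 8,
            8..=15 => 16,
            16..=31 => 32,
            32..=127 => 64,
            _ => 128,
        };
    }
    table
}

/// Build the 16-bit table: entry (hi << 8) | lo maps to (lookup8[hi] << 8) | lookup8[lo],
/// so one lookup classifies two adjacent counters (mirrors what init_count_class_16 sets up).
fn build_lookup16(lookup8: &[u8; 256]) -> Vec<u16> {
    let mut table = vec![0u16; 65536];
    for hi in 0..256 {
        for lo in 0..256 {
            table[(hi << 8) | lo] = (u16::from(lookup8[hi]) << 8) | u16::from(lookup8[lo]);
        }
    }
    table
}

/// Safe scalar reference: classify every counter through the 8-bit table.
fn classify_counts_ref(map: &mut [u8], lookup8: &[u8; 256]) {
    for byte in map {
        *byte = lookup8[*byte as usize];
    }
}

fn main() {
    let lookup8 = build_lookup8();
    let lookup16 = build_lookup16(&lookup8);

    // Odd length on purpose: the fast path has to special-case a trailing byte.
    let raw: Vec<u8> = vec![0, 1, 2, 3, 5, 9, 17, 42, 200];

    let mut scalar = raw.clone();
    classify_counts_ref(&mut scalar, &lookup8);

    // Pairwise path: classify two bytes at a time via the u16 table (native-endian,
    // like the pointer cast in classify_counts), then fix up the leftover byte.
    let mut pairwise = raw.clone();
    for pair in pairwise.chunks_exact_mut(2) {
        let idx = u16::from_ne_bytes([pair[0], pair[1]]);
        let classified = lookup16[idx as usize].to_ne_bytes();
        pair[0] = classified[0];
        pair[1] = classified[1];
    }
    if pairwise.len() % 2 == 1 {
        let last = pairwise.len() - 1;
        pairwise[last] = lookup8[pairwise[last] as usize];
    }

    assert_eq!(scalar, pairwise);
    println!("scalar and pairwise classification agree: {scalar:?}");
}

The two-bytes-per-lookup trick is endianness-agnostic because each 16-bit entry carries the classified high byte in its high byte and the classified low byte in its low byte, so loading, looking up, and storing a u16 in native order classifies both underlying counters in place.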