From eb06d4a757bb829922b98d125b8fec05daf31c57 Mon Sep 17 00:00:00 2001 From: lazymio Date: Mon, 14 Apr 2025 18:29:51 +0800 Subject: [PATCH] Default to use SIMD acceleration map feedbacks (#3157) * Feature renam and clean urls * Fix features renaming * wip: working libafl_bolts simd * initial default SimdMapFeedback implementation * clippy * fix imports * clippy again * fmt * also generalize simplify_map * clippy again * fix no_std * fmt * fix import for no-std * fmt * fixes * Fix fuzzers * Fix cargo docs * better bounds * fmt * Fix fuzzer * Accidentally commit the file --- .../inprocess/libfuzzer_libmozjpeg/src/lib.rs | 5 +- fuzzers/inprocess/libfuzzer_libpng/src/lib.rs | 4 +- .../baby_fuzzer_multi/src/main.rs | 8 +- libafl/Cargo.toml | 6 +- libafl/src/feedbacks/map.rs | 209 +------ libafl/src/feedbacks/mod.rs | 2 +- libafl/src/feedbacks/simd.rs | 198 ++++--- libafl_bolts/Cargo.toml | 8 +- libafl_bolts/examples/simd/simd.rs | 63 ++- libafl_bolts/src/simd.rs | 528 ++++++++++++------ 10 files changed, 576 insertions(+), 455 deletions(-) diff --git a/fuzzers/inprocess/libfuzzer_libmozjpeg/src/lib.rs b/fuzzers/inprocess/libfuzzer_libmozjpeg/src/lib.rs index 861b449354..0d8cfb8ee2 100644 --- a/fuzzers/inprocess/libfuzzer_libmozjpeg/src/lib.rs +++ b/fuzzers/inprocess/libfuzzer_libmozjpeg/src/lib.rs @@ -11,7 +11,7 @@ use libafl::{ events::{setup_restarting_mgr_std, EventConfig}, executors::{inprocess::InProcessExecutor, ExitKind}, feedback_or, - feedbacks::{CrashFeedback, MaxMapFeedback}, + feedbacks::{CrashFeedback, DifferentIsNovel, MapFeedback, MaxMapFeedback}, fuzzer::{Fuzzer, StdFuzzer}, inputs::{BytesInput, HasTargetBytes}, monitors::SimpleMonitor, @@ -28,6 +28,7 @@ use libafl::{ }; use libafl_bolts::{ rands::StdRand, + simd::MaxReducer, tuples::{tuple_list, Merge}, AsSlice, }; @@ -101,7 +102,7 @@ fn fuzz(corpus_dirs: &[PathBuf], objective_dir: PathBuf, broker_port: u16) -> Re let mut feedback = feedback_or!( MaxMapFeedback::new(&edges_observer), MaxMapFeedback::new(&cmps_observer), - MaxMapFeedback::new(&allocs_observer) + MapFeedback::<_, DifferentIsNovel, _, MaxReducer>::new(&allocs_observer) ); // A feedback to choose if an input is a solution or not diff --git a/fuzzers/inprocess/libfuzzer_libpng/src/lib.rs b/fuzzers/inprocess/libfuzzer_libpng/src/lib.rs index 9aab597312..7882b9ab5d 100644 --- a/fuzzers/inprocess/libfuzzer_libpng/src/lib.rs +++ b/fuzzers/inprocess/libfuzzer_libpng/src/lib.rs @@ -62,8 +62,6 @@ pub extern "C" fn libafl_main() { #[cfg(not(test))] fn fuzz(corpus_dirs: &[PathBuf], objective_dir: PathBuf, broker_port: u16) -> Result<(), Error> { // 'While the stats are state, they are usually used in the broker - which is likely never restarted - - use libafl::feedbacks::simd::{SimdImplmentation, SimdMapFeedback}; let monitor = MultiMonitor::new(|s| println!("{s}")); // The restarting state will spawn the same process again as child, then restarted it each time it crashes. @@ -95,7 +93,7 @@ fn fuzz(corpus_dirs: &[PathBuf], objective_dir: PathBuf, broker_port: u16) -> Re // Create an observation channel to keep track of the execution time let time_observer = TimeObserver::new("time"); - let map_feedback = SimdMapFeedback::new(MaxMapFeedback::new(&edges_observer)); + let map_feedback = MaxMapFeedback::new(&edges_observer); let calibration = CalibrationStage::new(&map_feedback); // Feedback to rate the interestingness of an input diff --git a/fuzzers/structure_aware/baby_fuzzer_multi/src/main.rs b/fuzzers/structure_aware/baby_fuzzer_multi/src/main.rs index 39a5d028af..0f80e7d593 100644 --- a/fuzzers/structure_aware/baby_fuzzer_multi/src/main.rs +++ b/fuzzers/structure_aware/baby_fuzzer_multi/src/main.rs @@ -11,7 +11,7 @@ use libafl::{ events::SimpleEventManager, executors::{inprocess::InProcessExecutor, ExitKind}, feedback_or_fast, - feedbacks::{CrashFeedback, MaxMapFeedback, MinMapFeedback}, + feedbacks::{CrashFeedback, DifferentIsNovel, MapFeedback, MaxMapFeedback}, fuzzer::{Fuzzer, StdFuzzer}, inputs::{BytesInput, HasTargetBytes, MultipartInput}, mutators::{havoc_mutations::havoc_mutations, scheduled::HavocScheduledMutator}, @@ -21,7 +21,9 @@ use libafl::{ state::StdState, Evaluator, }; -use libafl_bolts::{nonnull_raw_mut, rands::StdRand, tuples::tuple_list, AsSlice}; +use libafl_bolts::{ + nonnull_raw_mut, rands::StdRand, simd::MinReducer, tuples::tuple_list, AsSlice, +}; /// Coverage map with explicit assignments due to the lack of instrumentation static mut SIGNALS: [u8; 128] = [0; 128]; @@ -89,7 +91,7 @@ pub fn main() { // Feedback to rate the interestingness of an input let signals_feedback = MaxMapFeedback::new(&signals_observer); - let count_feedback = MinMapFeedback::new(&count_observer); + let count_feedback = MapFeedback::<_, DifferentIsNovel, _, MinReducer>::new(&count_observer); let mut feedback = feedback_or_fast!(count_feedback, signals_feedback); diff --git a/libafl/Cargo.toml b/libafl/Cargo.toml index 2b1f227d00..fc9172798c 100644 --- a/libafl/Cargo.toml +++ b/libafl/Cargo.toml @@ -39,7 +39,7 @@ default = [ "regex", "serdeany_autoreg", "libafl_bolts/xxh3", - "stable_simd", + "simd", ] document-features = ["dep:document-features"] @@ -196,8 +196,8 @@ nautilus = [ "regex", ] -## Use the best SIMD implementation by our [benchmark](https://github.com/wtdcode/libafl_simd_bench) -stable_simd = ["libafl_bolts/stable_simd"] +## Use the best SIMD implementation by our benchmark +simd = ["libafl_bolts/simd"] [[example]] name = "tui_mock" diff --git a/libafl/src/feedbacks/map.rs b/libafl/src/feedbacks/map.rs index dd3a157e39..3e422b8043 100644 --- a/libafl/src/feedbacks/map.rs +++ b/libafl/src/feedbacks/map.rs @@ -4,18 +4,25 @@ use alloc::{borrow::Cow, vec::Vec}; use core::{ fmt::Debug, marker::PhantomData, - ops::{BitAnd, BitOr, Deref, DerefMut}, + ops::{Deref, DerefMut}, }; -#[rustversion::nightly] -use libafl_bolts::simd::std_covmap_is_interesting; +#[cfg(all(feature = "simd", target_arch = "x86_64"))] +use libafl_bolts::simd::vector::u8x16; +#[cfg(not(feature = "simd"))] +use libafl_bolts::simd::{MinReducer, OrReducer}; +#[cfg(feature = "simd")] +use libafl_bolts::simd::{SimdMaxReducer, SimdMinReducer, SimdOrReducer, vector::u8x32}; use libafl_bolts::{ - AsIter, AsSlice, HasRefCnt, Named, + AsIter, HasRefCnt, Named, + simd::{MaxReducer, NopReducer, Reducer}, tuples::{Handle, Handled, MatchName, MatchNameRef}, }; use num_traits::PrimInt; use serde::{Deserialize, Serialize, de::DeserializeOwned}; +#[cfg(feature = "simd")] +use super::simd::SimdMapFeedback; #[cfg(feature = "track_hit_feedbacks")] use crate::feedbacks::premature_last_result_err; use crate::{ @@ -29,11 +36,27 @@ use crate::{ state::HasExecutions, }; +#[cfg(feature = "simd")] +/// A [`SimdMapFeedback`] that implements the AFL algorithm using an [`SimdOrReducer`] combining the bits for the history map and the bit from (`HitcountsMapObserver`)[`crate::observers::HitcountsMapObserver`]. +pub type AflMapFeedback = SimdMapFeedback; +#[cfg(not(feature = "simd"))] /// A [`MapFeedback`] that implements the AFL algorithm using an [`OrReducer`] combining the bits for the history map and the bit from (`HitcountsMapObserver`)[`crate::observers::HitcountsMapObserver`]. pub type AflMapFeedback = MapFeedback; +#[cfg(all(feature = "simd", target_arch = "x86_64"))] +/// A [`SimdMapFeedback`] that strives to maximize the map contents. +pub type MaxMapFeedback = SimdMapFeedback; +#[cfg(all(feature = "simd", not(target_arch = "x86_64")))] +/// A [`SimdMapFeedback`] that strives to maximize the map contents. +pub type MaxMapFeedback = SimdMapFeedback; +#[cfg(not(feature = "simd"))] /// A [`MapFeedback`] that strives to maximize the map contents. pub type MaxMapFeedback = MapFeedback; + +#[cfg(feature = "simd")] +/// A [`SimdMapFeedback`] that strives to minimize the map contents. +pub type MinMapFeedback = SimdMapFeedback; +#[cfg(not(feature = "simd"))] /// A [`MapFeedback`] that strives to minimize the map contents. pub type MinMapFeedback = MapFeedback; @@ -47,79 +70,6 @@ pub type MaxMapPow2Feedback = MapFeedback = MapFeedback; -/// A `Reducer` function is used to aggregate values for the novelty search -pub trait Reducer { - /// Reduce two values to one value, with the current [`Reducer`]. - fn reduce(first: T, second: T) -> T; -} - -/// A [`OrReducer`] reduces the values returning the bitwise OR with the old value -#[derive(Clone, Debug)] -pub struct OrReducer {} - -impl Reducer for OrReducer -where - T: BitOr, -{ - #[inline] - fn reduce(history: T, new: T) -> T { - history | new - } -} - -/// A [`AndReducer`] reduces the values returning the bitwise AND with the old value -#[derive(Clone, Debug)] -pub struct AndReducer {} - -impl Reducer for AndReducer -where - T: BitAnd, -{ - #[inline] - fn reduce(history: T, new: T) -> T { - history & new - } -} - -/// A [`NopReducer`] does nothing, and just "reduces" to the second/`new` value. -#[derive(Clone, Debug)] -pub struct NopReducer {} - -impl Reducer for NopReducer { - #[inline] - fn reduce(_history: T, new: T) -> T { - new - } -} - -/// A [`MaxReducer`] reduces int values and returns their maximum. -#[derive(Clone, Debug)] -pub struct MaxReducer {} - -impl Reducer for MaxReducer -where - T: PartialOrd, -{ - #[inline] - fn reduce(first: T, second: T) -> T { - if first > second { first } else { second } - } -} - -/// A [`MinReducer`] reduces int values and returns their minimum. -#[derive(Clone, Debug)] -pub struct MinReducer {} - -impl Reducer for MinReducer -where - T: PartialOrd, -{ - #[inline] - fn reduce(first: T, second: T) -> T { - if first < second { first } else { second } - } -} - /// A `IsNovel` function is used to discriminate if a reduced value is considered novel. pub trait IsNovel { /// If a new value in the [`MapFeedback`] was found, @@ -351,7 +301,7 @@ where #[derive(Clone, Debug)] pub struct MapFeedback { /// New indexes observed in the last observation - novelties: Option>, + pub(crate) novelties: Option>, /// Name identifier of this instance name: Cow<'static, str>, /// Name identifier of the observer @@ -360,7 +310,7 @@ pub struct MapFeedback { stats_name: Cow<'static, str>, // The previous run's result of [`Self::is_interesting`] #[cfg(feature = "track_hit_feedbacks")] - last_result: Option, + pub(crate) last_result: Option, /// Phantom Data of Reducer #[expect(clippy::type_complexity)] phantom: PhantomData (N, O, R)>, @@ -391,24 +341,6 @@ where R: Reducer, S: HasNamedMetadata + HasExecutions, { - #[rustversion::nightly] - default fn is_interesting( - &mut self, - state: &mut S, - _manager: &mut EM, - _input: &I, - observers: &OT, - _exit_kind: &ExitKind, - ) -> Result { - let res = self.is_interesting_default(state, observers); - #[cfg(feature = "track_hit_feedbacks")] - { - self.last_result = Some(res); - } - Ok(res) - } - - #[rustversion::not(nightly)] fn is_interesting( &mut self, state: &mut S, @@ -528,28 +460,6 @@ where } } -/// Specialize for the common coverage map size, maximization of u8s -#[rustversion::nightly] -impl Feedback for MapFeedback -where - C: CanTrack + AsRef, - EM: EventFirer, - O: MapObserver + for<'a> AsSlice<'a, Entry = u8> + for<'a> AsIter<'a, Item = u8>, - OT: MatchName, - S: HasNamedMetadata + HasExecutions, -{ - fn is_interesting( - &mut self, - state: &mut S, - _manager: &mut EM, - _input: &I, - observers: &OT, - _exit_kind: &ExitKind, - ) -> Result { - Ok(self.is_interesting_u8_simd_optimized(state, observers, std_covmap_is_interesting)) - } -} - impl Named for MapFeedback { #[inline] fn name(&self) -> &Cow<'static, str> { @@ -676,67 +586,6 @@ where } } -/// Specialize for the common coverage map size, maximization of u8s -impl MapFeedback -where - O: MapObserver + for<'a> AsSlice<'a, Entry = u8> + for<'a> AsIter<'a, Item = u8>, - C: CanTrack + AsRef, -{ - #[allow(dead_code)] // this is true on stable wihout "stable_simd" - pub(crate) fn is_interesting_u8_simd_optimized( - &mut self, - state: &mut S, - observers: &OT, - simd: F, - ) -> bool - where - S: HasNamedMetadata, - OT: MatchName, - F: FnOnce(&[u8], &[u8], bool) -> (bool, Vec), - { - // TODO Replace with match_name_type when stable - let observer = observers.get(&self.map_ref).expect("MapObserver not found. This is likely because you entered the crash handler with the wrong executor/observer").as_ref(); - - let map_state = state - .named_metadata_map_mut() - .get_mut::>(&self.name) - .unwrap(); - let size = observer.usable_count(); - let len = observer.len(); - if map_state.history_map.len() < len { - map_state.history_map.resize(len, u8::default()); - } - - let map = observer.as_slice(); - debug_assert!(map.len() >= size); - - let history_map = map_state.history_map.as_slice(); - - // Non vector implementation for reference - /*for (i, history) in history_map.iter_mut().enumerate() { - let item = map[i]; - let reduced = MaxReducer::reduce(*history, item); - if DifferentIsNovel::is_novel(*history, reduced) { - *history = reduced; - interesting = true; - if self.novelties.is_some() { - self.novelties.as_mut().unwrap().push(i); - } - } - }*/ - - let (interesting, novelties) = simd(history_map, &map, self.novelties.is_some()); - if let Some(nov) = self.novelties.as_mut() { - *nov = novelties; - } - #[cfg(feature = "track_hit_feedbacks")] - { - self.last_result = Some(interesting); - } - interesting - } -} - #[cfg(test)] mod tests { use crate::feedbacks::{AllIsNovel, IsNovel, NextPow2IsNovel}; diff --git a/libafl/src/feedbacks/mod.rs b/libafl/src/feedbacks/mod.rs index 9460be35ee..745ac4093a 100644 --- a/libafl/src/feedbacks/mod.rs +++ b/libafl/src/feedbacks/mod.rs @@ -46,7 +46,7 @@ pub mod map; pub mod nautilus; #[cfg(feature = "std")] pub mod new_hash_feedback; -#[cfg(feature = "stable_simd")] +#[cfg(feature = "simd")] pub mod simd; #[cfg(feature = "std")] pub mod stdio; diff --git a/libafl/src/feedbacks/simd.rs b/libafl/src/feedbacks/simd.rs index e64ffcc9bc..f892ccffb7 100644 --- a/libafl/src/feedbacks/simd.rs +++ b/libafl/src/feedbacks/simd.rs @@ -1,24 +1,22 @@ //! SIMD accelerated map feedback with stable Rust. -use alloc::{borrow::Cow, vec::Vec}; +use alloc::borrow::Cow; +#[cfg(feature = "track_hit_feedbacks")] +use alloc::vec::Vec; use core::{ fmt::Debug, + marker::PhantomData, ops::{Deref, DerefMut}, }; use libafl_bolts::{ AsIter, AsSlice, Error, Named, - simd::{ - covmap_is_interesting_naive, covmap_is_interesting_u8x16, covmap_is_interesting_u8x32, - std_covmap_is_interesting, - }, - tuples::{Handle, MatchName}, + simd::{Reducer, SimdReducer, VectorType, covmap_is_interesting_simd}, + tuples::{Handle, MatchName, MatchNameRef}, }; use serde::{Serialize, de::DeserializeOwned}; -use super::{ - DifferentIsNovel, Feedback, HasObserverHandle, MapFeedback, MaxReducer, StateInitializer, -}; +use super::{DifferentIsNovel, Feedback, HasObserverHandle, MapFeedback, StateInitializer}; #[cfg(feature = "introspection")] use crate::state::HasClientPerfMonitor; use crate::{ @@ -26,93 +24,149 @@ use crate::{ corpus::Testcase, events::EventFirer, executors::ExitKind, + feedbacks::MapFeedbackMetadata, observers::{CanTrack, MapObserver}, state::HasExecutions, }; -/// The coverage map SIMD acceleration to use. -/// Benchmark is available at -#[derive(Debug, Clone, Default, Copy)] -pub enum SimdImplmentation { - /// The u8x16 implementation from wide, usually the fastest - #[default] - WideU8x16, - /// The u8x32 implementation from wide, slightly slower than u8x16 (~1%) - WideU8x32, - /// Naive implementation, reference only - Naive, -} - -impl SimdImplmentation { - fn dispatch_simd(self) -> CoverageMapFunPtr { - match self { - SimdImplmentation::WideU8x16 => covmap_is_interesting_u8x16, - SimdImplmentation::WideU8x32 => covmap_is_interesting_u8x32, - SimdImplmentation::Naive => covmap_is_interesting_naive, - } - } -} - -type CoverageMapFunPtr = fn(&[u8], &[u8], bool) -> (bool, Vec); - /// Stable Rust wrapper for SIMD accelerated map feedback. Unfortunately, we have to /// keep this until specialization is stablized (not yet since 2016). #[derive(Debug, Clone)] -pub struct SimdMapFeedback { - map: MapFeedback, - simd: CoverageMapFunPtr, +pub struct SimdMapFeedback +where + R: SimdReducer, +{ + map: MapFeedback, + _ph: PhantomData, } -impl SimdMapFeedback { +impl SimdMapFeedback +where + O: MapObserver + for<'a> AsSlice<'a, Entry = u8> + for<'a> AsIter<'a, Item = u8>, + C: CanTrack + AsRef, + R: SimdReducer, + V: VectorType + Copy + Eq, +{ + fn is_interesting_u8_simd_optimized(&mut self, state: &mut S, observers: &OT) -> bool + where + S: HasNamedMetadata, + OT: MatchName, + { + // TODO Replace with match_name_type when stable + let observer = observers.get(self.map.observer_handle()).expect("MapObserver not found. This is likely because you entered the crash handler with the wrong executor/observer").as_ref(); + + let map_state = state + .named_metadata_map_mut() + .get_mut::>(self.map.name()) + .unwrap(); + let size = observer.usable_count(); + let len = observer.len(); + if map_state.history_map.len() < len { + map_state.history_map.resize(len, u8::default()); + } + + let map = observer.as_slice(); + debug_assert!(map.len() >= size); + + let history_map = map_state.history_map.as_slice(); + + let (interesting, novelties) = + covmap_is_interesting_simd::(history_map, &map, self.map.novelties.is_some()); + if let Some(nov) = self.map.novelties.as_mut() { + *nov = novelties; + } + #[cfg(feature = "track_hit_feedbacks")] + { + self.last_result = Some(interesting); + } + interesting + } +} + +impl SimdMapFeedback +where + R: SimdReducer, +{ /// Wraps an existing map and enable SIMD acceleration. This will use standard SIMD /// implementation, which might vary based on target architecture according to our /// benchmark. #[must_use] - pub fn new(map: MapFeedback) -> Self { + pub fn wrap(map: MapFeedback) -> Self { Self { map, - simd: std_covmap_is_interesting, - } - } - - /// Wraps an existing map and enable SIMD acceleration according to arguments. - #[must_use] - pub fn with_simd( - map: MapFeedback, - simd: SimdImplmentation, - ) -> Self { - Self { - map, - simd: simd.dispatch_simd(), + _ph: PhantomData, } } } -impl Deref for SimdMapFeedback { - type Target = MapFeedback; +/// Implementation that mocks [`MapFeedback`], note the bound of O is intentionally stricter +/// than we we need to hint users when their entry is not `u8`. Without this bound, there +/// would be bound related errors in [`crate::fuzzer::StdFuzzer`], which is super confusing +/// and misleading. +impl SimdMapFeedback +where + R: SimdReducer, + C: CanTrack + AsRef + Named, + O: MapObserver + for<'a> AsSlice<'a, Entry = u8> + for<'a> AsIter<'a, Item = u8>, +{ + /// Mock [`MapFeedback::new`]. If you are getting bound errors, your entry is probably not + /// `u8` and you should use [`MapFeedback`] instead. + #[must_use] + pub fn new(map_observer: &C) -> Self { + let map = MapFeedback::new(map_observer); + Self { + map, + _ph: PhantomData, + } + } + + /// Mock [`MapFeedback::with_name`] If you are getting bound errors, your entry is probably not + /// `u8` and you should use [`MapFeedback`] instead. + #[must_use] + pub fn with_name(name: &'static str, map_observer: &C) -> Self { + let map = MapFeedback::with_name(name, map_observer); + Self { + map, + _ph: PhantomData, + } + } +} + +impl Deref for SimdMapFeedback +where + R: SimdReducer, +{ + type Target = MapFeedback; fn deref(&self) -> &Self::Target { &self.map } } -impl DerefMut for SimdMapFeedback { +impl DerefMut for SimdMapFeedback +where + R: SimdReducer, +{ fn deref_mut(&mut self) -> &mut Self::Target { &mut self.map } } -impl StateInitializer for SimdMapFeedback +impl StateInitializer for SimdMapFeedback where O: MapObserver, O::Entry: 'static + Default + Debug + DeserializeOwned + Serialize, S: HasNamedMetadata, + R: SimdReducer, { fn init_state(&mut self, state: &mut S) -> Result<(), Error> { self.map.init_state(state) } } -impl HasObserverHandle for SimdMapFeedback { +impl HasObserverHandle for SimdMapFeedback +where + R: SimdReducer, +{ type Observer = C; #[inline] @@ -121,7 +175,10 @@ impl HasObserverHandle for SimdMapFeedback { } } -impl Named for SimdMapFeedback { +impl Named for SimdMapFeedback +where + R: SimdReducer, +{ #[inline] fn name(&self) -> &Cow<'static, str> { self.map.name() @@ -129,13 +186,16 @@ impl Named for SimdMapFeedback { } // Delegate implementations to inner mapping except is_interesting -impl Feedback for SimdMapFeedback +impl Feedback for SimdMapFeedback where C: CanTrack + AsRef, EM: EventFirer, O: MapObserver + for<'a> AsSlice<'a, Entry = u8> + for<'a> AsIter<'a, Item = u8>, OT: MatchName, S: HasNamedMetadata + HasExecutions, + R: SimdReducer, + V: VectorType + Copy + Eq, + R::PrimitiveReducer: Reducer, { fn is_interesting( &mut self, @@ -145,9 +205,7 @@ where observers: &OT, _exit_kind: &ExitKind, ) -> Result { - let res = self - .map - .is_interesting_u8_simd_optimized(state, observers, self.simd); + let res = self.is_interesting_u8_simd_optimized(state, observers); Ok(res) } @@ -170,15 +228,23 @@ where #[cfg(feature = "track_hit_feedbacks")] fn last_result(&self) -> Result { // cargo +nightly doc asks so - as Feedback>::last_result( - &self.map, - ) + >::PrimitiveReducer> as Feedback< + EM, + I, + OT, + S, + >>::last_result(&self.map) } #[cfg(feature = "track_hit_feedbacks")] fn append_hit_feedbacks(&self, list: &mut Vec>) -> Result<(), Error> { // cargo +nightly doc asks so - as Feedback>::append_hit_feedbacks(&self.map, list) + >::PrimitiveReducer> as Feedback< + EM, + I, + OT, + S, + >>::append_hit_feedbacks(&self.map, list) } #[inline] diff --git a/libafl_bolts/Cargo.toml b/libafl_bolts/Cargo.toml index 48fd55070f..c7723feebe 100644 --- a/libafl_bolts/Cargo.toml +++ b/libafl_bolts/Cargo.toml @@ -52,7 +52,7 @@ std = [ "uds", "serial_test", "alloc", - "stable_simd", + "simd", ] ## Enables all features that allocate in `no_std` @@ -120,8 +120,8 @@ llmp_small_maps = ["alloc"] #! ### Stable SIMD features -## Use the best SIMD implementation by our [benchmark](https://github.com/wtdcode/libafl_simd_bench) -stable_simd = ["alloc", "wide"] +## Use the best SIMD implementation by our benchmark. +simd = ["alloc", "wide"] [build-dependencies] rustversion = { workspace = true } @@ -235,4 +235,4 @@ name = "simd" path = "./examples/simd/simd.rs" bench = true harness = false -required-features = ["std", "stable_simd"] +required-features = ["std", "simd"] diff --git a/libafl_bolts/examples/simd/simd.rs b/libafl_bolts/examples/simd/simd.rs index 7651cd041d..21727dac50 100644 --- a/libafl_bolts/examples/simd/simd.rs +++ b/libafl_bolts/examples/simd/simd.rs @@ -2,8 +2,9 @@ use chrono::Utc; use clap::Parser; use itertools::Itertools; use libafl_bolts::simd::{ - covmap_is_interesting_naive, covmap_is_interesting_u8x16, covmap_is_interesting_u8x32, - simplify_map_naive, simplify_map_u8x16, simplify_map_u8x32, + AndReducer, MaxReducer, MinReducer, OrReducer, Reducer, SimdAndReducer, SimdMaxReducer, + SimdMinReducer, SimdOrReducer, SimdReducer, VectorType, covmap_is_interesting_naive, + covmap_is_interesting_simd, simplify_map_naive, simplify_map_simd, }; use rand::{RngCore, rngs::ThreadRng}; @@ -92,6 +93,7 @@ type CovFuncPtr = fn(&[u8], &[u8], bool) -> (bool, Vec); struct CovInput { name: String, func: CovFuncPtr, + naive: CovFuncPtr, hist: Vec, map: Vec, rounds: usize, @@ -100,10 +102,15 @@ struct CovInput { } impl CovInput { - fn from_cli(name: &str, f: CovFuncPtr, cli: &Cli, rng: &ThreadRng) -> Self { + fn from_cli_simd>( + name: &str, + cli: &Cli, + rng: &ThreadRng, + ) -> Self { CovInput { name: name.to_string(), - func: f, + func: covmap_is_interesting_simd::, + naive: covmap_is_interesting_naive::, hist: vec![0; cli.map], map: vec![0; cli.map], rng: rng.clone(), @@ -111,6 +118,20 @@ impl CovInput { validate: cli.validate, } } + + fn from_cli_naive>(name: &str, cli: &Cli, rng: &ThreadRng) -> Self { + CovInput { + name: name.to_string(), + func: covmap_is_interesting_naive::, + naive: covmap_is_interesting_naive::, + hist: vec![0; cli.map], + map: vec![0; cli.map], + rng: rng.clone(), + rounds: cli.rounds, + validate: cli.validate, + } + } + fn measure_cov(mut self) -> Vec { println!("Running {}", &self.name); let mut outs = vec![]; @@ -126,11 +147,12 @@ impl CovInput { let (interesting, novelties) = (self.func)(&self.hist, &self.map, true); if self.validate { let (canonical_interesting, canonical_novelties) = - covmap_is_interesting_naive(&self.hist, &self.map, true); + (self.naive)(&self.hist, &self.map, true); assert!( canonical_interesting == interesting && novelties == canonical_novelties, - "Incorrect covmap impl. {canonical_interesting} vs {interesting}, {canonical_novelties:?} vs\n{novelties:?}" + "Incorrect {} impl. {canonical_interesting} vs {interesting}, {canonical_novelties:?} vs\n{novelties:?}", + self.name ); } let after = Utc::now(); @@ -176,8 +198,18 @@ fn main() { let simpls = [ SimplifyMapInput::from_cli("naive simplify_map", simplify_map_naive, &cli, &rng), - SimplifyMapInput::from_cli("u8x16 simplify_map", simplify_map_u8x16, &cli, &rng), - SimplifyMapInput::from_cli("u8x32 simplify_map", simplify_map_u8x32, &cli, &rng), + SimplifyMapInput::from_cli( + "u8x16 simplify_map", + simplify_map_simd::, + &cli, + &rng, + ), + SimplifyMapInput::from_cli( + "u8x32 simplify_map", + simplify_map_simd::, + &cli, + &rng, + ), ]; for bench in simpls { @@ -187,9 +219,18 @@ fn main() { } let benches = [ - CovInput::from_cli("naive cov", covmap_is_interesting_naive, &cli, &rng), - CovInput::from_cli("u8x16 cov", covmap_is_interesting_u8x16, &cli, &rng), - CovInput::from_cli("u8x32 cov", covmap_is_interesting_u8x32, &cli, &rng), + CovInput::from_cli_naive::("naive max cov", &cli, &rng), + CovInput::from_cli_simd::("u8x16 max cov", &cli, &rng), + CovInput::from_cli_simd::("u8x32 max cov", &cli, &rng), + CovInput::from_cli_naive::("naive min cov", &cli, &rng), + CovInput::from_cli_simd::("u8x16 min cov", &cli, &rng), + CovInput::from_cli_simd::("u8x32 min cov", &cli, &rng), + CovInput::from_cli_naive::("naive and cov", &cli, &rng), + CovInput::from_cli_simd::("u8x16 and cov", &cli, &rng), + CovInput::from_cli_simd::("u8x32 and cov", &cli, &rng), + CovInput::from_cli_naive::("naive or cov", &cli, &rng), + CovInput::from_cli_simd::("u8x16 or cov", &cli, &rng), + CovInput::from_cli_simd::("u8x32 or cov", &cli, &rng), ]; for bench in benches { diff --git a/libafl_bolts/src/simd.rs b/libafl_bolts/src/simd.rs index 329baedb0d..95ce91e4d3 100644 --- a/libafl_bolts/src/simd.rs +++ b/libafl_bolts/src/simd.rs @@ -2,6 +2,307 @@ #[cfg(feature = "alloc")] use alloc::{vec, vec::Vec}; +use core::ops::{BitAnd, BitOr}; + +#[cfg(feature = "wide")] +use wide::CmpEq; + +/// Re-export our vector types +#[cfg(feature = "wide")] +pub mod vector { + pub use wide::{u8x16, u8x32}; +} + +/// The SIMD based reducer implementation +#[cfg(feature = "wide")] +pub trait SimdReducer: Reducer { + /// The associated primitive reducer + type PrimitiveReducer: Reducer; +} + +/// A `Reducer` function is used to aggregate values for the novelty search +pub trait Reducer { + /// Reduce two values to one value, with the current [`Reducer`]. + fn reduce(first: T, second: T) -> T; +} + +#[cfg(feature = "wide")] +trait HasMax: Sized { + fn max_(self, rhs: Self) -> Self; +} + +#[cfg(feature = "wide")] +impl HasMax for wide::u8x16 { + fn max_(self, rhs: Self) -> Self { + self.max(rhs) + } +} + +#[cfg(feature = "wide")] +impl HasMax for wide::u8x32 { + fn max_(self, rhs: Self) -> Self { + self.max(rhs) + } +} + +#[cfg(feature = "wide")] +trait HasMin: Sized { + fn min_(self, rhs: Self) -> Self; +} + +#[cfg(feature = "wide")] +impl HasMin for wide::u8x16 { + fn min_(self, rhs: Self) -> Self { + self.min(rhs) + } +} + +#[cfg(feature = "wide")] +impl HasMin for wide::u8x32 { + fn min_(self, rhs: Self) -> Self { + self.min(rhs) + } +} + +/// A [`MaxReducer`] reduces int values and returns their maximum. +#[derive(Clone, Debug)] +pub struct MaxReducer {} + +impl Reducer for MaxReducer +where + T: PartialOrd, +{ + #[inline] + fn reduce(first: T, second: T) -> T { + if first > second { first } else { second } + } +} + +/// Unforunately we have to keep this type due to [`wide`] might not `PartialOrd` +#[cfg(feature = "wide")] +#[derive(Debug)] +pub struct SimdMaxReducer; + +#[cfg(feature = "wide")] +impl Reducer for SimdMaxReducer +where + T: HasMax, +{ + fn reduce(first: T, second: T) -> T { + first.max_(second) + } +} + +#[cfg(feature = "wide")] +impl SimdReducer for SimdMaxReducer +where + T: HasMax, +{ + type PrimitiveReducer = MaxReducer; +} + +/// A [`NopReducer`] does nothing, and just "reduces" to the second/`new` value. +#[derive(Clone, Debug)] +pub struct NopReducer {} + +impl Reducer for NopReducer { + #[inline] + fn reduce(_history: T, new: T) -> T { + new + } +} + +#[cfg(feature = "wide")] +impl SimdReducer for NopReducer { + type PrimitiveReducer = NopReducer; +} + +/// A [`MinReducer`] reduces int values and returns their minimum. +#[derive(Clone, Debug)] +pub struct MinReducer {} + +impl Reducer for MinReducer +where + T: PartialOrd, +{ + #[inline] + fn reduce(first: T, second: T) -> T { + if first < second { first } else { second } + } +} + +/// Unforunately we have to keep this type due to [`wide`] might not `PartialOrd` +#[cfg(feature = "wide")] +#[derive(Debug)] +pub struct SimdMinReducer; + +#[cfg(feature = "wide")] +impl Reducer for SimdMinReducer +where + T: HasMin, +{ + fn reduce(first: T, second: T) -> T { + first.min_(second) + } +} + +#[cfg(feature = "wide")] +impl SimdReducer for SimdMinReducer +where + T: HasMin, +{ + type PrimitiveReducer = MinReducer; +} + +/// A [`OrReducer`] reduces the values returning the bitwise OR with the old value +#[derive(Clone, Debug)] +pub struct OrReducer {} + +impl Reducer for OrReducer +where + T: BitOr, +{ + #[inline] + fn reduce(history: T, new: T) -> T { + history | new + } +} + +#[cfg(feature = "wide")] +impl SimdReducer for OrReducer +where + T: BitOr, +{ + type PrimitiveReducer = OrReducer; +} + +/// SIMD based [`OrReducer`], alias for consistency +#[cfg(feature = "wide")] +pub type SimdOrReducer = OrReducer; + +/// A [`AndReducer`] reduces the values returning the bitwise AND with the old value +#[derive(Clone, Debug)] +pub struct AndReducer {} + +impl Reducer for AndReducer +where + T: BitAnd, +{ + #[inline] + fn reduce(history: T, new: T) -> T { + history & new + } +} + +#[cfg(feature = "wide")] +impl SimdReducer for AndReducer +where + T: BitAnd, +{ + type PrimitiveReducer = AndReducer; +} + +/// SIMD based [`AndReducer`], alias for consistency +#[cfg(feature = "wide")] +pub type SimdAndReducer = AndReducer; + +#[cfg(feature = "wide")] +/// The vector type that can be used with coverage map +pub trait VectorType { + /// Number of bytes + const N: usize; + /// Zero vector + const ZERO: Self; + /// One vector + const ONE: Self; + /// 0x80 vector + const EIGHTY: Self; + + /// Construct vector from slice. Can't use N unless const generics is stablized. + fn from_slice(arr: &[u8]) -> Self; + + /// Collect novelties. We pass in base to avoid redo calculate for novelties indice. + fn novelties(hist: &[u8], map: &[u8], base: usize, novelties: &mut Vec); + + /// Do blending + #[must_use] + fn blend(self, lhs: Self, rhs: Self) -> Self; + + /// Can't reuse [`crate::AsSlice`] due to [`wide`] might implement `Deref` + fn as_slice(&self) -> &[u8]; +} + +#[cfg(feature = "wide")] +impl VectorType for wide::u8x16 { + const N: usize = Self::LANES as usize; + const ZERO: Self = Self::ZERO; + const ONE: Self = Self::new([0x1u8; Self::N]); + const EIGHTY: Self = Self::new([0x80u8; Self::N]); + + fn from_slice(arr: &[u8]) -> Self { + Self::new(arr[0..Self::N].try_into().unwrap()) + } + + fn novelties(hist: &[u8], map: &[u8], base: usize, novelties: &mut Vec) { + unsafe { + for j in base..(base + Self::N) { + let item = *map.get_unchecked(j); + if item > *hist.get_unchecked(j) { + novelties.push(j); + } + } + } + } + + fn blend(self, lhs: Self, rhs: Self) -> Self { + self.blend(lhs, rhs) + } + + fn as_slice(&self) -> &[u8] { + self.as_array_ref() + } +} + +#[cfg(feature = "wide")] +impl VectorType for wide::u8x32 { + const N: usize = Self::LANES as usize; + const ZERO: Self = Self::ZERO; + const ONE: Self = Self::new([0x1u8; Self::N]); + const EIGHTY: Self = Self::new([0x80u8; Self::N]); + + fn from_slice(arr: &[u8]) -> Self { + Self::new(arr[0..Self::N].try_into().unwrap()) + } + + fn novelties(hist: &[u8], map: &[u8], base: usize, novelties: &mut Vec) { + unsafe { + // Break into two loops so that LLVM will vectorize both loops. + // Or LLVM won't vectorize them and is super slow. We need a few + // extra intrinsic to wide and safe_arch to vectorize this manually. + for j in base..(base + Self::N / 2) { + let item = *map.get_unchecked(j); + if item > *hist.get_unchecked(j) { + novelties.push(j); + } + } + + for j in (base + Self::N / 2)..(base + Self::N) { + let item = *map.get_unchecked(j); + if item > *hist.get_unchecked(j) { + novelties.push(j); + } + } + } + } + + fn blend(self, lhs: Self, rhs: Self) -> Self { + self.blend(lhs, rhs) + } + + fn as_slice(&self) -> &[u8] { + self.as_array_ref() + } +} /// `simplify_map` naive implementaion. In most cases, this can be auto-vectorized. pub fn simplify_map_naive(map: &mut [u8]) { @@ -13,55 +314,23 @@ pub fn simplify_map_naive(map: &mut [u8]) { /// `simplify_map` implementation by u8x16, worse performance compared to LLVM /// auto-vectorization but faster if LLVM doesn't vectorize. #[cfg(feature = "wide")] -pub fn simplify_map_u8x16(map: &mut [u8]) { - type VectorType = wide::u8x16; - const N: usize = VectorType::LANES as usize; +pub fn simplify_map_simd(map: &mut [u8]) +where + V: VectorType + Copy + Eq + CmpEq, +{ let size = map.len(); - let steps = size / N; - let left = size % N; - let lhs = VectorType::new([0x1; N]); - let rhs = VectorType::new([0x80; N]); + let steps = size / V::N; + let left = size % V::N; + let lhs = V::ONE; + let rhs = V::EIGHTY; for step in 0..steps { - let i = step * N; - let mp = VectorType::new(map[i..(i + N)].try_into().unwrap()); + let i = step * V::N; + let mp = V::from_slice(&map[i..]); - let mask = mp.cmp_eq(VectorType::ZERO); + let mask = mp.cmp_eq(V::ZERO); let out = mask.blend(lhs, rhs); - map[i..i + N].copy_from_slice(out.as_array_ref()); - } - - #[allow(clippy::needless_range_loop)] - for j in (size - left)..size { - map[j] = if map[j] == 0 { 0x1 } else { 0x80 } - } -} - -/// `simplify_map` implementation by i8x32, achieving comparable performance with -/// LLVM auto-vectorization. -#[cfg(feature = "wide")] -pub fn simplify_map_u8x32(map: &mut [u8]) { - use wide::CmpEq; - - type VectorType = wide::u8x32; - const N: usize = VectorType::LANES as usize; - let size = map.len(); - let steps = size / N; - let left = size % N; - let lhs = VectorType::new([0x01; 32]); - let rhs = VectorType::new([0x80; 32]); - - for step in 0..steps { - let i = step * N; - let mp = VectorType::new(map[i..i + N].try_into().unwrap()); - - let mask = mp.cmp_eq(VectorType::ZERO); - let out = mask.blend(lhs, rhs); - unsafe { - out.as_array_ref() - .as_ptr() - .copy_to_nonoverlapping(map.as_mut_ptr().add(i), N); - } + map[i..i + V::N].copy_from_slice(out.as_slice()); } #[allow(clippy::needless_range_loop)] @@ -76,48 +345,46 @@ pub fn std_simplify_map(map: &mut [u8]) { simplify_map_naive(map); #[cfg(feature = "wide")] - simplify_map_u8x32(map); + simplify_map_simd::(map); } /// Coverage map insteresting implementation by u8x16. Slightly faster than nightly simd. #[cfg(all(feature = "alloc", feature = "wide"))] #[must_use] -pub fn covmap_is_interesting_u8x16( +pub fn covmap_is_interesting_simd( hist: &[u8], map: &[u8], collect_novelties: bool, -) -> (bool, Vec) { - type VectorType = wide::u8x16; +) -> (bool, Vec) +where + V: VectorType + Eq + Copy, + R: SimdReducer, +{ let mut novelties = vec![]; let mut interesting = false; let size = map.len(); - let steps = size / VectorType::LANES as usize; - let left = size % VectorType::LANES as usize; + let steps = size / V::N; + let left = size % V::N; if collect_novelties { for step in 0..steps { - let i = step * VectorType::LANES as usize; - let history = - VectorType::new(hist[i..i + VectorType::LANES as usize].try_into().unwrap()); - let items = VectorType::new(map[i..i + VectorType::LANES as usize].try_into().unwrap()); + let i = step * V::N; + let history = V::from_slice(&hist[i..]); + let items = V::from_slice(&map[i..]); - if items.max(history) != history { + let out = R::reduce(history, items); + if out != history { interesting = true; - unsafe { - for j in i..(i + VectorType::LANES as usize) { - let item = *map.get_unchecked(j); - if item > *hist.get_unchecked(j) { - novelties.push(j); - } - } - } + V::novelties(hist, map, i, &mut novelties); } } for j in (size - left)..size { unsafe { let item = *map.get_unchecked(j); - if item > *hist.get_unchecked(j) { + let history = *hist.get_unchecked(j); + let out = R::PrimitiveReducer::reduce(item, history); + if out != history { interesting = true; novelties.push(j); } @@ -125,12 +392,12 @@ pub fn covmap_is_interesting_u8x16( } } else { for step in 0..steps { - let i = step * VectorType::LANES as usize; - let history = - VectorType::new(hist[i..i + VectorType::LANES as usize].try_into().unwrap()); - let items = VectorType::new(map[i..i + VectorType::LANES as usize].try_into().unwrap()); + let i = step * V::N; + let history = V::from_slice(&hist[i..]); + let items = V::from_slice(&map[i..]); - if items.max(history) != history { + let out = R::reduce(history, items); + if out != history { interesting = true; break; } @@ -140,90 +407,9 @@ pub fn covmap_is_interesting_u8x16( for j in (size - left)..size { unsafe { let item = *map.get_unchecked(j); - if item > *hist.get_unchecked(j) { - interesting = true; - break; - } - } - } - } - } - - (interesting, novelties) -} - -/// Coverage map insteresting implementation by u8x32. Slightly faster than nightly simd but slightly -/// slower than u8x16 version. -#[cfg(all(feature = "alloc", feature = "wide"))] -#[must_use] -pub fn covmap_is_interesting_u8x32( - hist: &[u8], - map: &[u8], - collect_novelties: bool, -) -> (bool, Vec) { - type VectorType = wide::u8x32; - const N: usize = VectorType::LANES as usize; - let mut novelties = vec![]; - let mut interesting = false; - let size = map.len(); - let steps = size / N; - let left = size % N; - - if collect_novelties { - for step in 0..steps { - let i = step * N; - let history = VectorType::new(hist[i..i + N].try_into().unwrap()); - let items = VectorType::new(map[i..i + N].try_into().unwrap()); - - if items.max(history) != history { - interesting = true; - unsafe { - // Break into two loops so that LLVM will vectorize both loops. - // Or LLVM won't vectorize them and is super slow. We need a few - // extra intrinsic to wide and safe_arch to vectorize this manually. - for j in i..(i + N / 2) { - let item = *map.get_unchecked(j); - if item > *hist.get_unchecked(j) { - novelties.push(j); - } - } - - for j in (i + N / 2)..(i + N) { - let item = *map.get_unchecked(j); - if item > *hist.get_unchecked(j) { - novelties.push(j); - } - } - } - } - } - - for j in (size - left)..size { - unsafe { - let item = *map.get_unchecked(j); - if item > *hist.get_unchecked(j) { - interesting = true; - novelties.push(j); - } - } - } - } else { - for step in 0..steps { - let i = step * N; - let history = VectorType::new(hist[i..i + N].try_into().unwrap()); - let items = VectorType::new(map[i..i + N].try_into().unwrap()); - - if items.max(history) != history { - interesting = true; - break; - } - } - - if !interesting { - for j in (size - left)..size { - unsafe { - let item = *map.get_unchecked(j); - if item > *hist.get_unchecked(j) { + let history = *hist.get_unchecked(j); + let out = R::PrimitiveReducer::reduce(item, history); + if out != history { interesting = true; break; } @@ -238,18 +424,21 @@ pub fn covmap_is_interesting_u8x32( /// Coverage map insteresting naive implementation. Do not use it unless you have strong reasons to do. #[cfg(feature = "alloc")] #[must_use] -pub fn covmap_is_interesting_naive( +pub fn covmap_is_interesting_naive( hist: &[u8], map: &[u8], collect_novelties: bool, -) -> (bool, Vec) { +) -> (bool, Vec) +where + R: Reducer, +{ let mut novelties = vec![]; let mut interesting = false; let initial = 0; if collect_novelties { for (i, item) in map.iter().enumerate().filter(|(_, item)| **item != initial) { let existing = unsafe { *hist.get_unchecked(i) }; - let reduced = existing.max(*item); + let reduced = R::reduce(existing, *item); if existing != reduced { interesting = true; novelties.push(i); @@ -258,7 +447,7 @@ pub fn covmap_is_interesting_naive( } else { for (i, item) in map.iter().enumerate().filter(|(_, item)| **item != initial) { let existing = unsafe { *hist.get_unchecked(i) }; - let reduced = existing.max(*item); + let reduced = R::reduce(existing, *item); if existing != reduced { interesting = true; break; @@ -268,28 +457,3 @@ pub fn covmap_is_interesting_naive( (interesting, novelties) } - -/// Standard coverage map instereting implementation. Use the available fastest implementation by default. -#[cfg(feature = "alloc")] -#[allow(unused_variables)] // or we fail cargo doc -#[must_use] -pub fn std_covmap_is_interesting( - hist: &[u8], - map: &[u8], - collect_novelties: bool, -) -> (bool, Vec) { - #[cfg(not(feature = "wide"))] - return covmap_is_interesting_naive(hist, map, collect_novelties); - - #[cfg(feature = "wide")] - { - // Supported by benchmark: - // - on aarch64, u8x32 is 15% faster than u8x16 - // - on amd64, u8x16 is 10% faster compared to the u8x32 - #[cfg(target_arch = "aarch64")] - return covmap_is_interesting_u8x32(hist, map, collect_novelties); - - #[cfg(not(target_arch = "aarch64"))] - return covmap_is_interesting_u8x16(hist, map, collect_novelties); - } -}