From 61ad4a6ee8505bb23a6b6afe37999e89cba91e90 Mon Sep 17 00:00:00 2001 From: Dominik Maier Date: Tue, 29 Aug 2023 16:22:46 +0200 Subject: [PATCH] bolts: Make xxh3 hashing optional with `xxh3` feature flag (else use `ahash` for everything) (#1478) * Make xxh3 hashing optional (and default to ahash) * make xxh3 default anyway * move import * fix no_alloc * No ahash without alloc * fix import * Keep xxh3 as default for libafl as well * no randomness for xoshiro --- libafl/Cargo.toml | 2 +- libafl_bolts/Cargo.toml | 9 ++++++-- libafl_bolts/src/build_id.rs | 4 +++- libafl_bolts/src/lib.rs | 40 ++++++++++++++++++++++++++++++++++++ libafl_bolts/src/rands.rs | 19 ++++++++++------- libafl_bolts/src/serdeany.rs | 13 ++++++------ libafl_bolts/src/tuples.rs | 5 +++-- 7 files changed, 73 insertions(+), 19 deletions(-) diff --git a/libafl/Cargo.toml b/libafl/Cargo.toml index 8ffef80dd7..5e51fc8d1f 100644 --- a/libafl/Cargo.toml +++ b/libafl/Cargo.toml @@ -16,7 +16,7 @@ features = ["document-features"] all-features = true [features] -default = ["std", "derive", "llmp_compression", "llmp_small_maps", "llmp_broker_timeouts", "rand_trait", "fork", "prelude", "gzip", "regex", "serdeany_autoreg", "tui_monitor"] +default = ["std", "derive", "llmp_compression", "llmp_small_maps", "llmp_broker_timeouts", "rand_trait", "fork", "prelude", "gzip", "regex", "serdeany_autoreg", "tui_monitor", "libafl_bolts/xxh3"] document-features = ["dep:document-features"] #! 
# Feature Flags diff --git a/libafl_bolts/Cargo.toml b/libafl_bolts/Cargo.toml index 0c763eb42d..d0b04aac7e 100644 --- a/libafl_bolts/Cargo.toml +++ b/libafl_bolts/Cargo.toml @@ -17,7 +17,7 @@ features = ["document-features"] all-features = true [features] -default = ["std", "derive", "llmp_compression", "llmp_small_maps", "rand_trait", "prelude", "gzip", "serdeany_autoreg", "alloc"] +default = ["std", "derive", "llmp_compression", "llmp_small_maps", "rand_trait", "prelude", "gzip", "serdeany_autoreg", "alloc", "xxh3"] document-features = ["dep:document-features"] #! # Feature Flags @@ -56,6 +56,11 @@ errors_backtrace = ["backtrace"] ## Enables gzip compression in certain parts of the lib gzip = ["miniz_oxide", "alloc"] +## Replaces `ahash` with the potentially faster [`xxh3`](https://github.com/Cyan4973/xxHash) in some parts of the lib. +## This yields a stable and fast hash, but may increase the resulting binary size slightly +## This also enables certain hashing and rand features in `no_std` no-alloc. +xxh3 = ["xxhash-rust"] + #! ### SerdeAny features ## Automatically register all `#[derive(SerdeAny)]` types at startup. 
@@ -89,7 +94,7 @@ libafl_derive = { version = "0.11.0", optional = true, path = "../libafl_derive" rustversion = "1.0" tuple_list = { version = "0.1.3" } hashbrown = { version = "0.14", features = ["serde", "ahash"], default-features=false, optional = true } # A faster hashmap, nostd compatible -xxhash-rust = { version = "0.8.5", features = ["xxh3"] } # xxh3 hashing for rust +xxhash-rust = { version = "0.8.5", features = ["xxh3"], optional = true } # xxh3 hashing for rust serde = { version = "1.0", default-features = false, features = ["derive"] } # serialization lib erased-serde = { version = "0.3.21", default-features = false, optional = true } # erased serde postcard = { version = "1.0", features = ["alloc"], default-features = false, optional = true } # no_std compatible serde serialization format diff --git a/libafl_bolts/src/build_id.rs b/libafl_bolts/src/build_id.rs index ab1229331c..33da263b43 100644 --- a/libafl_bolts/src/build_id.rs +++ b/libafl_bolts/src/build_id.rs @@ -12,6 +12,8 @@ use std::{ use uuid::Uuid; +use crate::hasher_std; + static BUILD_ID: OnceLock = OnceLock::new(); /// Returns a [Uuid] uniquely representing the build of the current binary. 
@@ -81,7 +83,7 @@ fn from_type_id(mut hasher: H) -> H { } fn calculate() -> Uuid { - let hasher = xxhash_rust::xxh3::Xxh3::with_seed(0); + let hasher = hasher_std(); let hasher = from_exe(hasher.clone()).unwrap_or(hasher); let mut hasher = from_type_id(hasher); diff --git a/libafl_bolts/src/lib.rs b/libafl_bolts/src/lib.rs index f553335a0f..7511f1c424 100644 --- a/libafl_bolts/src/lib.rs +++ b/libafl_bolts/src/lib.rs @@ -130,15 +130,27 @@ pub mod serdeany; pub mod shmem; #[cfg(feature = "std")] pub mod staterestore; +// TODO: reenable once ahash works in no-alloc +#[cfg(any(feature = "xxh3", feature = "alloc"))] pub mod tuples; #[cfg(feature = "alloc")] use alloc::vec::Vec; +#[cfg(all(not(feature = "xxh3"), feature = "alloc"))] +use core::hash::BuildHasher; +#[cfg(any(feature = "xxh3", feature = "alloc"))] +use core::hash::Hasher; use core::{iter::Iterator, time}; #[cfg(feature = "std")] use std::time::{SystemTime, UNIX_EPOCH}; +// There's a bug in ahash that doesn't let it build in `alloc` without once_cell right now. +// TODO: re-enable once this ahash bug is resolved. +#[cfg(all(not(feature = "xxh3"), feature = "alloc"))] +use ahash::RandomState; use serde::{Deserialize, Serialize}; +#[cfg(feature = "xxh3")] +use xxhash_rust::xxh3::xxh3_64; /// The client ID == the sender id. #[repr(transparent)] @@ -209,6 +221,34 @@ fn display_error_backtrace(_f: &mut fmt::Formatter, _err: &ErrorBacktrace) -> fm fmt::Result::Ok(()) } +/// Returns the standard hasher, chosen depending on features: +/// [`xxh3_64`](https://docs.rs/xxhash-rust/latest/xxhash_rust/xxh3/fn.xxh3_64.html) +/// if the `xxh3` feature is used, else [`ahash`](https://docs.rs/ahash/latest/ahash/). 
+#[cfg(any(feature = "xxh3", feature = "alloc"))] +#[must_use] +pub fn hasher_std() -> impl Hasher + Clone { + #[cfg(feature = "xxh3")] + return xxhash_rust::xxh3::Xxh3::new(); + #[cfg(not(feature = "xxh3"))] + RandomState::with_seeds(0, 0, 0, 0).build_hasher() +} + +/// Hashes the input with a hash function chosen depending on features: +/// [`xxh3_64`](https://docs.rs/xxhash-rust/latest/xxhash_rust/xxh3/fn.xxh3_64.html) +/// if the `xxh3` feature is used, else [`ahash`](https://docs.rs/ahash/latest/ahash/). +#[cfg(any(feature = "xxh3", feature = "alloc"))] +#[must_use] +pub fn hash_std(input: &[u8]) -> u64 { + #[cfg(feature = "xxh3")] + return xxh3_64(input); + #[cfg(not(feature = "xxh3"))] + { + let mut hasher = hasher_std(); + hasher.write(input); + hasher.finish() + } +} + /// Main error struct for `LibAFL` #[derive(Debug)] pub enum Error { diff --git a/libafl_bolts/src/rands.rs b/libafl_bolts/src/rands.rs index f85e374ac3..39280ed9c9 100644 --- a/libafl_bolts/src/rands.rs +++ b/libafl_bolts/src/rands.rs @@ -4,12 +4,11 @@ use core::{debug_assert, fmt::Debug}; #[cfg(feature = "rand_trait")] use rand_core::{self, impls::fill_bytes_via_next, RngCore}; use serde::{de::DeserializeOwned, Deserialize, Serialize}; -use xxhash_rust::xxh3::xxh3_64_with_seed; #[cfg(feature = "std")] use crate::current_nanos; - -const HASH_CONST: u64 = 0xa5b35705; +#[cfg(any(feature = "xxh3", feature = "alloc"))] +use crate::hash_std; /// The standard rand implementation for `LibAFL`. /// It is usually the right choice, with very good speed and a reasonable randomness. @@ -98,6 +97,7 @@ macro_rules! default_rand { } // Derive Default by calling `new(DEFAULT_SEED)` on each of the following Rand types. +#[cfg(any(feature = "xxh3", feature = "alloc"))] default_rand!(Xoshiro256StarRand); default_rand!(XorShift64Rand); default_rand!(Lehmer64Rand); @@ -145,6 +145,7 @@ macro_rules! 
impl_random { }; } +#[cfg(any(feature = "xxh3", feature = "alloc"))] impl_random!(Xoshiro256StarRand); impl_random!(XorShift64Rand); impl_random!(Lehmer64Rand); @@ -157,10 +158,12 @@ pub struct Xoshiro256StarRand { rand_seed: [u64; 4], } +// TODO: re-enable once ahash works without alloc +#[cfg(any(feature = "xxh3", feature = "alloc"))] impl Rand for Xoshiro256StarRand { #[allow(clippy::unreadable_literal)] fn set_seed(&mut self, seed: u64) { - self.rand_seed[0] = xxh3_64_with_seed(&HASH_CONST.to_le_bytes(), seed); + self.rand_seed[0] = hash_std(&seed.to_be_bytes()); self.rand_seed[1] = self.rand_seed[0] ^ 0x1234567890abcdef; self.rand_seed[2] = self.rand_seed[0] & 0x0123456789abcdef; self.rand_seed[3] = self.rand_seed[0] | 0x01abcde43f567908; @@ -187,6 +190,7 @@ impl Rand for Xoshiro256StarRand { } } +#[cfg(any(feature = "xxh3", feature = "alloc"))] impl Xoshiro256StarRand { /// Creates a new Xoshiro rand with the given seed #[must_use] @@ -376,9 +380,9 @@ impl XkcdRand { mod tests { //use xxhash_rust::xxh3::xxh3_64_with_seed; - use crate::rands::{ - Rand, RomuDuoJrRand, RomuTrioRand, StdRand, XorShift64Rand, Xoshiro256StarRand, - }; + #[cfg(any(feature = "xxh3", feature = "alloc"))] + use crate::rands::Xoshiro256StarRand; + use crate::rands::{Rand, RomuDuoJrRand, RomuTrioRand, StdRand, XorShift64Rand}; fn test_single_rand(rand: &mut R) { assert_ne!(rand.next(), rand.next()); @@ -395,6 +399,7 @@ mod tests { test_single_rand(&mut RomuTrioRand::with_seed(0)); test_single_rand(&mut RomuDuoJrRand::with_seed(0)); test_single_rand(&mut XorShift64Rand::with_seed(0)); + #[cfg(any(feature = "xxh3", feature = "alloc"))] test_single_rand(&mut Xoshiro256StarRand::with_seed(0)); } diff --git a/libafl_bolts/src/serdeany.rs b/libafl_bolts/src/serdeany.rs index bdb5269ee4..8f5ce021ee 100644 --- a/libafl_bolts/src/serdeany.rs +++ b/libafl_bolts/src/serdeany.rs @@ -79,6 +79,7 @@ macro_rules! 
create_serde_registry_for_trait { use serde::{Deserialize, Serialize}; use $crate::{ anymap::{pack_type_id, unpack_type_id}, + hash_std, serdeany::{DeserializeCallback, DeserializeCallbackSeed}, Error, }; @@ -346,7 +347,7 @@ macro_rules! create_serde_registry_for_trait { match self.map.get(&unpack_type_id(TypeId::of::())) { None => None, Some(h) => h - .get(&xxhash_rust::xxh3::xxh3_64(name.as_bytes())) + .get(&hash_std(name.as_bytes())) .map(|x| x.as_any().downcast_ref::().unwrap()), } } @@ -359,7 +360,7 @@ macro_rules! create_serde_registry_for_trait { match self.map.get(&unpack_type_id(*typeid)) { None => None, Some(h) => h - .get(&xxhash_rust::xxh3::xxh3_64(name.as_bytes())) + .get(&hash_std(name.as_bytes())) .map(AsRef::as_ref), } } @@ -374,7 +375,7 @@ macro_rules! create_serde_registry_for_trait { match self.map.get_mut(&unpack_type_id(TypeId::of::())) { None => None, Some(h) => h - .get_mut(&xxhash_rust::xxh3::xxh3_64(name.as_bytes())) + .get_mut(&hash_std(name.as_bytes())) .map(|x| x.as_any_mut().downcast_mut::().unwrap()), } } @@ -390,7 +391,7 @@ macro_rules! create_serde_registry_for_trait { match self.map.get_mut(&unpack_type_id(*typeid)) { None => None, Some(h) => h - .get_mut(&xxhash_rust::xxh3::xxh3_64(name.as_bytes())) + .get_mut(&hash_std(name.as_bytes())) .map(AsMut::as_mut), } } @@ -552,7 +553,7 @@ macro_rules! create_serde_registry_for_trait { self.map .get_mut(&id) .unwrap() - .insert(xxhash_rust::xxh3::xxh3_64(name.as_bytes()), Box::new(val)); + .insert(hash_std(name.as_bytes()), Box::new(val)); } /// Returns the `len` of this map. @@ -587,7 +588,7 @@ macro_rules! 
create_serde_registry_for_trait { { match self.map.get(&unpack_type_id(TypeId::of::())) { None => false, - Some(h) => h.contains_key(&xxhash_rust::xxh3::xxh3_64(name.as_bytes())), + Some(h) => h.contains_key(&hash_std(name.as_bytes())), } } diff --git a/libafl_bolts/src/tuples.rs b/libafl_bolts/src/tuples.rs index fa956f156b..baa1e58bbf 100644 --- a/libafl_bolts/src/tuples.rs +++ b/libafl_bolts/src/tuples.rs @@ -8,8 +8,9 @@ use core::{ }; pub use tuple_list::{tuple_list, tuple_list_type, TupleList}; -use xxhash_rust::xxh3::xxh3_64; +#[cfg(any(feature = "xxh3", feature = "alloc"))] +use crate::hash_std; use crate::Named; /// Returns if the type `T` is equal to `U` @@ -87,7 +88,7 @@ pub trait HasNameId { /// Gets the `name_id` for this entry fn name_id(&self) -> u64 { - xxh3_64(self.const_name().as_bytes()) + hash_std(self.const_name().as_bytes()) } }