From b7889a5996aa56abe4a831eb846b5b5cf797a8cc Mon Sep 17 00:00:00 2001 From: "Marco C." <46560192+Marcondiro@users.noreply.github.com> Date: Wed, 13 Nov 2024 20:28:25 +0100 Subject: [PATCH] Move bitfields to bitbybit (#2688) * move to bitbybit * Restore bitbybit dependent code * Clippy --- Cargo.toml | 2 + libafl/Cargo.toml | 3 +- libafl/src/mutators/token_mutations.rs | 8 +- libafl/src/observers/cmp.rs | 40 +++++-- libafl_intelpt/Cargo.toml | 4 +- libafl_intelpt/src/lib.rs | 147 ++++++++++++------------- libafl_targets/src/cmps/mod.rs | 17 +-- 7 files changed, 122 insertions(+), 99 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 6fff8139a3..8bbc6cbcd4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -54,8 +54,10 @@ license = "MIT OR Apache-2.0" [workspace.dependencies] ahash = { version = "0.8.11", default-features = false } # The hash function already used in hashbrown +arbitrary-int = "1.2.7" # arbitrary sized integers, useful in combination with bitfields (bitbybit crate) backtrace = { version = "0.3.74", default-features = false } # Used to get the stacktrace in StacktraceObserver bindgen = "0.70.1" +bitbybit = "1.3.2" # bitfields, use this for bit fields and bit enums clap = "4.5.18" cc = "1.1.21" cmake = "0.1.51" diff --git a/libafl/Cargo.toml b/libafl/Cargo.toml index f9498a3423..fdd4d466b6 100644 --- a/libafl/Cargo.toml +++ b/libafl/Cargo.toml @@ -221,7 +221,8 @@ num-traits = { workspace = true, default-features = false } serde = { workspace = true, features = ["alloc"] } # serialization lib postcard = { workspace = true } # no_std compatible serde serialization format bincode = { version = "1.3.3", optional = true } -c2rust-bitfields = { version = "0.19.0", features = ["no_std"] } +bitbybit = { workspace = true } +arbitrary-int = { workspace = true } ahash = { workspace = true } # The hash function already used in hashbrown meminterval = { workspace = true, features = ["serde"] } backtrace = { workspace = true, optional = true } # Used to get the stacktrace in StacktraceObserver diff --git a/libafl/src/mutators/token_mutations.rs b/libafl/src/mutators/token_mutations.rs index ed031d05ab..00c8a0975f 100644 --- a/libafl/src/mutators/token_mutations.rs +++ b/libafl/src/mutators/token_mutations.rs @@ -1410,7 +1410,7 @@ where None => input_len - cmp_buf_idx, }; - let hshape = (header.shape() + 1) as usize; + let hshape = (header.shape().value() + 1) as usize; match (&orig_val[cmp_h_idx], &new_val[cmp_h_idx]) { (CmpValues::U8(_orig), CmpValues::U8(_new)) => { @@ -1506,7 +1506,7 @@ where } (CmpValues::U16(orig), CmpValues::U16(new)) => { let (orig_v0, orig_v1, new_v0, new_v1) = (orig.0, orig.1, new.0, new.1); - let attribute: u8 = header.attribute() as u8; + let attribute: u8 = header.attribute().value(); if new_v0 != orig_v0 && orig_v0 != orig_v1 { // Compare v0 against v1 @@ -1594,7 +1594,7 @@ where } (CmpValues::U32(orig), CmpValues::U32(new)) => { let (orig_v0, orig_v1, new_v0, new_v1) = (orig.0, orig.1, new.0, new.1); - let attribute = header.attribute() as u8; + let attribute = header.attribute().value(); let mut cmp_found = false; if new_v0 != orig_v0 && orig_v0 != orig_v1 { @@ -1687,7 +1687,7 @@ where } (CmpValues::U64(orig), CmpValues::U64(new)) => { let (orig_v0, orig_v1, new_v0, new_v1) = (orig.0, orig.1, new.0, new.1); - let attribute = header.attribute() as u8; + let attribute = header.attribute().value(); let mut cmp_found = false; if new_v0 != orig_v0 && orig_v0 != orig_v1 { diff --git a/libafl/src/observers/cmp.rs b/libafl/src/observers/cmp.rs index de83ac5b22..93b59cbba3 100644 --- a/libafl/src/observers/cmp.rs +++ b/libafl/src/observers/cmp.rs @@ -5,7 +5,8 @@ use core::{ ops::{Deref, DerefMut}, }; -use c2rust_bitfields::BitfieldStruct; +use arbitrary_int::{u1, u4, u5, u6}; +use bitbybit::bitfield; use hashbrown::HashMap; use libafl_bolts::{ownedref::OwnedRefMut, AsSlice, HasLen, Named}; use serde::{Deserialize, Serialize}; @@ -404,8 +405,6 @@ impl AFLppCmpValuesMetadata { } } -#[derive(Debug, Copy, Clone, BitfieldStruct)] -#[repr(C, packed)] /// Comparison header, used to describe a set of comparison values efficiently. /// /// # Bitfields @@ -413,17 +412,36 @@ impl AFLppCmpValuesMetadata { /// - hits: The number of hits of a particular comparison /// - id: Unused by ``LibAFL``, a unique ID for a particular comparison /// - shape: Whether a comparison is u8/u8, u16/u16, etc. -/// - _type: Whether the comparison value represents an instruction (like a `cmp`) or function +/// - type_: Whether the comparison value represents an instruction (like a `cmp`) or function /// call arguments /// - attribute: OR-ed bitflags describing whether the comparison is <, >, =, <=, >=, or transform /// - overflow: Whether the comparison overflows /// - reserved: Reserved for future use +#[bitfield(u32)] +#[derive(Debug)] pub struct AFLppCmpLogHeader { - /// The header values - #[bitfield(name = "hits", ty = "u32", bits = "0..=5")] // 6 bits up to 63 entries, we have CMP_MAP_H = 32 (so using half of it) - #[bitfield(name = "shape", ty = "u32", bits = "6..=10")] // 31 + 1 bytes max - #[bitfield(name = "_type", ty = "u8", bits = "11..=11")] // 2: cmp, rtn - #[bitfield(name = "attribute", ty = "u32", bits = "12..=15")] - // 16 types for arithmetic comparison types - pub data: [u8; 2], + /// The number of hits of a particular comparison + /// + /// 6 bits up to 63 entries, we have CMP_MAP_H = 32 (so using half of it) + #[bits(0..=5, r)] + hits: u6, + /// Whether a comparison is u8/u8, u16/u16, etc. + /// + /// 31 + 1 bytes max + #[bits(6..=10, r)] + shape: u5, + /// Whether the comparison value represents an instruction (like a `cmp`) or function call + /// arguments + /// + /// 2: cmp, rtn + #[bit(11, r)] + type_: u1, + /// OR-ed bitflags describing whether the comparison is <, >, =, <=, >=, or transform + /// + /// 16 types for arithmetic comparison types + #[bits(12..=15, r)] + attribute: u4, + /// data + #[bits(16..=31, r)] + data: u16, } diff --git a/libafl_intelpt/Cargo.toml b/libafl_intelpt/Cargo.toml index 6fffabdc40..4f040a4fbf 100644 --- a/libafl_intelpt/Cargo.toml +++ b/libafl_intelpt/Cargo.toml @@ -24,8 +24,8 @@ nix = { workspace = true } proc-maps = "0.4.0" [dependencies] -#arbitrary-int = { version = "1.2.7" } -#bitbybit = { version = "1.3.2" } +arbitrary-int = { workspace = true } +bitbybit = { workspace = true } libafl_bolts = { path = "../libafl_bolts", default-features = false } libc = { workspace = true } libipt = { workspace = true, optional = true } diff --git a/libafl_intelpt/src/lib.rs b/libafl_intelpt/src/lib.rs index f9616d7e3b..0493a715f7 100644 --- a/libafl_intelpt/src/lib.rs +++ b/libafl_intelpt/src/lib.rs @@ -32,10 +32,10 @@ use std::{ sync::LazyLock, }; -// #[cfg(target_os = "linux")] -// use arbitrary_int::u4; -// #[cfg(target_os = "linux")] -// use bitbybit::bitfield; +#[cfg(target_os = "linux")] +use arbitrary_int::u4; +#[cfg(target_os = "linux")] +use bitbybit::bitfield; #[cfg(target_os = "linux")] use caps::{CapSet, Capability}; #[cfg(target_os = "linux")] @@ -641,19 +641,19 @@ impl IntelPTBuilder { } } -// /// Perf event config for `IntelPT` -// /// -// /// (This is almost mapped to `IA32_RTIT_CTL MSR` by perf) -// #[cfg(target_os = "linux")] -// #[bitfield(u64, default = 0)] -// struct PtConfig { -// /// Disable call return address compression. AKA DisRETC in Intel SDM. -// #[bit(11, rw)] -// noretcomp: bool, -// /// Indicates the frequency of PSB packets. AKA PSBFreq in Intel SDM. -// #[bits(24..=27, rw)] -// psb_period: u4, -// } +/// Perf event config for `IntelPT` +/// +/// (This is almost mapped to `IA32_RTIT_CTL MSR` by perf) +#[cfg(target_os = "linux")] +#[bitfield(u64, default = 0)] +struct PtConfig { + /// Disable call return address compression. AKA DisRETC in Intel SDM. + #[bit(11, rw)] + noretcomp: bool, + /// Indicates the frequency of PSB packets. AKA PSBFreq in Intel SDM. + #[bits(24..=27, rw)] + psb_period: u4, +} /// Number of address filters available on the running CPU #[cfg(target_os = "linux")] @@ -815,12 +815,11 @@ fn new_perf_event_attr_intel_pt() -> Result { Ok(t) => Ok(*t), Err(e) => Err(Error::unsupported(e.clone())), }?; - // let config = PtConfig::builder() - // .with_noretcomp(true) - // .with_psb_period(u4::new(0)) - // .build() - // .raw_value; - let config = 0x08_00; // noretcomp + let config = PtConfig::builder() + .with_noretcomp(true) + .with_psb_period(u4::new(0)) + .build() + .raw_value; let mut attr = perf_event_attr { size: size_of::() as u32, @@ -940,8 +939,8 @@ const fn wrap_aux_pointer(ptr: u64, perf_aux_buffer_size: usize) -> u64 { #[cfg(test)] mod test { - // #[cfg(target_os = "linux")] - // use arbitrary_int::Number; + #[cfg(target_os = "linux")] + use arbitrary_int::Number; use static_assertions::assert_eq_size; use super::*; @@ -949,7 +948,7 @@ mod test { // Only 64-bit systems are supported, ensure we can use usize and u64 interchangeably assert_eq_size!(usize, u64); - /// Quick way to check if your machine is compatible with Intl PT's features used by libafl + /// Quick way to check if your machine is compatible with Intel PT's features used by libafl /// /// Simply run `cargo test intel_pt_check_availability -- --show-output` #[test] @@ -979,52 +978,52 @@ mod test { .unwrap(); } - // #[test] - // #[cfg(target_os = "linux")] - // fn intel_pt_pt_config_noretcomp_format() { - // let ptconfig_noretcomp = PtConfig::DEFAULT.with_noretcomp(true).raw_value; - // let path = format!("{PT_EVENT_PATH}/format/noretcomp"); - // let s = fs::read_to_string(&path).expect("Failed to read Intel PT config noretcomp format"); - // assert!( - // s.starts_with("config:"), - // "Unexpected Intel PT config noretcomp format" - // ); - // let bit = s["config:".len()..] - // .trim() - // .parse::() - // .expect("Failed to parse Intel PT config noretcomp format"); - // assert_eq!( - // ptconfig_noretcomp, - // 0b1 << bit, - // "Unexpected Intel PT config noretcomp format" - // ); - // } - // - // #[test] - // #[cfg(target_os = "linux")] - // fn intel_pt_pt_config_psb_period_format() { - // let ptconfig_psb_period = PtConfig::DEFAULT.with_psb_period(u4::MAX).raw_value; - // let path = format!("{PT_EVENT_PATH}/format/psb_period"); - // let s = - // fs::read_to_string(&path).expect("Failed to read Intel PT config psb_period format"); - // assert!( - // s.starts_with("config:"), - // "Unexpected Intel PT config psb_period format" - // ); - // let from = s["config:".len().."config:".len() + 2] - // .parse::() - // .expect("Failed to parse Intel PT config psb_period format"); - // let to = s["config:".len() + 3..] - // .trim() - // .parse::() - // .expect("Failed to parse Intel PT config psb_period format"); - // let mut format = 0; - // for bit in from..=to { - // format |= 0b1 << bit; - // } - // assert_eq!( - // ptconfig_psb_period, format, - // "Unexpected Intel PT config psb_period format" - // ); - // } + #[test] + #[cfg(target_os = "linux")] + fn intel_pt_pt_config_noretcomp_format() { + let ptconfig_noretcomp = PtConfig::DEFAULT.with_noretcomp(true).raw_value; + let path = format!("{PT_EVENT_PATH}/format/noretcomp"); + let s = fs::read_to_string(&path).expect("Failed to read Intel PT config noretcomp format"); + assert!( + s.starts_with("config:"), + "Unexpected Intel PT config noretcomp format" + ); + let bit = s["config:".len()..] + .trim() + .parse::() + .expect("Failed to parse Intel PT config noretcomp format"); + assert_eq!( + ptconfig_noretcomp, + 0b1 << bit, + "Unexpected Intel PT config noretcomp format" + ); + } + + #[test] + #[cfg(target_os = "linux")] + fn intel_pt_pt_config_psb_period_format() { + let ptconfig_psb_period = PtConfig::DEFAULT.with_psb_period(u4::MAX).raw_value; + let path = format!("{PT_EVENT_PATH}/format/psb_period"); + let s = + fs::read_to_string(&path).expect("Failed to read Intel PT config psb_period format"); + assert!( + s.starts_with("config:"), + "Unexpected Intel PT config psb_period format" + ); + let from = s["config:".len().."config:".len() + 2] + .parse::() + .expect("Failed to parse Intel PT config psb_period format"); + let to = s["config:".len() + 3..] + .trim() + .parse::() + .expect("Failed to parse Intel PT config psb_period format"); + let mut format = 0; + for bit in from..=to { + format |= 0b1 << bit; + } + assert_eq!( + ptconfig_psb_period, format, + "Unexpected Intel PT config psb_period format" + ); + } } diff --git a/libafl_targets/src/cmps/mod.rs b/libafl_targets/src/cmps/mod.rs index 597241b3b4..5ebe68ccce 100644 --- a/libafl_targets/src/cmps/mod.rs +++ b/libafl_targets/src/cmps/mod.rs @@ -442,7 +442,7 @@ pub static mut libafl_cmplog_map: CmpLogMap = CmpLogMap { #[cfg(feature = "cmplog_extended_instrumentation")] #[allow(clippy::large_stack_arrays)] pub static mut libafl_cmplog_map_extended: AFLppCmpLogMap = AFLppCmpLogMap { - headers: [AFLppCmpLogHeader { data: [0; 2] }; CMPLOG_MAP_W], + headers: [AFLppCmpLogHeader::new_with_raw_value(0); CMPLOG_MAP_W], vals: AFLppCmpLogVals { operands: [[AFLppCmpLogOperands { v0: 0, @@ -463,7 +463,7 @@ pub use libafl_cmplog_map as CMPLOG_MAP; pub use libafl_cmplog_map_extended as CMPLOG_MAP_EXTENDED; #[derive(Debug, Clone)] -#[repr(C, packed)] +#[repr(C)] /// Comparison map compatible with AFL++ cmplog instrumentation pub struct AFLppCmpLogMap { headers: [AFLppCmpLogHeader; CMPLOG_MAP_W], @@ -478,6 +478,7 @@ impl HasLen for AFLppCmpLogMap { impl AFLppCmpLogMap { #[must_use] + #[allow(clippy::cast_ptr_alignment)] /// Instantiate a new boxed zeroed `AFLppCmpLogMap`. This should be used to create a new /// map, because it is so large it cannot be allocated on the stack with default /// runtime configuration. @@ -524,6 +525,7 @@ impl Serialize for AFLppCmpLogMap { } impl<'de> Deserialize<'de> for AFLppCmpLogMap { + #[allow(clippy::cast_ptr_alignment)] fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, @@ -540,11 +542,11 @@ impl CmpMap for AFLppCmpLogMap { } fn executions_for(&self, idx: usize) -> usize { - self.headers[idx].hits() as usize + self.headers[idx].hits().value() as usize } fn usable_executions_for(&self, idx: usize) -> usize { - if self.headers[idx]._type() == CMPLOG_KIND_INS { + if self.headers[idx].type_().value() == CMPLOG_KIND_INS { if self.executions_for(idx) < CMPLOG_MAP_H { self.executions_for(idx) } else { @@ -558,9 +560,10 @@ impl CmpMap for AFLppCmpLogMap { } fn values_of(&self, idx: usize, execution: usize) -> Option { - if self.headers[idx]._type() == CMPLOG_KIND_INS { + let header = self.headers[idx]; + if header.type_().value() == CMPLOG_KIND_INS { unsafe { - match self.headers[idx].shape() { + match self.headers[idx].shape().value() { 0 => Some(CmpValues::U8(( self.vals.operands[idx][execution].v0 as u8, self.vals.operands[idx][execution].v1 as u8, @@ -600,7 +603,7 @@ impl CmpMap for AFLppCmpLogMap { fn reset(&mut self) -> Result<(), Error> { // For performance, we reset just the headers - self.headers.fill(AFLppCmpLogHeader { data: [0; 2] }); + self.headers.fill(AFLppCmpLogHeader::new_with_raw_value(0)); Ok(()) }