diff --git a/Cargo.toml b/Cargo.toml index 45bbf9b8d9..6f3289d70c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,9 +20,10 @@ members = [ "libafl_concolic/test/runtime_test", "utils/build_and_test_fuzzers", "utils/deexit", + "utils/drcov_utils", + "utils/gramatron/construct_automata", "utils/libafl_benches", "utils/libafl_jumper", - "utils/gramatron/construct_automata", ] default-members = [ "libafl", diff --git a/libafl_frida/src/asan/asan_rt.rs b/libafl_frida/src/asan/asan_rt.rs index b2592f4e69..94ac0e0bf6 100644 --- a/libafl_frida/src/asan/asan_rt.rs +++ b/libafl_frida/src/asan/asan_rt.rs @@ -162,7 +162,7 @@ impl FridaRuntime for AsanRuntime { fn init( &mut self, gum: &Gum, - _ranges: &RangeMap, + _ranges: &RangeMap, module_map: &Rc, ) { self.allocator.init(); diff --git a/libafl_frida/src/cmplog_rt.rs b/libafl_frida/src/cmplog_rt.rs index cf769683f0..b80a7a4f56 100644 --- a/libafl_frida/src/cmplog_rt.rs +++ b/libafl_frida/src/cmplog_rt.rs @@ -124,7 +124,7 @@ impl FridaRuntime for CmpLogRuntime { fn init( &mut self, _gum: &frida_gum::Gum, - _ranges: &RangeMap, + _ranges: &RangeMap, _module_map: &Rc, ) { self.generate_instrumentation_blobs(); diff --git a/libafl_frida/src/coverage_rt.rs b/libafl_frida/src/coverage_rt.rs index 57a496524c..4f1e17a031 100644 --- a/libafl_frida/src/coverage_rt.rs +++ b/libafl_frida/src/coverage_rt.rs @@ -37,7 +37,7 @@ impl FridaRuntime for CoverageRuntime { fn init( &mut self, _gum: &frida_gum::Gum, - _ranges: &RangeMap, + _ranges: &RangeMap, _module_map: &Rc, ) { } diff --git a/libafl_frida/src/drcov_rt.rs b/libafl_frida/src/drcov_rt.rs index 4b250cc3c9..5da459b314 100644 --- a/libafl_frida/src/drcov_rt.rs +++ b/libafl_frida/src/drcov_rt.rs @@ -23,7 +23,7 @@ pub struct DrCovRuntime { /// The basic blocks of this execution pub drcov_basic_blocks: Vec, /// The memory ranges of this target - ranges: RangeMap, + ranges: RangeMap, coverage_directory: PathBuf, } @@ -32,7 +32,7 @@ impl FridaRuntime for DrCovRuntime { fn init( &mut self, 
_gum: &frida_gum::Gum, - ranges: &RangeMap, + ranges: &RangeMap, _module_map: &Rc, ) { self.ranges = ranges.clone(); @@ -61,8 +61,8 @@ impl FridaRuntime for DrCovRuntime { let mut coverage_hasher = RandomState::with_seeds(0, 0, 0, 0).build_hasher(); for bb in &self.drcov_basic_blocks { - coverage_hasher.write_usize(bb.start); - coverage_hasher.write_usize(bb.end); + coverage_hasher.write_u64(bb.start); + coverage_hasher.write_u64(bb.end); } let coverage_hash = coverage_hasher.finish(); diff --git a/libafl_frida/src/executor.rs b/libafl_frida/src/executor.rs index 492fc1380a..0a113e83a0 100644 --- a/libafl_frida/src/executor.rs +++ b/libafl_frida/src/executor.rs @@ -189,10 +189,10 @@ where let mut ranges = helper.ranges().clone(); for module in frida_gum::Module::obtain(gum).enumerate_modules() { if module.base_address < Self::new as usize - && (Self::new as usize) < module.base_address + module.size + && (Self::new as usize as u64) < module.base_address as u64 + module.size as u64 { ranges.insert( - module.base_address..(module.base_address + module.size), + module.base_address as u64..(module.base_address as u64 + module.size as u64), (0xffff, "fuzzer".to_string()), ); break; @@ -201,11 +201,13 @@ where log::info!("disable_excludes: {:}", helper.disable_excludes); if !helper.disable_excludes { - for range in ranges.gaps(&(0..usize::MAX)) { + for range in ranges.gaps(&(0..u64::MAX)) { log::info!("excluding range: {:x}-{:x}", range.start, range.end); stalker.exclude(&MemoryRange::new( NativePointer(range.start as *mut c_void), - range.end - range.start, + usize::try_from(range.end - range.start).unwrap_or_else(|err| { + panic!("Address out of usize range: {range:?} - {err}") + }), )); } } diff --git a/libafl_frida/src/helper.rs b/libafl_frida/src/helper.rs index 67f97bdd3e..f0e6ae64d1 100644 --- a/libafl_frida/src/helper.rs +++ b/libafl_frida/src/helper.rs @@ -42,7 +42,7 @@ pub trait FridaRuntime: 'static + Debug { fn init( &mut self, gum: &Gum, - ranges: &RangeMap, 
+ ranges: &RangeMap, module_map: &Rc, ); /// Deinitialization @@ -61,7 +61,7 @@ pub trait FridaRuntimeTuple: MatchFirstType + Debug { fn init_all( &mut self, gum: &Gum, - ranges: &RangeMap, + ranges: &RangeMap, module_map: &Rc, ); @@ -79,7 +79,7 @@ impl FridaRuntimeTuple for () { fn init_all( &mut self, _gum: &Gum, - _ranges: &RangeMap, + _ranges: &RangeMap, _module_map: &Rc, ) { } @@ -101,7 +101,7 @@ where fn init_all( &mut self, gum: &Gum, - ranges: &RangeMap, + ranges: &RangeMap, module_map: &Rc, ) { self.0.init(gum, ranges, module_map); @@ -317,20 +317,23 @@ impl FridaInstrumentationHelperBuilder { module.range().base_address().0 as usize ); let range = module.range(); - let start = range.base_address().0 as usize; - ranges - .borrow_mut() - .insert(start..(start + range.size()), (i as u16, module.path())); + let start = range.base_address().0 as u64; + ranges.borrow_mut().insert( + start..(start + range.size() as u64), + (i as u16, module.path()), + ); } for skip in skip_ranges { match skip { - SkipRange::Absolute(range) => ranges.borrow_mut().remove(range), + SkipRange::Absolute(range) => ranges + .borrow_mut() + .remove(range.start as u64..range.end as u64), SkipRange::ModuleRelative { name, range } => { let module_details = ModuleDetails::with_name(name).unwrap(); - let lib_start = module_details.range().base_address().0 as usize; - ranges - .borrow_mut() - .remove((lib_start + range.start)..(lib_start + range.end)); + let lib_start = module_details.range().base_address().0 as u64; + ranges.borrow_mut().remove( + (lib_start + range.start as u64)..(lib_start + range.end as u64), + ); } } } @@ -388,7 +391,7 @@ impl Default for FridaInstrumentationHelperBuilder { /// An helper that feeds `FridaInProcessExecutor` with edge-coverage instrumentation pub struct FridaInstrumentationHelper<'a, RT: 'a> { transformer: Transformer<'a>, - ranges: Rc>>, + ranges: Rc>>, runtimes: Rc>, stalker_enabled: bool, pub(crate) disable_excludes: bool, @@ -491,7 +494,7 @@ where 
#[allow(clippy::too_many_lines)] fn build_transformer( gum: &'a Gum, - ranges: &Rc>>, + ranges: &Rc>>, runtimes: &Rc>, ) -> Transformer<'a> { let ranges = Rc::clone(ranges); @@ -512,7 +515,7 @@ where fn transform( basic_block: StalkerIterator, output: &StalkerOutput, - ranges: &Rc>>, + ranges: &Rc>>, runtimes_unborrowed: &Rc>, decoder: InstDecoder, ) { @@ -525,7 +528,7 @@ where let address = instr.address(); // log::trace!("x - block @ {:x} transformed to {:x}", address, output.writer().pc()); //the ASAN check needs to be done before the hook_rt check due to x86 insns such as call [mem] - if ranges.borrow().contains_key(&(address as usize)) { + if ranges.borrow().contains_key(&address) { let mut runtimes = (*runtimes_unborrowed).borrow_mut(); if first { first = false; @@ -634,8 +637,8 @@ where { log::trace!("{basic_block_start:#016X}:{basic_block_size:X}"); rt.drcov_basic_blocks.push(DrCovBasicBlock::new( - basic_block_start as usize, - basic_block_start as usize + basic_block_size, + basic_block_start, + basic_block_start + basic_block_size as u64, )); } } @@ -697,7 +700,7 @@ where pub fn init( &mut self, gum: &'a Gum, - ranges: &RangeMap, + ranges: &RangeMap, module_map: &Rc, ) { (*self.runtimes) @@ -731,12 +734,12 @@ where /// Ranges #[must_use] - pub fn ranges(&self) -> Ref> { + pub fn ranges(&self) -> Ref> { self.ranges.borrow() } /// Mutable ranges - pub fn ranges_mut(&mut self) -> RefMut> { + pub fn ranges_mut(&mut self) -> RefMut> { (*self.ranges).borrow_mut() } } diff --git a/libafl_qemu/src/modules/drcov.rs b/libafl_qemu/src/modules/drcov.rs index 5720f2429f..46aa92d0da 100644 --- a/libafl_qemu/src/modules/drcov.rs +++ b/libafl_qemu/src/modules/drcov.rs @@ -40,7 +40,7 @@ libafl_bolts::impl_serdeany!(DrCovMetadata); #[derive(Debug)] pub struct DrCovModuleBuilder { filter: Option, - module_mapping: Option>, + module_mapping: Option>, filename: Option, full_trace: Option, } @@ -68,7 +68,7 @@ where } #[must_use] - pub fn module_mapping(self, module_mapping: 
RangeMap) -> Self { + pub fn module_mapping(self, module_mapping: RangeMap) -> Self { Self { filter: self.filter, module_mapping: Some(module_mapping), @@ -101,7 +101,7 @@ where #[derive(Debug)] pub struct DrCovModule { filter: F, - module_mapping: Option>, + module_mapping: Option>, filename: PathBuf, full_trace: bool, drcov_len: usize, @@ -124,7 +124,7 @@ impl DrCovModule { pub fn new( filter: F, filename: PathBuf, - module_mapping: Option>, + module_mapping: Option>, full_trace: bool, ) -> Self { if full_trace { @@ -168,11 +168,12 @@ impl DrCovModule { continue 'pcs_full; } if *idm == *id { + #[allow(clippy::unnecessary_cast)] // for GuestAddr -> u64 match lengths.get(pc) { Some(block_length) => { drcov_vec.push(DrCovBasicBlock::new( - *pc as usize, - *pc as usize + *block_length as usize, + *pc as u64, + *pc as u64 + *block_length as u64, )); } None => { @@ -215,11 +216,13 @@ impl DrCovModule { if !module_found { continue 'pcs; } + + #[allow(clippy::unnecessary_cast)] // for GuestAddr -> u64 match lengths.get(pc) { Some(block_length) => { drcov_vec.push(DrCovBasicBlock::new( - *pc as usize, - *pc as usize + *block_length as usize, + *pc as u64, + *pc as u64 + *block_length as u64, )); } None => { @@ -282,13 +285,14 @@ where let qemu = emulator_modules.qemu(); - let mut module_mapping: RangeMap = RangeMap::new(); + let mut module_mapping: RangeMap = RangeMap::new(); + #[allow(clippy::unnecessary_cast)] // for GuestAddr -> u64 for (i, (r, p)) in qemu .mappings() .filter_map(|m| { m.path() - .map(|p| ((m.start() as usize)..(m.end() as usize), p.to_string())) + .map(|p| ((m.start() as u64)..(m.end() as u64), p.to_string())) .filter(|(_, p)| !p.is_empty()) }) .enumerate() diff --git a/libafl_targets/src/drcov.rs b/libafl_targets/src/drcov.rs index a5a7a901be..9f8b448718 100644 --- a/libafl_targets/src/drcov.rs +++ b/libafl_targets/src/drcov.rs @@ -1,59 +1,112 @@ //! [`DrCov`](https://dynamorio.org/page_drcov.html) support for `LibAFL` `FRIDA` mode. //! //! 
It's writing basic-block trace files to be read by coverage analysis tools, such as [Lighthouse](https://github.com/gaasedelen/lighthouse), -//! [bncov](https://github.com/ForAllSecure/bncov), [dragondance](https://github.com/0ffffffffh/dragondance), etc. +//! [bncov](https://github.com/ForAllSecure/bncov), [cartographer](https://github.com/nccgroup/Cartographer), etc. use alloc::{string::String, vec::Vec}; +use core::{fmt::Debug, num::ParseIntError, ptr}; use std::{ fs::File, - io::{BufWriter, Write}, - path::Path, + io::{BufRead, BufReader, BufWriter, Read, Write}, + path::{Path, PathBuf}, }; +use hashbrown::HashSet; use libafl::Error; use rangemap::RangeMap; /// A basic block struct +/// This can be used to keep track of new addresses. #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct DrCovBasicBlock { /// Start of this basic block - pub start: usize, + pub start: u64, /// End of this basic block - pub end: usize, + pub end: u64, } -#[derive(Clone, Copy, Debug, PartialEq, Eq)] +/// A (Raw) Basic Block List Entry. +/// This is only relevant in combination with a [`DrCovReader`] or a [`DrCovWriter`]. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] #[repr(C)] -struct DrCovBasicBlockEntry { - start: u32, +pub struct DrCovBasicBlockEntry { + /// Start of this basic block + pub start: u32, + /// Size of this basic block size: u16, + /// The id of the `DrCov` module this block is in mod_id: u16, } +impl From<&[u8; 8]> for DrCovBasicBlockEntry { + fn from(value: &[u8; 8]) -> Self { + // # Safety + // The value is a valid u8 pointer. + // There's a chance that the value is not aligned to 32 bit, so we use `read_unaligned`. + assert_eq!( + size_of::(), + size_of::<[u8; 8]>(), + "`DrCovBasicBlockEntry` size changed!" + ); + unsafe { ptr::read_unaligned(ptr::from_ref(value) as *const DrCovBasicBlockEntry) } + } +} + +impl From for [u8; 8] { + fn from(value: DrCovBasicBlockEntry) -> Self { + // # Safety + // The value is a c struct. 
+ // Casting its pointer to bytes should be safe. + // The resulting pointer needs to be less aligned. + assert_eq!( + size_of::(), + size_of::<[u8; 8]>(), + "`DrCovBasicBlockEntry` size changed!" + ); + unsafe { std::slice::from_raw_parts(ptr::from_ref(&value).cast::(), 8) } + .try_into() + .unwrap() + } +} + +impl From<&DrCovBasicBlockEntry> for &[u8] { + fn from(value: &DrCovBasicBlockEntry) -> Self { + // # Safety + // The value is a c struct. + // Casting its pointer to bytes should be safe. + unsafe { + std::slice::from_raw_parts( + ptr::from_ref(value).cast::(), + size_of::(), + ) + } + } +} + /// A writer for `DrCov` files #[derive(Debug)] pub struct DrCovWriter<'a> { - module_mapping: &'a RangeMap, + module_mapping: &'a RangeMap, } impl DrCovBasicBlock { /// Create a new [`DrCovBasicBlock`] with the given `start` and `end` addresses. #[must_use] - pub fn new(start: usize, end: usize) -> Self { + pub fn new(start: u64, end: u64) -> Self { Self { start, end } } /// Create a new [`DrCovBasicBlock`] with a given `start` address and a block size. 
#[must_use] - pub fn with_size(start: usize, size: usize) -> Self { - Self::new(start, start + size) + pub fn with_size(start: u64, size: usize) -> Self { + Self::new(start, start + u64::try_from(size).unwrap()) } } impl<'a> DrCovWriter<'a> { /// Create a new [`DrCovWriter`] #[must_use] - pub fn new(module_mapping: &'a RangeMap) -> Self { + pub fn new(module_mapping: &'a RangeMap) -> Self { Self { module_mapping } } @@ -63,49 +116,496 @@ impl<'a> DrCovWriter<'a> { P: AsRef, { let mut writer = BufWriter::new(File::create(path)?); + let modules = self.module_entries(); + writer.write_all(b"DRCOV VERSION: 2\nDRCOV FLAVOR: libafl\n")?; writer - .write_all(b"DRCOV VERSION: 2\nDRCOV FLAVOR: libafl\n") - .unwrap(); - - let modules: Vec<(&std::ops::Range, &(u16, String))> = - self.module_mapping.iter().collect(); - writer - .write_all(format!("Module Table: version 2, count {}\n", modules.len()).as_bytes()) - .unwrap(); - writer - .write_all(b"Columns: id, base, end, entry, checksum, timestamp, path\n") - .unwrap(); + .write_all(format!("Module Table: version 2, count {}\n", modules.len()).as_bytes())?; + writer.write_all(b"Columns: id, base, end, entry, checksum, timestamp, path\n")?; for module in modules { - let (range, (id, path)) = module; - writer - .write_all( - format!( - "{:03}, 0x{:x}, 0x{:x}, 0x00000000, 0x00000000, 0x00000000, {}\n", - id, range.start, range.end, path - ) - .as_bytes(), - ) - .unwrap(); + writer.write_all(module.to_module_line().as_bytes())?; + writer.write_all(b"\n")?; } - writer - .write_all(format!("BB Table: {} bbs\n", basic_blocks.len()).as_bytes()) - .unwrap(); - for block in basic_blocks { - let (range, (id, _)) = self.module_mapping.get_key_value(&block.start).unwrap(); - let basic_block = DrCovBasicBlockEntry { - start: (block.start - range.start) as u32, - size: (block.end - block.start) as u16, - mod_id: *id, - }; - writer - .write_all(unsafe { - std::slice::from_raw_parts(&raw const (basic_block) as *const u8, 8) - }) - .unwrap(); + 
+ writer.write_all(format!("BB Table: {} bbs\n", basic_blocks.len()).as_bytes())?; + for block in self.basic_block_entries(basic_blocks) { + writer.write_all((&block).into()).unwrap(); } writer.flush()?; Ok(()) } + + /// Gets a [`Vec`] of all [`DrCovModuleEntry`] elements in this [`DrCovWriter`]. + #[must_use] + pub fn module_entries(&self) -> Vec { + self.module_mapping + .iter() + .map(|x| { + let (range, (id, path)) = x; + DrCovModuleEntry { + id: *id, + base: range.start, + end: range.end, + entry: 0, + checksum: 0, + timestamp: 0, + path: PathBuf::from(path), + } + }) + .collect() + } + + /// Gets a [`Vec`] of all [`DrCovBasicBlockEntry`] elements from a list of [`DrCovBasicBlock`] entries using the modules from this [`DrCovWriter`]. + #[must_use] + pub fn basic_block_entries( + &self, + basic_blocks: &[DrCovBasicBlock], + ) -> Vec { + let mut ret = Vec::with_capacity(basic_blocks.len()); + for block in basic_blocks { + let (range, (id, _)) = self + .module_mapping + .get_key_value(&block.start) + .unwrap_or_else(|| { + panic!( + "Could not read module at addr {:?}. Module list: {:?}.", + block.start, self.module_mapping + ) + }); + let basic_block = DrCovBasicBlockEntry { + start: (block.start - range.start) as u32, + size: (block.end - block.start) as u16, + mod_id: *id, + }; + ret.push(basic_block); + } + ret + } + + /// Creates a [`DrCovReader`] module out of this [`DrCovWriter`] + #[must_use] + pub fn to_reader(&self, basic_blocks: &[DrCovBasicBlock]) -> DrCovReader { + let modules = self.module_entries(); + let basic_blocks = self.basic_block_entries(basic_blocks); + + DrCovReader::from_data(modules, basic_blocks) + } +} + +/// An entry in the `DrCov` module list. 
+#[derive(Debug, Clone)] +pub struct DrCovModuleEntry { + /// The index of this module + pub id: u16, + /// Base of this module + pub base: u64, + /// End address of this module + pub end: u64, + /// Entry (can be zero) + pub entry: usize, + /// Checksum (can be zero) + pub checksum: usize, + /// Timestamp (can be zero) + pub timestamp: usize, + /// The path of this module + pub path: PathBuf, +} + +impl DrCovModuleEntry { + /// Gets the module line from this [`DrCovModuleEntry`] + #[must_use] + pub fn to_module_line(&self) -> String { + format!( + "{:03}, 0x{:x}, 0x{:x}, 0x{:x}, 0x{:x}, 0x{:x}, {:?}", + self.id, self.base, self.end, self.entry, self.checksum, self.timestamp, self.path + ) + } +} + +/// Read `DrCov` (v2) files created with [`DrCovWriter`] or other tools +pub struct DrCovReader { + /// The modules in this `DrCov` file + pub module_entries: Vec, + /// The list of basic blocks as [`DrCovBasicBlockEntry`]. + /// To get the blocks as [`DrCovBasicBlock`], call [`Self::basic_blocks`] instead. + pub basic_block_entries: Vec, +} + +impl Debug for DrCovReader { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("DrCovReader") + .field("modules", &self.module_entries) + .field("basic_blocks", &self.basic_block_entries.len()) + .finish() + } +} + +fn parse_hex_to_usize(str: &str) -> Result { + // Cut off the first 0x + usize::from_str_radix(&str[2..], 16) +} + +fn parse_hex_to_u64(str: &str) -> Result { + // Cut off the first 0x + u64::from_str_radix(&str[2..], 16) +} + +impl DrCovReader { + /// Parse a `drcov` file to memory. + pub fn read + ?Sized>(file: &P) -> Result { + let f = File::open(file)?; + let mut reader = BufReader::new(f); + + let mut header = String::new(); + reader.read_line(&mut header)?; + + let drcov_version = "DRCOV VERSION: 2"; + if header.to_uppercase().trim() != drcov_version { + return Err(Error::illegal_state(format!( + "No valid header. 
Expected {drcov_version} but got {header}" + ))); + } + + header.clear(); + reader.read_line(&mut header)?; + + let drcov_flavor = "DRCOV FLAVOR:"; + if header.to_uppercase().starts_with(drcov_flavor) { + // Ignore flavor line if it's not present. + log::info!("Got drcov flavor {drcov_flavor}"); + + header.clear(); + reader.read_line(&mut header)?; + } + + let Some(Ok(module_count)) = header + .split("Module Table: version 2, count ") + .nth(1) + .map(|x| x.trim().parse::()) + else { + return Err(Error::illegal_state(format!( + "Expected module table but got: {header}" + ))); + }; + + header.clear(); + reader.read_line(&mut header)?; + + if !header.starts_with("Columns: id, base, end, entry, checksum, timestamp, path") { + return Err(Error::illegal_state(format!( + "Module table has unknown or illegal columns: {header}" + ))); + } + + let mut modules = Vec::with_capacity(module_count); + + for _ in 0..module_count { + header.clear(); + reader.read_line(&mut header)?; + + let err = |x| { + Error::illegal_argument(format!( + "Unexpected module entry while parsing {x} in header: {header}" + )) + }; + + let mut split = header.split(", "); + + let Some(Ok(id)) = split.next().map(str::parse) else { + return Err(err("id")); + }; + + let Some(Ok(base)) = split.next().map(parse_hex_to_u64) else { + return Err(err("base")); + }; + + let Some(Ok(end)) = split.next().map(parse_hex_to_u64) else { + return Err(err("end")); + }; + + let Some(Ok(entry)) = split.next().map(parse_hex_to_usize) else { + return Err(err("entry")); + }; + + let Some(Ok(checksum)) = split.next().map(parse_hex_to_usize) else { + return Err(err("checksum")); + }; + + let Some(Ok(timestamp)) = split.next().map(parse_hex_to_usize) else { + return Err(err("timestamp")); + }; + + let Some(path) = split.next().map(|s| PathBuf::from(s.trim())) else { + return Err(err("path")); + }; + + modules.push(DrCovModuleEntry { + id, + base, + end, + entry, + checksum, + timestamp, + path, + }); + } + + header.clear(); + 
reader.read_line(&mut header)?; + + //"BB Table: {} bbs\n" + if !header.starts_with("BB Table: ") { + return Err(Error::illegal_state(format!( + "Error reading BB Table header. Got: {header}" + ))); + } + let mut bb = header.split(' '); + let Some(Ok(bb_count)) = bb.nth(2).map(str::parse) else { + return Err(Error::illegal_state(format!( + "Error parsing BB Table header count. Got: {header}" + ))); + }; + + let mut basic_blocks = Vec::with_capacity(bb_count); + + for _ in 0..bb_count { + let mut bb_entry = [0_u8; 8]; + reader.read_exact(&mut bb_entry)?; + basic_blocks.push((&bb_entry).into()); + } + + Ok(DrCovReader { + module_entries: modules, + basic_block_entries: basic_blocks, + }) + } + + /// Creates a [`DrCovReader`] pre-filled with data. + /// Rather pointless, use [`Self::read`] to actually read a file from disk. + #[must_use] + pub fn from_data( + modules: Vec, + basic_blocks: Vec, + ) -> Self { + Self { + module_entries: modules, + basic_block_entries: basic_blocks, + } + } + + /// Get a list of traversed [`DrCovBasicBlock`] nodes + #[must_use] + pub fn basic_blocks(&self) -> Vec { + let mut ret = Vec::with_capacity(self.basic_block_entries.len()); + + for basic_block in &self.basic_block_entries { + let bb_id = basic_block.mod_id; + if let Some(module) = self.module_by_id(bb_id) { + let start = module.base + u64::from(basic_block.start); + let end = start + u64::from(basic_block.size); + ret.push(DrCovBasicBlock::new(start, end)); + } else { + log::error!("Skipping basic block outside of any modules: {basic_block:?}"); + } + } + ret + } + + /// Get the module (range) map. This can be used to create a new [`DrCovWriter`]. + #[must_use] + pub fn module_map(&self) -> RangeMap { + let mut ret = RangeMap::new(); + for module in &self.module_entries { + ret.insert( + module.base..module.end, + ( + module.id, + module.path.clone().into_os_string().into_string().unwrap(), + ), + ); + } + ret + } + + /// Writes this data out to disk (again). 
+ pub fn write>(&self, path: P) -> Result<(), Error> { + let ranges = self.module_map(); + let mut writer = DrCovWriter::new(&ranges); + writer.write(path, &self.basic_blocks()) + } + + /// Gets a list of all basic blocks, as absolute addresses, for u64 targets. + /// Useful for example for [`JmpScare`](https://github.com/fgsect/JMPscare) and other analyses. + #[must_use] + pub fn basic_block_addresses_u64(&self) -> Vec { + self.basic_blocks().iter().map(|x| x.start).collect() + } + + /// Gets a list of all basic blocks, as absolute addresses, for u32 targets. + /// Will return an [`Error`] if addresses are larger than 32 bit. + pub fn basic_block_addresses_u32(&self) -> Result, Error> { + let blocks = self.basic_blocks(); + let mut ret = Vec::with_capacity(blocks.len()); + for block in self.basic_blocks() { + ret.push(u32::try_from(block.start)?); + } + Ok(ret) + } + + /// Merges the contents of another [`DrCovReader`] instance into this one. + /// Useful to merge multiple coverage files of a fuzzing run into one drcov file. + /// Similar to [drcov-merge](https://github.com/vanhauser-thc/drcov-merge). + /// + /// If `unique` is set to `true`, each block will end up in the resulting [`DrCovReader`] at most once. + /// + /// Will return an `Error` if the individual modules are not mergeable. + /// In this case, the module list may already have been changed. + pub fn merge(&mut self, other: &DrCovReader, unique: bool) -> Result<(), Error> { + for module in &other.module_entries { + if let Some(own_module) = self.module_by_id(module.id) { + // Module exists, make sure it's the same. + if own_module.base != module.base || own_module.end != module.end { + return Err(Error::illegal_argument(format!("Module id of file to merge doesn't fit! Own modules: {:#x?}, other modules: {:#x?}", self.module_entries, other.module_entries))); + } + } else { + // We don't know the module. Insert as new module. 
+ self.module_entries.push(module.clone()); + } + } + + if unique { + self.make_unique(); + } + let mut blocks = HashSet::new(); + + for block in &self.basic_block_entries { + blocks.insert(*block); + } + + for block in &other.basic_block_entries { + if !blocks.contains(block) { + blocks.insert(*block); + self.basic_block_entries.push(*block); + } + } + + Ok(()) + } + + /// Remove blocks that exist more than once in the trace, in-place. + pub fn make_unique(&mut self) { + let mut blocks = HashSet::new(); + let new_vec = self + .basic_block_entries + .iter() + .filter(|x| { + if blocks.contains(x) { + false + } else { + blocks.insert(*x); + true + } + }) + .copied() + .collect(); + drop(blocks); + + self.basic_block_entries = new_vec; + } + + /// Returns the module for a given `id`, or [`None`]. + #[must_use] + pub fn module_by_id(&self, id: u16) -> Option<&DrCovModuleEntry> { + self.module_entries.iter().find(|module| module.id == id) + } +} + +#[cfg(test)] +mod test { + use std::{ + env::temp_dir, + fs, + path::PathBuf, + string::{String, ToString}, + }; + + use rangemap::RangeMap; + + use super::{DrCovModuleEntry, DrCovReader, DrCovWriter}; + use crate::drcov::{DrCovBasicBlock, DrCovBasicBlockEntry}; + + #[test] + fn test_write_read_drcov() { + let mut ranges = RangeMap::::new(); + + ranges.insert(0x00..0x4242, (0xffff, "fuzzer".to_string())); + + ranges.insert(0x4242..0xFFFF, (0, "Entry0".to_string())); + ranges.insert(0xFFFF..0x424242, (1, "Entry1".to_string())); + + let mut writer = DrCovWriter::new(&ranges); + + let tmpdir = temp_dir(); + + let drcov_tmp_file = tmpdir.join("drcov_test.drcov"); + writer + .write( + &drcov_tmp_file, + &[ + DrCovBasicBlock::new(0x4242, 0x4250), + DrCovBasicBlock::new(0x10, 0x100), + DrCovBasicBlock::new(0x424200, 0x424240), + DrCovBasicBlock::new(0x10, 0x100), + ], + ) + .unwrap(); + + let reader = DrCovReader::read(&drcov_tmp_file).unwrap(); + + assert_eq!(reader.basic_block_entries.len(), 4); + 
assert_eq!(reader.module_map().len(), 3); + assert_eq!(reader.basic_blocks().len(), 4); + + // Let's do one more round :) + reader.write(&drcov_tmp_file).unwrap(); + let reader = DrCovReader::read(&drcov_tmp_file).unwrap(); + + assert_eq!(reader.basic_block_entries.len(), 4); + assert_eq!(reader.module_map().len(), 3); + assert_eq!(reader.basic_blocks().len(), 4); + + fs::remove_file(&drcov_tmp_file).unwrap(); + } + + #[test] + fn test_merge() { + let modules = vec![DrCovModuleEntry { + id: 0, + base: 0, + end: 0x4242, + entry: 0, + checksum: 0, + timestamp: 0, + path: PathBuf::new(), + }]; + let basic_blocks1 = vec![DrCovBasicBlockEntry { + mod_id: 0, + start: 0, + size: 42, + }]; + + let mut basic_blocks2 = basic_blocks1.clone(); + basic_blocks2.push(DrCovBasicBlockEntry { + mod_id: 0, + start: 4200, + size: 42, + }); + + let mut first = DrCovReader::from_data(modules.clone(), basic_blocks1); + let second = DrCovReader::from_data(modules, basic_blocks2); + + first.merge(&second, true).unwrap(); + assert_eq!(first.basic_block_entries.len(), 2); + } } diff --git a/utils/drcov_utils/Cargo.toml b/utils/drcov_utils/Cargo.toml new file mode 100644 index 0000000000..266159e260 --- /dev/null +++ b/utils/drcov_utils/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "drcov_utils" +edition = "2021" +version.workspace = true +description = "Utility functions to work with DrCov coverage files" +repository = "https://github.com/AFLplusplus/LibAFL/" +license = "MIT OR Apache-2.0" +categories = ["development-tools"] +keywords = ["fuzzing", "libafl", "drcov"] + +[dependencies] +libafl_targets = { path = "../../libafl_targets" } +clap = { workspace = true, features = ["derive", "wrap_help"] } + +[lints] +workspace = true diff --git a/utils/drcov_utils/README.md b/utils/drcov_utils/README.md new file mode 100644 index 0000000000..fd16cbd539 --- /dev/null +++ b/utils/drcov_utils/README.md @@ -0,0 +1,16 @@ +# LibAFL DrCov Utilities + +## Dump-DrCov_Addrs + +Simple commandline tool to 
display a list of all basic block addresses in a program. +This information can, for example, be used for further processing such as in [JmpScare](https://github.com/fgsect/JMPscare) or similar. +At the same time, this tool shows how easily LibAFL's `DrCov` module can be used to parse coverage files. + +Run with `cargo run --release --bin drcov_dump_addrs -- -h` + +## DrCov_Merge + +A performant clone of [drcov-merge](https://github.com/vanhauser-thc/drcov-merge) using LibAFL's `DrCov` reader. +It can merge multiple DrCov files into a single DrCov file. + +Run with `cargo run --release --bin drcov_merge -- -h` diff --git a/utils/drcov_utils/src/bin/drcov_dump_addrs.rs b/utils/drcov_utils/src/bin/drcov_dump_addrs.rs new file mode 100644 index 0000000000..533dea3186 --- /dev/null +++ b/utils/drcov_utils/src/bin/drcov_dump_addrs.rs @@ -0,0 +1,80 @@ +use std::{ + fs::{create_dir_all, File}, + io::Write, + path::PathBuf, +}; + +use clap::Parser; +use libafl_targets::drcov::DrCovReader; + +#[derive(Parser, Debug)] +#[clap(author, version, about, long_about = None)] +#[allow(clippy::module_name_repetitions)] +#[command( + name = "drcov_dump_addrs", + about, + long_about = "Writes a list of all addresses from a DrCovFile" +)] +pub struct Opt { + #[arg(short, long, help = "DrCov traces to read", required = true)] + pub inputs: Vec, + #[arg( + short, + long, + help = "Output folder to write address files to. If none is set, this will output all addresses to stdout." 
+ )] + pub out_dir: Option, +} + +fn main() { + let opts = Opt::parse(); + + if let Some(out_dir) = &opts.out_dir { + if !out_dir.exists() { + if let Err(err) = create_dir_all(out_dir) { + eprint!("Failed to create dir {out_dir:?}: {err:?}"); + } + } + + assert!(out_dir.is_dir(), "Out_dir {out_dir:?} not a directory!"); + } + + for input in opts.inputs { + let Ok(drcov) = DrCovReader::read(&input) + .map_err(|err| eprint!("Ignored coverage file {input:?}, reason: {err:?}")) + else { + continue; + }; + + if let Some(out_dir) = &opts.out_dir { + // Write files to a directory + let out_file = out_dir.join( + input + .file_name() + .expect("File without filename shouldn't exist"), + ); + + let Ok(mut file) = File::create_new(&out_file).map_err(|err| { + eprintln!("Could not create file {out_file:?} - continuing: {err:?}"); + }) else { + continue; + }; + + println!("Dumping addresses from drcov file {input:?} to {out_file:?}"); + + for line in drcov.basic_block_addresses_u64() { + file.write_all(format!("{line:#x}\n").as_bytes()) + .expect("Could not write to file"); + } + } else { + // dump to stdout + println!("# Blocks covered in {input:?}:"); + + for line in drcov.basic_block_addresses_u64() { + println!("{line:#x}"); + } + + println!(); + } + } +} diff --git a/utils/drcov_utils/src/bin/drcov_merge.rs b/utils/drcov_utils/src/bin/drcov_merge.rs new file mode 100644 index 0000000000..cb3d27725b --- /dev/null +++ b/utils/drcov_utils/src/bin/drcov_merge.rs @@ -0,0 +1,60 @@ +use std::path::PathBuf; + +use clap::Parser; +use libafl_targets::drcov::DrCovReader; + +#[derive(Parser, Debug)] +#[clap(author, version, about, long_about = None)] +#[allow(clippy::module_name_repetitions)] +#[command( + name = "drcov_merge", + about, + long_about = "Merges multiple DrCov coverage files into one" +)] +pub struct Opt { + #[arg(short, long, help = "DrCovFiles to merge", required = true)] + pub inputs: Vec, + #[arg(short, long, help = "Output DrCov file")] + pub output: PathBuf, + 
#[arg( + short, + long, + help = "If set, the merged file will contain every block exactly once." + )] + pub unique: bool, +} + +fn main() { + let opts = Opt::parse(); + + assert!( + opts.inputs.len() > 1, + "Need at least two inputs to merge anything." + ); + + let mut inputs = opts.inputs.iter(); + + let initial_input = inputs.next().unwrap(); + + if opts.unique { + println!("Unique block mode"); + } + + println!("Reading inital drcov file from {initial_input:?}"); + let mut main_drcov = DrCovReader::read(initial_input).expect("Failed to read fist input!"); + + for input in inputs { + if let Ok(current_drcov) = DrCovReader::read(input) + .map_err(|err| eprintln!("Warning: failed to read drcov file at {input:?}: {err:?}")) + { + println!("Merging {input:?}"); + if let Err(err) = main_drcov.merge(¤t_drcov, opts.unique) { + eprintln!("Warning: failed to merge drcov file at {input:?}: {err:?}"); + } + } + } + + main_drcov + .write(opts.output) + .expect("Failed to write merged drcov file to output path"); +} diff --git a/utils/libafl_jumper/Cargo.toml b/utils/libafl_jumper/Cargo.toml index 9a97fdb707..0d29ce9476 100644 --- a/utils/libafl_jumper/Cargo.toml +++ b/utils/libafl_jumper/Cargo.toml @@ -14,6 +14,3 @@ std = [] [build-dependencies] cc = "1" - -[dependencies] -hex = { version = "0.4", default-features = false } diff --git a/utils/libafl_jumper/src/main.rs b/utils/libafl_jumper/src/main.rs index 92c2743ada..72cbabff11 100644 --- a/utils/libafl_jumper/src/main.rs +++ b/utils/libafl_jumper/src/main.rs @@ -5,7 +5,7 @@ use core::ffi::CStr; #[cfg(not(any(test, feature = "std")))] use core::panic::PanicInfo; -use core::{arch::asm, ffi::c_void, ops::Shl}; +use core::{arch::asm, ffi::c_void}; #[cfg(not(any(test, feature = "std")))] #[panic_handler] @@ -120,15 +120,9 @@ pub unsafe extern "C" fn main(argc: i32, argv: *const *const u8) -> ! { } fn decode_hex_and_jmp(hex_string: &str) -> ! 
{ - let mut hex_buf = [0_u8; 8]; - let hex_buf = &mut hex_buf[..hex_string.len() / 2]; - hex::decode_to_slice(hex_string, hex_buf).unwrap(); - - let mut addr: u64 = 0; - for val in hex_buf { - addr = addr.shl(8); - addr += u64::from(*val); - } + let Ok(addr) = u64::from_str_radix(hex_string, 16) else { + panic!("Could not parse hex string: {hex_string}"); + }; #[cfg(feature = "std")] println!("Hex: {addr:#x}");