From 5cdb7f7b05ab5946c0e2d54c6c6ae86c56d7c1dc Mon Sep 17 00:00:00 2001 From: WorksButNotTested <62701594+WorksButNotTested@users.noreply.github.com> Date: Wed, 18 Jan 2023 12:56:17 +0000 Subject: [PATCH] Improve AARCH64 performance (#989) --- libafl_frida/src/coverage_rt.rs | 146 +++++++++++--------------------- 1 file changed, 48 insertions(+), 98 deletions(-) diff --git a/libafl_frida/src/coverage_rt.rs b/libafl_frida/src/coverage_rt.rs index 0bb9f21fd7..a793a0f422 100644 --- a/libafl_frida/src/coverage_rt.rs +++ b/libafl_frida/src/coverage_rt.rs @@ -1,20 +1,10 @@ //! Functionality regarding binary-only coverage collection. use core::ptr::addr_of_mut; -use std::{ - cell::{Ref, RefCell}, - marker::PhantomPinned, - ops::Deref, - pin::Pin, - rc::Rc, -}; +use std::{cell::RefCell, marker::PhantomPinned, pin::Pin, rc::Rc}; #[cfg(target_arch = "aarch64")] use dynasmrt::DynasmLabelApi; use dynasmrt::{dynasm, DynasmApi}; -#[cfg(target_arch = "x86_64")] -use frida_gum::instruction_writer::X86InstructionWriter; -#[cfg(target_arch = "aarch64")] -use frida_gum::instruction_writer::{Aarch64Register, IndexMode}; use frida_gum::{instruction_writer::InstructionWriter, stalker::StalkerOutput}; use libafl::bolts::xxh3_rrmxmx_mixer; use rangemap::RangeMap; @@ -28,9 +18,6 @@ pub const MAP_SIZE: usize = 64 * 1024; struct CoverageRuntimeInner { map: [u8; MAP_SIZE], previous_pc: u64, - #[cfg(target_arch = "aarch64")] - current_log_impl: u64, - blob_maybe_log: Option<Box<[u8]>>, _pinned: PhantomPinned, } @@ -53,8 +40,6 @@ impl FridaRuntime for CoverageRuntime { _ranges: &RangeMap<usize, (u16, String)>, _modules_to_instrument: &[&str], ) { - #[cfg(target_arch = "aarch64")] - self.generate_maybe_log_blob(); } fn pre_exec( @@ -79,9 +64,6 @@ impl CoverageRuntime { Self(Rc::pin(RefCell::new(CoverageRuntimeInner { map: [0_u8; MAP_SIZE], previous_pc: 0, - #[cfg(target_arch = "aarch64")] - current_log_impl: 0, - blob_maybe_log: None, _pinned: PhantomPinned, }))) } @@ -91,49 +73,66 @@ impl CoverageRuntime { 
self.0.borrow_mut().map.as_mut_ptr() } - /// Retrieve the `maybe_log` code blob, that will write coverage into the map - #[must_use] - pub fn blob_maybe_log(&self) -> impl Deref<Target = Box<[u8]>> + '_ { - Ref::map(self.0.borrow(), |s| s.blob_maybe_log.as_ref().unwrap()) - } - /// A minimal `maybe_log` implementation. We insert this into the transformed instruction stream /// every time we need a copy that is within a direct branch of the start of the transformed basic /// block. #[cfg(target_arch = "aarch64")] - pub fn generate_maybe_log_blob(&mut self) { + pub fn generate_inline_code(&mut self, h64: u64) -> Box<[u8]> { + let mut borrow = self.0.borrow_mut(); + let prev_loc_ptr = addr_of_mut!(borrow.previous_pc); + let map_addr_ptr = addr_of_mut!(borrow.map); let mut ops = dynasmrt::VecAssembler::<dynasmrt::aarch64::Aarch64Relocation>::new(0); dynasm!(ops ; .arch aarch64 - ; stp x1, x2, [sp, -0x10]! - ; stp x3, x4, [sp, -0x10]! + // Store the context + ; stp x0, x1, [sp, #-0xa0] + + // Load the previous_pc + ; ldr x1, >previous_loc + ; ldr x1, [x1] + + // Calculate the edge id + ; ldr x0, >loc + ; eor x0, x1, x0 + + // Load the map byte address ; ldr x1, >map_addr - ; ldr x2, >previous_loc - ; ldr x4, [x2] - ; eor x4, x4, x0 - ; mov x3, u64::from((MAP_SIZE - 1) as u32) - ; and x4, x4, x3 - ; ldr x3, [x1, x4] - ; add x3, x3, #1 - ; str x3, [x1, x4] - ; add x0, xzr, x0, LSR #1 - ; str x0, [x2] - ; ldp x3, x4, [sp], #0x10 - ; ldp x1, x2, [sp], #0x10 - ; ret + ; add x0, x1, x0 + + // Update the map byte + ; ldrb w1, [x0] + ; add w1, w1, #1 + ; add x1, x1, x1, lsr #8 + ; strb w1, [x0] + + // Update the previous_pc value + ; ldr x0, >loc_shr + ; ldr x1, >previous_loc + ; str x0, [x1] + + // Restore the context + ; ldp x0, x1, [sp, #-0xa0] + + // Skip the data + ; b >end + ;map_addr: - ;.qword addr_of_mut!(self.0.borrow_mut().map) as i64 + ;.qword map_addr_ptr as i64 ;previous_loc: - ;.qword 0 + ;.qword prev_loc_ptr as i64 + ;loc: + ;.qword h64 as i64 + ;loc_shr: + ;.qword (h64 >> 1) as i64 + ;end: ); let ops_vec = 
ops.finalize().unwrap(); - self.0.borrow_mut().blob_maybe_log = - Some(ops_vec[..ops_vec.len() - 8].to_vec().into_boxed_slice()); + ops_vec[..ops_vec.len()].to_vec().into_boxed_slice() } /// Write inline instrumentation for coverage #[cfg(target_arch = "x86_64")] - pub fn generate_inline_code(&mut self, writer: &X86InstructionWriter, h64: u64) { + pub fn generate_inline_code(&mut self, h64: u64) -> Box<[u8]> { let mut borrow = self.0.borrow_mut(); let prev_loc_ptr = addr_of_mut!(borrow.previous_pc); let map_addr_ptr = addr_of_mut!(borrow.map); @@ -177,7 +176,7 @@ impl CoverageRuntime { ); let ops_vec = ops.finalize().unwrap(); - writer.put_bytes(&ops_vec[..ops_vec.len()].to_vec().into_boxed_slice()); + ops_vec[..ops_vec.len()].to_vec().into_boxed_slice() } /// Emits coverage mapping into the current basic block. @@ -185,56 +184,7 @@ impl CoverageRuntime { pub fn emit_coverage_mapping(&mut self, address: u64, output: &StalkerOutput) { let h64 = xxh3_rrmxmx_mixer(address); let writer = output.writer(); - - #[cfg(target_arch = "x86_64")] - { - self.generate_inline_code(&writer, h64 & (MAP_SIZE as u64 - 1)); - } - #[cfg(target_arch = "aarch64")] - { - #[allow(clippy::cast_possible_wrap)] - // gum redzone size is u32, we need an offset as i32. 
- let redzone_size = i64::from(frida_gum_sys::GUM_RED_ZONE_SIZE); - if self.0.borrow().current_log_impl == 0 - || !writer.can_branch_directly_to(self.0.borrow().current_log_impl) - || !writer.can_branch_directly_between( - writer.pc() + 128, - self.0.borrow().current_log_impl, - ) - { - let after_log_impl = writer.code_offset() + 1; - - #[cfg(target_arch = "x86_64")] - writer.put_jmp_near_label(after_log_impl); - #[cfg(target_arch = "aarch64")] - writer.put_b_label(after_log_impl); - - self.0.borrow_mut().current_log_impl = writer.pc(); - writer.put_bytes(&self.blob_maybe_log()); - let prev_loc_pointer = addr_of_mut!(self.0.borrow_mut().previous_pc) as u64; // Get the pointer to self.previous_pc - - writer.put_bytes(&prev_loc_pointer.to_ne_bytes()); - - writer.put_label(after_log_impl); - } - - writer.put_stp_reg_reg_reg_offset( - Aarch64Register::Lr, - Aarch64Register::X0, - Aarch64Register::Sp, - -(16 + redzone_size), - IndexMode::PreAdjust, - ); - writer.put_ldr_reg_u64(Aarch64Register::X0, h64 & (MAP_SIZE as u64 - 1)); - - writer.put_bl_imm(self.0.borrow().current_log_impl); - writer.put_ldp_reg_reg_reg_offset( - Aarch64Register::Lr, - Aarch64Register::X0, - Aarch64Register::Sp, - 16 + redzone_size, - IndexMode::PostAdjust, - ); - } + let code = self.generate_inline_code(h64 & (MAP_SIZE as u64 - 1)); + writer.put_bytes(&code); } }