Improve AARCH64 performance (#989)
This commit is contained in:
parent
ebc886032f
commit
5cdb7f7b05
@ -1,20 +1,10 @@
|
|||||||
//! Functionality regarding binary-only coverage collection.
|
//! Functionality regarding binary-only coverage collection.
|
||||||
use core::ptr::addr_of_mut;
|
use core::ptr::addr_of_mut;
|
||||||
use std::{
|
use std::{cell::RefCell, marker::PhantomPinned, pin::Pin, rc::Rc};
|
||||||
cell::{Ref, RefCell},
|
|
||||||
marker::PhantomPinned,
|
|
||||||
ops::Deref,
|
|
||||||
pin::Pin,
|
|
||||||
rc::Rc,
|
|
||||||
};
|
|
||||||
|
|
||||||
#[cfg(target_arch = "aarch64")]
|
#[cfg(target_arch = "aarch64")]
|
||||||
use dynasmrt::DynasmLabelApi;
|
use dynasmrt::DynasmLabelApi;
|
||||||
use dynasmrt::{dynasm, DynasmApi};
|
use dynasmrt::{dynasm, DynasmApi};
|
||||||
#[cfg(target_arch = "x86_64")]
|
|
||||||
use frida_gum::instruction_writer::X86InstructionWriter;
|
|
||||||
#[cfg(target_arch = "aarch64")]
|
|
||||||
use frida_gum::instruction_writer::{Aarch64Register, IndexMode};
|
|
||||||
use frida_gum::{instruction_writer::InstructionWriter, stalker::StalkerOutput};
|
use frida_gum::{instruction_writer::InstructionWriter, stalker::StalkerOutput};
|
||||||
use libafl::bolts::xxh3_rrmxmx_mixer;
|
use libafl::bolts::xxh3_rrmxmx_mixer;
|
||||||
use rangemap::RangeMap;
|
use rangemap::RangeMap;
|
||||||
@ -28,9 +18,6 @@ pub const MAP_SIZE: usize = 64 * 1024;
|
|||||||
struct CoverageRuntimeInner {
|
struct CoverageRuntimeInner {
|
||||||
map: [u8; MAP_SIZE],
|
map: [u8; MAP_SIZE],
|
||||||
previous_pc: u64,
|
previous_pc: u64,
|
||||||
#[cfg(target_arch = "aarch64")]
|
|
||||||
current_log_impl: u64,
|
|
||||||
blob_maybe_log: Option<Box<[u8]>>,
|
|
||||||
_pinned: PhantomPinned,
|
_pinned: PhantomPinned,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -53,8 +40,6 @@ impl FridaRuntime for CoverageRuntime {
|
|||||||
_ranges: &RangeMap<usize, (u16, String)>,
|
_ranges: &RangeMap<usize, (u16, String)>,
|
||||||
_modules_to_instrument: &[&str],
|
_modules_to_instrument: &[&str],
|
||||||
) {
|
) {
|
||||||
#[cfg(target_arch = "aarch64")]
|
|
||||||
self.generate_maybe_log_blob();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn pre_exec<I: libafl::inputs::Input + libafl::inputs::HasTargetBytes>(
|
fn pre_exec<I: libafl::inputs::Input + libafl::inputs::HasTargetBytes>(
|
||||||
@ -79,9 +64,6 @@ impl CoverageRuntime {
|
|||||||
Self(Rc::pin(RefCell::new(CoverageRuntimeInner {
|
Self(Rc::pin(RefCell::new(CoverageRuntimeInner {
|
||||||
map: [0_u8; MAP_SIZE],
|
map: [0_u8; MAP_SIZE],
|
||||||
previous_pc: 0,
|
previous_pc: 0,
|
||||||
#[cfg(target_arch = "aarch64")]
|
|
||||||
current_log_impl: 0,
|
|
||||||
blob_maybe_log: None,
|
|
||||||
_pinned: PhantomPinned,
|
_pinned: PhantomPinned,
|
||||||
})))
|
})))
|
||||||
}
|
}
|
||||||
@ -91,49 +73,66 @@ impl CoverageRuntime {
|
|||||||
self.0.borrow_mut().map.as_mut_ptr()
|
self.0.borrow_mut().map.as_mut_ptr()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Retrieve the `maybe_log` code blob that writes coverage into the map
|
|
||||||
#[must_use]
|
|
||||||
pub fn blob_maybe_log(&self) -> impl Deref<Target = Box<[u8]>> + '_ {
|
|
||||||
Ref::map(self.0.borrow(), |s| s.blob_maybe_log.as_ref().unwrap())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A minimal `maybe_log` implementation. We insert this into the transformed instruction stream
|
/// A minimal `maybe_log` implementation. We insert this into the transformed instruction stream
|
||||||
/// every time we need a copy that is within a direct branch of the start of the transformed basic
|
/// every time we need a copy that is within a direct branch of the start of the transformed basic
|
||||||
/// block.
|
/// block.
|
||||||
#[cfg(target_arch = "aarch64")]
|
#[cfg(target_arch = "aarch64")]
|
||||||
pub fn generate_maybe_log_blob(&mut self) {
|
pub fn generate_inline_code(&mut self, h64: u64) -> Box<[u8]> {
|
||||||
|
let mut borrow = self.0.borrow_mut();
|
||||||
|
let prev_loc_ptr = addr_of_mut!(borrow.previous_pc);
|
||||||
|
let map_addr_ptr = addr_of_mut!(borrow.map);
|
||||||
let mut ops = dynasmrt::VecAssembler::<dynasmrt::aarch64::Aarch64Relocation>::new(0);
|
let mut ops = dynasmrt::VecAssembler::<dynasmrt::aarch64::Aarch64Relocation>::new(0);
|
||||||
dynasm!(ops
|
dynasm!(ops
|
||||||
; .arch aarch64
|
; .arch aarch64
|
||||||
; stp x1, x2, [sp, -0x10]!
|
// Store the context
|
||||||
; stp x3, x4, [sp, -0x10]!
|
; stp x0, x1, [sp, #-0xa0]
|
||||||
|
|
||||||
|
// Load the previous_pc
|
||||||
|
; ldr x1, >previous_loc
|
||||||
|
; ldr x1, [x1]
|
||||||
|
|
||||||
|
// Calculate the edge id
|
||||||
|
; ldr x0, >loc
|
||||||
|
; eor x0, x1, x0
|
||||||
|
|
||||||
|
// Load the map byte address
|
||||||
; ldr x1, >map_addr
|
; ldr x1, >map_addr
|
||||||
; ldr x2, >previous_loc
|
; add x0, x1, x0
|
||||||
; ldr x4, [x2]
|
|
||||||
; eor x4, x4, x0
|
// Update the map byte
|
||||||
; mov x3, u64::from((MAP_SIZE - 1) as u32)
|
; ldrb w1, [x0]
|
||||||
; and x4, x4, x3
|
; add w1, w1, #1
|
||||||
; ldr x3, [x1, x4]
|
; add x1, x1, x1, lsr #8
|
||||||
; add x3, x3, #1
|
; strb w1, [x0]
|
||||||
; str x3, [x1, x4]
|
|
||||||
; add x0, xzr, x0, LSR #1
|
// Update the previous_pc value
|
||||||
; str x0, [x2]
|
; ldr x0, >loc_shr
|
||||||
; ldp x3, x4, [sp], #0x10
|
; ldr x1, >previous_loc
|
||||||
; ldp x1, x2, [sp], #0x10
|
; str x0, [x1]
|
||||||
; ret
|
|
||||||
|
// Restore the context
|
||||||
|
; ldp x0, x1, [sp, #-0xa0]
|
||||||
|
|
||||||
|
// Skip the data
|
||||||
|
; b >end
|
||||||
|
|
||||||
;map_addr:
|
;map_addr:
|
||||||
;.qword addr_of_mut!(self.0.borrow_mut().map) as i64
|
;.qword map_addr_ptr as i64
|
||||||
;previous_loc:
|
;previous_loc:
|
||||||
;.qword 0
|
;.qword prev_loc_ptr as i64
|
||||||
|
;loc:
|
||||||
|
;.qword h64 as i64
|
||||||
|
;loc_shr:
|
||||||
|
;.qword (h64 >> 1) as i64
|
||||||
|
;end:
|
||||||
);
|
);
|
||||||
let ops_vec = ops.finalize().unwrap();
|
let ops_vec = ops.finalize().unwrap();
|
||||||
self.0.borrow_mut().blob_maybe_log =
|
ops_vec[..ops_vec.len()].to_vec().into_boxed_slice()
|
||||||
Some(ops_vec[..ops_vec.len() - 8].to_vec().into_boxed_slice());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Write inline instrumentation for coverage
|
/// Write inline instrumentation for coverage
|
||||||
#[cfg(target_arch = "x86_64")]
|
#[cfg(target_arch = "x86_64")]
|
||||||
pub fn generate_inline_code(&mut self, writer: &X86InstructionWriter, h64: u64) {
|
pub fn generate_inline_code(&mut self, h64: u64) -> Box<[u8]> {
|
||||||
let mut borrow = self.0.borrow_mut();
|
let mut borrow = self.0.borrow_mut();
|
||||||
let prev_loc_ptr = addr_of_mut!(borrow.previous_pc);
|
let prev_loc_ptr = addr_of_mut!(borrow.previous_pc);
|
||||||
let map_addr_ptr = addr_of_mut!(borrow.map);
|
let map_addr_ptr = addr_of_mut!(borrow.map);
|
||||||
@ -177,7 +176,7 @@ impl CoverageRuntime {
|
|||||||
);
|
);
|
||||||
let ops_vec = ops.finalize().unwrap();
|
let ops_vec = ops.finalize().unwrap();
|
||||||
|
|
||||||
writer.put_bytes(&ops_vec[..ops_vec.len()].to_vec().into_boxed_slice());
|
ops_vec[..ops_vec.len()].to_vec().into_boxed_slice()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Emits coverage mapping into the current basic block.
|
/// Emits coverage mapping into the current basic block.
|
||||||
@ -185,56 +184,7 @@ impl CoverageRuntime {
|
|||||||
pub fn emit_coverage_mapping(&mut self, address: u64, output: &StalkerOutput) {
|
pub fn emit_coverage_mapping(&mut self, address: u64, output: &StalkerOutput) {
|
||||||
let h64 = xxh3_rrmxmx_mixer(address);
|
let h64 = xxh3_rrmxmx_mixer(address);
|
||||||
let writer = output.writer();
|
let writer = output.writer();
|
||||||
|
let code = self.generate_inline_code(h64 & (MAP_SIZE as u64 - 1));
|
||||||
#[cfg(target_arch = "x86_64")]
|
writer.put_bytes(&code);
|
||||||
{
|
|
||||||
self.generate_inline_code(&writer, h64 & (MAP_SIZE as u64 - 1));
|
|
||||||
}
|
|
||||||
#[cfg(target_arch = "aarch64")]
|
|
||||||
{
|
|
||||||
#[allow(clippy::cast_possible_wrap)]
|
|
||||||
// gum redzone size is u32, we need an offset as i64.
|
|
||||||
let redzone_size = i64::from(frida_gum_sys::GUM_RED_ZONE_SIZE);
|
|
||||||
if self.0.borrow().current_log_impl == 0
|
|
||||||
|| !writer.can_branch_directly_to(self.0.borrow().current_log_impl)
|
|
||||||
|| !writer.can_branch_directly_between(
|
|
||||||
writer.pc() + 128,
|
|
||||||
self.0.borrow().current_log_impl,
|
|
||||||
)
|
|
||||||
{
|
|
||||||
let after_log_impl = writer.code_offset() + 1;
|
|
||||||
|
|
||||||
#[cfg(target_arch = "x86_64")]
|
|
||||||
writer.put_jmp_near_label(after_log_impl);
|
|
||||||
#[cfg(target_arch = "aarch64")]
|
|
||||||
writer.put_b_label(after_log_impl);
|
|
||||||
|
|
||||||
self.0.borrow_mut().current_log_impl = writer.pc();
|
|
||||||
writer.put_bytes(&self.blob_maybe_log());
|
|
||||||
let prev_loc_pointer = addr_of_mut!(self.0.borrow_mut().previous_pc) as u64; // Get the pointer to self.previous_pc
|
|
||||||
|
|
||||||
writer.put_bytes(&prev_loc_pointer.to_ne_bytes());
|
|
||||||
|
|
||||||
writer.put_label(after_log_impl);
|
|
||||||
}
|
|
||||||
|
|
||||||
writer.put_stp_reg_reg_reg_offset(
|
|
||||||
Aarch64Register::Lr,
|
|
||||||
Aarch64Register::X0,
|
|
||||||
Aarch64Register::Sp,
|
|
||||||
-(16 + redzone_size),
|
|
||||||
IndexMode::PreAdjust,
|
|
||||||
);
|
|
||||||
writer.put_ldr_reg_u64(Aarch64Register::X0, h64 & (MAP_SIZE as u64 - 1));
|
|
||||||
|
|
||||||
writer.put_bl_imm(self.0.borrow().current_log_impl);
|
|
||||||
writer.put_ldp_reg_reg_reg_offset(
|
|
||||||
Aarch64Register::Lr,
|
|
||||||
Aarch64Register::X0,
|
|
||||||
Aarch64Register::Sp,
|
|
||||||
16 + redzone_size,
|
|
||||||
IndexMode::PostAdjust,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user