From 1bd21509af6fe5eca5bad820b131a0312047dc84 Mon Sep 17 00:00:00 2001 From: Andrey Fedotov Date: Tue, 4 Apr 2023 20:03:11 +0300 Subject: [PATCH] CASR deduplication for StacktraceObservers (#1184) * Implement CasrAsanBacktraceObserver for dedupe crashes using libCASR and ASAN reports. * Use casr observer with forkserver executor * Add casr deduplication for AsanBacktraceObserver * Add casr deduplication for BacktraceObserver * Add Stacktrace filtering * Move init_ignored_frames to constructors * Add go ignore regexps for BacktraceObservers --------- Co-authored-by: Dongjia "toka" Zhang --- fuzzers/backtrace_baby_fuzzers/README.md | 2 +- libafl/Cargo.toml | 2 + libafl/src/observers/stacktrace.rs | 112 ++++++++++++++++++++++- 3 files changed, 111 insertions(+), 5 deletions(-) diff --git a/fuzzers/backtrace_baby_fuzzers/README.md b/fuzzers/backtrace_baby_fuzzers/README.md index 336873adfe..05de4dcccc 100644 --- a/fuzzers/backtrace_baby_fuzzers/README.md +++ b/fuzzers/backtrace_baby_fuzzers/README.md @@ -1,6 +1,6 @@ # Backtrace baby fuzzers -The projects contained in this directory are simple fuzzers derived from the original baby_fuzzer examples, whose purpose is to show how to use a `BacktraceObserver` or an `ASANObserver` to dedupe crashes and other necessary components for this feature. +The projects contained in this directory are simple fuzzers derived from the original baby_fuzzer examples, whose purpose is to show how to use a `BacktraceObserver` or an `ASANObserver` to dedupe crashes and other necessary components for this feature. To use `casr` deduplication for `BacktraceObserver` or `ASANObserver`, build LibAFL with the `casr` feature. 
The examples cover: diff --git a/libafl/Cargo.toml b/libafl/Cargo.toml index fc56a89a80..cb82b92349 100644 --- a/libafl/Cargo.toml +++ b/libafl/Cargo.toml @@ -32,6 +32,7 @@ cmin = ["z3"] # corpus minimisation corpus_btreemap = [] # Switches from HashMap to BTreeMap for CorpusId gzip = ["miniz_oxide"] # Enables gzip compression in certain parts of the lib regex = ["std", "dep:regex"] # enables the NaiveTokenizer and StacktraceObserver +casr = ["libcasr", "std", "regex"] # enables deduplication based on libcasr for StacktraceObserver # features hiding dependencies licensed under GPL gpl = [] @@ -103,6 +104,7 @@ z3 = { version = "0.11", features = ["static-link-z3"], optional = true } # for pyo3 = { version = "0.17", optional = true, features = ["serde", "macros"] } concat-idents = { version = "1.1.3", optional = true } +libcasr = { version = "2.5", optional = true} # AGPL # !!! this create requires nightly diff --git a/libafl/src/observers/stacktrace.rs b/libafl/src/observers/stacktrace.rs index 708d980ca9..f151d60908 100644 --- a/libafl/src/observers/stacktrace.rs +++ b/libafl/src/observers/stacktrace.rs @@ -1,6 +1,14 @@ //! 
the ``StacktraceObserver`` looks up the stacktrace on the execution thread and computes a hash for it for dedupe -use alloc::string::{String, ToString}; +use alloc::{ + string::{String, ToString}, + vec::Vec, +}; +#[cfg(feature = "casr")] +use std::{ + collections::hash_map::DefaultHasher, + hash::{Hash, Hasher}, +}; use std::{ fmt::Debug, fs::{self, File}, @@ -10,6 +18,22 @@ use std::{ }; use backtrace::Backtrace; +#[cfg(feature = "casr")] +use libcasr::{ + asan::AsanStacktrace, + constants::{ + STACK_FRAME_FILEPATH_IGNORE_REGEXES_CPP, STACK_FRAME_FILEPATH_IGNORE_REGEXES_GO, + STACK_FRAME_FILEPATH_IGNORE_REGEXES_PYTHON, STACK_FRAME_FILEPATH_IGNORE_REGEXES_RUST, + STACK_FRAME_FUNCTION_IGNORE_REGEXES_CPP, STACK_FRAME_FUNCTION_IGNORE_REGEXES_GO, + STACK_FRAME_FUNCTION_IGNORE_REGEXES_PYTHON, STACK_FRAME_FUNCTION_IGNORE_REGEXES_RUST, + }, + init_ignored_frames, + stacktrace::{ + Filter, ParseStacktrace, Stacktrace, StacktraceEntry, STACK_FRAME_FILEPATH_IGNORE_REGEXES, + STACK_FRAME_FUNCTION_IGNORE_REGEXES, + }, +}; +#[cfg(not(feature = "casr"))] use regex::Regex; use serde::{Deserialize, Serialize}; @@ -22,6 +46,7 @@ use crate::{ Error, }; +#[cfg(not(feature = "casr"))] /// Collects the backtrace via [`Backtrace`] and [`Debug`] /// ([`Debug`] is currently used for dev purposes, symbols hash will be used eventually) #[must_use] @@ -45,6 +70,40 @@ pub fn collect_backtrace() -> u64 { hash } +#[cfg(feature = "casr")] +/// Collects the backtrace via [`Backtrace`], skips the first frame, filters the rest with libcasr and returns their hash (`0` if no frames were captured) +#[must_use] +pub fn collect_backtrace() -> u64 { + let mut b = Backtrace::new_unresolved(); + if b.frames().is_empty() { + return 0; + } + b.resolve(); + let mut strace = Stacktrace::new(); + for frame in &b.frames()[1..] 
{ + let mut strace_entry = StacktraceEntry::default(); + let symbols = frame.symbols(); + if !symbols.is_empty() { + let symbol = &symbols[0]; + if let Some(name) = symbol.name() { + strace_entry.function = name.as_str().unwrap_or("").to_string(); + } + if let Some(file) = symbol.filename() { + strace_entry.debug.file = file.to_str().unwrap_or("").to_string(); + } + strace_entry.debug.line = u64::from(symbol.lineno().unwrap_or(0)); + strace_entry.debug.column = u64::from(symbol.colno().unwrap_or(0)); + } + strace_entry.address = frame.ip() as u64; + strace.push(strace_entry); + } + + strace.filter(); + let mut s = DefaultHasher::new(); + strace.hash(&mut s); + s.finish() +} + /// An enum encoding the types of harnesses #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] pub enum HarnessType { @@ -66,6 +125,7 @@ pub struct BacktraceObserver<'a> { } impl<'a> BacktraceObserver<'a> { + #[cfg(not(feature = "casr"))] /// Creates a new [`BacktraceObserver`] with the given name. #[must_use] pub fn new( @@ -80,6 +140,22 @@ impl<'a> BacktraceObserver<'a> { } } + #[cfg(feature = "casr")] + /// Creates a new [`BacktraceObserver`] with the given name, after calling `init_ignored_frames!` for "rust", "cpp" and "go". + #[must_use] + pub fn new( + observer_name: &str, + backtrace_hash: &'a mut Option<u64>, + harness_type: HarnessType, + ) -> Self { + init_ignored_frames!("rust", "cpp", "go"); + Self { + observer_name: observer_name.to_string(), + hash: OwnedRefMut::Ref(backtrace_hash), + harness_type, + } + } + /// Updates the hash value of this observer. fn update_hash(&mut self, hash: u64) { *self.hash.as_mut() = Some(hash); @@ -189,6 +265,7 @@ pub struct AsanBacktraceObserver { } impl AsanBacktraceObserver { + #[cfg(not(feature = "casr"))] /// Creates a new [`BacktraceObserver`] with the given name. #[must_use] pub fn new(observer_name: &str) -> Self { @@ -198,14 +275,25 @@ impl AsanBacktraceObserver { } } + #[cfg(feature = "casr")] + /// Creates a new [`AsanBacktraceObserver`] with the given name. 
+ #[must_use] + pub fn new(observer_name: &str) -> Self { + init_ignored_frames!("rust", "cpp", "go"); + Self { + observer_name: observer_name.to_string(), + hash: None, + } + } + /// read ASAN output from the child stderr and parse it. pub fn parse_asan_output_from_childstderr( &mut self, stderr: &mut ChildStderr, ) -> Result<(), Error> { - let mut buf = String::new(); - stderr.read_to_string(&mut buf)?; - self.parse_asan_output(&buf); + let mut buf = Vec::new(); + stderr.read_to_end(&mut buf)?; + self.parse_asan_output(&String::from_utf8_lossy(&buf)); Ok(()) } @@ -222,6 +310,7 @@ impl AsanBacktraceObserver { Ok(()) } + #[cfg(not(feature = "casr"))] /// parse ASAN error output emited by the target command and compute the hash pub fn parse_asan_output(&mut self, output: &str) { let mut hash = 0; @@ -233,6 +322,21 @@ impl AsanBacktraceObserver { self.update_hash(hash); } + #[cfg(feature = "casr")] + /// Parses the ASAN error output emitted by the target command with libcasr and stores the hash of the filtered stacktrace (`0` if extraction or parsing fails) + pub fn parse_asan_output(&mut self, output: &str) { + let mut hash = 0; + if let Ok(st_vec) = AsanStacktrace::extract_stacktrace(output) { + if let Ok(mut stacktrace) = AsanStacktrace::parse_stacktrace(&st_vec) { + stacktrace.filter(); + let mut s = DefaultHasher::new(); + stacktrace.hash(&mut s); + hash = s.finish(); + } + } + self.update_hash(hash); + } + /// Updates the hash value of this observer. fn update_hash(&mut self, hash: u64) { self.hash = Some(hash);