CASR deduplication for StacktraceObservers (#1184)

* Implement CasrAsanBacktraceObserver for dedupe crashes using libCASR and ASAN reports.

* Use casr observer with forkserver executor

* Add casr deduplication for AsanBacktraceObserver

* Add casr deduplication for BacktraceObserver

* Add Stacktrace filtering

* Move init_ignored_frames to constructors

* Add go ignore regexps for BacktraceObservers

---------

Co-authored-by: Dongjia "toka" Zhang <tokazerkje@outlook.com>
This commit is contained in:
Andrey Fedotov 2023-04-04 20:03:11 +03:00 committed by GitHub
parent ccd1211cd2
commit 1bd21509af
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 111 additions and 5 deletions

View File

@ -1,6 +1,6 @@
# Backtrace baby fuzzers
The projects contained in this directory are simple fuzzers derived from the original baby_fuzzer examples, whose purpose is to show how to use a `BacktraceObserver` or an `ASANObserver` to dedupe crashes and other necessary components for this feature.
The projects contained in this directory are simple fuzzers derived from the original baby_fuzzer examples, whose purpose is to show how to use a `BacktraceObserver` or an `ASANObserver` to dedupe crashes and other necessary components for this feature. To use `casr` deduplication with `BacktraceObserver` or `ASANObserver`, build LibAFL with the `casr` feature enabled.
The examples cover:

View File

@ -32,6 +32,7 @@ cmin = ["z3"] # corpus minimisation
corpus_btreemap = [] # Switches from HashMap to BTreeMap for CorpusId
gzip = ["miniz_oxide"] # Enables gzip compression in certain parts of the lib
regex = ["std", "dep:regex"] # enables the NaiveTokenizer and StacktraceObserver
casr = ["libcasr", "std", "regex"] # enables deduplication based on libcasr for StacktraceObserver
# features hiding dependencies licensed under GPL
gpl = []
@ -103,6 +104,7 @@ z3 = { version = "0.11", features = ["static-link-z3"], optional = true } # for
pyo3 = { version = "0.17", optional = true, features = ["serde", "macros"] }
concat-idents = { version = "1.1.3", optional = true }
libcasr = { version = "2.5", optional = true}
# AGPL
# !!! this crate requires nightly

View File

@ -1,6 +1,14 @@
//! the ``StacktraceObserver`` looks up the stacktrace on the execution thread and computes a hash for it for dedupe
use alloc::string::{String, ToString};
use alloc::{
string::{String, ToString},
vec::Vec,
};
#[cfg(feature = "casr")]
use std::{
collections::hash_map::DefaultHasher,
hash::{Hash, Hasher},
};
use std::{
fmt::Debug,
fs::{self, File},
@ -10,6 +18,22 @@ use std::{
};
use backtrace::Backtrace;
#[cfg(feature = "casr")]
use libcasr::{
asan::AsanStacktrace,
constants::{
STACK_FRAME_FILEPATH_IGNORE_REGEXES_CPP, STACK_FRAME_FILEPATH_IGNORE_REGEXES_GO,
STACK_FRAME_FILEPATH_IGNORE_REGEXES_PYTHON, STACK_FRAME_FILEPATH_IGNORE_REGEXES_RUST,
STACK_FRAME_FUNCTION_IGNORE_REGEXES_CPP, STACK_FRAME_FUNCTION_IGNORE_REGEXES_GO,
STACK_FRAME_FUNCTION_IGNORE_REGEXES_PYTHON, STACK_FRAME_FUNCTION_IGNORE_REGEXES_RUST,
},
init_ignored_frames,
stacktrace::{
Filter, ParseStacktrace, Stacktrace, StacktraceEntry, STACK_FRAME_FILEPATH_IGNORE_REGEXES,
STACK_FRAME_FUNCTION_IGNORE_REGEXES,
},
};
#[cfg(not(feature = "casr"))]
use regex::Regex;
use serde::{Deserialize, Serialize};
@ -22,6 +46,7 @@ use crate::{
Error,
};
#[cfg(not(feature = "casr"))]
/// Collects the backtrace via [`Backtrace`] and [`Debug`]
/// ([`Debug`] is currently used for dev purposes, symbols hash will be used eventually)
#[must_use]
@ -45,6 +70,40 @@ pub fn collect_backtrace() -> u64 {
hash
}
#[cfg(feature = "casr")]
/// Collects the current backtrace via [`Backtrace`] and reduces it to a `u64` hash.
///
/// The frame at index 0 is skipped; every remaining frame is converted into a
/// [`StacktraceEntry`] (function name, source file, line, column and instruction
/// pointer), the resulting [`Stacktrace`] is passed through `filter()`, and whatever
/// remains is hashed with [`DefaultHasher`].
///
/// Returns `0` when no frames could be captured at all.
#[must_use]
pub fn collect_backtrace() -> u64 {
    let mut b = Backtrace::new_unresolved();
    if b.frames().is_empty() {
        return 0;
    }
    b.resolve();
    let mut strace = Stacktrace::new();
    // The emptiness check above guarantees that slicing from index 1 cannot panic.
    for frame in &b.frames()[1..] {
        let mut strace_entry = StacktraceEntry::default();
        // Take the first resolved symbol whenever at least one exists. The previous
        // `len() > 1` guard dropped the symbol information of every frame that
        // resolved to exactly one symbol.
        if let Some(symbol) = frame.symbols().first() {
            if let Some(name) = symbol.name() {
                strace_entry.function = name.as_str().unwrap_or("").to_string();
            }
            if let Some(file) = symbol.filename() {
                strace_entry.debug.file = file.to_str().unwrap_or("").to_string();
            }
            // Missing line/column information is recorded as 0.
            strace_entry.debug.line = u64::from(symbol.lineno().unwrap_or(0));
            strace_entry.debug.column = u64::from(symbol.colno().unwrap_or(0));
        }
        strace_entry.address = frame.ip() as u64;
        strace.push(strace_entry);
    }
    // NOTE(review): presumably `filter()` applies the ignore lists installed by
    // `init_ignored_frames!` in the observer constructors; confirm against libcasr.
    strace.filter();
    let mut s = DefaultHasher::new();
    strace.hash(&mut s);
    s.finish()
}
/// An enum encoding the types of harnesses
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
pub enum HarnessType {
@ -66,6 +125,7 @@ pub struct BacktraceObserver<'a> {
}
impl<'a> BacktraceObserver<'a> {
#[cfg(not(feature = "casr"))]
/// Creates a new [`BacktraceObserver`] with the given name.
#[must_use]
pub fn new(
@ -80,6 +140,22 @@ impl<'a> BacktraceObserver<'a> {
}
}
#[cfg(feature = "casr")]
/// Builds a [`BacktraceObserver`] named `observer_name`.
///
/// The observer writes its hash into `backtrace_hash` and remembers `harness_type`.
/// Before the observer is assembled, `init_ignored_frames!` is invoked for
/// `"rust"`, `"cpp"` and `"go"`.
#[must_use]
pub fn new(
    observer_name: &str,
    backtrace_hash: &'a mut Option<u64>,
    harness_type: HarnessType,
) -> Self {
    init_ignored_frames!("rust", "cpp", "go");
    let observer_name = String::from(observer_name);
    let hash = OwnedRefMut::Ref(backtrace_hash);
    Self {
        observer_name,
        hash,
        harness_type,
    }
}
/// Updates the hash value of this observer.
fn update_hash(&mut self, hash: u64) {
*self.hash.as_mut() = Some(hash);
@ -189,6 +265,7 @@ pub struct AsanBacktraceObserver {
}
impl AsanBacktraceObserver {
#[cfg(not(feature = "casr"))]
/// Creates a new [`BacktraceObserver`] with the given name.
#[must_use]
pub fn new(observer_name: &str) -> Self {
@ -198,14 +275,25 @@ impl AsanBacktraceObserver {
}
}
#[cfg(feature = "casr")]
/// Builds an [`AsanBacktraceObserver`] named `observer_name` with no hash recorded yet.
///
/// Before the observer is assembled, `init_ignored_frames!` is invoked for
/// `"rust"`, `"cpp"` and `"go"`.
#[must_use]
pub fn new(observer_name: &str) -> Self {
    init_ignored_frames!("rust", "cpp", "go");
    let observer_name = String::from(observer_name);
    Self {
        observer_name,
        hash: None,
    }
}
/// Reads the ASAN output from the child's stderr and parses it.
///
/// The stream is drained exactly once as raw bytes and converted with
/// [`String::from_utf8_lossy`], so invalid UTF-8 in the target's stderr does not
/// make the read fail.
///
/// # Errors
///
/// Returns an error if reading from `stderr` fails.
pub fn parse_asan_output_from_childstderr(
    &mut self,
    stderr: &mut ChildStderr,
) -> Result<(), Error> {
    // Read bytes rather than a `String`: `read_to_string` rejects non-UTF-8 data, and
    // a second read after it would only see an exhausted stream and overwrite the
    // hash computed from the real output.
    let mut buf = Vec::new();
    stderr.read_to_end(&mut buf)?;
    self.parse_asan_output(&String::from_utf8_lossy(&buf));
    Ok(())
}
@ -222,6 +310,7 @@ impl AsanBacktraceObserver {
Ok(())
}
#[cfg(not(feature = "casr"))]
/// parse ASAN error output emited by the target command and compute the hash
pub fn parse_asan_output(&mut self, output: &str) {
let mut hash = 0;
@ -233,6 +322,21 @@ impl AsanBacktraceObserver {
self.update_hash(hash);
}
#[cfg(feature = "casr")]
/// Parses the ASAN error output emitted by the target command and stores its hash.
///
/// The stacktrace is extracted and parsed with [`AsanStacktrace`], filtered, and
/// hashed with [`DefaultHasher`]. If either extraction or parsing fails, the stored
/// hash is `0`.
pub fn parse_asan_output(&mut self, output: &str) {
    let hash = AsanStacktrace::extract_stacktrace(output)
        .ok()
        .and_then(|lines| AsanStacktrace::parse_stacktrace(&lines).ok())
        .map_or(0, |mut trace| {
            trace.filter();
            let mut hasher = DefaultHasher::new();
            trace.hash(&mut hasher);
            hasher.finish()
        });
    self.update_hash(hash);
}
/// Updates the hash value of this observer.
fn update_hash(&mut self, hash: u64) {
self.hash = Some(hash);