Implementation of SAND: Decoupling Sanitization from Fuzzing for Low Overhead (#3037)

* Reference implementation of SAND: Decoupling Sanitization from Fuzzing for Low Overhead

* Format code

* make clippy happy

* Update docs

* clean output

* fmt

* Fix for nostd

* Update docs

* use use alloc::vec::Vec;

* Docs updates

* Update docs

* Format toml

* Format again

* Add CI

* Rename run_targets to run_target_all

* Update docs

* Update justfile to test fuzzer

* left out justfile

* Format

* Corner case when bitmap size is as small as 1

* Add comments

* clippy

* Format vuln.c

* Format toml

* Fix doc

* Fix justfile

* Move ExecutorsTuple to executors/mod.rs

* Fix

---------

Co-authored-by: Dongjia "toka" Zhang <tokazerkje@outlook.com>
This commit is contained in:
lazymio 2025-03-06 21:15:22 +08:00 committed by GitHub
parent eaa600e5ac
commit de2bc166f0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 919 additions and 5 deletions

View File

@ -275,6 +275,7 @@ jobs:
- ./fuzzers/forkserver/forkserver_libafl_cc - ./fuzzers/forkserver/forkserver_libafl_cc
- ./fuzzers/forkserver/fuzzbench_forkserver - ./fuzzers/forkserver/fuzzbench_forkserver
- ./fuzzers/forkserver/fuzzbench_forkserver_cmplog - ./fuzzers/forkserver/fuzzbench_forkserver_cmplog
- ./fuzzers/forkserver/fuzzbench_forkserver_sand
- ./fuzzers/forkserver/libafl-fuzz - ./fuzzers/forkserver/libafl-fuzz
- ./fuzzers/forkserver/baby_fuzzer_with_forkexecutor - ./fuzzers/forkserver/baby_fuzzer_with_forkexecutor

View File

@ -0,0 +1,2 @@
libpng-*
fuzzer

View File

@ -0,0 +1,53 @@
[package]
name = "fuzzbench_forkserver_sand"
version = "0.15.1"
authors = [
    "Andrea Fioraldi <andreafioraldi@gmail.com>",
    "Dominik Maier <domenukk@gmail.com>",
    "Ziqiao Kong <mio@lazym.io>",
]
edition = "2021"

# Release profile: fully optimized, but debug info is kept for crash triage.
[profile.release]
lto = true
codegen-units = 1
opt-level = 3
debug = true

# Leaner profile for fuzzbench runs: inherits release, drops debug info and strips symbols.
[profile.release-fuzzbench]
inherits = "release"
debug = false
strip = true

[build-dependencies]
cc = { version = "1.1.22", features = ["parallel"] }
which = "6.0.3"

[dependencies]
libafl = { path = "../../../libafl" }
libafl_bolts = { path = "../../../libafl_bolts" }
libafl_targets = { path = "../../../libafl_targets", features = [
    "sancov_pcguard_hitcounts",
    "libfuzzer",
    "pointer_maps",
] }
libafl_cc = { path = "../../../libafl_cc" }
log = { version = "0.4.22", features = ["release_max_level_info"] }
clap = { version = "4.5.18", features = ["default"] }
nix = { version = "0.29.0", features = ["signal"] }

# Compiler wrappers (see src/cc.rs / src/cxx.rs) used to build the targets.
[[bin]]
name = "sand_cc"
path = "src/cc.rs"
[[bin]]
name = "sand_cxx"
path = "src/cxx.rs"

# The fuzzer binary itself.
[[bin]]
name = "fuzzbench_forkserver_sand"
path = "src/main.rs"

# Static library with the forkserver entry point, linked into targets by the wrappers.
[lib]
name = "forkserver_sand"
crate-type = ["staticlib"]

View File

@ -0,0 +1,73 @@
FUZZER_NAME := 'fuzzbench_forkserver_sand'
FORKSERVER_NAME := 'fuzzbench_forkserver_sand'
CARGO_TARGET_DIR := env("CARGO_TARGET_DIR", "target")
PROFILE := env("PROFILE", "release")
PROFILE_DIR := if PROFILE == "release" { "release" } else if PROFILE == "dev" { "debug" } else { "debug" }
# NOTE(review): PROJECT_DIR is assigned below its first use; `just` resolves
# variables lazily, so forward references like this are fine.
LIBAFL_CC := PROJECT_DIR / CARGO_TARGET_DIR / PROFILE_DIR / "sand_cc"
LIBAFL_CXX := PROJECT_DIR / CARGO_TARGET_DIR / PROFILE_DIR / "sand_cxx"
FUZZER := PROJECT_DIR / CARGO_TARGET_DIR / PROFILE_DIR / FUZZER_NAME
FORKSERVER := PROJECT_DIR / CARGO_TARGET_DIR / PROFILE_DIR / FORKSERVER_NAME
PROJECT_DIR := absolute_path(".")

# `just cc` and `just cxx` both build the wrappers + fuzzer.
alias cc := cxx

# Build everything (fuzzer, wrappers, forkserver staticlib) with cargo.
[linux]
[macos]
cxx:
    cargo build --profile {{PROFILE}}

[windows]
cxx:
    echo "Unsupported on this platform"

# Build the uninstrumented-sanitizer ("native") target with the wrapper.
[linux]
[macos]
fuzzer: cxx
    {{LIBAFL_CC}} {{PROJECT_DIR}}/src/vuln.c -o vuln_native -lm -lz

[windows]
fuzzer:
    echo "Unsupported on this platform"

# Build the ASAN target; AFL_SAN_NO_INST=1 disables coverage instrumentation.
[linux]
[macos]
fuzzer_asan: cxx
    AFL_SAN_NO_INST=1 {{LIBAFL_CC}} {{PROJECT_DIR}}/src/vuln.c -fsanitize=address -o vuln_asan -lm -lz

[windows]
fuzzer_asan:
    echo "Unsupported on this platform"

# Run the fuzzer pinned to one core, with the ASAN binary as extra (-a) executor.
[linux]
[macos]
run: fuzzer fuzzer_asan
    #!/bin/bash
    mkdir -p input && echo "a" >> input/a
    taskset -c 1 {{FUZZER}} -i input -o /tmp/out -a ./vuln_asan -t 1000 ./vuln_native

[windows]
run: fuzzer fuzzer_asan
    echo "Unsupported on this platform"

# CI smoke test: run for 10s and require at least one objective (crash) found.
[linux]
[macos]
test: fuzzer fuzzer_asan
    #!/bin/bash
    mkdir -p input && echo "a" >> input/a
    timeout 10s {{FUZZER}} -i input -o /tmp/out -a ./vuln_asan -t 1000 ./vuln_native | tee fuzz_stdout.log || true
    if grep -qa "objectives: 1" fuzz_stdout.log; then
        echo "Fuzzer is working"
    else
        echo "Fuzzer does not generate any testcases or any crashes"
        exit 1
    fi

[windows]
test: fuzzer fuzzer_asan
    echo "Unsupported on this platform"

clean:
    rm -rf {{FUZZER}}
    rm -rf vuln_native vuln_asan
    cargo clean

View File

@ -0,0 +1,46 @@
use std::env;
use libafl_cc::{ClangWrapper, CompilerWrapper, ToolWrapper};
/// Entry point of the `sand_cc` / `sand_cxx` compiler wrapper.
///
/// Detects C vs. C++ mode from the wrapper executable's name, optionally
/// disables coverage instrumentation (when `AFL_SAN_NO_INST` is set, used to
/// build the extra sanitizer binaries), injects AFL-compatible
/// `__AFL_FUZZ_*`/`__AFL_INIT` compile definitions, and links the
/// `forkserver_sand` static library into the target.
///
/// # Panics
///
/// Panics if the wrapper name does not end in `cc`, `++`, `pp` or `xx`,
/// or if the wrapped compiler cannot be run.
pub fn main() {
    let args: Vec<String> = env::args().collect();
    if args.len() > 1 {
        let mut dir = env::current_exe().unwrap();
        let wrapper_name = dir.file_name().unwrap().to_str().unwrap();
        // Inspect only the final two characters; use checked slicing so a
        // pathological (sub-two-character) name falls through to the panic
        // below instead of panicking on index underflow.
        let suffix = wrapper_name
            .get(wrapper_name.len().saturating_sub(2)..)
            .unwrap_or("")
            .to_lowercase();
        let is_cpp = match suffix.as_str() {
            "cc" => false,
            "++" | "pp" | "xx" => true,
            _ => panic!("Could not figure out if c or c++ wrapper was called. Expected {dir:?} to end with c or cxx"),
        };
        // `is_ok()` instead of `.ok().is_some()`: identical truth value, clearer intent.
        let no_inst = env::var("AFL_SAN_NO_INST").is_ok();
        dir.pop();
        let mut cc = ClangWrapper::new();
        // Coverage instrumentation is only added for the primary (fast) target.
        if !no_inst {
            cc.add_arg("-fsanitize-coverage=trace-pc-guard");
        }
        if let Some(code) = cc
            .cpp(is_cpp)
            // silence the compiler wrapper output, needed for some configure scripts.
            .silence(true)
            .parse_args(&args)
            .expect("Failed to parse the command line")
            // Imitate afl-cc's compile definitions
            .add_arg("-D__AFL_FUZZ_INIT()=int __afl_sharedmem_fuzzing = 1;extern unsigned int *__afl_fuzz_len;extern unsigned char *__afl_fuzz_ptr;unsigned char __afl_fuzz_alt[1048576];unsigned char *__afl_fuzz_alt_ptr = __afl_fuzz_alt;void libafl_start_forkserver(void)")
            .add_arg("-D__AFL_FUZZ_TESTCASE_BUF=(__afl_fuzz_ptr ? __afl_fuzz_ptr : __afl_fuzz_alt_ptr)")
            .add_arg("-D__AFL_FUZZ_TESTCASE_LEN=(__afl_fuzz_ptr ? *__afl_fuzz_len : (*__afl_fuzz_len = read(0, __afl_fuzz_alt_ptr, 1048576)) == 0xffffffff ? 0 : *__afl_fuzz_len)")
            .add_arg("-D__AFL_INIT()=libafl_start_forkserver()")
            // Link with libafl's forkserver implementation
            .link_staticlib(&dir, "forkserver_sand")
            .run()
            .expect("Failed to run the wrapped compiler")
        {
            std::process::exit(code);
        }
    } else {
        panic!("LibAFL CC: No Arguments given");
    }
}

View File

@ -0,0 +1,5 @@
pub mod cc;

/// Thin binary entry point (`sand_cxx`): all wrapper logic lives in [`cc`].
fn main() {
    cc::main();
}

View File

@ -0,0 +1,9 @@
use libafl_targets::{map_shared_memory, start_forkserver};
/// Forkserver entry point, called from the instrumented target.
///
/// The compiler wrapper defines `__AFL_INIT()` to expand to a call to this
/// function, so the target starts the forkserver at its `__AFL_INIT()` site.
/// `#[no_mangle]` + `extern "C"` make the symbol resolvable from C.
#[no_mangle]
pub extern "C" fn libafl_start_forkserver() {
    // Map shared memory region for the edge coverage map
    map_shared_memory();
    // Start the forkserver
    start_forkserver();
}

View File

@ -0,0 +1,437 @@
use core::{cell::RefCell, time::Duration};
use std::{
env,
fs::{self, OpenOptions},
io::Write,
path::PathBuf,
process,
};
use clap::{Arg, ArgAction, Command};
use libafl::{
corpus::{Corpus, InMemoryOnDiskCorpus, OnDiskCorpus},
events::SimpleEventManager,
executors::{forkserver::ForkserverExecutor, sand::SANDExecutor},
feedback_or,
feedbacks::{CrashFeedback, MaxMapFeedback, TimeFeedback},
fuzzer::{Fuzzer, StdFuzzer},
inputs::BytesInput,
monitors::SimpleMonitor,
mutators::{
havoc_mutations, token_mutations::I2SRandReplace, tokens_mutations, StdMOptMutator,
StdScheduledMutator, Tokens,
},
observers::{CanTrack, HitcountsMapObserver, StdCmpObserver, StdMapObserver, TimeObserver},
schedulers::{
powersched::PowerSchedule, IndexesLenTimeMinimizerScheduler, StdWeightedScheduler,
},
stages::{
calibrate::CalibrationStage, power::StdPowerMutationalStage, StdMutationalStage,
TracingStage,
},
state::{HasCorpus, StdState},
Error, HasMetadata,
};
use libafl_bolts::{
current_time,
ownedref::OwnedRefMut,
rands::StdRand,
shmem::{ShMem, ShMemProvider, UnixShMemProvider},
tuples::{tuple_list, Handled, Merge},
AsSliceMut,
};
use libafl_targets::cmps::AFLppCmpLogMap;
use nix::sys::signal::Signal;
/// Parse the command line and hand the validated options to [`fuzz`].
///
/// Prints a usage message and returns (without panicking) on bad arguments,
/// a non-existent input directory, or a pre-existing non-directory output path.
pub fn main() {
    // Build the clap command; `try_get_matches` lets us print our own syntax help.
    let res = match Command::new(env!("CARGO_PKG_NAME"))
        .version(env!("CARGO_PKG_VERSION"))
        .author("AFLplusplus team")
        .about("LibAFL-based fuzzer for Fuzzbench")
        .arg(
            Arg::new("out")
                .short('o')
                .long("output")
                .help("The directory to place finds in ('corpus')"),
        )
        .arg(
            Arg::new("in")
                .short('i')
                .long("input")
                .help("The directory to read initial inputs from ('seeds')"),
        )
        .arg(
            Arg::new("tokens")
                .short('x')
                .long("tokens")
                .help("A file to read tokens from, to be used during fuzzing"),
        )
        .arg(
            Arg::new("logfile")
                .short('l')
                .long("logfile")
                .help("Duplicates all output to this file")
                .default_value("libafl.log"),
        )
        .arg(
            Arg::new("timeout")
                .short('t')
                .long("timeout")
                .help("Timeout for each individual execution, in milliseconds")
                .default_value("1200"),
        )
        .arg(
            Arg::new("exec")
                .help("The instrumented binary we want to fuzz")
                .required(true),
        )
        .arg(
            Arg::new("debug-child")
                .short('d')
                .long("debug-child")
                .help("If not set, the child's stdout and stderror will be redirected to /dev/null")
                .action(ArgAction::SetTrue),
        )
        .arg(
            Arg::new("signal")
                .short('s')
                .long("signal")
                .help("Signal used to stop child")
                .default_value("SIGKILL"),
        )
        .arg(
            Arg::new("cmplog")
                .short('c')
                .long("cmplog")
                .help("The instrumented binary with cmplog"),
        )
        // `-a` may be given multiple times: one extra (sanitizer) binary each.
        .arg(
            Arg::new("sand")
                .short('a')
                .long("sand")
                .action(ArgAction::Append),
        )
        .arg(Arg::new("arguments"))
        .try_get_matches()
    {
        Ok(res) => res,
        Err(err) => {
            println!(
                "Syntax: {}, [-x dictionary] -o corpus_dir -i seed_dir\n{:?}",
                env::current_exe()
                    .unwrap_or_else(|_| "fuzzer".into())
                    .to_string_lossy(),
                err,
            );
            return;
        }
    };
    println!(
        "Workdir: {:?}",
        env::current_dir().unwrap().to_string_lossy().to_string()
    );
    // For fuzzbench, crashes and finds are inside the same `corpus` directory, in the "queue" and "crashes" subdir.
    let mut out_dir = PathBuf::from(
        res.get_one::<String>("out")
            .expect("The --output parameter is missing")
            .to_string(),
    );
    // Creating the dir may fail because it already exists -- that is fine,
    // as long as the path really is a directory.
    if fs::create_dir(&out_dir).is_err() {
        println!("Out dir at {:?} already exists.", &out_dir);
        if !out_dir.is_dir() {
            println!("Out dir at {:?} is not a valid directory!", &out_dir);
            return;
        }
    }
    let mut crashes = out_dir.clone();
    crashes.push("crashes");
    out_dir.push("queue");
    let in_dir = PathBuf::from(
        res.get_one::<String>("in")
            .expect("The --input parameter is missing")
            .to_string(),
    );
    if !in_dir.is_dir() {
        println!("In dir at {:?} is not a valid directory!", &in_dir);
        return;
    }
    let tokens = res.get_one::<String>("tokens").map(PathBuf::from);
    let logfile = PathBuf::from(res.get_one::<String>("logfile").unwrap().to_string());
    let timeout = Duration::from_millis(
        res.get_one::<String>("timeout")
            .unwrap()
            .to_string()
            .parse()
            .expect("Could not parse timeout in milliseconds"),
    );
    let executable = res
        .get_one::<String>("exec")
        .expect("The executable is missing")
        .to_string();
    let debug_child = res.get_flag("debug-child");
    // Parse e.g. "SIGKILL" into a nix signal; panics on unknown names.
    let signal = str::parse::<Signal>(
        &res.get_one::<String>("signal")
            .expect("The --signal parameter is missing")
            .to_string(),
    )
    .unwrap();
    let cmplog_exec = res
        .get_one::<String>("cmplog")
        .map(std::string::ToString::to_string);
    let arguments = res
        .get_many::<String>("arguments")
        .map(|v| v.map(std::string::ToString::to_string).collect::<Vec<_>>())
        .unwrap_or_default();
    // All `-a` occurrences, i.e. the extra sanitizer-instrumented binaries.
    let sands = res.get_many::<String>("sand").map(|t| {
        t.into_iter()
            .map(std::string::ToString::to_string)
            .collect::<Vec<_>>()
    });
    fuzz(
        out_dir,
        crashes,
        &in_dir,
        tokens,
        &logfile,
        timeout,
        executable,
        debug_child,
        signal,
        &cmplog_exec,
        &sands,
        &arguments,
    )
    .expect("An error occurred while fuzzing");
}
/// The actual fuzzer.
///
/// Builds the whole LibAFL pipeline: a forkserver executor for the primary
/// (coverage-instrumented) `executable`, one extra forkserver executor per
/// entry of `sand_execs` (wrapped together in a [`SANDExecutor`]), and an
/// optional cmplog tracing stage when `cmplog_exec` is given.
///
/// # Errors
///
/// Returns any [`Error`] bubbling up from log-file creation, token loading,
/// or the fuzzing loop itself. Runs forever on success (the final `Ok` is
/// never reached).
#[expect(clippy::too_many_arguments)]
fn fuzz(
    corpus_dir: PathBuf,
    objective_dir: PathBuf,
    seed_dir: &PathBuf,
    tokenfile: Option<PathBuf>,
    logfile: &PathBuf,
    timeout: Duration,
    executable: String,
    debug_child: bool,
    signal: Signal,
    cmplog_exec: &Option<String>,
    sand_execs: &Option<Vec<String>>,
    arguments: &[String],
) -> Result<(), Error> {
    // a large initial map size that should be enough
    // to house all potential coverage maps for our targets
    // (we will eventually reduce the used size according to the actual map)
    const MAP_SIZE: usize = 65_536;
    let log = RefCell::new(OpenOptions::new().append(true).create(true).open(logfile)?);
    // Monitor closure: print stats to stdout and duplicate them into the log file.
    let monitor = SimpleMonitor::new(|s| {
        println!("{s}");
        writeln!(log.borrow_mut(), "{:?} {}", current_time(), s).unwrap();
    });
    // The event manager handle the various events generated during the fuzzing loop
    // such as the notification of the addition of a new item to the corpus
    let mut mgr = SimpleEventManager::new(monitor);
    // The unix shmem provider for shared memory, to match AFL++'s shared memory at the target side
    let mut shmem_provider = UnixShMemProvider::new().unwrap();
    // The coverage map shared between observer and executor
    let mut shmem = shmem_provider.new_shmem(MAP_SIZE).unwrap();
    // let the forkserver know the shmid
    unsafe {
        shmem.write_to_env("__AFL_SHM_ID").unwrap();
    }
    let shmem_buf = shmem.as_slice_mut();
    // To let know the AFL++ binary that we have a big map
    std::env::set_var("AFL_MAP_SIZE", format!("{}", MAP_SIZE));
    // Create an observation channel using the hitcounts map of AFL++
    let edges_observer = unsafe {
        HitcountsMapObserver::new(StdMapObserver::new("shared_mem", shmem_buf)).track_indices()
    };
    // Create an observation channel to keep track of the execution time
    let time_observer = TimeObserver::new("time");
    let map_feedback = MaxMapFeedback::new(&edges_observer);
    let calibration = CalibrationStage::new(&map_feedback);
    // Feedback to rate the interestingness of an input
    // This one is composed by two Feedbacks in OR
    let mut feedback = feedback_or!(
        // New maximization map feedback linked to the edges observer and the feedback state
        map_feedback,
        // Time feedback, this one does not need a feedback state
        TimeFeedback::new(&time_observer)
    );
    // A feedback to choose if an input is a solution or not
    let mut objective = CrashFeedback::new();
    // create a State from scratch
    let mut state = StdState::new(
        // RNG
        StdRand::new(),
        // Corpus that will be evolved, we keep it in memory for performance
        InMemoryOnDiskCorpus::<BytesInput>::new(corpus_dir).unwrap(),
        // Corpus in which we store solutions (crashes in this example),
        // on disk so the user can get them after stopping the fuzzer
        OnDiskCorpus::new(objective_dir).unwrap(),
        // States of the feedbacks.
        // The feedbacks can report the data that should persist in the State.
        &mut feedback,
        // Same for objective feedbacks
        &mut objective,
    )
    .unwrap();
    println!("Let's fuzz :)");
    // Setup a MOPT mutator
    let mutator = StdMOptMutator::new(
        &mut state,
        havoc_mutations().merge(tokens_mutations()),
        7,
        5,
    )?;
    let power: StdPowerMutationalStage<_, _, BytesInput, _, _, _> =
        StdPowerMutationalStage::new(mutator);
    // A minimization+queue policy to get testcasess from the corpus
    let scheduler = IndexesLenTimeMinimizerScheduler::new(
        &edges_observer,
        StdWeightedScheduler::with_schedule(
            &mut state,
            &edges_observer,
            Some(PowerSchedule::explore()),
        ),
    );
    // Keep a handle to the edges observer; the SAND executor needs it to
    // locate the coverage map among the observers at runtime.
    let edge_handle = edges_observer.handle();
    // A fuzzer with feedbacks and a corpus scheduler
    let mut fuzzer = StdFuzzer::new(scheduler, feedback, objective);
    let mut tokens = Tokens::new();
    let mut executor = ForkserverExecutor::builder()
        .program(executable)
        .debug_child(debug_child)
        .shmem_provider(&mut shmem_provider)
        .autotokens(&mut tokens)
        .parse_afl_cmdline(arguments)
        .coverage_map_size(MAP_SIZE)
        .timeout(timeout)
        .kill_signal(signal)
        .is_persistent(true)
        .build_dynamic_map(edges_observer, tuple_list!(time_observer))
        .unwrap();
    // Read tokens
    if let Some(tokenfile) = tokenfile {
        tokens.add_from_file(tokenfile)?;
    }
    if !tokens.is_empty() {
        state.add_metadata(tokens);
    }
    state
        .load_initial_inputs(&mut fuzzer, &mut executor, &mut mgr, &[seed_dir.clone()])
        .unwrap_or_else(|_| {
            println!("Failed to load initial corpus at {:?}", &seed_dir);
            process::exit(0);
        });
    println!("We imported {} inputs from disk.", state.corpus().count());
    // One extra forkserver executor per sanitizer binary passed via `-a`.
    let mut sand_executors = vec![];
    for (idx, sand) in sand_execs
        .as_ref()
        .map(|t| t.iter())
        .into_iter()
        .flatten()
        .enumerate()
    {
        // The extra binaries don't need to contribute coverage; give each a
        // dummy map. `Box::leak` makes the buffer 'static so the observer can
        // borrow it for the whole run.
        let buf = Box::leak(Box::new(vec![0; MAP_SIZE]));
        let edges_observer = unsafe {
            HitcountsMapObserver::new(StdMapObserver::new(
                format!("dumb_shm_{}", idx),
                buf.as_mut_slice(),
            ))
            .track_indices()
        };
        let time_observer = TimeObserver::new(format!("dumb_tm_{}", idx));
        let executor = ForkserverExecutor::builder()
            .program(sand.clone())
            .debug_child(debug_child)
            .shmem_provider(&mut shmem_provider)
            .parse_afl_cmdline(arguments)
            .coverage_map_size(MAP_SIZE)
            .timeout(timeout)
            .kill_signal(signal)
            .is_persistent(true)
            .build_dynamic_map(edges_observer, tuple_list!(time_observer))
            .unwrap();
        sand_executors.push(executor);
    }
    // Wrap primary + sanitizer executors; `new_paper` uses the paper's defaults.
    let mut executor = SANDExecutor::new_paper(executor, sand_executors, edge_handle);
    if let Some(exec) = &cmplog_exec {
        // The cmplog map shared between observer and executor
        let mut cmplog_shmem = shmem_provider.uninit_on_shmem::<AFLppCmpLogMap>().unwrap();
        // let the forkserver know the shmid
        unsafe {
            cmplog_shmem.write_to_env("__AFL_CMPLOG_SHM_ID").unwrap();
        }
        let cmpmap = unsafe { OwnedRefMut::<AFLppCmpLogMap>::from_shmem(&mut cmplog_shmem) };
        let cmplog_observer = StdCmpObserver::new("cmplog", cmpmap, true);
        let cmplog_executor = ForkserverExecutor::builder()
            .program(exec)
            .debug_child(debug_child)
            .shmem_provider(&mut shmem_provider)
            .parse_afl_cmdline(arguments)
            .is_persistent(true)
            // cmplog tracing is slower, so allow a much larger timeout
            .timeout(timeout * 10)
            .kill_signal(signal)
            .build(tuple_list!(cmplog_observer))
            .unwrap();
        let tracing = TracingStage::new(cmplog_executor);
        // Setup a randomic Input2State stage
        let i2s =
            StdMutationalStage::new(StdScheduledMutator::new(tuple_list!(I2SRandReplace::new())));
        // The order of the stages matter!
        let mut stages = tuple_list!(calibration, tracing, i2s, power);
        fuzzer.fuzz_loop(&mut stages, &mut executor, &mut state, &mut mgr)?;
    } else {
        // The order of the stages matter!
        let mut stages = tuple_list!(calibration, power);
        fuzzer.fuzz_loop(&mut stages, &mut executor, &mut state, &mut mgr)?;
    }
    // Never reached
    Ok(())
}

View File

@ -0,0 +1,40 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Dangling pointer shared with vuln() below -- part of the planted bug. */
char *p;

// The following line is needed for shared memory testcase fuzzing
__AFL_FUZZ_INIT();

/*
 * Deliberately vulnerable function (do NOT "fix"): copies 16 bytes of the
 * input into a 1024-byte heap buffer, frees it, and -- when the input starts
 * with 0x41 ('A') -- writes through the freed pointer. That heap
 * use-after-free is what the ASAN-built target is expected to report.
 * Note: the memcpy reads 16 bytes from buf regardless of the actual
 * testcase length (intentional for this toy target).
 */
void vuln(char *buf) {
  p = malloc(1024);
  memcpy(p, buf, 16);
  free(p);
  if (buf[0] == 0x41) {
    p[0] = buf[0]; /* use-after-free write (intentional) */
  } else {
    p = buf;
  }
}
/*
 * Harness entry point: starts the forkserver, fetches the testcase from
 * AFL shared memory, and feeds it to the vulnerable function.
 */
int main(int argc, char **argv) {
  // Start the forkserver at this point (i.e., forks will happen here)
  __AFL_INIT();
  // The following five lines are for normal fuzzing.
  /*
  FILE *file = stdin;
  if (argc > 1) { file = fopen(argv[1], "rb"); }
  char buf[16];
  char *p = fgets(buf, 16, file);
  buf[15] = 0;
  */
  // The following line is also needed for shared memory testcase fuzzing
  unsigned char *buf = __AFL_FUZZ_TESTCASE_BUF; // must be after __AFL_INIT
  vuln((char *)buf);
  return 0;
}

View File

@ -1,6 +1,5 @@
//! Executors take input, and run it in the target. //! Executors take input, and run it in the target.
#[cfg(unix)]
use alloc::vec::Vec; use alloc::vec::Vec;
use core::{fmt::Debug, time::Duration}; use core::{fmt::Debug, time::Duration};
@ -29,6 +28,8 @@ pub mod differential;
#[cfg(all(feature = "std", feature = "fork", unix))] #[cfg(all(feature = "std", feature = "fork", unix))]
pub mod forkserver; pub mod forkserver;
pub mod inprocess; pub mod inprocess;
/// SAND(<https://github.com/wtdcode/sand-aflpp>) implementation
pub mod sand;
/// The module for inproc fork executor /// The module for inproc fork executor
#[cfg(all(feature = "std", unix))] #[cfg(all(feature = "std", unix))]
@ -137,6 +138,75 @@ pub trait HasTimeout {
fn set_timeout(&mut self, timeout: Duration); fn set_timeout(&mut self, timeout: Duration);
} }
/// Like [`crate::observers::ObserversTuple`], a list of executors
pub trait ExecutorsTuple<EM, I, S, Z> {
    /// Execute the executors and stop if any of them returns a crash.
    ///
    /// Returns [`ExitKind::Crash`] as soon as one executor crashes; otherwise
    /// the exit kind of the last executor run (`Ok` for an empty list).
    fn run_target_all(
        &mut self,
        fuzzer: &mut Z,
        state: &mut S,
        mgr: &mut EM,
        input: &I,
    ) -> Result<ExitKind, Error>;
}
/// Since in most cases, the executors types can not be determined during compilation
/// time (for instance, the number of executors might change), this implementation would
/// act as a small helper.
impl<E, EM, I, S, Z> ExecutorsTuple<EM, I, S, Z> for Vec<E>
where
    E: Executor<EM, I, S, Z>,
{
    /// Run each executor in order, short-circuiting at the first crash.
    fn run_target_all(
        &mut self,
        fuzzer: &mut Z,
        state: &mut S,
        mgr: &mut EM,
        input: &I,
    ) -> Result<ExitKind, Error> {
        // An empty vector trivially reports a clean run.
        let mut last_kind = ExitKind::Ok;
        for executor in self.iter_mut() {
            last_kind = executor.run_target(fuzzer, state, mgr, input)?;
            // A crash makes running the remaining executors pointless.
            if matches!(last_kind, ExitKind::Crash) {
                break;
            }
        }
        Ok(last_kind)
    }
}
/// The empty tuple runs nothing and always reports a clean execution.
impl<EM, I, S, Z> ExecutorsTuple<EM, I, S, Z> for () {
    fn run_target_all(
        &mut self,
        _fuzzer: &mut Z,
        _state: &mut S,
        _mgr: &mut EM,
        _input: &I,
    ) -> Result<ExitKind, Error> {
        // Nothing to execute: trivially `Ok`.
        Ok(ExitKind::Ok)
    }
}
impl<Head, Tail, EM, I, S, Z> ExecutorsTuple<EM, I, S, Z> for (Head, Tail)
where
    Head: Executor<EM, I, S, Z>,
    Tail: ExecutorsTuple<EM, I, S, Z>,
{
    /// Run the head executor, then recurse into the tail unless the head crashed.
    fn run_target_all(
        &mut self,
        fuzzer: &mut Z,
        state: &mut S,
        mgr: &mut EM,
        input: &I,
    ) -> Result<ExitKind, Error> {
        let head_kind = self.0.run_target(fuzzer, state, mgr, input)?;
        match head_kind {
            // Stop at the first crashing executor.
            ExitKind::Crash => Ok(head_kind),
            // Otherwise the tail determines the final exit kind.
            _ => self.1.run_target_all(fuzzer, state, mgr, input),
        }
    }
}
/// The common signals we want to handle /// The common signals we want to handle
#[cfg(unix)] #[cfg(unix)]
#[inline] #[inline]

View File

@ -0,0 +1,170 @@
//! Implementation for "SAND: Decoupling Sanitization from Fuzzing for Low Overhead"
//! Reference Implementation: <https://github.com/wtdcode/sand-aflpp>
//! Detailed docs: <https://github.com/AFLplusplus/AFLplusplus/blob/stable/docs/SAND.md>
//! Maintainer: Ziqiao Kong (<https://github.com/wtdcode>)
//! Preprint: <https://arxiv.org/abs/2402.16497> accepted by ICSE'25
use alloc::vec::Vec;
use core::marker::PhantomData;
use libafl_bolts::{
AsIter, Error, Named, hash_std,
tuples::{Handle, MatchName, MatchNameRef},
};
use super::{Executor, ExecutorsTuple, ExitKind, HasObservers, HasTimeout};
use crate::{HasNamedMetadata, observers::MapObserver};
/// The execution pattern of the [`SANDExecutor`]. The default value used in our paper is
/// [`SANDExecutionPattern::SimplifiedTrace`] and we by design don't include coverage
/// increasing pattern here as it will miss at least 25% bugs and is easy enough to implement
/// by iterating the crash corpus.
#[derive(Debug, Clone, Default, Copy)]
pub enum SANDExecutionPattern {
    /// The simplified trace, captures ~92% bug triggering inputs with ~20% overhead
    /// on average (less than 5% overhead on most targets during evaluation)
    #[default]
    SimplifiedTrace,
    /// The unique trace, captures ~99.9% bug-triggering inputs with more than >50% overhead.
    UniqueTrace,
}
/// The core executor implementation. It wraps another executor and a list of extra executors.
/// Please refer to [SAND.md](https://github.com/AFLplusplus/AFLplusplus/blob/stable/docs/SAND.md) for
/// how to build `sand_executors`.
#[derive(Debug, Clone)]
pub struct SANDExecutor<E, ET, C, O> {
    /// The primary executor, driven on every input
    executor: E,
    /// The extra (sanitizer) executors, run only for unseen execution patterns
    sand_executors: ET,
    /// Bitmap of execution-pattern hashes seen so far
    bitmap: Vec<u8>,
    /// Handle used to locate the coverage map observer of the primary executor
    ob_ref: Handle<C>,
    /// Which execution-pattern abstraction gets hashed
    pattern: SANDExecutionPattern,
    /// Marker for the map observer type `O`
    ph: PhantomData<O>,
}
impl<E, ET, C, O> SANDExecutor<E, ET, C, O>
where
    C: Named,
{
    /// Set the bit for `idx` (reduced modulo the bitmap capacity in bits).
    fn bitmap_set(&mut self, idx: usize) {
        let bidx = idx % 8;
        let idx = (idx / 8) % self.bitmap.len();
        // `idx` is already in range, so plain indexing is safe and clearer
        // than `get_mut(..).unwrap()`.
        self.bitmap[idx] |= 1u8 << bidx;
    }

    /// Read the bit for `idx` (reduced modulo the bitmap capacity in bits).
    /// Takes `&self`: reading never mutates the bitmap.
    fn bitmap_read(&self, idx: usize) -> u8 {
        let bidx = idx % 8;
        let idx = (idx / 8) % self.bitmap.len();
        (self.bitmap[idx] >> bidx) & 1
    }

    /// Create a new [`SANDExecutor`]
    ///
    /// # Panics
    ///
    /// Panics if `bitmap_size` is zero (the bitmap indexing uses
    /// `% bitmap.len()`, which would otherwise divide by zero later with a
    /// far less helpful message).
    pub fn new(
        executor: E,
        sand_extra_executors: ET,
        observer_handle: Handle<C>,
        bitmap_size: usize,
        pattern: SANDExecutionPattern,
    ) -> Self {
        // Fail fast on a zero-sized bitmap instead of panicking on `% 0`
        // during the first execution.
        assert!(bitmap_size > 0, "SANDExecutor bitmap_size must be non-zero");
        Self {
            executor,
            sand_executors: sand_extra_executors,
            bitmap: vec![0; bitmap_size],
            ob_ref: observer_handle,
            pattern,
            ph: PhantomData,
        }
    }

    /// Create a new [`SANDExecutor`] using paper setup: a `1 << 29`-byte
    /// (2^32-bit) bitmap and the simplified-trace pattern.
    pub fn new_paper(executor: E, sand_extra_executors: ET, observer_handle: Handle<C>) -> Self {
        Self::new(
            executor,
            sand_extra_executors,
            observer_handle,
            1 << 29,
            SANDExecutionPattern::SimplifiedTrace,
        )
    }
}
impl<E, ET, C, O> HasTimeout for SANDExecutor<E, ET, C, O>
where
    E: HasTimeout,
{
    /// Delegates to the wrapped primary executor's timeout.
    fn timeout(&self) -> core::time::Duration {
        self.executor.timeout()
    }

    /// Sets the timeout on the wrapped primary executor only; the extra
    /// sanitizer executors keep whatever timeouts they were built with.
    fn set_timeout(&mut self, timeout: core::time::Duration) {
        self.executor.set_timeout(timeout);
    }
}
/// Observer access is forwarded to the primary executor; the extra sanitizer
/// executors' observers are not exposed here.
impl<E, ET, C, O> HasObservers for SANDExecutor<E, ET, C, O>
where
    E: HasObservers,
{
    type Observers = E::Observers;

    fn observers(&self) -> libafl_bolts::tuples::RefIndexable<&Self::Observers, Self::Observers> {
        self.executor.observers()
    }

    fn observers_mut(
        &mut self,
    ) -> libafl_bolts::tuples::RefIndexable<&mut Self::Observers, Self::Observers> {
        self.executor.observers_mut()
    }
}
impl<E, ET, C, O, EM, I, S, Z, OT> Executor<EM, I, S, Z> for SANDExecutor<E, ET, C, O>
where
    ET: ExecutorsTuple<EM, I, S, Z>,
    E: Executor<EM, I, S, Z> + HasObservers<Observers = OT>,
    OT: MatchName,
    O: MapObserver<Entry = u8> + for<'it> AsIter<'it, Item = u8>,
    C: AsRef<O> + Named,
    S: HasNamedMetadata,
{
    /// The SAND flow: run the fast primary executor, hash the resulting
    /// execution pattern, and only forward the input to the (expensive)
    /// sanitizer executors when that pattern has not been seen before.
    fn run_target(
        &mut self,
        fuzzer: &mut Z,
        state: &mut S,
        mgr: &mut EM,
        input: &I,
    ) -> Result<ExitKind, Error> {
        // 1. Always run the primary executor first.
        let kind = self.executor.run_target(fuzzer, state, mgr, input)?;
        // 2. Fetch the coverage map observer via the stored handle.
        let ot = self.executor.observers();
        let ob = ot.get(&self.ob_ref).unwrap().as_ref();
        let initial = ob.initial();
        // 3. Abstract the coverage map into an execution pattern:
        //    - SimplifiedTrace: per entry, only "hit vs. not hit" (two marker bytes)
        //    - UniqueTrace: the raw map contents
        let covs = match self.pattern {
            SANDExecutionPattern::SimplifiedTrace => ob
                .as_iter()
                .map(|x| if *x == initial { 0x1 } else { 0x80 })
                .collect::<Vec<_>>(),
            SANDExecutionPattern::UniqueTrace => ob.to_vec(),
        };
        // Our paper uses xxh32 but it shouldn't have significant collision for most hashing algorithms.
        let pattern_hash = hash_std(&covs) as usize;
        // 4. For clean runs with an unseen pattern, run the sanitizer
        //    executors; a crash there is reported as the overall result.
        let ret = if kind == ExitKind::Ok {
            if self.bitmap_read(pattern_hash) == 0 {
                let sand_kind = self
                    .sand_executors
                    .run_target_all(fuzzer, state, mgr, input)?;
                if sand_kind == ExitKind::Crash {
                    Ok(sand_kind)
                } else {
                    Ok(kind)
                }
            } else {
                Ok(kind)
            }
        } else {
            Ok(kind)
        };
        // 5. Mark the pattern as seen regardless of the exit kind, so the
        //    same pattern never triggers the sanitizer executors again.
        self.bitmap_set(pattern_hash);
        ret
    }
}

View File

@ -324,9 +324,12 @@ mod instant_serializer {
impl TimeObserver { impl TimeObserver {
/// Creates a new [`TimeObserver`] with the given name. /// Creates a new [`TimeObserver`] with the given name.
#[must_use] #[must_use]
pub fn new(name: &'static str) -> Self { pub fn new<S>(name: S) -> Self
where
S: Into<Cow<'static, str>>,
{
Self { Self {
name: Cow::from(name), name: name.into(),
#[cfg(feature = "std")] #[cfg(feature = "std")]
start_time: Instant::now(), start_time: Instant::now(),

View File

@ -2,6 +2,7 @@
use alloc::string::{String, ToString}; use alloc::string::{String, ToString};
use libafl_bolts::{HasLen, HasRefCnt}; use libafl_bolts::{HasLen, HasRefCnt};
use num_traits::Zero;
use crate::{ use crate::{
Error, HasMetadata, Error, HasMetadata,
@ -273,7 +274,6 @@ where
let psmeta = state.metadata::<SchedulerMetadata>()?; let psmeta = state.metadata::<SchedulerMetadata>()?;
let tcmeta = entry.metadata::<SchedulerTestcaseMetadata>()?; let tcmeta = entry.metadata::<SchedulerTestcaseMetadata>()?;
// This means that this testcase has never gone through the calibration stage before1, // This means that this testcase has never gone through the calibration stage before1,
// In this case we'll just return the default weight // In this case we'll just return the default weight
// This methoud is called in corpus's on_add() method. Fuzz_level is zero at that time. // This methoud is called in corpus's on_add() method. Fuzz_level is zero at that time.
@ -305,7 +305,12 @@ where
} }
weight *= avg_exec_us / q_exec_us; weight *= avg_exec_us / q_exec_us;
weight *= libm::log2(q_bitmap_size).max(1.0) / avg_bitmap_size; weight *= if avg_bitmap_size.is_zero() {
// This can happen when the bitmap size of the target is as small as 1.
1.0
} else {
libm::log2(q_bitmap_size).max(1.0) / avg_bitmap_size
};
let tc_ref = match entry.metadata_map().get::<MapIndexesMetadata>() { let tc_ref = match entry.metadata_map().get::<MapIndexesMetadata>() {
Some(meta) => meta.refcnt() as f64, Some(meta) => meta.refcnt() as f64,