From de2bc166f00702d3cfbda0532f11411046caedd5 Mon Sep 17 00:00:00 2001 From: lazymio Date: Thu, 6 Mar 2025 21:15:22 +0800 Subject: [PATCH] Implementation of `SAND: Decoupling Sanitization from Fuzzing for Low Overhead` (#3037) * Reference implementation of SAND: Decoupling Sanitization from Fuzzing for Low Overhead * Format code * make clippy happy * Update docs * clean output * fmt * Fix for nostd * Update docs * use use alloc::vec::Vec; * Docs updates * Update docs * Format toml * Format again * Add CI * Rename run_targets to run_target_all * Update docs * Update justfile to test fuzzer * left out justfile * Format * Corner case when bitmap size is as small as 1 * Add comments * clippy * Format vuln.c * Format toml * Fix doc * Fix justfile * Move ExecutorsTuple to executors/mod.rs * Fix --------- Co-authored-by: Dongjia "toka" Zhang --- .github/workflows/build_and_test.yml | 1 + .../fuzzbench_forkserver_sand/.gitignore | 2 + .../fuzzbench_forkserver_sand/Cargo.toml | 53 +++ .../fuzzbench_forkserver_sand/Justfile | 73 +++ .../fuzzbench_forkserver_sand/src/cc.rs | 46 ++ .../fuzzbench_forkserver_sand/src/cxx.rs | 5 + .../fuzzbench_forkserver_sand/src/lib.rs | 9 + .../fuzzbench_forkserver_sand/src/main.rs | 437 ++++++++++++++++++ .../fuzzbench_forkserver_sand/src/vuln.c | 40 ++ libafl/src/executors/mod.rs | 72 ++- libafl/src/executors/sand.rs | 170 +++++++ libafl/src/observers/mod.rs | 7 +- libafl/src/schedulers/testcase_score.rs | 9 +- 13 files changed, 919 insertions(+), 5 deletions(-) create mode 100644 fuzzers/forkserver/fuzzbench_forkserver_sand/.gitignore create mode 100644 fuzzers/forkserver/fuzzbench_forkserver_sand/Cargo.toml create mode 100644 fuzzers/forkserver/fuzzbench_forkserver_sand/Justfile create mode 100644 fuzzers/forkserver/fuzzbench_forkserver_sand/src/cc.rs create mode 100644 fuzzers/forkserver/fuzzbench_forkserver_sand/src/cxx.rs create mode 100644 fuzzers/forkserver/fuzzbench_forkserver_sand/src/lib.rs create mode 100644 
fuzzers/forkserver/fuzzbench_forkserver_sand/src/main.rs create mode 100644 fuzzers/forkserver/fuzzbench_forkserver_sand/src/vuln.c create mode 100644 libafl/src/executors/sand.rs diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 9a03430375..652f88c583 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -275,6 +275,7 @@ jobs: - ./fuzzers/forkserver/forkserver_libafl_cc - ./fuzzers/forkserver/fuzzbench_forkserver - ./fuzzers/forkserver/fuzzbench_forkserver_cmplog + - ./fuzzers/forkserver/fuzzbench_forkserver_sand - ./fuzzers/forkserver/libafl-fuzz - ./fuzzers/forkserver/baby_fuzzer_with_forkexecutor diff --git a/fuzzers/forkserver/fuzzbench_forkserver_sand/.gitignore b/fuzzers/forkserver/fuzzbench_forkserver_sand/.gitignore new file mode 100644 index 0000000000..d3561edaf7 --- /dev/null +++ b/fuzzers/forkserver/fuzzbench_forkserver_sand/.gitignore @@ -0,0 +1,2 @@ +libpng-* +fuzzer diff --git a/fuzzers/forkserver/fuzzbench_forkserver_sand/Cargo.toml b/fuzzers/forkserver/fuzzbench_forkserver_sand/Cargo.toml new file mode 100644 index 0000000000..f7d585c70d --- /dev/null +++ b/fuzzers/forkserver/fuzzbench_forkserver_sand/Cargo.toml @@ -0,0 +1,53 @@ +[package] +name = "fuzzbench_forkserver_sand" +version = "0.15.1" +authors = [ + "Andrea Fioraldi ", + "Dominik Maier ", + "Ziqiao Kong ", +] +edition = "2021" + +[profile.release] +lto = true +codegen-units = 1 +opt-level = 3 +debug = true + +[profile.release-fuzzbench] +inherits = "release" +debug = false +strip = true + +[build-dependencies] +cc = { version = "1.1.22", features = ["parallel"] } +which = "6.0.3" + +[dependencies] +libafl = { path = "../../../libafl" } +libafl_bolts = { path = "../../../libafl_bolts" } +libafl_targets = { path = "../../../libafl_targets", features = [ + "sancov_pcguard_hitcounts", + "libfuzzer", + "pointer_maps", +] } +libafl_cc = { path = "../../../libafl_cc" } +log = { version = "0.4.22", features = 
["release_max_level_info"] } +clap = { version = "4.5.18", features = ["default"] } +nix = { version = "0.29.0", features = ["signal"] } + +[[bin]] +name = "sand_cc" +path = "src/cc.rs" + +[[bin]] +name = "sand_cxx" +path = "src/cxx.rs" + +[[bin]] +name = "fuzzbench_forkserver_sand" +path = "src/main.rs" + +[lib] +name = "forkserver_sand" +crate-type = ["staticlib"] diff --git a/fuzzers/forkserver/fuzzbench_forkserver_sand/Justfile b/fuzzers/forkserver/fuzzbench_forkserver_sand/Justfile new file mode 100644 index 0000000000..d0f8e3c07c --- /dev/null +++ b/fuzzers/forkserver/fuzzbench_forkserver_sand/Justfile @@ -0,0 +1,73 @@ +FUZZER_NAME := 'fuzzbench_forkserver_sand' +FORKSERVER_NAME := 'fuzzbench_forkserver_sand' +CARGO_TARGET_DIR := env("CARGO_TARGET_DIR", "target") +PROFILE := env("PROFILE", "release") +PROFILE_DIR := if PROFILE == "release" { "release" } else if PROFILE == "dev" { "debug" } else { "debug" } +LIBAFL_CC := PROJECT_DIR / CARGO_TARGET_DIR / PROFILE_DIR / "sand_cc" +LIBAFL_CXX := PROJECT_DIR / CARGO_TARGET_DIR / PROFILE_DIR / "sand_cxx" +FUZZER := PROJECT_DIR / CARGO_TARGET_DIR / PROFILE_DIR / FUZZER_NAME +FORKSERVER := PROJECT_DIR / CARGO_TARGET_DIR / PROFILE_DIR / FORKSERVER_NAME +PROJECT_DIR := absolute_path(".") + + +alias cc := cxx + +[linux] +[macos] +cxx: + cargo build --profile {{PROFILE}} + +[windows] +cxx: + echo "Unsupported on this platform" + +[linux] +[macos] +fuzzer: cxx + {{LIBAFL_CC}} {{PROJECT_DIR}}/src/vuln.c -o vuln_native -lm -lz + +[windows] +fuzzer: + echo "Unsupported on this platform" + +[linux] +[macos] +fuzzer_asan: cxx + AFL_SAN_NO_INST=1 {{LIBAFL_CC}} {{PROJECT_DIR}}/src/vuln.c -fsanitize=address -o vuln_asan -lm -lz + +[windows] +fuzzer_asan: + echo "Unsupported on this platform" + +[linux] +[macos] +run: fuzzer fuzzer_asan + #!/bin/bash + mkdir -p input && echo "a" >> input/a + taskset -c 1 {{FUZZER}} -i input -o /tmp/out -a ./vuln_asan -t 1000 ./vuln_native + +[windows] +run: fuzzer fuzzer_asan + echo "Unsupported on 
this platform" + +[linux] +[macos] +test: fuzzer fuzzer_asan + #!/bin/bash + mkdir -p input && echo "a" >> input/a + timeout 10s {{FUZZER}} -i input -o /tmp/out -a ./vuln_asan -t 1000 ./vuln_native | tee fuzz_stdout.log || true + if grep -qa "objectives: 1" fuzz_stdout.log; then + echo "Fuzzer is working" + else + echo "Fuzzer does not generate any testcases or any crashes" + exit 1 + fi + +[windows] +test: fuzzer fuzzer_asan + echo "Unsupported on this platform" + +clean: + rm -rf {{FUZZER}} + rm -rf vuln_native vuln_asan + cargo clean diff --git a/fuzzers/forkserver/fuzzbench_forkserver_sand/src/cc.rs b/fuzzers/forkserver/fuzzbench_forkserver_sand/src/cc.rs new file mode 100644 index 0000000000..87a807928f --- /dev/null +++ b/fuzzers/forkserver/fuzzbench_forkserver_sand/src/cc.rs @@ -0,0 +1,46 @@ +use std::env; + +use libafl_cc::{ClangWrapper, CompilerWrapper, ToolWrapper}; + +pub fn main() { + let args: Vec = env::args().collect(); + if args.len() > 1 { + let mut dir = env::current_exe().unwrap(); + let wrapper_name = dir.file_name().unwrap().to_str().unwrap(); + + let is_cpp = match wrapper_name[wrapper_name.len()-2..].to_lowercase().as_str() { + "cc" => false, + "++" | "pp" | "xx" => true, + _ => panic!("Could not figure out if c or c++ wrapper was called. Expected {dir:?} to end with c or cxx"), + }; + + let no_inst = std::env::var("AFL_SAN_NO_INST").ok().is_some(); + + dir.pop(); + + let mut cc = ClangWrapper::new(); + if !no_inst { + cc.add_arg("-fsanitize-coverage=trace-pc-guard"); + } + if let Some(code) = cc + .cpp(is_cpp) + // silence the compiler wrapper output, needed for some configure scripts. 
+ .silence(true) + .parse_args(&args) + .expect("Failed to parse the command line") + // Imitate afl-cc's compile definitions + .add_arg("-D__AFL_FUZZ_INIT()=int __afl_sharedmem_fuzzing = 1;extern unsigned int *__afl_fuzz_len;extern unsigned char *__afl_fuzz_ptr;unsigned char __afl_fuzz_alt[1048576];unsigned char *__afl_fuzz_alt_ptr = __afl_fuzz_alt;void libafl_start_forkserver(void)") + .add_arg("-D__AFL_FUZZ_TESTCASE_BUF=(__afl_fuzz_ptr ? __afl_fuzz_ptr : __afl_fuzz_alt_ptr)") + .add_arg("-D__AFL_FUZZ_TESTCASE_LEN=(__afl_fuzz_ptr ? *__afl_fuzz_len : (*__afl_fuzz_len = read(0, __afl_fuzz_alt_ptr, 1048576)) == 0xffffffff ? 0 : *__afl_fuzz_len)") + .add_arg("-D__AFL_INIT()=libafl_start_forkserver()") + // Link with libafl's forkserver implementation + .link_staticlib(&dir, "forkserver_sand") + .run() + .expect("Failed to run the wrapped compiler") + { + std::process::exit(code); + } + } else { + panic!("LibAFL CC: No Arguments given"); + } +} diff --git a/fuzzers/forkserver/fuzzbench_forkserver_sand/src/cxx.rs b/fuzzers/forkserver/fuzzbench_forkserver_sand/src/cxx.rs new file mode 100644 index 0000000000..ba4c76b1fe --- /dev/null +++ b/fuzzers/forkserver/fuzzbench_forkserver_sand/src/cxx.rs @@ -0,0 +1,5 @@ +pub mod cc; + +fn main() { + cc::main(); +} diff --git a/fuzzers/forkserver/fuzzbench_forkserver_sand/src/lib.rs b/fuzzers/forkserver/fuzzbench_forkserver_sand/src/lib.rs new file mode 100644 index 0000000000..2646c42bf2 --- /dev/null +++ b/fuzzers/forkserver/fuzzbench_forkserver_sand/src/lib.rs @@ -0,0 +1,9 @@ +use libafl_targets::{map_shared_memory, start_forkserver}; + +#[no_mangle] +pub extern "C" fn libafl_start_forkserver() { + // Map shared memory region for the edge coverage map + map_shared_memory(); + // Start the forkserver + start_forkserver(); +} diff --git a/fuzzers/forkserver/fuzzbench_forkserver_sand/src/main.rs b/fuzzers/forkserver/fuzzbench_forkserver_sand/src/main.rs new file mode 100644 index 0000000000..7e2b7bca99 --- /dev/null +++ 
b/fuzzers/forkserver/fuzzbench_forkserver_sand/src/main.rs @@ -0,0 +1,437 @@ +use core::{cell::RefCell, time::Duration}; +use std::{ + env, + fs::{self, OpenOptions}, + io::Write, + path::PathBuf, + process, +}; + +use clap::{Arg, ArgAction, Command}; +use libafl::{ + corpus::{Corpus, InMemoryOnDiskCorpus, OnDiskCorpus}, + events::SimpleEventManager, + executors::{forkserver::ForkserverExecutor, sand::SANDExecutor}, + feedback_or, + feedbacks::{CrashFeedback, MaxMapFeedback, TimeFeedback}, + fuzzer::{Fuzzer, StdFuzzer}, + inputs::BytesInput, + monitors::SimpleMonitor, + mutators::{ + havoc_mutations, token_mutations::I2SRandReplace, tokens_mutations, StdMOptMutator, + StdScheduledMutator, Tokens, + }, + observers::{CanTrack, HitcountsMapObserver, StdCmpObserver, StdMapObserver, TimeObserver}, + schedulers::{ + powersched::PowerSchedule, IndexesLenTimeMinimizerScheduler, StdWeightedScheduler, + }, + stages::{ + calibrate::CalibrationStage, power::StdPowerMutationalStage, StdMutationalStage, + TracingStage, + }, + state::{HasCorpus, StdState}, + Error, HasMetadata, +}; +use libafl_bolts::{ + current_time, + ownedref::OwnedRefMut, + rands::StdRand, + shmem::{ShMem, ShMemProvider, UnixShMemProvider}, + tuples::{tuple_list, Handled, Merge}, + AsSliceMut, +}; +use libafl_targets::cmps::AFLppCmpLogMap; +use nix::sys::signal::Signal; + +pub fn main() { + let res = match Command::new(env!("CARGO_PKG_NAME")) + .version(env!("CARGO_PKG_VERSION")) + .author("AFLplusplus team") + .about("LibAFL-based fuzzer for Fuzzbench") + .arg( + Arg::new("out") + .short('o') + .long("output") + .help("The directory to place finds in ('corpus')"), + ) + .arg( + Arg::new("in") + .short('i') + .long("input") + .help("The directory to read initial inputs from ('seeds')"), + ) + .arg( + Arg::new("tokens") + .short('x') + .long("tokens") + .help("A file to read tokens from, to be used during fuzzing"), + ) + .arg( + Arg::new("logfile") + .short('l') + .long("logfile") + .help("Duplicates all 
output to this file") + .default_value("libafl.log"), + ) + .arg( + Arg::new("timeout") + .short('t') + .long("timeout") + .help("Timeout for each individual execution, in milliseconds") + .default_value("1200"), + ) + .arg( + Arg::new("exec") + .help("The instrumented binary we want to fuzz") + .required(true), + ) + .arg( + Arg::new("debug-child") + .short('d') + .long("debug-child") + .help("If not set, the child's stdout and stderror will be redirected to /dev/null") + .action(ArgAction::SetTrue), + ) + .arg( + Arg::new("signal") + .short('s') + .long("signal") + .help("Signal used to stop child") + .default_value("SIGKILL"), + ) + .arg( + Arg::new("cmplog") + .short('c') + .long("cmplog") + .help("The instrumented binary with cmplog"), + ) + .arg( + Arg::new("sand") + .short('a') + .long("sand") + .action(ArgAction::Append), + ) + .arg(Arg::new("arguments")) + .try_get_matches() + { + Ok(res) => res, + Err(err) => { + println!( + "Syntax: {}, [-x dictionary] -o corpus_dir -i seed_dir\n{:?}", + env::current_exe() + .unwrap_or_else(|_| "fuzzer".into()) + .to_string_lossy(), + err, + ); + return; + } + }; + + println!( + "Workdir: {:?}", + env::current_dir().unwrap().to_string_lossy().to_string() + ); + + // For fuzzbench, crashes and finds are inside the same `corpus` directory, in the "queue" and "crashes" subdir. 
+ let mut out_dir = PathBuf::from( + res.get_one::("out") + .expect("The --output parameter is missing") + .to_string(), + ); + if fs::create_dir(&out_dir).is_err() { + println!("Out dir at {:?} already exists.", &out_dir); + if !out_dir.is_dir() { + println!("Out dir at {:?} is not a valid directory!", &out_dir); + return; + } + } + let mut crashes = out_dir.clone(); + crashes.push("crashes"); + out_dir.push("queue"); + + let in_dir = PathBuf::from( + res.get_one::("in") + .expect("The --input parameter is missing") + .to_string(), + ); + if !in_dir.is_dir() { + println!("In dir at {:?} is not a valid directory!", &in_dir); + return; + } + + let tokens = res.get_one::("tokens").map(PathBuf::from); + + let logfile = PathBuf::from(res.get_one::("logfile").unwrap().to_string()); + + let timeout = Duration::from_millis( + res.get_one::("timeout") + .unwrap() + .to_string() + .parse() + .expect("Could not parse timeout in milliseconds"), + ); + + let executable = res + .get_one::("exec") + .expect("The executable is missing") + .to_string(); + + let debug_child = res.get_flag("debug-child"); + + let signal = str::parse::( + &res.get_one::("signal") + .expect("The --signal parameter is missing") + .to_string(), + ) + .unwrap(); + + let cmplog_exec = res + .get_one::("cmplog") + .map(std::string::ToString::to_string); + + let arguments = res + .get_many::("arguments") + .map(|v| v.map(std::string::ToString::to_string).collect::>()) + .unwrap_or_default(); + + let sands = res.get_many::("sand").map(|t| { + t.into_iter() + .map(std::string::ToString::to_string) + .collect::>() + }); + fuzz( + out_dir, + crashes, + &in_dir, + tokens, + &logfile, + timeout, + executable, + debug_child, + signal, + &cmplog_exec, + &sands, + &arguments, + ) + .expect("An error occurred while fuzzing"); +} + +/// The actual fuzzer +#[expect(clippy::too_many_arguments)] +fn fuzz( + corpus_dir: PathBuf, + objective_dir: PathBuf, + seed_dir: &PathBuf, + tokenfile: Option, + logfile: &PathBuf, + 
timeout: Duration, + executable: String, + debug_child: bool, + signal: Signal, + cmplog_exec: &Option, + sand_execs: &Option>, + arguments: &[String], +) -> Result<(), Error> { + // a large initial map size that should be enough + // to house all potential coverage maps for our targets + // (we will eventually reduce the used size according to the actual map) + const MAP_SIZE: usize = 65_536; + + let log = RefCell::new(OpenOptions::new().append(true).create(true).open(logfile)?); + + // 'While the monitor are state, they are usually used in the broker - which is likely never restarted + let monitor = SimpleMonitor::new(|s| { + println!("{s}"); + writeln!(log.borrow_mut(), "{:?} {}", current_time(), s).unwrap(); + }); + + // The event manager handle the various events generated during the fuzzing loop + // such as the notification of the addition of a new item to the corpus + let mut mgr = SimpleEventManager::new(monitor); + + // The unix shmem provider for shared memory, to match AFL++'s shared memory at the target side + let mut shmem_provider = UnixShMemProvider::new().unwrap(); + + // The coverage map shared between observer and executor + let mut shmem = shmem_provider.new_shmem(MAP_SIZE).unwrap(); + // let the forkserver know the shmid + unsafe { + shmem.write_to_env("__AFL_SHM_ID").unwrap(); + } + let shmem_buf = shmem.as_slice_mut(); + // To let know the AFL++ binary that we have a big map + std::env::set_var("AFL_MAP_SIZE", format!("{}", MAP_SIZE)); + + // Create an observation channel using the hitcounts map of AFL++ + let edges_observer = unsafe { + HitcountsMapObserver::new(StdMapObserver::new("shared_mem", shmem_buf)).track_indices() + }; + + // Create an observation channel to keep track of the execution time + let time_observer = TimeObserver::new("time"); + + let map_feedback = MaxMapFeedback::new(&edges_observer); + + let calibration = CalibrationStage::new(&map_feedback); + + // Feedback to rate the interestingness of an input + // This one is 
composed by two Feedbacks in OR + let mut feedback = feedback_or!( + // New maximization map feedback linked to the edges observer and the feedback state + map_feedback, + // Time feedback, this one does not need a feedback state + TimeFeedback::new(&time_observer) + ); + + // A feedback to choose if an input is a solution or not + let mut objective = CrashFeedback::new(); + + // create a State from scratch + let mut state = StdState::new( + // RNG + StdRand::new(), + // Corpus that will be evolved, we keep it in memory for performance + InMemoryOnDiskCorpus::::new(corpus_dir).unwrap(), + // Corpus in which we store solutions (crashes in this example), + // on disk so the user can get them after stopping the fuzzer + OnDiskCorpus::new(objective_dir).unwrap(), + // States of the feedbacks. + // The feedbacks can report the data that should persist in the State. + &mut feedback, + // Same for objective feedbacks + &mut objective, + ) + .unwrap(); + + println!("Let's fuzz :)"); + + // Setup a MOPT mutator + let mutator = StdMOptMutator::new( + &mut state, + havoc_mutations().merge(tokens_mutations()), + 7, + 5, + )?; + + let power: StdPowerMutationalStage<_, _, BytesInput, _, _, _> = + StdPowerMutationalStage::new(mutator); + + // A minimization+queue policy to get testcasess from the corpus + let scheduler = IndexesLenTimeMinimizerScheduler::new( + &edges_observer, + StdWeightedScheduler::with_schedule( + &mut state, + &edges_observer, + Some(PowerSchedule::explore()), + ), + ); + let edge_handle = edges_observer.handle(); + + // A fuzzer with feedbacks and a corpus scheduler + let mut fuzzer = StdFuzzer::new(scheduler, feedback, objective); + + let mut tokens = Tokens::new(); + let mut executor = ForkserverExecutor::builder() + .program(executable) + .debug_child(debug_child) + .shmem_provider(&mut shmem_provider) + .autotokens(&mut tokens) + .parse_afl_cmdline(arguments) + .coverage_map_size(MAP_SIZE) + .timeout(timeout) + .kill_signal(signal) + 
.is_persistent(true) + .build_dynamic_map(edges_observer, tuple_list!(time_observer)) + .unwrap(); + + // Read tokens + if let Some(tokenfile) = tokenfile { + tokens.add_from_file(tokenfile)?; + } + if !tokens.is_empty() { + state.add_metadata(tokens); + } + + state + .load_initial_inputs(&mut fuzzer, &mut executor, &mut mgr, &[seed_dir.clone()]) + .unwrap_or_else(|_| { + println!("Failed to load initial corpus at {:?}", &seed_dir); + process::exit(0); + }); + println!("We imported {} inputs from disk.", state.corpus().count()); + + let mut sand_executors = vec![]; + for (idx, sand) in sand_execs + .as_ref() + .map(|t| t.iter()) + .into_iter() + .flatten() + .enumerate() + { + // The extra binaries doesn't need track coverage + let buf = Box::leak(Box::new(vec![0; MAP_SIZE])); + let edges_observer = unsafe { + HitcountsMapObserver::new(StdMapObserver::new( + format!("dumb_shm_{}", idx), + buf.as_mut_slice(), + )) + .track_indices() + }; + let time_observer = TimeObserver::new(format!("dumb_tm_{}", idx)); + let executor = ForkserverExecutor::builder() + .program(sand.clone()) + .debug_child(debug_child) + .shmem_provider(&mut shmem_provider) + .parse_afl_cmdline(arguments) + .coverage_map_size(MAP_SIZE) + .timeout(timeout) + .kill_signal(signal) + .is_persistent(true) + .build_dynamic_map(edges_observer, tuple_list!(time_observer)) + .unwrap(); + sand_executors.push(executor); + } + let mut executor = SANDExecutor::new_paper(executor, sand_executors, edge_handle); + + if let Some(exec) = &cmplog_exec { + // The cmplog map shared between observer and executor + let mut cmplog_shmem = shmem_provider.uninit_on_shmem::().unwrap(); + // let the forkserver know the shmid + unsafe { + cmplog_shmem.write_to_env("__AFL_CMPLOG_SHM_ID").unwrap(); + } + let cmpmap = unsafe { OwnedRefMut::::from_shmem(&mut cmplog_shmem) }; + + let cmplog_observer = StdCmpObserver::new("cmplog", cmpmap, true); + + let cmplog_executor = ForkserverExecutor::builder() + .program(exec) + 
.debug_child(debug_child) + .shmem_provider(&mut shmem_provider) + .parse_afl_cmdline(arguments) + .is_persistent(true) + .timeout(timeout * 10) + .kill_signal(signal) + .build(tuple_list!(cmplog_observer)) + .unwrap(); + + let tracing = TracingStage::new(cmplog_executor); + + // Setup a randomic Input2State stage + let i2s = + StdMutationalStage::new(StdScheduledMutator::new(tuple_list!(I2SRandReplace::new()))); + + // The order of the stages matter! + let mut stages = tuple_list!(calibration, tracing, i2s, power); + + fuzzer.fuzz_loop(&mut stages, &mut executor, &mut state, &mut mgr)?; + } else { + // The order of the stages matter! + let mut stages = tuple_list!(calibration, power); + + fuzzer.fuzz_loop(&mut stages, &mut executor, &mut state, &mut mgr)?; + } + + // Never reached + Ok(()) +} diff --git a/fuzzers/forkserver/fuzzbench_forkserver_sand/src/vuln.c b/fuzzers/forkserver/fuzzbench_forkserver_sand/src/vuln.c new file mode 100644 index 0000000000..fe184806a2 --- /dev/null +++ b/fuzzers/forkserver/fuzzbench_forkserver_sand/src/vuln.c @@ -0,0 +1,40 @@ +#include +#include +#include + +char *p; + +// The following line is needed for shared memory testcase fuzzing +__AFL_FUZZ_INIT(); + +void vuln(char *buf) { + p = malloc(1024); + memcpy(p, buf, 16); + free(p); + + if (buf[0] == 0x41) { + p[0] = buf[0]; + } else { + p = buf; + } +} + +int main(int argc, char **argv) { + // Start the forkserver at this point (i.e., forks will happen here) + __AFL_INIT(); + + // The following five lines are for normal fuzzing. 
+ /* + FILE *file = stdin; + if (argc > 1) { file = fopen(argv[1], "rb"); } + char buf[16]; + char *p = fgets(buf, 16, file); + buf[15] = 0; + */ + + // The following line is also needed for shared memory testcase fuzzing + unsigned char *buf = __AFL_FUZZ_TESTCASE_BUF; // must be after __AFL_INIT + vuln((char *)buf); + + return 0; +} \ No newline at end of file diff --git a/libafl/src/executors/mod.rs b/libafl/src/executors/mod.rs index 169de9ff3f..2e5754908c 100644 --- a/libafl/src/executors/mod.rs +++ b/libafl/src/executors/mod.rs @@ -1,6 +1,5 @@ //! Executors take input, and run it in the target. -#[cfg(unix)] use alloc::vec::Vec; use core::{fmt::Debug, time::Duration}; @@ -29,6 +28,8 @@ pub mod differential; #[cfg(all(feature = "std", feature = "fork", unix))] pub mod forkserver; pub mod inprocess; +/// SAND() implementation +pub mod sand; /// The module for inproc fork executor #[cfg(all(feature = "std", unix))] @@ -137,6 +138,75 @@ pub trait HasTimeout { fn set_timeout(&mut self, timeout: Duration); } +/// Like [`crate::observers::ObserversTuple`], a list of executors +pub trait ExecutorsTuple { + /// Execute the executors and stop if any of them returns a crash + fn run_target_all( + &mut self, + fuzzer: &mut Z, + state: &mut S, + mgr: &mut EM, + input: &I, + ) -> Result; +} + +/// Since in most cases, the executors types can not be determined during compilation +/// time (for instance, the number of executors might change), this implementation would +/// act as a small helper. 
+impl<E, EM, I, S, Z> ExecutorsTuple<EM, I, S, Z> for Vec<E>
+where
+    E: Executor<EM, I, S, Z>,
+{
+    fn run_target_all(
+        &mut self,
+        fuzzer: &mut Z,
+        state: &mut S,
+        mgr: &mut EM,
+        input: &I,
+    ) -> Result<ExitKind, Error> {
+        let mut kind = ExitKind::Ok;
+        for e in self.iter_mut() {
+            kind = e.run_target(fuzzer, state, mgr, input)?;
+            if kind == ExitKind::Crash {
+                return Ok(kind);
+            }
+        }
+        Ok(kind)
+    }
+}
+
+impl<EM, I, S, Z> ExecutorsTuple<EM, I, S, Z> for () {
+    fn run_target_all(
+        &mut self,
+        _fuzzer: &mut Z,
+        _state: &mut S,
+        _mgr: &mut EM,
+        _input: &I,
+    ) -> Result<ExitKind, Error> {
+        Ok(ExitKind::Ok)
+    }
+}
+
+impl<Head, Tail, EM, I, S, Z> ExecutorsTuple<EM, I, S, Z> for (Head, Tail)
+where
+    Head: Executor<EM, I, S, Z>,
+    Tail: ExecutorsTuple<EM, I, S, Z>,
+{
+    fn run_target_all(
+        &mut self,
+        fuzzer: &mut Z,
+        state: &mut S,
+        mgr: &mut EM,
+        input: &I,
+    ) -> Result<ExitKind, Error> {
+        let kind = self.0.run_target(fuzzer, state, mgr, input)?;
+        if kind == ExitKind::Crash {
+            return Ok(kind);
+        }
+        self.1.run_target_all(fuzzer, state, mgr, input)
+    }
+}
+
 /// The common signals we want to handle
 #[cfg(unix)]
 #[inline]
diff --git a/libafl/src/executors/sand.rs b/libafl/src/executors/sand.rs
new file mode 100644
index 0000000000..906692643e
--- /dev/null
+++ b/libafl/src/executors/sand.rs
@@ -0,0 +1,193 @@
+//! Implementation for "SAND: Decoupling Sanitization from Fuzzing for Low Overhead"
+//! Reference Implementation:
+//! Detailed docs: <https://github.com/AFLplusplus/AFLplusplus/blob/stable/docs/SAND.md>
+//! Maintainer: Ziqiao Kong ()
+//! Preprint: accepted by ICSE'25
+
+use alloc::vec::Vec;
+use core::marker::PhantomData;
+
+use libafl_bolts::{
+    AsIter, Error, Named, hash_std,
+    tuples::{Handle, MatchName, MatchNameRef},
+};
+
+use super::{Executor, ExecutorsTuple, ExitKind, HasObservers, HasTimeout};
+use crate::{HasNamedMetadata, observers::MapObserver};
+
+/// The execution pattern of the [`SANDExecutor`]. The default value used in our paper is
+/// [`SANDExecutionPattern::SimplifiedTrace`] and we by design don't include coverage
+/// increasing pattern here as it will miss at least 25% bugs and easy enough to implement
+/// by iterating the crash corpus.
+#[derive(Debug, Clone, Default, Copy)]
+pub enum SANDExecutionPattern {
+    /// The simplified trace, captures ~92% bug triggering inputs with ~20% overhead
+    /// on average (less than 5% overhead on most targets during evaluation)
+    #[default]
+    SimplifiedTrace,
+    /// The unique trace, captures ~99.9% bug-triggering inputs with more than 50% overhead.
+    UniqueTrace,
+}
+
+/// The core executor implementation. It wraps another executor and a list of extra executors.
+/// Please refer to [SAND.md](https://github.com/AFLplusplus/AFLplusplus/blob/stable/docs/SAND.md) for
+/// how to build `sand_executors`.
+#[derive(Debug, Clone)]
+pub struct SANDExecutor<E, ET, C, O> {
+    // The main executor; every input is run on it first.
+    executor: E,
+    // The extra executors, only run for inputs whose execution pattern is new.
+    sand_executors: ET,
+    // Bit set, indexed by pattern hash, of the execution patterns seen so far.
+    bitmap: Vec<u8>,
+    // Handle of the map observer (looked up in `executor`) the pattern is computed from.
+    ob_ref: Handle<C>,
+    // How the coverage map is turned into an execution pattern.
+    pattern: SANDExecutionPattern,
+    ph: PhantomData<O>,
+}
+
+impl<E, ET, C, O> SANDExecutor<E, ET, C, O>
+where
+    C: Named,
+{
+    /// Split a pattern hash into the `(byte, bit)` position it occupies in the bitmap.
+    ///
+    /// [`SANDExecutor::new`] never creates an empty bitmap, so the modulo cannot divide by zero.
+    fn bitmap_pos(&self, idx: usize) -> (usize, usize) {
+        ((idx / 8) % self.bitmap.len(), idx % 8)
+    }
+
+    /// Mark the execution pattern with hash `idx` as seen.
+    fn bitmap_set(&mut self, idx: usize) {
+        let (byte, bit) = self.bitmap_pos(idx);
+        self.bitmap[byte] |= 1u8 << bit;
+    }
+
+    /// Return `1` if the execution pattern with hash `idx` was marked as seen, `0` otherwise.
+    fn bitmap_read(&self, idx: usize) -> u8 {
+        let (byte, bit) = self.bitmap_pos(idx);
+        (self.bitmap[byte] >> bit) & 1
+    }
+
+    /// Create a new [`SANDExecutor`].
+    ///
+    /// `bitmap_size` is the size in bytes of the bitmap remembering the execution patterns
+    /// seen so far. `0` is rounded up to `1`, as an empty bitmap would make every lookup
+    /// divide by zero.
+    pub fn new(
+        executor: E,
+        sand_extra_executors: ET,
+        observer_handle: Handle<C>,
+        bitmap_size: usize,
+        pattern: SANDExecutionPattern,
+    ) -> Self {
+        Self {
+            executor,
+            sand_executors: sand_extra_executors,
+            bitmap: vec![0; bitmap_size.max(1)],
+            ob_ref: observer_handle,
+            pattern,
+            ph: PhantomData,
+        }
+    }
+
+    /// Create a new [`SANDExecutor`] using paper setup: a `1 << 29` byte bitmap and
+    /// [`SANDExecutionPattern::SimplifiedTrace`].
+    pub fn new_paper(executor: E, sand_extra_executors: ET, observer_handle: Handle<C>) -> Self {
+        Self::new(
+            executor,
+            sand_extra_executors,
+            observer_handle,
+            1 << 29,
+            SANDExecutionPattern::SimplifiedTrace,
+        )
+    }
+}
+
+impl<E, ET, C, O> HasTimeout for SANDExecutor<E, ET, C, O>
+where
+    E: HasTimeout,
+{
+    fn timeout(&self) -> core::time::Duration {
+        self.executor.timeout()
+    }
+
+    fn set_timeout(&mut self, timeout: core::time::Duration) {
+        self.executor.set_timeout(timeout);
+    }
+}
+
+impl<E, ET, C, O> HasObservers for SANDExecutor<E, ET, C, O>
+where
+    E: HasObservers,
+{
+    type Observers = E::Observers;
+    fn observers(&self) -> libafl_bolts::tuples::RefIndexable<&Self::Observers, Self::Observers> {
+        self.executor.observers()
+    }
+
+    fn observers_mut(
+        &mut self,
+    ) -> libafl_bolts::tuples::RefIndexable<&mut Self::Observers, Self::Observers> {
+        self.executor.observers_mut()
+    }
+}
+
+impl<E, ET, C, O, EM, I, S, Z, OT> Executor<EM, I, S, Z> for SANDExecutor<E, ET, C, O>
+where
+    ET: ExecutorsTuple<EM, I, S, Z>,
+    E: Executor<EM, I, S, Z> + HasObservers<Observers = OT>,
+    OT: MatchName,
+    O: MapObserver<Entry = u8> + for<'it> AsIter<'it, Item = u8>,
+    C: AsRef<O> + Named,
+    S: HasNamedMetadata,
+{
+    /// Run `input` on the main executor, then on the extra executors if the main run
+    /// exited normally with an execution pattern that was not seen before.
+    ///
+    /// A crash reported by the extra executors replaces the main executor's result; any
+    /// other result of theirs is dropped in favour of the main one.
+    fn run_target(
+        &mut self,
+        fuzzer: &mut Z,
+        state: &mut S,
+        mgr: &mut EM,
+        input: &I,
+    ) -> Result<ExitKind, Error> {
+        let kind = self.executor.run_target(fuzzer, state, mgr, input)?;
+        let ot = self.executor.observers();
+        let ob = ot
+            .get(&self.ob_ref)
+            .expect("the observer handle must belong to the wrapped executor's observers")
+            .as_ref();
+        let initial = ob.initial();
+        let covs = match self.pattern {
+            SANDExecutionPattern::SimplifiedTrace => ob
+                .as_iter()
+                .map(|x| if *x == initial { 0x1 } else { 0x80 })
+                .collect::<Vec<_>>(),
+            SANDExecutionPattern::UniqueTrace => ob.to_vec(),
+        };
+        // Our paper uses xxh32 but it shouldn't have significant collision for most hashing algorithms.
+        let pattern_hash = hash_std(&covs) as usize;
+
+        // Only inputs that exit normally on the main executor and show a pattern we have
+        // not seen yet are handed to the extra executors.
+        let ret = if kind == ExitKind::Ok && self.bitmap_read(pattern_hash) == 0 {
+            let sand_kind = self
+                .sand_executors
+                .run_target_all(fuzzer, state, mgr, input)?;
+            if sand_kind == ExitKind::Crash {
+                sand_kind
+            } else {
+                kind
+            }
+        } else {
+            kind
+        };
+
+        self.bitmap_set(pattern_hash);
+        Ok(ret)
+    }
+}
diff --git a/libafl/src/observers/mod.rs b/libafl/src/observers/mod.rs
index b0c38ea2a8..10bc9fee73 100644
--- a/libafl/src/observers/mod.rs
+++ b/libafl/src/observers/mod.rs
@@ -324,9 +324,12 @@ mod instant_serializer {
 impl TimeObserver {
     /// Creates a new [`TimeObserver`] with the given name.
#[must_use] - pub fn new(name: &'static str) -> Self { + pub fn new(name: S) -> Self + where + S: Into>, + { Self { - name: Cow::from(name), + name: name.into(), #[cfg(feature = "std")] start_time: Instant::now(), diff --git a/libafl/src/schedulers/testcase_score.rs b/libafl/src/schedulers/testcase_score.rs index 7b3162a237..0aa4e5df20 100644 --- a/libafl/src/schedulers/testcase_score.rs +++ b/libafl/src/schedulers/testcase_score.rs @@ -2,6 +2,7 @@ use alloc::string::{String, ToString}; use libafl_bolts::{HasLen, HasRefCnt}; +use num_traits::Zero; use crate::{ Error, HasMetadata, @@ -273,7 +274,6 @@ where let psmeta = state.metadata::()?; let tcmeta = entry.metadata::()?; - // This means that this testcase has never gone through the calibration stage before1, // In this case we'll just return the default weight // This methoud is called in corpus's on_add() method. Fuzz_level is zero at that time. @@ -305,7 +305,12 @@ where } weight *= avg_exec_us / q_exec_us; - weight *= libm::log2(q_bitmap_size).max(1.0) / avg_bitmap_size; + weight *= if avg_bitmap_size.is_zero() { + // This can happen when the bitmap size of the target is as small as 1. + 1.0 + } else { + libm::log2(q_bitmap_size).max(1.0) / avg_bitmap_size + }; let tc_ref = match entry.metadata_map().get::() { Some(meta) => meta.refcnt() as f64,