diff --git a/fuzzers/fuzzbench/src/lib.rs b/fuzzers/fuzzbench/src/lib.rs index be6cb25459..3de293f85f 100644 --- a/fuzzers/fuzzbench/src/lib.rs +++ b/fuzzers/fuzzbench/src/lib.rs @@ -39,11 +39,11 @@ use libafl::{ StdMOptMutator, StdScheduledMutator, Tokens, }, observers::{HitcountsMapObserver, StdMapObserver, TimeObserver}, - schedulers::{IndexesLenTimeMinimizerScheduler, PowerQueueScheduler}, + schedulers::{ + powersched::PowerSchedule, IndexesLenTimeMinimizerScheduler, PowerQueueScheduler, + }, stages::{ - calibrate::CalibrationStage, - power::{PowerMutationalStage, PowerSchedule}, - StdMutationalStage, TracingStage, + calibrate::CalibrationStage, power::PowerMutationalStage, StdMutationalStage, TracingStage, }, state::{HasCorpus, HasMetadata, StdState}, Error, @@ -299,7 +299,7 @@ fn fuzz( println!("Warning: LLVMFuzzerInitialize failed with -1") } - let calibration = CalibrationStage::new(&mut state, &edges_observer); + let calibration = CalibrationStage::new(&edges_observer); // Setup a randomic Input2State stage let i2s = StdMutationalStage::new(StdScheduledMutator::new(tuple_list!(I2SRandReplace::new()))); @@ -307,7 +307,8 @@ fn fuzz( // Setup a MOPT mutator let mutator = StdMOptMutator::new(&mut state, havoc_mutations().merge(tokens_mutations()), 5)?; - let power = PowerMutationalStage::new(mutator, PowerSchedule::FAST, &edges_observer); + let power = + PowerMutationalStage::new(&mut state, mutator, &edges_observer, PowerSchedule::FAST); // A minimization+queue policy to get testcasess from the corpus let scheduler = IndexesLenTimeMinimizerScheduler::new(PowerQueueScheduler::new()); diff --git a/fuzzers/fuzzbench_fork_qemu/src/fuzzer.rs b/fuzzers/fuzzbench_fork_qemu/src/fuzzer.rs index 96d35f0ac4..90699fe579 100644 --- a/fuzzers/fuzzbench_fork_qemu/src/fuzzer.rs +++ b/fuzzers/fuzzbench_fork_qemu/src/fuzzer.rs @@ -36,11 +36,12 @@ use libafl::{ StdMOptMutator, StdScheduledMutator, Tokens, }, observers::{ConstMapObserver, HitcountsMapObserver, TimeObserver}, - schedulers::{IndexesLenTimeMinimizerScheduler, PowerQueueScheduler}, + schedulers::{ + powersched::PowerSchedule, IndexesLenTimeMinimizerScheduler, PowerQueueScheduler, + }, stages::{ - calibrate::CalibrationStage, - power::{PowerMutationalStage, PowerSchedule}, - ShadowTracingStage, StdMutationalStage, + calibrate::CalibrationStage, power::PowerMutationalStage, ShadowTracingStage, + StdMutationalStage, }, state::{HasCorpus, HasMetadata, StdState}, Error, @@ -270,7 +271,7 @@ fn fuzz( ) }); - let calibration = CalibrationStage::new(&mut state, &edges_observer); + let calibration = CalibrationStage::new(&edges_observer); // Setup a randomic Input2State stage let i2s = StdMutationalStage::new(StdScheduledMutator::new(tuple_list!(I2SRandReplace::new()))); @@ -278,7 +279,8 @@ fn fuzz( // Setup a MOPT mutator let mutator = StdMOptMutator::new(&mut state, havoc_mutations().merge(tokens_mutations()), 5)?; - let power = PowerMutationalStage::new(mutator, PowerSchedule::FAST, &edges_observer); + let power = + PowerMutationalStage::new(&mut state, mutator, &edges_observer, PowerSchedule::FAST); // A minimization+queue policy to get testcasess from the corpus let scheduler = IndexesLenTimeMinimizerScheduler::new(PowerQueueScheduler::new()); diff --git a/fuzzers/fuzzbench_qemu/src/fuzzer.rs b/fuzzers/fuzzbench_qemu/src/fuzzer.rs index 103092194e..b8da496d40 100644 --- a/fuzzers/fuzzbench_qemu/src/fuzzer.rs +++ b/fuzzers/fuzzbench_qemu/src/fuzzer.rs @@ -36,11 +36,12 @@ use libafl::{ StdMOptMutator, StdScheduledMutator, Tokens, }, 
observers::{HitcountsMapObserver, TimeObserver, VariableMapObserver}, - schedulers::{IndexesLenTimeMinimizerScheduler, PowerQueueScheduler}, + schedulers::{ + powersched::PowerSchedule, IndexesLenTimeMinimizerScheduler, PowerQueueScheduler, + }, stages::{ - calibrate::CalibrationStage, - power::{PowerMutationalStage, PowerSchedule}, - ShadowTracingStage, StdMutationalStage, + calibrate::CalibrationStage, power::PowerMutationalStage, ShadowTracingStage, + StdMutationalStage, }, state::{HasCorpus, HasMetadata, StdState}, Error, @@ -283,7 +284,7 @@ fn fuzz( ) }); - let calibration = CalibrationStage::new(&mut state, &edges_observer); + let calibration = CalibrationStage::new(&edges_observer); // Setup a randomic Input2State stage let i2s = StdMutationalStage::new(StdScheduledMutator::new(tuple_list!(I2SRandReplace::new()))); @@ -291,7 +292,8 @@ fn fuzz( // Setup a MOPT mutator let mutator = StdMOptMutator::new(&mut state, havoc_mutations().merge(tokens_mutations()), 5)?; - let power = PowerMutationalStage::new(mutator, PowerSchedule::FAST, &edges_observer); + let power = + PowerMutationalStage::new(&mut state, mutator, &edges_observer, PowerSchedule::FAST); // A minimization+queue policy to get testcasess from the corpus let scheduler = IndexesLenTimeMinimizerScheduler::new(PowerQueueScheduler::new()); diff --git a/fuzzers/fuzzbench_selected/.gitignore b/fuzzers/fuzzbench_selected/.gitignore new file mode 100644 index 0000000000..d3561edaf7 --- /dev/null +++ b/fuzzers/fuzzbench_selected/.gitignore @@ -0,0 +1,2 @@ +libpng-* +fuzzer diff --git a/fuzzers/fuzzbench_selected/Cargo.toml b/fuzzers/fuzzbench_selected/Cargo.toml new file mode 100644 index 0000000000..93231ff387 --- /dev/null +++ b/fuzzers/fuzzbench_selected/Cargo.toml @@ -0,0 +1,33 @@ +[package] +name = "fuzzbench" +version = "0.7.1" +authors = ["Andrea Fioraldi ", "Dominik Maier "] +edition = "2021" + +[features] +default = ["std"] +std = [] + +[profile.release] +lto = true +codegen-units = 1 +opt-level = 3 +debug = true + +[build-dependencies] +cc = { version = "1.0", features = ["parallel"] } +which = { version = "4.0.2" } +num_cpus = "1.0" + +[dependencies] +libafl = { path = "../../libafl/" } +libafl_targets = { path = "../../libafl_targets/", features = ["sancov_pcguard_hitcounts", "sancov_cmplog", "libfuzzer"] } +# TODO Include it only when building cc +libafl_cc = { path = "../../libafl_cc/" } +clap = { version = "3.0", features = ["default"] } +nix = "0.23" +mimalloc = { version = "*", default-features = false } + +[lib] +name = "fuzzbench" +crate-type = ["staticlib"] diff --git a/fuzzers/fuzzbench_selected/Makefile.toml b/fuzzers/fuzzbench_selected/Makefile.toml new file mode 100644 index 0000000000..be7dca4516 --- /dev/null +++ b/fuzzers/fuzzbench_selected/Makefile.toml @@ -0,0 +1,99 @@ +[env] +FUZZER_NAME="fuzzer" +PROJECT_DIR = { script = ["pwd"] } + +[tasks.unsupported] +script_runner="@shell" +script=''' +echo "Cargo-make not integrated yet on this" +''' + +# Compilers +[tasks.cxx] +linux_alias = "cxx_unix" +mac_alias = "cxx_unix" +windows_alias = "unsupported" + +[tasks.cxx_unix] +command = "cargo" +args = ["build" , "--release"] + +[tasks.cc] +linux_alias = "cc_unix" +mac_alias = "cc_unix" +windows_alias = "unsupported" + +[tasks.cc_unix] +command = "cargo" +args = ["build" , "--release"] + +# fuzz.o File +[tasks.fuzz_o] +linux_alias = "fuzz_o_unix" +mac_alias = "fuzz_o_unix" +windows_alias = "unsupported" + +[tasks.fuzz_o_unix] +command = "target/release/libafl_cc" +args = ["--libafl-no-link", "-O3", "-c", "fuzz.c", 
"-o", "fuzz.o"] +dependencies = ["cc", "cxx"] + +# Fuzzer +[tasks.fuzzer] +linux_alias = "fuzzer_unix" +mac_alias = "fuzzer_unix" +windows_alias = "unsupported" + +[tasks.fuzzer_unix] +command = "target/release/libafl_cxx" +args = ["--libafl", "fuzz.o", "-o", "${FUZZER_NAME}", "-lm", "-lz"] +dependencies = ["cc", "cxx", "fuzz_o"] + +# Run +[tasks.run] +linux_alias = "run_unix" +mac_alias = "run_unix" +windows_alias = "unsupported" + +[tasks.run_unix] +script_runner="@shell" +script=''' +rm -rf libafl_unix_shmem_server || true +mkdir in || true +echo a > in/a +./${FUZZER_NAME} -o out -i in +''' +dependencies = ["fuzzer"] + + +# Test +[tasks.test] +linux_alias = "test_unix" +mac_alias = "test_unix" +windows_alias = "unsupported" + +[tasks.test_unix] +script_runner="@shell" +script=''' +rm -rf libafl_unix_shmem_server || true +mkdir in || true +echo a > in/a +# Allow sigterm as exit code +timeout 11s ./${FUZZER_NAME} -o out -i in || [ $? -eq 124 ] +rm -rf out || true +rm -rf in || true +''' +dependencies = ["fuzzer"] + +# Clean +[tasks.clean] +linux_alias = "clean_unix" +mac_alias = "clean_unix" +windows_alias = "unsupported" + +[tasks.clean_unix] +script_runner="@shell" +script=''' +rm ./${FUZZER_NAME} || true +rm fuzz.o || true +''' \ No newline at end of file diff --git a/fuzzers/fuzzbench_selected/README.md b/fuzzers/fuzzbench_selected/README.md new file mode 100644 index 0000000000..df34f5e090 --- /dev/null +++ b/fuzzers/fuzzbench_selected/README.md @@ -0,0 +1,17 @@ +# Fuzzbench Harness + +This folder contains an example fuzzer tailored for fuzzbench. +It uses the best possible setting, with the exception of a SimpleRestartingEventManager instead of an LlmpEventManager - since fuzzbench is single threaded. +Real fuzz campaigns should consider using multithreaded LlmpEventManager, see the other examples. + +## Build + +To build this example, run `cargo build --release`. +This will build the fuzzer compilers (`libafl_cc` and `libafl_cpp`) with `src/lib.rs` as fuzzer. +The fuzzer uses the libfuzzer compatibility layer and the SanitizerCoverage runtime functions for coverage feedback. + +These can then be used to build libfuzzer harnesses in the software project of your choice. +Finally, just run the resulting binary with `out_dir`, `in_dir`. + +In any real-world scenario, you should use `taskset` to pin each client to an empty CPU core, the lib does not pick an empty core automatically (yet). 
+ diff --git a/fuzzers/fuzzbench_selected/fuzz.c b/fuzzers/fuzzbench_selected/fuzz.c new file mode 100644 index 0000000000..7eea9f3b30 --- /dev/null +++ b/fuzzers/fuzzbench_selected/fuzz.c @@ -0,0 +1,15 @@ +#include +#include + +int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + if (Size >= 8 && *(uint32_t*)Data == 0xaabbccdd) + abort(); +} + +/* +int main() { + + char buf [10] = {0}; + LLVMFuzzerTestOneInput(buf, 10); + +}*/ diff --git a/fuzzers/fuzzbench_selected/src/bin/libafl_cc.rs b/fuzzers/fuzzbench_selected/src/bin/libafl_cc.rs new file mode 100644 index 0000000000..68a84b0333 --- /dev/null +++ b/fuzzers/fuzzbench_selected/src/bin/libafl_cc.rs @@ -0,0 +1,42 @@ +use libafl_cc::{ClangWrapper, CompilerWrapper, LLVMPasses}; +use std::env; + +pub fn main() { + let args: Vec = env::args().collect(); + if args.len() > 1 { + let mut dir = env::current_exe().unwrap(); + let wrapper_name = dir.file_name().unwrap().to_str().unwrap(); + + let is_cpp = match wrapper_name[wrapper_name.len()-2..].to_lowercase().as_str() { + "cc" => false, + "++" | "pp" | "xx" => true, + _ => panic!("Could not figure out if c or c++ warpper was called. Expected {:?} to end with c or cxx", dir), + }; + + dir.pop(); + + let mut cc = ClangWrapper::new(); + + #[cfg(target_os = "linux")] + cc.add_pass(LLVMPasses::AutoTokens); + + if let Some(code) = cc + .cpp(is_cpp) + // silence the compiler wrapper output, needed for some configure scripts. + .silence(true) + // add arguments only if --libafl or --libafl-no-link are present + .need_libafl_arg(true) + .parse_args(&args) + .expect("Failed to parse the command line") + .link_staticlib(&dir, "fuzzbench") + .add_arg("-fsanitize-coverage=trace-pc-guard,trace-cmp") + .add_pass(LLVMPasses::CmpLogRtn) + .run() + .expect("Failed to run the wrapped compiler") + { + std::process::exit(code); + } + } else { + panic!("LibAFL CC: No Arguments given"); + } +} diff --git a/fuzzers/fuzzbench_selected/src/bin/libafl_cxx.rs b/fuzzers/fuzzbench_selected/src/bin/libafl_cxx.rs new file mode 100644 index 0000000000..ce786239b0 --- /dev/null +++ b/fuzzers/fuzzbench_selected/src/bin/libafl_cxx.rs @@ -0,0 +1,5 @@ +pub mod libafl_cc; + +fn main() { + libafl_cc::main() +} diff --git a/fuzzers/fuzzbench_selected/src/lib.rs b/fuzzers/fuzzbench_selected/src/lib.rs new file mode 100644 index 0000000000..bbc632d3ec --- /dev/null +++ b/fuzzers/fuzzbench_selected/src/lib.rs @@ -0,0 +1,401 @@ +//! A singlethreaded libfuzzer-like fuzzer that can auto-restart. 
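+//!
+//! Rough flow of this file: parse the command line with `clap`, replay any plain
+//! file arguments through `run_testcases`, otherwise attach a
+//! `SimpleRestartingEventManager` over shared memory, build the edge/time/cmplog
+//! observers and feedbacks, schedule the corpus with an
+//! `IndexesLenTimeMinimizerScheduler` over a `WeightedScheduler`, and run the
+//! calibration, tracing, Input2State and power stages in `fuzz_loop`.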
+use mimalloc::MiMalloc; +#[global_allocator] +static GLOBAL: MiMalloc = MiMalloc; + +use clap::{App, Arg}; +use core::{cell::RefCell, time::Duration}; +#[cfg(unix)] +use nix::{self, unistd::dup}; +#[cfg(unix)] +use std::os::unix::io::{AsRawFd, FromRawFd}; +use std::{ + env, + fs::{self, File, OpenOptions}, + io::{self, Read, Write}, + path::PathBuf, + process, +}; + +use libafl::{ + bolts::{ + current_nanos, current_time, + os::dup2, + rands::StdRand, + shmem::{ShMemProvider, StdShMemProvider}, + tuples::{tuple_list, Merge}, + AsSlice, + }, + corpus::{Corpus, OnDiskCorpus}, + events::SimpleRestartingEventManager, + executors::{inprocess::InProcessExecutor, ExitKind, TimeoutExecutor}, + feedback_or, + feedbacks::{CrashFeedback, MapFeedbackState, MaxMapFeedback, TimeFeedback}, + fuzzer::{Fuzzer, StdFuzzer}, + inputs::{BytesInput, HasTargetBytes}, + monitors::SimpleMonitor, + mutators::{ + scheduled::havoc_mutations, token_mutations::I2SRandReplace, tokens_mutations, + StdMOptMutator, StdScheduledMutator, Tokens, + }, + observers::{HitcountsMapObserver, StdMapObserver, TimeObserver}, + schedulers::{powersched::PowerSchedule, IndexesLenTimeMinimizerScheduler, WeightedScheduler}, + stages::{ + calibrate::CalibrationStage, power::PowerMutationalStage, StdMutationalStage, TracingStage, + }, + state::{HasCorpus, HasMetadata, StdState}, + Error, +}; +use libafl_targets::{ + libfuzzer_initialize, libfuzzer_test_one_input, CmpLogObserver, CMPLOG_MAP, EDGES_MAP, + MAX_EDGES_NUM, +}; + +#[cfg(target_os = "linux")] +use libafl_targets::autotokens; + +/// The fuzzer main (as `no_mangle` C function) +#[no_mangle] +pub fn libafl_main() { + // Registry the metadata types used in this fuzzer + // Needed only on no_std + //RegistryBuilder::register::(); + + let res = match App::new("libafl_fuzzbench") + .version("0.7.1") + .author("AFLplusplus team") + .about("LibAFL-based fuzzer for Fuzzbench") + .arg( + Arg::new("out") + .short('o') + .long("output") + .help("The directory to place finds in ('corpus')") + .takes_value(true), + ) + .arg( + Arg::new("in") + .short('i') + .long("input") + .help("The directory to read initial inputs from ('seeds')") + .takes_value(true), + ) + .arg( + Arg::new("tokens") + .short('x') + .long("tokens") + .help("A file to read tokens from, to be used during fuzzing") + .takes_value(true), + ) + .arg( + Arg::new("logfile") + .short('l') + .long("logfile") + .help("Duplicates all output to this file") + .default_value("libafl.log"), + ) + .arg( + Arg::new("timeout") + .short('t') + .long("timeout") + .help("Timeout for each individual execution, in milliseconds") + .default_value("1200"), + ) + .arg(Arg::new("remaining").multiple_values(true)) + .try_get_matches() + { + Ok(res) => res, + Err(err) => { + println!( + "Syntax: {}, [-x dictionary] -o corpus_dir -i seed_dir\n{:?}", + env::current_exe() + .unwrap_or_else(|_| "fuzzer".into()) + .to_string_lossy(), + err.info, + ); + return; + } + }; + + println!( + "Workdir: {:?}", + env::current_dir().unwrap().to_string_lossy().to_string() + ); + + if let Some(filenames) = res.values_of("remaining") { + let filenames: Vec<&str> = filenames.collect(); + if !filenames.is_empty() { + run_testcases(&filenames); + return; + } + } + + // For fuzzbench, crashes and finds are inside the same `corpus` directory, in the "queue" and "crashes" subdir. 
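+    // Concretely, the code below maps `--output` to `<out>/queue` for the evolving
+    // corpus and `<out>/crashes` for the objectives (solutions).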
+ let mut out_dir = PathBuf::from( + res.value_of("out") + .expect("The --output parameter is missing") + .to_string(), + ); + if fs::create_dir(&out_dir).is_err() { + println!("Out dir at {:?} already exists.", &out_dir); + if !out_dir.is_dir() { + println!("Out dir at {:?} is not a valid directory!", &out_dir); + return; + } + } + let mut crashes = out_dir.clone(); + crashes.push("crashes"); + out_dir.push("queue"); + + let in_dir = PathBuf::from( + res.value_of("in") + .expect("The --input parameter is missing") + .to_string(), + ); + if !in_dir.is_dir() { + println!("In dir at {:?} is not a valid directory!", &in_dir); + return; + } + + let tokens = res.value_of("tokens").map(PathBuf::from); + + let logfile = PathBuf::from(res.value_of("logfile").unwrap().to_string()); + + let timeout = Duration::from_millis( + res.value_of("timeout") + .unwrap() + .to_string() + .parse() + .expect("Could not parse timeout in milliseconds"), + ); + + fuzz(out_dir, crashes, in_dir, tokens, logfile, timeout) + .expect("An error occurred while fuzzing"); +} + +fn run_testcases(filenames: &[&str]) { + // The actual target run starts here. + // Call LLVMFUzzerInitialize() if present. + let args: Vec = env::args().collect(); + if libfuzzer_initialize(&args) == -1 { + println!("Warning: LLVMFuzzerInitialize failed with -1") + } + + println!( + "You are not fuzzing, just executing {} testcases", + filenames.len() + ); + for fname in filenames { + println!("Executing {}", fname); + + let mut file = File::open(fname).expect("No file found"); + let mut buffer = vec![]; + file.read_to_end(&mut buffer).expect("Buffer overflow"); + + libfuzzer_test_one_input(&buffer); + } +} + +/// The actual fuzzer +fn fuzz( + corpus_dir: PathBuf, + objective_dir: PathBuf, + seed_dir: PathBuf, + tokenfile: Option, + logfile: PathBuf, + timeout: Duration, +) -> Result<(), Error> { + let log = RefCell::new( + OpenOptions::new() + .append(true) + .create(true) + .open(&logfile)?, + ); + + #[cfg(unix)] + let mut stdout_cpy = unsafe { + let new_fd = dup(io::stdout().as_raw_fd())?; + File::from_raw_fd(new_fd) + }; + #[cfg(unix)] + let file_null = File::open("/dev/null")?; + + // 'While the monitor are state, they are usually used in the broker - which is likely never restarted + let monitor = SimpleMonitor::new(|s| { + #[cfg(unix)] + writeln!(&mut stdout_cpy, "{}", s).unwrap(); + #[cfg(windows)] + println!("{}", s); + writeln!(log.borrow_mut(), "{:?} {}", current_time(), s).unwrap(); + }); + + // We need a shared map to store our state before a crash. + // This way, we are able to continue fuzzing afterwards. + let mut shmem_provider = StdShMemProvider::new()?; + + let (state, mut mgr) = match SimpleRestartingEventManager::launch(monitor, &mut shmem_provider) + { + // The restarting state will spawn the same process again as child, then restarted it each time it crashes. 
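+        // On the very first launch there is no saved state yet, so `state` comes back
+        // as `None` here and is created from scratch further below.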
+ Ok(res) => res, + Err(err) => match err { + Error::ShuttingDown => { + return Ok(()); + } + _ => { + panic!("Failed to setup the restarter: {}", err); + } + }, + }; + + // Create an observation channel using the coverage map + // We don't use the hitcounts (see the Cargo.toml, we use pcguard_edges) + let edges = unsafe { &mut EDGES_MAP[0..MAX_EDGES_NUM] }; + let edges_observer = HitcountsMapObserver::new(StdMapObserver::new("edges", edges)); + + // Create an observation channel to keep track of the execution time + let time_observer = TimeObserver::new("time"); + + let cmplog = unsafe { &mut CMPLOG_MAP }; + let cmplog_observer = CmpLogObserver::new("cmplog", cmplog, true); + + // The state of the edges feedback. + let feedback_state = MapFeedbackState::with_observer(&edges_observer); + + // Feedback to rate the interestingness of an input + // This one is composed by two Feedbacks in OR + let feedback = feedback_or!( + // New maximization map feedback linked to the edges observer and the feedback state + MaxMapFeedback::new_tracking(&feedback_state, &edges_observer, true, false), + // Time feedback, this one does not need a feedback state + TimeFeedback::new_with_observer(&time_observer) + ); + + // A feedback to choose if an input is a solution or not + let objective = CrashFeedback::new(); + + // If not restarting, create a State from scratch + let mut state = state.unwrap_or_else(|| { + StdState::new( + // RNG + StdRand::with_seed(current_nanos()), + // Corpus that will be evolved, we keep it in memory for performance + OnDiskCorpus::new(corpus_dir).unwrap(), + // Corpus in which we store solutions (crashes in this example), + // on disk so the user can get them after stopping the fuzzer + OnDiskCorpus::new(objective_dir).unwrap(), + // States of the feedbacks. + // They are the data related to the feedbacks that you want to persist in the State. + tuple_list!(feedback_state), + ) + }); + + println!("Let's fuzz :)"); + + // The actual target run starts here. + // Call LLVMFUzzerInitialize() if present. 
+ let args: Vec = env::args().collect(); + if libfuzzer_initialize(&args) == -1 { + println!("Warning: LLVMFuzzerInitialize failed with -1") + } + + let calibration = CalibrationStage::new(&edges_observer); + + // Setup a randomic Input2State stage + let i2s = StdMutationalStage::new(StdScheduledMutator::new(tuple_list!(I2SRandReplace::new()))); + + // Setup a MOPT mutator + let mutator = StdMOptMutator::new(&mut state, havoc_mutations().merge(tokens_mutations()), 5)?; + + let power = + PowerMutationalStage::new(&mut state, mutator, &edges_observer, PowerSchedule::FAST); + + // A minimization+queue policy to get testcasess from the corpus + let scheduler = IndexesLenTimeMinimizerScheduler::new(WeightedScheduler::new()); + + // A fuzzer with feedbacks and a corpus scheduler + let mut fuzzer = StdFuzzer::new(scheduler, feedback, objective); + + // The wrapped harness function, calling out to the LLVM-style harness + let mut harness = |input: &BytesInput| { + let target = input.target_bytes(); + let buf = target.as_slice(); + libfuzzer_test_one_input(buf); + ExitKind::Ok + }; + + let mut tracing_harness = harness; + + // Create the executor for an in-process function with one observer for edge coverage and one for the execution time + let mut executor = TimeoutExecutor::new( + InProcessExecutor::new( + &mut harness, + tuple_list!(edges_observer, time_observer), + &mut fuzzer, + &mut state, + &mut mgr, + )?, + timeout, + ); + + // Setup a tracing stage in which we log comparisons + let tracing = TracingStage::new(TimeoutExecutor::new( + InProcessExecutor::new( + &mut tracing_harness, + tuple_list!(cmplog_observer), + &mut fuzzer, + &mut state, + &mut mgr, + )?, + // Give it more time! + timeout * 10, + )); + + // The order of the stages matter! + let mut stages = tuple_list!(calibration, tracing, i2s, power); + + // Read tokens + if state.metadata().get::().is_none() { + let mut toks = Tokens::default(); + if let Some(tokenfile) = tokenfile { + toks.add_from_file(tokenfile)?; + } + #[cfg(target_os = "linux")] + { + toks += autotokens()?; + } + + if !toks.is_empty() { + state.add_metadata(toks); + } + } + + // In case the corpus is empty (on first run), reset + if state.corpus().count() < 1 { + state + .load_initial_inputs(&mut fuzzer, &mut executor, &mut mgr, &[seed_dir.clone()]) + .unwrap_or_else(|_| { + println!("Failed to load initial corpus at {:?}", &seed_dir); + process::exit(0); + }); + println!("We imported {} inputs from disk.", state.corpus().count()); + } + + // Remove target ouput (logs still survive) + #[cfg(unix)] + { + let null_fd = file_null.as_raw_fd(); + dup2(null_fd, io::stdout().as_raw_fd())?; + dup2(null_fd, io::stderr().as_raw_fd())?; + } + // reopen file to make sure we're at the end + log.replace( + OpenOptions::new() + .append(true) + .create(true) + .open(&logfile)?, + ); + + fuzzer.fuzz_loop(&mut stages, &mut executor, &mut state, &mut mgr)?; + + // Never reached + Ok(()) +} diff --git a/fuzzers/fuzzbench_text/src/lib.rs b/fuzzers/fuzzbench_text/src/lib.rs index c6c556746e..4672f8389a 100644 --- a/fuzzers/fuzzbench_text/src/lib.rs +++ b/fuzzers/fuzzbench_text/src/lib.rs @@ -45,11 +45,12 @@ use libafl::{ tokens_mutations, StdMOptMutator, StdScheduledMutator, Tokens, }, observers::{HitcountsMapObserver, StdMapObserver, TimeObserver}, - schedulers::{IndexesLenTimeMinimizerScheduler, PowerQueueScheduler}, + schedulers::{ + powersched::PowerSchedule, IndexesLenTimeMinimizerScheduler, PowerQueueScheduler, + }, stages::{ - calibrate::CalibrationStage, - 
power::{PowerMutationalStage, PowerSchedule}, - GeneralizationStage, StdMutationalStage, TracingStage, + calibrate::CalibrationStage, power::PowerMutationalStage, GeneralizationStage, + StdMutationalStage, TracingStage, }, state::{HasCorpus, HasMetadata, StdState}, Error, @@ -360,7 +361,7 @@ fn fuzz_binary( println!("Warning: LLVMFuzzerInitialize failed with -1") } - let calibration = CalibrationStage::new(&mut state, &edges_observer); + let calibration = CalibrationStage::new(&edges_observer); // Setup a randomic Input2State stage let i2s = StdMutationalStage::new(StdScheduledMutator::new(tuple_list!(I2SRandReplace::new()))); @@ -368,7 +369,8 @@ fn fuzz_binary( // Setup a MOPT mutator let mutator = StdMOptMutator::new(&mut state, havoc_mutations().merge(tokens_mutations()), 5)?; - let power = PowerMutationalStage::new(mutator, PowerSchedule::FAST, &edges_observer); + let power = + PowerMutationalStage::new(&mut state, mutator, &edges_observer, PowerSchedule::FAST); // A minimization+queue policy to get testcasess from the corpus let scheduler = IndexesLenTimeMinimizerScheduler::new(PowerQueueScheduler::new()); @@ -564,7 +566,7 @@ fn fuzz_text( println!("Warning: LLVMFuzzerInitialize failed with -1") } - let calibration = CalibrationStage::new(&mut state, &edges_observer); + let calibration = CalibrationStage::new(&edges_observer); // Setup a randomic Input2State stage let i2s = StdMutationalStage::new(StdScheduledMutator::new(tuple_list!(I2SRandReplace::new()))); @@ -572,7 +574,8 @@ fn fuzz_text( // Setup a MOPT mutator let mutator = StdMOptMutator::new(&mut state, havoc_mutations().merge(tokens_mutations()), 5)?; - let power = PowerMutationalStage::new(mutator, PowerSchedule::FAST, &edges_observer); + let power = + PowerMutationalStage::new(&mut state, mutator, &edges_observer, PowerSchedule::FAST); let grimoire_mutator = StdScheduledMutator::with_max_iterations( tuple_list!( diff --git a/fuzzers/libfuzzer_libpng/src/lib.rs b/fuzzers/libfuzzer_libpng/src/lib.rs index aabcf4e55d..97ca7a9e30 100644 --- a/fuzzers/libfuzzer_libpng/src/lib.rs +++ b/fuzzers/libfuzzer_libpng/src/lib.rs @@ -25,11 +25,8 @@ use libafl::{ mutators::scheduled::{havoc_mutations, tokens_mutations, StdScheduledMutator}, mutators::token_mutations::Tokens, observers::{HitcountsMapObserver, StdMapObserver, TimeObserver}, - schedulers::{IndexesLenTimeMinimizerScheduler, PowerQueueScheduler}, - stages::{ - calibrate::CalibrationStage, - power::{PowerMutationalStage, PowerSchedule}, - }, + schedulers::{powersched::PowerSchedule, IndexesLenTimeMinimizerScheduler, WeightedScheduler}, + stages::{calibrate::CalibrationStage, power::PowerMutationalStage}, state::{HasCorpus, HasMetadata, StdState}, Error, }; @@ -130,13 +127,14 @@ fn fuzz(corpus_dirs: &[PathBuf], objective_dir: PathBuf, broker_port: u16) -> Re let mutator = StdScheduledMutator::new(havoc_mutations().merge(tokens_mutations())); - let calibration = CalibrationStage::new(&mut state, &edges_observer); - let power = PowerMutationalStage::new(mutator, PowerSchedule::FAST, &edges_observer); + let calibration = CalibrationStage::new(&edges_observer); + let power = + PowerMutationalStage::new(&mut state, mutator, &edges_observer, PowerSchedule::FAST); let mut stages = tuple_list!(calibration, power); // A minimization+queue policy to get testcasess from the corpus - let scheduler = IndexesLenTimeMinimizerScheduler::new(PowerQueueScheduler::new()); + let scheduler = IndexesLenTimeMinimizerScheduler::new(WeightedScheduler::new()); // A fuzzer with feedbacks and a corpus 
scheduler let mut fuzzer = StdFuzzer::new(scheduler, feedback, objective); diff --git a/fuzzers/tutorial/src/lib.rs b/fuzzers/tutorial/src/lib.rs index a2bc2adcd8..59aae2f9f6 100644 --- a/fuzzers/tutorial/src/lib.rs +++ b/fuzzers/tutorial/src/lib.rs @@ -17,11 +17,8 @@ use libafl::{ inputs::HasTargetBytes, monitors::MultiMonitor, observers::{HitcountsMapObserver, StdMapObserver, TimeObserver}, - schedulers::PowerQueueScheduler, - stages::{ - calibrate::CalibrationStage, - power::{PowerMutationalStage, PowerSchedule}, - }, + schedulers::{powersched::PowerSchedule, PowerQueueScheduler}, + stages::{calibrate::CalibrationStage, power::PowerMutationalStage}, state::{HasCorpus, StdState}, Error, }; @@ -128,8 +125,9 @@ fn fuzz(corpus_dirs: &[PathBuf], objective_dir: PathBuf, broker_port: u16) -> Re // Setup a lain mutator with a mutational stage let mutator = LainMutator::new(); - let calibration = CalibrationStage::new(&mut state, &edges_observer); - let power = PowerMutationalStage::new(mutator, PowerSchedule::FAST, &edges_observer); + let calibration = CalibrationStage::new(&edges_observer); + let power = + PowerMutationalStage::new(&mut state, mutator, &edges_observer, PowerSchedule::FAST); let mut stages = tuple_list!(calibration, power); diff --git a/libafl/src/corpus/testcase.rs b/libafl/src/corpus/testcase.rs index e73dc27292..0271adb239 100644 --- a/libafl/src/corpus/testcase.rs +++ b/libafl/src/corpus/testcase.rs @@ -1,14 +1,20 @@ //! The testcase is a struct embedded in each corpus. //! It will contain a respective input, and metadata. -use alloc::string::String; +use alloc::string::{String, ToString}; use core::{convert::Into, default::Default, option::Option, time::Duration}; use serde::{Deserialize, Serialize}; use crate::{ - bolts::{serdeany::SerdeAnyMap, HasLen}, + bolts::{serdeany::SerdeAnyMap, HasLen, HasRefCnt}, + corpus::Corpus, + feedbacks::MapIndexesMetadata, inputs::Input, - state::HasMetadata, + schedulers::{ + minimizer::{IsFavoredMetadata, TopRatedsMetadata}, + powersched::{PowerSchedule, PowerScheduleMetadata}, + }, + state::{HasCorpus, HasMetadata}, Error, }; @@ -52,6 +58,11 @@ where } } +/// Constants for powerschedules +const POWER_BETA: f64 = 1.0; +const MAX_FACTOR: f64 = POWER_BETA * 32.0; +const HAVOC_MAX_MULT: f64 = 64.0; + /// Impl of a testcase impl Testcase where @@ -201,6 +212,280 @@ where ..Testcase::default() } } + + /// Compute the `weight` used in weighted corpus entry selection algo + #[allow(clippy::cast_precision_loss, clippy::cast_lossless)] + pub fn compute_weight(&self, state: &S) -> Result + where + S: HasCorpus + HasMetadata, + { + let mut weight = 1.0; + let psmeta = state + .metadata() + .get::() + .ok_or_else(|| Error::KeyNotFound("PowerScheduleMetadata not found".to_string()))?; + + let tcmeta = self + .metadata() + .get::() + .ok_or_else(|| Error::KeyNotFound("PowerScheduleTestData not found".to_string()))?; + + // This means that this testcase has never gone through the calibration stage before1, + // In this case we'll just return the default weight + if tcmeta.fuzz_level() == 0 || psmeta.cycles() == 0 { + return Ok(weight); + } + + let q_exec_us = self + .exec_time() + .ok_or_else(|| Error::KeyNotFound("exec_time not set".to_string()))? 
+ .as_nanos() as f64; + let favored = self.has_metadata::(); + + let avg_exec_us = psmeta.exec_time().as_nanos() as f64 / psmeta.cycles() as f64; + let avg_bitmap_size = psmeta.bitmap_size() / psmeta.bitmap_entries(); + + let q_bitmap_size = tcmeta.bitmap_size() as f64; + + match psmeta.strat() { + PowerSchedule::FAST | PowerSchedule::COE | PowerSchedule::LIN | PowerSchedule::QUAD => { + let hits = psmeta.n_fuzz()[tcmeta.n_fuzz_entry()]; + if hits > 0 { + weight *= libm::log10(f64::from(hits)) + 1.0; + } + } + // EXPLORE and EXPLOIT fall into this + _ => {} + } + + weight *= avg_exec_us / q_exec_us; + weight *= libm::log2(q_bitmap_size) / (avg_bitmap_size as f64); + + let tc_ref = match self.metadata().get::() { + Some(meta) => meta.refcnt() as f64, + None => 0.0, + }; + + let avg_top_size = state + .metadata() + .get::() + .ok_or_else(|| Error::KeyNotFound("TopRatedsMetadata not found".to_string()))? + .map() + .len() as f64; + weight *= 1.0 + (tc_ref / avg_top_size); + + if favored { + weight *= 5.0; + } + + // was it fuzzed before? + if tcmeta.fuzz_level() == 0 { + weight *= 2.0; + } + + assert!(weight.is_normal()); + + Ok(weight) + } + + /// Compute the `power` we assign to each corpus entry + #[inline] + #[allow( + clippy::cast_precision_loss, + clippy::too_many_lines, + clippy::cast_sign_loss + )] + pub fn calculate_score(&self, state: &S) -> Result + where + S: HasCorpus + HasMetadata, + { + let psmeta = state + .metadata() + .get::() + .ok_or_else(|| Error::KeyNotFound("PowerScheduleMetadata not found".to_string()))?; + + let fuzz_mu = if psmeta.strat() == PowerSchedule::COE { + let corpus = state.corpus(); + let mut n_paths = 0; + let mut v = 0.0; + for idx in 0..corpus.count() { + let n_fuzz_entry = corpus + .get(idx)? + .borrow() + .metadata() + .get::() + .ok_or_else(|| { + Error::KeyNotFound("PowerScheduleTestData not found".to_string()) + })? + .n_fuzz_entry(); + v += libm::log2(f64::from(psmeta.n_fuzz()[n_fuzz_entry])); + n_paths += 1; + } + + if n_paths == 0 { + return Err(Error::Unknown(String::from("Queue state corrput"))); + } + + v /= f64::from(n_paths); + v + } else { + 0.0 + }; + + let mut perf_score = 100.0; + let q_exec_us = self + .exec_time() + .ok_or_else(|| Error::KeyNotFound("exec_time not set".to_string()))? 
+ .as_nanos() as f64; + + let avg_exec_us = psmeta.exec_time().as_nanos() as f64 / psmeta.cycles() as f64; + let avg_bitmap_size = psmeta.bitmap_size() / psmeta.bitmap_entries(); + + let favored = self.has_metadata::(); + let tcmeta = self + .metadata() + .get::() + .ok_or_else(|| { + Error::KeyNotFound("PowerScheduleTestcaseMetaData not found".to_string()) + })?; + + if q_exec_us * 0.1 > avg_exec_us { + perf_score = 10.0; + } else if q_exec_us * 0.2 > avg_exec_us { + perf_score = 25.0; + } else if q_exec_us * 0.5 > avg_exec_us { + perf_score = 50.0; + } else if q_exec_us * 0.75 > avg_exec_us { + perf_score = 75.0; + } else if q_exec_us * 4.0 < avg_exec_us { + perf_score = 300.0; + } else if q_exec_us * 3.0 < avg_exec_us { + perf_score = 200.0; + } else if q_exec_us * 2.0 < avg_exec_us { + perf_score = 150.0; + } + + let q_bitmap_size = tcmeta.bitmap_size() as f64; + if q_bitmap_size * 0.3 > avg_bitmap_size as f64 { + perf_score *= 3.0; + } else if q_bitmap_size * 0.5 > avg_bitmap_size as f64 { + perf_score *= 2.0; + } else if q_bitmap_size * 0.75 > avg_bitmap_size as f64 { + perf_score *= 1.5; + } else if q_bitmap_size * 3.0 < avg_bitmap_size as f64 { + perf_score *= 0.25; + } else if q_bitmap_size * 2.0 < avg_bitmap_size as f64 { + perf_score *= 0.5; + } else if q_bitmap_size * 1.5 < avg_bitmap_size as f64 { + perf_score *= 0.75; + } + + if tcmeta.handicap() >= 4 { + perf_score *= 4.0; + // tcmeta.set_handicap(tcmeta.handicap() - 4); + } else if tcmeta.handicap() > 0 { + perf_score *= 2.0; + // tcmeta.set_handicap(tcmeta.handicap() - 1); + } + + if tcmeta.depth() >= 4 && tcmeta.depth() < 8 { + perf_score *= 2.0; + } else if tcmeta.depth() >= 8 && tcmeta.depth() < 14 { + perf_score *= 3.0; + } else if tcmeta.depth() >= 14 && tcmeta.depth() < 25 { + perf_score *= 4.0; + } else if tcmeta.depth() >= 25 { + perf_score *= 5.0; + } + + let mut factor: f64 = 1.0; + + // COE and Fast schedule are fairly different from what are described in the original thesis, + // This implementation follows the changes made in this pull request https://github.com/AFLplusplus/AFLplusplus/pull/568 + match psmeta.strat() { + PowerSchedule::EXPLORE => { + // Nothing happens in EXPLORE + } + PowerSchedule::EXPLOIT => { + factor = MAX_FACTOR; + } + PowerSchedule::COE => { + if libm::log2(f64::from(psmeta.n_fuzz()[tcmeta.n_fuzz_entry()])) > fuzz_mu + && !favored + { + // Never skip favorites. + factor = 0.0; + } + } + PowerSchedule::FAST => { + if tcmeta.fuzz_level() != 0 { + let lg = libm::log2(f64::from(psmeta.n_fuzz()[tcmeta.n_fuzz_entry()])); + + match lg { + f if f < 2.0 => { + factor = 4.0; + } + f if (2.0..4.0).contains(&f) => { + factor = 3.0; + } + f if (4.0..5.0).contains(&f) => { + factor = 2.0; + } + f if (6.0..7.0).contains(&f) => { + if !favored { + factor = 0.8; + } + } + f if (7.0..8.0).contains(&f) => { + if !favored { + factor = 0.6; + } + } + f if f >= 8.0 => { + if !favored { + factor = 0.4; + } + } + _ => { + factor = 1.0; + } + } + + if favored { + factor *= 1.15; + } + } + } + PowerSchedule::LIN => { + factor = (tcmeta.fuzz_level() as f64) + / f64::from(psmeta.n_fuzz()[tcmeta.n_fuzz_entry()] + 1); + } + PowerSchedule::QUAD => { + factor = ((tcmeta.fuzz_level() * tcmeta.fuzz_level()) as f64) + / f64::from(psmeta.n_fuzz()[tcmeta.n_fuzz_entry()] + 1); + } + } + + if psmeta.strat() != PowerSchedule::EXPLORE { + if factor > MAX_FACTOR { + factor = MAX_FACTOR; + } + + perf_score *= factor / POWER_BETA; + } + + // Lower bound if the strat is not COE. 
+ if psmeta.strat() == PowerSchedule::COE && perf_score < 1.0 { + perf_score = 1.0; + } + + // Upper bound + if perf_score > HAVOC_MAX_MULT * 100.0 { + perf_score = HAVOC_MAX_MULT * 100.0; + } + + Ok(perf_score as usize) + } } impl Default for Testcase diff --git a/libafl/src/mutators/mopt_mutator.rs b/libafl/src/mutators/mopt_mutator.rs index ab8ab4dc5c..6f923d9a3f 100644 --- a/libafl/src/mutators/mopt_mutator.rs +++ b/libafl/src/mutators/mopt_mutator.rs @@ -2,7 +2,7 @@ use alloc::{string::ToString, vec::Vec}; use crate::{ - bolts::{rands::Rand, rands::StdRand}, + bolts::{current_nanos, rands::Rand, rands::StdRand}, corpus::Corpus, inputs::Input, mutators::{ComposedByMutations, MutationResult, Mutator, MutatorsTuple, ScheduledMutator}, @@ -140,7 +140,7 @@ impl MOpt { /// Creates a new [`struct@MOpt`] instance. pub fn new(operator_num: usize, swarm_num: usize) -> Result { let mut mopt = Self { - rand: StdRand::with_seed(0), + rand: StdRand::with_seed(current_nanos()), total_finds: 0, finds_until_last_swarm: 0, w_init: 0.9, diff --git a/libafl/src/schedulers/minimizer.rs b/libafl/src/schedulers/minimizer.rs index 339eeab40c..3e7425f0de 100644 --- a/libafl/src/schedulers/minimizer.rs +++ b/libafl/src/schedulers/minimizer.rs @@ -41,6 +41,12 @@ impl TopRatedsMetadata { map: HashMap::default(), } } + + /// Getter for map + #[must_use] + pub fn map(&self) -> &HashMap { + &self.map + } } impl Default for TopRatedsMetadata { diff --git a/libafl/src/schedulers/mod.rs b/libafl/src/schedulers/mod.rs index b644bc08a3..ee65dcb787 100644 --- a/libafl/src/schedulers/mod.rs +++ b/libafl/src/schedulers/mod.rs @@ -17,6 +17,9 @@ pub use minimizer::{ IndexesLenTimeMinimizerScheduler, LenTimeMinimizerScheduler, MinimizerScheduler, }; +pub mod weighted; +pub use weighted::WeightedScheduler; + pub mod powersched; pub use powersched::PowerQueueScheduler; diff --git a/libafl/src/schedulers/powersched.rs b/libafl/src/schedulers/powersched.rs index d8572c436e..bf6f274670 100644 --- a/libafl/src/schedulers/powersched.rs +++ b/libafl/src/schedulers/powersched.rs @@ -1,15 +1,144 @@ //! The queue corpus scheduler for power schedules. -use alloc::string::{String, ToString}; +use alloc::{ + string::{String, ToString}, + vec::Vec, +}; use crate::{ corpus::{Corpus, PowerScheduleTestcaseMetaData}, inputs::Input, schedulers::Scheduler, - stages::PowerScheduleMetadata, state::{HasCorpus, HasMetadata}, Error, }; +use core::time::Duration; +use serde::{Deserialize, Serialize}; +/// The n fuzz size +pub const N_FUZZ_SIZE: usize = 1 << 21; + +crate::impl_serdeany!(PowerScheduleMetadata); + +/// The metadata used for power schedules +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct PowerScheduleMetadata { + /// Powerschedule strategy + strat: PowerSchedule, + /// Measured exec time during calibration + exec_time: Duration, + /// Calibration cycles + cycles: u64, + /// Size of the observer map + bitmap_size: u64, + /// Number of filled map entries + bitmap_entries: u64, + /// Queue cycles + queue_cycles: u64, + /// The vector to contain the frequency of each execution path. + n_fuzz: Vec, +} + +/// The metadata for runs in the calibration stage. 
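+/// It is registered on the state by `PowerMutationalStage::new`, filled in during
+/// the calibration runs, and later read back by the schedulers and by the
+/// testcase weight/score computations.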
+impl PowerScheduleMetadata { + /// Creates a new [`struct@PowerScheduleMetadata`] + #[must_use] + pub fn new(strat: PowerSchedule) -> Self { + Self { + strat, + exec_time: Duration::from_millis(0), + cycles: 0, + bitmap_size: 0, + bitmap_entries: 0, + queue_cycles: 0, + n_fuzz: vec![0; N_FUZZ_SIZE], + } + } + + /// The powerschedule strategy + #[must_use] + pub fn strat(&self) -> PowerSchedule { + self.strat + } + + /// The measured exec time during calibration + #[must_use] + pub fn exec_time(&self) -> Duration { + self.exec_time + } + + /// Set the measured exec + pub fn set_exec_time(&mut self, time: Duration) { + self.exec_time = time; + } + + /// The cycles + #[must_use] + pub fn cycles(&self) -> u64 { + self.cycles + } + + /// Sets the cycles + pub fn set_cycles(&mut self, val: u64) { + self.cycles = val; + } + + /// The bitmap size + #[must_use] + pub fn bitmap_size(&self) -> u64 { + self.bitmap_size + } + + /// Sets the bitmap size + pub fn set_bitmap_size(&mut self, val: u64) { + self.bitmap_size = val; + } + + /// The number of filled map entries + #[must_use] + pub fn bitmap_entries(&self) -> u64 { + self.bitmap_entries + } + + /// Sets the number of filled map entries + pub fn set_bitmap_entries(&mut self, val: u64) { + self.bitmap_entries = val; + } + + /// The amount of queue cycles + #[must_use] + pub fn queue_cycles(&self) -> u64 { + self.queue_cycles + } + + /// Sets the amount of queue cycles + pub fn set_queue_cycles(&mut self, val: u64) { + self.queue_cycles = val; + } + + /// Gets the `n_fuzz`. + #[must_use] + pub fn n_fuzz(&self) -> &[u32] { + &self.n_fuzz + } + + /// Sets the `n_fuzz`. + #[must_use] + pub fn n_fuzz_mut(&mut self) -> &mut [u32] { + &mut self.n_fuzz + } +} + +/// The power schedule to use +#[allow(missing_docs)] +#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq)] +pub enum PowerSchedule { + EXPLORE, + EXPLOIT, + FAST, + COE, + LIN, + QUAD, +} /// A corpus scheduler using power schedules #[derive(Clone, Debug)] @@ -31,9 +160,9 @@ where let current_idx = *state.corpus().current(); let mut depth = match current_idx { - Some(idx) => state + Some(parent_idx) => state .corpus() - .get(idx)? + .get(parent_idx)? .borrow_mut() .metadata_mut() .get_mut::() diff --git a/libafl/src/schedulers/weighted.rs b/libafl/src/schedulers/weighted.rs new file mode 100644 index 0000000000..43a0c084d4 --- /dev/null +++ b/libafl/src/schedulers/weighted.rs @@ -0,0 +1,285 @@ +//! The queue corpus scheduler with weighted queue item selection from aflpp (`https://github.com/AFLplusplus/AFLplusplus/blob/1d4f1e48797c064ee71441ba555b29fc3f467983/src/afl-fuzz-queue.c#L32`) +//! This queue corpus scheduler needs calibration stage and the power schedule stage. 
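The scheduler below relies on Walker's alias method: the per-testcase weights are turned into an alias table whenever the corpus changes, after which a weighted draw costs O(1). As a self-contained illustration of the same construction and lookup (over a plain weight slice, with the uniform bucket index and the `[0, 1)` coin supplied by the caller instead of the fuzzer's `Rand`), a sketch could look like this:

```rust
/// Stand-alone sketch of the alias-table construction used by `create_alias_table`.
struct AliasTable {
    alias: Vec<usize>,
    prob: Vec<f64>,
}

fn build_alias_table(weights: &[f64]) -> AliasTable {
    let n = weights.len();
    let sum: f64 = weights.iter().sum();
    // Scale the weights so the average bucket probability is exactly 1.0.
    let p_init: Vec<f64> = weights.iter().map(|w| w * n as f64 / sum).collect();
    let mut p = p_init;
    let (mut small, mut large): (Vec<usize>, Vec<usize>) = (Vec::new(), Vec::new());
    for i in 0..n {
        if p[i] < 1.0 { small.push(i) } else { large.push(i) }
    }
    // Entries left over at the end keep the default probability of 1.0.
    let mut alias = vec![0usize; n];
    let mut prob = vec![1.0f64; n];
    while !small.is_empty() && !large.is_empty() {
        let s = small.pop().unwrap();
        let l = large.pop().unwrap();
        // Bucket `s` keeps probability p[s] for itself and aliases to `l` otherwise.
        prob[s] = p[s];
        alias[s] = l;
        p[l] = p[l] + p[s] - 1.0;
        if p[l] < 1.0 { small.push(l) } else { large.push(l) }
    }
    AliasTable { alias, prob }
}

/// Draw one corpus index: pick a bucket uniformly, then keep it or jump to its alias.
fn sample(table: &AliasTable, bucket: usize, coin: f64) -> usize {
    if coin < table.prob[bucket] { bucket } else { table.alias[bucket] }
}
```

In `next()` below, `bucket` corresponds to `rand.below(corpus_count)` and `coin` to the `between(0, 1000000000) / 1e9` draw.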
+ +use alloc::{ + string::{String, ToString}, + vec::Vec, +}; + +use crate::{ + bolts::rands::Rand, + corpus::{Corpus, PowerScheduleTestcaseMetaData}, + inputs::Input, + schedulers::{powersched::PowerScheduleMetadata, Scheduler}, + state::{HasCorpus, HasMetadata, HasRand}, + Error, +}; +use core::marker::PhantomData; +use serde::{Deserialize, Serialize}; + +#[derive(Serialize, Deserialize, Clone, Debug)] + +/// The Metadata for `WeightedScheduler` +pub struct WeightedScheduleMetadata { + /// The fuzzer execution spent in the current cycles + runs_in_current_cycle: usize, + /// Alias table for weighted queue entry selection + alias_table: Vec, + /// Probability for which queue entry is selected + alias_probability: Vec, +} + +impl Default for WeightedScheduleMetadata { + fn default() -> Self { + Self::new() + } +} + +impl WeightedScheduleMetadata { + /// Constructor for `WeightedScheduleMetadata` + #[must_use] + pub fn new() -> Self { + Self { + runs_in_current_cycle: 0, + alias_table: vec![0], + alias_probability: vec![0.0], + } + } + + /// The getter for `runs_in_current_cycle` + #[must_use] + pub fn runs_in_current_cycle(&self) -> usize { + self.runs_in_current_cycle + } + + /// The setter for `runs_in_current_cycle` + pub fn set_runs_current_cycle(&mut self, cycles: usize) { + self.runs_in_current_cycle = cycles; + } + + /// The getter for `alias_table` + #[must_use] + pub fn alias_table(&self) -> &[usize] { + &self.alias_table + } + + /// The setter for `alias_table` + pub fn set_alias_table(&mut self, table: Vec) { + self.alias_table = table; + } + + /// The getter for `alias_probability` + #[must_use] + pub fn alias_probability(&self) -> &[f64] { + &self.alias_probability + } + + /// The setter for `alias_probability` + pub fn set_alias_probability(&mut self, probability: Vec) { + self.alias_probability = probability; + } +} + +crate::impl_serdeany!(WeightedScheduleMetadata); + +/// A corpus scheduler using power schedules with weighted queue item selection algo. 
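+/// Entry weights come from `Testcase::compute_weight`; on every `on_add` the
+/// scheduler rebuilds the alias table over them, so `next()` can draw a corpus
+/// index in constant time.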
+#[derive(Clone, Debug)] +pub struct WeightedScheduler { + phantom: PhantomData<(I, S)>, +} + +impl Default for WeightedScheduler +where + I: Input, + S: HasCorpus + HasMetadata + HasRand, +{ + fn default() -> Self { + Self::new() + } +} + +impl WeightedScheduler +where + I: Input, + S: HasCorpus + HasMetadata + HasRand, +{ + /// Create a new [`WeightedScheduler`] + #[must_use] + pub fn new() -> Self { + Self { + phantom: PhantomData, + } + } + + /// Create a new alias table when the fuzzer finds a new corpus entry + #[allow( + clippy::unused_self, + clippy::similar_names, + clippy::cast_precision_loss, + clippy::cast_lossless + )] + pub fn create_alias_table(&self, state: &mut S) -> Result<(), Error> { + let n = state.corpus().count(); + + let mut alias_table: Vec = vec![0; n]; + let mut alias_probability: Vec = vec![0.0; n]; + let mut weights: Vec = vec![0.0; n]; + + let mut p_arr: Vec = vec![0.0; n]; + let mut s_arr: Vec = vec![0; n]; + let mut l_arr: Vec = vec![0; n]; + + let mut sum: f64 = 0.0; + + for (i, item) in weights.iter_mut().enumerate().take(n) { + let testcase = state.corpus().get(i)?.borrow(); + let weight = testcase.compute_weight(state)?; + *item = weight; + sum += weight; + } + + for i in 0..n { + p_arr[i] = weights[i] * (n as f64) / sum; + } + + // # of items in queue S + let mut n_s = 0; + + // # of items in queue L + let mut n_l = 0; + // Divide P into two queues, S and L + for s in (0..n).rev() { + if p_arr[s] < 1.0 { + s_arr[n_s] = s; + n_s += 1; + } else { + l_arr[n_l] = s; + n_l += 1; + } + } + + while n_s > 0 && n_l > 0 { + n_s -= 1; + n_l -= 1; + let a = s_arr[n_s]; + let g = l_arr[n_l]; + + alias_probability[a] = p_arr[a]; + alias_table[a] = g; + p_arr[g] = p_arr[g] + p_arr[a] - 1.0; + + if p_arr[g] < 1.0 { + s_arr[n_s] = g; + n_s += 1; + } else { + l_arr[n_l] = g; + n_l += 1; + } + } + + while n_l > 0 { + n_l -= 1; + alias_probability[l_arr[n_l]] = 1.0; + } + + while n_s > 0 { + n_s -= 1; + alias_probability[s_arr[n_s]] = 1.0; + } + + let wsmeta = state + .metadata_mut() + .get_mut::() + .ok_or_else(|| Error::KeyNotFound("WeigthedScheduleMetadata not found".to_string()))?; + + // Update metadata + wsmeta.set_alias_probability(alias_probability); + wsmeta.set_alias_table(alias_table); + Ok(()) + } +} + +impl Scheduler for WeightedScheduler +where + S: HasCorpus + HasMetadata + HasRand, + I: Input, +{ + /// Add an entry to the corpus and return its index + fn on_add(&self, state: &mut S, idx: usize) -> Result<(), Error> { + if !state.has_metadata::() { + state.add_metadata(WeightedScheduleMetadata::new()); + } + + let current_idx = *state.corpus().current(); + + let mut depth = match current_idx { + Some(parent_idx) => state + .corpus() + .get(parent_idx)? + .borrow_mut() + .metadata_mut() + .get_mut::() + .ok_or_else(|| Error::KeyNotFound("PowerScheduleTestData not found".to_string()))? + .depth(), + None => 0, + }; + + // Attach a `PowerScheduleTestData` to the queue entry. + depth += 1; + state + .corpus() + .get(idx)? 
+ .borrow_mut() + .add_metadata(PowerScheduleTestcaseMetaData::new(depth)); + + // Recrate the alias table + self.create_alias_table(state)?; + Ok(()) + } + + #[allow(clippy::similar_names, clippy::cast_precision_loss)] + fn next(&self, state: &mut S) -> Result { + if state.corpus().count() == 0 { + Err(Error::Empty(String::from("No entries in corpus"))) + } else { + let corpus_counts = state.corpus().count(); + let s = state.rand_mut().below(corpus_counts as u64) as usize; + // Choose a random value between 0.000000000 and 1.000000000 + let probability = state.rand_mut().between(0, 1000000000) as f64 / 1000000000_f64; + + let wsmeta = state + .metadata_mut() + .get_mut::() + .ok_or_else(|| { + Error::KeyNotFound("WeigthedScheduleMetadata not found".to_string()) + })?; + + let current_cycles = wsmeta.runs_in_current_cycle(); + + if current_cycles > corpus_counts { + wsmeta.set_runs_current_cycle(0); + } else { + wsmeta.set_runs_current_cycle(current_cycles + 1); + } + + let idx = if probability < wsmeta.alias_probability()[s] { + s + } else { + wsmeta.alias_table()[s] + }; + + // Update depth + if current_cycles > corpus_counts { + let psmeta = state + .metadata_mut() + .get_mut::() + .ok_or_else(|| { + Error::KeyNotFound("PowerScheduleMetadata not found".to_string()) + })?; + psmeta.set_queue_cycles(psmeta.queue_cycles() + 1); + } + + Ok(idx) + } + } +} diff --git a/libafl/src/stages/calibrate.rs b/libafl/src/stages/calibrate.rs index 84edde2c36..0ddc5b9d80 100644 --- a/libafl/src/stages/calibrate.rs +++ b/libafl/src/stages/calibrate.rs @@ -10,14 +10,12 @@ use crate::{ fuzzer::Evaluator, inputs::Input, observers::{MapObserver, ObserversTuple}, + schedulers::powersched::PowerScheduleMetadata, stages::Stage, state::{HasClientPerfMonitor, HasCorpus, HasFeedbackStates, HasMetadata}, Error, }; -use alloc::{ - string::{String, ToString}, - vec::Vec, -}; +use alloc::string::{String, ToString}; use core::{fmt::Debug, marker::PhantomData, time::Duration}; use num_traits::Bounded; use serde::{Deserialize, Serialize}; @@ -65,7 +63,7 @@ where .metadata() .get::() .ok_or_else(|| Error::KeyNotFound("PowerScheduleMetadata not found".to_string()))? - .queue_cycles; + .queue_cycles(); let input = state .corpus() .get(corpus_idx)? @@ -196,111 +194,6 @@ where } } -/// The n fuzz size -pub const N_FUZZ_SIZE: usize = 1 << 21; - -/// The metadata used for power schedules -#[derive(Serialize, Deserialize, Clone, Debug)] -pub struct PowerScheduleMetadata { - /// Measured exec time during calibration - exec_time: Duration, - /// Calibration cycles - cycles: u64, - /// Size of the observer map - bitmap_size: u64, - /// Number of filled map entries - bitmap_entries: u64, - /// Queue cycles - queue_cycles: u64, - /// The vector to contain the frequency of each execution path. - n_fuzz: Vec, -} - -/// The metadata for runs in the calibration stage. 
-impl PowerScheduleMetadata { - /// Creates a new [`struct@PowerScheduleMetadata`] - #[must_use] - pub fn new() -> Self { - Self { - exec_time: Duration::from_millis(0), - cycles: 0, - bitmap_size: 0, - bitmap_entries: 0, - queue_cycles: 0, - n_fuzz: vec![0; N_FUZZ_SIZE], - } - } - - /// The measured exec time during calibration - #[must_use] - pub fn exec_time(&self) -> Duration { - self.exec_time - } - - /// Set the measured exec - pub fn set_exec_time(&mut self, time: Duration) { - self.exec_time = time; - } - - /// The cycles - #[must_use] - pub fn cycles(&self) -> u64 { - self.cycles - } - - /// Sets the cycles - pub fn set_cycles(&mut self, val: u64) { - self.cycles = val; - } - - /// The bitmap size - #[must_use] - pub fn bitmap_size(&self) -> u64 { - self.bitmap_size - } - - /// Sets the bitmap size - pub fn set_bitmap_size(&mut self, val: u64) { - self.bitmap_size = val; - } - - /// The number of filled map entries - #[must_use] - pub fn bitmap_entries(&self) -> u64 { - self.bitmap_entries - } - - /// Sets the number of filled map entries - pub fn set_bitmap_entries(&mut self, val: u64) { - self.bitmap_entries = val; - } - - /// The amount of queue cycles - #[must_use] - pub fn queue_cycles(&self) -> u64 { - self.queue_cycles - } - - /// Sets the amount of queue cycles - pub fn set_queue_cycles(&mut self, val: u64) { - self.queue_cycles = val; - } - - /// Gets the `n_fuzz`. - #[must_use] - pub fn n_fuzz(&self) -> &[u32] { - &self.n_fuzz - } - - /// Sets the `n_fuzz`. - #[must_use] - pub fn n_fuzz_mut(&mut self) -> &mut [u32] { - &mut self.n_fuzz - } -} - -crate::impl_serdeany!(PowerScheduleMetadata); - impl CalibrationStage where I: Input, @@ -309,8 +202,7 @@ where S: HasCorpus + HasMetadata, { /// Create a new [`CalibrationStage`]. - pub fn new(state: &mut S, map_observer_name: &O) -> Self { - state.add_metadata::(PowerScheduleMetadata::new()); + pub fn new(map_observer_name: &O) -> Self { Self { map_observer_name: map_observer_name.name().to_string(), stage_max: CAL_STAGE_START, @@ -318,9 +210,3 @@ where } } } - -impl Default for PowerScheduleMetadata { - fn default() -> Self { - Self::new() - } -} diff --git a/libafl/src/stages/mod.rs b/libafl/src/stages/mod.rs index b3648349a7..1f48f72fbf 100644 --- a/libafl/src/stages/mod.rs +++ b/libafl/src/stages/mod.rs @@ -14,7 +14,7 @@ pub mod tracing; pub use tracing::{ShadowTracingStage, TracingStage}; pub mod calibrate; -pub use calibrate::{CalibrationStage, PowerScheduleMetadata}; +pub use calibrate::CalibrationStage; pub mod power; pub use power::PowerMutationalStage; diff --git a/libafl/src/stages/power.rs b/libafl/src/stages/power.rs index 3355c9c8c7..7a0642d2b2 100644 --- a/libafl/src/stages/power.rs +++ b/libafl/src/stages/power.rs @@ -4,34 +4,17 @@ use alloc::string::{String, ToString}; use core::{fmt::Debug, marker::PhantomData}; use crate::{ - corpus::{Corpus, PowerScheduleTestcaseMetaData, Testcase}, + corpus::{Corpus, PowerScheduleTestcaseMetaData}, executors::{Executor, HasObservers}, fuzzer::Evaluator, inputs::Input, mutators::Mutator, observers::{MapObserver, ObserversTuple}, - schedulers::minimizer::IsFavoredMetadata, - stages::{MutationalStage, PowerScheduleMetadata, Stage}, + schedulers::powersched::{PowerSchedule, PowerScheduleMetadata}, + stages::{MutationalStage, Stage}, state::{HasClientPerfMonitor, HasCorpus, HasMetadata}, Error, }; - -/// The power schedule to use -#[allow(missing_docs)] -#[derive(Clone, Debug, PartialEq)] -pub enum PowerSchedule { - EXPLORE, - FAST, - COE, - LIN, - QUAD, - EXPLOIT, -} - -const 
POWER_BETA: f64 = 1.0; -const MAX_FACTOR: f64 = POWER_BETA * 32.0; -const HAVOC_MAX_MULT: f64 = 64.0; - /// The mutational stage using power schedules #[derive(Clone, Debug)] pub struct PowerMutationalStage @@ -46,8 +29,6 @@ where { map_observer_name: String, mutator: M, - /// The employed power schedule strategy - strat: PowerSchedule, #[allow(clippy::type_complexity)] phantom: PhantomData<(E, EM, I, O, OT, S, Z)>, } @@ -77,19 +58,28 @@ where /// Gets the number of iterations as a random number fn iterations(&self, state: &mut S, corpus_idx: usize) -> Result { - let psmeta = state - .metadata() - .get::() - .ok_or_else(|| Error::KeyNotFound("PowerScheduleMetadata not found".to_string()))?; + // Calculate score + let score = state + .corpus() + .get(corpus_idx)? + .borrow() + .calculate_score(state); - let mut fuzz_mu = 0.0; - if self.strat == PowerSchedule::COE { - fuzz_mu = self.fuzz_mu(state, psmeta)?; - } + // Update handicap let mut testcase = state.corpus().get(corpus_idx)?.borrow_mut(); + let tcmeta = testcase + .metadata_mut() + .get_mut::() + .ok_or_else(|| { + Error::KeyNotFound("PowerScheduleTestcaseMetaData not found".to_string()) + })?; + if tcmeta.handicap() >= 4 { + tcmeta.set_handicap(tcmeta.handicap() - 4); + } else if tcmeta.handicap() > 0 { + tcmeta.set_handicap(tcmeta.handicap() - 1); + } - // 1 + state.rand_mut().below(DEFAULT_MUTATIONAL_MAX_ITERATIONS) as usize - self.calculate_score(&mut testcase, psmeta, fuzz_mu) + score } #[allow(clippy::cast_possible_wrap)] @@ -187,205 +177,12 @@ where Z: Evaluator, { /// Creates a new [`PowerMutationalStage`] - pub fn new(mutator: M, strat: PowerSchedule, map_observer_name: &O) -> Self { + pub fn new(state: &mut S, mutator: M, map_observer_name: &O, strat: PowerSchedule) -> Self { + state.add_metadata::(PowerScheduleMetadata::new(strat)); Self { map_observer_name: map_observer_name.name().to_string(), mutator, - strat, phantom: PhantomData, } } - - /// Compute the parameter `μ` used in the COE schedule. - #[inline] - #[allow(clippy::unused_self)] - pub fn fuzz_mu(&self, state: &S, psmeta: &PowerScheduleMetadata) -> Result { - let corpus = state.corpus(); - let mut n_paths = 0; - let mut fuzz_mu = 0.0; - for idx in 0..corpus.count() { - let n_fuzz_entry = corpus - .get(idx)? - .borrow() - .metadata() - .get::() - .ok_or_else(|| Error::KeyNotFound("PowerScheduleTestData not found".to_string()))? - .n_fuzz_entry(); - fuzz_mu += libm::log2(f64::from(psmeta.n_fuzz()[n_fuzz_entry])); - n_paths += 1; - } - - if n_paths == 0 { - return Err(Error::Unknown(String::from("Queue state corrput"))); - } - - fuzz_mu /= f64::from(n_paths); - Ok(fuzz_mu) - } - - /// Compute the `power` we assign to each corpus entry - #[inline] - #[allow( - clippy::cast_precision_loss, - clippy::too_many_lines, - clippy::cast_sign_loss - )] - fn calculate_score( - &self, - testcase: &mut Testcase, - psmeta: &PowerScheduleMetadata, - fuzz_mu: f64, - ) -> Result { - let mut perf_score = 100.0; - let q_exec_us = testcase - .exec_time() - .ok_or_else(|| Error::KeyNotFound("exec_time not set".to_string()))? 
- .as_nanos() as f64; - - let avg_exec_us = psmeta.exec_time().as_nanos() as f64 / psmeta.cycles() as f64; - let avg_bitmap_size = psmeta.bitmap_size() / psmeta.bitmap_entries(); - - let favored = testcase.has_metadata::(); - let tcmeta = testcase - .metadata_mut() - .get_mut::() - .ok_or_else(|| Error::KeyNotFound("PowerScheduleTestData not found".to_string()))?; - - if q_exec_us * 0.1 > avg_exec_us { - perf_score = 10.0; - } else if q_exec_us * 0.2 > avg_exec_us { - perf_score = 25.0; - } else if q_exec_us * 0.5 > avg_exec_us { - perf_score = 50.0; - } else if q_exec_us * 0.75 > avg_exec_us { - perf_score = 75.0; - } else if q_exec_us * 4.0 < avg_exec_us { - perf_score = 300.0; - } else if q_exec_us * 3.0 < avg_exec_us { - perf_score = 200.0; - } else if q_exec_us * 2.0 < avg_exec_us { - perf_score = 150.0; - } - - let q_bitmap_size = tcmeta.bitmap_size() as f64; - if q_bitmap_size * 0.3 > avg_bitmap_size as f64 { - perf_score *= 3.0; - } else if q_bitmap_size * 0.5 > avg_bitmap_size as f64 { - perf_score *= 2.0; - } else if q_bitmap_size * 0.75 > avg_bitmap_size as f64 { - perf_score *= 1.5; - } else if q_bitmap_size * 3.0 < avg_bitmap_size as f64 { - perf_score *= 0.25; - } else if q_bitmap_size * 2.0 < avg_bitmap_size as f64 { - perf_score *= 0.5; - } else if q_bitmap_size * 1.5 < avg_bitmap_size as f64 { - perf_score *= 0.75; - } - - if tcmeta.handicap() >= 4 { - perf_score *= 4.0; - tcmeta.set_handicap(tcmeta.handicap() - 4); - } else if tcmeta.handicap() > 0 { - perf_score *= 2.0; - tcmeta.set_handicap(tcmeta.handicap() - 1); - } - - if tcmeta.depth() >= 4 && tcmeta.depth() < 8 { - perf_score *= 2.0; - } else if tcmeta.depth() >= 8 && tcmeta.depth() < 14 { - perf_score *= 3.0; - } else if tcmeta.depth() >= 14 && tcmeta.depth() < 25 { - perf_score *= 4.0; - } else if tcmeta.depth() >= 25 { - perf_score *= 5.0; - } - - let mut factor: f64 = 1.0; - - // COE and Fast schedule are fairly different from what are described in the original thesis, - // This implementation follows the changes made in this pull request https://github.com/AFLplusplus/AFLplusplus/pull/568 - match &self.strat { - PowerSchedule::EXPLORE => { - // Nothing happens in EXPLORE - } - PowerSchedule::EXPLOIT => { - factor = MAX_FACTOR; - } - PowerSchedule::COE => { - if libm::log2(f64::from(psmeta.n_fuzz()[tcmeta.n_fuzz_entry()])) > fuzz_mu - && !favored - { - // Never skip favorites. - factor = 0.0; - } - } - PowerSchedule::FAST => { - if tcmeta.fuzz_level() != 0 { - let lg = libm::log2(f64::from(psmeta.n_fuzz()[tcmeta.n_fuzz_entry()])); - - match lg { - f if f < 2.0 => { - factor = 4.0; - } - f if (2.0..4.0).contains(&f) => { - factor = 3.0; - } - f if (4.0..5.0).contains(&f) => { - factor = 2.0; - } - f if (6.0..7.0).contains(&f) => { - if !favored { - factor = 0.8; - } - } - f if (7.0..8.0).contains(&f) => { - if !favored { - factor = 0.6; - } - } - f if f >= 8.0 => { - if !favored { - factor = 0.4; - } - } - _ => { - factor = 1.0; - } - } - - if favored { - factor *= 1.15; - } - } - } - PowerSchedule::LIN => { - factor = (tcmeta.fuzz_level() as f64) - / f64::from(psmeta.n_fuzz()[tcmeta.n_fuzz_entry()] + 1); - } - PowerSchedule::QUAD => { - factor = ((tcmeta.fuzz_level() * tcmeta.fuzz_level()) as f64) - / f64::from(psmeta.n_fuzz()[tcmeta.n_fuzz_entry()] + 1); - } - } - - if self.strat != PowerSchedule::EXPLORE { - if factor > MAX_FACTOR { - factor = MAX_FACTOR; - } - - perf_score *= factor / POWER_BETA; - } - - // Lower bound if the strat is not COE. 
- if self.strat == PowerSchedule::COE && perf_score < 1.0 { - perf_score = 1.0; - } - - // Upper bound - if perf_score > HAVOC_MAX_MULT * 100.0 { - perf_score = HAVOC_MAX_MULT * 100.0; - } - - Ok(perf_score as usize) - } }
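To summarize the scoring side of this change: the weighting logic that used to live only as `calculate_score` in the power stage now also exists as `Testcase::compute_weight`, feeding the alias table above. A stripped-down, stand-alone version of that weight arithmetic, with the values normally read from `PowerScheduleMetadata` and the testcase metadata passed in as plain parameters (parameter names are illustrative, and the early-out for never-calibrated entries is omitted), looks roughly like this:

```rust
/// Sketch of the weight formula from `Testcase::compute_weight`.
#[allow(clippy::too_many_arguments)]
fn entry_weight(
    fast_like_schedule: bool, // FAST, COE, LIN or QUAD
    hits: u32,                // n_fuzz counter for this entry's path
    q_exec_us: f64,           // this entry's execution time
    avg_exec_us: f64,         // average over the calibration cycles
    q_bitmap_size: f64,       // this entry's map fill
    avg_bitmap_size: f64,     // average map fill
    top_rated_refs: f64,      // how many top-rated map indexes reference this entry
    top_rated_entries: f64,   // size of the TopRatedsMetadata map
    favored: bool,
    fuzz_level: u64,
) -> f64 {
    let mut weight = 1.0;
    if fast_like_schedule && hits > 0 {
        weight *= f64::from(hits).log10() + 1.0; // log-scaled path frequency, as in AFL++
    }
    weight *= avg_exec_us / q_exec_us;                // faster-than-average entries gain weight
    weight *= q_bitmap_size.log2() / avg_bitmap_size; // entries touching more of the map gain weight
    weight *= 1.0 + top_rated_refs / top_rated_entries;
    if favored {
        weight *= 5.0;
    }
    if fuzz_level == 0 {
        weight *= 2.0; // never fuzzed before
    }
    weight
}
```

The real method additionally asserts that the result is a normal float and returns the default weight of 1.0 for entries that have not been through calibration yet; the resulting weights are normalized and fed into the alias table sketched earlier.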