diff --git a/fuzzers/baby_fuzzer_grimoire/.gitignore b/fuzzers/baby_fuzzer_grimoire/.gitignore new file mode 100644 index 0000000000..a977a2ca5b --- /dev/null +++ b/fuzzers/baby_fuzzer_grimoire/.gitignore @@ -0,0 +1 @@ +libpng-* \ No newline at end of file diff --git a/fuzzers/baby_fuzzer_grimoire/Cargo.toml b/fuzzers/baby_fuzzer_grimoire/Cargo.toml new file mode 100644 index 0000000000..8559181552 --- /dev/null +++ b/fuzzers/baby_fuzzer_grimoire/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "baby_fuzzer" +version = "0.7.1" +authors = ["Andrea Fioraldi ", "Dominik Maier "] +edition = "2021" + +[features] +default = ["std"] +std = [] + +[profile.dev] +panic = "abort" + +[profile.release] +panic = "abort" +lto = true +codegen-units = 1 +opt-level = 3 +debug = true + +[dependencies] +libafl = { path = "../../libafl/" } diff --git a/fuzzers/baby_fuzzer_grimoire/README.md b/fuzzers/baby_fuzzer_grimoire/README.md new file mode 100644 index 0000000000..42fd9a5011 --- /dev/null +++ b/fuzzers/baby_fuzzer_grimoire/README.md @@ -0,0 +1,8 @@ +# Baby fuzzer + +This is a minimalistic example of how to create a libafl-based fuzzer. + +It runs on a single core until a crash occurs and then exits. + +The tested program is a simple Rust function without any instrumentation. +For real fuzzing, you will want to add some sort of coverage or other feedback. \ No newline at end of file diff --git a/fuzzers/baby_fuzzer_grimoire/corpus/new file b/fuzzers/baby_fuzzer_grimoire/corpus/new file new file mode 100644 index 0000000000..a1391ba87c --- /dev/null +++ b/fuzzers/baby_fuzzer_grimoire/corpus/new file @@ -0,0 +1,4 @@ +fn pippo(v) { return "hello world " + v; } +var a = 666; +name = "scozzo" + a; +pippo(name); diff --git a/fuzzers/baby_fuzzer_grimoire/src/main.rs b/fuzzers/baby_fuzzer_grimoire/src/main.rs new file mode 100644 index 0000000000..59dd8d725b --- /dev/null +++ b/fuzzers/baby_fuzzer_grimoire/src/main.rs @@ -0,0 +1,177 @@ +use std::io::Read; +use std::{fs, path::PathBuf}; + +#[cfg(windows)] +use std::ptr::write_volatile; + +use libafl::{ + bolts::{current_nanos, rands::StdRand, tuples::tuple_list, AsSlice}, + corpus::{InMemoryCorpus, OnDiskCorpus, QueueCorpusScheduler}, + events::SimpleEventManager, + executors::{inprocess::InProcessExecutor, ExitKind}, + feedbacks::{CrashFeedback, MapFeedbackState, MaxMapFeedback}, + fuzzer::{Evaluator, Fuzzer, StdFuzzer}, + inputs::{GeneralizedInput, HasTargetBytes}, + monitors::SimpleMonitor, + mutators::{ + havoc_mutations, scheduled::StdScheduledMutator, GrimoireExtensionMutator, + GrimoireRandomDeleteMutator, GrimoireRecursiveReplacementMutator, + GrimoireStringReplacementMutator, Tokens, + }, + observers::StdMapObserver, + stages::{mutational::StdMutationalStage, GeneralizationStage}, + state::{HasMetadata, StdState}, +}; + +/// Coverage map with explicit assignments due to the lack of instrumentation +static mut SIGNALS: [u8; 16] = [0; 16]; + +/// Assign a signal to the signals map +fn signals_set(idx: usize) { + unsafe { SIGNALS[idx] = 1 }; +} + +/// Returns true if `needle` occurs as a contiguous subslice of `haystack` +fn is_sub<T: PartialEq>(mut haystack: &[T], needle: &[T]) -> bool { + if needle.is_empty() { + return true; + } + while !haystack.is_empty() { + if haystack.starts_with(needle) { + return true; + } + haystack = &haystack[1..]; + } + false +} + +#[allow(clippy::similar_names)] +pub fn main() { + let mut initial_inputs = vec![]; + for entry in fs::read_dir("./corpus").unwrap() { + let path = entry.unwrap().path(); + let attr = fs::metadata(&path); + if attr.is_err() { + continue; + } + let attr = attr.unwrap(); + + if 
attr.is_file() && attr.len() > 0 { + println!("Loading file {:?} ...", &path); + let mut file = fs::File::open(path).expect("no file found"); + let mut buffer = vec![]; + file.read_to_end(&mut buffer).expect("buffer overflow"); + let input = GeneralizedInput::new(buffer); + initial_inputs.push(input); + } + } + + // The closure that we want to fuzz + let mut harness = |input: &GeneralizedInput| { + let target_bytes = input.target_bytes(); + let bytes = target_bytes.as_slice(); + + if is_sub(bytes, "fn".as_bytes()) { + signals_set(2); + } + + if is_sub(bytes, "pippopippo".as_bytes()) { + signals_set(3); + } + + unsafe { + if input.grimoire_mutated { + // println!(">>> {:?}", input.generalized()); + println!(">>> {:?}", std::str::from_utf8_unchecked(bytes)); + } + } + signals_set(1); + ExitKind::Ok + }; + + // Create an observation channel using the signals map + let observer = StdMapObserver::new("signals", unsafe { &mut SIGNALS }); + + // The state of the edges feedback. + let feedback_state = MapFeedbackState::with_observer(&observer); + + // Feedback to rate the interestingness of an input + let feedback = MaxMapFeedback::new_tracking(&feedback_state, &observer, false, true); + + // A feedback to choose if an input is a solution or not + let objective = CrashFeedback::new(); + + // create a State from scratch + let mut state = StdState::new( + // RNG + StdRand::with_seed(current_nanos()), + // Corpus that will be evolved, we keep it in memory for performance + InMemoryCorpus::new(), + // Corpus in which we store solutions (crashes in this example), + // on disk so the user can get them after stopping the fuzzer + OnDiskCorpus::new(PathBuf::from("./crashes")).unwrap(), + // States of the feedbacks. + // They are the data related to the feedbacks that you want to persist in the State. 
+ tuple_list!(feedback_state), + ); + + if state.metadata().get::().is_none() { + state.add_metadata(Tokens::new(vec![ + "FOO".as_bytes().to_vec(), + "BAR".as_bytes().to_vec(), + ])); + } + + // The Monitor trait define how the fuzzer stats are reported to the user + let monitor = SimpleMonitor::new(|s| println!("{}", s)); + + // The event manager handle the various events generated during the fuzzing loop + // such as the notification of the addition of a new item to the corpus + let mut mgr = SimpleEventManager::new(monitor); + + // A queue policy to get testcasess from the corpus + let scheduler = QueueCorpusScheduler::new(); + + // A fuzzer with feedbacks and a corpus scheduler + let mut fuzzer = StdFuzzer::new(scheduler, feedback, objective); + + let generalization = GeneralizationStage::new(&observer); + + // Create the executor for an in-process function with just one observer + let mut executor = InProcessExecutor::new( + &mut harness, + tuple_list!(observer), + &mut fuzzer, + &mut state, + &mut mgr, + ) + .expect("Failed to create the Executor"); + + // Setup a mutational stage with a basic bytes mutator + let mutator = StdScheduledMutator::with_max_iterations(havoc_mutations(), 2); + let grimoire_mutator = StdScheduledMutator::with_max_iterations( + tuple_list!( + GrimoireExtensionMutator::new(), + GrimoireRecursiveReplacementMutator::new(), + GrimoireStringReplacementMutator::new(), + // give more probability to avoid large inputs + GrimoireRandomDeleteMutator::new(), + GrimoireRandomDeleteMutator::new(), + ), + 3, + ); + let mut stages = tuple_list!( + generalization, + StdMutationalStage::new(mutator), + StdMutationalStage::new(grimoire_mutator) + ); + + for input in initial_inputs { + fuzzer + .evaluate_input(&mut state, &mut executor, &mut mgr, input) + .unwrap(); + } + + fuzzer + .fuzz_loop(&mut stages, &mut executor, &mut state, &mut mgr) + .expect("Error in the fuzzing loop"); +} diff --git a/fuzzers/fuzzbench_text/.gitignore b/fuzzers/fuzzbench_text/.gitignore new file mode 100644 index 0000000000..d3561edaf7 --- /dev/null +++ b/fuzzers/fuzzbench_text/.gitignore @@ -0,0 +1,2 @@ +libpng-* +fuzzer diff --git a/fuzzers/fuzzbench_text/Cargo.toml b/fuzzers/fuzzbench_text/Cargo.toml new file mode 100644 index 0000000000..ae16d1f072 --- /dev/null +++ b/fuzzers/fuzzbench_text/Cargo.toml @@ -0,0 +1,34 @@ +[package] +name = "fuzzbench" +version = "0.7.1" +authors = ["Andrea Fioraldi ", "Dominik Maier "] +edition = "2021" + +[features] +default = ["std"] +std = [] + +[profile.release] +lto = true +codegen-units = 1 +opt-level = 3 +debug = true + +[build-dependencies] +cc = { version = "1.0", features = ["parallel"] } +which = { version = "4.0.2" } +num_cpus = "1.0" + +[dependencies] +libafl = { path = "../../libafl/" } +libafl_targets = { path = "../../libafl_targets/", features = ["sancov_pcguard_hitcounts", "sancov_cmplog", "libfuzzer"] } +# TODO Include it only when building cc +libafl_cc = { path = "../../libafl_cc/" } +clap = { version = "3.0", features = ["default"] } +nix = "0.23" +mimalloc = { version = "*", default-features = false } +content_inspector = "0.2.4" + +[lib] +name = "fuzzbench" +crate-type = ["staticlib"] diff --git a/fuzzers/fuzzbench_text/Makefile b/fuzzers/fuzzbench_text/Makefile new file mode 100644 index 0000000000..c412cb2f42 --- /dev/null +++ b/fuzzers/fuzzbench_text/Makefile @@ -0,0 +1,49 @@ +FUZZER_NAME="fuzzer" +PROJECT_DIR=$(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) + +PHONY: all + +all: fuzzer + +target/release/libafl_cxx: src/* 
src/bin/* + # Build the libafl compiler wrappers + cargo build --release + +target/release/libafl_cc: target/release/libafl_cxx + +fuzz.o: fuzz.c target/release/libafl_cc + target/release/libafl_cc --libafl-no-link -O3 -c $^ -o $@ + +fuzzer: target/release/libafl_cxx fuzz.o + # Build the fuzzer compiler + cargo build --release + + # Build the harness + target/release/libafl_cxx \ + --libafl \ + fuzz.o \ + -o $(FUZZER_NAME) \ + -lm -lz + +clean: + rm ./$(FUZZER_NAME) || true + rm fuzz.o || true + +run: all + ./$(FUZZER_NAME) + +short_test: all + rm -rf libafl_unix_shmem_server || true + mkdir in || true + echo a > in/a + # Allow sigterm as exit code + (timeout 11s ./$(FUZZER_NAME) -o out -i in || [ $$? -eq 124 ]) + rm -rf out || true + rm -rf in || true + +test: all + mkdir in || true + echo a > in/a + (timeout 60s ./$(FUZZER_NAME) -o out -i in || [ $$? -eq 124 ]) + rm -rf out || true + rm -rf in || true diff --git a/fuzzers/fuzzbench_text/README.md b/fuzzers/fuzzbench_text/README.md new file mode 100644 index 0000000000..13b314743c --- /dev/null +++ b/fuzzers/fuzzbench_text/README.md @@ -0,0 +1,19 @@ +# Fuzzbench Harness (text) + +This folder contains an example fuzzer tailored for fuzzbench. +It uses the best possible settings, with the exception of a SimpleRestartingEventManager instead of an LlmpEventManager, since fuzzbench is single-threaded. +Real fuzzing campaigns should consider using the multithreaded LlmpEventManager; see the other examples. + +This fuzzer autodetects whether the dictionary and the initial inputs are text or binary data, and enables Grimoire if they are text. + +## Build + +To build this example, run `cargo build --release`. +This will build the fuzzer compilers (`libafl_cc` and `libafl_cxx`) with `src/lib.rs` as the fuzzer. +The fuzzer uses the libfuzzer compatibility layer and the SanitizerCoverage runtime functions for coverage feedback. + +These can then be used to build libfuzzer harnesses in the software project of your choice. +Finally, just run the resulting binary with the output (`-o`) and input/seed (`-i`) directories. + +In any real-world scenario, you should use `taskset` to pin each client to an empty CPU core, the lib does not pick an empty core automatically (yet). + diff --git a/fuzzers/fuzzbench_text/fuzz.c b/fuzzers/fuzzbench_text/fuzz.c new file mode 100644 index 0000000000..7eea9f3b30 --- /dev/null +++ b/fuzzers/fuzzbench_text/fuzz.c @@ -0,0 +1,15 @@ +#include <stdint.h> +#include <stdlib.h> + +int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + if (Size >= 8 && *(uint32_t*)Data == 0xaabbccdd) + abort(); + return 0; +} + +/* +int main() { + + char buf [10] = {0}; + LLVMFuzzerTestOneInput(buf, 10); + +}*/ diff --git a/fuzzers/fuzzbench_text/src/bin/libafl_cc.rs b/fuzzers/fuzzbench_text/src/bin/libafl_cc.rs new file mode 100644 index 0000000000..8c9e37f638 --- /dev/null +++ b/fuzzers/fuzzbench_text/src/bin/libafl_cc.rs @@ -0,0 +1,38 @@ +use libafl_cc::{ClangWrapper, CompilerWrapper, LLVMPasses}; +use std::env; + +pub fn main() { + let args: Vec<String> = env::args().collect(); + if args.len() > 1 { + let mut dir = env::current_exe().unwrap(); + let wrapper_name = dir.file_name().unwrap().to_str().unwrap(); + + let is_cpp = match wrapper_name[wrapper_name.len()-2..].to_lowercase().as_str() { + "cc" => false, + "++" | "pp" | "xx" => true, + _ => panic!("Could not figure out if a C or C++ wrapper was called. Expected {:?} to end with cc or cxx", dir), + }; + + dir.pop(); + + let mut cc = ClangWrapper::new(); + if let Some(code) = cc + .cpp(is_cpp) + // silence the compiler wrapper output, needed for some configure scripts. 
+ .silence(true) + // add arguments only if --libafl or --libafl-no-link are present + .need_libafl_arg(true) + .parse_args(&args) + .expect("Failed to parse the command line") + .link_staticlib(&dir, "fuzzbench") + .add_arg("-fsanitize-coverage=trace-pc-guard,trace-cmp") + .add_pass(LLVMPasses::CmpLogRtn) + .run() + .expect("Failed to run the wrapped compiler") + { + std::process::exit(code); + } + } else { + panic!("LibAFL CC: No Arguments given"); + } +} diff --git a/fuzzers/fuzzbench_text/src/bin/libafl_cxx.rs b/fuzzers/fuzzbench_text/src/bin/libafl_cxx.rs new file mode 100644 index 0000000000..ce786239b0 --- /dev/null +++ b/fuzzers/fuzzbench_text/src/bin/libafl_cxx.rs @@ -0,0 +1,5 @@ +pub mod libafl_cc; + +fn main() { + libafl_cc::main() +} diff --git a/fuzzers/fuzzbench_text/src/lib.rs b/fuzzers/fuzzbench_text/src/lib.rs new file mode 100644 index 0000000000..99fef86fe4 --- /dev/null +++ b/fuzzers/fuzzbench_text/src/lib.rs @@ -0,0 +1,669 @@ +//! A singlethreaded libfuzzer-like fuzzer that can auto-restart. +use mimalloc::MiMalloc; +#[global_allocator] +static GLOBAL: MiMalloc = MiMalloc; + +use clap::{App, Arg}; +use content_inspector::inspect; +use core::{cell::RefCell, time::Duration}; +#[cfg(unix)] +use nix::{self, unistd::dup}; +#[cfg(unix)] +use std::os::unix::io::{AsRawFd, FromRawFd}; +use std::{ + env, + fs::{self, File, OpenOptions}, + io::{self, Read, Write}, + path::{Path, PathBuf}, + process, +}; + +use libafl::{ + bolts::{ + current_nanos, current_time, + os::dup2, + rands::StdRand, + shmem::{ShMemProvider, StdShMemProvider}, + tuples::{tuple_list, Merge}, + AsSlice, + }, + corpus::{ + Corpus, IndexesLenTimeMinimizerCorpusScheduler, OnDiskCorpus, PowerQueueCorpusScheduler, + }, + events::SimpleRestartingEventManager, + executors::{inprocess::InProcessExecutor, ExitKind, TimeoutExecutor}, + feedback_or, + feedbacks::{CrashFeedback, MapFeedbackState, MaxMapFeedback, TimeFeedback}, + fuzzer::{Fuzzer, StdFuzzer}, + inputs::{BytesInput, GeneralizedInput, HasTargetBytes}, + monitors::SimpleMonitor, + mutators::{ + grimoire::{ + GrimoireExtensionMutator, GrimoireRandomDeleteMutator, + GrimoireRecursiveReplacementMutator, GrimoireStringReplacementMutator, + }, + scheduled::havoc_mutations, + token_mutations::I2SRandReplace, + tokens_mutations, StdMOptMutator, StdScheduledMutator, Tokens, + }, + observers::{HitcountsMapObserver, StdMapObserver, TimeObserver}, + stages::{ + calibrate::CalibrationStage, + power::{PowerMutationalStage, PowerSchedule}, + GeneralizationStage, StdMutationalStage, TracingStage, + }, + state::{HasCorpus, HasMetadata, StdState}, + Error, +}; +use libafl_targets::{ + libfuzzer_initialize, libfuzzer_test_one_input, CmpLogObserver, CMPLOG_MAP, EDGES_MAP, + MAX_EDGES_NUM, +}; + +/// The fuzzer main (as `no_mangle` C function) +#[no_mangle] +pub fn libafl_main() { + // Registry the metadata types used in this fuzzer + // Needed only on no_std + //RegistryBuilder::register::(); + + let res = match App::new("libafl_fuzzbench") + .version("0.7.1") + .author("AFLplusplus team") + .about("LibAFL-based fuzzer for Fuzzbench") + .arg( + Arg::new("out") + .short('o') + .long("output") + .help("The directory to place finds in ('corpus')") + .takes_value(true), + ) + .arg( + Arg::new("in") + .short('i') + .long("input") + .help("The directory to read initial inputs from ('seeds')") + .takes_value(true), + ) + .arg( + Arg::new("tokens") + .short('x') + .long("tokens") + .help("A file to read tokens from, to be used during fuzzing") + .takes_value(true), + ) + .arg( + 
Arg::new("logfile") + .short('l') + .long("logfile") + .help("Duplicates all output to this file") + .default_value("libafl.log"), + ) + .arg( + Arg::new("timeout") + .short('t') + .long("timeout") + .help("Timeout for each individual execution, in milliseconds") + .default_value("1200"), + ) + .arg(Arg::new("remaining").multiple_values(true)) + .try_get_matches() + { + Ok(res) => res, + Err(err) => { + println!( + "Syntax: {}, [-x dictionary] -o corpus_dir -i seed_dir\n{:?}", + env::current_exe() + .unwrap_or_else(|_| "fuzzer".into()) + .to_string_lossy(), + err.info, + ); + return; + } + }; + + println!( + "Workdir: {:?}", + env::current_dir().unwrap().to_string_lossy().to_string() + ); + + if let Some(filenames) = res.values_of("remaining") { + let filenames: Vec<&str> = filenames.collect(); + if !filenames.is_empty() { + run_testcases(&filenames); + return; + } + } + + // For fuzzbench, crashes and finds are inside the same `corpus` directory, in the "queue" and "crashes" subdir. + let mut out_dir = PathBuf::from( + res.value_of("out") + .expect("The --output parameter is missing") + .to_string(), + ); + if fs::create_dir(&out_dir).is_err() { + println!("Out dir at {:?} already exists.", &out_dir); + if !out_dir.is_dir() { + println!("Out dir at {:?} is not a valid directory!", &out_dir); + return; + } + } + let mut crashes = out_dir.clone(); + crashes.push("crashes"); + out_dir.push("queue"); + + let in_dir = PathBuf::from( + res.value_of("in") + .expect("The --input parameter is missing") + .to_string(), + ); + if !in_dir.is_dir() { + println!("In dir at {:?} is not a valid directory!", &in_dir); + return; + } + + let tokens = res.value_of("tokens").map(PathBuf::from); + + let logfile = PathBuf::from(res.value_of("logfile").unwrap().to_string()); + + let timeout = Duration::from_millis( + res.value_of("timeout") + .unwrap() + .to_string() + .parse() + .expect("Could not parse timeout in milliseconds"), + ); + + if check_if_textual(&in_dir, &tokens) { + fuzz_text(out_dir, crashes, in_dir, tokens, logfile, timeout) + .expect("An error occurred while fuzzing"); + } else { + fuzz_binary(out_dir, crashes, in_dir, tokens, logfile, timeout) + .expect("An error occurred while fuzzing"); + } +} + +fn count_textual_inputs(dir: &Path) -> (usize, usize) { + let mut textuals = 0; + let mut total = 0; + for entry in fs::read_dir(dir).unwrap() { + let entry = entry.unwrap(); + let path = entry.path(); + let attributes = fs::metadata(&path); + if attributes.is_err() { + continue; + } + let attr = attributes.unwrap(); + if attr.is_dir() { + let (found, tot) = count_textual_inputs(&path); + textuals += found; + total += tot; + } else if attr.is_file() && attr.len() != 0 { + let mut file = File::open(&path).expect("No file found"); + let mut buffer = vec![]; + file.read_to_end(&mut buffer).expect("Buffer overflow"); + + if inspect(&buffer).is_text() { + println!("Testcase {:?} is text", &path); + textuals += 1; + } else { + println!("Testcase {:?} is binary", &path); + } + total += 1; + } + } + (textuals, total) +} + +fn check_if_textual(seeds_dir: &Path, tokenfile: &Option) -> bool { + let (found, tot) = count_textual_inputs(&seeds_dir); + let is_text = found * 100 / tot > 90; // 90% of text inputs + if let Some(tokenfile) = tokenfile { + let toks = Tokens::from_tokens_file(tokenfile).unwrap(); + if !toks.tokens().is_empty() { + let mut cnt = 0; + for t in toks.tokens() { + if inspect(t).is_text() { + cnt += 1; + } + } + return is_text && cnt * 100 / toks.tokens().len() > 90; // 90% of text tokens + } 
+ } + is_text +} + +fn run_testcases(filenames: &[&str]) { + // The actual target run starts here. + // Call LLVMFUzzerInitialize() if present. + let args: Vec = env::args().collect(); + if libfuzzer_initialize(&args) == -1 { + println!("Warning: LLVMFuzzerInitialize failed with -1") + } + + println!( + "You are not fuzzing, just executing {} testcases", + filenames.len() + ); + for fname in filenames { + println!("Executing {}", fname); + + let mut file = File::open(fname).expect("No file found"); + let mut buffer = vec![]; + file.read_to_end(&mut buffer).expect("Buffer overflow"); + + libfuzzer_test_one_input(&buffer); + } +} + +/// The actual fuzzer +fn fuzz_binary( + corpus_dir: PathBuf, + objective_dir: PathBuf, + seed_dir: PathBuf, + tokenfile: Option, + logfile: PathBuf, + timeout: Duration, +) -> Result<(), Error> { + let log = RefCell::new( + OpenOptions::new() + .append(true) + .create(true) + .open(&logfile)?, + ); + + #[cfg(unix)] + let mut stdout_cpy = unsafe { + let new_fd = dup(io::stdout().as_raw_fd())?; + File::from_raw_fd(new_fd) + }; + #[cfg(unix)] + let file_null = File::open("/dev/null")?; + + // 'While the monitor are state, they are usually used in the broker - which is likely never restarted + let monitor = SimpleMonitor::new(|s| { + #[cfg(unix)] + writeln!(&mut stdout_cpy, "{}", s).unwrap(); + #[cfg(windows)] + println!("{}", s); + writeln!(log.borrow_mut(), "{:?} {}", current_time(), s).unwrap(); + }); + + // We need a shared map to store our state before a crash. + // This way, we are able to continue fuzzing afterwards. + let mut shmem_provider = StdShMemProvider::new()?; + + let (state, mut mgr) = match SimpleRestartingEventManager::launch(monitor, &mut shmem_provider) + { + // The restarting state will spawn the same process again as child, then restarted it each time it crashes. + Ok(res) => res, + Err(err) => match err { + Error::ShuttingDown => { + return Ok(()); + } + _ => { + panic!("Failed to setup the restarter: {}", err); + } + }, + }; + + // Create an observation channel using the coverage map + // We don't use the hitcounts (see the Cargo.toml, we use pcguard_edges) + let edges = unsafe { &mut EDGES_MAP[0..MAX_EDGES_NUM] }; + let edges_observer = HitcountsMapObserver::new(StdMapObserver::new("edges", edges)); + + // Create an observation channel to keep track of the execution time + let time_observer = TimeObserver::new("time"); + + let cmplog = unsafe { &mut CMPLOG_MAP }; + let cmplog_observer = CmpLogObserver::new("cmplog", cmplog, true); + + // The state of the edges feedback. 
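+ // It keeps the history map of the highest values seen so far, so that MaxMapFeedback can tell which executions reach new coverage.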
+ let feedback_state = MapFeedbackState::with_observer(&edges_observer); + + // Feedback to rate the interestingness of an input + // This one is composed by two Feedbacks in OR + let feedback = feedback_or!( + // New maximization map feedback linked to the edges observer and the feedback state + MaxMapFeedback::new_tracking(&feedback_state, &edges_observer, true, false), + // Time feedback, this one does not need a feedback state + TimeFeedback::new_with_observer(&time_observer) + ); + + // A feedback to choose if an input is a solution or not + let objective = CrashFeedback::new(); + + // If not restarting, create a State from scratch + let mut state = state.unwrap_or_else(|| { + StdState::new( + // RNG + StdRand::with_seed(current_nanos()), + // Corpus that will be evolved, we keep it in memory for performance + OnDiskCorpus::new(corpus_dir).unwrap(), + // Corpus in which we store solutions (crashes in this example), + // on disk so the user can get them after stopping the fuzzer + OnDiskCorpus::new(objective_dir).unwrap(), + // States of the feedbacks. + // They are the data related to the feedbacks that you want to persist in the State. + tuple_list!(feedback_state), + ) + }); + + println!("Let's fuzz :)"); + + // The actual target run starts here. + // Call LLVMFUzzerInitialize() if present. + let args: Vec = env::args().collect(); + if libfuzzer_initialize(&args) == -1 { + println!("Warning: LLVMFuzzerInitialize failed with -1") + } + + let calibration = CalibrationStage::new(&mut state, &edges_observer); + + // Setup a randomic Input2State stage + let i2s = StdMutationalStage::new(StdScheduledMutator::new(tuple_list!(I2SRandReplace::new()))); + + // Setup a MOPT mutator + let mutator = StdMOptMutator::new(&mut state, havoc_mutations().merge(tokens_mutations()), 5)?; + + let power = PowerMutationalStage::new(mutator, PowerSchedule::FAST, &edges_observer); + + // A minimization+queue policy to get testcasess from the corpus + let scheduler = IndexesLenTimeMinimizerCorpusScheduler::new(PowerQueueCorpusScheduler::new()); + + // A fuzzer with feedbacks and a corpus scheduler + let mut fuzzer = StdFuzzer::new(scheduler, feedback, objective); + + // The wrapped harness function, calling out to the LLVM-style harness + let mut harness = |input: &BytesInput| { + let target = input.target_bytes(); + let buf = target.as_slice(); + libfuzzer_test_one_input(buf); + ExitKind::Ok + }; + + let mut tracing_harness = harness; + + // Create the executor for an in-process function with one observer for edge coverage and one for the execution time + let mut executor = TimeoutExecutor::new( + InProcessExecutor::new( + &mut harness, + tuple_list!(edges_observer, time_observer), + &mut fuzzer, + &mut state, + &mut mgr, + )?, + timeout, + ); + + // Setup a tracing stage in which we log comparisons + let tracing = TracingStage::new(TimeoutExecutor::new( + InProcessExecutor::new( + &mut tracing_harness, + tuple_list!(cmplog_observer), + &mut fuzzer, + &mut state, + &mut mgr, + )?, + // Give it more time! + timeout * 10, + )); + + // The order of the stages matter! 
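+ // Calibration must come before the power-scheduled mutations, and tracing must come before the input-to-state stage so that the comparison values it logs are available for replacement.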
+ let mut stages = tuple_list!(calibration, tracing, i2s, power); + + // Read tokens + if let Some(tokenfile) = tokenfile { + if state.metadata().get::().is_none() { + state.add_metadata(Tokens::from_tokens_file(tokenfile)?); + } + } + + // In case the corpus is empty (on first run), reset + if state.corpus().count() < 1 { + state + .load_initial_inputs(&mut fuzzer, &mut executor, &mut mgr, &[seed_dir.clone()]) + .unwrap_or_else(|_| { + println!("Failed to load initial corpus at {:?}", &seed_dir); + process::exit(0); + }); + println!("We imported {} inputs from disk.", state.corpus().count()); + } + + // Remove target ouput (logs still survive) + #[cfg(unix)] + { + let null_fd = file_null.as_raw_fd(); + dup2(null_fd, io::stdout().as_raw_fd())?; + dup2(null_fd, io::stderr().as_raw_fd())?; + } + // reopen file to make sure we're at the end + log.replace( + OpenOptions::new() + .append(true) + .create(true) + .open(&logfile)?, + ); + + fuzzer.fuzz_loop(&mut stages, &mut executor, &mut state, &mut mgr)?; + + // Never reached + Ok(()) +} + +/// The actual fuzzer based on Grimoire +fn fuzz_text( + corpus_dir: PathBuf, + objective_dir: PathBuf, + seed_dir: PathBuf, + tokenfile: Option, + logfile: PathBuf, + timeout: Duration, +) -> Result<(), Error> { + let log = RefCell::new( + OpenOptions::new() + .append(true) + .create(true) + .open(&logfile)?, + ); + + #[cfg(unix)] + let mut stdout_cpy = unsafe { + let new_fd = dup(io::stdout().as_raw_fd())?; + File::from_raw_fd(new_fd) + }; + #[cfg(unix)] + let file_null = File::open("/dev/null")?; + + // 'While the monitor are state, they are usually used in the broker - which is likely never restarted + let monitor = SimpleMonitor::new(|s| { + #[cfg(unix)] + writeln!(&mut stdout_cpy, "{}", s).unwrap(); + #[cfg(windows)] + println!("{}", s); + writeln!(log.borrow_mut(), "{:?} {}", current_time(), s).unwrap(); + }); + + // We need a shared map to store our state before a crash. + // This way, we are able to continue fuzzing afterwards. + let mut shmem_provider = StdShMemProvider::new()?; + + let (state, mut mgr) = match SimpleRestartingEventManager::launch(monitor, &mut shmem_provider) + { + // The restarting state will spawn the same process again as child, then restarted it each time it crashes. + Ok(res) => res, + Err(err) => match err { + Error::ShuttingDown => { + return Ok(()); + } + _ => { + panic!("Failed to setup the restarter: {}", err); + } + }, + }; + + // Create an observation channel using the coverage map + // We don't use the hitcounts (see the Cargo.toml, we use pcguard_edges) + let edges = unsafe { &mut EDGES_MAP[0..MAX_EDGES_NUM] }; + let edges_observer = HitcountsMapObserver::new(StdMapObserver::new("edges", edges)); + + // Create an observation channel to keep track of the execution time + let time_observer = TimeObserver::new("time"); + + let cmplog = unsafe { &mut CMPLOG_MAP }; + let cmplog_observer = CmpLogObserver::new("cmplog", cmplog, true); + + // The state of the edges feedback. 
+ let feedback_state = MapFeedbackState::with_observer(&edges_observer); + + // Feedback to rate the interestingness of an input + // This one is composed by two Feedbacks in OR + let feedback = feedback_or!( + // New maximization map feedback linked to the edges observer and the feedback state + MaxMapFeedback::new_tracking(&feedback_state, &edges_observer, true, true), + // Time feedback, this one does not need a feedback state + TimeFeedback::new_with_observer(&time_observer) + ); + + // A feedback to choose if an input is a solution or not + let objective = CrashFeedback::new(); + + // If not restarting, create a State from scratch + let mut state = state.unwrap_or_else(|| { + StdState::new( + // RNG + StdRand::with_seed(current_nanos()), + // Corpus that will be evolved, we keep it in memory for performance + OnDiskCorpus::new(corpus_dir).unwrap(), + // Corpus in which we store solutions (crashes in this example), + // on disk so the user can get them after stopping the fuzzer + OnDiskCorpus::new(objective_dir).unwrap(), + // States of the feedbacks. + // They are the data related to the feedbacks that you want to persist in the State. + tuple_list!(feedback_state), + ) + }); + + println!("Let's fuzz :)"); + + // The actual target run starts here. + // Call LLVMFUzzerInitialize() if present. + let args: Vec = env::args().collect(); + if libfuzzer_initialize(&args) == -1 { + println!("Warning: LLVMFuzzerInitialize failed with -1") + } + + let calibration = CalibrationStage::new(&mut state, &edges_observer); + + // Setup a randomic Input2State stage + let i2s = StdMutationalStage::new(StdScheduledMutator::new(tuple_list!(I2SRandReplace::new()))); + + // Setup a MOPT mutator + let mutator = StdMOptMutator::new(&mut state, havoc_mutations().merge(tokens_mutations()), 5)?; + + let power = PowerMutationalStage::new(mutator, PowerSchedule::FAST, &edges_observer); + + let grimoire_mutator = StdScheduledMutator::with_max_iterations( + tuple_list!( + GrimoireExtensionMutator::new(), + GrimoireRecursiveReplacementMutator::new(), + GrimoireStringReplacementMutator::new(), + // give more probability to avoid large inputs + GrimoireRandomDeleteMutator::new(), + GrimoireRandomDeleteMutator::new(), + ), + 3, + ); + let grimoire = StdMutationalStage::new(grimoire_mutator); + + // A minimization+queue policy to get testcasess from the corpus + let scheduler = IndexesLenTimeMinimizerCorpusScheduler::new(PowerQueueCorpusScheduler::new()); + + // A fuzzer with feedbacks and a corpus scheduler + let mut fuzzer = StdFuzzer::new(scheduler, feedback, objective); + + // The wrapped harness function, calling out to the LLVM-style harness + let mut harness = |input: &GeneralizedInput| { + let target = input.target_bytes(); + let buf = target.as_slice(); + libfuzzer_test_one_input(buf); + ExitKind::Ok + }; + + let mut tracing_harness = harness; + + let generalization = GeneralizationStage::new(&edges_observer); + + // Create the executor for an in-process function with one observer for edge coverage and one for the execution time + let mut executor = TimeoutExecutor::new( + InProcessExecutor::new( + &mut harness, + tuple_list!(edges_observer, time_observer), + &mut fuzzer, + &mut state, + &mut mgr, + )?, + timeout, + ); + + // Setup a tracing stage in which we log comparisons + let tracing = TracingStage::new(TimeoutExecutor::new( + InProcessExecutor::new( + &mut tracing_harness, + tuple_list!(cmplog_observer), + &mut fuzzer, + &mut state, + &mut mgr, + )?, + // Give it more time! 
+ timeout * 10, + )); + + // The order of the stages matter! + let mut stages = tuple_list!(generalization, calibration, tracing, i2s, power, grimoire); + + // Read tokens + if let Some(tokenfile) = tokenfile { + if state.metadata().get::().is_none() { + state.add_metadata(Tokens::from_tokens_file(tokenfile)?); + } + } + + // In case the corpus is empty (on first run), reset + if state.corpus().count() < 1 { + state + .load_from_directory( + &mut fuzzer, + &mut executor, + &mut mgr, + &seed_dir, + false, + &mut |_, _, path| GeneralizedInput::from_bytes_file(path), + ) + .unwrap_or_else(|_| { + println!("Failed to load initial corpus at {:?}", &seed_dir); + process::exit(0); + }); + println!("We imported {} inputs from disk.", state.corpus().count()); + } + + // Remove target ouput (logs still survive) + #[cfg(unix)] + { + let null_fd = file_null.as_raw_fd(); + dup2(null_fd, io::stdout().as_raw_fd())?; + dup2(null_fd, io::stderr().as_raw_fd())?; + } + // reopen file to make sure we're at the end + log.replace( + OpenOptions::new() + .append(true) + .create(true) + .open(&logfile)?, + ); + + fuzzer.fuzz_loop(&mut stages, &mut executor, &mut state, &mut mgr)?; + + // Never reached + Ok(()) +} diff --git a/libafl/src/corpus/testcase.rs b/libafl/src/corpus/testcase.rs index 2a2594c744..de7c15b493 100644 --- a/libafl/src/corpus/testcase.rs +++ b/libafl/src/corpus/testcase.rs @@ -99,7 +99,8 @@ where /// Set the input #[inline] - pub fn set_input(&mut self, input: I) { + pub fn set_input(&mut self, mut input: I) { + input.wrapped_as_testcase(); self.input = Some(input); } @@ -157,19 +158,22 @@ where where T: Into, { - Testcase { + let mut slf = Testcase { input: Some(input.into()), filename: None, metadata: SerdeAnyMap::new(), exec_time: None, cached_len: None, executions: 0, - } + }; + slf.input.as_mut().unwrap().wrapped_as_testcase(); + slf } /// Create a new Testcase instance given an [`Input`] and a `filename` #[inline] - pub fn with_filename(input: I, filename: String) -> Self { + pub fn with_filename(mut input: I, filename: String) -> Self { + input.wrapped_as_testcase(); Testcase { input: Some(input), filename: Some(filename), @@ -182,7 +186,8 @@ where /// Create a new Testcase instance given an [`Input`] and the number of executions #[inline] - pub fn with_executions(input: I, executions: usize) -> Self { + pub fn with_executions(mut input: I, executions: usize) -> Self { + input.wrapped_as_testcase(); Testcase { input: Some(input), filename: None, diff --git a/libafl/src/inputs/bytes.rs b/libafl/src/inputs/bytes.rs index 67ec943eff..1c6c94f954 100644 --- a/libafl/src/inputs/bytes.rs +++ b/libafl/src/inputs/bytes.rs @@ -105,18 +105,3 @@ impl BytesInput { Self { bytes } } } - -#[cfg(test)] -mod tests { - use crate::bolts::rands::{Rand, StdRand}; - - #[test] - fn test_input() { - let mut rand = StdRand::with_seed(0); - assert_ne!(rand.next(), rand.next()); - assert!(rand.below(100) < 100); - assert_eq!(rand.below(1), 0); - assert_eq!(rand.between(10, 10), 10); - assert!(rand.between(11, 20) > 10); - } -} diff --git a/libafl/src/inputs/generalized.rs b/libafl/src/inputs/generalized.rs new file mode 100644 index 0000000000..b7bcaa8705 --- /dev/null +++ b/libafl/src/inputs/generalized.rs @@ -0,0 +1,224 @@ +//! 
The `GeneralizedInput` is an input that can be generalized to represent a rule, used by Grimoire + +use ahash::AHasher; +use alloc::{borrow::ToOwned, rc::Rc, string::String, vec::Vec}; +use core::hash::Hasher; +use core::{cell::RefCell, convert::From}; +use serde::{Deserialize, Serialize}; + +#[cfg(feature = "std")] +use crate::Error; +#[cfg(feature = "std")] +use std::{fs::File, io::Read, path::Path}; + +use crate::{ + bolts::{ownedref::OwnedSlice, HasLen}, + inputs::{HasBytesVec, HasTargetBytes, Input}, +}; + +/// An item of the generalized input +#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)] +pub enum GeneralizedItem { + /// Real bytes + Bytes(Vec<u8>), + /// An insertion point + Gap, +} + +/// A bytes input with a generalized version mainly used for Grimoire +#[derive(Serialize, Deserialize, Clone, Debug, Default, PartialEq, Eq)] +pub struct GeneralizedInput { + /// The raw input bytes + bytes: Vec<u8>, + generalized: Option<Vec<GeneralizedItem>>, + /// Whether it was mutated by Grimoire or not + pub grimoire_mutated: bool, +} + +impl Input for GeneralizedInput { + /// Generate a name for this input + fn generate_name(&self, _idx: usize) -> String { + let mut hasher = AHasher::new_with_keys(0, 0); + // TODO add generalized + hasher.write(self.bytes()); + format!("{:016x}", hasher.finish()) + } + + /// A hook executed before being added to the corpus + fn wrapped_as_testcase(&mut self) { + // remove generalized for inputs generated with bit-level mutations + if !self.grimoire_mutated { + self.generalized = None; + } + // restore to allow bit-level mutations + self.grimoire_mutated = false; + } +} + +/// Rc Ref-cell from Input +impl From<GeneralizedInput> for Rc<RefCell<GeneralizedInput>> { + fn from(input: GeneralizedInput) -> Self { + Rc::new(RefCell::new(input)) + } +} + +impl HasBytesVec for GeneralizedInput { + #[inline] + fn bytes(&self) -> &[u8] { + &self.bytes + } + + #[inline] + fn bytes_mut(&mut self) -> &mut Vec<u8> { + &mut self.bytes + } +} + +impl HasTargetBytes for GeneralizedInput { + #[inline] + fn target_bytes(&self) -> OwnedSlice<u8> { + if self.grimoire_mutated { + OwnedSlice::from(self.generalized_to_bytes()) + } else { + OwnedSlice::from(&self.bytes) + } + } +} + +impl HasLen for GeneralizedInput { + #[inline] + fn len(&self) -> usize { + self.bytes.len() + } +} + +impl From<Vec<u8>> for GeneralizedInput { + fn from(bytes: Vec<u8>) -> Self { + Self::new(bytes) + } +} + +impl From<&[u8]> for GeneralizedInput { + fn from(bytes: &[u8]) -> Self { + Self::new(bytes.to_owned()) + } +} + +impl GeneralizedInput { + /// Creates a new bytes input using the given bytes + #[must_use] + pub fn new(bytes: Vec<u8>) -> Self { + Self { + bytes, + generalized: None, + grimoire_mutated: false, + } + } + + /// Fill the generalized vector from a slice of options (None -> Gap) + pub fn generalized_from_options(&mut self, v: &[Option<u8>]) { + let mut res = vec![]; + let mut bytes = vec![]; + if v.first() != Some(&None) { + res.push(GeneralizedItem::Gap); + } + for e in v { + match e { + None => { + if !bytes.is_empty() { + res.push(GeneralizedItem::Bytes(bytes.clone())); + bytes.clear(); + } + res.push(GeneralizedItem::Gap); + } + Some(b) => { + bytes.push(*b); + } + } + } + if !bytes.is_empty() { + res.push(GeneralizedItem::Bytes(bytes)); + } + if res.last() != Some(&GeneralizedItem::Gap) { + res.push(GeneralizedItem::Gap); + } + self.generalized = Some(res); + } + + /// Extend the generalized input + pub fn generalized_extend(&mut self, other: &[GeneralizedItem]) { + let gen = self.generalized.get_or_insert_with(Vec::new); + if gen.last().is_some() + && other.first().is_some() 
+ && *gen.last().unwrap() == GeneralizedItem::Gap + && *other.first().unwrap() == GeneralizedItem::Gap + { + gen.extend_from_slice(&other[1..]); + } else { + gen.extend_from_slice(other); + } + } + + /// Get the size of the generalized + #[must_use] + pub fn generalized_len(&self) -> usize { + match &self.generalized { + None => 0, + Some(gen) => { + let mut size = 0; + for item in gen { + match item { + GeneralizedItem::Bytes(b) => size += b.len(), + GeneralizedItem::Gap => size += 1, + } + } + size + } + } + } + + /// Convert generalized to bytes + #[must_use] + pub fn generalized_to_bytes(&self) -> Vec { + match &self.generalized { + None => vec![], + Some(gen) => { + let mut bytes = vec![]; + for item in gen { + if let GeneralizedItem::Bytes(b) = item { + bytes.extend_from_slice(b); + } + } + bytes + } + } + } + + /// Get the generalized input + #[must_use] + pub fn generalized(&self) -> Option<&[GeneralizedItem]> { + self.generalized.as_deref() + } + + /// Get the generalized input (mut) + pub fn generalized_mut(&mut self) -> &mut Option> { + &mut self.generalized + } + + /// Load from a plain file of bytes + #[must_use] + #[cfg(feature = "std")] + pub fn from_bytes_file
<P>
(path: P) -> Result + where + P: AsRef, + { + let mut file = File::open(path)?; + let mut bytes: Vec = vec![]; + file.read_to_end(&mut bytes)?; + Ok(Self { + bytes, + generalized: None, + grimoire_mutated: false, + }) + } +} diff --git a/libafl/src/inputs/mod.rs b/libafl/src/inputs/mod.rs index 50b5baafa2..b95e5c7700 100644 --- a/libafl/src/inputs/mod.rs +++ b/libafl/src/inputs/mod.rs @@ -9,6 +9,9 @@ pub use encoded::*; pub mod gramatron; pub use gramatron::*; +pub mod generalized; +pub use generalized::*; + #[cfg(feature = "nautilus")] pub mod nautilus; #[cfg(feature = "nautilus")] @@ -64,6 +67,9 @@ pub trait Input: Clone + Serialize + serde::de::DeserializeOwned + Debug { /// Generate a name for this input fn generate_name(&self, idx: usize) -> String; + + /// An hook executed if the input is stored as `Testcase` + fn wrapped_as_testcase(&mut self) {} } /// An input for tests, mainly. There is no real use much else. @@ -80,6 +86,7 @@ impl HasTargetBytes for NopInput { } } +// TODO change this to fn target_bytes(&self, buffer: &mut Vec) -> &[u8]; /// Can be represented with a vector of bytes /// This representation is not necessarily deserializable /// Instead, it can be used as bytes input for a target diff --git a/libafl/src/mutators/grimoire.rs b/libafl/src/mutators/grimoire.rs new file mode 100644 index 0000000000..0c1a7eb6ed --- /dev/null +++ b/libafl/src/mutators/grimoire.rs @@ -0,0 +1,409 @@ +//! Grimoire is the rewritten grimoire mutator in rust. +//! See the original repo [`Grimoire`](https://github.com/RUB-SysSec/grimoire) for more details. + +use alloc::vec::Vec; +use core::cmp::{max, min}; + +use crate::{ + bolts::{rands::Rand, tuples::Named}, + corpus::Corpus, + inputs::{GeneralizedInput, GeneralizedItem}, + mutators::{token_mutations::Tokens, MutationResult, Mutator}, + stages::generalization::GeneralizedIndexesMetadata, + state::{HasCorpus, HasMetadata, HasRand}, + Error, +}; + +const RECURSIVE_REPLACEMENT_DEPTH: [usize; 6] = [2, 4, 8, 16, 32, 64]; +const MAX_RECURSIVE_REPLACEMENT_LEN: usize = 64 << 10; +const CHOOSE_SUBINPUT_PROB: u64 = 50; + +fn extend_with_random_generalized( + state: &mut S, + items: &mut Vec, + gap_indices: &mut Vec, +) -> Result<(), Error> +where + S: HasMetadata + HasRand + HasCorpus, +{ + let rand_idx = state.rand_mut().next() as usize; + + let idx = { + let meta = state.metadata_mut().get_mut::().ok_or_else(|| { + Error::KeyNotFound("GeneralizedIndexesMetadata needed by extend_with_random_generalized() not found, make sure that you have GeneralizationStage in".into()) + })?; + + *meta + .indexes + .iter() + .nth(rand_idx % meta.indexes.len()) + .unwrap() + }; + + /*if state + .corpus() + .get(idx)? + .borrow_mut() + .load_input()? 
+ .generalized() + .is_none() + { + return Ok(true); + }*/ + + if state.rand_mut().below(100) > CHOOSE_SUBINPUT_PROB { + if state.rand_mut().below(100) < 50 { + let rand1 = state.rand_mut().next() as usize; + let rand2 = state.rand_mut().next() as usize; + + let mut other_testcase = state.corpus().get(idx)?.borrow_mut(); + let other = other_testcase.load_input()?; + + if other.generalized_len() > 0 { + let gen = other.generalized().unwrap(); + + for (i, _) in gen + .iter() + .enumerate() + .filter(|&(_, x)| *x == GeneralizedItem::Gap) + { + gap_indices.push(i); + } + let min_idx = gap_indices[rand1 % gap_indices.len()]; + let max_idx = gap_indices[rand2 % gap_indices.len()]; + let (mut min_idx, max_idx) = (min(min_idx, max_idx), max(min_idx, max_idx)); + + gap_indices.clear(); + + if items.last() == Some(&GeneralizedItem::Gap) { + min_idx += 1; + } + items.extend_from_slice(&gen[min_idx..=max_idx]); + + debug_assert!(items.first() == Some(&GeneralizedItem::Gap)); + debug_assert!(items.last() == Some(&GeneralizedItem::Gap)); + + return Ok(()); + } + } + + let rand1 = state.rand_mut().next() as usize; + + if let Some(meta) = state.metadata().get::() { + if !meta.tokens().is_empty() { + let tok = &meta.tokens()[rand1 % meta.tokens().len()]; + if items.last() != Some(&GeneralizedItem::Gap) { + items.push(GeneralizedItem::Gap); + } + items.push(GeneralizedItem::Bytes(tok.clone())); + items.push(GeneralizedItem::Gap); + + debug_assert!(items.first() == Some(&GeneralizedItem::Gap)); + debug_assert!(items.last() == Some(&GeneralizedItem::Gap)); + + return Ok(()); + } + } + } + + let mut other_testcase = state.corpus().get(idx)?.borrow_mut(); + let other = other_testcase.load_input()?; + let gen = other.generalized().unwrap(); + + if items.last() == Some(&GeneralizedItem::Gap) && gen.first() == Some(&GeneralizedItem::Gap) { + items.extend_from_slice(&gen[1..]); + } else { + items.extend_from_slice(gen); + } + + debug_assert!(items.first() == Some(&GeneralizedItem::Gap)); + debug_assert!(items.last() == Some(&GeneralizedItem::Gap)); + + Ok(()) +} + +/// Extend the generalized input with another random one from the corpus +#[derive(Debug, Default)] +pub struct GrimoireExtensionMutator { + gap_indices: Vec, +} + +impl Mutator for GrimoireExtensionMutator +where + S: HasMetadata + HasRand + HasCorpus, +{ + fn mutate( + &mut self, + state: &mut S, + input: &mut GeneralizedInput, + _stage_idx: i32, + ) -> Result { + if input.generalized().is_none() { + return Ok(MutationResult::Skipped); + } + + extend_with_random_generalized( + state, + input.generalized_mut().as_mut().unwrap(), + &mut self.gap_indices, + )?; + + input.grimoire_mutated = true; + Ok(MutationResult::Mutated) + } +} + +impl Named for GrimoireExtensionMutator { + fn name(&self) -> &str { + "GrimoireExtensionMutator" + } +} + +impl GrimoireExtensionMutator { + /// Creates a new [`GrimoireExtensionMutator`]. 
+ #[must_use] + pub fn new() -> Self { + Self { + gap_indices: vec![], + } + } +} + +/// Extend the generalized input with another random one from the corpus +#[derive(Debug, Default)] +pub struct GrimoireRecursiveReplacementMutator { + scratch: Vec, + gap_indices: Vec, +} + +impl Mutator for GrimoireRecursiveReplacementMutator +where + S: HasMetadata + HasRand + HasCorpus, +{ + fn mutate( + &mut self, + state: &mut S, + input: &mut GeneralizedInput, + _stage_idx: i32, + ) -> Result { + if input.generalized().is_none() { + return Ok(MutationResult::Skipped); + } + + let mut mutated = MutationResult::Skipped; + + let depth = *state.rand_mut().choose(&RECURSIVE_REPLACEMENT_DEPTH); + for _ in 0..depth { + if input.generalized_len() >= MAX_RECURSIVE_REPLACEMENT_LEN { + break; + } + + let gen = input.generalized_mut().as_mut().unwrap(); + + for (i, _) in gen + .iter() + .enumerate() + .filter(|&(_, x)| *x == GeneralizedItem::Gap) + { + self.gap_indices.push(i); + } + let selected = *state.rand_mut().choose(&self.gap_indices); + self.gap_indices.clear(); + + self.scratch.extend_from_slice(&gen[selected + 1..]); + gen.truncate(selected); + + extend_with_random_generalized(state, gen, &mut self.gap_indices)?; + + gen.extend_from_slice(&self.scratch); + self.scratch.clear(); + + mutated = MutationResult::Mutated; + input.grimoire_mutated = true; + } + + Ok(mutated) + } +} + +impl Named for GrimoireRecursiveReplacementMutator { + fn name(&self) -> &str { + "GrimoireRecursiveReplacementMutator" + } +} + +impl GrimoireRecursiveReplacementMutator { + /// Creates a new [`GrimoireRecursiveReplacementMutator`]. + #[must_use] + pub fn new() -> Self { + Self { + scratch: vec![], + gap_indices: vec![], + } + } +} + +/// Replace matching tokens with others from the tokens metadata +#[derive(Debug, Default)] +pub struct GrimoireStringReplacementMutator {} + +impl Mutator for GrimoireStringReplacementMutator +where + S: HasMetadata + HasRand, +{ + fn mutate( + &mut self, + state: &mut S, + input: &mut GeneralizedInput, + _stage_idx: i32, + ) -> Result { + if input.generalized().is_none() { + return Ok(MutationResult::Skipped); + } + + let tokens_len = { + let meta = state.metadata().get::(); + if meta.is_none() { + return Ok(MutationResult::Skipped); + } + if meta.unwrap().tokens().is_empty() { + return Ok(MutationResult::Skipped); + } + meta.unwrap().tokens().len() + }; + let token_find = state.rand_mut().below(tokens_len as u64) as usize; + let mut token_replace = state.rand_mut().below(tokens_len as u64) as usize; + if token_find == token_replace { + token_replace = state.rand_mut().below(tokens_len as u64) as usize; + } + + let stop_at_first = state.rand_mut().below(100) > 50; + let mut rand_idx = state.rand_mut().next() as usize; + + let meta = state.metadata().get::().unwrap(); + let token_1 = &meta.tokens()[token_find]; + let token_2 = &meta.tokens()[token_replace]; + + let mut mutated = MutationResult::Skipped; + + let gen = input.generalized_mut().as_mut().unwrap(); + rand_idx %= gen.len(); + + 'first: for item in &mut gen[..rand_idx] { + if let GeneralizedItem::Bytes(bytes) = item { + if bytes.len() < token_1.len() { + continue; + } + for i in 0..(bytes.len() - token_1.len()) { + if bytes[i..].starts_with(token_1) { + bytes.splice(i..(i + token_1.len()), token_2.clone()); + + mutated = MutationResult::Mutated; + if stop_at_first { + break 'first; + } + } + } + } + } + if mutated == MutationResult::Skipped || !stop_at_first { + 'second: for item in &mut gen[rand_idx..] 
{ + if let GeneralizedItem::Bytes(bytes) = item { + if bytes.len() < token_1.len() { + continue; + } + for i in 0..(bytes.len() - token_1.len()) { + if bytes[i..].starts_with(token_1) { + bytes.splice(i..(i + token_1.len()), token_2.clone()); + + mutated = MutationResult::Mutated; + if stop_at_first { + break 'second; + } + } + } + } + } + } + + input.grimoire_mutated = true; + Ok(mutated) + } +} + +impl Named for GrimoireStringReplacementMutator { + fn name(&self) -> &str { + "GrimoireStringReplacementMutator" + } +} + +impl GrimoireStringReplacementMutator { + /// Creates a new [`GrimoireExtensionMutator`]. + #[must_use] + pub fn new() -> Self { + Self::default() + } +} + +/// Randomly delete a part of the generalized input +#[derive(Debug, Default)] +pub struct GrimoireRandomDeleteMutator { + gap_indices: Vec, +} + +impl Mutator for GrimoireRandomDeleteMutator +where + S: HasMetadata + HasRand + HasCorpus, +{ + fn mutate( + &mut self, + state: &mut S, + input: &mut GeneralizedInput, + _stage_idx: i32, + ) -> Result { + if input.generalized().is_none() { + return Ok(MutationResult::Skipped); + } + + input.grimoire_mutated = true; + let gen = input.generalized_mut().as_mut().unwrap(); + + for (i, _) in gen + .iter() + .enumerate() + .filter(|&(_, x)| *x == GeneralizedItem::Gap) + { + self.gap_indices.push(i); + } + let min_idx = + self.gap_indices[state.rand_mut().below(self.gap_indices.len() as u64) as usize]; + let max_idx = + self.gap_indices[state.rand_mut().below(self.gap_indices.len() as u64) as usize]; + let (min_idx, max_idx) = (min(min_idx, max_idx), max(min_idx, max_idx)); + + self.gap_indices.clear(); + + if min_idx == max_idx { + Ok(MutationResult::Skipped) + } else { + gen.drain(min_idx..max_idx); + Ok(MutationResult::Mutated) + } + } +} + +impl Named for GrimoireRandomDeleteMutator { + fn name(&self) -> &str { + "GrimoireRandomDeleteMutator" + } +} + +impl GrimoireRandomDeleteMutator { + /// Creates a new [`GrimoireExtensionMutator`]. + #[must_use] + pub fn new() -> Self { + Self { + gap_indices: vec![], + } + } +} diff --git a/libafl/src/mutators/mod.rs b/libafl/src/mutators/mod.rs index f4eb777a9a..c8901988bb 100644 --- a/libafl/src/mutators/mod.rs +++ b/libafl/src/mutators/mod.rs @@ -12,6 +12,8 @@ pub mod mopt_mutator; pub use mopt_mutator::*; pub mod gramatron; pub use gramatron::*; +pub mod grimoire; +pub use grimoire::*; #[cfg(feature = "nautilus")] pub mod nautilus; diff --git a/libafl/src/observers/map.rs b/libafl/src/observers/map.rs index f0bb09a6f1..a0f18db0fc 100644 --- a/libafl/src/observers/map.rs +++ b/libafl/src/observers/map.rs @@ -96,6 +96,19 @@ pub trait MapObserver: HasLen + Named + Serialize + serde::de::DeserializeOwned } res } + + /// Get the number of set entries with the specified indexes + fn how_many_set(&self, indexes: &[usize]) -> usize { + let initial = self.initial(); + let cnt = self.usable_count(); + let mut res = 0; + for i in indexes { + if *i < cnt && *self.get(*i) != initial { + res += 1; + } + } + res + } } /// The Map Observer retrieves the state of a map, diff --git a/libafl/src/stages/generalization.rs b/libafl/src/stages/generalization.rs new file mode 100644 index 0000000000..aed82790eb --- /dev/null +++ b/libafl/src/stages/generalization.rs @@ -0,0 +1,507 @@ +//! The tracing stage can trace the target and enrich a testcase with metadata, for example for `CmpLog`. 
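+//! This module provides the [`GeneralizationStage`] used by Grimoire: it re-executes a corpus entry with byte ranges removed and, whenever the recorded map novelties are still reached, replaces those ranges with gaps in the resulting [`GeneralizedInput`].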
+ +use alloc::{ + string::{String, ToString}, + vec::Vec, +}; +use core::{fmt::Debug, marker::PhantomData}; +use hashbrown::HashSet; +use serde::{Deserialize, Serialize}; + +use crate::{ + bolts::AsSlice, + corpus::Corpus, + executors::{Executor, HasObservers}, + feedbacks::map::MapNoveltiesMetadata, + inputs::{GeneralizedInput, GeneralizedItem, HasBytesVec}, + mark_feature_time, + observers::{MapObserver, ObserversTuple}, + stages::Stage, + start_timer, + state::{HasClientPerfMonitor, HasCorpus, HasExecutions, HasMetadata}, + Error, +}; + +#[cfg(feature = "introspection")] +use crate::monitors::PerfFeature; + +const MAX_GENERALIZED_LEN: usize = 8192; + +/// A state metadata holding the set of indexes related to the generalized corpus entries +#[derive(Debug, Default, Serialize, Deserialize)] +pub struct GeneralizedIndexesMetadata { + /// The set of indexes + pub indexes: HashSet, +} + +crate::impl_serdeany!(GeneralizedIndexesMetadata); + +impl GeneralizedIndexesMetadata { + /// Create the metadata + #[must_use] + pub fn new() -> Self { + Self::default() + } +} + +fn increment_by_offset(_list: &[Option], idx: usize, off: u8) -> usize { + idx + 1 + off as usize +} + +fn find_next_char(list: &[Option], mut idx: usize, ch: u8) -> usize { + while idx < list.len() { + if list[idx] == Some(ch) { + return idx + 1; + } + idx += 1; + } + idx +} + +/// A stage that runs a tracer executor +#[derive(Clone, Debug)] +pub struct GeneralizationStage +where + O: MapObserver, + OT: ObserversTuple, + S: HasClientPerfMonitor + HasExecutions + HasMetadata + HasCorpus, +{ + map_observer_name: String, + #[allow(clippy::type_complexity)] + phantom: PhantomData<(EM, O, OT, S, Z)>, +} + +impl Stage for GeneralizationStage +where + O: MapObserver, + E: Executor + HasObservers, + OT: ObserversTuple, + S: HasClientPerfMonitor + HasExecutions + HasMetadata + HasCorpus, +{ + #[inline] + #[allow(clippy::too_many_lines)] + fn perform( + &mut self, + fuzzer: &mut Z, + executor: &mut E, + state: &mut S, + manager: &mut EM, + corpus_idx: usize, + ) -> Result<(), Error> { + if state + .metadata() + .get::() + .is_none() + { + state.add_metadata(GeneralizedIndexesMetadata::new()); + } + + let (mut payload, original, novelties) = { + start_timer!(state); + state.corpus().get(corpus_idx)?.borrow_mut().load_input()?; + mark_feature_time!(state, PerfFeature::GetInputFromCorpus); + let mut entry = state.corpus().get(corpus_idx)?.borrow_mut(); + let input = entry.input_mut().as_mut().unwrap(); + + if input.generalized().is_some() { + drop(entry); + state + .metadata_mut() + .get_mut::() + .unwrap() + .indexes + .insert(corpus_idx); + return Ok(()); + } + + let payload: Vec<_> = input.bytes().iter().map(|&x| Some(x)).collect(); + let original = input.clone(); + let meta = entry.metadata().get::().ok_or_else(|| { + Error::KeyNotFound(format!( + "MapNoveltiesMetadata needed for GeneralizationStage not found in testcase #{} (check the arguments of MapFeedback::new(...))", + corpus_idx + )) + })?; + (payload, original, meta.as_slice().to_vec()) + }; + + // Do not generalized unstable inputs + if !self.verify_input(fuzzer, executor, state, manager, &novelties, &original)? 
{ + return Ok(()); + } + + self.find_gaps( + fuzzer, + executor, + state, + manager, + &mut payload, + &novelties, + increment_by_offset, + 255, + )?; + self.find_gaps( + fuzzer, + executor, + state, + manager, + &mut payload, + &novelties, + increment_by_offset, + 127, + )?; + self.find_gaps( + fuzzer, + executor, + state, + manager, + &mut payload, + &novelties, + increment_by_offset, + 63, + )?; + self.find_gaps( + fuzzer, + executor, + state, + manager, + &mut payload, + &novelties, + increment_by_offset, + 31, + )?; + self.find_gaps( + fuzzer, + executor, + state, + manager, + &mut payload, + &novelties, + increment_by_offset, + 0, + )?; + + self.find_gaps( + fuzzer, + executor, + state, + manager, + &mut payload, + &novelties, + find_next_char, + b'.', + )?; + self.find_gaps( + fuzzer, + executor, + state, + manager, + &mut payload, + &novelties, + find_next_char, + b';', + )?; + self.find_gaps( + fuzzer, + executor, + state, + manager, + &mut payload, + &novelties, + find_next_char, + b',', + )?; + self.find_gaps( + fuzzer, + executor, + state, + manager, + &mut payload, + &novelties, + find_next_char, + b'\n', + )?; + self.find_gaps( + fuzzer, + executor, + state, + manager, + &mut payload, + &novelties, + find_next_char, + b'\r', + )?; + self.find_gaps( + fuzzer, + executor, + state, + manager, + &mut payload, + &novelties, + find_next_char, + b'#', + )?; + self.find_gaps( + fuzzer, + executor, + state, + manager, + &mut payload, + &novelties, + find_next_char, + b' ', + )?; + + self.find_gaps_in_closures( + fuzzer, + executor, + state, + manager, + &mut payload, + &novelties, + b'(', + b')', + )?; + self.find_gaps_in_closures( + fuzzer, + executor, + state, + manager, + &mut payload, + &novelties, + b'[', + b']', + )?; + self.find_gaps_in_closures( + fuzzer, + executor, + state, + manager, + &mut payload, + &novelties, + b'{', + b'}', + )?; + self.find_gaps_in_closures( + fuzzer, + executor, + state, + manager, + &mut payload, + &novelties, + b'<', + b'>', + )?; + self.find_gaps_in_closures( + fuzzer, + executor, + state, + manager, + &mut payload, + &novelties, + b'\'', + b'\'', + )?; + self.find_gaps_in_closures( + fuzzer, + executor, + state, + manager, + &mut payload, + &novelties, + b'"', + b'"', + )?; + + if payload.len() <= MAX_GENERALIZED_LEN { + // Save the modified input in the corpus + { + let mut entry = state.corpus().get(corpus_idx)?.borrow_mut(); + entry.load_input()?; + entry + .input_mut() + .as_mut() + .unwrap() + .generalized_from_options(&payload); + entry.store_input()?; + + debug_assert!( + entry.load_input()?.generalized().unwrap().first() + == Some(&GeneralizedItem::Gap) + ); + debug_assert!( + entry.load_input()?.generalized().unwrap().last() + == Some(&GeneralizedItem::Gap) + ); + } + + state + .metadata_mut() + .get_mut::() + .unwrap() + .indexes + .insert(corpus_idx); + } + + Ok(()) + } +} + +impl GeneralizationStage +where + O: MapObserver, + OT: ObserversTuple, + S: HasClientPerfMonitor + HasExecutions + HasMetadata + HasCorpus, +{ + /// Create a new [`GeneralizationStage`]. 
+ #[must_use] + pub fn new(map_observer: &O) -> Self { + Self { + map_observer_name: map_observer.name().to_string(), + phantom: PhantomData, + } + } + + /// Create a new [`GeneralizationStage`] from name + #[must_use] + pub fn from_name(map_observer_name: &str) -> Self { + Self { + map_observer_name: map_observer_name.to_string(), + phantom: PhantomData, + } + } + + fn verify_input( + &self, + fuzzer: &mut Z, + executor: &mut E, + state: &mut S, + manager: &mut EM, + novelties: &[usize], + input: &GeneralizedInput, + ) -> Result + where + E: Executor + HasObservers, + { + start_timer!(state); + executor.observers_mut().pre_exec_all(state, input)?; + mark_feature_time!(state, PerfFeature::PreExecObservers); + + start_timer!(state); + let _ = executor.run_target(fuzzer, state, manager, input)?; + mark_feature_time!(state, PerfFeature::TargetExecution); + + *state.executions_mut() += 1; + + start_timer!(state); + executor.observers_mut().post_exec_all(state, input)?; + mark_feature_time!(state, PerfFeature::PostExecObservers); + + let cnt = executor + .observers() + .match_name::(&self.map_observer_name) + .ok_or_else(|| Error::KeyNotFound("MapObserver not found".to_string()))? + .how_many_set(novelties); + + Ok(cnt == novelties.len()) + } + + fn trim_payload(payload: &mut Vec>) { + let mut previous = false; + payload.retain(|&x| !(x.is_none() & core::mem::replace(&mut previous, x.is_none()))); + } + + #[allow(clippy::too_many_arguments)] + fn find_gaps( + &self, + fuzzer: &mut Z, + executor: &mut E, + state: &mut S, + manager: &mut EM, + payload: &mut Vec>, + novelties: &[usize], + find_next_index: fn(&[Option], usize, u8) -> usize, + split_char: u8, + ) -> Result<(), Error> + where + E: Executor + HasObservers, + { + let mut start = 0; + while start < payload.len() { + let mut end = find_next_index(payload, start, split_char); + if end > payload.len() { + end = payload.len(); + } + let mut candidate = GeneralizedInput::new(vec![]); + candidate + .bytes_mut() + .extend(payload[..start].iter().flatten()); + candidate + .bytes_mut() + .extend(payload[end..].iter().flatten()); + + if self.verify_input(fuzzer, executor, state, manager, novelties, &candidate)? { + for item in &mut payload[start..end] { + *item = None; + } + } + + start = end; + } + + Self::trim_payload(payload); + Ok(()) + } + + #[allow(clippy::too_many_arguments)] + fn find_gaps_in_closures( + &self, + fuzzer: &mut Z, + executor: &mut E, + state: &mut S, + manager: &mut EM, + payload: &mut Vec>, + novelties: &[usize], + opening_char: u8, + closing_char: u8, + ) -> Result<(), Error> + where + E: Executor + HasObservers, + { + let mut index = 0; + while index < payload.len() { + // Find start index + while index < payload.len() { + if payload[index] == Some(opening_char) { + break; + } + index += 1; + } + let mut start = index; + let mut end = payload.len() - 1; + // Process every ending + while end > start { + if payload[end] == Some(closing_char) { + let mut candidate = GeneralizedInput::new(vec![]); + candidate + .bytes_mut() + .extend(payload[..start].iter().flatten()); + candidate + .bytes_mut() + .extend(payload[end..].iter().flatten()); + + if self.verify_input(fuzzer, executor, state, manager, novelties, &candidate)? 
{ + for item in &mut payload[start..end] { + *item = None; + } + } + start = end; + } + end -= 1; + } + } + + Self::trim_payload(payload); + Ok(()) + } +} diff --git a/libafl/src/stages/mod.rs b/libafl/src/stages/mod.rs index 9d703db006..e99e5fe672 100644 --- a/libafl/src/stages/mod.rs +++ b/libafl/src/stages/mod.rs @@ -19,6 +19,9 @@ pub use calibrate::{CalibrationStage, PowerScheduleMetadata}; pub mod power; pub use power::PowerMutationalStage; +pub mod generalization; +pub use generalization::GeneralizationStage; + pub mod owned; pub use owned::StagesOwnedList; diff --git a/libafl_qemu/libqasan/libqasan.c b/libafl_qemu/libqasan/libqasan.c index 6ea24f085a..a64db10f8e 100644 --- a/libafl_qemu/libqasan/libqasan.c +++ b/libafl_qemu/libqasan/libqasan.c @@ -72,8 +72,6 @@ __attribute__((constructor)) void __libqasan_init() { if (getenv("AFL_INST_LIBS") || getenv("QASAN_HOTPACH")) __libqasan_hotpatch(); - if (getenv("AFL_INST_LIBS") || getenv("QASAN_HOTPACH")) __libqasan_hotpatch(); - #ifdef DEBUG __qasan_debug = getenv("QASAN_DEBUG") != NULL; #endif diff --git a/libafl_qemu/src/emu.rs b/libafl_qemu/src/emu.rs index ff2ce683c1..5c32e42d9b 100644 --- a/libafl_qemu/src/emu.rs +++ b/libafl_qemu/src/emu.rs @@ -479,20 +479,15 @@ impl Emulator { } pub fn map_private(&self, addr: u64, size: usize, perms: MmapPerms) -> Result { - self.mmap( - addr, - size, - perms.into(), - libc::MAP_PRIVATE | libc::MAP_ANONYMOUS, - ) - .map_err(|_| format!("Failed to map {}", addr)) + self.mmap(addr, size, perms, libc::MAP_PRIVATE | libc::MAP_ANONYMOUS) + .map_err(|_| format!("Failed to map {}", addr)) } pub fn map_fixed(&self, addr: u64, size: usize, perms: MmapPerms) -> Result { self.mmap( addr, size, - perms.into(), + perms, libc::MAP_FIXED | libc::MAP_PRIVATE | libc::MAP_ANONYMOUS, ) .map_err(|_| format!("Failed to map {}", addr))
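A minimal sketch of how the generalized representation introduced above behaves, assuming only the `GeneralizedInput` API from this diff (the snippet itself is not part of the patch): gaps mark positions where generalization found removable bytes, and converting back to bytes drops them again.

use libafl::inputs::GeneralizedInput;

fn main() {
    let mut input = GeneralizedInput::new(b"abc".to_vec());
    // None marks a byte that generalization found to be removable.
    input.generalized_from_options(&[Some(b'a'), Some(b'b'), None, Some(b'c')]);
    // The generalized form is [Gap, Bytes("ab"), Gap, Bytes("c"), Gap]:
    // gaps surround the input and replace every run of removed bytes.
    assert_eq!(input.generalized().unwrap().len(), 5);
    // generalized_len() counts one unit per gap plus the remaining bytes.
    assert_eq!(input.generalized_len(), 6);
    // Converting back drops the gaps and keeps only the concrete bytes.
    assert_eq!(input.generalized_to_bytes(), b"abc".to_vec());
}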