diff --git a/.gitignore b/.gitignore index a22d8d298d..71b3d81484 100644 --- a/.gitignore +++ b/.gitignore @@ -35,3 +35,5 @@ AFLplusplus a forkserver_test +__pycache__ +*.lafl_lock diff --git a/Cargo.toml b/Cargo.toml index 1ae51d538e..02b87de87d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,7 @@ members = [ "libafl_concolic/symcc_libafl", "libafl_concolic/test/dump_constraints", "libafl_concolic/test/runtime_test", + "utils/deexit", ] default-members = [ "libafl", diff --git a/Dockerfile b/Dockerfile index c001e04051..a753b547ce 100644 --- a/Dockerfile +++ b/Dockerfile @@ -64,6 +64,8 @@ COPY scripts/dummy.rs libafl_concolic/symcc_runtime/src/lib.rs COPY libafl_concolic/symcc_libafl/Cargo.toml libafl_concolic/symcc_libafl/ COPY scripts/dummy.rs libafl_concolic/symcc_libafl/src/lib.rs +COPY utils utils + RUN cargo build && cargo build --release COPY scripts scripts diff --git a/fuzzers/fuzzbench/Makefile b/fuzzers/fuzzbench/Makefile index 50eb875f89..0d0eacb1c2 100644 --- a/fuzzers/fuzzbench/Makefile +++ b/fuzzers/fuzzbench/Makefile @@ -37,8 +37,8 @@ short_test: all echo a > in/a # Allow sigterm as exit code (timeout 11s ./$(FUZZER_NAME) out in || [ $$? 
# Build and smoke-test helpers for the generic in-memory fuzzer.
FUZZER_NAME="fuzzer"
PROJECT_DIR=$(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))
UNAME := $(shell uname)

# `.PHONY` (with the leading dot) is the special target; a plain `PHONY:` rule is
# just an ordinary target that happens to be first in the file.
.PHONY: all libafl_cxx libafl_cc clean run short_test test

all: fuzzer

target/release/libafl_cxx: src/* src/bin/*
	# Build the LibAFL compiler wrappers (libafl_cc / libafl_cxx)
	cargo build --release

libafl_cxx: target/release/libafl_cxx

libafl_cc: target/release/libafl_cxx

fuzzer: libafl_cc
	target/release/libafl_cc -O3 fuzz.c -o $@

clean:
	# -f: do not fail when the fuzzer was never built
	rm -f ./$(FUZZER_NAME)

run: all
	./$(FUZZER_NAME) --cores 0 &

short_test: all
	rm -rf libafl_unix_shmem_server || true
	RUST_BACKTRACE=1 timeout 10s ./$(FUZZER_NAME) --cores 0 &

test: all
	RUST_BACKTRACE=1 timeout 60s ./$(FUZZER_NAME) --cores 0 &
# Build and smoke-test helpers for the LibAFL-backed Atheris fuzzer.
FUZZER_NAME="fuzzer"
PROJECT_DIR=$(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))
UNAME := $(shell uname)
LIB_DIR :="$(PROJECT_DIR)/target/release/libafl_atheris.a"

# The deexit cdylib is `libdeexit.dylib` on macOS and `libdeexit.so` elsewhere.
ifeq ($(UNAME),Darwin)
DYLIB_EXT := dylib
else
DYLIB_EXT := so
endif
DEEXIT_PATH := $(PROJECT_DIR)/../../target/release/libdeexit.$(DYLIB_EXT)

# `.PHONY` (with the leading dot) is the special target name.
.PHONY: all deexit clean run short_test test

all: fuzzer deexit

deexit: $(DEEXIT_PATH)

$(DEEXIT_PATH):
	cd ../.. && cargo build --release -p deexit

atheris:
	git clone https://github.com/google/atheris.git

target/release/libafl_atheris.a: src/*
	# Build the LibAFL static library that replaces libFuzzer inside Atheris
	cargo build --release

# Recipes run under /bin/sh, so use the POSIX `.` instead of the bash-only `source`.
fuzzer: target/release/libafl_atheris.a atheris env
	(\
		. env/bin/activate && \
		cd atheris && \
		LIBFUZZER_LIB=$(LIB_DIR) pip install . \
	)

clean:
	# `env` is a virtualenv directory, so a plain `rm` cannot remove it
	rm -rf env

run: all
	./$(FUZZER_NAME) --cores 0 &

env:
	python3 -m pip install --user virtualenv
	python3 -m venv env

# LD_PRELOAD is the Linux spelling, the DYLD_* pair is the macOS equivalent.
short_test: all
	(\
		rm -rf libafl_unix_shmem_server || true; \
		. env/bin/activate; \
		LD_PRELOAD="$(DEEXIT_PATH)" DYLD_FORCE_FLAT_NAMESPACE=1 DYLD_INSERT_LIBRARIES="$(DEEXIT_PATH)" RUST_BACKTRACE=1 timeout 10s python3 ./atheris/example_fuzzers/fuzzing_example.py --cores 0 -i in -o out || true; \
		rm -rf out; \
	)

test: all
	(\
		rm -rf libafl_unix_shmem_server || true; \
		. env/bin/activate; \
		LD_PRELOAD="$(DEEXIT_PATH)" DYLD_FORCE_FLAT_NAMESPACE=1 DYLD_INSERT_LIBRARIES="$(DEEXIT_PATH)" RUST_BACKTRACE=1 timeout 60s python3 ./atheris/example_fuzzers/fuzzing_example.py --cores 0 -i in -o out || true; \
		rm -rf out; \
	)
This is the drop-in replacement for libfuzzer, to be used together with [`Atheris`](https://github.com/google/atheris) +//! for python instrumentation and fuzzing. + +use clap::{App, AppSettings, Arg}; +use core::{convert::TryInto, ffi::c_void, slice, time::Duration}; +use std::{ + env, + os::raw::{c_char, c_int}, + path::PathBuf, +}; + +use libafl::{ + bolts::{ + current_nanos, + launcher::Launcher, + os::parse_core_bind_arg, + rands::StdRand, + shmem::{ShMemProvider, StdShMemProvider}, + tuples::{tuple_list, Merge}, + }, + corpus::{ + Corpus, InMemoryCorpus, IndexesLenTimeMinimizerCorpusScheduler, OnDiskCorpus, + QueueCorpusScheduler, + }, + events::EventConfig, + executors::{inprocess::InProcessExecutor, ExitKind, TimeoutExecutor}, + feedback_or, + feedbacks::{CrashFeedback, MapFeedbackState, MaxMapFeedback, TimeFeedback, TimeoutFeedback}, + fuzzer::{Fuzzer, StdFuzzer}, + generators::RandBytesGenerator, + inputs::{BytesInput, HasTargetBytes}, + mutators::scheduled::{havoc_mutations, tokens_mutations, StdScheduledMutator}, + mutators::token_mutations::{I2SRandReplace, Tokens}, + observers::{HitcountsMapObserver, StdMapObserver, TimeObserver}, + stages::{StdMutationalStage, TracingStage}, + state::{HasCorpus, HasMetadata, StdState}, + stats::MultiStats, + Error, +}; +use libafl_targets::{ + CmpLogObserver, __sanitizer_cov_trace_cmp1, __sanitizer_cov_trace_cmp2, + __sanitizer_cov_trace_cmp4, __sanitizer_cov_trace_cmp8, CMPLOG_MAP, EDGES_MAP_PTR, + MAX_EDGES_NUM, +}; + +/// Set up our coverage map. +#[no_mangle] +pub fn __sanitizer_cov_8bit_counters_init(start: *mut u8, stop: *mut u8) { + unsafe { + EDGES_MAP_PTR = start; + MAX_EDGES_NUM = (stop as usize - start as usize) / 8; + } +} + +/// `pcs` tables seem to be unused by `Atheris`, so we can ignore this setup function, +/// but the symbol is still being called and, hence, required. 
+#[no_mangle] +pub fn __sanitizer_cov_pcs_init(_pcs_beg: *mut u8, _pcs_end: *mut u8) { + // noop +} + +/// Allow the python code to use `cmplog`. +/// This is a PoC implementation and could be improved. +/// For example, it only takes up to 8 bytes into consideration. +#[no_mangle] +pub fn __sanitizer_weak_hook_memcmp( + _caller_pc: *const c_void, + s1: *const c_void, + s2: *const c_void, + n: usize, + _result: c_int, +) { + unsafe { + let s1 = slice::from_raw_parts(s1 as *const u8, n); + let s2 = slice::from_raw_parts(s2 as *const u8, n); + match n { + 0 => (), + 1 => __sanitizer_cov_trace_cmp1( + u8::from_ne_bytes(s1.try_into().unwrap()), + u8::from_ne_bytes(s2.try_into().unwrap()), + ), + 2..=3 => __sanitizer_cov_trace_cmp2( + u16::from_ne_bytes(s1.try_into().unwrap()), + u16::from_ne_bytes(s2.try_into().unwrap()), + ), + 4..=7 => __sanitizer_cov_trace_cmp4( + u32::from_ne_bytes(s1.try_into().unwrap()), + u32::from_ne_bytes(s2.try_into().unwrap()), + ), + _ => __sanitizer_cov_trace_cmp8( + u64::from_ne_bytes(s1.try_into().unwrap()), + u64::from_ne_bytes(s2.try_into().unwrap()), + ), + } + } +} + +/// It's called by Atheris after the fuzzer has been initialized. +/// The main entrypoint to our fuzzer, which will be called by `Atheris` when fuzzing starts. +/// The `harness_fn` parameter is the function that will be called by `LibAFL` for each iteration +/// and jumps back into `Atheris'` instrumented python code. 
+#[no_mangle] +#[allow(non_snake_case)] +pub fn LLVMFuzzerRunDriver( + _argc: *const c_int, + _argv: *const *const c_char, + harness_fn: Option c_int>, +) { + // Registry the metadata types used in this fuzzer + // Needed only on no_std + //RegistryBuilder::register::(); + + if harness_fn.is_none() { + panic!("No harness callback provided"); + } + let harness_fn = harness_fn.unwrap(); + + if unsafe { EDGES_MAP_PTR.is_null() } { + panic!( + "Edges map was never initialized - __sanitizer_cov_8bit_counters_init never got called" + ); + } + + println!("Args: {:?}", std::env::args()); + + let matches = App::new("libafl_atheris") + .version("0.1.0") + .setting(AppSettings::AllowExternalSubcommands) + .arg(Arg::new("script")) // The python script is the first arg + .arg( + Arg::new("cores") + .short('c') + .long("cores") + .required(true) + .takes_value(true), + ) + .arg( + Arg::new("broker_port") + .short('p') + .long("broker-port") + .required(false) + .takes_value(true), + ) + .arg( + Arg::new("output") + .short('o') + .long("output") + .required(false) + .takes_value(true), + ) + .arg( + Arg::new("input") + .short('i') + .long("input") + .required(true) + .takes_value(true), + ) + .arg( + Arg::new("remote_broker_addr") + .short('B') + .long("remote-broker-addr") + .required(false) + .takes_value(true), + ) + .arg( + Arg::new("timeout") + .short('t') + .long("timeout") + .required(false) + .takes_value(true), + ) + .get_matches(); + + let workdir = env::current_dir().unwrap(); + println!( + "Workdir: {:?}", + env::current_dir().unwrap().to_string_lossy().to_string() + ); + + let cores = parse_core_bind_arg(matches.value_of("cores").unwrap()) + .expect("No valid core count given!"); + let broker_port = matches + .value_of("broker_port") + .map(|s| s.parse().expect("Invalid broker port")) + .unwrap_or(1337); + let remote_broker_addr = matches + .value_of("remote_broker_addr") + .map(|s| s.parse().expect("Invalid broker address")); + let input_dirs: Vec = matches + 
.values_of("input") + .map(|v| v.map(PathBuf::from).collect()) + .unwrap_or_default(); + let output_dir = matches + .value_of("output") + .map(PathBuf::from) + .unwrap_or_else(|| workdir.clone()); + let token_files: Vec<&str> = matches + .values_of("tokens") + .map(|v| v.collect()) + .unwrap_or_default(); + let timeout_ms = matches + .value_of("timeout") + .map(|s| s.parse().expect("Invalid timeout")) + .unwrap_or(10000); + // let cmplog_enabled = matches.is_present("cmplog"); + + println!("Workdir: {:?}", workdir.to_string_lossy().to_string()); + + let shmem_provider = StdShMemProvider::new().expect("Failed to init shared memory"); + + let stats = MultiStats::new(|s| println!("{}", s)); + + // TODO: we need to handle Atheris calls to `exit` on errors somhow. + + let mut run_client = |state: Option>, mut mgr, _core_id| { + // Create an observation channel using the coverage map + let edges = unsafe { slice::from_raw_parts_mut(EDGES_MAP_PTR, MAX_EDGES_NUM) }; + let edges_observer = HitcountsMapObserver::new(StdMapObserver::new("edges", edges)); + + // Create an observation channel to keep track of the execution time + let time_observer = TimeObserver::new("time"); + + // Create the Cmp observer + let cmplog = unsafe { &mut CMPLOG_MAP }; + let cmplog_observer = CmpLogObserver::new("cmplog", cmplog, true); + + // The state of the edges feedback. 
+ let feedback_state = MapFeedbackState::with_observer(&edges_observer); + + // Feedback to rate the interestingness of an input + // This one is composed by two Feedbacks in OR + let feedback = feedback_or!( + // New maximization map feedback linked to the edges observer and the feedback state + MaxMapFeedback::new_tracking(&feedback_state, &edges_observer, true, false), + // Time feedback, this one does not need a feedback state + TimeFeedback::new_with_observer(&time_observer) + ); + + // A feedback to choose if an input is a solution or not + let objective = feedback_or!(CrashFeedback::new(), TimeoutFeedback::new()); + + // If not restarting, create a State from scratch + let mut state = state.unwrap_or_else(|| { + StdState::new( + // RNG + StdRand::with_seed(current_nanos()), + // Corpus that will be evolved, we keep it in memory for performance + InMemoryCorpus::new(), + // Corpus in which we store solutions (crashes in this example), + // on disk so the user can get them after stopping the fuzzer + OnDiskCorpus::new(output_dir.clone()).unwrap(), + // States of the feedbacks. + // They are the data related to the feedbacks that you want to persist in the State. 
+ tuple_list!(feedback_state), + ) + }); + + // Create a dictionary if not existing + if state.metadata().get::().is_none() { + for tokens_file in &token_files { + state.add_metadata(Tokens::from_tokens_file(tokens_file)?); + } + } + + // A minimization+queue policy to get testcasess from the corpus + let scheduler = IndexesLenTimeMinimizerCorpusScheduler::new(QueueCorpusScheduler::new()); + + // A fuzzer with feedbacks and a corpus scheduler + let mut fuzzer = StdFuzzer::new(scheduler, feedback, objective); + + // The wrapped harness function, calling out to the LLVM-style harness + let mut harness = |input: &BytesInput| { + let target = input.target_bytes(); + let buf = target.as_slice(); + harness_fn(buf.as_ptr(), buf.len()); + ExitKind::Ok + }; + + // Create the executor for an in-process function with one observer for edge coverage and one for the execution time + let mut executor = TimeoutExecutor::new( + InProcessExecutor::new( + &mut harness, + tuple_list!(edges_observer, time_observer), + &mut fuzzer, + &mut state, + &mut mgr, + )?, + Duration::from_millis(timeout_ms), + ); + + // Secondary harness due to mut ownership + let mut harness = |input: &BytesInput| { + let target = input.target_bytes(); + let buf = target.as_slice(); + harness_fn(buf.as_ptr(), buf.len()); + ExitKind::Ok + }; + + // Setup a tracing stage in which we log comparisons + let tracing = TracingStage::new(InProcessExecutor::new( + &mut harness, + tuple_list!(cmplog_observer), + &mut fuzzer, + &mut state, + &mut mgr, + )?); + + // Setup a randomic Input2State stage + let i2s = + StdMutationalStage::new(StdScheduledMutator::new(tuple_list!(I2SRandReplace::new()))); + + // Setup a basic mutator + let mutator = StdScheduledMutator::new(havoc_mutations().merge(tokens_mutations())); + let mutational = StdMutationalStage::new(mutator); + + // The order of the stages matter! 
+ let mut stages = tuple_list!(tracing, i2s, mutational); + + // In case the corpus is empty (on first run), reset + if state.corpus().count() < 1 { + if input_dirs.is_empty() { + // Generator of printable bytearrays of max size 32 + let mut generator = RandBytesGenerator::new(32); + + // Generate 8 initial inputs + state + .generate_initial_inputs( + &mut fuzzer, + &mut executor, + &mut generator, + &mut mgr, + 8, + ) + .expect("Failed to generate the initial corpus"); + println!( + "We imported {} inputs from the generator.", + state.corpus().count() + ); + } else { + println!("Loading from {:?}", &input_dirs); + // Load from disk + // we used _forced since some Atheris testcases don't touch the map at all, hence, wolud not load any data. + state + .load_initial_inputs_forced(&mut fuzzer, &mut executor, &mut mgr, &input_dirs) + .unwrap_or_else(|_| { + panic!("Failed to load initial corpus at {:?}", &input_dirs) + }); + println!("We imported {} inputs from disk.", state.corpus().count()); + } + } + + fuzzer.fuzz_loop(&mut stages, &mut executor, &mut state, &mut mgr)?; + Ok(()) + }; + + // Let's go. Python fuzzing ftw! + match Launcher::builder() + .shmem_provider(shmem_provider) + .configuration(EventConfig::from_name("default")) + .stats(stats) + .run_client(&mut run_client) + .cores(&cores) + .broker_port(broker_port) + .remote_broker_addr(remote_broker_addr) + // remove this comment to sience the target. + //.stdout_file(Some("/dev/null")) + .build() + .launch() + { + Ok(_) | Err(Error::ShuttingDown) => (), + Err(e) => panic!("Error in fuzzer: {}", e), + }; +} diff --git a/utils/README.md b/utils/README.md new file mode 100644 index 0000000000..cbf267f964 --- /dev/null +++ b/utils/README.md @@ -0,0 +1,11 @@ +# LibAFL Utils + +Welcome to the LibAFL Utils folder. +Here, you find some helful utilities that may be helpful for successfull fuzzing campaigns. 
+ +## DeExit: ldpreload exit lib + +In the `deexit` folder, you'll find a ldpreloadable library, that changes calls to `exit` to `abort()`s. +When a target exits, it quits, and LibAFL will not be able to catch this or recover. +Abort, on the other hand, raises an error LibAFL's inprocess executor will be able to catch, thanks to its signal handlers. + diff --git a/utils/deexit/Cargo.toml b/utils/deexit/Cargo.toml new file mode 100644 index 0000000000..b6211d965e --- /dev/null +++ b/utils/deexit/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "deexit" +version = "0.1.0" +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] + +[lib] +name = "deexit" +crate-type = ["cdylib"] \ No newline at end of file diff --git a/utils/deexit/README.md b/utils/deexit/README.md new file mode 100644 index 0000000000..47b7c653e7 --- /dev/null +++ b/utils/deexit/README.md @@ -0,0 +1,6 @@ +# DeExit + +This util helps you, if your target calls `exit` during a fuzz run. +A simple wrapper that can be inserted into a program to turn `exit` calls to `abort`, which LibAFL will be able to catch. +If you are on MacOS, use the env variables `DYLD_FORCE_FLAT_NAMESPACE=1 DYLD_INSERT_LIBRARIES="path/to/target/release/libdeexit.dylib" tool` +On Linux, use `LD_PRELOAD="path/to/target/release/libdeexit.so" tool`. diff --git a/utils/deexit/src/lib.rs b/utils/deexit/src/lib.rs new file mode 100644 index 0000000000..dd1125a3a0 --- /dev/null +++ b/utils/deexit/src/lib.rs @@ -0,0 +1,16 @@ +//! A simple wrapper that can be inserted into a program to turn `exit` calls to `abort`, which `LibAFL` will be able to catch. +//! If you are on `MacOS`, use the env variables `DYLD_FORCE_FLAT_NAMESPACE=1 DYLD_INSERT_LIBRARIES="path/to/target/release/libdeexit.dylib" tool` +//! On Linux, use `LD_PRELOAD="path/to/target/release/libdeexit.so" tool`. 
/// Hooked `exit` function
///
/// Prints the status code the target tried to exit with and then aborts the
/// process instead of exiting.
///
/// The function uses the C ABI and never returns, matching the libc `exit`
/// it is meant to replace when this library is preloaded.
#[no_mangle]
pub extern "C" fn exit(status: i32) -> ! {
    println!("DeExit: The target called exit with status code {}", status);
    // Safe std wrapper; replaces the hand-written `extern "C" { fn abort(); }`
    // declaration, which was missing the diverging return type.
    std::process::abort()
}