Atheris example to fuzz Python Code (#300)

* initial atheris libfuzzer harness

* cmplog, kinda

* added makefile to generic_inmemory

* Makefile for atheris fuzzer

* moved away from clap yaml

* fixed arg parsing

* fuzzing

* ldpreload lib to replace exit with abort

* fixed docker, docs

* fix docker some more

* better documentation

* less commented out important things

* Make makefile less crashy
This commit is contained in:
Dominik Maier 2021-11-03 10:13:05 +01:00 committed by GitHub
parent 2055eabede
commit 12c470a707
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 554 additions and 4 deletions

2
.gitignore vendored
View File

@ -35,3 +35,5 @@ AFLplusplus
a a
forkserver_test forkserver_test
__pycache__
*.lafl_lock

View File

@ -17,6 +17,7 @@ members = [
"libafl_concolic/symcc_libafl", "libafl_concolic/symcc_libafl",
"libafl_concolic/test/dump_constraints", "libafl_concolic/test/dump_constraints",
"libafl_concolic/test/runtime_test", "libafl_concolic/test/runtime_test",
"utils/deexit",
] ]
default-members = [ default-members = [
"libafl", "libafl",

View File

@ -64,6 +64,8 @@ COPY scripts/dummy.rs libafl_concolic/symcc_runtime/src/lib.rs
COPY libafl_concolic/symcc_libafl/Cargo.toml libafl_concolic/symcc_libafl/ COPY libafl_concolic/symcc_libafl/Cargo.toml libafl_concolic/symcc_libafl/
COPY scripts/dummy.rs libafl_concolic/symcc_libafl/src/lib.rs COPY scripts/dummy.rs libafl_concolic/symcc_libafl/src/lib.rs
COPY utils utils
RUN cargo build && cargo build --release RUN cargo build && cargo build --release
COPY scripts scripts COPY scripts scripts

View File

@ -37,8 +37,8 @@ short_test: all
echo a > in/a echo a > in/a
# Allow sigterm as exit code # Allow sigterm as exit code
(timeout 11s ./$(FUZZER_NAME) out in || [ $$? -eq 124 ]) (timeout 11s ./$(FUZZER_NAME) out in || [ $$? -eq 124 ])
rm -rf out rm -rf out || true
rm -rf in rm -rf in || true
test: all test: all
mkdir in || true mkdir in || true

1
fuzzers/generic_inmemory/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
fuzzer

View File

@ -0,0 +1,31 @@
# Makefile for the generic in-memory fuzzer example.
# Builds the LibAFL compiler wrappers with cargo, then uses them to compile
# and link fuzz.c into the final fuzzer binary.

# Name of the fuzzer binary that the run/test/clean targets operate on.
FUZZER_NAME := fuzzer
# `:=` so that the shell commands run once at parse time, not on every use.
PROJECT_DIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))
UNAME := $(shell uname)

# These targets are commands, not files; without the leading dot the
# declaration would just be an ordinary rule named "PHONY".
.PHONY: all libafl_cxx libafl_cc clean run short_test test

all: fuzzer

# Build the LibAFL compiler wrappers (libafl_cc / libafl_cxx).
target/release/libafl_cxx: src/* src/bin/*
	cargo build --release

# Convenience aliases; both wrappers come out of the same cargo build.
libafl_cxx: target/release/libafl_cxx
libafl_cc: target/release/libafl_cxx

# Compile and link the harness with the LibAFL compiler wrapper.
fuzzer: libafl_cc
	target/release/libafl_cc -O3 fuzz.c -o $@

# -f: do not fail when the binary was never built.
clean:
	rm -f ./$(FUZZER_NAME)

# The fuzzer is started in the background, so the recipe returns immediately.
run: all
	./$(FUZZER_NAME) --cores 0 &

# Short smoke run, limited to 10 seconds by `timeout`.
short_test: all
	rm -rf libafl_unix_shmem_server || true
	RUST_BACKTRACE=1 timeout 10s ./$(FUZZER_NAME) --cores 0 &

# Longer run, limited to 60 seconds by `timeout`.
test: all
	RUST_BACKTRACE=1 timeout 60s ./$(FUZZER_NAME) --cores 0 &

View File

@ -31,6 +31,7 @@ use libafl::{
stages::{StdMutationalStage, TracingStage}, stages::{StdMutationalStage, TracingStage},
state::{HasCorpus, HasMetadata, StdState}, state::{HasCorpus, HasMetadata, StdState},
stats::MultiStats, stats::MultiStats,
Error,
}; };
use libafl_targets::{ use libafl_targets::{
@ -230,7 +231,7 @@ pub fn libafl_main() {
Ok(()) Ok(())
}; };
Launcher::builder() match Launcher::builder()
.shmem_provider(shmem_provider) .shmem_provider(shmem_provider)
.configuration(EventConfig::from_name("default")) .configuration(EventConfig::from_name("default"))
.stats(stats) .stats(stats)
@ -241,5 +242,8 @@ pub fn libafl_main() {
//.stdout_file(Some("/dev/null")) //.stdout_file(Some("/dev/null"))
.build() .build()
.launch() .launch()
.expect("Launcher failed"); {
Ok(_) | Err(Error::ShuttingDown) => (),
Err(e) => panic!("{:?}", e),
};
} }

2
fuzzers/libafl_atheris/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
atheris
env

View File

@ -0,0 +1,29 @@
# Crate manifest for the LibAFL-based libfuzzer replacement used together with Atheris.
[package]
name = "libafl_atheris"
version = "0.6.1"
authors = ["Andrea Fioraldi <andreafioraldi@gmail.com>", "Dominik Maier <domenukk@gmail.com>"]
edition = "2018"
[features]
default = ["std"]
std = []
# Fully optimized release build (LTO, single codegen unit) that still keeps debug info.
[profile.release]
lto = true
codegen-units = 1
opt-level = 3
debug = true
# NOTE(review): no build script is visible alongside this manifest - confirm these are still needed.
[build-dependencies]
cc = { version = "1.0", features = ["parallel"] }
which = { version = "4.0.2" }
num_cpus = "1.0"
[dependencies]
libafl = { path = "../../libafl/" }
libafl_targets = { path = "../../libafl_targets/", features = ["sancov_pcguard_hitcounts_ptr", "sancov_cmplog", "libfuzzer"] }
# NOTE(review): the command line is built in code via `App::new`, so the "yaml" feature looks unused - confirm.
clap = { version = "3.0.0-beta.4", features = ["default", "yaml"] }
# Built as a static library; with the usual "lib" prefix the artifact is libafl_atheris.a,
# which is the file the Makefile passes to the Atheris build as LIBFUZZER_LIB.
[lib]
name = "afl_atheris"
crate-type = ["staticlib"]

View File

@ -0,0 +1,54 @@
# Makefile for the Atheris (Python) fuzzer example.
# Builds the LibAFL static library, installs Atheris against it inside a
# virtualenv, and runs the Atheris example fuzzer with the deexit library
# preloaded so that calls to `exit` become `abort`.

FUZZER_NAME := fuzzer
# `:=` so that the shell commands run once at parse time, not on every use.
PROJECT_DIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))
UNAME := $(shell uname)
# Static library produced by `cargo build --release`; handed to the Atheris
# build as its libFuzzer implementation.
LIB_DIR := $(PROJECT_DIR)/target/release/libafl_atheris.a

# The preload library has a platform-specific suffix: .dylib on macOS, .so elsewhere.
ifeq ($(UNAME),Darwin)
DEEXIT_PATH := $(PROJECT_DIR)/../../target/release/libdeexit.dylib
else
DEEXIT_PATH := $(PROJECT_DIR)/../../target/release/libdeexit.so
endif

# Environment for a fuzzing run: LD_PRELOAD is honoured on Linux, the DYLD_*
# pair on macOS; each platform ignores the other's variables.
PRELOAD_ENV := LD_PRELOAD="$(DEEXIT_PATH)" DYLD_FORCE_FLAT_NAMESPACE=1 DYLD_INSERT_LIBRARIES="$(DEEXIT_PATH)" RUST_BACKTRACE=1
EXAMPLE_FUZZER := ./atheris/example_fuzzers/fuzzing_example.py

# These targets are commands, not files; without the leading dot the
# declaration would just be an ordinary rule named "PHONY".
.PHONY: all deexit fuzzer clean run short_test test

all: fuzzer deexit

deexit: $(DEEXIT_PATH)

# The deexit crate is a member of the workspace two directories up.
$(DEEXIT_PATH):
	cd ../.. && cargo build --release -p deexit

atheris:
	git clone https://github.com/google/atheris.git

# Build the LibAFL static library that replaces libFuzzer inside Atheris.
target/release/libafl_atheris.a: src/*
	cargo build --release

# Install Atheris into the virtualenv, linked against our library.
# `.` instead of `source` keeps the recipe working with the default /bin/sh,
# and `&&` stops at the first failing step.
fuzzer: target/release/libafl_atheris.a atheris env
	. env/bin/activate && \
		cd atheris && \
		LIBFUZZER_LIB="$(LIB_DIR)" pip install .

# env is a directory, so it needs a recursive remove.
clean:
	rm -rf env

# NOTE(review): nothing in this Makefile produces a ./$(FUZZER_NAME) binary;
# this target looks copied from another example - confirm what it should run.
run: all
	./$(FUZZER_NAME) --cores 0 &

env:
	python3 -m pip install --user virtualenv
	python3 -m venv env

# Short smoke run. `timeout` ending the fuzzer is the expected outcome, so the
# exit status of the run itself is deliberately ignored.
short_test: all
	rm -rf libafl_unix_shmem_server
	. env/bin/activate && \
		{ $(PRELOAD_ENV) timeout 10s python3 $(EXAMPLE_FUZZER) --cores 0 -i in -o out || true; }
	rm -rf out

# Same as short_test, with a 60 second budget.
test: all
	rm -rf libafl_unix_shmem_server
	. env/bin/activate && \
		{ $(PRELOAD_ENV) timeout 60s python3 $(EXAMPLE_FUZZER) --cores 0 -i in -o out || true; }
	rm -rf out

View File

@ -0,0 +1,4 @@
# Atheris Python Fuzzer
This is a LibAFL-based Python fuzzer using [Atheris](https://github.com/google/atheris) for instrumentation.
We switch out Atheris' libfuzzer implementation with LibAFL, by mimicking its API.

View File

@ -0,0 +1,375 @@
//! A libfuzzer-like fuzzer with llmp-multithreading support and restarts
//! The `launcher` will spawn new processes for each cpu core.
//! This is the drop-in replacement for libfuzzer, to be used together with [`Atheris`](https://github.com/google/atheris)
//! for python instrumentation and fuzzing.
use clap::{App, AppSettings, Arg};
use core::{convert::TryInto, ffi::c_void, slice, time::Duration};
use std::{
env,
os::raw::{c_char, c_int},
path::PathBuf,
};
use libafl::{
bolts::{
current_nanos,
launcher::Launcher,
os::parse_core_bind_arg,
rands::StdRand,
shmem::{ShMemProvider, StdShMemProvider},
tuples::{tuple_list, Merge},
},
corpus::{
Corpus, InMemoryCorpus, IndexesLenTimeMinimizerCorpusScheduler, OnDiskCorpus,
QueueCorpusScheduler,
},
events::EventConfig,
executors::{inprocess::InProcessExecutor, ExitKind, TimeoutExecutor},
feedback_or,
feedbacks::{CrashFeedback, MapFeedbackState, MaxMapFeedback, TimeFeedback, TimeoutFeedback},
fuzzer::{Fuzzer, StdFuzzer},
generators::RandBytesGenerator,
inputs::{BytesInput, HasTargetBytes},
mutators::scheduled::{havoc_mutations, tokens_mutations, StdScheduledMutator},
mutators::token_mutations::{I2SRandReplace, Tokens},
observers::{HitcountsMapObserver, StdMapObserver, TimeObserver},
stages::{StdMutationalStage, TracingStage},
state::{HasCorpus, HasMetadata, StdState},
stats::MultiStats,
Error,
};
use libafl_targets::{
CmpLogObserver, __sanitizer_cov_trace_cmp1, __sanitizer_cov_trace_cmp2,
__sanitizer_cov_trace_cmp4, __sanitizer_cov_trace_cmp8, CMPLOG_MAP, EDGES_MAP_PTR,
MAX_EDGES_NUM,
};
/// Set up our coverage map.
///
/// Called by the instrumentation with the `[start, stop)` range of its inline
/// 8-bit counters. Each counter is a single byte, so the number of edges is
/// simply the length of that range in bytes.
///
/// Exported with the C ABI, since the caller is not Rust code.
#[no_mangle]
pub extern "C" fn __sanitizer_cov_8bit_counters_init(start: *mut u8, stop: *mut u8) {
    unsafe {
        EDGES_MAP_PTR = start;
        // One counter per byte: no division needed. `saturating_sub` keeps a
        // bogus (reversed) range from wrapping around to a huge length.
        MAX_EDGES_NUM = (stop as usize).saturating_sub(start as usize);
    }
}
/// `pcs` tables seem to be unused by `Atheris`, so we can ignore this setup function,
/// but the symbol is still being called and, hence, required.
///
/// Neither pointer is read or written.
/// Exported with the C ABI, since the caller is not Rust code.
#[no_mangle]
pub extern "C" fn __sanitizer_cov_pcs_init(_pcs_beg: *mut u8, _pcs_end: *mut u8) {
    // Intentionally a no-op.
}
/// Allow the python code to use `cmplog`.
/// This is a PoC implementation and could be improved.
/// For example, it only takes up to 8 bytes into consideration.
///
/// The compared buffers are logged through the widest `__sanitizer_cov_trace_cmp*`
/// hook that fits into `n`: only the leading 1, 2, 4 or 8 bytes are used, the
/// rest of the buffers is ignored. Nothing is logged for empty or null buffers.
///
/// Exported with the C ABI, since the caller is not Rust code.
#[no_mangle]
pub extern "C" fn __sanitizer_weak_hook_memcmp(
    _caller_pc: *const c_void,
    s1: *const c_void,
    s2: *const c_void,
    n: usize,
    _result: c_int,
) {
    // Building a slice from a null pointer is undefined behavior, even for a
    // length of zero, so bail out before touching the pointers.
    if n == 0 || s1.is_null() || s2.is_null() {
        return;
    }
    unsafe {
        let s1 = slice::from_raw_parts(s1 as *const u8, n);
        let s2 = slice::from_raw_parts(s2 as *const u8, n);
        // Converting a slice into a fixed-size array only succeeds when the
        // lengths match exactly, so each arm first cuts the prefix it needs.
        match n {
            1 => __sanitizer_cov_trace_cmp1(s1[0], s2[0]),
            2..=3 => __sanitizer_cov_trace_cmp2(
                u16::from_ne_bytes(s1[..2].try_into().unwrap()),
                u16::from_ne_bytes(s2[..2].try_into().unwrap()),
            ),
            4..=7 => __sanitizer_cov_trace_cmp4(
                u32::from_ne_bytes(s1[..4].try_into().unwrap()),
                u32::from_ne_bytes(s2[..4].try_into().unwrap()),
            ),
            _ => __sanitizer_cov_trace_cmp8(
                u64::from_ne_bytes(s1[..8].try_into().unwrap()),
                u64::from_ne_bytes(s2[..8].try_into().unwrap()),
            ),
        }
    }
}
/// It's called by Atheris after the fuzzer has been initialized.
/// The main entrypoint to our fuzzer, which will be called by `Atheris` when fuzzing starts.
/// The `harness_fn` parameter is the function that will be called by `LibAFL` for each iteration
/// and jumps back into `Atheris'` instrumented python code.
#[no_mangle]
#[allow(non_snake_case)]
pub fn LLVMFuzzerRunDriver(
_argc: *const c_int,
_argv: *const *const c_char,
harness_fn: Option<extern "C" fn(*const u8, usize) -> c_int>,
) {
// Registry the metadata types used in this fuzzer
// Needed only on no_std
//RegistryBuilder::register::<Tokens>();
if harness_fn.is_none() {
panic!("No harness callback provided");
}
let harness_fn = harness_fn.unwrap();
if unsafe { EDGES_MAP_PTR.is_null() } {
panic!(
"Edges map was never initialized - __sanitizer_cov_8bit_counters_init never got called"
);
}
println!("Args: {:?}", std::env::args());
let matches = App::new("libafl_atheris")
.version("0.1.0")
.setting(AppSettings::AllowExternalSubcommands)
.arg(Arg::new("script")) // The python script is the first arg
.arg(
Arg::new("cores")
.short('c')
.long("cores")
.required(true)
.takes_value(true),
)
.arg(
Arg::new("broker_port")
.short('p')
.long("broker-port")
.required(false)
.takes_value(true),
)
.arg(
Arg::new("output")
.short('o')
.long("output")
.required(false)
.takes_value(true),
)
.arg(
Arg::new("input")
.short('i')
.long("input")
.required(true)
.takes_value(true),
)
.arg(
Arg::new("remote_broker_addr")
.short('B')
.long("remote-broker-addr")
.required(false)
.takes_value(true),
)
.arg(
Arg::new("timeout")
.short('t')
.long("timeout")
.required(false)
.takes_value(true),
)
.get_matches();
let workdir = env::current_dir().unwrap();
println!(
"Workdir: {:?}",
env::current_dir().unwrap().to_string_lossy().to_string()
);
let cores = parse_core_bind_arg(matches.value_of("cores").unwrap())
.expect("No valid core count given!");
let broker_port = matches
.value_of("broker_port")
.map(|s| s.parse().expect("Invalid broker port"))
.unwrap_or(1337);
let remote_broker_addr = matches
.value_of("remote_broker_addr")
.map(|s| s.parse().expect("Invalid broker address"));
let input_dirs: Vec<PathBuf> = matches
.values_of("input")
.map(|v| v.map(PathBuf::from).collect())
.unwrap_or_default();
let output_dir = matches
.value_of("output")
.map(PathBuf::from)
.unwrap_or_else(|| workdir.clone());
let token_files: Vec<&str> = matches
.values_of("tokens")
.map(|v| v.collect())
.unwrap_or_default();
let timeout_ms = matches
.value_of("timeout")
.map(|s| s.parse().expect("Invalid timeout"))
.unwrap_or(10000);
// let cmplog_enabled = matches.is_present("cmplog");
println!("Workdir: {:?}", workdir.to_string_lossy().to_string());
let shmem_provider = StdShMemProvider::new().expect("Failed to init shared memory");
let stats = MultiStats::new(|s| println!("{}", s));
// TODO: we need to handle Atheris calls to `exit` on errors somhow.
let mut run_client = |state: Option<StdState<_, _, _, _, _>>, mut mgr, _core_id| {
// Create an observation channel using the coverage map
let edges = unsafe { slice::from_raw_parts_mut(EDGES_MAP_PTR, MAX_EDGES_NUM) };
let edges_observer = HitcountsMapObserver::new(StdMapObserver::new("edges", edges));
// Create an observation channel to keep track of the execution time
let time_observer = TimeObserver::new("time");
// Create the Cmp observer
let cmplog = unsafe { &mut CMPLOG_MAP };
let cmplog_observer = CmpLogObserver::new("cmplog", cmplog, true);
// The state of the edges feedback.
let feedback_state = MapFeedbackState::with_observer(&edges_observer);
// Feedback to rate the interestingness of an input
// This one is composed by two Feedbacks in OR
let feedback = feedback_or!(
// New maximization map feedback linked to the edges observer and the feedback state
MaxMapFeedback::new_tracking(&feedback_state, &edges_observer, true, false),
// Time feedback, this one does not need a feedback state
TimeFeedback::new_with_observer(&time_observer)
);
// A feedback to choose if an input is a solution or not
let objective = feedback_or!(CrashFeedback::new(), TimeoutFeedback::new());
// If not restarting, create a State from scratch
let mut state = state.unwrap_or_else(|| {
StdState::new(
// RNG
StdRand::with_seed(current_nanos()),
// Corpus that will be evolved, we keep it in memory for performance
InMemoryCorpus::new(),
// Corpus in which we store solutions (crashes in this example),
// on disk so the user can get them after stopping the fuzzer
OnDiskCorpus::new(output_dir.clone()).unwrap(),
// States of the feedbacks.
// They are the data related to the feedbacks that you want to persist in the State.
tuple_list!(feedback_state),
)
});
// Create a dictionary if not existing
if state.metadata().get::<Tokens>().is_none() {
for tokens_file in &token_files {
state.add_metadata(Tokens::from_tokens_file(tokens_file)?);
}
}
// A minimization+queue policy to get testcasess from the corpus
let scheduler = IndexesLenTimeMinimizerCorpusScheduler::new(QueueCorpusScheduler::new());
// A fuzzer with feedbacks and a corpus scheduler
let mut fuzzer = StdFuzzer::new(scheduler, feedback, objective);
// The wrapped harness function, calling out to the LLVM-style harness
let mut harness = |input: &BytesInput| {
let target = input.target_bytes();
let buf = target.as_slice();
harness_fn(buf.as_ptr(), buf.len());
ExitKind::Ok
};
// Create the executor for an in-process function with one observer for edge coverage and one for the execution time
let mut executor = TimeoutExecutor::new(
InProcessExecutor::new(
&mut harness,
tuple_list!(edges_observer, time_observer),
&mut fuzzer,
&mut state,
&mut mgr,
)?,
Duration::from_millis(timeout_ms),
);
// Secondary harness due to mut ownership
let mut harness = |input: &BytesInput| {
let target = input.target_bytes();
let buf = target.as_slice();
harness_fn(buf.as_ptr(), buf.len());
ExitKind::Ok
};
// Setup a tracing stage in which we log comparisons
let tracing = TracingStage::new(InProcessExecutor::new(
&mut harness,
tuple_list!(cmplog_observer),
&mut fuzzer,
&mut state,
&mut mgr,
)?);
// Setup a randomic Input2State stage
let i2s =
StdMutationalStage::new(StdScheduledMutator::new(tuple_list!(I2SRandReplace::new())));
// Setup a basic mutator
let mutator = StdScheduledMutator::new(havoc_mutations().merge(tokens_mutations()));
let mutational = StdMutationalStage::new(mutator);
// The order of the stages matter!
let mut stages = tuple_list!(tracing, i2s, mutational);
// In case the corpus is empty (on first run), reset
if state.corpus().count() < 1 {
if input_dirs.is_empty() {
// Generator of printable bytearrays of max size 32
let mut generator = RandBytesGenerator::new(32);
// Generate 8 initial inputs
state
.generate_initial_inputs(
&mut fuzzer,
&mut executor,
&mut generator,
&mut mgr,
8,
)
.expect("Failed to generate the initial corpus");
println!(
"We imported {} inputs from the generator.",
state.corpus().count()
);
} else {
println!("Loading from {:?}", &input_dirs);
// Load from disk
// we used _forced since some Atheris testcases don't touch the map at all, hence, wolud not load any data.
state
.load_initial_inputs_forced(&mut fuzzer, &mut executor, &mut mgr, &input_dirs)
.unwrap_or_else(|_| {
panic!("Failed to load initial corpus at {:?}", &input_dirs)
});
println!("We imported {} inputs from disk.", state.corpus().count());
}
}
fuzzer.fuzz_loop(&mut stages, &mut executor, &mut state, &mut mgr)?;
Ok(())
};
// Let's go. Python fuzzing ftw!
match Launcher::builder()
.shmem_provider(shmem_provider)
.configuration(EventConfig::from_name("default"))
.stats(stats)
.run_client(&mut run_client)
.cores(&cores)
.broker_port(broker_port)
.remote_broker_addr(remote_broker_addr)
// remove this comment to sience the target.
//.stdout_file(Some("/dev/null"))
.build()
.launch()
{
Ok(_) | Err(Error::ShuttingDown) => (),
Err(e) => panic!("Error in fuzzer: {}", e),
};
}

11
utils/README.md Normal file
View File

@ -0,0 +1,11 @@
# LibAFL Utils
Welcome to the LibAFL Utils folder.
Here, you find some helpful utilities that may come in handy for successful fuzzing campaigns.
## DeExit: ldpreload exit lib
In the `deexit` folder, you'll find a ldpreloadable library, that changes calls to `exit` to `abort()`s.
When a target exits, it quits, and LibAFL will not be able to catch this or recover.
Abort, on the other hand, raises an error LibAFL's inprocess executor will be able to catch, thanks to its signal handlers.

12
utils/deexit/Cargo.toml Normal file
View File

@ -0,0 +1,12 @@
# Crate manifest for the `deexit` preload library.
[package]
name = "deexit"
version = "0.1.0"
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
# No dependencies are declared.
[dependencies]
# Built as a C-compatible dynamic library (libdeexit.so / libdeexit.dylib), the form
# that LD_PRELOAD and DYLD_INSERT_LIBRARIES can load into a target.
[lib]
name = "deexit"
crate-type = ["cdylib"]

6
utils/deexit/README.md Normal file
View File

@ -0,0 +1,6 @@
# DeExit
This util helps you, if your target calls `exit` during a fuzz run.
A simple wrapper that can be inserted into a program to turn `exit` calls to `abort`, which LibAFL will be able to catch.
If you are on MacOS, use the env variables `DYLD_FORCE_FLAT_NAMESPACE=1 DYLD_INSERT_LIBRARIES="path/to/target/release/libdeexit.dylib" tool`
On Linux, use `LD_PRELOAD="path/to/target/release/libdeexit.so" tool`.

16
utils/deexit/src/lib.rs Normal file
View File

@ -0,0 +1,16 @@
//! A simple wrapper that can be inserted into a program to turn `exit` calls to `abort`, which `LibAFL` will be able to catch.
//! If you are on `MacOS`, use the env variables `DYLD_FORCE_FLAT_NAMESPACE=1 DYLD_INSERT_LIBRARIES="path/to/target/release/libdeexit.dylib" tool`
//! On Linux, use `LD_PRELOAD="path/to/target/release/libdeexit.so" tool`.
extern "C" {
    /// The C library's `abort`. It raises `SIGABRT` and never returns.
    fn abort() -> !;
}

/// Hooked `exit` function
///
/// Once this library is preloaded, calls to `exit` made by the target end up here:
/// the requested status code is printed, then the process is aborted instead of
/// exiting, and this function never returns.
///
/// Exported unmangled and with the C ABI, so that it matches the signature of the
/// C `exit` function it stands in for.
#[no_mangle]
pub extern "C" fn exit(status: i32) -> ! {
    println!("DeExit: The target called exit with status code {}", status);
    unsafe { abort() }
}