Implement a corpus minimiser (cmin) (#739)

* initial try

* correct case where cull attempts to fetch non-existent corpus entries

* various on_remove, on_replace implementations

* ise -> ize (consistency), use TestcaseScore instead of rolling our own

* oops, feature gate

* documentation!

* link c++

* doc-nit: correction in opt explanation

don't write documentation at 0300

* better linking
This commit is contained in:
Addison Crump 2022-08-29 06:38:46 -05:00 committed by GitHub
parent d6e72560dc
commit 0859c3ace2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 1059 additions and 31 deletions

View File

@ -667,14 +667,7 @@ fn fuzz_text(
// In case the corpus is empty (on first run), reset // In case the corpus is empty (on first run), reset
if state.corpus().count() < 1 { if state.corpus().count() < 1 {
state state
.load_from_directory( .load_initial_inputs(&mut fuzzer, &mut executor, &mut mgr, &[seed_dir.clone()])
&mut fuzzer,
&mut executor,
&mut mgr,
&seed_dir,
false,
&mut |_, _, path| GeneralizedInput::from_bytes_file(path),
)
.unwrap_or_else(|_| { .unwrap_or_else(|_| {
println!("Failed to load initial corpus at {:?}", &seed_dir); println!("Failed to load initial corpus at {:?}", &seed_dir);
process::exit(0); process::exit(0);

View File

@ -0,0 +1 @@
libpng-*

View File

@ -0,0 +1,33 @@
[package]
name = "libfuzzer_libpng_cmin"
version = "0.8.1"
authors = ["Andrea Fioraldi <andreafioraldi@gmail.com>", "Dominik Maier <domenukk@gmail.com>", "Addison Crump <research@addisoncrump.info>"]
edition = "2021"
[features]
default = ["std"]
std = []
# Forces a crash
crash = []
[profile.release]
lto = true
codegen-units = 1
opt-level = 3
debug = true
[build-dependencies]
cc = { version = "1.0", features = ["parallel"] }
which = { version = "4.0.2" }
[dependencies]
libafl = { path = "../../libafl/", features = ["default", "cmin"] }
# libafl = { path = "../../libafl/", features = ["default"] }
libafl_targets = { path = "../../libafl_targets/", features = ["sancov_pcguard_hitcounts", "libfuzzer", "sancov_cmplog"] }
# TODO Include it only when building cc
libafl_cc = { path = "../../libafl_cc/" }
mimalloc = { version = "*", default-features = false }
[lib]
name = "libfuzzer_libpng"
crate-type = ["staticlib"]

View File

@ -0,0 +1,181 @@
# Variables
[env]
FUZZER_NAME='fuzzer_libpng'
LIBAFL_CC = './target/release/libafl_cc'
LIBAFL_CXX = './target/release/libafl_cxx'
FUZZER = './target/release/${FUZZER_NAME}'
PROJECT_DIR = { script = ["pwd"] }
[tasks.unsupported]
script_runner="@shell"
script='''
echo "Cargo-make not integrated yet on this"
'''
# libpng
[tasks.libpng]
linux_alias = "libpng_unix"
mac_alias = "libpng_unix"
windows_alias = "unsupported"
[tasks.libpng_unix]
condition = { files_not_exist = ["./libpng-1.6.37"]}
script_runner="@shell"
script='''
curl https://deac-fra.dl.sourceforge.net/project/libpng/libpng16/1.6.37/libpng-1.6.37.tar.xz --output libpng-1.6.37.tar.xz
tar -xvf libpng-1.6.37.tar.xz
'''
# Compilers
[tasks.cxx]
linux_alias = "cxx_unix"
mac_alias = "cxx_unix"
windows_alias = "unsupported"
[tasks.cxx_unix]
command = "cargo"
args = ["build" , "--release"]
[tasks.cc]
linux_alias = "cc_unix"
mac_alias = "cc_unix"
windows_alias = "unsupported"
[tasks.cc_unix]
command = "cargo"
args = ["build" , "--release"]
[tasks.crash_cxx]
linux_alias = "crash_cxx_unix"
mac_alias = "crash_cxx_unix"
windows_alias = "unsupported"
[tasks.crash_cxx_unix]
command = "cargo"
args = ["build" , "--release", "--features=crash"]
[tasks.crash_cc]
linux_alias = "crash_cc_unix"
mac_alias = "crash_cc_unix"
windows_alias = "unsupported"
[tasks.crash_cc_unix]
command = "cargo"
args = ["build" , "--release", "--features=crash"]
# Library
[tasks.lib]
linux_alias = "lib_unix"
mac_alias = "lib_unix"
windows_alias = "unsupported"
[tasks.lib_unix]
script_runner="@shell"
script='''
cd libpng-1.6.37 && ./configure --enable-shared=no --with-pic=yes --enable-hardware-optimizations=yes
cd "${PROJECT_DIR}"
make -C libpng-1.6.37 CC="${PROJECT_DIR}/target/release/libafl_cc" CXX="${PROJECT_DIR}/target/release/libafl_cxx"
'''
dependencies = [ "libpng", "cxx", "cc" ]
# Library
[tasks.crash_lib]
linux_alias = "crash_lib_unix"
mac_alias = "crash_lib_unix"
windows_alias = "unsupported"
[tasks.crash_lib_unix]
script_runner="@shell"
script='''
cd libpng-1.6.37 && ./configure --enable-shared=no --with-pic=yes --enable-hardware-optimizations=yes
cd "${PROJECT_DIR}"
make -C libpng-1.6.37 CC="${PROJECT_DIR}/target/release/libafl_cc" CXX="${PROJECT_DIR}/target/release/libafl_cxx"
'''
dependencies = [ "libpng", "crash_cxx", "crash_cc" ]
# Harness
[tasks.fuzzer]
linux_alias = "fuzzer_unix"
mac_alias = "fuzzer_unix"
windows_alias = "unsupported"
[tasks.fuzzer_unix]
command = "target/release/libafl_cxx"
args = ["${PROJECT_DIR}/harness.cc", "${PROJECT_DIR}/libpng-1.6.37/.libs/libpng16.a", "-I", "${PROJECT_DIR}/libpng-1.6.37/", "-o", "${FUZZER_NAME}", "-lm", "-lz"]
dependencies = [ "lib", "cxx", "cc" ]
# Crashing Harness
[tasks.fuzzer_crash]
linux_alias = "fuzzer_crash_unix"
mac_alias = "fuzzer_crash_unix"
windows_alias = "unsupported"
[tasks.fuzzer_crash_unix]
command = "target/release/libafl_cxx"
args = ["${PROJECT_DIR}/harness.cc", "${PROJECT_DIR}/libpng-1.6.37/.libs/libpng16.a", "-I", "${PROJECT_DIR}/libpng-1.6.37/", "-o", "${FUZZER_NAME}_crash", "-lm", "-lz"]
dependencies = [ "crash_lib", "crash_cxx", "crash_cc" ]
# Run the fuzzer
[tasks.run]
linux_alias = "run_unix"
mac_alias = "run_unix"
windows_alias = "unsupported"
[tasks.run_unix]
script_runner = "@shell"
script='''
./${FUZZER_NAME} &
sleep 0.2
./${FUZZER_NAME} 2>/dev/null
'''
dependencies = [ "fuzzer" ]
# Run the fuzzer with a crash
[tasks.crash]
linux_alias = "crash_unix"
mac_alias = "crash_unix"
windows_alias = "unsupported"
[tasks.crash_unix]
script_runner = "@shell"
script='''
./${FUZZER_NAME}_crash &
sleep 0.2
./${FUZZER_NAME}_crash 2>/dev/null
'''
dependencies = [ "fuzzer_crash" ]
# Test
[tasks.test]
linux_alias = "test_unix"
mac_alias = "test_unix"
windows_alias = "unsupported"
[tasks.test_unix]
script_runner = "@shell"
script='''
rm -rf libafl_unix_shmem_server || true
timeout 11s ./${FUZZER_NAME} &
sleep 0.2
timeout 10s ./${FUZZER_NAME} >/dev/null 2>/dev/null &
'''
dependencies = [ "fuzzer" ]
# Clean up
[tasks.clean]
linux_alias = "clean_unix"
mac_alias = "clean_unix"
windows_alias = "unsupported"
[tasks.clean_unix]
# Disable default `clean` definition
clear = true
script_runner="@shell"
script='''
rm -f ./${FUZZER_NAME}
make -C libpng-1.6.37 clean
cargo clean
'''

View File

@ -0,0 +1,81 @@
# Libfuzzer for libpng
This folder contains an example fuzzer for libpng, using LLMP for fast multi-process fuzzing and crash detection.
In contrast to other fuzzer examples, this setup uses `fuzz_loop_for`, to occasionally respawn the fuzzer executor.
While this costs performance, it can be useful for targets with memory leaks or other instabilities.
If your target is really unstable, however, consider exchanging the `InProcessExecutor` for a `ForkserverExecutor` instead.
It also uses the `introspection` feature, printing fuzzer stats during execution.
To show off crash detection, we added a `ud2` instruction to the harness, edit harness.cc if you want a non-crashing example.
It has been tested on Linux.
## Build
To build this example, run
```bash
cargo build --release
```
This will build the library with the fuzzer (src/lib.rs) with the libfuzzer compatibility layer and the SanitizerCoverage runtime functions for coverage feedback.
In addition, it will also build two C and C++ compiler wrappers (bin/libafl_c(libafl_c/xx).rs) that you must use to compile the target.
The compiler wrappers, `libafl_cc` and `libafl_cxx`, will end up in `./target/release/` (or `./target/debug`, in case you did not build with the `--release` flag).
Then download libpng, and unpack the archive:
```bash
wget https://deac-fra.dl.sourceforge.net/project/libpng/libpng16/1.6.37/libpng-1.6.37.tar.xz
tar -xvf libpng-1.6.37.tar.xz
```
Now compile libpng, using the libafl_cc compiler wrapper:
```bash
cd libpng-1.6.37
./configure
make CC="$(pwd)/../target/release/libafl_cc" CXX="$(pwd)/../target/release/libafl_cxx" -j `nproc`
```
You can find the static lib at `libpng-1.6.37/.libs/libpng16.a`.
Now, we have to build the libfuzzer harness and link all together to create our fuzzer binary.
```
cd ..
./target/release/libafl_cxx ./harness.cc libpng-1.6.37/.libs/libpng16.a -I libpng-1.6.37/ -o fuzzer_libpng -lz -lm
```
Afterward, the fuzzer will be ready to run.
Note that, unless you use the `launcher`, you will have to run the binary multiple times to actually start the fuzz process, see `Run` in the following.
This allows you to run multiple different builds of the same fuzzer alongside, for example, with and without ASAN (`-fsanitize=address`) or with different mutators.
## Run
The first time you run the binary, the broker will open a tcp port (currently on port `1337`), waiting for fuzzer clients to connect. This port is local and only used for the initial handshake. All further communication happens via shared map, to be independent of the kernel. Currently, you must run the clients from the libfuzzer_libpng directory for them to be able to access the PNG corpus.
```
./fuzzer_libpng
[libafl/src/bolts/llmp.rs:407] "We're the broker" = "We\'re the broker"
Doing broker things. Run this tool again to start fuzzing in a client.
```
And after running the above again in a separate terminal:
```
[libafl/src/bolts/llmp.rs:1464] "New connection" = "New connection"
[libafl/src/bolts/llmp.rs:1464] addr = 127.0.0.1:33500
[libafl/src/bolts/llmp.rs:1464] stream.peer_addr().unwrap() = 127.0.0.1:33500
[LOG Debug]: Loaded 4 initial testcases.
[New Testcase #2] clients: 3, corpus: 6, objectives: 0, executions: 5, exec/sec: 0
< fuzzing stats >
```
As this example uses in-process fuzzing, we added a Restarting Event Manager (`setup_restarting_mgr`).
This means each client will start itself again to listen for crashes and timeouts.
By restarting the actual fuzzer, it can recover from these exit conditions.
In any real-world scenario, you should use `taskset` to pin each client to an empty CPU core, as the lib does not pick an empty core automatically (yet).

Binary file not shown.

After

Width:  |  Height:  |  Size: 218 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 376 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 228 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 427 B

View File

@ -0,0 +1,191 @@
// libpng_read_fuzzer.cc
// Copyright 2017-2018 Glenn Randers-Pehrson
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that may
// be found in the LICENSE file https://cs.chromium.org/chromium/src/LICENSE
// Last changed in libpng 1.6.35 [July 15, 2018]
// The modifications in 2017 by Glenn Randers-Pehrson include
// 1. addition of a PNG_CLEANUP macro,
// 2. setting the option to ignore ADLER32 checksums,
// 3. adding "#include <string.h>" which is needed on some platforms
// to provide memcpy().
// 4. adding read_end_info() and creating an end_info structure.
// 5. adding calls to png_set_*() transforms commonly used by browsers.
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <vector>
#define PNG_INTERNAL
#include "png.h"
// Releases every libpng allocation held by the `png_handler` in scope at the
// expansion site, then nulls the pointers so the macro (and the handler's
// destructor) can run again safely without double-freeing.
#define PNG_CLEANUP \
  if (png_handler.png_ptr) { \
    if (png_handler.row_ptr) { \
      png_free(png_handler.png_ptr, png_handler.row_ptr); \
    } \
    if (png_handler.end_info_ptr) { \
      png_destroy_read_struct(&png_handler.png_ptr, &png_handler.info_ptr, \
                              &png_handler.end_info_ptr); \
    } else if (png_handler.info_ptr) { \
      png_destroy_read_struct(&png_handler.png_ptr, &png_handler.info_ptr, \
                              nullptr); \
    } else { \
      png_destroy_read_struct(&png_handler.png_ptr, nullptr, nullptr); \
    } \
    png_handler.png_ptr = nullptr; \
    png_handler.row_ptr = nullptr; \
    png_handler.info_ptr = nullptr; \
    png_handler.end_info_ptr = nullptr; \
  }
// Read cursor over the fuzzer-provided input buffer; consumed byte-by-byte by
// `user_read_data` as libpng requests data.
struct BufState {
  const uint8_t *data;     // next unread byte
  size_t bytes_left;       // bytes remaining after `data`
};
// RAII owner of the libpng structures for one fuzz iteration. The destructor
// releases the row buffer and read/info structs (mirroring the PNG_CLEANUP
// macro) and deletes the input-buffer cursor.
struct PngObjectHandler {
  png_infop info_ptr = nullptr;
  png_structp png_ptr = nullptr;
  png_infop end_info_ptr = nullptr;
  png_voidp row_ptr = nullptr;
  BufState *buf_state = nullptr;
  ~PngObjectHandler() {
    if (row_ptr) { png_free(png_ptr, row_ptr); }
    // png_destroy_read_struct only accepts non-null info pointers, so pick the
    // overload matching what was actually allocated.
    if (end_info_ptr) {
      png_destroy_read_struct(&png_ptr, &info_ptr, &end_info_ptr);
    } else if (info_ptr) {
      png_destroy_read_struct(&png_ptr, &info_ptr, nullptr);
    } else {
      png_destroy_read_struct(&png_ptr, nullptr, nullptr);
    }
    delete buf_state;
  }
};
// libpng read callback: serves `length` bytes from the in-memory BufState
// registered via png_set_read_fn. Raises a libpng error (longjmp) if the
// request exceeds the remaining input.
void user_read_data(png_structp png_ptr, png_bytep data, size_t length) {
  BufState *state = static_cast<BufState *>(png_get_io_ptr(png_ptr));
  if (state->bytes_left < length) { png_error(png_ptr, "read error"); }
  memcpy(data, state->data, length);
  state->data += length;
  state->bytes_left -= length;
}
// Length of the PNG signature checked before handing the rest to libpng.
static const int kPngHeaderSize = 8;
// Entry point for LibFuzzer.
// Roughly follows the libpng book example:
// http://www.libpng.org/pub/png/book/chapter13.html
// Decodes `data` as a PNG end-to-end (header, IHDR, rows, end chunk); always
// returns 0. All libpng failures are surfaced via longjmp into the setjmp
// guards below and handled by PNG_CLEANUP + early return.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
  // Too short to even hold the 8-byte PNG signature.
  if (size < kPngHeaderSize) { return 0; }
  std::vector<unsigned char> v(data, data + size);
  if (png_sig_cmp(v.data(), 0, kPngHeaderSize)) {
    // not a PNG.
    return 0;
  }
  PngObjectHandler png_handler;
  png_handler.png_ptr = nullptr;
  png_handler.row_ptr = nullptr;
  png_handler.info_ptr = nullptr;
  png_handler.end_info_ptr = nullptr;
  png_handler.png_ptr =
      png_create_read_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr);
  if (!png_handler.png_ptr) { return 0; }
  png_handler.info_ptr = png_create_info_struct(png_handler.png_ptr);
  if (!png_handler.info_ptr) {
    PNG_CLEANUP
    return 0;
  }
  png_handler.end_info_ptr = png_create_info_struct(png_handler.png_ptr);
  if (!png_handler.end_info_ptr) {
    PNG_CLEANUP
    return 0;
  }
  // Accept inputs with bad CRC/ADLER32 checksums so fuzzing is not gated on
  // the mutator producing valid checksums.
  png_set_crc_action(png_handler.png_ptr, PNG_CRC_QUIET_USE, PNG_CRC_QUIET_USE);
#ifdef PNG_IGNORE_ADLER32
  png_set_option(png_handler.png_ptr, PNG_IGNORE_ADLER32, PNG_OPTION_ON);
#endif
  // Setting up reading from buffer.
  png_handler.buf_state = new BufState();
  png_handler.buf_state->data = data + kPngHeaderSize;
  png_handler.buf_state->bytes_left = size - kPngHeaderSize;
  png_set_read_fn(png_handler.png_ptr, png_handler.buf_state, user_read_data);
  png_set_sig_bytes(png_handler.png_ptr, kPngHeaderSize);
  // libpng reports errors by longjmp'ing here; nonzero means an error fired.
  if (setjmp(png_jmpbuf(png_handler.png_ptr))) {
    PNG_CLEANUP
    return 0;
  }
  // Reading.
  png_read_info(png_handler.png_ptr, png_handler.info_ptr);
  // reset error handler to put png_deleter into scope.
  if (setjmp(png_jmpbuf(png_handler.png_ptr))) {
    PNG_CLEANUP
    return 0;
  }
  png_uint_32 width, height;
  int bit_depth, color_type, interlace_type, compression_type;
  int filter_type;
  if (!png_get_IHDR(png_handler.png_ptr, png_handler.info_ptr, &width, &height,
                    &bit_depth, &color_type, &interlace_type, &compression_type,
                    &filter_type)) {
    PNG_CLEANUP
    return 0;
  }
  // Reject images with more than ~1e8 pixels: decoding would be too slow.
  // With the `crash` feature, this branch doubles as a deliberate crash site.
  if (width && height > 100000000 / width) {
    PNG_CLEANUP
#ifdef HAS_DUMMY_CRASH
#ifdef __aarch64__
    asm volatile(".word 0xf7f0a000\n");
#else
    asm("ud2");
#endif
#endif
    return 0;
  }
  // Set several transforms that browsers typically use:
  png_set_gray_to_rgb(png_handler.png_ptr);
  png_set_expand(png_handler.png_ptr);
  png_set_packing(png_handler.png_ptr);
  png_set_scale_16(png_handler.png_ptr);
  png_set_tRNS_to_alpha(png_handler.png_ptr);
  int passes = png_set_interlace_handling(png_handler.png_ptr);
  png_read_update_info(png_handler.png_ptr, png_handler.info_ptr);
  // One row buffer is enough; rows are read (and discarded) one at a time.
  png_handler.row_ptr =
      png_malloc(png_handler.png_ptr,
                 png_get_rowbytes(png_handler.png_ptr, png_handler.info_ptr));
  for (int pass = 0; pass < passes; ++pass) {
    for (png_uint_32 y = 0; y < height; ++y) {
      png_read_row(png_handler.png_ptr,
                   static_cast<png_bytep>(png_handler.row_ptr), nullptr);
    }
  }
  png_read_end(png_handler.png_ptr, png_handler.end_info_ptr);
  PNG_CLEANUP
  return 0;
}

View File

@ -0,0 +1,29 @@
use std::env;
use libafl_cc::{ClangWrapper, CompilerWrapper};
/// Entry point of the `libafl_cc`/`libafl_cxx` compiler wrapper: forwards the
/// command line to clang via [`ClangWrapper`], linking in the
/// `libfuzzer_libpng` staticlib and enabling SanitizerCoverage
/// (`trace-pc-guard`) instrumentation. Exits with the wrapped compiler's
/// status code; panics when invoked with no arguments.
pub fn main() {
    let args: Vec<String> = env::args().collect();
    if args.len() > 1 {
        // Directory containing this wrapper binary; the fuzzer staticlib is
        // expected to live next to it (built into target/release).
        let mut dir = env::current_exe().unwrap();
        dir.pop();
        let mut cc = ClangWrapper::new();
        if let Some(code) = cc
            .cpp(true) // compile/link as C++ — the libpng harness (harness.cc) is C++
            // silence the compiler wrapper output, needed for some configure scripts.
            .silence(true)
            .parse_args(&args)
            .expect("Failed to parse the command line")
            .link_staticlib(&dir, "libfuzzer_libpng")
            .add_arg("-fsanitize-coverage=trace-pc-guard")
            .run()
            .expect("Failed to run the wrapped compiler")
        {
            std::process::exit(code);
        }
    } else {
        panic!("LibAFL CC: No Arguments given");
    }
}

View File

@ -0,0 +1,5 @@
pub mod libafl_cc;
/// Thin binary entry point; all wrapper logic lives in the `libafl_cc` module.
fn main() {
    libafl_cc::main()
}

View File

@ -0,0 +1,226 @@
//! A libfuzzer-like fuzzer with llmp-multithreading support and restarts
//! The example harness is built for libpng.
use mimalloc::MiMalloc;
#[global_allocator]
static GLOBAL: MiMalloc = MiMalloc;
use core::time::Duration;
#[cfg(feature = "crash")]
use std::ptr;
use std::{env, path::PathBuf};
use libafl::{
bolts::{
current_nanos,
rands::StdRand,
tuples::{tuple_list, Merge},
AsSlice,
},
corpus::{
minimizer::{CorpusMinimizer, StdCorpusMinimizer},
Corpus, InMemoryCorpus, OnDiskCorpus,
},
events::{setup_restarting_mgr_std, EventConfig, EventFirer, EventRestarter, LogSeverity},
executors::{inprocess::InProcessExecutor, ExitKind, TimeoutExecutor},
feedback_or, feedback_or_fast,
feedbacks::{CrashFeedback, MaxMapFeedback, TimeFeedback, TimeoutFeedback},
fuzzer::{Fuzzer, StdFuzzer},
inputs::{BytesInput, HasTargetBytes},
monitors::MultiMonitor,
mutators::{
scheduled::{havoc_mutations, tokens_mutations, StdScheduledMutator},
token_mutations::Tokens,
},
observers::{HitcountsMapObserver, StdMapObserver, TimeObserver},
schedulers::{
powersched::PowerSchedule, IndexesLenTimeMinimizerScheduler, StdWeightedScheduler,
},
stages::{calibrate::CalibrationStage, power::StdPowerMutationalStage},
state::{HasCorpus, HasMetadata, StdState},
Error,
};
use libafl_targets::{libfuzzer_initialize, libfuzzer_test_one_input, EDGES_MAP, MAX_EDGES_NUM};
/// The main fn, `no_mangle` as it is a C main
///
/// Runs [`fuzz`] with the default local directories (`./corpus`, `./crashes`)
/// and broker port `1337`.
#[cfg(not(test))]
#[no_mangle]
pub fn libafl_main() {
    // Register the metadata types used in this fuzzer
    // Needed only on no_std
    //RegistryBuilder::register::<Tokens>();
    println!(
        "Workdir: {:?}",
        env::current_dir().unwrap().to_string_lossy().to_string()
    );
    fuzz(
        &[PathBuf::from("./corpus")],
        PathBuf::from("./crashes"),
        1337,
    )
    .expect("An error occurred while fuzzing");
}
/// The actual fuzzer: builds the whole LibAFL pipeline (restarting event
/// manager, coverage + time observers, power-scheduled mutation), fuzzes for a
/// fixed number of `fuzz_one` rounds, then distills (minimizes) the corpus
/// with [`StdCorpusMinimizer`] and restarts.
#[cfg(not(test))]
fn fuzz(corpus_dirs: &[PathBuf], objective_dir: PathBuf, broker_port: u16) -> Result<(), Error> {
    // While the monitor is part of the state, it is usually used in the broker - which is likely never restarted
    let monitor = MultiMonitor::new(|s| println!("{}", s));
    // The restarting state will spawn the same process again as child, then restart it each time it crashes.
    let (state, mut restarting_mgr) =
        match setup_restarting_mgr_std(monitor, broker_port, EventConfig::AlwaysUnique) {
            Ok(res) => res,
            Err(err) => match err {
                // The broker is shutting down; exit this client cleanly.
                Error::ShuttingDown => {
                    return Ok(());
                }
                _ => {
                    panic!("Failed to setup the restarter: {}", err);
                }
            },
        };
    // Create an observation channel using the coverage map
    let edges = unsafe { &mut EDGES_MAP[0..MAX_EDGES_NUM] };
    let edges_observer = HitcountsMapObserver::new(StdMapObserver::new("edges", edges));
    // The minimizer looks up the edges observer by name to replay inputs after fuzzing.
    let minimizer = StdCorpusMinimizer::new(&edges_observer);
    // Create an observation channel to keep track of the execution time
    let time_observer = TimeObserver::new("time");
    let map_feedback = MaxMapFeedback::new_tracking(&edges_observer, true, false);
    let calibration = CalibrationStage::new(&map_feedback);
    // Feedback to rate the interestingness of an input
    // This one is composed by two Feedbacks in OR
    let mut feedback = feedback_or!(
        // New maximization map feedback linked to the edges observer and the feedback state
        map_feedback,
        // Time feedback, this one does not need a feedback state
        TimeFeedback::new_with_observer(&time_observer)
    );
    // A feedback to choose if an input is a solution or not
    let mut objective = feedback_or_fast!(CrashFeedback::new(), TimeoutFeedback::new());
    // If not restarting, create a State from scratch
    let mut state = state.unwrap_or_else(|| {
        StdState::new(
            // RNG
            StdRand::with_seed(current_nanos()),
            // Corpus that will be evolved, we keep it in memory for performance
            InMemoryCorpus::new(),
            // Corpus in which we store solutions (crashes in this example),
            // on disk so the user can get them after stopping the fuzzer
            OnDiskCorpus::new(objective_dir).unwrap(),
            // States of the feedbacks.
            // The feedbacks can report the data that should persist in the State.
            &mut feedback,
            // Same for objective feedbacks
            &mut objective,
        )
        .unwrap()
    });
    println!("We're a client, let's fuzz :)");
    // Create a PNG dictionary if not existing
    if state.metadata().get::<Tokens>().is_none() {
        state.add_metadata(Tokens::from([
            vec![137, 80, 78, 71, 13, 10, 26, 10], // PNG header
            "IHDR".as_bytes().to_vec(),
            "IDAT".as_bytes().to_vec(),
            "PLTE".as_bytes().to_vec(),
            "IEND".as_bytes().to_vec(),
        ]));
    }
    // Setup a basic mutator with a mutational stage
    let mutator = StdScheduledMutator::new(havoc_mutations().merge(tokens_mutations()));
    let power = StdPowerMutationalStage::new(mutator, &edges_observer);
    let mut stages = tuple_list!(calibration, power);
    // A minimization+queue policy to get testcases from the corpus
    let scheduler = IndexesLenTimeMinimizerScheduler::new(StdWeightedScheduler::with_schedule(
        PowerSchedule::FAST,
    ));
    // A fuzzer with feedbacks and a corpus scheduler
    let mut fuzzer = StdFuzzer::new(scheduler, feedback, objective);
    // The wrapped harness function, calling out to the LLVM-style harness
    let mut harness = |input: &BytesInput| {
        let target = input.target_bytes();
        let buf = target.as_slice();
        // With the `crash` feature, deliberately segfault on inputs whose
        // fifth byte is 0, to exercise crash detection.
        #[cfg(feature = "crash")]
        if buf.len() > 4 && buf[4] == 0 {
            unsafe {
                eprintln!("Crashing (for testing purposes)");
                let addr = ptr::null_mut();
                *addr = 1;
            }
        }
        libfuzzer_test_one_input(buf);
        ExitKind::Ok
    };
    // Create the executor for an in-process function with one observer for edge coverage and one for the execution time
    let mut executor = TimeoutExecutor::new(
        InProcessExecutor::new(
            &mut harness,
            tuple_list!(edges_observer, time_observer),
            &mut fuzzer,
            &mut state,
            &mut restarting_mgr,
        )?,
        // 10 seconds timeout
        Duration::new(10, 0),
    );
    // The actual target run starts here.
    // Call LLVMFuzzerInitialize() if present.
    let args: Vec<String> = env::args().collect();
    if libfuzzer_initialize(&args) == -1 {
        println!("Warning: LLVMFuzzerInitialize failed with -1")
    }
    // In case the corpus is empty (on first run), reset
    if state.corpus().count() < 1 {
        state
            .load_initial_inputs(&mut fuzzer, &mut executor, &mut restarting_mgr, corpus_dirs)
            .unwrap_or_else(|_| panic!("Failed to load initial corpus at {:?}", &corpus_dirs));
        println!("We imported {} inputs from disk.", state.corpus().count());
    }
    // Fuzz for `iters` rounds of `fuzz_one` before minimizing and restarting.
    // Each fuzz_one will internally do many executions of the target.
    // If your target is very unstable, setting a low count here may help.
    // However, you will lose a lot of performance that way.
    let iters = 10_000;
    fuzzer.fuzz_loop_for(
        &mut stages,
        &mut executor,
        &mut state,
        &mut restarting_mgr,
        iters,
    )?;
    // Distill the corpus down to a minimal covering subset, logging how many
    // testcases were removed.
    let orig_size = state.corpus().count();
    let msg = "Started distillation...".to_string();
    restarting_mgr.log(&mut state, LogSeverity::Info, msg)?;
    minimizer.minimize(&mut fuzzer, &mut executor, &mut restarting_mgr, &mut state)?;
    let msg = format!("Distilled out {} cases", orig_size - state.corpus().count());
    restarting_mgr.log(&mut state, LogSeverity::Info, msg)?;
    // It's important, that we store the state before restarting!
    // Else, the parent will not respawn a new child and quit.
    restarting_mgr.on_restart(&mut state)?;
    Ok(())
}

View File

@ -26,6 +26,7 @@ qemu_cli = ["cli"]
frida_cli = ["cli"] frida_cli = ["cli"]
afl_exec_sec = [] # calculate exec/sec like AFL afl_exec_sec = [] # calculate exec/sec like AFL
errors_backtrace = ["backtrace"] errors_backtrace = ["backtrace"]
cmin = ["z3"] # corpus minimization
# features hiding dependencies licensed under GPL # features hiding dependencies licensed under GPL
gpl = [] gpl = []

View File

@ -0,0 +1,205 @@
//! Whole corpus minimizers, for reducing the number of samples/the total size/the average runtime
//! of your corpus.
use alloc::{
string::{String, ToString},
vec::Vec,
};
use core::{hash::Hash, marker::PhantomData};
use hashbrown::{HashMap, HashSet};
use num_traits::ToPrimitive;
use z3::{ast::Bool, Config, Context, Optimize};
use crate::{
bolts::AsIter,
corpus::Corpus,
events::EventManager,
executors::{Executor, HasObservers},
inputs::Input,
observers::{MapObserver, ObserversTuple},
schedulers::{LenTimeMulTestcaseScore, Scheduler, TestcaseScore},
state::{HasCorpus, HasMetadata},
Error, Evaluator, HasScheduler,
};
/// `CorpusMinimizers` minimize corpora according to internal logic. See various implementations for
/// details.
pub trait CorpusMinimizer<I, S>
where
    I: Input,
    S: HasCorpus<I>,
{
    /// Minimize the corpus of the provided state.
    ///
    /// Implementations may re-execute corpus entries (hence the executor,
    /// event manager and fuzzer parameters) and notify the fuzzer's scheduler
    /// of removed entries.
    fn minimize<CS, EX, EM, OT, Z>(
        &self,
        fuzzer: &mut Z,
        executor: &mut EX,
        manager: &mut EM,
        state: &mut S,
    ) -> Result<(), Error>
    where
        CS: Scheduler<I, S>,
        EX: Executor<EM, I, S, Z> + HasObservers<I, OT, S>,
        EM: EventManager<EX, I, S, Z>,
        OT: ObserversTuple<I, S>,
        Z: Evaluator<EX, EM, I, S> + HasScheduler<CS, I, S>;
}
/// Minimizes a corpus according to coverage maps, weighting by the specified `TestcaseScore`.
///
/// Algorithm based on WMOPT: <https://hexhive.epfl.ch/publications/files/21ISSTA2.pdf>
#[derive(Debug)]
pub struct MapCorpusMinimizer<E, I, O, S, TS>
where
    E: Copy + Hash + Eq,
    I: Input,
    for<'a> O: MapObserver<Entry = E> + AsIter<'a, Item = E>,
    S: HasMetadata + HasCorpus<I>,
    TS: TestcaseScore<I, S>,
{
    /// Name of the map observer whose coverage is read after each execution.
    obs_name: String,
    /// Binds the otherwise-unused type parameters to this struct.
    phantom: PhantomData<(E, I, O, S, TS)>,
}
/// Standard corpus minimizer, which weights inputs by length and time.
pub type StdCorpusMinimizer<E, I, O, S> =
    MapCorpusMinimizer<E, I, O, S, LenTimeMulTestcaseScore<I, S>>;
impl<E, I, O, S, TS> MapCorpusMinimizer<E, I, O, S, TS>
where
    E: Copy + Hash + Eq,
    I: Input,
    for<'a> O: MapObserver<Entry = E> + AsIter<'a, Item = E>,
    S: HasMetadata + HasCorpus<I>,
    TS: TestcaseScore<I, S>,
{
    /// Constructs a new `MapCorpusMinimizer` from a provided observer. This observer will be used
    /// in the future to get observed maps from an executed input.
    pub fn new(obs: &O) -> Self {
        Self {
            // Only the name is kept; the observer itself is looked up by name
            // from the executor's observers at minimization time.
            obs_name: obs.name().to_string(),
            phantom: PhantomData,
        }
    }
}
impl<E, I, O, S, TS> CorpusMinimizer<I, S> for MapCorpusMinimizer<E, I, O, S, TS>
where
    E: Copy + Hash + Eq,
    I: Input,
    for<'a> O: MapObserver<Entry = E> + AsIter<'a, Item = E>,
    S: HasMetadata + HasCorpus<I>,
    TS: TestcaseScore<I, S>,
{
    /// Minimize the corpus: re-execute every testcase to collect its coverage
    /// map, then ask z3 to pick a minimum-weight subset of testcases that
    /// still covers every observed (map index, hit count) pair, and remove
    /// every testcase outside that subset (notifying the scheduler).
    ///
    /// Returns an error if z3 reports the constraints unsatisfiable (no model).
    fn minimize<CS, EX, EM, OT, Z>(
        &self,
        fuzzer: &mut Z,
        executor: &mut EX,
        manager: &mut EM,
        state: &mut S,
    ) -> Result<(), Error>
    where
        CS: Scheduler<I, S>,
        EX: Executor<EM, I, S, Z> + HasObservers<I, OT, S>,
        EM: EventManager<EX, I, S, Z>,
        OT: ObserversTuple<I, S>,
        Z: Evaluator<EX, EM, I, S> + HasScheduler<CS, I, S>,
    {
        let cfg = Config::default();
        let ctx = Context::new(&cfg);
        let opt = Optimize::new(&ctx);
        // seed boolean -> (corpus index, weight)
        let mut seed_exprs = HashMap::new();
        // coverage map index -> hit count -> seeds that observed that count
        let mut cov_map = HashMap::new();
        for idx in 0..state.corpus().count() {
            let (weight, input) = {
                let mut testcase = state.corpus().get(idx)?.borrow_mut();
                let weight = TS::compute(&mut *testcase, state)?
                    .to_u64()
                    .expect("Weight must be computable.");
                let input = testcase
                    .input()
                    .as_ref()
                    .expect("Input must be available.")
                    .clone();
                (weight, input)
            };
            // Execute the input; we cannot rely on the metadata already being present.
            executor.observers_mut().pre_exec_all(state, &input)?;
            let kind = executor.run_target(fuzzer, state, manager, &input)?;
            executor
                .observers_mut()
                .post_exec_all(state, &input, &kind)?;
            let seed_expr = Bool::fresh_const(&ctx, "seed");
            let obs: &O = executor
                .observers()
                .match_name::<O>(&self.obs_name)
                .expect("Observer must be present.");
            // Store coverage, mapping coverage map indices to hit counts (if present) and the
            // associated seeds for the map indices with those hit counts.
            for (i, e) in obs.as_iter().copied().enumerate() {
                cov_map
                    .entry(i)
                    .or_insert_with(HashMap::new)
                    .entry(e)
                    .or_insert_with(HashSet::new)
                    .insert(seed_expr.clone());
            }
            // Keep track of that seed's index and weight
            seed_exprs.insert(seed_expr, (idx, weight));
        }
        for (_, cov) in cov_map {
            for (_, seeds) in cov {
                // At least one seed for each hit count of each coverage map index
                if let Some(reduced) = seeds.into_iter().reduce(|s1, s2| s1 | s2) {
                    opt.assert(&reduced);
                }
            }
        }
        for (seed, (_, weight)) in &seed_exprs {
            // opt will attempt to minimize the number of violated assertions.
            //
            // To tell opt to minimize the number of seeds, we tell opt to maximize the number of
            // not seeds.
            //
            // Additionally, each seed has a weight associated with them; the higher, the more z3
            // doesn't want to violate the assertion. Thus, inputs which have higher weights will be
            // less likely to appear in the final corpus -- provided all their coverage points are
            // hit by at least one other input.
            opt.assert_soft(&!seed, *weight, None);
        }
        // Perform the optimization! An unsat outcome is detected below via the
        // absence of a model.
        opt.check(&[]);
        if let Some(model) = opt.get_model() {
            let mut removed = Vec::with_capacity(state.corpus().count());
            for (seed, (idx, _)) in seed_exprs {
                // if the model says the seed isn't there, mark it for deletion
                if !model.eval(&seed, true).unwrap().as_bool().unwrap() {
                    removed.push(idx);
                }
            }
            // reverse order; if indexes are stored in a vec, we need to remove from back to front
            removed.sort_unstable_by(|idx1, idx2| idx2.cmp(idx1));
            for idx in removed {
                let removed = state.corpus_mut().remove(idx)?;
                // scheduler needs to know we've removed the input, or it will continue to try
                // to use now-missing inputs
                fuzzer.scheduler_mut().on_remove(state, idx, &removed)?;
            }
            Ok(())
        } else {
            Err(Error::unknown("Corpus minimization failed; unsat."))
        }
    }
}

View File

@ -13,11 +13,16 @@ pub use ondisk::OnDiskCorpus;
#[cfg(feature = "std")] #[cfg(feature = "std")]
pub mod cached; pub mod cached;
use core::cell::RefCell;
#[cfg(feature = "std")] #[cfg(feature = "std")]
pub use cached::CachedOnDiskCorpus; pub use cached::CachedOnDiskCorpus;
#[cfg(feature = "cmin")]
pub mod minimizer;
use core::cell::RefCell;
#[cfg(feature = "cmin")]
pub use minimizer::*;
use crate::{inputs::Input, Error}; use crate::{inputs::Input, Error};
/// Corpus with all current testcases /// Corpus with all current testcases

View File

@ -43,6 +43,22 @@ impl Input for GeneralizedInput {
format!("{:016x}", hasher.finish()) format!("{:016x}", hasher.finish())
} }
/// Load from a plain file of bytes
///
/// The file contents are read verbatim; no generalization metadata is
/// attached (`generalized` starts as `None`, `grimoire_mutated` as `false`).
#[cfg(feature = "std")]
fn from_file<P>(path: P) -> Result<Self, Error>
where
    P: AsRef<Path>,
{
    let mut file = File::open(path)?;
    let mut bytes: Vec<u8> = vec![];
    file.read_to_end(&mut bytes)?;
    Ok(Self {
        bytes,
        generalized: None,
        grimoire_mutated: false,
    })
}
/// An hook executed before being added to the corpus /// An hook executed before being added to the corpus
fn wrapped_as_testcase(&mut self) { fn wrapped_as_testcase(&mut self) {
// remove generalized for inputs generated with bit-level mutations // remove generalized for inputs generated with bit-level mutations
@ -206,20 +222,4 @@ impl GeneralizedInput {
pub fn generalized_mut(&mut self) -> &mut Option<Vec<GeneralizedItem>> { pub fn generalized_mut(&mut self) -> &mut Option<Vec<GeneralizedItem>> {
&mut self.generalized &mut self.generalized
} }
/// Load from a plain file of bytes
#[cfg(feature = "std")]
pub fn from_bytes_file<P>(path: P) -> Result<Self, Error>
where
P: AsRef<Path>,
{
let mut file = File::open(path)?;
let mut bytes: Vec<u8> = vec![];
file.read_to_end(&mut bytes)?;
Ok(Self {
bytes,
generalized: None,
grimoire_mutated: false,
})
}
} }

View File

@ -1,7 +1,8 @@
//! The Minimizer schedulers are a family of corpus schedulers that feed the fuzzer //! The Minimizer schedulers are a family of corpus schedulers that feed the fuzzer
// with testcases only from a subset of the total corpus. //! with testcases only from a subset of the total corpus.
use core::marker::PhantomData; use alloc::vec::Vec;
use core::{cmp::Ordering, marker::PhantomData};
use hashbrown::{HashMap, HashSet}; use hashbrown::{HashMap, HashSet};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
@ -99,7 +100,67 @@ where
idx: usize, idx: usize,
testcase: &Option<Testcase<I>>, testcase: &Option<Testcase<I>>,
) -> Result<(), Error> { ) -> Result<(), Error> {
self.base.on_remove(state, idx, testcase) self.base.on_remove(state, idx, testcase)?;
let mut entries = if let Some(meta) = state.metadata_mut().get_mut::<TopRatedsMetadata>() {
let entries = meta
.map
.drain_filter(|_, other_idx| *other_idx == idx)
.map(|(entry, _)| entry)
.collect::<Vec<_>>();
meta.map
.values_mut()
.filter(|other_idx| **other_idx > idx)
.for_each(|other_idx| {
*other_idx -= 1;
});
entries
} else {
return Ok(());
};
entries.sort_unstable(); // this should already be sorted, but just in case
let mut map = HashMap::new();
for i in 0..state.corpus().count() {
let mut old = state.corpus().get(i)?.borrow_mut();
let factor = F::compute(&mut *old, state)?;
if let Some(old_map) = old.metadata_mut().get_mut::<M>() {
let mut e_iter = entries.iter();
let mut map_iter = old_map.as_slice().iter(); // ASSERTION: guaranteed to be in order?
// manual set intersection
let mut entry = e_iter.next();
let mut map_entry = map_iter.next();
while let Some(e) = entry {
if let Some(me) = map_entry {
match e.cmp(me) {
Ordering::Less => {
entry = e_iter.next();
}
Ordering::Equal => {
// if we found a better factor, prefer it
map.entry(*e)
.and_modify(|(f, idx)| {
if *f > factor {
*f = factor;
*idx = i;
}
})
.or_insert((factor, i));
}
Ordering::Greater => {
map_entry = map_iter.next();
}
}
} else {
break;
}
}
}
}
if let Some(meta) = state.metadata_mut().get_mut::<TopRatedsMetadata>() {
meta.map
.extend(map.into_iter().map(|(entry, (_, idx))| (entry, idx)));
}
Ok(())
} }
/// Gets the next entry /// Gets the next entry

View File

@ -11,7 +11,7 @@ use serde::{Deserialize, Serialize};
use crate::{ use crate::{
bolts::rands::Rand, bolts::rands::Rand,
corpus::{Corpus, SchedulerTestcaseMetaData}, corpus::{Corpus, SchedulerTestcaseMetaData, Testcase},
inputs::Input, inputs::Input,
schedulers::{ schedulers::{
powersched::{PowerSchedule, SchedulerMetadata}, powersched::{PowerSchedule, SchedulerMetadata},
@ -264,6 +264,22 @@ where
Ok(()) Ok(())
} }
/// Called when the testcase at `idx` is replaced; re-runs the add logic so
/// the scheduler's data for that slot reflects the new testcase.
fn on_replace(&self, state: &mut S, idx: usize, _testcase: &Testcase<I>) -> Result<(), Error> {
    // Recreate the alias table
    self.on_add(state, idx)
}
/// Called when a testcase is removed from the corpus (e.g. by cmin);
/// rebuilds the alias table from the remaining entries so sampling never
/// references a stale index.
fn on_remove(
    &self,
    state: &mut S,
    _idx: usize,
    _testcase: &Option<Testcase<I>>,
) -> Result<(), Error> {
    // Recreate the alias table
    self.create_alias_table(state)?;
    Ok(())
}
#[allow(clippy::similar_names, clippy::cast_precision_loss)] #[allow(clippy::similar_names, clippy::cast_precision_loss)]
fn next(&self, state: &mut S) -> Result<usize, Error> { fn next(&self, state: &mut S) -> Result<usize, Error> {
if state.corpus().count() == 0 { if state.corpus().count() == 0 {
@ -283,7 +299,7 @@ where
let current_cycles = wsmeta.runs_in_current_cycle(); let current_cycles = wsmeta.runs_in_current_cycle();
if current_cycles > corpus_counts { if current_cycles >= corpus_counts {
wsmeta.set_runs_current_cycle(0); wsmeta.set_runs_current_cycle(0);
} else { } else {
wsmeta.set_runs_current_cycle(current_cycles + 1); wsmeta.set_runs_current_cycle(current_cycles + 1);