From 40b73d4b1b01a1c6019c73b5a015dddfd03ce8c6 Mon Sep 17 00:00:00 2001 From: Andrea Fioraldi Date: Mon, 27 Sep 2021 09:39:32 +0200 Subject: [PATCH] Book refactoring and update (#280) * llmp docs skeleton * llmp documentation * more llmp docu * llmp * some core concepts * start working on tutorial * adapted rng_core to lain * fix tutorial build * warnings, format * add explanation * No need to own the types * metadata * writing * fmt * tutorial folder * lain needs nightly * added mdbook test to ci * fix ci, add linkcheck * more book * baby * tutorial target * fix mdbook build * fix mdbook test * more book * fixed typo * fixed build * spawn instances' * 'finish' book * added sugar crate information Co-authored-by: Dominik Maier --- .github/workflows/build_and_test.yml | 20 +- docs/book.toml | 4 + docs/src/SUMMARY.md | 19 +- .../advanced_features/concolic/concolic.md | 8 +- docs/src/advanced_features/no_std/no_std.md | 6 +- docs/src/baby_fuzzer.md | 105 ++++++---- docs/src/core_concepts/core_concepts.md | 5 + docs/src/core_concepts/corpus.md | 11 ++ docs/src/core_concepts/executor.md | 16 ++ docs/src/core_concepts/feedback.md | 18 ++ docs/src/core_concepts/generator.md | 9 + docs/src/core_concepts/input.md | 13 ++ docs/src/core_concepts/mutator.md | 9 + docs/src/core_concepts/observer.md | 12 ++ docs/src/core_concepts/stage.md | 9 + docs/src/design/architecture.md | 4 +- docs/src/design/core_concepts.md | 86 -------- docs/src/design/design.md | 2 +- docs/src/design/metadata.md | 5 +- docs/src/design/usage.md | 1 - docs/src/getting_started/build.md | 11 +- docs/src/getting_started/crates.md | 63 ++++-- docs/src/getting_started/getting_started.md | 5 +- docs/src/getting_started/setup.md | 33 ++-- docs/src/introduction.md | 26 +-- docs/src/libafl.md | 8 +- docs/src/message_passing/configurations.md | 10 + docs/src/message_passing/message_passing.md | 92 +++++++++ docs/src/message_passing/spawn_instances.md | 49 +++++ docs/src/tutorial/intro.md | 5 + 
docs/src/tutorial/tutorial.md | 5 + fuzzers/tutorial/Cargo.toml | 33 ++++ fuzzers/tutorial/rust-toolchain | 1 + fuzzers/tutorial/src/bin/libafl_cc.rs | 35 ++++ fuzzers/tutorial/src/bin/libafl_cxx.rs | 5 + fuzzers/tutorial/src/input.rs | 72 +++++++ fuzzers/tutorial/src/lib.rs | 187 ++++++++++++++++++ fuzzers/tutorial/src/metadata.rs | 90 +++++++++ fuzzers/tutorial/src/mutator.rs | 76 +++++++ fuzzers/tutorial/target.c | 71 +++++++ libafl/Cargo.toml | 5 +- libafl/src/bolts/rands.rs | 16 ++ libafl/src/lib.rs | 2 +- libafl/src/observers/mod.rs | 4 +- 44 files changed, 1062 insertions(+), 204 deletions(-) create mode 100644 docs/src/core_concepts/core_concepts.md create mode 100644 docs/src/core_concepts/corpus.md create mode 100644 docs/src/core_concepts/executor.md create mode 100644 docs/src/core_concepts/feedback.md create mode 100644 docs/src/core_concepts/generator.md create mode 100644 docs/src/core_concepts/input.md create mode 100644 docs/src/core_concepts/mutator.md create mode 100644 docs/src/core_concepts/observer.md create mode 100644 docs/src/core_concepts/stage.md delete mode 100644 docs/src/design/core_concepts.md delete mode 100644 docs/src/design/usage.md create mode 100644 docs/src/message_passing/configurations.md create mode 100644 docs/src/message_passing/message_passing.md create mode 100644 docs/src/message_passing/spawn_instances.md create mode 100644 docs/src/tutorial/intro.md create mode 100644 docs/src/tutorial/tutorial.md create mode 100644 fuzzers/tutorial/Cargo.toml create mode 100644 fuzzers/tutorial/rust-toolchain create mode 100644 fuzzers/tutorial/src/bin/libafl_cc.rs create mode 100644 fuzzers/tutorial/src/bin/libafl_cxx.rs create mode 100644 fuzzers/tutorial/src/input.rs create mode 100644 fuzzers/tutorial/src/lib.rs create mode 100644 fuzzers/tutorial/src/metadata.rs create mode 100644 fuzzers/tutorial/src/mutator.rs create mode 100644 fuzzers/tutorial/target.c diff --git a/.github/workflows/build_and_test.yml 
b/.github/workflows/build_and_test.yml index f7b7a9c638..57fef89514 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -10,7 +10,7 @@ env: CARGO_TERM_COLOR: always jobs: - lint: + book: strategy: matrix: os: [ubuntu-latest, windows-latest, macOS-latest] @@ -22,13 +22,17 @@ jobs: profile: minimal toolchain: stable - uses: Swatinem/rust-cache@v1 - - name: Add clippy - run: rustup component add clippy - #- name: Run clippy - # uses: actions-rs/cargo@v1 - # with: - # command: clippy - # args: --all + - name: install mdbook + run: cargo install mdbook + - name: install linkcheck + run: cargo install mdbook-linkcheck + - name: Build libafl debug + run: cargo build -p libafl + - name: Build the book + run: cd docs && mdbook build + - name: Test the book + run: cd docs && mdbook test -L ../target/debug/deps + ubuntu: runs-on: ubuntu-latest diff --git a/docs/book.toml b/docs/book.toml index 8901056073..8699f2c46f 100644 --- a/docs/book.toml +++ b/docs/book.toml @@ -4,3 +4,7 @@ language = "en" multilingual = false src = "src" title = "The LibAFL Fuzzing Library" + +[output.html] + +[output.linkcheck] diff --git a/docs/src/SUMMARY.md b/docs/src/SUMMARY.md index 81bf4b3818..d78cebcb95 100644 --- a/docs/src/SUMMARY.md +++ b/docs/src/SUMMARY.md @@ -11,12 +11,27 @@ - [Baby Fuzzer](./baby_fuzzer.md) +- [Core Concepts](./core_concepts/core_concepts.md) + - [Observer](./core_concepts/observer.md) + - [Executor](./core_concepts/executor.md) + - [Feedback](./core_concepts/feedback.md) + - [Input](./core_concepts/input.md) + - [Corpus](./core_concepts/corpus.md) + - [Mutator](./core_concepts/mutator.md) + - [Generator](./core_concepts/generator.md) + - [Stage](./core_concepts/stage.md) + - [Design](./design/design.md) - - [Core Concepts](./design/core_concepts.md) - [Architecture](./design/architecture.md) - [Metadata](./design/metadata.md) +- [Message Passing](./message_passing/message_passing.md) + - [Spawning 
Instances](./message_passing/spawn_instances.md) + - [Configurations](./message_passing/configurations.md) + +- [Tutorial](./tutorial/tutorial.md) + - [Introduction](./tutorial/intro.md) - [Advanced Features](./advanced_features/advanced_features.md) - [Concolic Tracing & Hybrid Fuzzing](./advanced_features/concolic/concolic.md) - - [Using LibAFL in no_std environments](./advanced_features/no_std/no_std.md) \ No newline at end of file + - [LibAFL in `no_std` environments (Kernels, Hypervisors, ...)](./advanced_features/no_std/no_std.md) diff --git a/docs/src/advanced_features/concolic/concolic.md b/docs/src/advanced_features/concolic/concolic.md index c51bd0a745..3d5eb77c94 100644 --- a/docs/src/advanced_features/concolic/concolic.md +++ b/docs/src/advanced_features/concolic/concolic.md @@ -8,14 +8,14 @@ Finally, we'll walk through building a basic hybrid fuzzer using LibAFL. ## Concolic Tracing by Example Suppose you want to fuzz the following program: ```rust -fn main(input: &[u8]) -> i32 { +fn target(input: &[u8]) -> i32 { match &input { // fictitious crashing input &[1, 3, 3, 7] => 1337, // standard error handling code &[] => -1, // representative of normal execution - .. => 0 + _ => 0 } } ``` @@ -28,7 +28,7 @@ To understand what this entails, we'll run an example with the above program. First, we'll simplify the program to simple if-then-else-statements: ```rust -fn main(input: &[u8]) -> i32 { +fn target(input: &[u8]) -> i32 { if input.len() == 4 { if input[0] == 1 { if input[1] == 3 { @@ -58,7 +58,7 @@ fn main(input: &[u8]) -> i32 { ``` Next, we'll trace the program on the input `[]`. 
The trace would look like this: -```rust +```rust,ignore Branch { // if input.len() == 4 condition: Equals { left: Variable { name: "input_len" }, diff --git a/docs/src/advanced_features/no_std/no_std.md b/docs/src/advanced_features/no_std/no_std.md index d17ed3c52c..50a962c806 100644 --- a/docs/src/advanced_features/no_std/no_std.md +++ b/docs/src/advanced_features/no_std/no_std.md @@ -1,4 +1,4 @@ -# Using LibAFL in no_std environments +# Using LibAFL in `no_std` environments It is possible to use LibAFL in `no_std` environments e.g. custom platforms like microcontrolles, kernels, hypervisors, and more. @@ -9,7 +9,7 @@ libafl = { path = "path/to/libafl/", default-features = false} ``` Then build your project e.g. for `aarch64-unknown-none` using -``` +```sh cargo build --no-default-features --target aarch64-unknown-none ``` @@ -27,7 +27,7 @@ int my_real_seconds(void) ``` and here we use it in Rust. `external_current_millis` is then called from LibAFL. Note that it needs to be `no_mangle` in order to get picked up by LibAFL at linktime. -```rust +```rust,ignore #[no_mangle] pub extern "C" fn external_current_millis() -> u64 { unsafe { my_real_seconds()*1000 } diff --git a/docs/src/baby_fuzzer.md b/docs/src/baby_fuzzer.md index a08ca608e9..6ee69e0735 100644 --- a/docs/src/baby_fuzzer.md +++ b/docs/src/baby_fuzzer.md @@ -1,12 +1,18 @@ -# Baby Fuzzer +# A Simple LibAFL Fuzzer -This chapter will teach you how to create a naive fuzzer using the LibAFL API, you will learn about basic entities such as `State`, `Observer`, and `Executor`. -The following chapters will discuss in detail the components of LibAFL, while here we will just scratch the fundamentals. +This chapter discusses a naive fuzzer using the LibAFL API. +You will learn about basic entities such as `State`, `Observer`, and `Executor`. +While the following chapters discuss the components of LibAFL in detail, here we introduce the fundamentals. 
-We are going to fuzz a simple Rust function that panics under a condition. The fuzzer will be single-threaded and will stop after the crash like libFuzzer does normally. +We are going to fuzz a simple Rust function that panics under a condition. The fuzzer will be single-threaded and will stop after the crash, just like libFuzzer normally does. You can find a complete version of this tutorial as an example fuzzer in [`fuzzers/baby_fuzzer`](https://github.com/AFLplusplus/LibAFL/tree/main/fuzzers/baby_fuzzer). +> ### Warning +> +> This example fuzzer is too naive for any real-world usage. +> Its purpose is solely to show the main components of the library, for a more in-depth walkthrough on building a custom fuzzer go to the [Tutorial chapter](./tutorial/intro.md) directly. + ## Creating a project We use cargo to create a new Rust project with LibAFL as a dependency. @@ -16,7 +22,7 @@ $ cargo new baby_fuzzer $ cd baby_fuzzer ``` -The generated _Cargo.toml_ looks like the following: +The generated `Cargo.toml` looks like the following: ```toml [package] @@ -31,14 +37,14 @@ edition = "2018" ``` In order to use LibAFl we must add it as dependency adding `libafl = { path = "path/to/libafl/" }` under `[dependencies]`. -You can use the LibAFL version from crates.io if you want, in this case, you have to use `libafl = "*"` to get the latest version. +You can use the LibAFL version from crates.io if you want, in this case, you have to use `libafl = "*"` to get the latest version (or set it to the current version). -As we are going to fuzz Rust code, we want that a panic does not simply cause the program exit, but an abort that can be caught by the fuzzer. +As we are going to fuzz Rust code, we want that a panic does not simply cause the program to exit, but raise an `abort` that can then be caught by the fuzzer. To do that, we specify `panic = "abort"` in the [profiles](https://doc.rust-lang.org/cargo/reference/profiles.html). 
Alongside this setting, we add some optimization flags for the compile when building in release mode. -The final _Cargo.toml_ should look similar to the following: +The final `Cargo.toml` should look similar to the following: ```toml @@ -66,11 +72,16 @@ debug = true ## The function under test -Opening `src/main.rs` we have an empty main function. -To start, we create the closure that we want to fuzz. It takes a buffer as input and panics if it starts with "abc". +Opening `src/main.rs`, we have an empty `main` function. +To start, we create the closure that we want to fuzz. It takes a buffer as input and panics if it starts with `"abc"`. ```rust -let mut harness = |buf: &[u8]| { +extern crate libafl; +use libafl::inputs::{BytesInput, HasTargetBytes}; + +let mut harness = |input: &BytesInput| { + let target = input.target_bytes(); + let buf = target.as_slice(); if buf.len() > 0 && buf[0] == 'a' as u8 { if buf.len() > 1 && buf[1] == 'b' as u8 { if buf.len() > 2 && buf[2] == 'c' as u8 { @@ -80,16 +91,17 @@ let mut harness = |buf: &[u8]| { } }; // To test the panic: -// let input = "abc".as_bytes(); +// let input = BytesInput::new("abc".as_bytes()); // harness(&input); ``` ## Generating and running some tests -One of the main components that a LibAFL-based fuzzer uses is the State, a container of the data that is evolved during the fuzzing process, such as the Corpus of inputs. -In our main so we create a basic State instance like the following: +One of the main components that a LibAFL-based fuzzer uses is the State, a container of the data that is evolved during the fuzzing process. +Includes all State, such as the Corpus of inputs, the current rng state, and potential Metadata for the testcases and run. 
+In our `main` we create a basic State instance like the following: -```rust +```rust,ignore // create a State from scratch let mut state = StdState::new( // RNG @@ -107,11 +119,11 @@ It takes a random number generator, that is part of the fuzzer state, in this ca As the second parameter, it takes an instance of something implementing the Corpus trait, InMemoryCorpus in this case. The corpus is the container of the testcases evolved by the fuzzer, in this case, we keep it all in memory. -We will discuss later the last parameter. The third is another corpus, in this case, to store the testcases that are considered as "solutions" for the fuzzer. For our purpose, the solution is the input that triggers the panic. In this case, we want to store it to disk under the `crashes` directory so we can inspect it. +We will discuss the last parameter later. The third parameter is another corpus, in this case, to store the testcases that are considered as "solutions" for the fuzzer. For our purpose, the solution is the input that triggers the panic. In this case, we want to store it to disk under the `crashes` directory, so we can inspect it. Another required component is the EventManager. It handles some events such as the addition of a testcase to the corpus during the fuzzing process. For our purpose, we use the simplest one that just displays the information about these events to the user using a Stats instance. -```rust +```rust,ignore // The Stats trait define how the fuzzer stats are reported to the user let stats = SimpleStats::new(|s| println!("{}", s)); @@ -120,10 +132,10 @@ let stats = SimpleStats::new(|s| println!("{}", s)); let mut mgr = SimpleEventManager::new(stats); ``` -In addition, we have the Fuzzer, an entity that contains some actions that alter the State. On of these actions is the scheduling of the testcases to the fuzzer using a CorpusScheduler. +In addition, we have the Fuzzer, an entity that contains some actions that alter the State. 
One of these actions is the scheduling of the testcases to the fuzzer using a CorpusScheduler. We create it as QueueCorpusScheduler, a scheduler that serves testcases to the fuzzer in a FIFO fashion. -```rust +```rust,ignore // A queue policy to get testcasess from the corpus let scheduler = QueueCorpusScheduler::new(); @@ -131,9 +143,9 @@ let scheduler = QueueCorpusScheduler::new(); let mut fuzzer = StdFuzzer::new(scheduler, (), ()); ``` -Last but not least, we need an Executor that is the entity responsible to run our program under test. In this example, we want to run the harness function in process, and so we use the InProcessExecutor. +Last but not least, we need an Executor that is the entity responsible to run our program under test. In this example, we want to run the harness function in-process (without forking off a child, for example), and so we use the `InProcessExecutor`. -```rust +```rust,ignore // Create the executor for an in-process function let mut executor = InProcessExecutor::new( &mut harness, @@ -150,9 +162,11 @@ As the executor expects that the harness returns an ExitKind object, we add `Exi Now we have the 4 major entities ready for running our tests, but we still cannot generate testcases. -For this purpose, we use a Generator, RandPrintablesGenerator that generates a string of printable bytes. +For this purpose, we use a Generator, `RandPrintablesGenerator` that generates a string of printable bytes. + +```rust,ignore +use libafl::generators::RandPrintablesGenerator; -```rust // Generator of printable bytearrays of max size 32 let mut generator = RandPrintablesGenerator::new(32); @@ -162,9 +176,11 @@ state .expect("Failed to generate the initial corpus".into()); ``` -Now you can prepend the following `use` directives to your main.rs and compile it. +Now you can prepend the necessary `use` directives to your main.rs and compile the fuzzer. 
```rust +extern crate libafl; + use std::path::PathBuf; use libafl::{ bolts::{current_nanos, rands::StdRand}, @@ -172,6 +188,7 @@ use libafl::{ events::SimpleEventManager, executors::{inprocess::InProcessExecutor, ExitKind}, generators::RandPrintablesGenerator, + inputs::{BytesInput, HasTargetBytes}, state::StdState, stats::SimpleStats, }; @@ -188,28 +205,33 @@ $ cargo run ## Evolving the corpus with feedbacks -Now you simply ran 8 randomly generated testcases but none of them has been stored in the corpus. If you are very lucky, maybe you triggered the panic by chance but you don't see any saved file in `crashes`. +Now you simply ran 8 randomly generated testcases, but none of them has been stored in the corpus. If you are very lucky, maybe you triggered the panic by chance but you don't see any saved file in `crashes`. Now we want to turn our simple fuzzer into a feedback-based one and increase the chance to generate the right input to trigger the panic. We are going to implement a simple feedback based on the 3 conditions that are needed to reach the panic. -To do that, we need a way to keep track of if a condition is satisfied. The component that feeds the fuzzer with information about properties of a fuzzing run, the satisfied conditions in our case, is the Observer. We use the StdMapObserver, the default observer that uses a map to keep track of covered elements. In our fuzzer, each condition is mapped to an entry of such map. +To do that, we need a way to keep track of if a condition is satisfied. The component that feeds the fuzzer with information about properties of a fuzzing run, the satisfied conditions in our case, is the Observer. We use the `StdMapObserver`, the default observer that uses a map to keep track of covered elements. In our fuzzer, each condition is mapped to an entry of such map. -We represent such map as a `static mut` variable: +We represent such map as a `static mut` variable. 
+As we don't rely on any instrumentation engine, we have to manually track the satisfied conditions in a map modyfing our tested function: ```rust +extern crate libafl; +use libafl::{ + inputs::{BytesInput, HasTargetBytes}, + executors::ExitKind, +}; + // Coverage map with explicit assignments due to the lack of instrumentation static mut SIGNALS: [u8; 16] = [0; 16]; fn signals_set(idx: usize) { unsafe { SIGNALS[idx] = 1 }; } -``` -As we don't rely on any instrumentation engine, we have to manually track the satisfied conditions in a map modyfing our tested function: - -```rust // The closure that we want to fuzz -let mut harness = |buf: &[u8]| { +let mut harness = |input: &BytesInput| { + let target = input.target_bytes(); + let buf = target.as_slice(); signals_set(0); if buf.len() > 0 && buf[0] == 'a' as u8 { signals_set(1); @@ -226,14 +248,14 @@ let mut harness = |buf: &[u8]| { The observer can be created directly from the `SIGNALS` map, in the following way: -```rust +```rust,ignore // Create an observation channel using the signals map let observer = StdMapObserver::new("signals", unsafe { &mut SIGNALS }); ``` The observers are usually kept in the corresponding executor as they keep track of information that is valid for just one run. We have then to modify our InProcessExecutor creation to include the observer as follows: -```rust +```rust,ignore // Create the executor for an in-process function with just one observer let mut executor = InProcessExecutor::new(&mut harness, tuple_list!(observer), &mut state, &mut mgr) @@ -248,7 +270,16 @@ Feedbacks are used also to decide if an input is a "solution". 
The feedback that We need to update our State creation including the feedback state and the Fuzzer including the feedback and the objective: -```rust +```rust,ignore +extern crate libafl; +use libafl::{ + bolts::{rands::StdRand, + corpus::{InMemoryCorpus, OnDiskCorpus, RandCorpusScheduler}, + events::{setup_restarting_mgr_std, EventConfig, EventRestarter}, + feedbacks::{MapFeedbackState, MaxMapFeedback, CrashFeedback}, + fuzzer::{StdFuzzer}, +}; + // The state of the edges feedback. let feedback_state = MapFeedbackState::with_observer(&observer); @@ -286,7 +317,7 @@ Another central component of LibAFL are the Stages, that are actions done on ind As the last step, we create a MutationalStage that uses a mutator inspired by the havoc mutator of AFL. -```rust +```rust,ignore // Setup a mutational stage with a basic bytes mutator let mutator = StdScheduledMutator::new(havoc_mutations()); let mut stages = tuple_list!(StdMutationalStage::new(mutator)); @@ -300,7 +331,7 @@ fuzzer After adding this code, we have a proper fuzzer, that can run a find the input that panics the function in less than a second. -``` +```text $ cargo run Compiling baby_fuzzer v0.1.0 (/home/andrea/Desktop/baby_fuzzer) Finished dev [unoptimized + debuginfo] target(s) in 1.56s diff --git a/docs/src/core_concepts/core_concepts.md b/docs/src/core_concepts/core_concepts.md new file mode 100644 index 0000000000..8467c1438f --- /dev/null +++ b/docs/src/core_concepts/core_concepts.md @@ -0,0 +1,5 @@ +# Core Concepts + +LibAFL is designed around some core concepts that we think can effectively abstract most of the other fuzzers designs. + +Here, we discuss these concepts and provide some examples related to other fuzzers. diff --git a/docs/src/core_concepts/corpus.md b/docs/src/core_concepts/corpus.md new file mode 100644 index 0000000000..b6d30ede42 --- /dev/null +++ b/docs/src/core_concepts/corpus.md @@ -0,0 +1,11 @@ +# Corpus + +The Corpus is where testcases are stored. 
We define a Testcase as an Input and a set of related metadata like execution time for instance. + +A Corpus can store testcases in different ways, for example on disk, or in memory, or implement a cache to speed up on-disk storage. + +Usually, a testcase is added to the Corpus when it is considered as interesting, but a Corpus is used also to store testcases that fulfill an objective (like crashing the tested program for instance). + +Related to the Corpus, there is the way in which the fuzzer should ask for the next testcase to fuzz picking it from the Corpus. The taxonomy for this in LibAFL is CorpusScheduler, the entity representing the policy to pop testcases from the Corpus, FIFO for instance. + +Speaking about the code, [`Corpus`](https://docs.rs/libafl/0/libafl/corpus/trait.Corpus.html) and [`CorpusScheduler`](https://docs.rs/libafl/0/libafl/corpus/trait.CorpusScheduler.html) are traits. diff --git a/docs/src/core_concepts/executor.md b/docs/src/core_concepts/executor.md new file mode 100644 index 0000000000..ae461e43ba --- /dev/null +++ b/docs/src/core_concepts/executor.md @@ -0,0 +1,16 @@ +# Executor + +In different fuzzers, this concept of executing the program under test means each run is not always the same. +For instance, for in-memory fuzzers like libFuzzer an execution is a call to a harness function, for hypervisor-based fuzzers like [kAFL](https://github.com/IntelLabs/kAFL) instead an entire operating system is started from a snapshot for each run. + +In our model, an Executor is the entity that defines not only how to execute the target, but all the volatile operations that are related to just a single run of the target. + +So the Executor is for instance responsible to inform the program about the input that the fuzzer wants to use in the run, writing to a memory location for instance or passing it as a parameter to the harness function. + +In our model, it can also hold a set of Observers connected with each execution.
+ +In Rust, we bind this concept to the [`Executor`](https://docs.rs/libafl/0/libafl/executors/trait.Executor.html) trait. A structure implementing this trait must implement [`HasObservers`](https://docs.rs/libafl/0/libafl/executors/trait.HasObservers.html) too if it wants to hold a set of Observers. + +By default, we implement some commonly used Executors such as [`InProcessExecutor`](https://docs.rs/libafl/0/libafl/executors/inprocess/struct.InProcessExecutor.html) in which the target is a harness function providing in-process crash detection. Another Executor is the [`ForkserverExecutor`](https://docs.rs/libafl/0/libafl/executors/forkserver/struct.ForkserverExecutor.html) that implements an AFL-like mechanism to spawn child processes to fuzz. + +A common pattern when creating an Executor is wrapping an existing one, for instance [`TimeoutExecutor`](https://docs.rs/libafl/0.6.1/libafl/executors/timeout/struct.TimeoutExecutor.html) wraps an executor and installs a timeout callback before calling the original run function of the wrapped executor. diff --git a/docs/src/core_concepts/feedback.md b/docs/src/core_concepts/feedback.md new file mode 100644 index 0000000000..a859a86a56 --- /dev/null +++ b/docs/src/core_concepts/feedback.md @@ -0,0 +1,18 @@ +# Feedback + +The Feedback is an entity that classifies the outcome of an execution of the program under test as interesting or not. +Typically, if an execution is interesting, the corresponding input used to feed the target program is added to a corpus. + +Most of the time, the notion of Feedback is deeply linked to the Observer, but they are different concepts. + +The Feedback, in most of the cases, processes the information reported by one or more observers to decide if the execution is interesting. +The concept of "interestingness" is abstract, but typically it is related to a novelty search (i.e. interesting inputs are those that reach a previously unseen edge in the control flow graph).
+ +As an example, given an Observer that reports all the sizes of memory allocations, a maximization Feedback can be used to maximize these sizes to spot pathological inputs in terms of memory consumption. + +In terms of code, the library offers the [`Feedback`](https://docs.rs/libafl/0/libafl/feedbacks/trait.Feedback.html) and the [`FeedbackState`](https://docs.rs/libafl/0/libafl/feedbacks/trait.FeedbackState.html) traits. +The first is used to implement functors that, given the state of the observers from the last execution, tell if the execution was interesting. The second is tied with `Feedback` and it is the state of the data that the feedback wants to persist in the fuzzer's state, for instance the cumulative map holding all the edges seen so far in the case of a feedback based on edge coverage. + +Multiple Feedbacks can be combined into a boolean formula, considering for instance an execution as interesting if it triggers new code paths or executes in less time compared to the average execution time using [`feedback_or`](https://docs.rs/libafl/0/libafl/macro.feedback_or.html). + +TODO objective feedbacks and fast feedback logic operators diff --git a/docs/src/core_concepts/generator.md b/docs/src/core_concepts/generator.md new file mode 100644 index 0000000000..e1557d6330 --- /dev/null +++ b/docs/src/core_concepts/generator.md @@ -0,0 +1,9 @@ +# Generator + +A Generator is a component designed to generate an Input from scratch. + +Typically, a random generator is used to generate random inputs. + +Generators are traditionally less used in Feedback-driven Fuzzing, but there are exceptions, like Nautilus, that uses a Grammar generator to create the initial corpus and a sub-tree Generator as a mutation of its grammar Mutator. + +In the code, [`Generator`](https://docs.rs/libafl/0/libafl/generators/trait.Generator.html) is a trait.
diff --git a/docs/src/core_concepts/input.md b/docs/src/core_concepts/input.md new file mode 100644 index 0000000000..bb7d508332 --- /dev/null +++ b/docs/src/core_concepts/input.md @@ -0,0 +1,13 @@ +# Input + +Formally, the input of a program is the data taken from external sources that affect the program behaviour. + +In our model of an abstract fuzzer, we define the Input as the internal representation of the program input (or a part of it). + +In the straightforward case, the input of the program is a byte array and in fuzzers such as AFL we store and manipulate exactly these byte arrays. + +But it is not always the case. A program can expect inputs that are not byte arrays (e.g. a sequence of syscalls) and the fuzzer does not represent the Input in the same way that the program consumes it. + +In case of a grammar fuzzer for instance, the Input is generally an Abstract Syntax Tree because it is a data structure that can be easily manipulated while maintaining the validity, but the program expects a byte array as input, so just before the execution, the tree is serialized to a sequence of bytes. + +In the Rust code, an [`Input`](https://docs.rs/libafl/0/libafl/inputs/trait.Input.html) is a trait that can be implemented only by structures that are serializable and have only owned data as fields. diff --git a/docs/src/core_concepts/mutator.md b/docs/src/core_concepts/mutator.md new file mode 100644 index 0000000000..b2b753d8af --- /dev/null +++ b/docs/src/core_concepts/mutator.md @@ -0,0 +1,9 @@ +# Mutator + +The Mutator is an entity that takes one or more Inputs and generates a new derived one. + +Mutators can be composed and they are generally linked to a specific Input type. + +There can be, for instance, a Mutator that applies more than a single type of mutation on the input. 
Consider a generic Mutator for a byte stream, bit flip is just one of the possible mutations but not the only one, there is also, for instance, the random replacement of a byte or the copy of a chunk. + +In LibAFL, [`Mutator`](https://docs.rs/libafl/0/libafl/mutators/trait.Mutator.html) is a trait. diff --git a/docs/src/core_concepts/observer.md b/docs/src/core_concepts/observer.md new file mode 100644 index 0000000000..508bf7dfb1 --- /dev/null +++ b/docs/src/core_concepts/observer.md @@ -0,0 +1,12 @@ +# Observer + +An Observer, or Observation Channel, is an entity that provides information observed during the execution of the program under test to the fuzzer. + +The information contained in the Observer is not preserved across executions. + +As an example, the coverage shared map filled during the execution to report the executed edges used by fuzzers such as AFL and HonggFuzz can be considered an Observation Channel. +This information is not preserved across runs and it is an observation of a dynamic property of the program. + +In terms of code, in the library this entity is described by the [`Observer`](https://docs.rs/libafl/0/libafl/observers/trait.Observer.html) trait. + +In addition to holding the volatile data connected with the last execution of the target, the structures implementing this trait can define some execution hooks that are executed before and after each fuzz case. In these hooks, the observer can modify the fuzzer's state. diff --git a/docs/src/core_concepts/stage.md b/docs/src/core_concepts/stage.md new file mode 100644 index 0000000000..13c2ef0e2f --- /dev/null +++ b/docs/src/core_concepts/stage.md @@ -0,0 +1,9 @@ +# Stage + +A Stage is an entity that operates on a single Input got from the Corpus. + +For instance, a Mutational Stage, given an input of the corpus, applies a Mutator and executes the generated input one or more times.
How many times this has to be done can be scheduled, AFL for instance uses a performance score of the input to choose how many times the havoc mutator should be invoked. This can depend also on other parameters, for instance, the length of the input if we want to just apply a sequential bitflip, or be a fixed value. + +A stage can also be an analysis stage, for instance, the Colorization stage of Redqueen that aims to introduce more entropy in a testcase or the Trimming stage of AFL that aims to reduce the size of a testcase. + +There are several stages in the LibAFL codebases implementing the [`Stage`](https://docs.rs/libafl/0/libafl/stages/trait.Stage.html) trait. diff --git a/docs/src/design/architecture.md b/docs/src/design/architecture.md index 321a612a1b..a7678d6dac 100644 --- a/docs/src/design/architecture.md +++ b/docs/src/design/architecture.md @@ -8,6 +8,8 @@ The LibAFL code reuse meachanism is so based on components rather than sub-class Thinking about similar fuzzers, you can observe that most of the times the data structures that are modified are the ones related to testcases and the fuzzer global state. -Beside the entities described previously, we introduce the Testcase and State entities. The Testcase is a container for an Input stored in the Corpus and its metadata (so, in the implementation, the Corpus stores Testcases) and the State contains all the metadata that are evolved while running the fuzzer, Corpus included. +Beside the entities previously described, we introduce the [`Testcase`](https://docs.rs/libafl/0.6/libafl/corpus/testcase/struct.Testcase.html) and [`State`](https://docs.rs/libafl/0.6/libafl/state/struct.StdState.html) entities. The Testcase is a container for an Input stored in the Corpus and its metadata (so, in the implementation, the Corpus stores Testcases) and the State contains all the metadata that are evolved while running the fuzzer, Corpus included. 
+The State, in the implementation, contains only owned objects that are serializable and it is serializable itself. Some fuzzers may want to serialize their state when pausing or just, when doing in-process fuzzing, serialize on crash and deserialize in the new process to continue to fuzz with all the metadata preserved. +Additionally, we group the entities that are "actions", like the CorpusScheduler and the Feedbacks, in a common place, the [`Fuzzer`](https://docs.rs/libafl/0.6.1/libafl/fuzzer/struct.StdFuzzer.html). diff --git a/docs/src/design/core_concepts.md b/docs/src/design/core_concepts.md deleted file mode 100644 index ec38d75755..0000000000 --- a/docs/src/design/core_concepts.md +++ /dev/null @@ -1,86 +0,0 @@ -# Core Concepts - -LibAFL is designed around some core concepts that we think can effectively abstract most of the other fuzzers designs. - -Here, we discuss these concepts and provide some examples related to other fuzzers. - -TODO add links to trait definitions in docs.rs - -## Observer - -An Observer, or Observation Channel, is an entity that provides an information observed during the execution of the program under test to the fuzzer. - -The information contained in the Observer is not preserved across executions. - -As an example, the coverage shared map filled during the execution to report the executed edges used by fuzzers such as AFL and HonggFuzz can be considered an Observation Channel. -This information is not preserved across runs and it is an observation of a dynamic property of the program. - -## Executor - -In different fuzzers, this concept of executing the program under test means each run is now always the same. -For instance, for in-memory fuzzers like libFuzzer an execution is a call to an harness function, for hypervisor-based fuzzers like [kAFL](https://github.com/IntelLabs/kAFL) instead an entire operating system is started from a snapshot for each run.
- -In our model, an Executor is the entity that defines not only how to execute the target, but all the volatile operations that are related to just a single run of the target. - -So the Executor is for instance responsible to inform the program about the input that the fuzzer wants to use in the run, writing to a memory location for instance or passing it as a parameter to the harness function. - -It also holds a set of Observers, as they are related to just a single run of the target. - -## Feedback - -The Feedback is an entity that classifies the outcome of an execution of the program under test as interesting or not. -Typically, if an execution is interesting, the corresponding input used to feed the target program is added to a corpus. - -Most of the times, the notion of Feedback is deeply linked to the Observer, but they are different concepts. - -The Feedback, in most of the cases, processes the information reported by one or more observers to decide if the execution is interesting. -The concept of "interestingness" is abstract, but typically it is related to a novelty search (i.e. interesting inputs are those that reach a previously unseen edge in the control flow graph). - -As an example, given an Observer that reports all the sizes of memory allocations, a maximization Feedback can be used to maximize these sizes to sport pathological inputs in terms of memory consumption. - -## Input - -Formally, the input of a program is the data taken from external sources that affect the program behaviour. - -In our model of an abstract fuzzer, we define the Input as the internal representation of the program input (or a part of it). - -In the straightforward case, the input of the program is a byte array and in fuzzers such as AFL we store and manipulate exactly these byte arrays. - -But it is not always the case. A program can expect inputs that are not byte arrays (e.g. 
a sequence of syscalls) and the fuzzer does not represent the Input in the same way that the program consumes it. - -In case of a grammar fuzzer for instance, the Input is generally an Abstract Syntax Tree because it is a data structure that can be easily manipulated while maintaining the validity, but the program expects a byte array as input, so just before the execution, the tree is serialized to a sequence of bytes. - -## Corpus - -The Corpus is where testcases are stored. A Testcase is defined as an Input and a set of related metadata like execution time for instance. - -For instance, a Corpus can store testcases on disk, or in memory, or implement a cache to speedup on disk storage. - -Usually, a testcase is added to the Corpus when it is considered as interesting. - -## Mutator - -The Mutator is an entity that takes one or more Inputs and generates a new derived one. - -Mutators can be composed and they are generally linked to a specific Input type. - -There can be, for instance, a Mutator that applies more than a single type of mutation on the input. Consider a generic Mutator for a byte stream, bit flip is just one of the possible mutations but not the only one, there is also, for instance, the random replacement of a byte of the copy of a chunk. - -This Mutator will simple schedule the application of some other Mutators. - -## Generator - -A Generator is a component designed to generate an Input from scratch. - -Typically, a random generator is used to generate random inputs. - -Generators are traditionally less used in Feedback-driven Fuzzing, but there are exceptions, like Nautilus, that uses a Grammar generator to create the initial corpus and a sub-tree Generator as a mutation of its grammar Mutator. - -## Stage - -A Stage is an entity that operates on a single Input got from the Corpus. - -For instance, a Mutational Stage, given an input of the corpus, applies a Mutator and executes the generated input one or more time. 
How many times this has to be done can be scheduled, AFL for instance uses a performance score of the input to choose how many times the havoc mutator should be invoked. This can depend also on other parameters, for instance, the length of the input if we want to just apply a sequential bitflip, or be a fixed value. - -A stage can also be an analysis stage, for instance, the Colorization stage of Redqueen that aims to introduce more entropy in a testcase or the Trimming stage of AFL that aims to reduce the size of a testcase. - diff --git a/docs/src/design/design.md b/docs/src/design/design.md index 3650272973..faaddf3c0d 100644 --- a/docs/src/design/design.md +++ b/docs/src/design/design.md @@ -1,3 +1,3 @@ # Design -In this chapter, we introduce the abstract Core Concepts behind LibAFL, we then discuss how we designed the library to take into account these concepts while allowing code reuse and extensibility. +In this chapter, we discuss how we designed the library taking into account the core concepts while allowing code reuse and extensibility. diff --git a/docs/src/design/metadata.md b/docs/src/design/metadata.md index 9230f2551f..cbc84e05d1 100644 --- a/docs/src/design/metadata.md +++ b/docs/src/design/metadata.md @@ -5,12 +5,15 @@ A metadata in LibAFL is a self contained structure that holds associated data to In terms of code, a metadata can be defined as a Rust struct registered in the SerdeAny register. ```rust +extern crate libafl; +extern crate serde; + use libafl::SerdeAny; use serde::{Serialize, Deserialize}; #[derive(Serialize, Deserialize, SerdeAny)] pub struct MyMetadata { - ... + //... 
} ``` diff --git a/docs/src/design/usage.md b/docs/src/design/usage.md deleted file mode 100644 index 33666487f1..0000000000 --- a/docs/src/design/usage.md +++ /dev/null @@ -1 +0,0 @@ -# Metadata diff --git a/docs/src/getting_started/build.md b/docs/src/getting_started/build.md index a8b22a302f..12fac70705 100644 --- a/docs/src/getting_started/build.md +++ b/docs/src/getting_started/build.md @@ -1,4 +1,4 @@ -# Build +# Building LibAFL LibAFL, as most of the Rust projects, can be built using `cargo` from the root directory of the project with: @@ -10,16 +10,19 @@ Note that the `--release` flag is optional for development, but you needed to ad Slowdowns of 10x or more are not uncommon for Debug builds. The LibAFL repository is composed of multiple crates. -The top-level Cargo.toml is the workspace file grouping these crates. +The [top-level `Cargo.toml`](https://github.com/AFLplusplus/LibAFL/blob/main/Cargo.toml) is the workspace file grouping these crates. Calling `cargo build` from the root directory will compile all crates in the workspace. ## Build Example Fuzzers -We group example fuzzers in the `./fuzzers` directory of the LibAFL repository. +The best starting point for experienced rustaceans is to read through, and adapt, the example fuzzers. + +We group these fuzzers in the [`./fuzzers`](https://github.com/AFLplusplus/LibAFL/tree/main/fuzzers) directory of the LibAFL repository. The directory contains a set of crates that are not part of the workspace. Each of these example fuzzers uses particular features of LibAFL, sometimes combined with different instrumentation backends (e.g. [SanitizerCoverage](https://clang.llvm.org/docs/SanitizerCoverage.html), [Frida](https://frida.re/), ...). You can use these crates as examples and as skeletons for custom fuzzers with similar feature sets. +Each fuzzer will have a `README.md` file in its directory, describing the fuzzer and its features. 
-To build an example fuzzer you have to invoke cargo from its respective folder (`fuzzers/[FUZZER_NAME]`). +To build an example fuzzer, you have to invoke `cargo build --release` from its respective folder (`fuzzers/[FUZZER_NAME]`). diff --git a/docs/src/getting_started/crates.md b/docs/src/getting_started/crates.md index 310f8a8ce4..6b283e1f28 100644 --- a/docs/src/getting_started/crates.md +++ b/docs/src/getting_started/crates.md @@ -1,54 +1,77 @@ # Crates -LibAFL is composed by different crates. -Each one has its self-contained purpose, and the user may not need to use all of them in its project. +LibAFL is composed of different crates. +A crate is an individual library in Rust's Cargo build system, that you can use by adding it to your project's `Cargo.toml`, like: +```toml +[dependencies] +libafl = { version = "*" } +``` + +For LibAFL, each crate has its self-contained purpose, and the user may not need to use all of them in its project. Following the naming convention of the folders in the project's root, they are: -### libafl +### [`libafl`](https://github.com/AFLplusplus/LibAFL/tree/main/libafl) This is the main crate that contains all the components needed to build a fuzzer. -This crate has the following feature flags: +This crate has a number of feature flags that enable and disable certain aspects of LibAFL. +The features can be found in [LibAFL's `Cargo.toml`](https://github.com/AFLplusplus/LibAFL/blob/main/libafl/Cargo.toml) under "`[features]`", and are usually explained with comments there. +Some features worthy of remark are: -- std, that enables the parts of the code that use the Rust standard library. Without this flag, libafl is no_std. -- derive, that enables the usage of the `derive(...)` macros defined in libafl_derive from libafl. +- `std` enables the parts of the code that use the Rust standard library. Without this flag, LibAFL is `no_std` compatible. 
This disables a range of features, but allows us to use LibAFL in embedded environments, read [the `no_std` section](../advanced_features/no_std/no_std.md) for further details. +- `derive` enables the usage of the `derive(...)` macros defined in libafl_derive from libafl. +- `rand_trait` allows you to use LibAFL's very fast (*but insecure!*) random number generator wherever compatibility with Rust's [`rand` crate](https://crates.io/crates/rand) is needed. +- `llmp_bind_public` makes LibAFL's LLMP bind to a public TCP port, over which other fuzzer nodes can communicate with this instance. +- `introspection` adds performance statistics to LibAFL. -By default, std and derive are both set. +You can choose the features by using `features = ["feature1", "feature2", ...]` for LibAFL in your `Cargo.toml`. +Out of this list, by default, `std`, `derive`, and `rand_trait` are already set. +You can choose to disable them by setting `default-features = false` in your `Cargo.toml`. + +### libafl_sugar + +The sugar crate abstracts away most of the complexity of LibAFL's API. +Instead of high flexibility, it aims to be high-level and easy-to-use. +It is not as flexible as stitching your fuzzer together from each individual component, but allows you to build a fuzzer with minimal lines of code. +To see it in action, take a look at the [`libfuzzer_stb_image_sugar` example fuzzer](https://github.com/AFLplusplus/LibAFL/tree/main/fuzzers/libfuzzer_stb_image_sugar). ### libafl_derive -This a proc-macro crate paired with the libafl crate. +This is a proc-macro crate paired with the `libafl` crate. -At the moment, it just expose the `derive(SerdeAny)` macro that can be used to define metadata structs. +At the moment, it just exposes the `derive(SerdeAny)` macro that can be used to define Metadata structs, see the section about [Metadata](../design/metadata.md) for details.
### libafl_targets -This crate that exposes, under feature flags, pieces of code to interact with targets +This crate exposes code to interact with, and to instrument, targets. +To enable and disable features at compile-time, the features are enabled and disabled using feature flags. Currently, the supported flags are: -- pcguard_edges, that defines the SanitizerCoverage trace-pc-guard hooks to track the executed edges in a map. -- pcguard_hitcounts, that defines the SanitizerCoverage trace-pc-guard hooks to track the executed edges with the hitcounts (like AFL) in a map. -- libfuzzer, that expose a compatibility layer with libFuzzer style harnesses. -- value_profile, that defines the SanitizerCoverage trace-cmp hooks to track the matching bits of each comparison in a map. +- `pcguard_edges` defines the SanitizerCoverage trace-pc-guard hooks to track the executed edges in a map. +- `pcguard_hitcounts` defines the SanitizerCoverage trace-pc-guard hooks to track the executed edges with the hitcounts (like AFL) in a map. +- `libfuzzer` exposes a compatibility layer with libFuzzer style harnesses. +- `value_profile` defines the SanitizerCoverage trace-cmp hooks to track the matching bits of each comparison in a map. ### libafl_cc -This is a library that provides some utils to wrap compilers and create source level fuzzers. +This is a library that provides utils to wrap compilers and create source-level fuzzers. At the moment, only the Clang compiler is supported. +To understand it deeper, look through the tutorials and examples. ### libafl_frida -This library bridges libafl with Frida as instrumentation backend. +This library bridges LibAFL with Frida as instrumentation backend. -With this crate you can instrument targets on Linux/macOS/Windows/Android for coverage collection. +With this crate, you can instrument targets on Linux/macOS/Windows/Android for coverage collection.
-The CmpLog and AddressSanitizer instrumentation and runtimes are currently supported only for ARM64. +Additionally, it supports CmpLog and AddressSanitizer instrumentation and runtimes for aarch64. ### libafl_qemu -This library bridges libafl with QEMU user-mode to fuzz ELF binaries. +This library bridges LibAFL with QEMU user-mode to fuzz ELF cross-platform binaries. -It works on Linux and can collect edge coverage withotu collisions. +It works on Linux and can collect edge coverage without collisions! +It also supports a wide range of hooks and instrumentation options. diff --git a/docs/src/getting_started/getting_started.md b/docs/src/getting_started/getting_started.md index ef05e80138..92299c63ef 100644 --- a/docs/src/getting_started/getting_started.md +++ b/docs/src/getting_started/getting_started.md @@ -1,4 +1,5 @@ # Getting Started -To start using LibAFL, there are some first steps to do. In this chapter, we will -discuss how to download LibAFL and build with `cargo`, how are structured its crates and the purpose of each crate. +To get started with LibAFL, there are some initial steps to do. +In this chapter, we discuss how to download and build LibAFL, using Rust's `cargo` command. +We also describe the structure of LibAFL's components, so-called crates, and the purpose of each individual crate. diff --git a/docs/src/getting_started/setup.md b/docs/src/getting_started/setup.md index 3734f90e3e..f3d201be0c 100644 --- a/docs/src/getting_started/setup.md +++ b/docs/src/getting_started/setup.md @@ -1,23 +1,25 @@ # Setup -The first step is to download LibAFL and all its dependencies that are not automatically installed with `cargo`. +The first step is to download LibAFL and all dependencies that are not automatically installed with `cargo`. > ### Command Line Notation > -> In this chapter and throughout the book, we’ll show some commands used in the +> In this chapter and throughout the book, we show some commands used in the > terminal.
Lines that you should enter in a terminal all start with `$`. You > don’t need to type in the `$` character; it indicates the start of each > command. Lines that don’t start with `$` typically show the output of the > previous command. Additionally, PowerShell-specific examples will use `>` > rather than `$`. -The easiest way to download LibAFL is using `git`. +While you technically do not need to install LibAFL, but can use the version from crates.io directly, we do recommend to download or clone the GitHub version. +This gets you the example fuzzers, additional utilities, and latest patches. +The easiest way to do this is to use `git`. ```sh $ git clone git@github.com:AFLplusplus/LibAFL.git ``` -You can alternatively, on a UNIX-like machine, download a compressed archive and extract with: +You can alternatively, on a UNIX-like machine, download a compressed archive and extract it with: ```sh $ wget https://github.com/AFLplusplus/LibAFL/archive/main.tar.gz @@ -29,30 +31,29 @@ $ ls LibAFL-main # this is the extracted folder ## Clang installation One of the external dependencies of LibAFL is the Clang C/C++ compiler. -While most of the code is in pure Rust, we still need a C compiler because Rust stable -still does not support features that we need such as weak linking and LLVM builtins linking, -and so we use C to expose the missing functionalities to our Rust codebase. +While most of the code is in pure Rust, we still need a C compiler because stable Rust still does not support features that some parts of LibAFL may need, such as weak linking, and LLVM builtins linking. +For these parts, we use C to expose the missing functionalities to our Rust codebase. In addition, if you want to perform source-level fuzz testing of C/C++ applications, you will likely need Clang with its instrumentation options to compile the programs under test. 
-On Linux you can use your distro's package manager to get Clang, -but these packages are not always updated, so we suggest you to use the -Debian/Ubuntu prebuilt packages from LLVM that are available using their [official repository](https://apt.llvm.org/). +On Linux you could use your distribution's package manager to get Clang, +but these packages are not always up-to-date. +Instead, we suggest using the Debian/Ubuntu prebuilt packages from LLVM that are available using their [official repository](https://apt.llvm.org/). For Microsoft Windows, you can download the [installer package](https://llvm.org/builds/) that LLVM generates periodically. -Despite that Clang is the default C compiler on macOS, we discourage the use of the build shipped by Apple and encourage -the installation from `brew` or directly a fresh build from the source code. +Despite Clang being the default C compiler on MacOS, we discourage the use of the build shipped by Apple and encourage +the installation from [Homebrew](https://brew.sh/), using `brew install llvm`. -Alternatively you can download and build the LLVM source tree - Clang included - following the steps +Alternatively, you can download and build the LLVM source tree - Clang included - following the steps explained [here](https://clang.llvm.org/get_started.html). ## Rust installation -If you don't have Rust installed, you can easily follow the steps described [here](https://www.rust-lang.org/tools/install) +If you do not have Rust installed, you can easily follow the steps described [here](https://www.rust-lang.org/tools/install) to install it on any supported system. +Be aware that Rust versions shipped with Linux distributions may be outdated, LibAFL always targets the latest `stable` version available via `rustup upgrade`. -We suggest to install Clang and LLVM first. - +We suggest installing Clang and LLVM first. 
diff --git a/docs/src/introduction.md b/docs/src/introduction.md index f4acea8ad2..ab020f6b38 100644 --- a/docs/src/introduction.md +++ b/docs/src/introduction.md @@ -1,17 +1,17 @@ # Introduction -Fuzzers are important assets in the pockets of security researchers and developers alike. -A wide range of cool state-of-the-art tools like [AFL++](https://github.com/AFLplusplus/AFLplusplus), [libFuzzer](https://llvm.org/docs/LibFuzzer.html) or [honggfuzz](https://github.com/google/honggfuzz) are available to users. They do their job in a very effective way, finding thousands of bugs. +Fuzzers are important tools for security researchers and developers alike. +A wide range of state-of-the-art tools like [AFL++](https://github.com/AFLplusplus/AFLplusplus), [libFuzzer](https://llvm.org/docs/LibFuzzer.html) or [honggfuzz](https://github.com/google/honggfuzz) are available to users. They do their job in a very effective way, finding thousands of bugs. -From the power user perspective, however, these tools are limited. +From the perspective of a power user, however, these tools are limited. Their design does not treat extensibility as a first-class citizen. Usually, a fuzzer developer can choose to either fork one of these existing tools, or to create a new fuzzer from scratch. In any case, researchers end up with tons of fuzzers, all of which are incompatible with each other. Their outstanding features can not just be combined for new projects. -Instead, we keep reinventing the wheel and may completely miss out on features that are complex to reimplement. +By reinventing the wheel over and over, we may completely miss out on features that are complex to reimplement. -Here comes LibAFL, a library that IS NOT a fuzzer, but a collection of reusable pieces of fuzzers, written in Rust. -LibAFL helps you develop your own custom fuzzer, tailored for your specific needs. 
+To tackle this issue, we created LibAFL, a library that is _not just another fuzzer_, but a collection of reusable pieces for individual fuzzers. +LibAFL, written in Rust, helps you develop a fuzzer tailored for your specific needs. Be it a specific target, a particular instrumentation backend, or a custom mutator, you can leverage existing bits and pieces to craft the fastest and most efficient fuzzer you can envision. ## Why LibAFL? @@ -19,11 +19,15 @@ Be it a specific target, a particular instrumentation backend, or a custom mutat LibAFL gives you many of the benefits of an off-the-shelf fuzzer, while being completely customizable. Some highlight features currently include: - `multi platform`: LibAFL works pretty much anywhere you can find a Rust compiler for. We already used it on *Windows*, *Android*, *MacOS*, and *Linux*, on *x86_64*, *aarch64*, ... -- `portable`: `LibAFL` can be built in `no_std` mode. This means it does not require a specific OS-dependent runtime to function. Define an allocator and a way to map pages, you should be good to inject LibAFL in obscure targets like embedded devices, hypervisors, or maybe even WebAssembly? -- `adaptable`: Given year of experience fine-tuning *AFLplusplus* and our academic fuzzing background, we could incorporate recent fuzzing trends into LibAFL's deign and make it future-proof. +- `portable`: `LibAFL` can be built in `no_std` mode. +This means it does not require a specific OS-dependent runtime to function. +Define an allocator and a way to map pages, and you are good to inject LibAFL in obscure targets like embedded devices, hypervisors, or maybe even WebAssembly? +- `adaptable`: Given years of experience fine-tuning *AFLplusplus* and our academic fuzzing background, we could incorporate recent fuzzing trends into LibAFL's design and make it future-proof. 
To give an example, as opposed to old-skool fuzzers, a `BytesInput` is just one of the potential forms of inputs: feel free to use and mutate an Abstract Syntax Tree instead, for structured fuzzing. -- `scalable`: As part of LibAFL, we developed `Low Level Message Passing`, `LLMP` for short, which allows LibAFL to scale almost linearly over cores. That is, if you chose to use this feature - it is your fuzzer, after all. Scaling to multiple machines over TCP is on the near road-map. -- `fast`: We do everything we can at compiletime so that the runtime overhead is as minimal as it can get. -- `bring your own target`: We support binary-only modes, like Frida-Mode with ASAN and CmpLog, as well as multiple compilation passes for sourced-based instrumentation, and of course support custom instrumentation. +- `scalable`: As part of LibAFL, we developed `Low Level Message Passing`, `LLMP` for short, which allows LibAFL to scale almost linearly over cores. That is, if you choose to use this feature - it is your fuzzer, after all. +Scaling to multiple machines over TCP is also possible, using LLMP's `broker2broker` feature. +- `fast`: We do everything we can at compile time so that the runtime overhead is as minimal as it can get. +- `bring your own target`: We support binary-only modes, like QEMU-Mode and Frida-Mode with ASAN and CmpLog, as well as multiple compilation passes for source-based instrumentation. +Of course, we also support custom instrumentation, as you can see in the Python example based on Google's Atheris. - `usable`: This one is on you to decide. Dig right in! \ No newline at end of file diff --git a/docs/src/libafl.md b/docs/src/libafl.md index 8c0b795498..28ef5b7eb2 100644 --- a/docs/src/libafl.md +++ b/docs/src/libafl.md @@ -1,9 +1,15 @@ # The LibAFL Fuzzing Library + AFL++ Logo + *by Andrea Fioraldi and Dominik Maier* +Welcome to LibAFL, the Advanced Fuzzing Library. +This book shall be a gentle introduction into the library.
+ This version of the LibAFL book is coupled with the release 1.0 beta of the library. This document is still work-in-progress and incomplete. The structure and the concepts explained here are subject to change in future revisions, as the structure of LibAFL itself will evolve. -The HTML version of this book is available online at [https://aflplus.plus/libafl-book/](https://aflplus.plus/libafl-book/) and offline from the LibAFL repository in the docs/ folder. +The HTML version of this book is available online at [https://aflplus.plus/libafl-book/](https://aflplus.plus/libafl-book/) and offline from the LibAFL repository in the `docs/` folder. +Build it using `mdbook build` in this folder, or run `mdbook serve` to view the book. \ No newline at end of file diff --git a/docs/src/message_passing/configurations.md b/docs/src/message_passing/configurations.md new file mode 100644 index 0000000000..f1b923b035 --- /dev/null +++ b/docs/src/message_passing/configurations.md @@ -0,0 +1,10 @@ +# Configurations + +Configurations for individual fuzzer nodes are relevant for multi node fuzzing. +The chapter describes how to run nodes with different configurations +in one fuzzing cluster. +This allows, for example, a node compiled with ASAN, to know that it needs to rerun new testcases for a node without ASAN, while the same binary/configuration does not. + +> ## Under Construction! +> This section is under construction. +> Please check back later (or open a PR) diff --git a/docs/src/message_passing/message_passing.md b/docs/src/message_passing/message_passing.md new file mode 100644 index 0000000000..28b104ebf6 --- /dev/null +++ b/docs/src/message_passing/message_passing.md @@ -0,0 +1,92 @@ +# Message Passing + +LibAFL offers a standard mechanism for message passing over processes and machines with a low overhead. +We use message passing to inform the other connected clients/fuzzers/nodes about new testcases, metadata, and statistics about the current run. 
+Depending on individual needs, LibAFL can also write testcase contents to disk, while still using events to notify other fuzzers, using an `OnDiskCorpus`. + +In our tests, message passing scales very well to share new testcases and metadata between multiple running fuzzer instances for multi-core fuzzing. +Specifically, it scales _a lot_ better than using memory locks on a shared corpus, and _a lot_ better than sharing the testcases via the filesystem, as AFL traditionally does. +Think "all cores are green" in `htop`, i.e., no kernel interaction. + +The `EventManager` interface is used to send Events over the wire using `Low Level Message Passing`, a custom message passing mechanism over shared memory or TCP. + +## Low Level Message Passing (LLMP) + +LibAFL comes with a reasonably lock-free message passing mechanism that scales well across cores and, using its *broker2broker* mechanism, even to connected machines via TCP. +Most example fuzzers use this mechanism, and it is the best `EventManager` if you want to fuzz on more than a single core. +In the following, we will describe the inner workings of `LLMP`. + +`LLMP` has one `broker` process that can forward messages sent by any client process to all other clients. +The broker can also intercept and filter the messages it receives instead of forwarding them. +A common use-case for messages filtered by the broker are the status messages sent from each client to the broker directly. +The broker uses this information to paint a simple UI, with up-to-date information about all clients; however, the other clients don't need to receive this information. + +### Speedy Local Messages via Shared Maps + +Throughout LibAFL, we use a wrapper around different operating systems' shared maps, called `ShMem`. +Shared maps are the backbone of `LLMP`. +Each client, usually a fuzzer trying to share stats and new testcases, maps an outgoing `ShMem` map.
+With very few exceptions, only this client writes to this map; therefore, we do not run into race conditions and can live without locks. +The broker reads from all clients' `ShMem` maps. +It checks all incoming client maps periodically, and then forwards new messages to its outgoing broadcast-`ShMem`, mapped by all connected clients. + +To send new messages, a client places a new message at the end of their map, and then updates a static field to notify the broker. +Once the outgoing map is full, the sender allocates a new `ShMem` using the respective `ShMemProvider`. +It then sends the information needed to map the newly-allocated page in connected processes to the old page, using an end of page (`EOP`) message. +The receiver maps the new page, flags it as safe for unmapping from the sending process (to avoid race conditions if we have more than a single EOP in a short time), and then continues to read from the new `ShMem`. + +The schema for clients' maps to the broker is as follows: +```text +[client0] [client1] ... [clientN] + | | / +[client0_out] [client1_out] ... [clientN_out] + | / / + |________________/ / + |________________________________/ + \|/ +[broker] +``` + +The broker loops over all incoming maps, and checks for new messages. +On `std` builds, the broker will sleep a few milliseconds after a loop, since we do not need the messages to arrive instantly. +After the broker receives a new message from clientN (`clientN_out->current_id != last_message->message_id`), the broker copies the message content to its own broadcast map. + +The clients periodically, for example after finishing `n` mutations, check for new incoming messages by checking if (`current_broadcast_map->current_id != last_message->message_id`). +While the broker uses the same EOP mechanism to map new `ShMem`s for its outgoing map, it never unmaps old pages.
+This additional memory overhead serves a good purpose: by keeping all broadcast pages around, we make sure that new clients can join in on a fuzzing campaign at a later point in time. +They just need to re-read all broadcasted messages from start to finish. + +So the outgoing messages flow like this over the outgoing broadcast `ShMem`: + +```text +[broker] + | +[current_broadcast_map] + | + |___________________________________ + |_________________ \ + | \ \ + | | | + \|/ \|/ \|/ +[client0] [client1] ... [clientN] +``` + +To use `LLMP` in LibAFL, you usually want to use an `LlmpEventManager` or its restarting variant. +They are the default if using LibAFL's `Launcher`. + +If you want to use `LLMP` in its raw form, without any `LibAFL` abstractions, take a look at the `llmp_test` example in [./libafl/examples](https://github.com/AFLplusplus/LibAFL/blob/main/libafl/examples/llmp_test/main.rs). +You can run the example using `cargo run --example llmp_test` with the appropriate modes, as indicated by its help output. +First, you will have to create a broker using `LlmpBroker::new()`. +Then, create some `LlmpClient`s in other threads and register them with the main thread using `LlmpBroker::register_client`. +Finally, call `LlmpBroker::loop_forever()`. + +### B2B: Connecting Fuzzers via TCP + +For `broker2broker` communication, all broadcast messages are additionally forwarded via network sockets. +To facilitate this, we spawn an additional client thread in the broker that reads the broadcast map, just like any other client would. +For broker2broker communication, this b2b client listens for TCP connections from other, remote brokers. +It keeps a pool of open sockets to other, remote, b2b brokers around at any time. +When receiving a new message on the local broker map, the b2b client will forward it to all connected remote brokers via TCP.
+Additionally, the broker can receive messages from all connected (remote) brokers, and forward them to the local broker over a client `ShMem`. + +As a side note, the TCP listener used for b2b communication is also used for an initial handshake when a new client tries to connect to a broker locally, simply exchanging the initial `ShMem` descriptions. \ No newline at end of file diff --git a/docs/src/message_passing/spawn_instances.md b/docs/src/message_passing/spawn_instances.md new file mode 100644 index 0000000000..babb3bc7bb --- /dev/null +++ b/docs/src/message_passing/spawn_instances.md @@ -0,0 +1,49 @@ +# Spawning Instances + +Multiple fuzzer instances can be spawned in different ways. + +## Manually, via a TCP port + +The straightforward way to do multi-threading is to use the `LlmpRestartingEventManager`, and specifically to use `setup_restarting_mgr_std`. +It abstracts away all the pesky details about restarts on crash handling (for in-memory fuzzers) and multi-threading. +With it, every instance you launch manually tries to connect to a TCP port on the local machine. + +If the port is not yet bound, this instance becomes the broker, itself binding to the port to await new clients. + +If the port is already bound, the EventManager will try to connect to it. +The instance becomes a client and can now communicate with all other nodes. + +Launching nodes manually has the benefit that you can have multiple nodes with different configurations, such as clients fuzzing with and without ASAN. + +While it's called a "restarting" manager, it uses `fork` on Unix operating systems as an optimization and only actually restarts from scratch on Windows. + +## Launcher + +The Launcher is the lazy way to do multiprocessing. +You can use the Launcher builder to create a fuzzer that spawns multiple nodes, all using restarting event managers.
+An example may look like this: + +```rust,ignore + Launcher::builder() + .configuration(EventConfig::from_name(&configuration)) + .shmem_provider(shmem_provider) + .stats(stats) + .run_client(&mut run_client) + .cores(cores) + .broker_port(broker_port) + .stdout_file(stdout_file) + .remote_broker_addr(broker_addr) + .build() + .launch() +``` + +This first starts a broker, then spawns `n` clients, according to the value passed to `cores`. +The value is a string indicating the cores to bind to, for example, `0,2,5` or `0-3`. +For each client, `run_client` will be called. +On Windows, the Launcher will restart each client, while on Unix it will use `fork`. + +## Other ways + +The `LlmpEventManager` family is the easiest way to spawn instances, but for obscure targets, you may need to come up with other solutions. +LLMP is even, in theory, `no_std` compatible, and even completely different EventManagers can be used for message passing. +If you are in this situation, please read through the current implementations and/or reach out to us. \ No newline at end of file diff --git a/docs/src/tutorial/intro.md b/docs/src/tutorial/intro.md new file mode 100644 index 0000000000..b82fc21058 --- /dev/null +++ b/docs/src/tutorial/intro.md @@ -0,0 +1,5 @@ +# Introduction + +> ## Under Construction! +> This section is under construction. +> Please check back later (or open a PR) diff --git a/docs/src/tutorial/tutorial.md b/docs/src/tutorial/tutorial.md new file mode 100644 index 0000000000..4347d44c9c --- /dev/null +++ b/docs/src/tutorial/tutorial.md @@ -0,0 +1,5 @@ +# Tutorial + +In this chapter, we will build a custom fuzzer using the [Lain](https://github.com/microsoft/lain) mutator in Rust. + +This tutorial will introduce you to writing extensions to LibAFL, such as Feedbacks and Testcase metadata.
diff --git a/fuzzers/tutorial/Cargo.toml b/fuzzers/tutorial/Cargo.toml new file mode 100644 index 0000000000..adfdfe5d69 --- /dev/null +++ b/fuzzers/tutorial/Cargo.toml @@ -0,0 +1,33 @@ +[package] +name = "tutorial" +version = "0.6.1" +authors = ["Andrea Fioraldi ", "Dominik Maier "] +edition = "2018" + +[features] +default = ["std"] +std = [] + +[profile.release] +lto = true +codegen-units = 1 +opt-level = 3 +debug = true + +[build-dependencies] +cc = { version = "1.0", features = ["parallel"] } +which = { version = "4.0.2" } +num_cpus = "1.0" + +[dependencies] +libafl = { path = "../../libafl/", features = ["default", "rand_trait"] } +libafl_derive = { path = "../../libafl_derive/" } +libafl_targets = { path = "../../libafl_targets/", features = ["sancov_pcguard_hitcounts", "libfuzzer", "sancov_cmplog"] } +serde = { version = "1.0", default-features = false, features = ["alloc"] } # serialization lib +lain = { version = "0.5", features = ["serde_support"]} +# TODO Include it only when building cc +libafl_cc = { path = "../../libafl_cc/" } + +[lib] +name = "tutorial" +crate-type = ["staticlib"] diff --git a/fuzzers/tutorial/rust-toolchain b/fuzzers/tutorial/rust-toolchain new file mode 100644 index 0000000000..bf867e0ae5 --- /dev/null +++ b/fuzzers/tutorial/rust-toolchain @@ -0,0 +1 @@ +nightly diff --git a/fuzzers/tutorial/src/bin/libafl_cc.rs b/fuzzers/tutorial/src/bin/libafl_cc.rs new file mode 100644 index 0000000000..ead1f07c88 --- /dev/null +++ b/fuzzers/tutorial/src/bin/libafl_cc.rs @@ -0,0 +1,35 @@ +use libafl_cc::{ClangWrapper, CompilerWrapper}; +use std::env; + +pub fn main() { + let args: Vec = env::args().collect(); + if args.len() > 1 { + let mut dir = env::current_exe().unwrap(); + let wrapper_name = dir.file_name().unwrap().to_str().unwrap(); + + let is_cpp = match wrapper_name[wrapper_name.len()-2..].to_lowercase().as_str() { + "cc" => false, + "++" | "pp" | "xx" => true, + _ => panic!("Could not figure out if c or c++ warpper was called. 
Expected {:?} to end with c or cxx", dir), + }; + + dir.pop(); + + let mut cc = ClangWrapper::new(); + if let Some(code) = cc + .cpp(is_cpp) + // silence the compiler wrapper output, needed for some configure scripts. + .silence(true) + .from_args(&args) + .expect("Failed to parse the command line") + .link_staticlib(&dir, "tutorial") + .add_arg("-fsanitize-coverage=trace-pc-guard") + .run() + .expect("Failed to run the wrapped compiler") + { + std::process::exit(code); + } + } else { + panic!("LibAFL CC: No Arguments given"); + } +} diff --git a/fuzzers/tutorial/src/bin/libafl_cxx.rs b/fuzzers/tutorial/src/bin/libafl_cxx.rs new file mode 100644 index 0000000000..ce786239b0 --- /dev/null +++ b/fuzzers/tutorial/src/bin/libafl_cxx.rs @@ -0,0 +1,5 @@ +pub mod libafl_cc; + +fn main() { + libafl_cc::main() +} diff --git a/fuzzers/tutorial/src/input.rs b/fuzzers/tutorial/src/input.rs new file mode 100644 index 0000000000..a743da3419 --- /dev/null +++ b/fuzzers/tutorial/src/input.rs @@ -0,0 +1,72 @@ +use libafl::{ + bolts::ownedref::OwnedSlice, + inputs::{HasLen, HasTargetBytes, Input}, +}; + +use lain::prelude::*; + +use serde::{Deserialize, Serialize}; + +#[derive( + Serialize, + Deserialize, + Debug, + Default, + Clone, + NewFuzzed, + Mutatable, + VariableSizeObject, + BinarySerialize, +)] +pub struct PacketData { + pub typ: UnsafeEnum, + + pub offset: u64, + pub length: u64, + + #[lain(min = 0, max = 10)] + pub data: Vec, +} + +impl Fixup for PacketData { + fn fixup(&mut self, _mutator: &mut Mutator) { + self.length = self.data.len() as u64; + } +} + +#[derive( + Serialize, Deserialize, Debug, Copy, Clone, FuzzerObject, ToPrimitiveU32, BinarySerialize, +)] +#[repr(u32)] +pub enum PacketType { + Read = 0x0, + Write = 0x1, + Reset = 0x2, +} + +impl Default for PacketType { + fn default() -> Self { + PacketType::Read + } +} + +impl Input for PacketData { + fn generate_name(&self, idx: usize) -> String { + format!("id_{}", idx) + } +} + +impl HasTargetBytes for PacketData 
{ + #[inline] + fn target_bytes(&self) -> OwnedSlice { + let mut serialized_data = Vec::with_capacity(self.serialized_size()); + self.binary_serialize::<_, LittleEndian>(&mut serialized_data); + OwnedSlice::Owned(serialized_data) + } +} + +impl HasLen for PacketData { + fn len(&self) -> usize { + self.serialized_size() + } +} diff --git a/fuzzers/tutorial/src/lib.rs b/fuzzers/tutorial/src/lib.rs new file mode 100644 index 0000000000..9787bed9c1 --- /dev/null +++ b/fuzzers/tutorial/src/lib.rs @@ -0,0 +1,187 @@ +//! A libfuzzer-like fuzzer with llmp-multithreading support and restarts + +#![feature(min_specialization)] +//#![feature(min_const_generics)] + +use core::time::Duration; +use std::{env, path::PathBuf}; + +use libafl::{ + bolts::{current_nanos, rands::StdRand, tuples::tuple_list}, + corpus::{Corpus, InMemoryCorpus, OnDiskCorpus, PowerQueueCorpusScheduler}, + events::{setup_restarting_mgr_std, EventConfig, EventRestarter}, + executors::{inprocess::InProcessExecutor, ExitKind, TimeoutExecutor}, + feedback_or, feedback_or_fast, + feedbacks::{CrashFeedback, MapFeedbackState, MaxMapFeedback, TimeFeedback, TimeoutFeedback}, + fuzzer::{Fuzzer, StdFuzzer}, + inputs::HasTargetBytes, + observers::{HitcountsMapObserver, StdMapObserver, TimeObserver}, + stages::{ + calibrate::CalibrationStage, + power::{PowerMutationalStage, PowerSchedule}, + }, + state::{HasCorpus, StdState}, + stats::MultiStats, + Error, +}; + +use libafl_targets::{libfuzzer_initialize, libfuzzer_test_one_input, EDGES_MAP, MAX_EDGES_NUM}; + +mod input; +use input::*; + +mod mutator; +use mutator::*; + +mod metadata; +use metadata::*; + +/// The main fn, `no_mangle` as it is a C main +#[cfg(not(test))] +#[no_mangle] +pub fn libafl_main() { + // Registry the metadata types used in this fuzzer + // Needed only on no_std + //RegistryBuilder::register::(); + + println!( + "Workdir: {:?}", + env::current_dir().unwrap().to_string_lossy().to_string() + ); + fuzz( + &[PathBuf::from("./corpus")], + 
PathBuf::from("./crashes"), + 1337, + ) + .expect("An error occurred while fuzzing"); +} + +/// The actual fuzzer +fn fuzz(corpus_dirs: &[PathBuf], objective_dir: PathBuf, broker_port: u16) -> Result<(), Error> { + // The wrapped harness function, calling out to the LLVM-style harness + let mut harness = |input: &PacketData| { + let target = input.target_bytes(); + let buf = target.as_slice(); + libfuzzer_test_one_input(buf); + ExitKind::Ok + }; + + // 'While the stats are state, they are usually used in the broker - which is likely never restarted + let stats = MultiStats::new(|s| println!("{}", s)); + + // The restarting state will spawn the same process again as child, then restarted it each time it crashes. + let (state, mut restarting_mgr) = + match setup_restarting_mgr_std(stats, broker_port, EventConfig::AlwaysUnique) { + Ok(res) => res, + Err(err) => match err { + Error::ShuttingDown => { + return Ok(()); + } + _ => { + panic!("Failed to setup the restarter: {}", err); + } + }, + }; + + // Create an observation channel using the coverage map + let edges = unsafe { &mut EDGES_MAP[0..MAX_EDGES_NUM] }; + let edges_observer = HitcountsMapObserver::new(StdMapObserver::new("edges", edges)); + + // Create an observation channel to keep track of the execution time + let time_observer = TimeObserver::new("time"); + + // The state of the edges feedback. 
+ let feedback_state = MapFeedbackState::with_observer(&edges_observer); + + // Feedback to rate the interestingness of an input + // This one is composed by two Feedbacks in OR + let feedback = feedback_or!( + // New maximization map feedback linked to the edges observer and the feedback state + MaxMapFeedback::new_tracking(&feedback_state, &edges_observer, true, false), + // Time feedback, this one does not need a feedback state + TimeFeedback::new_with_observer(&time_observer), + PacketLenFeedback::new() + ); + + // A feedback to choose if an input is a solution or not + let objective = feedback_or_fast!(CrashFeedback::new(), TimeoutFeedback::new()); + + // If not restarting, create a State from scratch + let mut state = state.unwrap_or_else(|| { + StdState::new( + // RNG + StdRand::with_seed(current_nanos()), + // Corpus that will be evolved, we keep it in memory for performance + InMemoryCorpus::new(), + // Corpus in which we store solutions (crashes in this example), + // on disk so the user can get them after stopping the fuzzer + OnDiskCorpus::new(objective_dir).unwrap(), + // States of the feedbacks. + // They are the data related to the feedbacks that you want to persist in the State. 
+ tuple_list!(feedback_state), + ) + }); + + println!("We're a client, let's fuzz :)"); + + // Setup a lain mutator with a mutational stage + let mutator = LainMutator::new(); + + let calibration = CalibrationStage::new(&mut state, &edges_observer); + let power = PowerMutationalStage::new(mutator, PowerSchedule::FAST, &edges_observer); + + let mut stages = tuple_list!(calibration, power); + + // A minimization+queue policy to get testcasess from the corpus + let scheduler = PacketLenMinimizerCorpusScheduler::new(PowerQueueCorpusScheduler::new()); + + // A fuzzer with feedbacks and a corpus scheduler + let mut fuzzer = StdFuzzer::new(scheduler, feedback, objective); + + // Create the executor for an in-process function with one observer for edge coverage and one for the execution time + let mut executor = TimeoutExecutor::new( + InProcessExecutor::new( + &mut harness, + tuple_list!(edges_observer, time_observer), + &mut fuzzer, + &mut state, + &mut restarting_mgr, + )?, + // 10 seconds timeout + Duration::new(10, 0), + ); + + // The actual target run starts here. + // Call LLVMFUzzerInitialize() if present. + let args: Vec = env::args().collect(); + if libfuzzer_initialize(&args) == -1 { + println!("Warning: LLVMFuzzerInitialize failed with -1") + } + + // In case the corpus is empty (on first run), reset + if state.corpus().count() < 1 { + state + .load_initial_inputs(&mut fuzzer, &mut executor, &mut restarting_mgr, corpus_dirs) + .unwrap_or_else(|_| panic!("Failed to load initial corpus at {:?}", &corpus_dirs)); + println!("We imported {} inputs from disk.", state.corpus().count()); + } + + // This fuzzer restarts after 1 mio `fuzz_one` executions. + // Each fuzz_one will internally do many executions of the target. + // If your target is very instable, setting a low count here may help. + // However, you will lose a lot of performance that way. 
+ let iters = 1_000_000; + fuzzer.fuzz_loop_for( + &mut stages, + &mut state, + &mut executor, + &mut restarting_mgr, + iters, + )?; + + // It's important, that we store the state before restarting! + // Else, the parent will not respawn a new child and quit. + restarting_mgr.on_restart(&mut state)?; + + Ok(()) +} diff --git a/fuzzers/tutorial/src/metadata.rs b/fuzzers/tutorial/src/metadata.rs new file mode 100644 index 0000000000..a2188bfb6e --- /dev/null +++ b/fuzzers/tutorial/src/metadata.rs @@ -0,0 +1,90 @@ +use libafl::{ + bolts::tuples::Named, + corpus::{FavFactor, MinimizerCorpusScheduler, Testcase}, + events::EventFirer, + executors::ExitKind, + feedbacks::{Feedback, MapIndexesMetadata}, + observers::ObserversTuple, + state::{HasClientPerfStats, HasMetadata}, + Error, SerdeAny, +}; + +use crate::input::PacketData; + +use serde::{Deserialize, Serialize}; + +#[derive(SerdeAny, Serialize, Deserialize)] +pub struct PacketLenMetadata { + pub length: u64, +} + +pub struct PacketLenFavFactor {} + +impl FavFactor for PacketLenFavFactor { + fn compute(entry: &mut Testcase) -> Result { + Ok(entry + .metadata() + .get::() + .map_or(1, |m| m.length)) + } +} + +pub type PacketLenMinimizerCorpusScheduler = + MinimizerCorpusScheduler; + +#[derive(Serialize, Deserialize, Default, Clone, Debug)] +pub struct PacketLenFeedback { + len: u64, +} + +impl Feedback for PacketLenFeedback +where + S: HasClientPerfStats, +{ + fn is_interesting( + &mut self, + _state: &mut S, + _manager: &mut EM, + input: &PacketData, + _observers: &OT, + _exit_kind: &ExitKind, + ) -> Result + where + EM: EventFirer, + OT: ObserversTuple, + { + self.len = input.length; + Ok(false) + } + + #[inline] + fn append_metadata( + &mut self, + _state: &mut S, + testcase: &mut Testcase, + ) -> Result<(), Error> { + testcase + .metadata_mut() + .insert(PacketLenMetadata { length: self.len }); + Ok(()) + } + + #[inline] + fn discard_metadata(&mut self, _state: &mut S, _input: &PacketData) -> Result<(), Error> { + 
Ok(()) + } +} + +impl Named for PacketLenFeedback { + #[inline] + fn name(&self) -> &str { + "PacketLenFeedback" + } +} + +impl PacketLenFeedback { + #[must_use] + pub fn new() -> Self { + Self::default() + } +} diff --git a/fuzzers/tutorial/src/mutator.rs b/fuzzers/tutorial/src/mutator.rs new file mode 100644 index 0000000000..d560967b2a --- /dev/null +++ b/fuzzers/tutorial/src/mutator.rs @@ -0,0 +1,76 @@ +use libafl::{ + bolts::{ + rands::{Rand, StdRand}, + tuples::Named, + }, + mutators::{MutationResult, Mutator}, + state::HasRand, + Error, +}; + +use crate::input::PacketData; + +use core::marker::PhantomData; +use lain::traits::Mutatable; + +pub struct LainMutator +where + S: HasRand, + R: Rand, +{ + inner: lain::mutator::Mutator, + phantom: PhantomData<*const (R, S)>, +} + +impl Mutator for LainMutator +where + S: HasRand, + R: Rand, +{ + fn mutate( + &mut self, + state: &mut S, + input: &mut PacketData, + _stage_idx: i32, + ) -> Result { + // Lain uses its own instance of StdRand, but we want to keep it in sync with LibAFL's state. 
+ self.inner.rng_mut().set_seed(state.rand_mut().next()); + input.mutate(&mut self.inner, None); + Ok(MutationResult::Mutated) + } +} + +impl Named for LainMutator +where + S: HasRand, + R: Rand, +{ + fn name(&self) -> &str { + "LainMutator" + } +} + +impl LainMutator +where + S: HasRand, + R: Rand, +{ + #[must_use] + pub fn new() -> Self { + Self { + inner: lain::mutator::Mutator::new(StdRand::with_seed(0)), + phantom: PhantomData, + } + } +} + +impl Default for LainMutator +where + S: HasRand, + R: Rand, +{ + #[must_use] + fn default() -> Self { + Self::new() + } +} diff --git a/fuzzers/tutorial/target.c b/fuzzers/tutorial/target.c new file mode 100644 index 0000000000..678a48ea22 --- /dev/null +++ b/fuzzers/tutorial/target.c @@ -0,0 +1,71 @@ +#include +#include +#include +#include +#include + +#define MAX_PACKET_SIZE 0x1000 + +typedef enum _packet_type { + data_read = 0x0, + data_write = 0x1, + data_reset = 0x2, +} packet_type; + +#pragma pack(1) +typedef struct _packet_data { + packet_type type; + uint64_t offset; + uint64_t length; + char data[0]; +} packet_data; + + +int LLVMFuzzerTestOneInput(const uint8_t *packet_buffer, size_t packet_length) { + ssize_t saved_data_length = 0; + char* saved_data = NULL; + int err = 0; + packet_data* datagram = NULL; + + if (packet_length < sizeof(packet_data) || packet_length > MAX_PACKET_SIZE) { + return 1; + } + + datagram = (packet_data*)packet_buffer; + + switch (datagram->type) { +case data_read: + if (saved_data != NULL && datagram->offset + datagram->length <= saved_data_length) { + write(0, packet_buffer + datagram->offset, datagram->length); + } + break; + +case data_write: + // NOTE: Who cares about checking the offset? 
Nobody would ever provide bad data + if (saved_data != NULL && datagram->length <= saved_data_length) { + memcpy(saved_data + datagram->offset, datagram->data, datagram->length); + } + break; + +case data_reset: + if (datagram->length > packet_length - sizeof(*datagram)) { + return 1; + } + + if (saved_data != NULL) { + free(saved_data); + } + + saved_data = malloc(datagram->length); + saved_data_length = datagram->length; + + memcpy(saved_data, datagram->data, datagram->length); + break; + +default: + return 1; + } + + return 0; +} + diff --git a/libafl/Cargo.toml b/libafl/Cargo.toml index c9682f0c98..762f1637b4 100644 --- a/libafl/Cargo.toml +++ b/libafl/Cargo.toml @@ -37,7 +37,7 @@ harness = false #debug = true [features] -default = ["std", "anymap_debug", "derive", "llmp_compression"] +default = ["std", "anymap_debug", "derive", "llmp_compression", "rand_trait"] std = ["serde_json", "hostname", "core_affinity", "nix", "serde/std", "bincode", "wait-timeout", "regex", "build_id", "uuid"] # print, env, launcher ... support anymap_debug = ["serde_json"] # uses serde_json to Debug the anymap trait. Disable for smaller footprint. derive = ["libafl_derive"] # provide derive(SerdeAny) macro. @@ -74,8 +74,7 @@ serde_json = { version = "1.0", optional = true, default-features = false, featu miniz_oxide = { version = "0.4.4", optional = true} core_affinity = { version = "0.5", git = "https://github.com/s1341/core_affinity_rs", optional = true } hostname = { version = "^0.3", optional = true } # Is there really no gethostname in the stdlib? -rand = { version = "0.8.1", optional = true } # -rand_core = { version = "0.6.2", optional = true } # This dependency allows us to export our RomuRand as rand::Rng. +rand_core = { version = "0.5.1", optional = true } # This dependency allows us to export our RomuRand as rand::Rng. 
nix = { version = "0.20.0", optional = true } regex = { version = "1", optional = true } build_id = { version = "0.2.1", git = "https://github.com/domenukk/build_id", branch = "main", optional = true } diff --git a/libafl/src/bolts/rands.rs b/libafl/src/bolts/rands.rs index a2287070ca..9ecbe9917b 100644 --- a/libafl/src/bolts/rands.rs +++ b/libafl/src/bolts/rands.rs @@ -412,4 +412,20 @@ mod tests { assert_ne!(rand.next(), rand_fixed.next()); test_single_rand(&mut rand); } + + #[test] + #[cfg(feature = "rand_trait")] + fn test_rgn_core_support() { + use crate::bolts::rands::StdRand; + use rand_core::RngCore; + pub struct Mutator { + rng: R, + } + + let mut mutator = Mutator { + rng: StdRand::with_seed(0), + }; + + println!("random value: {}", mutator.rng.next_u32()); + } } diff --git a/libafl/src/lib.rs b/libafl/src/lib.rs index cbbdc91be5..56196d3073 100644 --- a/libafl/src/lib.rs +++ b/libafl/src/lib.rs @@ -3,7 +3,7 @@ Welcome to `LibAFL` */ #![cfg_attr(not(feature = "std"), no_std)] -#![cfg_attr(feature = "RUSTC_IS_NIGHTLY", feature(specialization))] +#![cfg_attr(feature = "RUSTC_IS_NIGHTLY", feature(min_specialization))] #![deny(rustdoc::broken_intra_doc_links)] #[macro_use] diff --git a/libafl/src/observers/mod.rs b/libafl/src/observers/mod.rs index 1af37e0b5b..1703ea42f6 100644 --- a/libafl/src/observers/mod.rs +++ b/libafl/src/observers/mod.rs @@ -30,13 +30,13 @@ pub trait Observer: Named { Ok(()) } - /// Called right before exexution starts + /// Called right before execution starts. #[inline] fn pre_exec(&mut self, _state: &mut S, _input: &I) -> Result<(), Error> { Ok(()) } - /// Called right after execution finished. + /// Called right after execution finishes. #[inline] fn post_exec(&mut self, _state: &mut S, _input: &I) -> Result<(), Error> { Ok(())