Unicode-preserving mutators (#1542)
* create the string classification stage * modify API to pre-group * preserving mutator * more meaningful test * subproperty mutators + some fixes * document, finalise, integrate with libafl_libfuzzer * add example, fix for weird range select * fix for introspection * fix fuzzer build * speed optimisation: allow, but do not require, stacking * property => category * token replacement * fixup: rare case where rust does not agree on valid character * fix CI again * again again * take two: dynamic unicode discovery * oops * fix: last byte is never selected * opt: bias to smaller unicode categories * fix test * opt: precompute regions and fix tests * cache and allow stacking * document and update libafl_libfuzzer * oops, use reverse * fix bolts clippy error * fixup part 2 * clippy * part 2 * clippy warning allow * clippy complaint * use alloc not std --------- Co-authored-by: toka <tokazerkje@outlook.com>
This commit is contained in:
parent
1e96652ed2
commit
281524dbf9
9
.github/workflows/build_and_test.yml
vendored
9
.github/workflows/build_and_test.yml
vendored
@ -73,6 +73,8 @@ jobs:
|
|||||||
run: command -v llvm-config-15 && clang-15 -v
|
run: command -v llvm-config-15 && clang-15 -v
|
||||||
- name: Add nightly rustfmt and clippy
|
- name: Add nightly rustfmt and clippy
|
||||||
run: rustup toolchain install nightly --component rustfmt --component clippy --component miri --allow-downgrade
|
run: rustup toolchain install nightly --component rustfmt --component clippy --component miri --allow-downgrade
|
||||||
|
- name: Install ucd-generate
|
||||||
|
run: cargo install -f ucd-generate
|
||||||
- uses: actions/checkout@v3
|
- uses: actions/checkout@v3
|
||||||
- uses: Swatinem/rust-cache@v2
|
- uses: Swatinem/rust-cache@v2
|
||||||
|
|
||||||
@ -135,6 +137,8 @@ jobs:
|
|||||||
run: command -v llvm-config-15 && clang-15 -v
|
run: command -v llvm-config-15 && clang-15 -v
|
||||||
- name: Install cargo-hack
|
- name: Install cargo-hack
|
||||||
run: curl -LsSf https://github.com/taiki-e/cargo-hack/releases/latest/download/cargo-hack-x86_64-unknown-linux-gnu.tar.gz | tar xzf - -C ~/.cargo/bin
|
run: curl -LsSf https://github.com/taiki-e/cargo-hack/releases/latest/download/cargo-hack-x86_64-unknown-linux-gnu.tar.gz | tar xzf - -C ~/.cargo/bin
|
||||||
|
- name: Install ucd-generate
|
||||||
|
run: cargo install -f ucd-generate
|
||||||
- name: Add nightly
|
- name: Add nightly
|
||||||
run: rustup toolchain install nightly --allow-downgrade
|
run: rustup toolchain install nightly --allow-downgrade
|
||||||
- uses: actions/checkout@v3
|
- uses: actions/checkout@v3
|
||||||
@ -222,6 +226,8 @@ jobs:
|
|||||||
- name: Install cxxbridge
|
- name: Install cxxbridge
|
||||||
if: runner.os == 'macOS'
|
if: runner.os == 'macOS'
|
||||||
run: cargo install cxxbridge-cmd
|
run: cargo install cxxbridge-cmd
|
||||||
|
- name: Install ucd-generate
|
||||||
|
run: cargo install -f ucd-generate
|
||||||
- name: Install python (macOS)
|
- name: Install python (macOS)
|
||||||
# Removing macOS things already installed in CI against failed linking
|
# Removing macOS things already installed in CI against failed linking
|
||||||
if: runner.os == 'macOS'
|
if: runner.os == 'macOS'
|
||||||
@ -384,6 +390,8 @@ jobs:
|
|||||||
toolchain: stable
|
toolchain: stable
|
||||||
- name: Add nightly rustfmt and clippy
|
- name: Add nightly rustfmt and clippy
|
||||||
run: rustup toolchain install nightly --component rustfmt --component clippy --allow-downgrade
|
run: rustup toolchain install nightly --component rustfmt --component clippy --allow-downgrade
|
||||||
|
- name: Install ucd-generate
|
||||||
|
run: cargo install -f ucd-generate
|
||||||
- name: Install deps
|
- name: Install deps
|
||||||
run: brew install z3 gtk+3
|
run: brew install z3 gtk+3
|
||||||
- name: Install cxxbridge
|
- name: Install cxxbridge
|
||||||
@ -453,6 +461,7 @@ jobs:
|
|||||||
freebsd-version
|
freebsd-version
|
||||||
. "$HOME/.cargo/env"
|
. "$HOME/.cargo/env"
|
||||||
rustup toolchain install nightly
|
rustup toolchain install nightly
|
||||||
|
cargo install -f ucd-generate
|
||||||
export LLVM_CONFIG=/usr/local/bin/llvm-config16
|
export LLVM_CONFIG=/usr/local/bin/llvm-config16
|
||||||
pwd
|
pwd
|
||||||
ls -lah
|
ls -lah
|
||||||
|
1
fuzzers/baby_fuzzer_unicode/.gitignore
vendored
Normal file
1
fuzzers/baby_fuzzer_unicode/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
libpng-*
|
24
fuzzers/baby_fuzzer_unicode/Cargo.toml
Normal file
24
fuzzers/baby_fuzzer_unicode/Cargo.toml
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
[package]
|
||||||
|
name = "baby_fuzzer_unicode"
|
||||||
|
version = "0.10.0"
|
||||||
|
authors = ["Andrea Fioraldi <andreafioraldi@gmail.com>", "Dominik Maier <domenukk@gmail.com>"]
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
[features]
|
||||||
|
default = ["std"]
|
||||||
|
tui = []
|
||||||
|
std = []
|
||||||
|
|
||||||
|
[profile.dev]
|
||||||
|
panic = "abort"
|
||||||
|
|
||||||
|
[profile.release]
|
||||||
|
panic = "abort"
|
||||||
|
lto = true
|
||||||
|
codegen-units = 1
|
||||||
|
opt-level = 3
|
||||||
|
debug = true
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
libafl = { path = "../../libafl/", features = ["unicode"] }
|
||||||
|
libafl_bolts = { path = "../../libafl_bolts/" }
|
15
fuzzers/baby_fuzzer_unicode/README.md
Normal file
15
fuzzers/baby_fuzzer_unicode/README.md
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
# Baby fuzzer: unicode
|
||||||
|
|
||||||
|
This is a minimalistic example of how to create a LibAFL-based fuzzer.
|
||||||
|
|
||||||
|
It runs on a single core until a crash occurs and then exits.
|
||||||
|
|
||||||
|
The tested program is a simple Rust function without any instrumentation.
|
||||||
|
For real fuzzing, you will want to add some sort of coverage or other feedback.
|
||||||
|
|
||||||
|
You can run this example using `cargo run`, and you can enable the TUI feature by running `cargo run --features tui`.
|
||||||
|
|
||||||
|
## Unicode
|
||||||
|
|
||||||
|
This fuzzer uses mutators which preserve unicode properties. For programs which have string-heavy inputs, you may
|
||||||
|
consider using the same strategy.
|
138
fuzzers/baby_fuzzer_unicode/src/main.rs
Normal file
138
fuzzers/baby_fuzzer_unicode/src/main.rs
Normal file
@ -0,0 +1,138 @@
|
|||||||
|
#[cfg(windows)]
|
||||||
|
use std::ptr::write_volatile;
|
||||||
|
use std::{path::PathBuf, ptr::write};
|
||||||
|
|
||||||
|
#[cfg(feature = "tui")]
|
||||||
|
use libafl::monitors::tui::{ui::TuiUI, TuiMonitor};
|
||||||
|
#[cfg(not(feature = "tui"))]
|
||||||
|
use libafl::monitors::SimpleMonitor;
|
||||||
|
use libafl::{
|
||||||
|
corpus::{InMemoryCorpus, OnDiskCorpus},
|
||||||
|
events::SimpleEventManager,
|
||||||
|
executors::{inprocess::InProcessExecutor, ExitKind},
|
||||||
|
feedbacks::{CrashFeedback, MaxMapFeedback},
|
||||||
|
fuzzer::{Fuzzer, StdFuzzer},
|
||||||
|
inputs::{BytesInput, HasTargetBytes},
|
||||||
|
mutators::{StdScheduledMutator, StringCategoryRandMutator, StringSubcategoryRandMutator},
|
||||||
|
observers::StdMapObserver,
|
||||||
|
schedulers::QueueScheduler,
|
||||||
|
stages::{mutational::StdMutationalStage, StringIdentificationStage},
|
||||||
|
state::StdState,
|
||||||
|
Evaluator,
|
||||||
|
};
|
||||||
|
use libafl_bolts::{current_nanos, rands::StdRand, tuples::tuple_list, AsSlice};
|
||||||
|
|
||||||
|
/// Coverage map with explicit assignments due to the lack of instrumentation
|
||||||
|
static mut SIGNALS: [u8; 64] = [0; 64];
|
||||||
|
static mut SIGNALS_PTR: *mut u8 = unsafe { SIGNALS.as_mut_ptr() };
|
||||||
|
|
||||||
|
/// Assign a signal to the signals map
|
||||||
|
fn signals_set(idx: usize) {
|
||||||
|
unsafe { write(SIGNALS_PTR.add(idx), 1) };
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(clippy::similar_names, clippy::manual_assert)]
|
||||||
|
pub fn main() {
|
||||||
|
// The closure that we want to fuzz
|
||||||
|
let mut harness = |input: &BytesInput| {
|
||||||
|
let target = input.target_bytes();
|
||||||
|
let buf = target.as_slice();
|
||||||
|
let goal = b"abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
|
||||||
|
let mut i = 0;
|
||||||
|
for _ in buf.iter().zip(goal).take_while(|(b, c)| b == c) {
|
||||||
|
signals_set(i);
|
||||||
|
i += 1;
|
||||||
|
}
|
||||||
|
if i == goal.len() {
|
||||||
|
#[cfg(unix)]
|
||||||
|
panic!("Artificial bug triggered =)");
|
||||||
|
|
||||||
|
#[cfg(windows)]
|
||||||
|
unsafe {
|
||||||
|
write_volatile(0 as *mut u32, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ExitKind::Ok
|
||||||
|
};
|
||||||
|
|
||||||
|
// Create an observation channel using the signals map
|
||||||
|
let observer = unsafe { StdMapObserver::from_mut_ptr("signals", SIGNALS_PTR, SIGNALS.len()) };
|
||||||
|
|
||||||
|
// Feedback to rate the interestingness of an input
|
||||||
|
let mut feedback = MaxMapFeedback::new(&observer);
|
||||||
|
|
||||||
|
// A feedback to choose if an input is a solution or not
|
||||||
|
let mut objective = CrashFeedback::new();
|
||||||
|
|
||||||
|
// create a State from scratch
|
||||||
|
let mut state = StdState::new(
|
||||||
|
// RNG
|
||||||
|
StdRand::with_seed(current_nanos()),
|
||||||
|
// Corpus that will be evolved, we keep it in memory for performance
|
||||||
|
InMemoryCorpus::new(),
|
||||||
|
// Corpus in which we store solutions (crashes in this example),
|
||||||
|
// on disk so the user can get them after stopping the fuzzer
|
||||||
|
OnDiskCorpus::new(PathBuf::from("./crashes")).unwrap(),
|
||||||
|
// States of the feedbacks.
|
||||||
|
// The feedbacks can report the data that should persist in the State.
|
||||||
|
&mut feedback,
|
||||||
|
// Same for objective feedbacks
|
||||||
|
&mut objective,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// The Monitor trait define how the fuzzer stats are displayed to the user
|
||||||
|
#[cfg(not(feature = "tui"))]
|
||||||
|
let mon = SimpleMonitor::new(|s| println!("{s}"));
|
||||||
|
#[cfg(feature = "tui")]
|
||||||
|
let ui = TuiUI::with_version(String::from("Baby Fuzzer"), String::from("0.0.1"), false);
|
||||||
|
#[cfg(feature = "tui")]
|
||||||
|
let mon = TuiMonitor::new(ui);
|
||||||
|
|
||||||
|
// The event manager handle the various events generated during the fuzzing loop
|
||||||
|
// such as the notification of the addition of a new item to the corpus
|
||||||
|
let mut mgr = SimpleEventManager::new(mon);
|
||||||
|
|
||||||
|
// A queue policy to get testcasess from the corpus
|
||||||
|
let scheduler = QueueScheduler::new();
|
||||||
|
|
||||||
|
// A fuzzer with feedbacks and a corpus scheduler
|
||||||
|
let mut fuzzer = StdFuzzer::new(scheduler, feedback, objective);
|
||||||
|
|
||||||
|
// Create the executor for an in-process function with just one observer
|
||||||
|
let mut executor = InProcessExecutor::new(
|
||||||
|
&mut harness,
|
||||||
|
tuple_list!(observer),
|
||||||
|
&mut fuzzer,
|
||||||
|
&mut state,
|
||||||
|
&mut mgr,
|
||||||
|
)
|
||||||
|
.expect("Failed to create the Executor");
|
||||||
|
|
||||||
|
// Generate 8 initial inputs
|
||||||
|
fuzzer
|
||||||
|
.evaluate_input(
|
||||||
|
&mut state,
|
||||||
|
&mut executor,
|
||||||
|
&mut mgr,
|
||||||
|
BytesInput::new(vec![b'a']),
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Setup a mutational stage with a basic bytes mutator
|
||||||
|
let mutator = StdScheduledMutator::new(tuple_list!(
|
||||||
|
StringCategoryRandMutator,
|
||||||
|
StringSubcategoryRandMutator,
|
||||||
|
StringSubcategoryRandMutator,
|
||||||
|
StringSubcategoryRandMutator,
|
||||||
|
StringSubcategoryRandMutator
|
||||||
|
));
|
||||||
|
let mut stages = tuple_list!(
|
||||||
|
StringIdentificationStage::new(),
|
||||||
|
StdMutationalStage::transforming(mutator)
|
||||||
|
);
|
||||||
|
|
||||||
|
fuzzer
|
||||||
|
.fuzz_loop(&mut stages, &mut executor, &mut state, &mut mgr)
|
||||||
|
.expect("Error in the fuzzing loop");
|
||||||
|
}
|
@ -77,6 +77,9 @@ concolic_mutation = ["z3"]
|
|||||||
## Enable the fancy TuiMonitor for a terminal UI using crossterm
|
## Enable the fancy TuiMonitor for a terminal UI using crossterm
|
||||||
tui_monitor = ["ratatui", "crossterm"]
|
tui_monitor = ["ratatui", "crossterm"]
|
||||||
|
|
||||||
|
## Enables `StringIdentificationStage` and associated mutators, which allow for mutations which preserve the Unicode property data
|
||||||
|
unicode = ["libafl_bolts/alloc", "ahash/std", "serde/rc", "bitvec"]
|
||||||
|
|
||||||
|
|
||||||
#! ## LibAFL-Bolts Features
|
#! ## LibAFL-Bolts Features
|
||||||
|
|
||||||
@ -126,7 +129,9 @@ agpl = ["nautilus"]
|
|||||||
nautilus = ["grammartec", "std", "serde_json/std"]
|
nautilus = ["grammartec", "std", "serde_json/std"]
|
||||||
|
|
||||||
[build-dependencies]
|
[build-dependencies]
|
||||||
|
reqwest = { version = "0.11", features = ["blocking"] }
|
||||||
rustversion = "1.0"
|
rustversion = "1.0"
|
||||||
|
zip = "0.6"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
serde_json = { version = "1.0", default-features = false, features = ["alloc"] }
|
serde_json = { version = "1.0", default-features = false, features = ["alloc"] }
|
||||||
@ -172,7 +177,9 @@ z3 = { version = "0.12.0", features = ["static-link-z3"], optional = true } # fo
|
|||||||
pyo3 = { version = "0.18", optional = true, features = ["serde", "macros"] }
|
pyo3 = { version = "0.18", optional = true, features = ["serde", "macros"] }
|
||||||
concat-idents = { version = "1.1.3", optional = true }
|
concat-idents = { version = "1.1.3", optional = true }
|
||||||
|
|
||||||
libcasr = { version = "2.7", optional = true}
|
libcasr = { version = "2.7", optional = true }
|
||||||
|
|
||||||
|
bitvec = { version = "1.0", optional = true, features = ["serde"] } # used for string range storage
|
||||||
|
|
||||||
# optional-dev deps (change when target.'cfg(accessible(::std))'.test-dependencies will be stable)
|
# optional-dev deps (change when target.'cfg(accessible(::std))'.test-dependencies will be stable)
|
||||||
serial_test = { version = "2", optional = true, default-features = false, features = ["logging"] }
|
serial_test = { version = "2", optional = true, default-features = false, features = ["logging"] }
|
||||||
|
@ -1,14 +1,69 @@
|
|||||||
|
use std::error::Error;
|
||||||
|
|
||||||
#[rustversion::nightly]
|
#[rustversion::nightly]
|
||||||
fn main() {
|
fn main() -> Result<(), Box<dyn Error>> {
|
||||||
println!("cargo:rerun-if-changed=build.rs");
|
println!("cargo:rerun-if-changed=build.rs");
|
||||||
println!("cargo:rustc-cfg=nightly");
|
println!("cargo:rustc-cfg=nightly");
|
||||||
|
#[cfg(feature = "unicode")]
|
||||||
|
{
|
||||||
|
build_unicode_property_map()?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[rustversion::not(nightly)]
|
#[rustversion::not(nightly)]
|
||||||
fn main() {
|
fn main() -> Result<(), Box<dyn Error>> {
|
||||||
println!("cargo:rerun-if-changed=build.rs");
|
println!("cargo:rerun-if-changed=build.rs");
|
||||||
assert!(
|
assert!(
|
||||||
cfg!(all(not(docrs), not(feature = "nautilus"))),
|
cfg!(all(not(docrs), not(feature = "nautilus"))),
|
||||||
"The 'nautilus' feature of libafl requires a nightly compiler"
|
"The 'nautilus' feature of libafl requires a nightly compiler"
|
||||||
);
|
);
|
||||||
|
#[cfg(feature = "unicode")]
|
||||||
|
{
|
||||||
|
build_unicode_property_map()?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Downloads the latest Unicode Character Database and runs `ucd-generate` on
/// it, writing the general-category tables to `$OUT_DIR/unicode_categories.rs`.
///
/// # Errors
///
/// Fails when `OUT_DIR` is not set, when the download does not succeed (which
/// includes non-success HTTP status codes), when the archive cannot be
/// unpacked, or when `ucd-generate` cannot be started or exits unsuccessfully.
#[cfg(feature = "unicode")]
fn build_unicode_property_map() -> Result<(), Box<dyn Error>> {
    use std::{
        env,
        fs::{self, File},
        path::PathBuf,
        process::{Command, Stdio},
    };

    let out_dir = PathBuf::from(
        env::var_os("OUT_DIR").ok_or("OUT_DIR is not set; this must run from a build script")?,
    );
    let ucd_dir = out_dir.join("ucd-dir");
    let generated_file = out_dir.join("unicode_categories.rs");

    fs::create_dir_all(&ucd_dir)?;

    // Reject HTTP error responses up front; otherwise the body of an error page
    // would be stored as `ucd.zip` and only fail later as a corrupt archive.
    let zip_path = ucd_dir.join("ucd.zip");
    let archive = reqwest::blocking::get("https://www.unicode.org/Public/zipped/latest/UCD.zip")?
        .error_for_status()?
        .bytes()?;
    fs::write(&zip_path, &archive)?;
    drop(archive);

    let mut zip_file = zip::ZipArchive::new(File::open(&zip_path)?)?;
    zip_file.extract(&ucd_dir)?;
    drop(zip_file);

    // The archive itself is no longer needed once it has been unpacked.
    fs::remove_file(zip_path)?;

    // The stdout of `ucd-generate` is redirected straight into the generated file.
    let status = Command::new("ucd-generate")
        .arg("general-category")
        .arg(ucd_dir.as_os_str())
        .stdout(Stdio::from(File::create(generated_file)?))
        .status()?;
    if !status.success() {
        return Err(format!("`ucd-generate general-category` failed: {status}").into());
    }

    Ok(())
}
|
||||||
|
@ -20,6 +20,11 @@ pub use grimoire::*;
|
|||||||
pub mod tuneable;
|
pub mod tuneable;
|
||||||
pub use tuneable::*;
|
pub use tuneable::*;
|
||||||
|
|
||||||
|
#[cfg(feature = "unicode")]
|
||||||
|
pub mod string;
|
||||||
|
#[cfg(feature = "unicode")]
|
||||||
|
pub use string::*;
|
||||||
|
|
||||||
#[cfg(feature = "nautilus")]
|
#[cfg(feature = "nautilus")]
|
||||||
pub mod nautilus;
|
pub mod nautilus;
|
||||||
use alloc::vec::Vec;
|
use alloc::vec::Vec;
|
||||||
|
595
libafl/src/mutators/string.rs
Normal file
595
libafl/src/mutators/string.rs
Normal file
@ -0,0 +1,595 @@
|
|||||||
|
//! Mutators for preserving string categories, which may be useful for certain targets which are primarily string-oriented.
|
||||||
|
use alloc::vec::Vec;
|
||||||
|
use core::{
|
||||||
|
cmp::{Ordering, Reverse},
|
||||||
|
ops::Range,
|
||||||
|
};
|
||||||
|
|
||||||
|
use libafl_bolts::{rands::Rand, Error, HasLen, Named};
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
corpus::{CorpusId, HasTestcase, Testcase},
|
||||||
|
inputs::{BytesInput, HasBytesVec},
|
||||||
|
mutators::{rand_range, MutationResult, Mutator, Tokens},
|
||||||
|
stages::{
|
||||||
|
extract_metadata,
|
||||||
|
mutational::{MutatedTransform, MutatedTransformPost},
|
||||||
|
StringIdentificationMetadata,
|
||||||
|
},
|
||||||
|
state::{HasCorpus, HasMaxSize, HasMetadata, HasRand},
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Input which contains the context necessary to perform unicode mutations
|
||||||
|
pub type UnicodeInput = (BytesInput, StringIdentificationMetadata);
|
||||||
|
|
||||||
|
impl<S> MutatedTransform<BytesInput, S> for UnicodeInput
|
||||||
|
where
|
||||||
|
S: HasCorpus<Input = BytesInput> + HasTestcase,
|
||||||
|
{
|
||||||
|
type Post = StringIdentificationMetadata;
|
||||||
|
|
||||||
|
fn try_transform_from(
|
||||||
|
base: &mut Testcase<BytesInput>,
|
||||||
|
state: &S,
|
||||||
|
_corpus_idx: CorpusId,
|
||||||
|
) -> Result<Self, Error> {
|
||||||
|
let input = base.load_input(state.corpus())?.clone();
|
||||||
|
let metadata = base.metadata::<StringIdentificationMetadata>().cloned()?;
|
||||||
|
Ok((input, metadata))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn try_transform_into(self, _state: &S) -> Result<(BytesInput, Self::Post), Error> {
|
||||||
|
Ok(self)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<S> MutatedTransformPost<S> for StringIdentificationMetadata
|
||||||
|
where
|
||||||
|
S: HasTestcase,
|
||||||
|
{
|
||||||
|
fn post_exec(
|
||||||
|
self,
|
||||||
|
state: &mut S,
|
||||||
|
_stage_idx: i32,
|
||||||
|
corpus_idx: Option<CorpusId>,
|
||||||
|
) -> Result<(), Error> {
|
||||||
|
if let Some(corpus_idx) = corpus_idx {
|
||||||
|
let mut tc = state.testcase_mut(corpus_idx)?;
|
||||||
|
tc.add_metadata(self);
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const MAX_CHARS: usize = 16;
|
||||||
|
|
||||||
|
fn choose_start<R: Rand>(
|
||||||
|
rand: &mut R,
|
||||||
|
bytes: &[u8],
|
||||||
|
meta: &StringIdentificationMetadata,
|
||||||
|
) -> Option<(usize, usize)> {
|
||||||
|
let idx = rand.below(bytes.len() as u64) as usize;
|
||||||
|
let mut options = Vec::new();
|
||||||
|
for (start, range) in meta.ranges() {
|
||||||
|
if idx
|
||||||
|
.checked_sub(*start) // idx adjusted to start
|
||||||
|
.and_then(|idx| (idx < range.len()).then(|| range[idx])) // idx in range
|
||||||
|
.map_or(false, |r| r)
|
||||||
|
{
|
||||||
|
options.push((*start, range));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
match options.len() {
|
||||||
|
0 => None,
|
||||||
|
1 => Some((options[0].0, options[0].1.len())),
|
||||||
|
_ => {
|
||||||
|
// bias towards longer strings
|
||||||
|
options.sort_by_cached_key(|(_, entries)| entries.count_ones());
|
||||||
|
let selected = libafl_bolts::math::integer_sqrt(
|
||||||
|
rand.below((options.len() * options.len()) as u64),
|
||||||
|
) as usize;
|
||||||
|
Some((options[selected].0, options[selected].1.len()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Binary-searches `haystack` for the inclusive `(min, max)` range containing `needle`.
///
/// Binary search only gives a meaningful answer when `haystack` is sorted and
/// its ranges do not overlap. Returns the matching range, or `None` when no
/// range contains `needle`.
fn get_subcategory<T: Ord + Copy>(needle: T, haystack: &[(T, T)]) -> Option<(T, T)> {
    let position = haystack.binary_search_by(|&(min, max)| {
        if min > needle {
            // The range lies entirely above the needle.
            Ordering::Greater
        } else if needle > max {
            // The range lies entirely below the needle.
            Ordering::Less
        } else {
            Ordering::Equal
        }
    });

    match position {
        Ok(found) => Some(haystack[found]),
        Err(_) => None,
    }
}
|
||||||
|
|
||||||
|
/// Grows a byte range around the character at `chars[idx]` for as long as the
/// neighbouring characters satisfy `predicate`.
///
/// `chars` holds `(byte offset, char)` pairs. The character at `idx` is always
/// part of the result and is not itself tested against `predicate`. The
/// returned range is expressed in byte offsets, with the end placed after the
/// last included character.
///
/// # Panics
///
/// Panics if `idx` is out of bounds for `chars`.
fn find_range<F: Fn(char) -> bool>(
    chars: &[(usize, char)],
    idx: usize,
    predicate: F,
) -> Range<usize> {
    let (anchor_offset, anchor_char) = chars[idx];

    // Extend to the left while the preceding characters keep matching.
    let mut start = anchor_offset;
    for &(offset, c) in chars[..idx].iter().rev() {
        if !predicate(c) {
            break;
        }
        start = offset;
    }

    // Extend to the right in the same way.
    let mut end = anchor_offset + anchor_char.len_utf8();
    for &(offset, c) in &chars[(idx + 1)..] {
        if !predicate(c) {
            break;
        }
        end = offset + c.len_utf8();
    }

    start..end
}
|
||||||
|
|
||||||
|
fn choose_category_range<R: Rand>(
|
||||||
|
rand: &mut R,
|
||||||
|
string: &str,
|
||||||
|
) -> (Range<usize>, &'static [(u32, u32)]) {
|
||||||
|
let chars = string.char_indices().collect::<Vec<_>>();
|
||||||
|
let idx = rand.below(chars.len() as u64) as usize;
|
||||||
|
let c = chars[idx].1;
|
||||||
|
|
||||||
|
// figure out the categories for this char
|
||||||
|
let expanded = c as u32;
|
||||||
|
#[cfg(test)]
|
||||||
|
let mut names = Vec::new();
|
||||||
|
let mut categories = Vec::new();
|
||||||
|
for (_name, category) in unicode_categories::BY_NAME {
|
||||||
|
if get_subcategory(expanded, category).is_some() {
|
||||||
|
#[cfg(test)]
|
||||||
|
names.push(_name);
|
||||||
|
categories.push(category);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ok -- we want to bias towards smaller regions to keep the mutations "tight" to original
|
||||||
|
// we sort the options by descending length, then pick isqrt of below(n^2)
|
||||||
|
|
||||||
|
categories.sort_by_cached_key(|cat| {
|
||||||
|
Reverse(
|
||||||
|
cat.iter()
|
||||||
|
.map(|&(min, max)| (max - min + 1) as usize)
|
||||||
|
.sum::<usize>(),
|
||||||
|
)
|
||||||
|
});
|
||||||
|
let options = categories.len() * categories.len();
|
||||||
|
let selected_idx = libafl_bolts::math::integer_sqrt(rand.below(options as u64)) as usize;
|
||||||
|
|
||||||
|
let selected = categories[selected_idx];
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
println!("category for `{c}' ({}): {}", c as u32, names[selected_idx]);
|
||||||
|
|
||||||
|
(
|
||||||
|
find_range(&chars, idx, |c| {
|
||||||
|
get_subcategory(c as u32, selected).is_some()
|
||||||
|
}),
|
||||||
|
selected,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn choose_subcategory_range<R: Rand>(rand: &mut R, string: &str) -> (Range<usize>, (u32, u32)) {
|
||||||
|
let chars = string.char_indices().collect::<Vec<_>>();
|
||||||
|
let idx = rand.below(chars.len() as u64) as usize;
|
||||||
|
let c = chars[idx].1;
|
||||||
|
|
||||||
|
// figure out the categories for this char
|
||||||
|
let expanded = c as u32;
|
||||||
|
#[cfg(test)]
|
||||||
|
let mut names = Vec::new();
|
||||||
|
let mut subcategories = Vec::new();
|
||||||
|
for (_name, category) in unicode_categories::BY_NAME {
|
||||||
|
if let Some(subcategory) = get_subcategory(expanded, category) {
|
||||||
|
#[cfg(test)]
|
||||||
|
names.push(_name);
|
||||||
|
subcategories.push(subcategory);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// see reasoning for selection pattern in choose_category_range
|
||||||
|
|
||||||
|
subcategories.sort_by_key(|&(min, max)| Reverse(max - min + 1));
|
||||||
|
let options = subcategories.len() * subcategories.len();
|
||||||
|
let selected_idx = libafl_bolts::math::integer_sqrt(rand.below(options as u64)) as usize;
|
||||||
|
let selected = subcategories[selected_idx];
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
println!(
|
||||||
|
"subcategory for `{c}' ({}): {} ({:?})",
|
||||||
|
c as u32, names[selected_idx], selected
|
||||||
|
);
|
||||||
|
|
||||||
|
(
|
||||||
|
find_range(&chars, idx, |c| {
|
||||||
|
let expanded = c as u32;
|
||||||
|
selected.0 <= expanded && expanded <= selected.1
|
||||||
|
}),
|
||||||
|
selected,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn rand_replace_range<S: HasRand + HasMaxSize, F: Fn(&mut S) -> char>(
|
||||||
|
state: &mut S,
|
||||||
|
input: &mut UnicodeInput,
|
||||||
|
range: Range<usize>,
|
||||||
|
char_gen: F,
|
||||||
|
) -> MutationResult {
|
||||||
|
let temp_range = rand_range(state, range.end - range.start, MAX_CHARS);
|
||||||
|
let range = (range.start + temp_range.start)..(range.start + temp_range.end);
|
||||||
|
let range = match core::str::from_utf8(&input.0.bytes()[range.clone()]) {
|
||||||
|
Ok(_) => range,
|
||||||
|
Err(e) => range.start..(range.start + e.valid_up_to()),
|
||||||
|
};
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
println!(
|
||||||
|
"mutating range: {:?} ({:?})",
|
||||||
|
range,
|
||||||
|
core::str::from_utf8(&input.0.bytes()[range.clone()])
|
||||||
|
);
|
||||||
|
if range.start == range.end {
|
||||||
|
return MutationResult::Skipped;
|
||||||
|
}
|
||||||
|
|
||||||
|
let replace_len = state.rand_mut().below(MAX_CHARS as u64) as usize;
|
||||||
|
let orig_len = range.end - range.start;
|
||||||
|
if input.0.len() - orig_len + replace_len > state.max_size() {
|
||||||
|
return MutationResult::Skipped;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut replacement = Vec::with_capacity(replace_len);
|
||||||
|
let mut dest = [0u8; 4];
|
||||||
|
|
||||||
|
loop {
|
||||||
|
let new_c = char_gen(state);
|
||||||
|
if replacement.len() + new_c.len_utf8() > replace_len {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
new_c.encode_utf8(&mut dest);
|
||||||
|
replacement.extend_from_slice(&dest[..new_c.len_utf8()]);
|
||||||
|
if replacement.len() + new_c.len_utf8() == replace_len {
|
||||||
|
break; // nailed it
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
input.0.bytes_mut().splice(range, replacement);
|
||||||
|
input.1 = extract_metadata(input.0.bytes());
|
||||||
|
|
||||||
|
MutationResult::Mutated
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Unicode category data, as used by string analysis and mutators.
|
||||||
|
pub mod unicode_categories {
|
||||||
|
#![allow(unused)]
|
||||||
|
#![allow(missing_docs)]
|
||||||
|
#![allow(clippy::redundant_static_lifetimes)]
|
||||||
|
|
||||||
|
include!(concat!(env!("OUT_DIR"), "/unicode_categories.rs"));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Mutator which randomly replaces a randomly selected range of bytes with bytes that preserve the
|
||||||
|
/// range's category
|
||||||
|
#[derive(Debug, Default)]
|
||||||
|
pub struct StringCategoryRandMutator;
|
||||||
|
|
||||||
|
impl Named for StringCategoryRandMutator {
|
||||||
|
fn name(&self) -> &str {
|
||||||
|
"string-category-rand"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<S> Mutator<UnicodeInput, S> for StringCategoryRandMutator
|
||||||
|
where
|
||||||
|
S: HasRand + HasMaxSize,
|
||||||
|
{
|
||||||
|
fn mutate(
|
||||||
|
&mut self,
|
||||||
|
state: &mut S,
|
||||||
|
input: &mut UnicodeInput,
|
||||||
|
_stage_idx: i32,
|
||||||
|
) -> Result<MutationResult, Error> {
|
||||||
|
if input.0.bytes().is_empty() {
|
||||||
|
return Ok(MutationResult::Skipped);
|
||||||
|
}
|
||||||
|
|
||||||
|
let bytes = input.0.bytes();
|
||||||
|
let meta = &input.1;
|
||||||
|
if let Some((base, len)) = choose_start(state.rand_mut(), bytes, meta) {
|
||||||
|
let substring = core::str::from_utf8(&bytes[base..][..len])?;
|
||||||
|
let (range, category) = choose_category_range(state.rand_mut(), substring);
|
||||||
|
#[cfg(test)]
|
||||||
|
println!(
|
||||||
|
"{:?} => {:?}",
|
||||||
|
range,
|
||||||
|
core::str::from_utf8(&bytes[range.clone()])
|
||||||
|
);
|
||||||
|
|
||||||
|
let options: u64 = category
|
||||||
|
.iter()
|
||||||
|
.map(|&(start, end)| u64::from(end) - u64::from(start) + 1)
|
||||||
|
.sum();
|
||||||
|
let char_gen = |state: &mut S| loop {
|
||||||
|
let mut selected = state.rand_mut().below(options);
|
||||||
|
for &(min, max) in category {
|
||||||
|
if let Some(next_selected) =
|
||||||
|
selected.checked_sub(u64::from(max) - u64::from(min) + 1)
|
||||||
|
{
|
||||||
|
selected = next_selected;
|
||||||
|
} else if let Some(new_c) = char::from_u32(selected as u32 + min) {
|
||||||
|
return new_c;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
return Ok(rand_replace_range(state, input, range, char_gen));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(MutationResult::Skipped)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Mutator which randomly replaces a randomly selected range of bytes with bytes that preserve the
|
||||||
|
/// range's subcategory
|
||||||
|
#[derive(Debug, Default)]
|
||||||
|
pub struct StringSubcategoryRandMutator;
|
||||||
|
|
||||||
|
impl Named for StringSubcategoryRandMutator {
|
||||||
|
fn name(&self) -> &str {
|
||||||
|
"string-subcategory-rand"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<S> Mutator<UnicodeInput, S> for StringSubcategoryRandMutator
|
||||||
|
where
|
||||||
|
S: HasRand + HasMaxSize,
|
||||||
|
{
|
||||||
|
fn mutate(
|
||||||
|
&mut self,
|
||||||
|
state: &mut S,
|
||||||
|
input: &mut UnicodeInput,
|
||||||
|
_stage_idx: i32,
|
||||||
|
) -> Result<MutationResult, Error> {
|
||||||
|
if input.0.bytes().is_empty() {
|
||||||
|
return Ok(MutationResult::Skipped);
|
||||||
|
}
|
||||||
|
|
||||||
|
let bytes = input.0.bytes();
|
||||||
|
let meta = &input.1;
|
||||||
|
if let Some((base, len)) = choose_start(state.rand_mut(), bytes, meta) {
|
||||||
|
let substring = core::str::from_utf8(&bytes[base..][..len])?;
|
||||||
|
let (range, subcategory) = choose_subcategory_range(state.rand_mut(), substring);
|
||||||
|
#[cfg(test)]
|
||||||
|
println!(
|
||||||
|
"{:?} => {:?}",
|
||||||
|
range,
|
||||||
|
core::str::from_utf8(&bytes[range.clone()])
|
||||||
|
);
|
||||||
|
|
||||||
|
let options: u64 = u64::from(subcategory.1) - u64::from(subcategory.0) + 1;
|
||||||
|
let char_gen = |state: &mut S| loop {
|
||||||
|
let selected = state.rand_mut().below(options);
|
||||||
|
if let Some(new_c) = char::from_u32(selected as u32 + subcategory.0) {
|
||||||
|
return new_c;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
return Ok(rand_replace_range(state, input, range, char_gen));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(MutationResult::Skipped)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Mutator which randomly replaces a full category-contiguous region of chars with a random token
|
||||||
|
#[derive(Debug, Default)]
|
||||||
|
pub struct StringCategoryTokenReplaceMutator;
|
||||||
|
|
||||||
|
impl Named for StringCategoryTokenReplaceMutator {
|
||||||
|
fn name(&self) -> &str {
|
||||||
|
"string-category-token-replace"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<S> Mutator<UnicodeInput, S> for StringCategoryTokenReplaceMutator
|
||||||
|
where
|
||||||
|
S: HasRand + HasMaxSize + HasMetadata,
|
||||||
|
{
|
||||||
|
fn mutate(
|
||||||
|
&mut self,
|
||||||
|
state: &mut S,
|
||||||
|
input: &mut UnicodeInput,
|
||||||
|
_stage_idx: i32,
|
||||||
|
) -> Result<MutationResult, Error> {
|
||||||
|
if input.0.bytes().is_empty() {
|
||||||
|
return Ok(MutationResult::Skipped);
|
||||||
|
}
|
||||||
|
|
||||||
|
let tokens_len = {
|
||||||
|
let meta = state.metadata_map().get::<Tokens>();
|
||||||
|
if meta.is_none() {
|
||||||
|
return Ok(MutationResult::Skipped);
|
||||||
|
}
|
||||||
|
if meta.unwrap().tokens().is_empty() {
|
||||||
|
return Ok(MutationResult::Skipped);
|
||||||
|
}
|
||||||
|
meta.unwrap().tokens().len()
|
||||||
|
};
|
||||||
|
let token_idx = state.rand_mut().below(tokens_len as u64) as usize;
|
||||||
|
|
||||||
|
let bytes = input.0.bytes();
|
||||||
|
let meta = &input.1;
|
||||||
|
if let Some((base, len)) = choose_start(state.rand_mut(), bytes, meta) {
|
||||||
|
let substring = core::str::from_utf8(&bytes[base..][..len])?;
|
||||||
|
let (range, _) = choose_category_range(state.rand_mut(), substring);
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
println!(
|
||||||
|
"{:?} => {:?}",
|
||||||
|
range,
|
||||||
|
core::str::from_utf8(&bytes[range.clone()])
|
||||||
|
);
|
||||||
|
|
||||||
|
let meta = state.metadata_map().get::<Tokens>().unwrap();
|
||||||
|
let token = &meta.tokens()[token_idx];
|
||||||
|
|
||||||
|
if input.0.len() - (range.end - range.start) + token.len() > state.max_size() {
|
||||||
|
return Ok(MutationResult::Skipped);
|
||||||
|
}
|
||||||
|
|
||||||
|
input.0.bytes_mut().splice(range, token.iter().copied());
|
||||||
|
input.1 = extract_metadata(input.0.bytes());
|
||||||
|
return Ok(MutationResult::Mutated);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(MutationResult::Skipped)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Mutator which randomly replaces a full subcategory-contiguous region of chars with a random token
|
||||||
|
#[derive(Debug, Default)]
|
||||||
|
pub struct StringSubcategoryTokenReplaceMutator;
|
||||||
|
|
||||||
|
impl Named for StringSubcategoryTokenReplaceMutator {
|
||||||
|
fn name(&self) -> &str {
|
||||||
|
"string-subcategory-replace"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<S> Mutator<UnicodeInput, S> for StringSubcategoryTokenReplaceMutator
|
||||||
|
where
|
||||||
|
S: HasRand + HasMaxSize + HasMetadata,
|
||||||
|
{
|
||||||
|
fn mutate(
|
||||||
|
&mut self,
|
||||||
|
state: &mut S,
|
||||||
|
input: &mut UnicodeInput,
|
||||||
|
_stage_idx: i32,
|
||||||
|
) -> Result<MutationResult, Error> {
|
||||||
|
if input.0.bytes().is_empty() {
|
||||||
|
return Ok(MutationResult::Skipped);
|
||||||
|
}
|
||||||
|
|
||||||
|
let tokens_len = {
|
||||||
|
let meta = state.metadata_map().get::<Tokens>();
|
||||||
|
if meta.is_none() {
|
||||||
|
return Ok(MutationResult::Skipped);
|
||||||
|
}
|
||||||
|
if meta.unwrap().tokens().is_empty() {
|
||||||
|
return Ok(MutationResult::Skipped);
|
||||||
|
}
|
||||||
|
meta.unwrap().tokens().len()
|
||||||
|
};
|
||||||
|
let token_idx = state.rand_mut().below(tokens_len as u64) as usize;
|
||||||
|
|
||||||
|
let bytes = input.0.bytes();
|
||||||
|
let meta = &input.1;
|
||||||
|
if let Some((base, len)) = choose_start(state.rand_mut(), bytes, meta) {
|
||||||
|
let substring = core::str::from_utf8(&bytes[base..][..len])?;
|
||||||
|
let (range, _) = choose_subcategory_range(state.rand_mut(), substring);
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
println!(
|
||||||
|
"{:?} => {:?}",
|
||||||
|
range,
|
||||||
|
core::str::from_utf8(&bytes[range.clone()])
|
||||||
|
);
|
||||||
|
|
||||||
|
let meta = state.metadata_map().get::<Tokens>().unwrap();
|
||||||
|
let token = &meta.tokens()[token_idx];
|
||||||
|
|
||||||
|
if input.0.len() - (range.end - range.start) + token.len() > state.max_size() {
|
||||||
|
return Ok(MutationResult::Skipped);
|
||||||
|
}
|
||||||
|
|
||||||
|
input.0.bytes_mut().splice(range, token.iter().copied());
|
||||||
|
input.1 = extract_metadata(input.0.bytes());
|
||||||
|
return Ok(MutationResult::Mutated);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(MutationResult::Skipped)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod test {
    use libafl_bolts::rands::StdRand;

    use super::*;
    use crate::{corpus::NopCorpus, stages::extract_metadata, state::StdState};

    // a not-so-useful test for this
    /// Smoke test: repeatedly applies the category mutator to a hex string and checks
    /// that every intermediate input is still valid UTF-8.
    #[test]
    fn mutate_hex() {
        fn run() -> Result<(), Error> {
            let seed_text = "0123456789abcdef0123456789abcdef";
            let mut current = BytesInput::from(seed_text.as_bytes());

            let mut mutator = StringCategoryRandMutator;

            let mut state = StdState::new(
                StdRand::with_seed(0),
                NopCorpus::<BytesInput>::new(),
                NopCorpus::new(),
                &mut (),
                &mut (),
            )?;

            for _ in 0..(1 << 12) {
                // The string regions are rebuilt for every round from the latest bytes.
                let regions = extract_metadata(current.bytes());
                let mut input = (current, regions);
                let _ = mutator.mutate(&mut state, &mut input, 0);
                println!("{:?}", core::str::from_utf8(input.0.bytes()).unwrap());
                current = input.0;
            }

            Ok(())
        }

        if let Err(e) = run() {
            panic!("failed with error: {e}");
        }
    }

    /// Smoke test: same as `mutate_hex`, but driving the subcategory mutator.
    #[test]
    fn mutate_hex_subcat() {
        fn run() -> Result<(), Error> {
            let seed_text = "0123456789abcdef0123456789abcdef";
            let mut current = BytesInput::from(seed_text.as_bytes());

            let mut mutator = StringSubcategoryRandMutator;

            let mut state = StdState::new(
                StdRand::with_seed(0),
                NopCorpus::<BytesInput>::new(),
                NopCorpus::new(),
                &mut (),
                &mut (),
            )?;

            for _ in 0..(1 << 12) {
                // The string regions are rebuilt for every round from the latest bytes.
                let regions = extract_metadata(current.bytes());
                let mut input = (current, regions);
                let _ = mutator.mutate(&mut state, &mut input, 0);
                println!("{:?}", core::str::from_utf8(input.0.bytes()).unwrap());
                current = input.0;
            }

            Ok(())
        }

        if let Err(e) = run() {
            panic!("failed with error: {e}");
        }
    }
}
|
@ -49,6 +49,11 @@ pub use concolic::ConcolicTracingStage;
|
|||||||
#[cfg(feature = "std")]
|
#[cfg(feature = "std")]
|
||||||
pub use concolic::SimpleConcolicMutationalStage;
|
pub use concolic::SimpleConcolicMutationalStage;
|
||||||
|
|
||||||
|
#[cfg(feature = "unicode")]
|
||||||
|
pub mod string;
|
||||||
|
#[cfg(feature = "unicode")]
|
||||||
|
pub use string::*;
|
||||||
|
|
||||||
#[cfg(feature = "std")]
|
#[cfg(feature = "std")]
|
||||||
pub mod sync;
|
pub mod sync;
|
||||||
#[cfg(feature = "std")]
|
#[cfg(feature = "std")]
|
||||||
@ -56,6 +61,7 @@ pub use sync::*;
|
|||||||
|
|
||||||
#[cfg(feature = "std")]
|
#[cfg(feature = "std")]
|
||||||
pub mod dump;
|
pub mod dump;
|
||||||
|
|
||||||
use core::{convert::From, marker::PhantomData};
|
use core::{convert::From, marker::PhantomData};
|
||||||
|
|
||||||
#[cfg(feature = "std")]
|
#[cfg(feature = "std")]
|
||||||
|
128
libafl/src/stages/string.rs
Normal file
128
libafl/src/stages/string.rs
Normal file
@ -0,0 +1,128 @@
|
|||||||
|
//! Stages which perform the analysis common to Unicode-style mutations
|
||||||
|
|
||||||
|
use alloc::{collections::VecDeque, rc::Rc, vec::Vec};
|
||||||
|
use core::marker::PhantomData;
|
||||||
|
|
||||||
|
use bitvec::{bitvec, vec::BitVec};
|
||||||
|
use libafl_bolts::{impl_serdeany, Error};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
corpus::{CorpusId, HasTestcase},
|
||||||
|
inputs::{BytesInput, HasBytesVec, UsesInput},
|
||||||
|
stages::Stage,
|
||||||
|
state::{HasCorpus, HasMetadata, UsesState},
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Metadata which stores the list of pre-computed string-like ranges in the input
|
||||||
|
#[derive(Debug, Default, Serialize, Deserialize, Clone)]
|
||||||
|
pub struct StringIdentificationMetadata {
|
||||||
|
ranges: Rc<Vec<(usize, BitVec)>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl_serdeany!(StringIdentificationMetadata);
|
||||||
|
|
||||||
|
impl StringIdentificationMetadata {
|
||||||
|
/// The list of pre-computed string-like ranges in the input
|
||||||
|
#[must_use]
|
||||||
|
pub fn ranges(&self) -> &Vec<(usize, BitVec)> {
|
||||||
|
self.ranges.as_ref()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn extract_metadata(bytes: &[u8]) -> StringIdentificationMetadata {
|
||||||
|
let mut ranges = Vec::new();
|
||||||
|
|
||||||
|
if !bytes.is_empty() {
|
||||||
|
let mut queue = VecDeque::new();
|
||||||
|
let mut visited = bitvec![0; bytes.len()];
|
||||||
|
queue.push_back(0);
|
||||||
|
|
||||||
|
while let Some(i) = queue.pop_front() {
|
||||||
|
if i >= bytes.len() || visited[i] {
|
||||||
|
// if we've already visited a particular entry, then we already know its range(s)
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
visited.set(i, true); // we always visit the current entry
|
||||||
|
let s = core::str::from_utf8(&bytes[i..]).unwrap_or_else(|e| {
|
||||||
|
queue.push_back(i + e.valid_up_to() + 1); // push to the next region
|
||||||
|
core::str::from_utf8(&bytes[i..][..e.valid_up_to()]).unwrap()
|
||||||
|
});
|
||||||
|
if !s.is_empty() {
|
||||||
|
let mut entries = bitvec![0; s.bytes().len()];
|
||||||
|
for (c_idx, _) in s.char_indices() {
|
||||||
|
entries.set(c_idx, true);
|
||||||
|
visited.set(i + c_idx, true);
|
||||||
|
}
|
||||||
|
for unset in entries.iter_zeros() {
|
||||||
|
// each unset index potentially represents a new UTF-8 start point
|
||||||
|
queue.push_back(unset);
|
||||||
|
}
|
||||||
|
ranges.push((i, entries));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
StringIdentificationMetadata {
|
||||||
|
ranges: Rc::new(ranges),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Stage which identifies potential strings in the provided input
#[derive(Debug)]
pub struct StringIdentificationStage<S> {
    /// Ties the stage to its state type without storing a value of it.
    phantom: PhantomData<S>,
}

impl<S> Default for StringIdentificationStage<S> {
    /// Builds the (stateless) stage.
    fn default() -> Self {
        Self {
            phantom: PhantomData,
        }
    }
}

impl<S> StringIdentificationStage<S> {
    /// Create a new instance of the string identification stage
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }
}
|
||||||
|
|
||||||
|
impl<S> UsesState for StringIdentificationStage<S>
|
||||||
|
where
|
||||||
|
S: UsesInput,
|
||||||
|
{
|
||||||
|
type State = S;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<S, E, EM, Z> Stage<E, EM, Z> for StringIdentificationStage<S>
|
||||||
|
where
|
||||||
|
S: HasTestcase<Input = BytesInput> + HasCorpus,
|
||||||
|
E: UsesState<State = S>,
|
||||||
|
EM: UsesState<State = S>,
|
||||||
|
Z: UsesState<State = S>,
|
||||||
|
{
|
||||||
|
fn perform(
|
||||||
|
&mut self,
|
||||||
|
_fuzzer: &mut Z,
|
||||||
|
_executor: &mut E,
|
||||||
|
state: &mut Self::State,
|
||||||
|
_manager: &mut EM,
|
||||||
|
corpus_idx: CorpusId,
|
||||||
|
) -> Result<(), Error> {
|
||||||
|
let mut tc = state.testcase_mut(corpus_idx)?;
|
||||||
|
if tc.has_metadata::<StringIdentificationMetadata>() {
|
||||||
|
return Ok(()); // skip recompute
|
||||||
|
}
|
||||||
|
|
||||||
|
let input = tc.load_input(state.corpus())?;
|
||||||
|
|
||||||
|
let bytes = input.bytes();
|
||||||
|
let metadata = extract_metadata(bytes);
|
||||||
|
tc.add_metadata(metadata);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
@ -169,11 +169,6 @@ use log::{Metadata, Record};
|
|||||||
/// out of `libafl_bolts` into `libafl::events::launcher`.
|
/// out of `libafl_bolts` into `libafl::events::launcher`.
|
||||||
pub mod launcher {}
|
pub mod launcher {}
|
||||||
|
|
||||||
// Re-export derive(SerdeAny)
|
|
||||||
#[cfg(feature = "libafl_derive")]
|
|
||||||
#[allow(unused_imports)]
|
|
||||||
#[macro_use]
|
|
||||||
extern crate libafl_derive;
|
|
||||||
use core::{
|
use core::{
|
||||||
array::TryFromSliceError,
|
array::TryFromSliceError,
|
||||||
fmt::{self, Display},
|
fmt::{self, Display},
|
||||||
@ -190,6 +185,7 @@ pub use libafl_derive::SerdeAny;
|
|||||||
use {
|
use {
|
||||||
alloc::string::{FromUtf8Error, String},
|
alloc::string::{FromUtf8Error, String},
|
||||||
core::cell::{BorrowError, BorrowMutError},
|
core::cell::{BorrowError, BorrowMutError},
|
||||||
|
core::str::Utf8Error,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// We need fixed names for many parts of this lib.
|
/// We need fixed names for many parts of this lib.
|
||||||
@ -505,6 +501,14 @@ impl From<FromUtf8Error> for Error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "alloc")]
impl From<Utf8Error> for Error {
    /// Wraps a UTF-8 decoding failure in an unknown [`Error`] whose message embeds the
    /// debug rendering of the cause.
    #[allow(unused_variables)]
    fn from(err: Utf8Error) -> Self {
        let message = format!("Could not convert byte / utf-8: {err:?}");
        Self::unknown(message)
    }
}
|
||||||
|
|
||||||
#[cfg(feature = "std")]
|
#[cfg(feature = "std")]
|
||||||
impl From<VarError> for Error {
|
impl From<VarError> for Error {
|
||||||
#[allow(unused_variables)]
|
#[allow(unused_variables)]
|
||||||
|
@ -85,7 +85,7 @@ macro_rules! create_serde_registry_for_trait {
|
|||||||
Error,
|
Error,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Visitor object used internally for the [`SerdeAny`] registry.
|
/// Visitor object used internally for the [`crate::serdeany::SerdeAny`] registry.
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct BoxDynVisitor {}
|
pub struct BoxDynVisitor {}
|
||||||
#[allow(unused_qualifications)]
|
#[allow(unused_qualifications)]
|
||||||
@ -319,7 +319,7 @@ macro_rules! create_serde_registry_for_trait {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A serializable [`HashMap`] wrapper for [`SerdeAny`] types, addressable by name.
|
/// A serializable [`HashMap`] wrapper for [`crate::serdeany::SerdeAny`] types, addressable by name.
|
||||||
#[allow(clippy::unsafe_derive_deserialize)]
|
#[allow(clippy::unsafe_derive_deserialize)]
|
||||||
#[allow(unused_qualifications)]
|
#[allow(unused_qualifications)]
|
||||||
#[derive(Debug, Serialize, Deserialize)]
|
#[derive(Debug, Serialize, Deserialize)]
|
||||||
|
@ -30,7 +30,7 @@ path = "src/lib.rs"
|
|||||||
crate-type = ["staticlib", "rlib"]
|
crate-type = ["staticlib", "rlib"]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
libafl = { path = "../../libafl", default-features = false, features = ["std", "derive", "llmp_compression", "rand_trait", "regex", "errors_backtrace", "serdeany_autoreg", "tui_monitor"] }
|
libafl = { path = "../../libafl", default-features = false, features = ["std", "derive", "llmp_compression", "rand_trait", "regex", "errors_backtrace", "serdeany_autoreg", "tui_monitor", "unicode"] }
|
||||||
libafl_bolts = { path = "../../libafl_bolts", default-features = false, features = ["std", "derive", "llmp_compression", "rand_trait", "serdeany_autoreg", "errors_backtrace"] }
|
libafl_bolts = { path = "../../libafl_bolts", default-features = false, features = ["std", "derive", "llmp_compression", "rand_trait", "serdeany_autoreg", "errors_backtrace"] }
|
||||||
libafl_targets = { path = "../../libafl_targets", features = ["sancov_8bit", "sancov_cmplog", "libfuzzer", "libfuzzer_oom", "libfuzzer_define_run_driver", "libfuzzer_interceptors", "sanitizers_flags", "whole_archive"] }
|
libafl_targets = { path = "../../libafl_targets", features = ["sancov_8bit", "sancov_cmplog", "libfuzzer", "libfuzzer_oom", "libfuzzer_define_run_driver", "libfuzzer_interceptors", "sanitizers_flags", "whole_archive"] }
|
||||||
|
|
||||||
|
@ -166,7 +166,8 @@ macro_rules! fuzz_with {
|
|||||||
mutators::{
|
mutators::{
|
||||||
GrimoireExtensionMutator, GrimoireRecursiveReplacementMutator, GrimoireRandomDeleteMutator,
|
GrimoireExtensionMutator, GrimoireRecursiveReplacementMutator, GrimoireRandomDeleteMutator,
|
||||||
GrimoireStringReplacementMutator, havoc_crossover, havoc_mutations, havoc_mutations_no_crossover,
|
GrimoireStringReplacementMutator, havoc_crossover, havoc_mutations, havoc_mutations_no_crossover,
|
||||||
I2SRandReplace, StdScheduledMutator, Tokens, tokens_mutations
|
I2SRandReplace, StdScheduledMutator, StringCategoryRandMutator, StringSubcategoryRandMutator,
|
||||||
|
StringCategoryTokenReplaceMutator, StringSubcategoryTokenReplaceMutator, Tokens, tokens_mutations
|
||||||
},
|
},
|
||||||
observers::{stacktrace::BacktraceObserver, TimeObserver},
|
observers::{stacktrace::BacktraceObserver, TimeObserver},
|
||||||
schedulers::{
|
schedulers::{
|
||||||
@ -174,7 +175,7 @@ macro_rules! fuzz_with {
|
|||||||
},
|
},
|
||||||
stages::{
|
stages::{
|
||||||
CalibrationStage, GeneralizationStage, IfStage, StdMutationalStage,
|
CalibrationStage, GeneralizationStage, IfStage, StdMutationalStage,
|
||||||
StdPowerMutationalStage, TracingStage,
|
StdPowerMutationalStage, StringIdentificationStage, TracingStage,
|
||||||
},
|
},
|
||||||
state::{HasCorpus, StdState},
|
state::{HasCorpus, StdState},
|
||||||
StdFuzzer,
|
StdFuzzer,
|
||||||
@ -224,7 +225,7 @@ macro_rules! fuzz_with {
|
|||||||
|
|
||||||
// Set up a generalization stage for grimoire
|
// Set up a generalization stage for grimoire
|
||||||
let generalization = GeneralizationStage::new(&edges_observer);
|
let generalization = GeneralizationStage::new(&edges_observer);
|
||||||
let generalization = IfStage::new(|_, _, _, _, _| Ok(grimoire.into()), (generalization, ()));
|
let generalization = IfStage::new(|_, _, _, _, _| Ok(grimoire.into()), tuple_list!(generalization));
|
||||||
|
|
||||||
let calibration = CalibrationStage::new(&map_feedback);
|
let calibration = CalibrationStage::new(&map_feedback);
|
||||||
|
|
||||||
@ -296,6 +297,32 @@ macro_rules! fuzz_with {
|
|||||||
});
|
});
|
||||||
state.metadata_map_mut().insert_boxed(grimoire_metadata);
|
state.metadata_map_mut().insert_boxed(grimoire_metadata);
|
||||||
|
|
||||||
|
// Set up a string category analysis stage for unicode mutations
|
||||||
|
let unicode_used = $options.unicode();
|
||||||
|
let string_mutator = StdScheduledMutator::new(
|
||||||
|
tuple_list!(
|
||||||
|
StringCategoryRandMutator,
|
||||||
|
StringSubcategoryRandMutator,
|
||||||
|
StringSubcategoryRandMutator,
|
||||||
|
StringSubcategoryRandMutator,
|
||||||
|
StringSubcategoryRandMutator,
|
||||||
|
)
|
||||||
|
);
|
||||||
|
let string_replace_mutator = StdScheduledMutator::new(
|
||||||
|
tuple_list!(
|
||||||
|
StringCategoryTokenReplaceMutator,
|
||||||
|
StringSubcategoryTokenReplaceMutator,
|
||||||
|
StringSubcategoryTokenReplaceMutator,
|
||||||
|
StringSubcategoryTokenReplaceMutator,
|
||||||
|
StringSubcategoryTokenReplaceMutator,
|
||||||
|
)
|
||||||
|
);
|
||||||
|
let string_power = StdMutationalStage::transforming(string_mutator);
|
||||||
|
let string_replace_power = StdMutationalStage::transforming(string_replace_mutator);
|
||||||
|
|
||||||
|
let string_analysis = StringIdentificationStage::new();
|
||||||
|
let string_analysis = IfStage::new(|_, _, _, _, _| Ok((unicode_used && mutator_status.std_mutational).into()), tuple_list!(string_analysis, string_power, string_replace_power));
|
||||||
|
|
||||||
// Attempt to use tokens from libfuzzer dicts
|
// Attempt to use tokens from libfuzzer dicts
|
||||||
if !state.has_metadata::<Tokens>() {
|
if !state.has_metadata::<Tokens>() {
|
||||||
let mut toks = if let Some(tokens) = $options.dict() {
|
let mut toks = if let Some(tokens) = $options.dict() {
|
||||||
@ -466,6 +493,7 @@ macro_rules! fuzz_with {
|
|||||||
calibration,
|
calibration,
|
||||||
generalization,
|
generalization,
|
||||||
tracing,
|
tracing,
|
||||||
|
string_analysis,
|
||||||
i2s,
|
i2s,
|
||||||
cm_i2s,
|
cm_i2s,
|
||||||
std_power,
|
std_power,
|
||||||
|
@ -107,6 +107,7 @@ pub struct LibfuzzerOptions {
|
|||||||
artifact_prefix: ArtifactPrefix,
|
artifact_prefix: ArtifactPrefix,
|
||||||
timeout: Duration,
|
timeout: Duration,
|
||||||
grimoire: Option<bool>,
|
grimoire: Option<bool>,
|
||||||
|
unicode: bool,
|
||||||
forks: Option<usize>,
|
forks: Option<usize>,
|
||||||
dict: Option<Tokens>,
|
dict: Option<Tokens>,
|
||||||
dirs: Vec<PathBuf>,
|
dirs: Vec<PathBuf>,
|
||||||
@ -162,6 +163,10 @@ impl LibfuzzerOptions {
|
|||||||
self.grimoire
|
self.grimoire
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn unicode(&self) -> bool {
|
||||||
|
self.unicode
|
||||||
|
}
|
||||||
|
|
||||||
pub fn forks(&self) -> Option<usize> {
|
pub fn forks(&self) -> Option<usize> {
|
||||||
self.forks
|
self.forks
|
||||||
}
|
}
|
||||||
@ -230,6 +235,7 @@ struct LibfuzzerOptionsBuilder<'a> {
|
|||||||
artifact_prefix: Option<&'a str>,
|
artifact_prefix: Option<&'a str>,
|
||||||
timeout: Option<Duration>,
|
timeout: Option<Duration>,
|
||||||
grimoire: Option<bool>,
|
grimoire: Option<bool>,
|
||||||
|
unicode: Option<bool>,
|
||||||
forks: Option<usize>,
|
forks: Option<usize>,
|
||||||
dict: Option<&'a str>,
|
dict: Option<&'a str>,
|
||||||
dirs: Vec<&'a str>,
|
dirs: Vec<&'a str>,
|
||||||
@ -292,6 +298,7 @@ impl<'a> LibfuzzerOptionsBuilder<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
"grimoire" => self.grimoire = Some(parse_or_bail!(name, value, u64) > 0),
|
"grimoire" => self.grimoire = Some(parse_or_bail!(name, value, u64) > 0),
|
||||||
|
"unicode" => self.unicode = Some(parse_or_bail!(name, value, u64) > 0),
|
||||||
"artifact_prefix" => {
|
"artifact_prefix" => {
|
||||||
self.artifact_prefix = Some(value);
|
self.artifact_prefix = Some(value);
|
||||||
}
|
}
|
||||||
@ -349,6 +356,7 @@ impl<'a> LibfuzzerOptionsBuilder<'a> {
|
|||||||
.unwrap_or_default(),
|
.unwrap_or_default(),
|
||||||
timeout: self.timeout.unwrap_or(Duration::from_secs(1200)),
|
timeout: self.timeout.unwrap_or(Duration::from_secs(1200)),
|
||||||
grimoire: self.grimoire,
|
grimoire: self.grimoire,
|
||||||
|
unicode: self.unicode.unwrap_or(true),
|
||||||
forks: self.forks,
|
forks: self.forks,
|
||||||
dict: self.dict.map(|path| {
|
dict: self.dict.map(|path| {
|
||||||
Tokens::from_file(path).expect("Couldn't load tokens from specified dictionary")
|
Tokens::from_file(path).expect("Couldn't load tokens from specified dictionary")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user