Unicode-preserving mutators (#1542)

* create the string classification stage

* modify API to pre-group

* preserving mutator

* more meaningful test

* subproperty mutators + some fixes

* document, finalise, integrate with libafl_libfuzzer

* add example, fix for weird range select

* fix for introspection

* fix fuzzer build

* speed optimisation: allow, but do not require, stacking

* property => category

* token replacement

* fixup: rare case where rust does not agree on valid character

* fix CI again

* again again

* take two: dynamic unicode discovery

* oops

* fix: last byte is never selected

* opt: bias to smaller unicode categories

* fix test

* opt: precompute regions and fix tests

* cache and allow stacking

* document and update libafl_libfuzzer

* oops, use reverse

* fix bolts clippy error

* fixup part 2

* clippy

* part 2

* clippy warning allow

* clippy complaint

* use alloc not std

---------

Co-authored-by: toka <tokazerkje@outlook.com>
This commit is contained in:
Addison Crump 2023-11-21 00:41:16 +01:00 committed by GitHub
parent 1e96652ed2
commit 281524dbf9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 1037 additions and 14 deletions

View File

@ -73,6 +73,8 @@ jobs:
run: command -v llvm-config-15 && clang-15 -v
- name: Add nightly rustfmt and clippy
run: rustup toolchain install nightly --component rustfmt --component clippy --component miri --allow-downgrade
- name: Install ucd-generate
run: cargo install -f ucd-generate
- uses: actions/checkout@v3
- uses: Swatinem/rust-cache@v2
@ -135,6 +137,8 @@ jobs:
run: command -v llvm-config-15 && clang-15 -v
- name: Install cargo-hack
run: curl -LsSf https://github.com/taiki-e/cargo-hack/releases/latest/download/cargo-hack-x86_64-unknown-linux-gnu.tar.gz | tar xzf - -C ~/.cargo/bin
- name: Install ucd-generate
run: cargo install -f ucd-generate
- name: Add nightly
run: rustup toolchain install nightly --allow-downgrade
- uses: actions/checkout@v3
@ -222,6 +226,8 @@ jobs:
- name: Install cxxbridge
if: runner.os == 'macOS'
run: cargo install cxxbridge-cmd
- name: Install ucd-generate
run: cargo install -f ucd-generate
- name: Install python (macOS)
# Removing macOS things already installed in CI against failed linking
if: runner.os == 'macOS'
@ -384,6 +390,8 @@ jobs:
toolchain: stable
- name: Add nightly rustfmt and clippy
run: rustup toolchain install nightly --component rustfmt --component clippy --allow-downgrade
- name: Install ucd-generate
run: cargo install -f ucd-generate
- name: Install deps
run: brew install z3 gtk+3
- name: Install cxxbridge
@ -453,6 +461,7 @@ jobs:
freebsd-version
. "$HOME/.cargo/env"
rustup toolchain install nightly
cargo install -f ucd-generate
export LLVM_CONFIG=/usr/local/bin/llvm-config16
pwd
ls -lah

View File

@ -0,0 +1 @@
libpng-*

View File

@ -0,0 +1,24 @@
[package]
name = "baby_fuzzer_unicode"
version = "0.10.0"
authors = ["Andrea Fioraldi <andreafioraldi@gmail.com>", "Dominik Maier <domenukk@gmail.com>"]
edition = "2021"
[features]
default = ["std"]
tui = []
std = []
[profile.dev]
panic = "abort"
[profile.release]
panic = "abort"
lto = true
codegen-units = 1
opt-level = 3
debug = true
[dependencies]
libafl = { path = "../../libafl/", features = ["unicode"] }
libafl_bolts = { path = "../../libafl_bolts/" }

View File

@ -0,0 +1,15 @@
# Baby fuzzer: unicode
This is a minimalistic example of how to create a LibAFL-based fuzzer.
It runs on a single core until a crash occurs and then exits.
The tested program is a simple Rust function without any instrumentation.
For real fuzzing, you will want to add some sort of coverage or other feedback.
You can run this example using `cargo run`, and you can enable the TUI feature by running `cargo run --features tui`.
## Unicode
This fuzzer uses mutators which preserve unicode properties. For programs which have string-heavy inputs, you may
consider using the same strategy.

View File

@ -0,0 +1,138 @@
#[cfg(windows)]
use std::ptr::write_volatile;
use std::{path::PathBuf, ptr::write};
#[cfg(feature = "tui")]
use libafl::monitors::tui::{ui::TuiUI, TuiMonitor};
#[cfg(not(feature = "tui"))]
use libafl::monitors::SimpleMonitor;
use libafl::{
corpus::{InMemoryCorpus, OnDiskCorpus},
events::SimpleEventManager,
executors::{inprocess::InProcessExecutor, ExitKind},
feedbacks::{CrashFeedback, MaxMapFeedback},
fuzzer::{Fuzzer, StdFuzzer},
inputs::{BytesInput, HasTargetBytes},
mutators::{StdScheduledMutator, StringCategoryRandMutator, StringSubcategoryRandMutator},
observers::StdMapObserver,
schedulers::QueueScheduler,
stages::{mutational::StdMutationalStage, StringIdentificationStage},
state::StdState,
Evaluator,
};
use libafl_bolts::{current_nanos, rands::StdRand, tuples::tuple_list, AsSlice};
/// Coverage map with explicit assignments due to the lack of instrumentation.
/// The harness in `main` marks one entry per matched input byte through `signals_set`.
static mut SIGNALS: [u8; 64] = [0; 64];
/// Raw pointer to the first entry of `SIGNALS`; `signals_set` writes through it and `main`
/// hands it to the map observer.
static mut SIGNALS_PTR: *mut u8 = unsafe { SIGNALS.as_mut_ptr() };
/// Assign a signal to the signals map by writing `1` to entry `idx`.
///
/// `idx` is not bounds-checked here; callers must keep it below the length of `SIGNALS` (64).
fn signals_set(idx: usize) {
// SAFETY: NOTE(review) - only sound while `idx < SIGNALS.len()`. The harness in `main` stops at
// the length of its 52-byte goal string, but nothing in this function enforces that bound.
unsafe { write(SIGNALS_PTR.add(idx), 1) };
}
/// Entry point of the example: builds an in-process fuzzer around a toy harness and runs the
/// fuzzing loop with the string category/subcategory mutators.
#[allow(clippy::similar_names, clippy::manual_assert)]
pub fn main() {
// The closure that we want to fuzz
let mut harness = |input: &BytesInput| {
let target = input.target_bytes();
let buf = target.as_slice();
let goal = b"abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
// Count how many leading bytes of the input match the goal, signalling one map entry per match
let mut i = 0;
for _ in buf.iter().zip(goal).take_while(|(b, c)| b == c) {
signals_set(i);
i += 1;
}
// The whole goal was matched: trigger the artificial crash
if i == goal.len() {
#[cfg(unix)]
panic!("Artificial bug triggered =)");
#[cfg(windows)]
unsafe {
write_volatile(0 as *mut u32, 0);
}
}
ExitKind::Ok
};
// Create an observation channel using the signals map
let observer = unsafe { StdMapObserver::from_mut_ptr("signals", SIGNALS_PTR, SIGNALS.len()) };
// Feedback to rate the interestingness of an input
let mut feedback = MaxMapFeedback::new(&observer);
// A feedback to choose if an input is a solution or not
let mut objective = CrashFeedback::new();
// create a State from scratch
let mut state = StdState::new(
// RNG
StdRand::with_seed(current_nanos()),
// Corpus that will be evolved, we keep it in memory for performance
InMemoryCorpus::new(),
// Corpus in which we store solutions (crashes in this example),
// on disk so the user can get them after stopping the fuzzer
OnDiskCorpus::new(PathBuf::from("./crashes")).unwrap(),
// States of the feedbacks.
// The feedbacks can report the data that should persist in the State.
&mut feedback,
// Same for objective feedbacks
&mut objective,
)
.unwrap();
// The Monitor trait defines how the fuzzer stats are displayed to the user
#[cfg(not(feature = "tui"))]
let mon = SimpleMonitor::new(|s| println!("{s}"));
#[cfg(feature = "tui")]
let ui = TuiUI::with_version(String::from("Baby Fuzzer"), String::from("0.0.1"), false);
#[cfg(feature = "tui")]
let mon = TuiMonitor::new(ui);
// The event manager handles the various events generated during the fuzzing loop
// such as the notification of the addition of a new item to the corpus
let mut mgr = SimpleEventManager::new(mon);
// A queue policy to get testcases from the corpus
let scheduler = QueueScheduler::new();
// A fuzzer with feedbacks and a corpus scheduler
let mut fuzzer = StdFuzzer::new(scheduler, feedback, objective);
// Create the executor for an in-process function with just one observer
let mut executor = InProcessExecutor::new(
&mut harness,
tuple_list!(observer),
&mut fuzzer,
&mut state,
&mut mgr,
)
.expect("Failed to create the Executor");
// Evaluate a single initial input (the byte `a`) to seed the corpus
fuzzer
.evaluate_input(
&mut state,
&mut executor,
&mut mgr,
BytesInput::new(vec![b'a']),
)
.unwrap();
// Set up a scheduled mutator built from the unicode-preserving string mutators.
// NOTE(review): the subcategory mutator is listed four times, presumably so that it is picked
// more often than the category mutator - confirm against the scheduler's selection logic.
let mutator = StdScheduledMutator::new(tuple_list!(
StringCategoryRandMutator,
StringSubcategoryRandMutator,
StringSubcategoryRandMutator,
StringSubcategoryRandMutator,
StringSubcategoryRandMutator
));
// The identification stage is listed before the mutational stage: the mutational stage's input
// transform reads `StringIdentificationMetadata` from the testcase.
let mut stages = tuple_list!(
StringIdentificationStage::new(),
StdMutationalStage::transforming(mutator)
);
fuzzer
.fuzz_loop(&mut stages, &mut executor, &mut state, &mut mgr)
.expect("Error in the fuzzing loop");
}

View File

@ -77,6 +77,9 @@ concolic_mutation = ["z3"]
## Enable the fancy TuiMonitor for a terminal UI using crossterm
tui_monitor = ["ratatui", "crossterm"]
## Enables `StringIdentificationStage` and associated mutators, which allow for mutations which preserve the Unicode property data
unicode = ["libafl_bolts/alloc", "ahash/std", "serde/rc", "bitvec"]
#! ## LibAFL-Bolts Features
@ -126,7 +129,9 @@ agpl = ["nautilus"]
nautilus = ["grammartec", "std", "serde_json/std"]
[build-dependencies]
reqwest = { version = "0.11", features = ["blocking"] }
rustversion = "1.0"
zip = "0.6"
[dev-dependencies]
serde_json = { version = "1.0", default-features = false, features = ["alloc"] }
@ -172,7 +177,9 @@ z3 = { version = "0.12.0", features = ["static-link-z3"], optional = true } # fo
pyo3 = { version = "0.18", optional = true, features = ["serde", "macros"] }
concat-idents = { version = "1.1.3", optional = true }
libcasr = { version = "2.7", optional = true}
libcasr = { version = "2.7", optional = true }
bitvec = { version = "1.0", optional = true, features = ["serde"] } # used for string range storage
# optional-dev deps (change when target.'cfg(accessible(::std))'.test-dependencies will be stable)
serial_test = { version = "2", optional = true, default-features = false, features = ["logging"] }

View File

@ -1,14 +1,69 @@
use std::error::Error;
#[rustversion::nightly]
fn main() {
fn main() -> Result<(), Box<dyn Error>> {
println!("cargo:rerun-if-changed=build.rs");
println!("cargo:rustc-cfg=nightly");
#[cfg(feature = "unicode")]
{
build_unicode_property_map()?;
}
Ok(())
}
#[rustversion::not(nightly)]
fn main() {
fn main() -> Result<(), Box<dyn Error>> {
println!("cargo:rerun-if-changed=build.rs");
assert!(
cfg!(all(not(docrs), not(feature = "nautilus"))),
"The 'nautilus' feature of libafl requires a nightly compiler"
);
#[cfg(feature = "unicode")]
{
build_unicode_property_map()?;
}
Ok(())
}
/// Generates `$OUT_DIR/unicode_categories.rs` for the `unicode` feature.
///
/// The latest zipped Unicode Character Database is downloaded into `$OUT_DIR/ucd-dir`, extracted
/// there, and the external `ucd-generate general-category` tool is run over the extracted
/// directory with its standard output redirected into the generated file.
///
/// # Errors
///
/// Returns an error if `OUT_DIR` is not set, if the download fails or answers with a non-success
/// HTTP status, if any file-system or archive operation fails, or if `ucd-generate` cannot be
/// started or exits unsuccessfully.
#[cfg(feature = "unicode")]
fn build_unicode_property_map() -> Result<(), Box<dyn Error>> {
    use std::{
        env,
        fs::File,
        io::{BufWriter, Write},
        path::PathBuf,
        process::{Command, Stdio},
    };

    let out_dir = PathBuf::from(
        env::var_os("OUT_DIR").ok_or("OUT_DIR is not set; build.rs must be run by cargo")?,
    );
    let ucd_dir = out_dir.join("ucd-dir");
    let generated_file = out_dir.join("unicode_categories.rs");
    std::fs::create_dir_all(&ucd_dir)?;

    // Fetch the archive first and reject HTTP error responses, so that an error page is never
    // written to disk and later misreported as a corrupt zip file.
    let zip_path = ucd_dir.join("ucd.zip");
    let payload = reqwest::blocking::get("https://www.unicode.org/Public/zipped/latest/UCD.zip")?
        .error_for_status()?
        .bytes()?;

    let mut ucd_file = BufWriter::new(File::create(&zip_path)?);
    for chunk in payload.chunks(1 << 12) {
        ucd_file.write_all(chunk)?;
    }
    // flush explicitly: dropping a BufWriter swallows write errors
    ucd_file.flush()?;
    drop(ucd_file);

    let mut zip_file = zip::ZipArchive::new(File::open(&zip_path)?)?;
    zip_file.extract(&ucd_dir)?;
    drop(zip_file);
    // the archive is no longer needed once extracted
    std::fs::remove_file(zip_path)?;

    let status = Command::new("ucd-generate")
        .arg("general-category")
        .arg(ucd_dir.as_os_str())
        .stdout(Stdio::from(File::create(generated_file)?))
        .status()?;
    if !status.success() {
        return Err(format!(
            "`ucd-generate general-category` failed ({status}); is ucd-generate installed (`cargo install ucd-generate`)?"
        )
        .into());
    }
    Ok(())
}

View File

@ -20,6 +20,11 @@ pub use grimoire::*;
pub mod tuneable;
pub use tuneable::*;
#[cfg(feature = "unicode")]
pub mod string;
#[cfg(feature = "unicode")]
pub use string::*;
#[cfg(feature = "nautilus")]
pub mod nautilus;
use alloc::vec::Vec;

View File

@ -0,0 +1,595 @@
//! Mutators for preserving string categories, which may be useful for certain targets which are primarily string-oriented.
use alloc::vec::Vec;
use core::{
cmp::{Ordering, Reverse},
ops::Range,
};
use libafl_bolts::{rands::Rand, Error, HasLen, Named};
use crate::{
corpus::{CorpusId, HasTestcase, Testcase},
inputs::{BytesInput, HasBytesVec},
mutators::{rand_range, MutationResult, Mutator, Tokens},
stages::{
extract_metadata,
mutational::{MutatedTransform, MutatedTransformPost},
StringIdentificationMetadata,
},
state::{HasCorpus, HasMaxSize, HasMetadata, HasRand},
};
/// Input which contains the context necessary to perform unicode mutations:
/// the raw bytes together with the string ranges identified in them.
pub type UnicodeInput = (BytesInput, StringIdentificationMetadata);

impl<S> MutatedTransform<BytesInput, S> for UnicodeInput
where
    S: HasCorpus<Input = BytesInput> + HasTestcase,
{
    type Post = StringIdentificationMetadata;

    /// Builds the mutation input from a testcase by cloning both its bytes and its
    /// [`StringIdentificationMetadata`]; fails if either cannot be obtained.
    fn try_transform_from(
        base: &mut Testcase<BytesInput>,
        state: &S,
        _corpus_idx: CorpusId,
    ) -> Result<Self, Error> {
        let bytes = base.load_input(state.corpus()).cloned()?;
        let identified = base.metadata::<StringIdentificationMetadata>()?.clone();
        Ok((bytes, identified))
    }

    /// Splits the pair back into the bytes to execute and the metadata to attach afterwards.
    fn try_transform_into(self, _state: &S) -> Result<(BytesInput, Self::Post), Error> {
        let (bytes, identified) = self;
        Ok((bytes, identified))
    }
}
impl<S> MutatedTransformPost<S> for StringIdentificationMetadata
where
    S: HasTestcase,
{
    /// Attaches this metadata to the testcase at `corpus_idx`, if one is given;
    /// otherwise there is nothing to do.
    fn post_exec(
        self,
        state: &mut S,
        _stage_idx: i32,
        corpus_idx: Option<CorpusId>,
    ) -> Result<(), Error> {
        match corpus_idx {
            None => Ok(()),
            Some(idx) => {
                state.testcase_mut(idx)?.add_metadata(self);
                Ok(())
            }
        }
    }
}
// Bound used by `rand_replace_range`: it is passed to `rand_range` when narrowing the selected
// range and is the exclusive upper limit of the randomly drawn replacement length.
// NOTE(review): despite the name, the replacement length is compared against encoded byte
// lengths, not character counts.
const MAX_CHARS: usize = 16;
/// Picks one of the identified string ranges that covers a randomly chosen byte of `bytes`.
///
/// A range qualifies only if the chosen byte lies inside it *and* is marked in the range's
/// bit vector. Returns `(start offset of the range, length of the range in bytes)`, or `None`
/// when no range qualifies.
fn choose_start<R: Rand>(
    rand: &mut R,
    bytes: &[u8],
    meta: &StringIdentificationMetadata,
) -> Option<(usize, usize)> {
    let picked = rand.below(bytes.len() as u64) as usize;

    let mut candidates: Vec<_> = meta
        .ranges()
        .iter()
        .filter(|(start, entries)| match picked.checked_sub(*start) {
            // the picked byte is inside this range: keep it only if the bit is set
            Some(offset) if offset < entries.len() => entries[offset],
            // before the range start, or past its end
            _ => false,
        })
        .map(|(start, entries)| (*start, entries))
        .collect();

    if candidates.is_empty() {
        return None;
    }

    let chosen = if candidates.len() == 1 {
        0
    } else {
        // bias towards longer strings: order by ascending number of set bits, then favour
        // high positions by taking the integer square root of a uniform draw below n^2
        candidates.sort_by_cached_key(|(_, entries)| entries.count_ones());
        let squared = candidates.len() * candidates.len();
        libafl_bolts::math::integer_sqrt(rand.below(squared as u64)) as usize
    };

    let (start, entries) = candidates[chosen];
    Some((start, entries.len()))
}
/// Finds the inclusive `(min, max)` interval in `haystack` that contains `needle`.
///
/// `haystack` is searched with a binary search, so it is expected to be sorted and
/// non-overlapping. Returns `None` when no interval contains `needle`.
fn get_subcategory<T: Ord + Copy>(needle: T, haystack: &[(T, T)]) -> Option<(T, T)> {
    let located = haystack.binary_search_by(|&(lo, hi)| {
        if lo > needle {
            // interval lies entirely above the needle
            Ordering::Greater
        } else if needle > hi {
            // interval lies entirely below the needle
            Ordering::Less
        } else {
            Ordering::Equal
        }
    });

    match located {
        Ok(pos) => Some(haystack[pos]),
        Err(_) => None,
    }
}
/// Expands from the character at `chars[idx]` to the surrounding run of characters that
/// satisfy `predicate`, returning the run as a byte range.
///
/// `chars` holds `(byte offset, char)` pairs. The character at `idx` is always included and
/// is not itself tested against `predicate`.
fn find_range<F: Fn(char) -> bool>(
    chars: &[(usize, char)],
    idx: usize,
    predicate: F,
) -> Range<usize> {
    let (anchor_pos, anchor_char) = chars[idx];

    // walk backwards, moving the start to each preceding char that still matches
    let mut start = anchor_pos;
    for &(pos, c) in chars[..idx].iter().rev() {
        if !predicate(c) {
            break;
        }
        start = pos;
    }

    // walk forwards, moving the end past each following char that still matches
    let mut end = anchor_pos + anchor_char.len_utf8();
    for &(pos, c) in &chars[(idx + 1)..] {
        if !predicate(c) {
            break;
        }
        end = pos + c.len_utf8();
    }

    start..end
}
/// Picks a random character of `string`, selects one of the unicode categories containing it,
/// and returns the byte range (relative to `string`) of the contiguous run of characters around
/// it that belong to that category, together with the category's interval table.
///
/// `string` must be non-empty, since a character index is drawn below its character count.
fn choose_category_range<R: Rand>(
rand: &mut R,
string: &str,
) -> (Range<usize>, &'static [(u32, u32)]) {
let chars = string.char_indices().collect::<Vec<_>>();
let idx = rand.below(chars.len() as u64) as usize;
let c = chars[idx].1;
// figure out the categories for this char
let expanded = c as u32;
#[cfg(test)]
let mut names = Vec::new();
let mut categories = Vec::new();
for (_name, category) in unicode_categories::BY_NAME {
if get_subcategory(expanded, category).is_some() {
#[cfg(test)]
names.push(_name);
categories.push(category);
}
}
// ok -- we want to bias towards smaller regions to keep the mutations "tight" to original
// we sort the options by descending length, then pick isqrt of below(n^2)
categories.sort_by_cached_key(|cat| {
Reverse(
cat.iter()
.map(|&(min, max)| (max - min + 1) as usize)
.sum::<usize>(),
)
});
let options = categories.len() * categories.len();
let selected_idx = libafl_bolts::math::integer_sqrt(rand.below(options as u64)) as usize;
let selected = categories[selected_idx];
// NOTE(review): `names` keeps insertion order while `categories` was sorted above, so the name
// printed by this test-only log may not belong to the selected category.
#[cfg(test)]
println!("category for `{c}' ({}): {}", c as u32, names[selected_idx]);
(
// grow the range over neighbouring chars which fall in the selected category
find_range(&chars, idx, |c| {
get_subcategory(c as u32, selected).is_some()
}),
selected,
)
}
/// Picks a random character of `string`, selects one of the unicode subcategory intervals
/// containing it, and returns the byte range (relative to `string`) of the contiguous run of
/// characters around it that fall inside that interval, together with the interval itself.
///
/// `string` must be non-empty, since a character index is drawn below its character count.
fn choose_subcategory_range<R: Rand>(rand: &mut R, string: &str) -> (Range<usize>, (u32, u32)) {
let chars = string.char_indices().collect::<Vec<_>>();
let idx = rand.below(chars.len() as u64) as usize;
let c = chars[idx].1;
// figure out the categories for this char
let expanded = c as u32;
#[cfg(test)]
let mut names = Vec::new();
let mut subcategories = Vec::new();
for (_name, category) in unicode_categories::BY_NAME {
if let Some(subcategory) = get_subcategory(expanded, category) {
#[cfg(test)]
names.push(_name);
subcategories.push(subcategory);
}
}
// see reasoning for selection pattern in choose_category_range
subcategories.sort_by_key(|&(min, max)| Reverse(max - min + 1));
let options = subcategories.len() * subcategories.len();
let selected_idx = libafl_bolts::math::integer_sqrt(rand.below(options as u64)) as usize;
let selected = subcategories[selected_idx];
// NOTE(review): `names` keeps insertion order while `subcategories` was sorted above, so the name
// printed by this test-only log may not belong to the selected interval.
#[cfg(test)]
println!(
"subcategory for `{c}' ({}): {} ({:?})",
c as u32, names[selected_idx], selected
);
(
// grow the range over neighbouring chars whose code points lie within the selected interval
find_range(&chars, idx, |c| {
let expanded = c as u32;
selected.0 <= expanded && expanded <= selected.1
}),
selected,
)
}
/// Replaces a random part of `range` (byte offsets into the input) with characters produced
/// by `char_gen`, then recomputes the input's string metadata.
///
/// The part to replace is narrowed with `rand_range` and, if that cut falls inside a
/// multi-byte character, truncated to the valid UTF-8 prefix. The replacement is filled with
/// generated characters until the next one would exceed a randomly drawn byte budget
/// (below `MAX_CHARS`), so it may be empty, which deletes the selected part.
///
/// Returns [`MutationResult::Skipped`] if the narrowed range is empty or if the result would
/// exceed the state's maximum input size; otherwise [`MutationResult::Mutated`].
fn rand_replace_range<S: HasRand + HasMaxSize, F: Fn(&mut S) -> char>(
    state: &mut S,
    input: &mut UnicodeInput,
    range: Range<usize>,
    char_gen: F,
) -> MutationResult {
    let temp_range = rand_range(state, range.end - range.start, MAX_CHARS);
    let range = (range.start + temp_range.start)..(range.start + temp_range.end);
    // keep only the prefix which is still valid UTF-8 after narrowing
    let range = match core::str::from_utf8(&input.0.bytes()[range.clone()]) {
        Ok(_) => range,
        Err(e) => range.start..(range.start + e.valid_up_to()),
    };

    #[cfg(test)]
    println!(
        "mutating range: {:?} ({:?})",
        range,
        core::str::from_utf8(&input.0.bytes()[range.clone()])
    );
    if range.start == range.end {
        return MutationResult::Skipped;
    }

    let replace_len = state.rand_mut().below(MAX_CHARS as u64) as usize;
    let orig_len = range.end - range.start;
    if input.0.len() - orig_len + replace_len > state.max_size() {
        return MutationResult::Skipped;
    }

    let mut replacement = Vec::with_capacity(replace_len);
    let mut dest = [0u8; 4];

    loop {
        let new_c = char_gen(state);
        let encoded = new_c.encode_utf8(&mut dest);
        if replacement.len() + encoded.len() > replace_len {
            // this char no longer fits in the budget
            break;
        }
        replacement.extend_from_slice(encoded.as_bytes());
        // the char has already been appended, so compare the length as-is; adding its size
        // again would stop one char early and never detect an exact fill
        if replacement.len() == replace_len {
            break; // nailed it
        }
    }

    input.0.bytes_mut().splice(range, replacement);
    input.1 = extract_metadata(input.0.bytes());

    MutationResult::Mutated
}
/// Unicode category data, as used by string analysis and mutators.
///
/// The contents are included from the `unicode_categories.rs` file in the build's `OUT_DIR`.
pub mod unicode_categories {
// the included file is not hand-written, so lints about unused items, missing docs and
// explicit `'static` lifetimes are silenced for this module
#![allow(unused)]
#![allow(missing_docs)]
#![allow(clippy::redundant_static_lifetimes)]
include!(concat!(env!("OUT_DIR"), "/unicode_categories.rs"));
}
/// Mutator which randomly replaces a randomly selected range of bytes with bytes that preserve the
/// range's category
#[derive(Debug, Default)]
pub struct StringCategoryRandMutator;

impl Named for StringCategoryRandMutator {
    fn name(&self) -> &str {
        "string-category-rand"
    }
}

impl<S> Mutator<UnicodeInput, S> for StringCategoryRandMutator
where
    S: HasRand + HasMaxSize,
{
    /// Selects an identified string in the input, picks a category-contiguous run of
    /// characters inside it, and replaces part of that run with random characters drawn from
    /// the same category.
    ///
    /// Skips when the input is empty or no identified string covers the randomly chosen byte.
    ///
    /// # Errors
    ///
    /// Fails if the selected string region is not valid UTF-8.
    fn mutate(
        &mut self,
        state: &mut S,
        input: &mut UnicodeInput,
        _stage_idx: i32,
    ) -> Result<MutationResult, Error> {
        if input.0.bytes().is_empty() {
            return Ok(MutationResult::Skipped);
        }

        let bytes = input.0.bytes();
        let meta = &input.1;
        if let Some((base, len)) = choose_start(state.rand_mut(), bytes, meta) {
            let substring = core::str::from_utf8(&bytes[base..][..len])?;
            let (range, category) = choose_category_range(state.rand_mut(), substring);
            // the chosen range is relative to `substring`, which starts at `base`; shift it so
            // that it addresses the same bytes in the full input
            let range = (base + range.start)..(base + range.end);
            #[cfg(test)]
            println!(
                "{:?} => {:?}",
                range,
                core::str::from_utf8(&bytes[range.clone()])
            );

            // total number of code points covered by the category
            let options: u64 = category
                .iter()
                .map(|&(start, end)| u64::from(end) - u64::from(start) + 1)
                .sum();
            // draws a uniformly random code point of the category; retries whenever the draw
            // lands on a value that is not a valid `char`
            let char_gen = |state: &mut S| loop {
                let mut selected = state.rand_mut().below(options);
                for &(min, max) in category {
                    if let Some(next_selected) =
                        selected.checked_sub(u64::from(max) - u64::from(min) + 1)
                    {
                        // the draw lies beyond this interval; move on to the next one
                        selected = next_selected;
                    } else if let Some(new_c) = char::from_u32(selected as u32 + min) {
                        return new_c;
                    } else {
                        break;
                    }
                }
            };

            return Ok(rand_replace_range(state, input, range, char_gen));
        }

        Ok(MutationResult::Skipped)
    }
}
/// Mutator which randomly replaces a randomly selected range of bytes with bytes that preserve the
/// range's subcategory
#[derive(Debug, Default)]
pub struct StringSubcategoryRandMutator;

impl Named for StringSubcategoryRandMutator {
    fn name(&self) -> &str {
        "string-subcategory-rand"
    }
}

impl<S> Mutator<UnicodeInput, S> for StringSubcategoryRandMutator
where
    S: HasRand + HasMaxSize,
{
    /// Selects an identified string in the input, picks a subcategory-contiguous run of
    /// characters inside it, and replaces part of that run with random characters drawn from
    /// the same subcategory interval.
    ///
    /// Skips when the input is empty or no identified string covers the randomly chosen byte.
    ///
    /// # Errors
    ///
    /// Fails if the selected string region is not valid UTF-8.
    fn mutate(
        &mut self,
        state: &mut S,
        input: &mut UnicodeInput,
        _stage_idx: i32,
    ) -> Result<MutationResult, Error> {
        if input.0.bytes().is_empty() {
            return Ok(MutationResult::Skipped);
        }

        let bytes = input.0.bytes();
        let meta = &input.1;
        if let Some((base, len)) = choose_start(state.rand_mut(), bytes, meta) {
            let substring = core::str::from_utf8(&bytes[base..][..len])?;
            let (range, subcategory) = choose_subcategory_range(state.rand_mut(), substring);
            // the chosen range is relative to `substring`, which starts at `base`; shift it so
            // that it addresses the same bytes in the full input
            let range = (base + range.start)..(base + range.end);
            #[cfg(test)]
            println!(
                "{:?} => {:?}",
                range,
                core::str::from_utf8(&bytes[range.clone()])
            );

            // number of code points in the inclusive subcategory interval
            let options: u64 = u64::from(subcategory.1) - u64::from(subcategory.0) + 1;
            // draws a uniformly random code point of the interval; retries whenever the draw
            // lands on a value that is not a valid `char`
            let char_gen = |state: &mut S| loop {
                let selected = state.rand_mut().below(options);
                if let Some(new_c) = char::from_u32(selected as u32 + subcategory.0) {
                    return new_c;
                }
            };

            return Ok(rand_replace_range(state, input, range, char_gen));
        }

        Ok(MutationResult::Skipped)
    }
}
/// Mutator which randomly replaces a full category-contiguous region of chars with a random token
#[derive(Debug, Default)]
pub struct StringCategoryTokenReplaceMutator;

impl Named for StringCategoryTokenReplaceMutator {
    fn name(&self) -> &str {
        "string-category-token-replace"
    }
}

impl<S> Mutator<UnicodeInput, S> for StringCategoryTokenReplaceMutator
where
    S: HasRand + HasMaxSize + HasMetadata,
{
    /// Selects an identified string in the input, picks a category-contiguous run of
    /// characters inside it, and replaces the whole run with a randomly chosen token from the
    /// state's [`Tokens`] metadata.
    ///
    /// Skips when the input is empty, when no (non-empty) [`Tokens`] metadata is present, when
    /// no identified string covers the randomly chosen byte, or when the result would exceed
    /// the state's maximum input size.
    ///
    /// # Errors
    ///
    /// Fails if the selected string region is not valid UTF-8.
    fn mutate(
        &mut self,
        state: &mut S,
        input: &mut UnicodeInput,
        _stage_idx: i32,
    ) -> Result<MutationResult, Error> {
        if input.0.bytes().is_empty() {
            return Ok(MutationResult::Skipped);
        }

        let tokens_len = match state.metadata_map().get::<Tokens>() {
            Some(meta) if !meta.tokens().is_empty() => meta.tokens().len(),
            _ => return Ok(MutationResult::Skipped),
        };
        let token_idx = state.rand_mut().below(tokens_len as u64) as usize;

        let bytes = input.0.bytes();
        let meta = &input.1;
        if let Some((base, len)) = choose_start(state.rand_mut(), bytes, meta) {
            let substring = core::str::from_utf8(&bytes[base..][..len])?;
            let (range, _) = choose_category_range(state.rand_mut(), substring);
            // the chosen range is relative to `substring`, which starts at `base`; shift it so
            // that it addresses the same bytes in the full input
            let range = (base + range.start)..(base + range.end);
            #[cfg(test)]
            println!(
                "{:?} => {:?}",
                range,
                core::str::from_utf8(&bytes[range.clone()])
            );

            // the metadata was found above and nothing has removed it since
            let meta = state.metadata_map().get::<Tokens>().unwrap();
            let token = &meta.tokens()[token_idx];

            if input.0.len() - (range.end - range.start) + token.len() > state.max_size() {
                return Ok(MutationResult::Skipped);
            }

            input.0.bytes_mut().splice(range, token.iter().copied());
            input.1 = extract_metadata(input.0.bytes());
            return Ok(MutationResult::Mutated);
        }

        Ok(MutationResult::Skipped)
    }
}
/// Mutator which randomly replaces a full subcategory-contiguous region of chars with a random token
#[derive(Debug, Default)]
pub struct StringSubcategoryTokenReplaceMutator;

impl Named for StringSubcategoryTokenReplaceMutator {
    fn name(&self) -> &str {
        "string-subcategory-replace"
    }
}

impl<S> Mutator<UnicodeInput, S> for StringSubcategoryTokenReplaceMutator
where
    S: HasRand + HasMaxSize + HasMetadata,
{
    /// Selects an identified string in the input, picks a subcategory-contiguous run of
    /// characters inside it, and replaces the whole run with a randomly chosen token from the
    /// state's [`Tokens`] metadata.
    ///
    /// Skips when the input is empty, when no (non-empty) [`Tokens`] metadata is present, when
    /// no identified string covers the randomly chosen byte, or when the result would exceed
    /// the state's maximum input size.
    ///
    /// # Errors
    ///
    /// Fails if the selected string region is not valid UTF-8.
    fn mutate(
        &mut self,
        state: &mut S,
        input: &mut UnicodeInput,
        _stage_idx: i32,
    ) -> Result<MutationResult, Error> {
        if input.0.bytes().is_empty() {
            return Ok(MutationResult::Skipped);
        }

        let tokens_len = match state.metadata_map().get::<Tokens>() {
            Some(meta) if !meta.tokens().is_empty() => meta.tokens().len(),
            _ => return Ok(MutationResult::Skipped),
        };
        let token_idx = state.rand_mut().below(tokens_len as u64) as usize;

        let bytes = input.0.bytes();
        let meta = &input.1;
        if let Some((base, len)) = choose_start(state.rand_mut(), bytes, meta) {
            let substring = core::str::from_utf8(&bytes[base..][..len])?;
            let (range, _) = choose_subcategory_range(state.rand_mut(), substring);
            // the chosen range is relative to `substring`, which starts at `base`; shift it so
            // that it addresses the same bytes in the full input
            let range = (base + range.start)..(base + range.end);
            #[cfg(test)]
            println!(
                "{:?} => {:?}",
                range,
                core::str::from_utf8(&bytes[range.clone()])
            );

            // the metadata was found above and nothing has removed it since
            let meta = state.metadata_map().get::<Tokens>().unwrap();
            let token = &meta.tokens()[token_idx];

            if input.0.len() - (range.end - range.start) + token.len() > state.max_size() {
                return Ok(MutationResult::Skipped);
            }

            input.0.bytes_mut().splice(range, token.iter().copied());
            input.1 = extract_metadata(input.0.bytes());
            return Ok(MutationResult::Mutated);
        }

        Ok(MutationResult::Skipped)
    }
}
#[cfg(test)]
mod test {
    use libafl_bolts::rands::StdRand;

    use super::*;
    use crate::{corpus::NopCorpus, stages::extract_metadata, state::StdState};

    // a not-so-useful test for this
    /// Repeatedly applies the category mutator to a hex string and checks that the input
    /// remains valid UTF-8 after every round.
    #[test]
    fn mutate_hex() {
        let seed_text = "0123456789abcdef0123456789abcdef";
        let mut current = BytesInput::from(seed_text.as_bytes());
        let mut mutator = StringCategoryRandMutator;

        let mut state = StdState::new(
            StdRand::with_seed(0),
            NopCorpus::<BytesInput>::new(),
            NopCorpus::new(),
            &mut (),
            &mut (),
        )
        .unwrap_or_else(|e| panic!("failed with error: {e}"));

        for _round in 0..4096 {
            let identified = extract_metadata(current.bytes());
            let mut input = (current, identified);
            // the mutation outcome (including errors) is deliberately ignored
            let _ = mutator.mutate(&mut state, &mut input, 0);
            println!("{:?}", core::str::from_utf8(input.0.bytes()).unwrap());
            current = input.0;
        }
    }

    /// Same as `mutate_hex`, but for the subcategory mutator.
    #[test]
    fn mutate_hex_subcat() {
        let seed_text = "0123456789abcdef0123456789abcdef";
        let mut current = BytesInput::from(seed_text.as_bytes());
        let mut mutator = StringSubcategoryRandMutator;

        let mut state = StdState::new(
            StdRand::with_seed(0),
            NopCorpus::<BytesInput>::new(),
            NopCorpus::new(),
            &mut (),
            &mut (),
        )
        .unwrap_or_else(|e| panic!("failed with error: {e}"));

        for _round in 0..4096 {
            let identified = extract_metadata(current.bytes());
            let mut input = (current, identified);
            // the mutation outcome (including errors) is deliberately ignored
            let _ = mutator.mutate(&mut state, &mut input, 0);
            println!("{:?}", core::str::from_utf8(input.0.bytes()).unwrap());
            current = input.0;
        }
    }
}

View File

@ -49,6 +49,11 @@ pub use concolic::ConcolicTracingStage;
#[cfg(feature = "std")]
pub use concolic::SimpleConcolicMutationalStage;
#[cfg(feature = "unicode")]
pub mod string;
#[cfg(feature = "unicode")]
pub use string::*;
#[cfg(feature = "std")]
pub mod sync;
#[cfg(feature = "std")]
@ -56,6 +61,7 @@ pub use sync::*;
#[cfg(feature = "std")]
pub mod dump;
use core::{convert::From, marker::PhantomData};
#[cfg(feature = "std")]

128
libafl/src/stages/string.rs Normal file
View File

@ -0,0 +1,128 @@
//! Stages which perform analysis common to Unicode-style mutations
use alloc::{collections::VecDeque, rc::Rc, vec::Vec};
use core::marker::PhantomData;
use bitvec::{bitvec, vec::BitVec};
use libafl_bolts::{impl_serdeany, Error};
use serde::{Deserialize, Serialize};
use crate::{
corpus::{CorpusId, HasTestcase},
inputs::{BytesInput, HasBytesVec, UsesInput},
stages::Stage,
state::{HasCorpus, HasMetadata, UsesState},
};
/// Metadata which stores the list of pre-computed string-like ranges in the input.
///
/// The list is held behind an [`Rc`], so cloning the metadata does not copy the ranges.
#[derive(Debug, Default, Serialize, Deserialize, Clone)]
pub struct StringIdentificationMetadata {
    ranges: Rc<Vec<(usize, BitVec)>>,
}

impl_serdeany!(StringIdentificationMetadata);

impl StringIdentificationMetadata {
    /// The list of pre-computed string-like ranges in the input, each given as a start
    /// offset paired with a bit vector over the bytes of that range
    #[must_use]
    pub fn ranges(&self) -> &Vec<(usize, BitVec)> {
        &self.ranges
    }
}
/// Scans `bytes` for regions which decode as UTF-8 and records them as string ranges.
///
/// Each recorded range is a pair of the region's start offset in `bytes` and a bit vector with
/// one bit per byte of the region, where a set bit marks the first byte of a character.
/// Empty input yields metadata without any ranges.
pub(crate) fn extract_metadata(bytes: &[u8]) -> StringIdentificationMetadata {
let mut ranges = Vec::new();
if !bytes.is_empty() {
// work list of candidate start offsets, processed in FIFO order
let mut queue = VecDeque::new();
let mut visited = bitvec![0; bytes.len()];
queue.push_back(0);
while let Some(i) = queue.pop_front() {
if i >= bytes.len() || visited[i] {
// if we've already visited a particular entry, then we already know its range(s)
continue;
}
visited.set(i, true); // we always visit the current entry
// decode as much as possible from `i`; on a decoding error keep the valid prefix and queue
// the offset just past the byte where decoding stopped
let s = core::str::from_utf8(&bytes[i..]).unwrap_or_else(|e| {
queue.push_back(i + e.valid_up_to() + 1); // push to the next region
core::str::from_utf8(&bytes[i..][..e.valid_up_to()]).unwrap()
});
if !s.is_empty() {
let mut entries = bitvec![0; s.bytes().len()];
// mark the first byte of every char, both in this range and in the global visited set
for (c_idx, _) in s.char_indices() {
entries.set(c_idx, true);
visited.set(i + c_idx, true);
}
for unset in entries.iter_zeros() {
// each unset index potentially represents a new UTF-8 start point
// NOTE(review): `unset` is an offset within `entries` (i.e. relative to `i`), while the other
// values pushed to the queue are offsets into `bytes` - confirm whether `i + unset` was intended.
queue.push_back(unset);
}
ranges.push((i, entries));
}
}
}
StringIdentificationMetadata {
ranges: Rc::new(ranges),
}
}
/// Stage which identifies potential strings in the provided input
#[derive(Debug)]
pub struct StringIdentificationStage<S> {
    phantom: PhantomData<S>,
}

impl<S> StringIdentificationStage<S> {
    /// Create a new instance of the string identification stage
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }
}

impl<S> Default for StringIdentificationStage<S> {
    /// The stage carries no data of its own, so the default instance is the only instance.
    fn default() -> Self {
        Self {
            phantom: PhantomData,
        }
    }
}

impl<S> UsesState for StringIdentificationStage<S>
where
    S: UsesInput,
{
    type State = S;
}
impl<S, E, EM, Z> Stage<E, EM, Z> for StringIdentificationStage<S>
where
    S: HasTestcase<Input = BytesInput> + HasCorpus,
    E: UsesState<State = S>,
    EM: UsesState<State = S>,
    Z: UsesState<State = S>,
{
    /// Computes and attaches [`StringIdentificationMetadata`] to the testcase at `corpus_idx`,
    /// unless the testcase already carries it.
    fn perform(
        &mut self,
        _fuzzer: &mut Z,
        _executor: &mut E,
        state: &mut Self::State,
        _manager: &mut EM,
        corpus_idx: CorpusId,
    ) -> Result<(), Error> {
        let mut testcase = state.testcase_mut(corpus_idx)?;
        // only compute the ranges once per testcase
        if !testcase.has_metadata::<StringIdentificationMetadata>() {
            let identified = extract_metadata(testcase.load_input(state.corpus())?.bytes());
            testcase.add_metadata(identified);
        }
        Ok(())
    }
}

View File

@ -169,11 +169,6 @@ use log::{Metadata, Record};
/// out of `libafl_bolts` into `libafl::events::launcher`.
pub mod launcher {}
// Re-export derive(SerdeAny)
#[cfg(feature = "libafl_derive")]
#[allow(unused_imports)]
#[macro_use]
extern crate libafl_derive;
use core::{
array::TryFromSliceError,
fmt::{self, Display},
@ -190,6 +185,7 @@ pub use libafl_derive::SerdeAny;
use {
alloc::string::{FromUtf8Error, String},
core::cell::{BorrowError, BorrowMutError},
core::str::Utf8Error,
};
/// We need fixed names for many parts of this lib.
@ -505,6 +501,14 @@ impl From<FromUtf8Error> for Error {
}
}
// Lets `?` be used on `core::str::from_utf8` results in functions returning this crate's `Error`:
// the `Utf8Error` is wrapped via `Error::unknown`, with its debug representation in the message.
#[cfg(feature = "alloc")]
impl From<Utf8Error> for Error {
#[allow(unused_variables)]
fn from(err: Utf8Error) -> Self {
Self::unknown(format!("Could not convert byte / utf-8: {err:?}"))
}
}
#[cfg(feature = "std")]
impl From<VarError> for Error {
#[allow(unused_variables)]

View File

@ -85,7 +85,7 @@ macro_rules! create_serde_registry_for_trait {
Error,
};
/// Visitor object used internally for the [`SerdeAny`] registry.
/// Visitor object used internally for the [`crate::serdeany::SerdeAny`] registry.
#[derive(Debug)]
pub struct BoxDynVisitor {}
#[allow(unused_qualifications)]
@ -319,7 +319,7 @@ macro_rules! create_serde_registry_for_trait {
}
}
/// A serializable [`HashMap`] wrapper for [`SerdeAny`] types, addressable by name.
/// A serializable [`HashMap`] wrapper for [`crate::serdeany::SerdeAny`] types, addressable by name.
#[allow(clippy::unsafe_derive_deserialize)]
#[allow(unused_qualifications)]
#[derive(Debug, Serialize, Deserialize)]

View File

@ -30,7 +30,7 @@ path = "src/lib.rs"
crate-type = ["staticlib", "rlib"]
[dependencies]
libafl = { path = "../../libafl", default-features = false, features = ["std", "derive", "llmp_compression", "rand_trait", "regex", "errors_backtrace", "serdeany_autoreg", "tui_monitor"] }
libafl = { path = "../../libafl", default-features = false, features = ["std", "derive", "llmp_compression", "rand_trait", "regex", "errors_backtrace", "serdeany_autoreg", "tui_monitor", "unicode"] }
libafl_bolts = { path = "../../libafl_bolts", default-features = false, features = ["std", "derive", "llmp_compression", "rand_trait", "serdeany_autoreg", "errors_backtrace"] }
libafl_targets = { path = "../../libafl_targets", features = ["sancov_8bit", "sancov_cmplog", "libfuzzer", "libfuzzer_oom", "libfuzzer_define_run_driver", "libfuzzer_interceptors", "sanitizers_flags", "whole_archive"] }

View File

@ -166,7 +166,8 @@ macro_rules! fuzz_with {
mutators::{
GrimoireExtensionMutator, GrimoireRecursiveReplacementMutator, GrimoireRandomDeleteMutator,
GrimoireStringReplacementMutator, havoc_crossover, havoc_mutations, havoc_mutations_no_crossover,
I2SRandReplace, StdScheduledMutator, Tokens, tokens_mutations
I2SRandReplace, StdScheduledMutator, StringCategoryRandMutator, StringSubcategoryRandMutator,
StringCategoryTokenReplaceMutator, StringSubcategoryTokenReplaceMutator, Tokens, tokens_mutations
},
observers::{stacktrace::BacktraceObserver, TimeObserver},
schedulers::{
@ -174,7 +175,7 @@ macro_rules! fuzz_with {
},
stages::{
CalibrationStage, GeneralizationStage, IfStage, StdMutationalStage,
StdPowerMutationalStage, TracingStage,
StdPowerMutationalStage, StringIdentificationStage, TracingStage,
},
state::{HasCorpus, StdState},
StdFuzzer,
@ -224,7 +225,7 @@ macro_rules! fuzz_with {
// Set up a generalization stage for grimoire
let generalization = GeneralizationStage::new(&edges_observer);
let generalization = IfStage::new(|_, _, _, _, _| Ok(grimoire.into()), (generalization, ()));
let generalization = IfStage::new(|_, _, _, _, _| Ok(grimoire.into()), tuple_list!(generalization));
let calibration = CalibrationStage::new(&map_feedback);
@ -296,6 +297,32 @@ macro_rules! fuzz_with {
});
state.metadata_map_mut().insert_boxed(grimoire_metadata);
// Set up a string category analysis stage for unicode mutations
let unicode_used = $options.unicode();
let string_mutator = StdScheduledMutator::new(
tuple_list!(
StringCategoryRandMutator,
StringSubcategoryRandMutator,
StringSubcategoryRandMutator,
StringSubcategoryRandMutator,
StringSubcategoryRandMutator,
)
);
let string_replace_mutator = StdScheduledMutator::new(
tuple_list!(
StringCategoryTokenReplaceMutator,
StringSubcategoryTokenReplaceMutator,
StringSubcategoryTokenReplaceMutator,
StringSubcategoryTokenReplaceMutator,
StringSubcategoryTokenReplaceMutator,
)
);
let string_power = StdMutationalStage::transforming(string_mutator);
let string_replace_power = StdMutationalStage::transforming(string_replace_mutator);
let string_analysis = StringIdentificationStage::new();
let string_analysis = IfStage::new(|_, _, _, _, _| Ok((unicode_used && mutator_status.std_mutational).into()), tuple_list!(string_analysis, string_power, string_replace_power));
// Attempt to use tokens from libfuzzer dicts
if !state.has_metadata::<Tokens>() {
let mut toks = if let Some(tokens) = $options.dict() {
@ -466,6 +493,7 @@ macro_rules! fuzz_with {
calibration,
generalization,
tracing,
string_analysis,
i2s,
cm_i2s,
std_power,

View File

@ -107,6 +107,7 @@ pub struct LibfuzzerOptions {
artifact_prefix: ArtifactPrefix,
timeout: Duration,
grimoire: Option<bool>,
unicode: bool,
forks: Option<usize>,
dict: Option<Tokens>,
dirs: Vec<PathBuf>,
@ -162,6 +163,10 @@ impl LibfuzzerOptions {
self.grimoire
}
pub fn unicode(&self) -> bool {
self.unicode
}
pub fn forks(&self) -> Option<usize> {
self.forks
}
@ -230,6 +235,7 @@ struct LibfuzzerOptionsBuilder<'a> {
artifact_prefix: Option<&'a str>,
timeout: Option<Duration>,
grimoire: Option<bool>,
unicode: Option<bool>,
forks: Option<usize>,
dict: Option<&'a str>,
dirs: Vec<&'a str>,
@ -292,6 +298,7 @@ impl<'a> LibfuzzerOptionsBuilder<'a> {
}
}
"grimoire" => self.grimoire = Some(parse_or_bail!(name, value, u64) > 0),
"unicode" => self.unicode = Some(parse_or_bail!(name, value, u64) > 0),
"artifact_prefix" => {
self.artifact_prefix = Some(value);
}
@ -349,6 +356,7 @@ impl<'a> LibfuzzerOptionsBuilder<'a> {
.unwrap_or_default(),
timeout: self.timeout.unwrap_or(Duration::from_secs(1200)),
grimoire: self.grimoire,
unicode: self.unicode.unwrap_or(true),
forks: self.forks,
dict: self.dict.map(|path| {
Tokens::from_file(path).expect("Couldn't load tokens from specified dictionary")