Move Nautilus to LibAFL, remove AGPL dependencies (#2265)

* Copy choose method for unbounded iterators

* Add choose method for unbounded iterators

* Copy&paste in nautilus grammartec

* cargo

* fmt

* Initial Nautilus in LibAFL

* missing link

* clippy

* clippy

* more clippy

* docs

* docs

* more docs

* remove nautilus default

* fix doctest

* fmt

* less vec

* test

* less flakey

* clippy

* clippy
This commit is contained in:
Dominik Maier 2024-06-03 14:18:52 +02:00 committed by GitHub
parent 0f9c82f893
commit 58c39680c9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
33 changed files with 2971 additions and 109 deletions

View File

@ -189,19 +189,6 @@ jobs:
# Fix me plz
# - name: Test Build libafl_libfuzzer with embed
# run: cargo +nightly test --features=embed-runtime --manifest-path libafl_libfuzzer/Cargo.toml
ubuntu-check-nightly:
runs-on: ubuntu-22.04
needs: ubuntu
steps:
- uses: actions/checkout@v3
- uses: ./.github/workflows/ubuntu-prepare
- uses: Swatinem/rust-cache@v2
with: { shared-key: "ubuntu" }
# ---- build and feature check ----
# cargo-hack's --feature-powerset would be nice here but libafl has a too many knobs
- name: Check nightly features
run: cargo +nightly check --features=agpl && cargo +nightly check --features=nautilus
ubuntu-check:
runs-on: ubuntu-22.04
@ -218,7 +205,6 @@ jobs:
# cargo-hack's --feature-powerset would be nice here but libafl has a too many knobs
- name: Check each feature
# Skipping `python` as it has to be built with the `maturin` tool
# `agpl`, `nautilus` require nightly
# `sancov_pcguard_edges` is tested separately
run: python3 ./scripts/parallellize_cargo_check.py ${{ matrix.instance_idx }}

View File

@ -149,11 +149,3 @@ Unless you explicitly state otherwise, any contribution intentionally submitted
for inclusion in this crate by you, as defined in the Apache-2.0 license, shall
be dual licensed as above, without any additional terms or conditions.
</sub>
<br>
<sub>
Dependencies under more restrictive licenses, such as GPL or AGPL, can be enabled
using the respective feature in each crate when it is present, such as the
'agpl' feature of the libafl crate.
</sub>

View File

@ -1 +0,0 @@
nightly

View File

@ -130,13 +130,8 @@ llmp_debug = ["std", "libafl_bolts/llmp_debug"]
## Reduces the initial map size for llmp
llmp_small_maps = ["libafl_bolts/llmp_small_maps"] # reduces initial map size for llmp
#! ## License-Changing Dependencies(!)
## Enables all features hiding dependencies licensed under `AGPL`
agpl = ["nautilus"]
## Enables the [`Nautilus`](https://wcventure.github.io/FuzzingPaper/Paper/NDSS19_Nautilus.pdf) Grammar Mutator (AGPL-licensed)
nautilus = ["grammartec", "std", "serde_json/std"]
## Grammar mutator. Requires nightly.
nautilus = ["std", "serde_json/std", "pyo3", "rand_trait", "regex-syntax"]
[build-dependencies]
rustversion = "1.0"
@ -193,16 +188,15 @@ arrayvec = { version = "0.7.4", optional = true, default-features = false } # us
const_format = "0.2.32" # used for providing helpful compiler output
const_panic = "0.2.8" # similarly, for formatting const panic output
pyo3 = { version = "0.18.3", optional = true } # For nautilus
regex-syntax = { version = "0.8.3", optional = true } # For nautilus
# optional-dev deps (change when target.'cfg(accessible(::std))'.test-dependencies will be stable)
serial_test = { version = "3", optional = true, default-features = false, features = ["logging"] }
# Document all features of this crate (for `cargo doc`)
document-features = { version = "0.2", optional = true }
# AGPL
# !!! this create requires nightly
grammartec = { version = "0.3.1", optional = true }
[target.'cfg(unix)'.dependencies]
libc = "0.2" # For (*nix) libc

View File

@ -6,12 +6,7 @@ fn nightly() {
}
#[rustversion::not(nightly)]
fn nightly() {
assert!(
cfg!(all(not(docrs), not(feature = "nautilus"))),
"The 'nautilus' feature of libafl requires a nightly compiler"
);
}
fn nightly() {}
fn main() {
println!("cargo:rustc-check-cfg=cfg(nightly)");

View File

@ -1,8 +1,13 @@
//! This module defines trait shared across different `LibAFL` modules
#![allow(unused, missing_docs)]
use alloc::boxed::Box;
use core::any::type_name;
#[cfg(feature = "nautilus")]
pub mod nautilus;
use libafl_bolts::{
serdeany::{NamedSerdeAnyMap, SerdeAny, SerdeAnyMap},
Error,

View File

@ -0,0 +1,72 @@
# Nautilus 2.0 LibAFL Mutator
Nautilus is a coverage guided, grammar-based mutator. You can use it to improve your test coverage and find more bugs. By specifying the grammar of semi-valid inputs, Nautilus is able to perform complex mutation and to uncover more interesting test cases. Many of the ideas behind the original fuzzer are documented in a paper published at NDSS 2019.
<p>
<a href="https://www.syssec.ruhr-uni-bochum.de/media/emma/veroeffentlichungen/2018/12/17/NDSS19-Nautilus.pdf"> <img align="right" width="200" src="https://github.com/RUB-SysSec/nautilus/raw/master/paper.png"> </a>
</p>
Version 2.0 has added many improvements to this early prototype.
Features from version 2.0 we support in LibAFL:
* Support for grammars specified in python
* Support for non-context free grammars using python scripts to generate inputs from the structure
* Support for specifying binary protocols/formats
* Support for specifying regex based terminals that aren't part of the directed mutations
* Better ability to avoid generating the same very short inputs over and over
* Helpful error output on invalid grammars
## How Does Nautilus Work?
You specify a grammar using rules such as `EXPR -> EXPR + EXPR` or `EXPR -> NUM` and `NUM -> 1`. From these rules, the fuzzer constructs a tree. This internal representation allows to apply much more complex mutations than raw bytes. This tree is then turned into a real input for the target application. In normal Context Free Grammars, this process is straightforward: all leaves are concatenated. The left tree in the example below would unparse to the input `a=1+2` and the right one to `a=1+1+1+2`. To increase the expressiveness of your grammars, using Nautilus you are able to provide python functions for the unparsing process to allow much more complex specifications.
<p align="center">
<img width="400" align="center" src="https://github.com/RUB-SysSec/nautilus/raw/master/tree.png">
</p>
## Examples
Here, we use python to generate a grammar for valid XML-like inputs. Notice the use of a script rule to ensure the opening
and closing tags match.
```python
#ctx.rule(NONTERM: string, RHS: string|bytes) adds a rule NONTERM->RHS. We can use {NONTERM} in the RHS to request a recursion.
ctx.rule("START","<document>{XML_CONTENT}</document>")
ctx.rule("XML_CONTENT","{XML}{XML_CONTENT}")
ctx.rule("XML_CONTENT","")
#ctx.script(NONTERM:string, RHS: [string]], func) adds a rule NONTERM->func(*RHS).
# In contrast to normal `rule`, RHS is an array of nonterminals.
# It's up to the function to combine the values returned for the NONTERMINALS with any fixed content used.
ctx.script("XML",["TAG","ATTR","XML_CONTENT"], lambda tag,attr,body: b"<%s %s>%s</%s>"%(tag,attr,body,tag) )
ctx.rule("ATTR","foo=bar")
ctx.rule("TAG","some_tag")
ctx.rule("TAG","other_tag")
#sometimes we don't want to explore the set of possible inputs in more detail. For example, if we fuzz a script
#interpreter, we don't want to spend time on fuzzing all different variable names. In such cases we can use Regex
#terminals. Regex terminals are only mutated during generation, but not during normal mutation stages, saving a lot of time.
#The fuzzer still explores different values for the regex, but it won't be able to learn interesting values incrementally.
#Use this when incremental exploration would most likely waste time.
ctx.regex("TAG","[a-z]+")
```
To test your [grammars](https://github.com/nautilus-fuzz/nautilus/tree/mit-main/grammars) you can use the generator:
```sh
$ cargo run --bin generator -- -g grammars/grammar_py_exmaple.py -t 100
<document><some_tag foo=bar><other_tag foo=bar><other_tag foo=bar><some_tag foo=bar></some_tag></other_tag><some_tag foo=bar><other_tag foo=bar></other_tag></some_tag><other_tag foo=bar></other_tag><some_tag foo=bar></some_tag></other_tag><other_tag foo=bar></other_tag><some_tag foo=bar></some_tag></some_tag></document>
```
## Trophies
* <https://github.com/Microsoft/ChakraCore/issues/5503>
* <https://github.com/mruby/mruby/issues/3995> (**CVE-2018-10191**)
* <https://github.com/mruby/mruby/issues/4001> (**CVE-2018-10199**)
* <https://github.com/mruby/mruby/issues/4038> (**CVE-2018-12248**)
* <https://github.com/mruby/mruby/issues/4027> (**CVE-2018-11743**)
* <https://github.com/mruby/mruby/issues/4036> (**CVE-2018-12247**)
* <https://github.com/mruby/mruby/issues/4037> (**CVE-2018-12249**)
* <https://bugs.php.net/bug.php?id=76410>
* <https://bugs.php.net/bug.php?id=76244>

View File

@ -0,0 +1,154 @@
use alloc::{string::String, vec::Vec};
use std::{
fs::File,
io::Write,
sync::{atomic::AtomicBool, RwLock},
};
use hashbrown::{HashMap, HashSet};
use libafl_bolts::rands::Rand;
use serde::{Deserialize, Serialize};
use super::{
context::Context,
newtypes::{NTermId, NodeId, RuleId},
rule::RuleIdOrCustom,
tree::{Tree, TreeLike},
};
/// Shared wrapper around a [`ChunkStore`], combining the store itself with a
/// lock-state flag.
#[derive(Debug)]
pub struct ChunkStoreWrapper {
    /// The wrapped chunk store, guarded by a reader-writer lock.
    pub chunkstore: RwLock<ChunkStore>,
    /// NOTE(review): presumably signals to other threads that the store is
    /// busy being updated — confirm against the call sites that set it.
    pub is_locked: AtomicBool,
}
impl ChunkStoreWrapper {
    /// Builds a wrapper holding a fresh, unlocked [`ChunkStore`] rooted at
    /// `work_dir`.
    #[must_use]
    pub fn new(work_dir: String) -> Self {
        let store = ChunkStore::new(work_dir);
        Self {
            chunkstore: RwLock::new(store),
            is_locked: AtomicBool::new(false),
        }
    }
}
/// Stores small subtrees ("chunks") harvested from interesting trees, so that
/// later mutations can splice known-good fragments into new trees.
#[derive(Debug, Serialize, Deserialize)]
pub struct ChunkStore {
    /// Maps a nonterminal to the chunks that derive it, each identified by
    /// (index into `trees`, node within that tree).
    nts_to_chunks: HashMap<NTermId, Vec<(usize, NodeId)>>,
    /// Unparsed byte outputs already recorded, used to deduplicate chunks.
    seen_outputs: HashSet<Vec<u8>>,
    /// All trees that contributed at least one new chunk.
    trees: Vec<Tree>,
    /// Base directory; chunk files are written below `<work_dir>/outputs/chunks/`.
    work_dir: String,
    /// Running counter used to name chunk files on disk.
    number_of_chunks: usize,
}
impl ChunkStore {
    /// Creates an empty chunk store that writes chunk files below `work_dir`.
    #[must_use]
    pub fn new(work_dir: String) -> Self {
        ChunkStore {
            nts_to_chunks: HashMap::new(),
            seen_outputs: HashSet::new(),
            trees: vec![],
            work_dir,
            number_of_chunks: 0,
        }
    }

    /// Registers every previously unseen small subtree of `tree` as a chunk.
    ///
    /// Subtrees larger than 30 nodes are skipped. Each new chunk is unparsed
    /// into `<work_dir>/outputs/chunks/chunk_NNNNNNNNN` on disk, and `tree` is
    /// retained only if it contributed at least one new chunk.
    pub fn add_tree(&mut self, tree: Tree, ctx: &Context) {
        let mut buffer = vec![];
        let id = self.trees.len();
        let mut contains_new_chunk = false;
        for i in 0..tree.size() {
            // Reuse one buffer across iterations instead of reallocating.
            buffer.clear();
            // Only small subtrees are worth storing as splice material.
            if tree.sizes[i] > 30 {
                continue;
            }
            let n = NodeId::from(i);
            tree.unparse(n, ctx, &mut buffer);
            if !self.seen_outputs.contains(&buffer) {
                self.seen_outputs.insert(buffer.clone());
                self.nts_to_chunks
                    .entry(tree.get_rule(n, ctx).nonterm())
                    .or_default()
                    .push((id, n));
                let mut file = File::create(format!(
                    "{}/outputs/chunks/chunk_{:09}",
                    self.work_dir, self.number_of_chunks
                ))
                .expect("RAND_596689790");
                self.number_of_chunks += 1;
                file.write_all(&buffer).expect("RAND_606896756");
                contains_new_chunk = true;
            }
        }
        if contains_new_chunk {
            self.trees.push(tree);
        }
    }

    /// Picks a random stored chunk deriving the same nonterminal as rule `r`,
    /// excluding chunks produced by `r` itself.
    ///
    /// Returns the owning tree and the chunk's root node, or `None` if no
    /// alternative exists.
    pub fn get_alternative_to<R: Rand>(
        &self,
        rand: &mut R,
        r: RuleId,
        ctx: &Context,
    ) -> Option<(&Tree, NodeId)> {
        let chunks = self
            .nts_to_chunks
            .get(&ctx.get_nt(&RuleIdOrCustom::Rule(r)));
        let relevant = chunks.map(|vec| {
            vec.iter()
                .filter(move |&&(tid, nid)| self.trees[tid].get_rule_id(nid) != r)
        });
        //The unwrap_or is just a quick and dirty fix to catch Errors from the sampler
        let selected = relevant.and_then(|iter| rand.choose(iter));
        selected.map(|&(tid, nid)| (&self.trees[tid], nid))
    }

    /// Number of trees currently held in the store.
    #[must_use]
    pub fn trees(&self) -> usize {
        self.trees.len()
    }
}
#[cfg(test)]
mod tests {
    use alloc::string::ToString;
    use std::fs;

    use libafl_bolts::rands::StdRand;

    use crate::common::nautilus::grammartec::{
        chunkstore::ChunkStore, context::Context, tree::TreeLike,
    };

    /// Builds a tiny grammar (A -> "a {B}", B -> "b {C}", C -> "c"), feeds two
    /// generated trees into a `ChunkStore`, and verifies the recorded chunks
    /// unparse to the expected strings.
    #[test]
    fn chunk_store() {
        let mut rand = StdRand::new();
        let mut ctx = Context::new();
        let r1 = ctx.add_rule("A", b"a {B:a}");
        let r2 = ctx.add_rule("B", b"b {C:a}");
        let _ = ctx.add_rule("C", b"c");
        ctx.initialize(101);
        let random_size = ctx.get_random_len_for_ruleid(&r1);
        println!("random_size: {random_size}");
        let tree = ctx.generate_tree_from_rule(&mut rand, r1, random_size);
        // The store writes chunk files; make sure the output directory exists.
        fs::create_dir_all("/tmp/outputs/chunks").expect("40234068");
        let mut cks = ChunkStore::new("/tmp/".to_string());
        cks.add_tree(tree, &ctx);
        // assert!(cks.seen_outputs.contains("a b c".as_bytes()));
        // assert!(cks.seen_outputs.contains("b c".as_bytes()));
        // assert!(cks.seen_outputs.contains("c".as_bytes()));
        assert_eq!(cks.nts_to_chunks[&ctx.nt_id("A")].len(), 1);
        let (tree_id, _) = cks.nts_to_chunks[&ctx.nt_id("A")][0];
        assert_eq!(cks.trees[tree_id].unparse_to_vec(&ctx), "a b c".as_bytes());
        let random_size = ctx.get_random_len_for_ruleid(&r2);
        let tree = ctx.generate_tree_from_rule(&mut rand, r2, random_size);
        cks.add_tree(tree, &ctx);
        // assert_eq!(cks.seen_outputs.len(), 3);
        // assert_eq!(cks.nts_to_chunks[&ctx.nt_id("B")].len(), 1);
        let (tree_id, node_id) = cks.nts_to_chunks[&ctx.nt_id("B")][0];
        assert_eq!(
            cks.trees[tree_id].unparse_node_to_vec(node_id, &ctx),
            "b c".as_bytes()
        );
    }
}

View File

@ -0,0 +1,452 @@
use alloc::{borrow::ToOwned, string::String, vec::Vec};
use hashbrown::HashMap;
use libafl_bolts::rands::{Rand, RomuDuoJrRand};
use pyo3::prelude::PyObject;
use super::{
newtypes::{NTermId, RuleId},
rule::{Rule, RuleIdOrCustom},
tree::Tree,
};
/// A grammar context: the complete rule set of a grammar together with
/// precomputed metadata (minimal derivation sizes, option counts) that the
/// tree generator consults.
#[derive(Debug, Clone)]
pub struct Context {
    /// All rules of the grammar, indexed by `RuleId`.
    rules: Vec<Rule>,
    /// Maps each nonterminal to the rules that can derive it. Sorted by
    /// minimal derivation size in `calc_rule_order`.
    nts_to_rules: HashMap<NTermId, Vec<RuleId>>,
    /// Nonterminal id -> human-readable name.
    nt_ids_to_name: HashMap<NTermId, String>,
    /// Human-readable name -> nonterminal id (inverse of `nt_ids_to_name`).
    names_to_nt_id: HashMap<String, NTermId>,
    /// Minimal number of tree nodes needed to fully derive each rule.
    rules_to_min_size: HashMap<RuleId, usize>,
    /// Minimal number of tree nodes needed to derive each nonterminal.
    nts_to_min_size: HashMap<NTermId, usize>,
    /// Number of distinct derivations reachable through each rule
    /// (saturating; see `calc_num_options_for_rule`).
    rules_to_num_options: HashMap<RuleId, usize>,
    /// Number of distinct derivations reachable from each nonterminal.
    nts_to_num_options: HashMap<NTermId, usize>,
    /// Maximum derivation length, set by `initialize` (requested len + 2).
    max_len: usize,
}
impl Default for Context {
fn default() -> Self {
Self::new()
}
}
impl Context {
    /// Creates an empty grammar context with no rules or nonterminals.
    #[must_use]
    pub fn new() -> Self {
        Context {
            rules: vec![],
            nts_to_rules: HashMap::new(),
            nt_ids_to_name: HashMap::new(),
            names_to_nt_id: HashMap::new(),
            rules_to_min_size: HashMap::new(),
            nts_to_min_size: HashMap::new(),
            rules_to_num_options: HashMap::new(),
            nts_to_num_options: HashMap::new(),
            max_len: 0,
        }
    }

    /// Finalizes the grammar after all rules were added: computes minimal
    /// derivation sizes and option counts, and fixes the maximum length.
    pub fn initialize(&mut self, max_len: usize) {
        self.calc_min_len();
        self.calc_num_options();
        self.max_len = max_len + 2;
    }

    /// Returns the rule stored under id `r`.
    #[must_use]
    pub fn get_rule(&self, r: RuleId) -> &Rule {
        let id: usize = r.into();
        &self.rules[id]
    }

    /// Returns the nonterminal derived by the rule behind `r`.
    #[must_use]
    pub fn get_nt(&self, r: &RuleIdOrCustom) -> NTermId {
        self.get_rule(r.id()).nonterm()
    }

    /// Returns how many nonterminal children the rule behind `r` expands to.
    #[must_use]
    pub fn get_num_children(&self, r: &RuleIdOrCustom) -> usize {
        self.get_rule(r.id()).number_of_nonterms()
    }

    /// Adds a plain format rule `nt -> format` and returns its id.
    pub fn add_rule(&mut self, nt: &str, format: &[u8]) -> RuleId {
        let rid = self.rules.len().into();
        let rule = Rule::from_format(self, nt, format);
        let ntid = self.aquire_nt_id(nt);
        self.rules.push(rule);
        self.nts_to_rules.entry(ntid).or_default().push(rid);
        rid
    }

    /// Adds a python-script rule `nt -> script(nts...)` and returns its id.
    pub fn add_script(&mut self, nt: &str, nts: &[String], script: PyObject) -> RuleId {
        let rid = self.rules.len().into();
        let rule = Rule::from_script(self, nt, nts, script);
        let ntid = self.aquire_nt_id(nt);
        self.rules.push(rule);
        self.nts_to_rules.entry(ntid).or_default().push(rid);
        rid
    }

    /// Adds a regex terminal rule `nt -> regex` and returns its id.
    pub fn add_regex(&mut self, nt: &str, regex: &str) -> RuleId {
        let rid = self.rules.len().into();
        let rule = Rule::from_regex(self, nt, regex);
        let ntid = self.aquire_nt_id(nt);
        self.rules.push(rule);
        self.nts_to_rules.entry(ntid).or_default().push(rid);
        rid
    }

    /// Adds a literal terminal rule `nt -> term` and returns its id.
    pub fn add_term_rule(&mut self, nt: &str, term: &[u8]) -> RuleId {
        let rid = self.rules.len().into();
        let ntid = self.aquire_nt_id(nt);
        self.rules.push(Rule::from_term(ntid, term));
        self.nts_to_rules.entry(ntid).or_default().push(rid);
        rid
    }

    /// Returns the id for nonterminal `nt`, allocating a fresh id on first use.
    /// (Name keeps its historical spelling; it is part of the public API.)
    pub fn aquire_nt_id(&mut self, nt: &str) -> NTermId {
        let next_id = self.nt_ids_to_name.len().into();
        let id = self.names_to_nt_id.entry(nt.into()).or_insert(next_id);
        self.nt_ids_to_name.entry(*id).or_insert(nt.into());
        *id
    }

    /// Looks up the id of an existing nonterminal.
    ///
    /// # Panics
    /// Panics if `nt` was never registered.
    #[must_use]
    pub fn nt_id(&self, nt: &str) -> NTermId {
        *self
            .names_to_nt_id
            .get(nt)
            .unwrap_or_else(|| panic!("no such nonterminal: {nt}"))
    }

    /// Returns the human-readable name of nonterminal `nt`.
    #[must_use]
    pub fn nt_id_to_s(&self, nt: NTermId) -> String {
        self.nt_ids_to_name[&nt].clone()
    }

    /// Minimal derivation size of rule `r`: 1 for the rule's own node plus the
    /// minimal sizes of all its nonterminals. `None` if any nonterminal's
    /// minimal size is not yet known.
    fn calc_min_len_for_rule(&self, r: RuleId) -> Option<usize> {
        let mut res = 1;
        for nt_id in self.get_rule(r).nonterms() {
            if let Some(min) = self.nts_to_min_size.get(nt_id) {
                //println!("Calculating length for Rule(calc_min_len_for_rule): {}, current: {}, adding: {}, because of rule: {}", self.nt_id_to_s(self.get_rule(r).nonterm().clone()), res, min, self.nt_id_to_s(nt_id.clone()));
                res += *min;
            } else {
                return None;
            }
        }
        //println!("Calculated length for Rule(calc_min_len_for_rule): {}, Length: {}", self.nt_id_to_s(self.get_rule(r).nonterm().clone()), res);
        Some(res)
    }

    /// Fixpoint computation of minimal derivation sizes for all rules and
    /// nonterminals. Panics on unproductive grammars (rules that can never
    /// terminate in a base case).
    pub fn calc_min_len(&mut self) {
        let mut something_changed = true;
        while something_changed {
            //TODO: find a better solution to prevent consumed_len >= ctx.get_min_len_for_nt(*nt)' Assertions
            let mut unknown_rules = (0..self.rules.len()).map(RuleId::from).collect::<Vec<_>>();
            something_changed = false;
            while !unknown_rules.is_empty() {
                let last_len = unknown_rules.len();
                unknown_rules.retain(|rule| {
                    if let Some(min) = self.calc_min_len_for_rule(*rule) {
                        let nt = self.get_rule(*rule).nonterm();
                        //let name = self.nt_id_to_s(nt.clone()); //DEBUGGING
                        let e = self.nts_to_min_size.entry(nt).or_insert(min);
                        if *e > min {
                            *e = min;
                            something_changed = true;
                        }
                        //println!("Calculated length for Rule: {}, Length: {}, Min_length_of_nt: {}", name, min, *e);
                        self.rules_to_min_size.insert(*rule, min);
                        false
                    } else {
                        true
                    }
                });
                // No progress in a whole pass means the remaining rules can
                // never be resolved: the grammar is broken.
                if last_len == unknown_rules.len() {
                    println!("Found unproductive rules: (missing base/non recursive case?)");
                    for r in unknown_rules {
                        println!("{}", self.get_rule(r).debug_show(self));
                    }
                    panic!("Broken Grammar");
                }
            }
        }
        self.calc_rule_order();
    }

    /// Number of derivation options of rule `r`: the (saturating) product of
    /// the option counts of its nonterminals.
    fn calc_num_options_for_rule(&self, r: RuleId) -> usize {
        let mut res = 1_usize;
        for nt_id in self.get_rule(r).nonterms() {
            res = res.saturating_mul(*self.nts_to_num_options.get(nt_id).unwrap_or(&1));
        }
        res
    }

    /// Fixpoint computation of derivation option counts for all rules and
    /// nonterminals.
    pub fn calc_num_options(&mut self) {
        for (nt, rules) in &self.nts_to_rules {
            self.nts_to_num_options.entry(*nt).or_insert(rules.len());
        }
        let mut something_changed = true;
        while something_changed {
            something_changed = false;
            for rid in (0..self.rules.len()).map(RuleId::from) {
                let num = self.calc_num_options_for_rule(rid);
                let nt = self.get_rule(rid).nonterm();
                let e = self.nts_to_num_options.entry(nt).or_insert(num);
                if *e < num {
                    *e = num;
                    something_changed = true;
                }
                //println!("Calculated length for Rule: {}, Length: {}, Min_length_of_nt: {}", name, min, *e);
                self.rules_to_num_options.insert(rid, num);
            }
        }
    }

    /// Sorts each nonterminal's rule list by minimal derivation size, so
    /// shorter derivations come first.
    fn calc_rule_order(&mut self) {
        let rules_to_min_size = &self.rules_to_min_size;
        for rules in self.nts_to_rules.values_mut() {
            (*rules).sort_by(|r1, r2| rules_to_min_size[r1].cmp(&rules_to_min_size[r2]));
        }
    }

    /// True if more than one rule can derive `nt`. (Name keeps its historical
    /// spelling; it is part of the public API.)
    #[must_use]
    pub fn check_if_nterm_has_multiple_possiblities(&self, nt: &NTermId) -> bool {
        self.get_rules_for_nt(*nt).len() > 1
    }

    /// Picks a random length budget for one child out of `rhs_of_rule`.
    pub fn get_random_len<R: Rand>(rand: &mut R, len: usize, rhs_of_rule: &[NTermId]) -> usize {
        Self::simple_get_random_len(rand, rhs_of_rule.len(), len)
    }

    //we need to get maximal sizes for all subtrees. To generate trees fairly, we want to split the
    //available size fairly to all nodes. (e.g. all children have the same expected size,
    //regardless of its index in the current rule. We use this version of the algorithm described
    //here: https://stackoverflow.com/a/8068956 to get the first value.
    fn simple_get_random_len<R: Rand>(
        rand: &mut R,
        number_of_children: usize,
        total_remaining_len: usize,
    ) -> usize {
        let mut res = total_remaining_len;
        let iters = i32::try_from(number_of_children).unwrap() - 1;
        for _ in 0..iters {
            let proposal = rand.between(0, total_remaining_len);
            if proposal < res {
                res = proposal;
            }
        }
        res
    }

    /// Minimal derivation size of nonterminal `nt` (computed in `calc_min_len`).
    #[must_use]
    pub fn get_min_len_for_nt(&self, nt: NTermId) -> usize {
        self.nts_to_min_size[&nt]
    }

    /// Picks a random rule for `nt` that fits within `len` remaining nodes.
    pub fn get_random_rule_for_nt<R: Rand>(&self, rand: &mut R, nt: NTermId, len: usize) -> RuleId {
        self.simple_get_random_rule_for_nt(rand, nt, len)
    }

    /// Iterates over the rules for `nt` that fit within `max_len`; rules with
    /// only one derivation option are included with probability
    /// `p_include_short_rules` percent.
    pub fn get_applicable_rules<'a, R: Rand>(
        &'a self,
        rand: &'a mut R,
        max_len: usize,
        nt: NTermId,
        p_include_short_rules: usize,
    ) -> impl Iterator<Item = &RuleId> + 'a {
        self.nts_to_rules[&nt]
            .iter()
            .take_while(move |r| self.rules_to_min_size[*r] <= max_len)
            .filter(move |r| {
                self.rules_to_num_options[*r] > 1 || rand.below(100) <= p_include_short_rules
            })
    }

    /// Chooses one applicable rule at random, or `None` if none qualifies.
    pub fn choose_applicable_rule<R: Rand>(
        &self,
        rand: &mut R,
        max_len: usize,
        nt: NTermId,
        p_include_short_rules: usize,
    ) -> Option<RuleId> {
        // Create a tmp rand to get around borrowing. We hardcode the fastest rand here, because why not.
        let mut rand_cpy = RomuDuoJrRand::with_seed(rand.next());
        let rules = self.get_applicable_rules(rand, max_len, nt, p_include_short_rules);
        rand_cpy.choose(rules).copied()
    }

    /// Fallback strategy: first try with the default inclusion probability,
    /// then retry including all short rules, and panic if `nt` cannot be
    /// derived within `max_len` steps at all.
    fn simple_get_random_rule_for_nt<R: Rand>(
        &self,
        rand: &mut R,
        nt: NTermId,
        max_len: usize,
    ) -> RuleId {
        let p_include_short_rules = 100;
        /*if self.nts_to_num_options[&nt] < 10 {
            100 * 0
        } else if max_len > 100 {
            2 * 0
        } else if max_len > 20 {
            50 * 0
        } else {
            100 * 0;
        }; */
        if let Some(opt) = self.choose_applicable_rule(rand, max_len, nt, p_include_short_rules) {
            opt
        } else if let Some(opt) = self.choose_applicable_rule(rand, max_len, nt, 100) {
            opt
        } else {
            panic!(
                "there is no way to derive {} within {} steps",
                self.nt_ids_to_name[&nt], max_len
            )
        }
    }

    /// Length budget used when regenerating a subtree for a rule.
    #[must_use]
    pub fn get_random_len_for_ruleid(&self, _rule_id: &RuleId) -> usize {
        self.max_len //TODO?????
    }

    /// Length budget used when regenerating a subtree for a nonterminal.
    #[must_use]
    pub fn get_random_len_for_nt(&self, _nt: &NTermId) -> usize {
        self.max_len
    }

    /// All rules that can derive `nt`, ordered by minimal derivation size.
    #[must_use]
    pub fn get_rules_for_nt(&self, nt: NTermId) -> &Vec<RuleId> {
        &self.nts_to_rules[&nt]
    }

    /// Generates a fresh tree rooted at nonterminal `nt` with at most
    /// `max_len` nodes.
    pub fn generate_tree_from_nt<R: Rand>(
        &self,
        rand: &mut R,
        nt: NTermId,
        max_len: usize,
    ) -> Tree {
        let random_rule = self.get_random_rule_for_nt(rand, nt, max_len);
        self.generate_tree_from_rule(rand, random_rule, max_len - 1)
    }

    /// Generates a fresh tree rooted at rule `r` with at most `len` nodes.
    pub fn generate_tree_from_rule<R: Rand>(&self, rand: &mut R, r: RuleId, len: usize) -> Tree {
        let mut tree = Tree::from_rule_vec(vec![], self);
        tree.generate_from_rule(rand, r, len, self);
        tree
    }
}
#[cfg(test)]
mod tests {
    use alloc::{string::String, vec::Vec};

    use libafl_bolts::rands::StdRand;

    use crate::common::nautilus::grammartec::{
        context::Context,
        rule::{Rule, RuleChild, RuleIdOrCustom},
        tree::{Tree, TreeLike},
    };

    /// Checks that `Rule::from_format` splits a format string into the
    /// expected literal/nonterminal children, honoring escaped braces.
    #[test]
    fn simple_context() {
        let mut ctx = Context::new();
        let r = Rule::from_format(&mut ctx, "F", b"foo{A:a}\\{bar\\}{B:b}asd{C}");
        let soll = vec![
            RuleChild::from_lit(b"foo"),
            RuleChild::from_nt("{A:a}", &mut ctx),
            RuleChild::from_lit(b"{bar}"),
            RuleChild::from_nt("{B:b}", &mut ctx),
            RuleChild::from_lit(b"asd"),
            RuleChild::from_nt("{C}", &mut ctx),
        ];
        if let Rule::Plain(rl) = &r {
            assert_eq!(&rl.children, &soll);
        } else {
            unreachable!();
        }
        assert_eq!(r.nonterms()[0], ctx.nt_id("A"));
        assert_eq!(r.nonterms()[1], ctx.nt_id("B"));
        assert_eq!(r.nonterms()[2], ctx.nt_id("C"));
    }

    /// Checks minimal derivation sizes and that generation within a tight
    /// budget is forced onto the shortest possible derivation ("cbabc").
    #[test]
    fn test_context() {
        let mut rand = StdRand::new();
        let mut ctx = Context::new();
        let r0 = ctx.add_rule("C", b"c{B}c");
        let r1 = ctx.add_rule("B", b"b{A}b");
        let _ = ctx.add_rule("A", b"a {A}");
        let _ = ctx.add_rule("A", b"a {A}");
        let _ = ctx.add_rule("A", b"a {A}");
        let _ = ctx.add_rule("A", b"a {A}");
        let _ = ctx.add_rule("A", b"a {A}");
        let r3 = ctx.add_rule("A", b"a");
        ctx.initialize(5);
        assert_eq!(ctx.get_min_len_for_nt(ctx.nt_id("A")), 1);
        assert_eq!(ctx.get_min_len_for_nt(ctx.nt_id("B")), 2);
        assert_eq!(ctx.get_min_len_for_nt(ctx.nt_id("C")), 3);
        let mut tree = Tree::from_rule_vec(vec![], &ctx);
        tree.generate_from_nt(&mut rand, ctx.nt_id("C"), 3, &ctx);
        assert_eq!(
            tree.rules,
            vec![
                RuleIdOrCustom::Rule(r0),
                RuleIdOrCustom::Rule(r1),
                RuleIdOrCustom::Rule(r3),
            ]
        );
        let mut data: Vec<u8> = vec![];
        tree.unparse_to(&ctx, &mut data);
        assert_eq!(String::from_utf8(data).expect("RAND_3377050372"), "cbabc");
    }

    /// Checks that generated trees respect the length budget, and that
    /// hand-built rule sequences unparse to the expected expressions.
    #[test]
    fn test_generate_len() {
        let mut rand = StdRand::new();
        let mut ctx = Context::new();
        let r0 = ctx.add_rule("E", b"({E}+{E})");
        let r1 = ctx.add_rule("E", b"({E}*{E})");
        let r2 = ctx.add_rule("E", b"({E}-{E})");
        let r3 = ctx.add_rule("E", b"({E}/{E})");
        let r4 = ctx.add_rule("E", b"1");
        ctx.initialize(11);
        assert_eq!(ctx.get_min_len_for_nt(ctx.nt_id("E")), 1);
        for _ in 0..100 {
            let mut tree = Tree::from_rule_vec(vec![], &ctx);
            tree.generate_from_nt(&mut rand, ctx.nt_id("E"), 9, &ctx);
            assert!(tree.rules.len() < 10);
            assert!(!tree.rules.is_empty());
        }
        let rules = [r0, r1, r4, r4, r4]
            .iter()
            .map(|x| RuleIdOrCustom::Rule(*x))
            .collect::<Vec<_>>();
        let tree = Tree::from_rule_vec(rules, &ctx);
        let mut data: Vec<u8> = vec![];
        tree.unparse_to(&ctx, &mut data);
        assert_eq!(
            String::from_utf8(data).expect("RAND_3492562908"),
            "((1*1)+1)"
        );
        let rules = [r0, r1, r2, r3, r4, r4, r4, r4, r4]
            .iter()
            .map(|x| RuleIdOrCustom::Rule(*x))
            .collect::<Vec<_>>();
        let tree = Tree::from_rule_vec(rules, &ctx);
        let mut data: Vec<u8> = vec![];
        tree.unparse_to(&ctx, &mut data);
        assert_eq!(
            String::from_utf8(data).expect("RAND_4245419893"),
            "((((1/1)-1)*1)+1)"
        );
    }
}

View File

@ -0,0 +1,7 @@
pub mod chunkstore;
pub mod context;
pub mod mutator;
pub mod newtypes;
pub mod recursion_info;
pub mod rule;
pub mod tree;

View File

@ -0,0 +1,601 @@
use alloc::vec::Vec;
use std::{collections::HashSet, mem};
use libafl_bolts::{rands::Rand, Error};
use crate::common::nautilus::grammartec::{
chunkstore::ChunkStore,
context::Context,
newtypes::NodeId,
recursion_info::RecursionInfo,
rule::RuleIdOrCustom,
tree::{Tree, TreeLike, TreeMutation},
};
/// Grammar-tree mutator. Holds a scratchpad tree that mutation methods
/// regenerate into, so replacement subtrees can be built without allocating a
/// fresh tree per mutation.
#[derive(Debug)]
pub struct Mutator {
    // Reusable buffer tree; overwritten by each generate_from_* call.
    scratchpad: Tree,
}
impl Mutator {
    /// Creates a mutator with an empty scratchpad tree for `ctx`.
    #[must_use]
    pub fn new(ctx: &Context) -> Self {
        Mutator {
            scratchpad: Tree::from_rule_vec(vec![], ctx),
        }
    }

    /// Tries to shrink `tree` by replacing subtrees (between `start_index` and
    /// `end_index`) with minimal derivations of the same nonterminal, keeping a
    /// replacement only when `tester` accepts it.
    ///
    /// `bits` is passed through to `tester` unchanged.
    //Return value indicates if minimization is complete: true: complete, false: not complete
    #[allow(clippy::too_many_arguments)]
    pub fn minimize_tree<F, R: Rand>(
        &mut self,
        rand: &mut R,
        tree: &mut Tree,
        bits: &HashSet<usize>,
        ctx: &Context,
        start_index: usize,
        end_index: usize,
        tester: &mut F,
    ) -> Result<bool, Error>
    where
        F: FnMut(&TreeMutation, &HashSet<usize>, &Context) -> Result<bool, Error>,
    {
        let mut i = start_index;
        while i < tree.size() {
            let n = NodeId::from(i);
            let nt = tree.get_rule(n, ctx).nonterm();
            // Only try to shrink subtrees that are larger than the minimal
            // derivation of their nonterminal.
            if tree.subtree_size(n) > ctx.get_min_len_for_nt(nt) {
                self.scratchpad
                    .generate_from_nt(rand, nt, ctx.get_min_len_for_nt(nt), ctx);
                if let Some(t) = Mutator::test_and_convert(
                    tree,
                    n,
                    &self.scratchpad,
                    NodeId::from(0),
                    ctx,
                    bits,
                    tester,
                )? {
                    let _ = mem::replace(tree, t);
                }
            }
            i += 1;
            if i == end_index {
                return Ok(false);
            }
        }
        Ok(true)
    }

    /// Tries to shrink `tree` by collapsing recursions: replacing a node's
    /// ancestor (with the same nonterminal) by the node's own subtree.
    //Return value indicates if minimization is complete: true: complete, false: not complete
    pub fn minimize_rec<F>(
        &mut self,
        tree: &mut Tree,
        bits: &HashSet<usize>,
        ctx: &Context,
        start_index: usize,
        end_index: usize,
        tester: &mut F,
    ) -> Result<bool, Error>
    where
        F: FnMut(&TreeMutation, &HashSet<usize>, &Context) -> Result<bool, Error>,
    {
        let mut i = start_index;
        while i < tree.size() {
            let n = NodeId::from(i);
            if let Some(parent) = Mutator::find_parent_with_nt(tree, n, ctx) {
                if let Some(t) =
                    Mutator::test_and_convert(tree, parent, tree, n, ctx, bits, tester)?
                {
                    let _ = mem::replace(tree, t);
                    // Resume scanning from the replaced ancestor's position.
                    i = parent.into();
                }
            }
            i += 1;
            if i == end_index {
                return Ok(false);
            }
        }
        Ok(true)
    }

    /// For each node in `[start_index, end_index)`, tries every alternative
    /// rule for that node's nonterminal and hands the mutation to `tester`.
    /// Returns `Ok(true)` once the end of the tree is reached.
    pub fn mut_rules<F, R: Rand>(
        &mut self,
        rand: &mut R,
        tree: &Tree,
        ctx: &Context,
        start_index: usize,
        end_index: usize,
        tester: &mut F,
    ) -> Result<bool, Error>
    where
        F: FnMut(&TreeMutation, &Context) -> Result<(), Error>,
    {
        for i in start_index..end_index {
            if i == tree.size() {
                return Ok(true);
            }
            let n = NodeId::from(i);
            let old_rule_id = tree.get_rule_id(n);
            let rule_ids = ctx
                .get_rules_for_nt(ctx.get_nt(&RuleIdOrCustom::Rule(old_rule_id)))
                .clone(); //TODO: Maybe find a better solution
            for new_rule_id in rule_ids {
                if old_rule_id != new_rule_id {
                    let random_size = ctx.get_random_len_for_ruleid(&new_rule_id);
                    self.scratchpad
                        .generate_from_rule(rand, new_rule_id, random_size, ctx);
                    let repl = tree.mutate_replace_from_tree(n, &self.scratchpad, NodeId::from(0));
                    tester(&repl, ctx)?;
                }
            }
        }
        Ok(false)
    }

    /// Splice mutation: replaces a random node with a stored chunk from `cks`
    /// that derives the same nonterminal via a different rule.
    pub fn mut_splice<F, R: Rand>(
        &mut self,
        rand: &mut R,
        tree: &Tree,
        ctx: &Context,
        cks: &ChunkStore,
        tester: &mut F,
    ) -> Result<(), Error>
    where
        F: FnMut(&TreeMutation, &Context) -> Result<(), Error>,
    {
        let n = NodeId::from(rand.below(tree.size()));
        let old_rule_id = tree.get_rule_id(n);
        if let Some((repl_tree, repl_node)) = cks.get_alternative_to(rand, old_rule_id, ctx) {
            let repl = tree.mutate_replace_from_tree(n, repl_tree, repl_node);
            tester(&repl, ctx)?;
        }
        Ok(())
    }

    //pub fn rec_splice<F>(
    //    &mut self,
    //    tree: &Tree,
    //    ctx: &Context,
    //    cks: &ChunkStore,
    //    tester: &mut F
    //    )-> Result<(), Error>
    //where
    //    F: FnMut(&TreeMutation, &Context) -> Result<(), Error>,
    //{
    //    let n = NodeId::from(rand::thread_rng().gen_range(0, tree.size()));
    //    if let Some(old_rule_id) = tree.get_rule_id(n){
    //        let nterm_id = ctx.get_rule(old_rule).nonterm();
    //        if let Some((repl_tree, repl_node)) = cks.get_alternative_to(old_rule_id, ctx) {
    //            let repl = tree.mutate_replace_from_tree(n, repl_tree, repl_node);
    //            tester(&repl, ctx)?;
    //        }
    //    }
    //
    //    return Ok(());
    //}

    /// Random mutation: regenerates a random node's subtree from scratch, but
    /// only if its nonterminal has more than one possible derivation.
    pub fn mut_random<F, R: Rand>(
        &mut self,
        rand: &mut R,
        tree: &Tree,
        ctx: &Context,
        tester: &mut F,
    ) -> Result<(), Error>
    where
        F: FnMut(&TreeMutation, &Context) -> Result<(), Error>,
    {
        let n = NodeId::from(rand.below(tree.size()));
        let nterm = tree.get_rule(n, ctx).nonterm();
        if ctx.check_if_nterm_has_multiple_possiblities(&nterm) {
            let len = ctx.get_random_len_for_nt(&nterm);
            self.scratchpad.generate_from_nt(rand, nterm, len, ctx);
            let repl = tree.mutate_replace_from_tree(n, &self.scratchpad, NodeId::from(0));
            tester(&repl, ctx)?;
        }
        Ok(())
    }

    /// Recursion mutation: picks a known recursion in `tree` and unrolls it a
    /// random number of times (bounded by `2 << rand.between(1, 10)` total
    /// nodes), then hands the resulting mutation to `tester`.
    ///
    /// The replacement is assembled from three parts: the pre-recursion rules
    /// repeated `num_of_recursions` times, the original subtree ending, and
    /// the post-recursion rules repeated the same number of times; the `sizes`
    /// bookkeeping is adjusted to match the unrolled structure.
    pub fn mut_random_recursion<F, R: Rand>(
        &mut self,
        rand: &mut R,
        tree: &Tree,
        recursions: &mut Vec<RecursionInfo>,
        ctx: &Context,
        tester: &mut F,
    ) -> Result<(), Error>
    where
        F: FnMut(&TreeMutation, &Context) -> Result<(), Error>,
    {
        let max_len_of_recursions = 2 << rand.between(1, 10);
        if let Some(recursion_info) = rand.choose(recursions) {
            let recursion = recursion_info.get_random_recursion_pair(rand);
            let recursion_len_pre = recursion.1.to_i() - recursion.0.to_i();
            let recursion_len_total =
                tree.subtree_size(recursion.0) - tree.subtree_size(recursion.1);
            let recursion_len_post = recursion_len_total - recursion_len_pre;
            let num_of_recursions = max_len_of_recursions / recursion_len_total;
            //Insert pre recursion
            let postfix = tree.subtree_size(recursion.1);
            let mut rules_new = Vec::with_capacity(
                recursion_len_pre * num_of_recursions
                    + postfix
                    + recursion_len_post * num_of_recursions,
            );
            let mut sizes_new = Vec::with_capacity(
                recursion_len_pre * num_of_recursions
                    + postfix
                    + recursion_len_post * num_of_recursions,
            );
            for i in 0..num_of_recursions * recursion_len_pre {
                rules_new.push(
                    tree.get_rule_or_custom(recursion.0 + (i % recursion_len_pre))
                        .clone(),
                );
                sizes_new.push(tree.sizes[recursion.0.to_i() + (i % recursion_len_pre)]);
            }
            //Append ending of original tree
            for i in 0..postfix {
                rules_new.push(tree.get_rule_or_custom(recursion.1 + i).clone());
                sizes_new.push(tree.sizes[recursion.1.to_i() + i]);
            }
            //Adjust the sizes
            for (i, item) in sizes_new
                .iter_mut()
                .enumerate()
                .take(num_of_recursions * recursion_len_pre)
            {
                if *item >= recursion_len_pre {
                    *item += (num_of_recursions - i / recursion_len_pre - 1) * recursion_len_total;
                }
            }
            //Append post recursion
            for i in 0..num_of_recursions * recursion_len_post {
                rules_new.push(
                    tree.get_rule_or_custom(recursion.1 + postfix + (i % recursion_len_post))
                        .clone(),
                );
                sizes_new.push(tree.sizes[recursion.1.to_i() + postfix + (i % recursion_len_post)]);
            }
            let recursion_tree = Tree {
                rules: rules_new,
                sizes: sizes_new,
                paren: Vec::new(), /*paren_new*/
            };
            let repl = tree.mutate_replace_from_tree(recursion.1, &recursion_tree, NodeId::from(0));
            tester(&repl, ctx)?;
        }
        Ok(())
    }

    /// Walks up from `node` and returns the closest ancestor whose rule
    /// derives the same nonterminal, if any.
    fn find_parent_with_nt(tree: &Tree, mut node: NodeId, ctx: &Context) -> Option<NodeId> {
        let nt = tree.get_rule(node, ctx).nonterm();
        while let Some(parent) = tree.get_parent(node) {
            if tree.get_rule(parent, ctx).nonterm() == nt {
                return Some(parent);
            }
            node = parent;
        }
        None
    }

    /// Builds the mutation replacing `n_a` in `tree_a` with `n_b` from
    /// `tree_b`; if `tester` accepts it, materializes and returns the new tree.
    fn test_and_convert<F>(
        tree_a: &Tree,
        n_a: NodeId,
        tree_b: &Tree,
        n_b: NodeId,
        ctx: &Context,
        fresh_bits: &HashSet<usize>,
        tester: &mut F,
    ) -> Result<Option<Tree>, Error>
    where
        F: FnMut(&TreeMutation, &HashSet<usize>, &Context) -> Result<bool, Error>,
    {
        let repl = tree_a.mutate_replace_from_tree(n_a, tree_b, n_b);
        if tester(&repl, fresh_bits, ctx)? {
            return Ok(Some(repl.to_tree(ctx)));
        }
        Ok(None)
    }
}
#[cfg(test)]
mod tests {
use alloc::{
string::{String, ToString},
vec::Vec,
};
use std::{collections::HashSet, str};
use libafl_bolts::rands::StdRand;
use crate::{
common::nautilus::grammartec::{
chunkstore::ChunkStore,
context::Context,
newtypes::RuleId,
rule::RuleIdOrCustom,
tree::{Tree, TreeLike, TreeMutation},
},
nautilus::grammartec::mutator::Mutator,
};
#[test]
fn check_mut_random_recursion() {
    // Builds a tiny grammar with one recursion (N1 -> ... N3 -> N1) and checks
    // that the recursion mutation keeps the expected prefix/postfix rules.
    let mut rand = StdRand::new();
    let r1 = RuleId::from(0);
    let r2 = RuleId::from(1);
    let r3 = RuleId::from(2);
    let r4 = RuleId::from(3);
    let r5 = RuleId::from(4);
    let mut ctx = Context::new();
    ctx.add_rule("N1", b"r1{N2}{N3}{N4}");
    ctx.add_rule("N2", b"r2");
    ctx.add_rule("N3", b"r3{N1}");
    ctx.add_rule("N1", b"r4");
    ctx.add_rule("N4", b"r5");
    let rules = [r1, r2, r3, r4, r5]
        .iter()
        .map(|x| RuleIdOrCustom::Rule(*x))
        .collect::<Vec<_>>();
    // `tree` is only read below (`calc_recursions` takes `&self`), so no `mut` needed.
    let tree = Tree::from_rule_vec(rules, &ctx);
    println!("tree: {tree:?}");
    let mut mutator = Mutator::new(&ctx);
    let mut tester = |tree_mut: &TreeMutation, _ctx: &Context| {
        println!("prefix: {:?}", tree_mut.prefix);
        println!("repl: {:?}", tree_mut.repl);
        println!("postfix: {:?}", tree_mut.postfix);
        println!("mutated tree: ");
        assert!(
            tree_mut.prefix
                == &[r1, r2, r3]
                    .iter()
                    .map(|x| RuleIdOrCustom::Rule(*x))
                    .collect::<Vec<_>>()[..]
        );
        assert!(
            tree_mut.postfix
                == &[r5]
                    .iter()
                    .map(|x| RuleIdOrCustom::Rule(*x))
                    .collect::<Vec<_>>()[..]
        );
        assert!(
            tree_mut.repl[0..3]
                == [r1, r2, r3]
                    .iter()
                    .map(|x| RuleIdOrCustom::Rule(*x))
                    .collect::<Vec<_>>()[..]
        );
        assert_eq!(tree_mut.repl.last(), Some(&RuleIdOrCustom::Rule(r5)));
        Ok(())
    };
    let mut recursions = tree.calc_recursions(&ctx).expect("RAND_3407743327");
    println!("Recursions:\n{recursions:?}");
    mutator
        .mut_random_recursion(&mut rand, &tree, &mut recursions, &ctx, &mut tester)
        .expect("RAND_4227583404");
}
#[test]
fn check_minimize_tree() {
    // Minimization must preserve the tester-required substring "a1" while
    // dropping every alternative that is not needed to satisfy the tester.
    let mut rand = StdRand::new();
    let mut ctx = Context::new();
    let r1 = ctx.add_rule("S", b"s1 {A}");
    let _ = ctx.add_rule("S", b"s2");
    let _ = ctx.add_rule("S", b"a1");
    let r2 = ctx.add_rule("A", b"a1 {B}");
    let _ = ctx.add_rule("A", b"a1");
    let _ = ctx.add_rule("A", b"a2");
    let r3 = ctx.add_rule("B", b"b1");
    let _ = ctx.add_rule("B", b"b2");
    let _ = ctx.add_rule("B", b"b3{B}");
    ctx.initialize(10);
    // Repeat to smoke out randomness-dependent behavior.
    for _ in 0..100 {
        let mut tree = Tree::from_rule_vec(
            [r1, r2, r3]
                .iter()
                .map(|x| RuleIdOrCustom::Rule(*x))
                .collect::<Vec<_>>(),
            &ctx,
        );
        let mut mutator = Mutator::new(&ctx);
        {
            // The tester accepts a mutation iff the unparsed output still contains "a1".
            let mut tester =
                |tree_mut: &TreeMutation, _bits: &HashSet<usize>, ctx: &Context| {
                    if String::from_utf8(tree_mut.unparse_to_vec(ctx))
                        .expect("RAND_2486760939")
                        .contains("a1")
                    {
                        Ok(true)
                    } else {
                        Ok(false)
                    }
                };
            let tree_size = tree.size();
            mutator
                .minimize_tree(
                    &mut rand,
                    &mut tree,
                    &HashSet::new(),
                    &ctx,
                    0,
                    tree_size,
                    &mut tester,
                )
                .expect("RAND_4046907857");
        }
        let unparse = String::from_utf8(tree.unparse_to_vec(&ctx)).expect("RAND_380778776");
        println!("unparse: {unparse}");
        assert!(unparse.contains("a1"));
        assert!(!unparse.contains("a2"));
        assert!(!unparse.contains("b2"));
        assert!(!unparse.contains("b3"));
    }
}
#[test]
fn check_minimize_rec() {
    // Same invariant as `check_minimize_tree`, but exercising the recursion
    // minimizer (`minimize_rec`), which needs no RNG.
    let mut ctx = Context::new();
    let r1 = ctx.add_rule("S", b"s1 {A}");
    let _ = ctx.add_rule("S", b"s2");
    let r2 = ctx.add_rule("A", b"a1 {B}");
    let _ = ctx.add_rule("A", b"a1");
    let _ = ctx.add_rule("A", b"a2");
    let r3 = ctx.add_rule("B", b"b1");
    let _ = ctx.add_rule("B", b"b2");
    let _ = ctx.add_rule("B", b"b3{B}");
    ctx.initialize(10);
    for _ in 0..100 {
        let mut tree = Tree::from_rule_vec(
            [r1, r2, r3]
                .iter()
                .map(|x| RuleIdOrCustom::Rule(*x))
                .collect::<Vec<_>>(),
            &ctx,
        );
        let mut mutator = Mutator::new(&ctx);
        {
            // Accepts only mutations whose output still contains "a1".
            let mut tester =
                |tree_mut: &TreeMutation, _bits: &HashSet<usize>, ctx: &Context| {
                    if String::from_utf8(tree_mut.unparse_to_vec(ctx))
                        .expect("RAND_1958219388")
                        .contains("a1")
                    {
                        Ok(true)
                    } else {
                        Ok(false)
                    }
                };
            let tree_size = tree.size();
            mutator
                .minimize_rec(&mut tree, &HashSet::new(), &ctx, 0, tree_size, &mut tester)
                .expect("RAND_1814454842");
        }
        let unparse = String::from_utf8(tree.unparse_to_vec(&ctx)).expect("RAND_3329325316");
        println!("unparse: {unparse}");
        assert!(unparse.contains("a1"));
        assert!(!unparse.contains("a2"));
        assert!(!unparse.contains("b2"));
        assert!(!unparse.contains("b3"));
    }
}
#[test]
fn deterministic_rule() {
    // Every deterministic rule mutation must produce output different from the
    // original tree, and more than two distinct mutations must be generated.
    let mut rand = StdRand::new();
    let mut ctx = Context::new();
    let r1 = ctx.add_rule("A", b"a {A:a}");
    let _ = ctx.add_rule("A", b"b {A:a}");
    let _ = ctx.add_rule("A", b"a");
    ctx.initialize(101);
    for _ in 0..100 {
        let tree = ctx.generate_tree_from_rule(&mut rand, r1, 100);
        let mut mutator = Mutator::new(&ctx);
        let unparse = tree.unparse_to_vec(&ctx);
        let mut count = 0;
        {
            let mut tester = |tree_mut: &TreeMutation, ctx: &Context| {
                assert_ne!(tree_mut.unparse_to_vec(ctx), unparse);
                count += 1;
                Ok(())
            };
            mutator
                .mut_rules(&mut rand, &tree, &ctx, 0, tree.size(), &mut tester)
                .expect("RAND_3708258673");
        }
        assert!(count > 2);
    }
}
#[test]
fn deterministic_splice() {
    // Splicing against a chunk store must always yield output different from
    // the original tree. Note: the original test created a second, shadowed
    // `let mut rand = StdRand::new();` that was never used — removed.
    let mut ctx = Context::new();
    let mut rand = StdRand::new();
    let mut cks = ChunkStore::new("/tmp/".to_string());
    let r1 = ctx.add_rule("A", b"a {A:a}");
    let _ = ctx.add_rule("A", b"b {A:a}");
    let r3 = ctx.add_rule("A", b"c {A:a}");
    let _ = ctx.add_rule("A", b"a");
    ctx.initialize(101);
    let tree = ctx.generate_tree_from_rule(&mut rand, r3, 100);
    cks.add_tree(tree, &ctx);
    for _ in 0..100 {
        let tree = ctx.generate_tree_from_rule(&mut rand, r1, 100);
        let mut mutator = Mutator::new(&ctx);
        let unparse = tree.unparse_to_vec(&ctx);
        let mut tester = |tree_mut: &TreeMutation, ctx: &Context| {
            assert_ne!(tree_mut.unparse_to_vec(ctx), unparse);
            Ok(())
        };
        mutator
            .mut_splice(&mut rand, &tree, &ctx, &cks, &mut tester)
            .expect("RAND_236145345");
    }
}
#[test]
fn check_det_rules_values() {
    // Checks that the deterministic rule mutator explores alternative rules at
    // every level of the grammar (S, A and B alternatives all show up).
    let mut rand = StdRand::new();
    let mut ctx = Context::new();
    let r1 = ctx.add_rule("S", b"s1 {A}");
    let _ = ctx.add_rule("S", b"s2 {A}");
    let r2 = ctx.add_rule("A", b"a1 {B}");
    let _ = ctx.add_rule("A", b"a2 {B}");
    let r3 = ctx.add_rule("B", b"b1");
    let _ = ctx.add_rule("B", b"b2");
    ctx.initialize(10);
    for _ in 0..100 {
        let tree = Tree::from_rule_vec(
            [r1, r2, r3]
                .iter()
                .map(|x| RuleIdOrCustom::Rule(*x))
                .collect::<Vec<_>>(),
            &ctx,
        );
        let mut mutator = Mutator::new(&ctx);
        // Collect every distinct unparsed mutation result.
        let mut unparses = HashSet::new();
        {
            let mut tester = |tree_mut: &TreeMutation, ctx: &Context| {
                unparses.insert(tree_mut.unparse_to_vec(ctx));
                Ok(())
            };
            mutator
                .mut_rules(&mut rand, &tree, &ctx, 0, tree.size(), &mut tester)
                .expect("RAND_3954705736");
        }
        println!(
            "{:?}",
            unparses
                .iter()
                .map(|v| str::from_utf8(v).expect("RAND_3927087882"))
                .collect::<Vec<_>>()
        );
        assert!(unparses.contains("s1 a1 b2".as_bytes()));
        assert!(
            unparses.contains("s1 a2 b1".as_bytes())
                || unparses.contains("s1 a2 b2".as_bytes())
        );
        assert!(
            unparses.contains("s2 a1 b1".as_bytes())
                || unparses.contains("s2 a2 b2".as_bytes())
                || unparses.contains("s2 a1 b2".as_bytes())
                || unparses.contains("s2 a2 b1".as_bytes())
        );
    }
}
}

View File

@ -0,0 +1,151 @@
use std::ops::Add;
use serde::{Deserialize, Serialize};
/// Identifier of a grammar rule. Newtype over a `usize` index.
#[derive(PartialEq, Eq, Clone, Copy, Debug, Hash, Serialize, Deserialize)]
pub struct RuleId(usize);
/// Identifier of a node inside a derivation tree. Newtype over a `usize` index.
#[derive(PartialEq, PartialOrd, Eq, Clone, Copy, Debug, Hash, Serialize, Deserialize)]
pub struct NodeId(usize);
/// Identifier of a nonterminal symbol. Newtype over a `usize` index.
#[derive(PartialEq, Eq, Clone, Copy, Debug, Hash, Serialize, Deserialize)]
pub struct NTermId(usize);
impl RuleId {
    /// Returns the underlying `usize` index.
    // Takes `self` by value: `RuleId` is `Copy`, and this matches `NTermId::to_i`.
    #[must_use]
    pub fn to_i(self) -> usize {
        self.0
    }
}
impl From<usize> for RuleId {
    fn from(i: usize) -> RuleId {
        RuleId(i)
    }
}
impl From<RuleId> for usize {
    fn from(rule: RuleId) -> usize {
        rule.0
    }
}
impl Add<usize> for RuleId {
    type Output = RuleId;
    /// Offsets the id by `rhs`.
    fn add(self, rhs: usize) -> RuleId {
        RuleId(self.0 + rhs)
    }
}
impl NodeId {
    /// Returns the underlying `usize` index.
    // Takes `self` by value: `NodeId` is `Copy`, and this matches `NTermId::to_i`.
    #[must_use]
    pub fn to_i(self) -> usize {
        self.0
    }
}
impl From<usize> for NodeId {
    fn from(i: usize) -> Self {
        NodeId(i)
    }
}
impl From<NodeId> for usize {
    fn from(val: NodeId) -> Self {
        val.0
    }
}
impl Add<usize> for NodeId {
    type Output = NodeId;
    /// Offsets the id by `rhs`.
    fn add(self, rhs: usize) -> NodeId {
        NodeId(self.0 + rhs)
    }
}
impl NodeId {
    // NOTE(review): these private helpers mirror the surface of the (unstable)
    // `Step` trait; confirm they still have callers in this crate.
    /// Number of steps from `start` up to `end`; `None` when `start > end`.
    fn steps_between(start: Self, end: Self) -> Option<usize> {
        let start_i = start.to_i();
        let end_i = end.to_i();
        if start > end {
            return None;
        }
        Some(end_i - start_i)
    }
    /// The next id (`self + 1`).
    fn add_one(self) -> Self {
        self.add(1)
    }
    /// The previous id. Underflows (panicking in debug builds) on `NodeId(0)`.
    fn sub_one(self) -> Self {
        NodeId(self.0 - 1)
    }
    /// Checked addition; `None` on `usize` overflow.
    fn add_usize(self, n: usize) -> Option<Self> {
        self.0.checked_add(n).map(NodeId::from)
    }
}
impl NTermId {
    /// Returns the underlying `usize` index.
    #[must_use]
    pub fn to_i(self) -> usize {
        self.0
    }
}
impl From<usize> for NTermId {
    fn from(i: usize) -> Self {
        NTermId(i)
    }
}
impl From<NTermId> for usize {
    fn from(val: NTermId) -> Self {
        val.0
    }
}
impl Add<usize> for NTermId {
    type Output = NTermId;
    /// Offsets the id by `rhs`.
    fn add(self, rhs: usize) -> NTermId {
        NTermId(self.0 + rhs)
    }
}
#[cfg(test)]
mod tests {
    use super::{NTermId, NodeId, RuleId};

    /// Round-trips `RuleId` through `usize` conversions and checks `Add`.
    #[test]
    fn rule_id() {
        let a: RuleId = 1337.into();
        let b = RuleId::from(1338);
        let a_raw: usize = a.into();
        assert_eq!(a_raw, 1337);
        assert_eq!(b.to_i(), 1338);
        assert_eq!(b + 3, RuleId::from(1341));
    }

    /// Round-trips `NodeId` through `usize` conversions and checks `Add`.
    #[test]
    fn node_id() {
        let a: NodeId = 1337.into();
        let b = NodeId::from(1338);
        let a_raw: usize = a.into();
        assert_eq!(a_raw, 1337);
        assert_eq!(b.to_i(), 1338);
        assert_eq!(b + 3, NodeId::from(1341));
    }

    /// Round-trips `NTermId` through `usize` conversions and checks `Add`.
    #[test]
    fn nterm_id() {
        let a: NTermId = 1337.into();
        let b = NTermId::from(1338);
        let a_raw: usize = a.into();
        assert_eq!(a_raw, 1337);
        assert_eq!(b.to_i(), 1338);
        assert_eq!(b + 3, NTermId::from(1341));
    }
}

View File

@ -0,0 +1,111 @@
use alloc::vec::Vec;
use std::fmt;
use hashbrown::HashMap;
use libafl_bolts::rands::{loaded_dice::LoadedDiceSampler, Rand};
use crate::common::nautilus::grammartec::{
context::Context,
newtypes::{NTermId, NodeId},
tree::Tree,
};
/// Precomputed information about the recursive occurrences of one nonterminal
/// inside a tree, used to sample recursion pairs for mutation.
pub struct RecursionInfo {
    // Maps each recursive node to its closest ancestor with the same nonterminal.
    recursive_parents: HashMap<NodeId, NodeId>,
    // Weighted sampler over the offsets of `node_by_offset`/`depth_by_offset`.
    sampler: LoadedDiceSampler,
    // Weight recorded for each recursive node (see `find_parents`).
    depth_by_offset: Vec<usize>,
    // The recursive nodes, in tree (pre-)order.
    node_by_offset: Vec<NodeId>,
}
impl fmt::Debug for RecursionInfo {
    // Manual impl: the sampler has no `Debug`, so it is omitted
    // (`finish_non_exhaustive`).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("RecursionInfo")
            .field("recursive_parents", &self.recursive_parents)
            .field("depth_by_offset", &self.depth_by_offset)
            .field("node_by_offset", &self.node_by_offset)
            .finish_non_exhaustive()
    }
}
impl RecursionInfo {
    /// Gathers recursion information for nonterminal `n` inside tree `t`.
    /// Returns `None` when `t` contains no recursive occurrence of `n`.
    #[must_use]
    pub fn new(t: &Tree, n: NTermId, ctx: &Context) -> Option<Self> {
        let (recursive_parents, node_by_offset, depth_by_offset) =
            RecursionInfo::find_parents(t, n, ctx)?;
        let sampler = RecursionInfo::build_sampler(&depth_by_offset);
        Some(Self {
            recursive_parents,
            sampler,
            depth_by_offset,
            node_by_offset,
        })
    }
    // Constructs a tree where each node points to the first ancestor with the same nonterminal (e.g. each node points the next node above it, were the pair forms a recursive occurrence of a nonterminal).
    // This structure is an ''inverted tree''. We use it later to sample efficiently from the set
    // of all possible recursive pairs without occurring `n^2` overhead. Additionally, we return a
    // ordered vec of all nodes with nonterminal n and the depth of this node in the freshly
    // constructed 'recursion tree' (weight). Each node is the end point of exactly `weight` many
    // different recursions. Therefore we use the weight of the node to sample the endpoint of a path trough the
    // recursion tree. Then we just sample the length of this path uniformly as `(1.. weight)`. This
    // yields a uniform sample from the whole set of recursions inside the tree. If you read this, Good luck you are on your own.
    //
    // NOTE(review): `depth` below is incremented once per tree level (for every
    // child), not once per recursive occurrence, so the recorded weight is the
    // node's depth in the full derivation tree rather than in the 'recursion
    // tree' described above. Confirm this matches the intended sampling scheme.
    #[allow(clippy::type_complexity)]
    fn find_parents(
        t: &Tree,
        nt: NTermId,
        ctx: &Context,
    ) -> Option<(HashMap<NodeId, NodeId>, Vec<NodeId>, Vec<usize>)> {
        // Depth-first walk over the pre-order rule vector; for each pending
        // child the stack carries the closest enclosing `nt` node (if any).
        let mut stack = vec![(None, 0)];
        let mut res = None;
        for (i, rule) in t.rules.iter().enumerate() {
            let node = NodeId::from(i);
            let (mut maybe_parent, depth) = stack.pop().expect("RAND_3404900492");
            if ctx.get_nt(rule) == nt {
                if let Some(parent) = maybe_parent {
                    // Lazily create the result triple on the first pair found.
                    let (mut parents, mut ids, mut weights) =
                        res.unwrap_or_else(|| (HashMap::new(), vec![], vec![]));
                    parents.insert(node, parent);
                    ids.push(node);
                    weights.push(depth);
                    res = Some((parents, ids, weights));
                }
                maybe_parent = Some(node);
            }
            for _ in 0..ctx.get_num_children(rule) {
                stack.push((maybe_parent, depth + 1));
            }
        }
        res
    }
    /// Builds a loaded-dice sampler over node offsets, weighted by `depths`
    /// (normalized to sum to 1). The assert fires if all depths are zero.
    #[allow(clippy::cast_precision_loss)]
    fn build_sampler(depths: &[usize]) -> LoadedDiceSampler {
        let mut weights = depths.iter().map(|x| *x as f64).collect::<Vec<_>>();
        let norm: f64 = weights.iter().sum();
        assert!(norm > 0.0);
        for v in &mut weights {
            *v /= norm;
        }
        LoadedDiceSampler::new(&weights)
    }
    /// Samples a recursion pair `(ancestor, descendant)` using the weighted sampler.
    pub fn get_random_recursion_pair<R: Rand>(&mut self, rand: &mut R) -> (NodeId, NodeId) {
        let offset = self.sampler.sample(rand);
        self.get_recursion_pair_by_offset(offset)
    }
    /// Returns the recursion pair `(ancestor, descendant)` for the node at `offset`.
    #[must_use]
    pub fn get_recursion_pair_by_offset(&self, offset: usize) -> (NodeId, NodeId) {
        let node1 = self.node_by_offset[offset];
        let mut node2 = node1;
        // NOTE(review): the loop body indexes with `node1`, so every iteration
        // after the first recomputes the same value (node1's direct recursive
        // parent). If the intent was to walk `depth` recursive ancestors, this
        // should probably read `self.recursive_parents[&node2]` — but with the
        // tree-level depths recorded by `find_parents`, the longer walk could
        // hit missing keys. Confirm before changing.
        for _ in 0..(self.depth_by_offset[offset]) {
            node2 = self.recursive_parents[&node1];
        }
        (node2, node1)
    }
    /// Number of nodes that end at least one recursion (the sample space size).
    #[must_use]
    pub fn get_number_of_recursions(&self) -> usize {
        self.node_by_offset.len()
    }
}

View File

@ -0,0 +1,384 @@
use alloc::{string::String, vec::Vec};
use core::cell::OnceCell;
use std::sync::OnceLock;
use libafl_bolts::rands::Rand;
use pyo3::prelude::{PyObject, Python};
use regex;
use regex_syntax::hir::Hir;
use serde::{Deserialize, Serialize};
use crate::common::nautilus::{
grammartec::{
context::Context,
newtypes::{NTermId, NodeId, RuleId},
tree::Tree,
},
regex_mutator,
};
/// One element of a plain rule's right-hand side: either literal terminal
/// bytes or a reference to a nonterminal.
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
pub enum RuleChild {
    Term(Vec<u8>),
    NTerm(NTermId),
}
// Compiled once on first use: parses `{Name}` / `{Name:label}` descriptions.
static SPLITTER: OnceLock<regex::Regex> = OnceLock::new();
// Compiled once on first use: splits a format string into `{NT}` refs and literal runs.
static TOKENIZER: OnceLock<regex::bytes::Regex> = OnceLock::new();
/// Renders `bs` as a double-quoted string, escaping non-printable bytes with
/// `std::ascii::escape_default` notation (e.g. `\n`, `\xff`).
fn show_bytes(bs: &[u8]) -> String {
    use std::ascii::escape_default;
    // `escape_default` yields ASCII bytes, so mapping each to `char` is lossless.
    let visible: String = bs
        .iter()
        .flat_map(|&b| escape_default(b))
        .map(char::from)
        .collect();
    format!("\"{visible}\"")
}
impl RuleChild {
    /// Creates a terminal child from a literal byte string.
    #[must_use]
    pub fn from_lit(lit: &[u8]) -> Self {
        RuleChild::Term(lit.into())
    }
    /// Creates a nonterminal child from a `{A}`/`{A:label}` description,
    /// registering the nonterminal name in `ctx`.
    pub fn from_nt(nt: &str, ctx: &mut Context) -> Self {
        let (nonterm, _) = RuleChild::split_nt_description(nt);
        RuleChild::NTerm(ctx.aquire_nt_id(&nonterm))
    }
    /// Splits `{A:a}` or `{A}` into the nonterminal name and a (currently
    /// always empty) label.
    ///
    /// # Panics
    /// Panics when `nonterm` does not match the `{Name}`/`{Name:label}` shape.
    fn split_nt_description(nonterm: &str) -> (String, String) {
        let splitter = SPLITTER.get_or_init(|| {
            regex::Regex::new(r"^\{([A-Z][a-zA-Z_\-0-9]*)(?::([a-zA-Z_\-0-9]*))?\}$")
                .expect("RAND_1363289094")
        });
        //splits {A:a} or {A} into A and maybe a
        // Message fixed: was "need to match start with ... con only contain".
        let descr = splitter.captures(nonterm).unwrap_or_else(|| panic!("could not interpret Nonterminal {nonterm:?}. Nonterminal Descriptions need to start with a capital letter and can only contain [a-zA-Z_-0-9]"));
        //let name = descr.get(2).map(|m| m.as_str().into()).unwrap_or(default.to_string()));
        (descr[1].into(), String::new())
    }
    /// Debug representation using the nonterminal names stored in `ctx`.
    fn debug_show(&self, ctx: &Context) -> String {
        match self {
            RuleChild::Term(d) => show_bytes(d),
            RuleChild::NTerm(nt) => ctx.nt_id_to_s(*nt),
        }
    }
}
/// A rule application inside a tree: either a plain rule id, or a rule id plus
/// the custom bytes generated for it (used by regex-backed rules).
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
pub enum RuleIdOrCustom {
    Rule(RuleId),
    Custom(RuleId, Vec<u8>),
}
impl RuleIdOrCustom {
    /// The rule id, regardless of variant.
    #[must_use]
    pub fn id(&self) -> RuleId {
        match self {
            RuleIdOrCustom::Rule(id) | RuleIdOrCustom::Custom(id, _) => *id,
        }
    }
    /// The custom payload bytes.
    ///
    /// # Panics
    /// Panics when called on a plain `Rule` variant.
    #[must_use]
    pub fn data(&self) -> &[u8] {
        match self {
            RuleIdOrCustom::Custom(_, data) => data,
            RuleIdOrCustom::Rule(_) => panic!("cannot get data on a normal rule"),
        }
    }
}
/// A grammar production: a plain format rule, a Python script rule, or a
/// regex-backed rule.
#[derive(Clone, Debug)]
pub enum Rule {
    Plain(PlainRule),
    Script(ScriptRule),
    RegExp(RegExpRule),
}
/// Rule whose expansions are produced from a regex pattern (parsed to `Hir`).
#[derive(Debug, Clone)]
pub struct RegExpRule {
    pub nonterm: NTermId,
    pub hir: Hir,
}
impl RegExpRule {
    /// Debug representation: `NT => <hir>`.
    #[must_use]
    pub fn debug_show(&self, ctx: &Context) -> String {
        format!("{} => {:?}", ctx.nt_id_to_s(self.nonterm), self.hir)
    }
}
/// Rule whose expansion is computed by a Python function that receives the
/// unparsed bytes of its nonterminal children.
#[derive(Debug)]
pub struct ScriptRule {
    pub nonterm: NTermId,
    pub nonterms: Vec<NTermId>,
    pub script: PyObject,
}
impl ScriptRule {
    /// Debug representation: `NT => func(arg1, arg2, ...)`.
    #[must_use]
    pub fn debug_show(&self, ctx: &Context) -> String {
        let mut arg_names = Vec::new();
        for nt in &self.nonterms {
            arg_names.push(ctx.nt_id_to_s(*nt));
        }
        format!(
            "{} => func({})",
            ctx.nt_id_to_s(self.nonterm),
            arg_names.join(", ")
        )
    }
}
/// Rule described by a format string: an ordered mix of terminal bytes and
/// nonterminal references.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct PlainRule {
    pub nonterm: NTermId,
    pub children: Vec<RuleChild>,
    // Cached list of the nonterminals appearing in `children`, in order.
    pub nonterms: Vec<NTermId>,
}
impl PlainRule {
    /// Debug representation: `NT => child1, child2, ...`.
    #[must_use]
    pub fn debug_show(&self, ctx: &Context) -> String {
        let rendered: Vec<String> = self
            .children
            .iter()
            .map(|c| c.debug_show(ctx))
            .collect();
        format!("{} => {}", ctx.nt_id_to_s(self.nonterm), rendered.join(", "))
    }
}
// Manual impl: a `PyObject` can only be cloned while holding the Python GIL
// (`clone_ref`), so `#[derive(Clone)]` is not applicable here.
impl Clone for ScriptRule {
    fn clone(&self) -> Self {
        Python::with_gil(|py| ScriptRule {
            nonterm: self.nonterm,
            nonterms: self.nonterms.clone(),
            script: self.script.clone_ref(py),
        })
    }
}
impl Rule {
/// Creates a Python-script rule for `nonterm` whose arguments are `nterms`.
pub fn from_script(
    ctx: &mut Context,
    nonterm: &str,
    nterms: &[String],
    script: PyObject,
) -> Self {
    // Tail expression; the explicit `return ...;` was redundant.
    Self::Script(ScriptRule {
        nonterm: ctx.aquire_nt_id(nonterm),
        nonterms: nterms.iter().map(|s| ctx.aquire_nt_id(s)).collect(),
        script,
    })
}
/// Creates a regex-backed rule for `nonterm` from the pattern `regex`.
///
/// # Panics
/// Panics if `regex` is not a valid pattern.
pub fn from_regex(ctx: &mut Context, nonterm: &str, regex: &str) -> Self {
    use regex_syntax::ParserBuilder;
    let mut parser = ParserBuilder::new().unicode(true).utf8(false).build();
    // Grammars are user input: name the failure instead of a bare `unwrap`.
    let hir = parser
        .parse(regex)
        .expect("invalid regular expression in grammar rule");
    Self::RegExp(RegExpRule {
        nonterm: ctx.aquire_nt_id(nonterm),
        hir,
    })
}
/// Debug representation, dispatching to the concrete rule kind.
#[must_use]
pub fn debug_show(&self, ctx: &Context) -> String {
    match self {
        Self::Plain(r) => r.debug_show(ctx),
        Self::Script(r) => r.debug_show(ctx),
        Self::RegExp(r) => r.debug_show(ctx),
    }
}
pub fn from_format(ctx: &mut Context, nonterm: &str, format: &[u8]) -> Self {
let children = Rule::tokenize(format, ctx);
let nonterms = children
.iter()
.filter_map(|c| {
if let &RuleChild::NTerm(n) = c {
Some(n)
} else {
None
}
})
.collect();
Self::Plain(PlainRule {
nonterm: ctx.aquire_nt_id(nonterm),
children,
nonterms,
})
}
/// Builds a rule consisting of a single terminal `term` for `ntermid`.
#[must_use]
pub fn from_term(ntermid: NTermId, term: &[u8]) -> Self {
    Self::Plain(PlainRule {
        nonterm: ntermid,
        children: vec![RuleChild::Term(term.to_vec())],
        nonterms: Vec::new(),
    })
}
/// Replaces the escape sequences `\{` and `\}` with literal `{` / `}`;
/// all other bytes are copied through unchanged.
fn unescape(bytes: &[u8]) -> Vec<u8> {
    // Inputs shorter than one escape sequence cannot contain one.
    if bytes.len() < 2 {
        return bytes.to_vec();
    }
    let mut out = Vec::with_capacity(bytes.len());
    let mut i = 0;
    // Look at byte pairs; stop one short so `bytes[i + 1]` stays in bounds.
    while i < bytes.len() - 1 {
        match (bytes[i], bytes[i + 1]) {
            (b'\\', b'{') => {
                out.push(b'{');
                i += 2;
            }
            (b'\\', b'}') => {
                out.push(b'}');
                i += 2;
            }
            (b, _) => {
                out.push(b);
                i += 1;
            }
        }
    }
    // The last byte is still pending unless it was consumed by an escape pair.
    if i < bytes.len() {
        out.push(bytes[bytes.len() - 1]);
    }
    out
}
/// Splits `format` into `{NT}` placeholders and (unescaped) literal runs,
/// converting each into a [`RuleChild`].
fn tokenize(format: &[u8], ctx: &mut Context) -> Vec<RuleChild> {
    let tokenizer = TOKENIZER.get_or_init(|| {
        regex::bytes::RegexBuilder::new(r"(?-u)(\{[^}\\]+\})|((?:[^{\\]|\\\{|\\\}|\\)+)")
            .dot_matches_new_line(true)
            .build()
            .expect("RAND_994455541")
        // RegExp Changed from (\{[^}\\]+\})|((?:[^{\\]|\\\{|\\\}|\\\\)+) because of problems with \\ (\\ was not matched and therefore thrown away)
    });
    // Tail expression; the explicit `return ...;` was redundant.
    tokenizer
        .captures_iter(format)
        .map(|cap| {
            if let Some(sub) = cap.get(1) {
                //println!("cap.get(1): {}", sub.as_str());
                RuleChild::from_nt(
                    std::str::from_utf8(sub.as_bytes())
                        .expect("nonterminals need to be valid strings"),
                    ctx,
                )
            } else if let Some(sub) = cap.get(2) {
                RuleChild::from_lit(&Self::unescape(sub.as_bytes()))
            } else {
                unreachable!()
            }
        })
        .collect()
}
/// The nonterminals this rule expands into (always empty for regex rules).
#[must_use]
pub fn nonterms(&self) -> &[NTermId] {
    match self {
        Rule::Script(r) => &r.nonterms,
        Rule::Plain(r) => &r.nonterms,
        Rule::RegExp(_) => &[],
    }
}
/// Number of nonterminal children of this rule.
#[must_use]
pub fn number_of_nonterms(&self) -> usize {
    self.nonterms().len()
}
/// The nonterminal this rule derives.
#[must_use]
pub fn nonterm(&self) -> NTermId {
    match self {
        Rule::Script(r) => r.nonterm,
        Rule::Plain(r) => r.nonterm,
        Rule::RegExp(r) => r.nonterm,
    }
}
/// Randomly expands this rule into `tree`, appending one entry per applied
/// rule; `len` is the node budget for the whole subtree. Returns the number
/// of nodes consumed, including the (already pushed) node for this rule.
///
/// # Panics
/// Panics when `len` is smaller than the minimal derivation length of this
/// rule's children, or when the tree's parallel vectors are out of sync.
pub fn generate<R: Rand>(
    &self,
    rand: &mut R,
    tree: &mut Tree,
    ctx: &Context,
    len: usize,
) -> usize {
    // println!("Rhs: {:?}, len: {}", self.nonterms, len);
    // println!("Min needed len: {}", self.nonterms.iter().fold(0, |sum, nt| sum + ctx.get_min_len_for_nt(*nt) ));
    let minimal_needed_len = self
        .nonterms()
        .iter()
        .fold(0, |sum, nt| sum + ctx.get_min_len_for_nt(*nt));
    assert!(minimal_needed_len <= len);
    let mut remaining_len = len;
    remaining_len -= minimal_needed_len;
    //if we have no further children, we consumed no len
    let mut total_size = 1;
    // The caller pushed this rule's node right before calling `generate`.
    let paren = NodeId::from(tree.rules.len() - 1);
    //generate each childs tree from the left to the right. That way the only operation we ever
    //perform is to push another node to the end of the tree_vec
    for (i, nt) in self.nonterms().iter().enumerate() {
        //sample how much len this child can use up (e.g. how big can
        //let cur_child_max_len = Rule::get_random_len(remaining_nts, remaining_len) + ctx.get_min_len_for_nt(*nt);
        let mut cur_child_max_len;
        let mut new_nterms = Vec::new();
        new_nterms.extend_from_slice(&self.nonterms()[i..]);
        if new_nterms.is_empty() {
            cur_child_max_len = remaining_len;
        } else {
            cur_child_max_len = Context::get_random_len(rand, remaining_len, &new_nterms);
        }
        cur_child_max_len += ctx.get_min_len_for_nt(*nt);
        //get a rule that can be used with the remaining length
        let rid = ctx.get_random_rule_for_nt(rand, *nt, cur_child_max_len);
        // Regex rules carry their generated bytes inline as custom data.
        let rule_or_custom = match ctx.get_rule(rid) {
            Rule::Plain(_) | Rule::Script(_) => RuleIdOrCustom::Rule(rid),
            Rule::RegExp(RegExpRule { hir, .. }) => {
                RuleIdOrCustom::Custom(rid, regex_mutator::generate(rand, hir))
            }
        };
        assert_eq!(tree.rules.len(), tree.sizes.len());
        assert_eq!(tree.sizes.len(), tree.paren.len());
        let offset = tree.rules.len();
        tree.rules.push(rule_or_custom);
        tree.sizes.push(0);
        tree.paren.push(NodeId::from(0));
        //generate the subtree for this rule, return the total consumed len
        let consumed_len = ctx
            .get_rule(rid)
            .generate(rand, tree, ctx, cur_child_max_len - 1);
        tree.sizes[offset] = consumed_len;
        tree.paren[offset] = paren;
        //println!("{}: min_needed_len: {}, Min-len: {} Consumed len: {} cur_child_max_len: {} remaining len: {}, total_size: {}, len: {}", ctx.nt_id_to_s(nt.clone()), minimal_needed_len, ctx.get_min_len_for_nt(*nt), consumed_len, cur_child_max_len, remaining_len, total_size, len);
        assert!(consumed_len <= cur_child_max_len);
        //println!("Rule: {}, min_len: {}", ctx.nt_id_to_s(nt.clone()), ctx.get_min_len_for_nt(*nt));
        assert!(consumed_len >= ctx.get_min_len_for_nt(*nt));
        //we can use the len that where not consumed by this iteration during the next iterations,
        //therefore it will be redistributed evenly amongst the other
        remaining_len += ctx.get_min_len_for_nt(*nt);
        remaining_len -= consumed_len;
        //add the consumed len to the total_len
        total_size += consumed_len;
    }
    //println!("Rule: {}, Size: {}", ctx.nt_id_to_s(self.nonterm.clone()), total_size);
    total_size
}
}

View File

@ -0,0 +1,590 @@
use alloc::vec::Vec;
use std::{cmp, collections::HashSet, io, io::Write, marker::Sized};
use libafl_bolts::rands::Rand;
use pyo3::{
prelude::{PyObject, PyResult, Python},
types::{PyBytes, PyString, PyTuple},
FromPyObject, PyTypeInfo,
};
use serde::{Deserialize, Serialize};
use super::{
super::regex_mutator,
context::Context,
newtypes::{NTermId, NodeId, RuleId},
recursion_info::RecursionInfo,
rule::{PlainRule, RegExpRule, Rule, RuleChild, RuleIdOrCustom, ScriptRule},
};
/// One pending work item of the iterative unparser.
enum UnparseStep<'dat> {
    // Emit literal bytes.
    Term(&'dat [u8]),
    // Expand the next node, which must derive this nonterminal.
    Nonterm(NTermId),
    // Call a Python script with the last `usize` buffered child outputs.
    Script(usize, PyObject),
    // Open a fresh capture buffer for one script argument.
    PushBuffer(),
}
/// Iterative (stack-based) unparser that renders a [`TreeLike`] into a writer.
struct Unparser<'data, 'tree: 'data, 'ctx: 'data, W: Write, T: TreeLike> {
    tree: &'tree T,
    // Pending work items, processed LIFO.
    stack: Vec<UnparseStep<'data>>,
    // Capture buffers for in-flight script-rule arguments.
    buffers: Vec<io::Cursor<Vec<u8>>>,
    w: W,
    // Index of the next node to visit, in pre-order.
    i: usize,
    ctx: &'ctx Context,
}
impl<'data, 'tree: 'data, 'ctx: 'data, W: Write, T: TreeLike> Unparser<'data, 'tree, 'ctx, W, T> {
    /// Creates an unparser starting at node `nid` of `tree`, writing to `w`.
    fn new(nid: NodeId, w: W, tree: &'tree T, ctx: &'ctx Context) -> Self {
        let i = nid.to_i();
        let nt = tree.get_rule(NodeId::from(i), ctx).nonterm();
        let op = UnparseStep::<'data>::Nonterm(nt);
        let stack = vec![op];
        Self {
            stack,
            buffers: vec![],
            w,
            tree,
            i,
            ctx,
        }
    }
    /// Pops and executes one work item; returns `false` once the stack is empty.
    fn unparse_step(&mut self) -> bool {
        match self.stack.pop() {
            Some(UnparseStep::Term(data)) => self.write(data),
            Some(UnparseStep::Nonterm(nt)) => self.nonterm(nt),
            Some(UnparseStep::Script(num, expr)) => self.unwrap_script(num, &expr),
            Some(UnparseStep::PushBuffer()) => self.push_buffer(),
            None => return false,
        };
        true
    }
    /// Writes `data` to the innermost script capture buffer, or directly to the
    /// output writer if no script call is in flight.
    fn write(&mut self, data: &[u8]) {
        if let Some(buff) = self.buffers.last_mut() {
            buff.write_all(data).unwrap();
        } else {
            self.w.write_all(data).unwrap();
        }
    }
    /// Expands the next node, which must derive the nonterminal `nt`.
    fn nonterm(&mut self, nt: NTermId) {
        self.next_rule(nt);
    }
    /// Runs a script rule under the GIL; on Python errors, prints them and
    /// then aborts via `unwrap`.
    fn unwrap_script(&mut self, num: usize, expr: &PyObject) {
        Python::with_gil(|py| {
            self.script(py, num, expr)
                .map_err(|e| e.print_and_set_sys_last_vars(py))
                .unwrap();
        });
    }
    /// Calls `expr` with the last `num` buffered child outputs as `bytes`
    /// arguments and writes its `str` or `bytes` result to the output.
    fn script(&mut self, py: Python, num: usize, expr: &PyObject) -> PyResult<()> {
        let bufs = self.buffers.split_off(self.buffers.len() - num);
        let bufs = bufs
            .into_iter()
            .map(io::Cursor::into_inner)
            .collect::<Vec<_>>();
        let byte_arrays = bufs.iter().map(|b| PyBytes::new(py, b));
        let res = expr.call1(py, PyTuple::new(py, byte_arrays))?;
        if PyString::is_type_of(res.as_ref(py)) {
            let pystr = <&PyString>::extract(res.as_ref(py))?;
            self.write(pystr.to_string_lossy().as_bytes());
        } else if PyBytes::is_type_of(res.as_ref(py)) {
            let pybytes = <&PyBytes>::extract(res.as_ref(py))?;
            self.write(pybytes.as_bytes());
        } else {
            return Err(pyo3::exceptions::PyValueError::new_err(
                "script function should return string or bytes",
            ));
        }
        Ok(())
    }
    /// Opens a fresh capture buffer for one script argument.
    fn push_buffer(&mut self) {
        self.buffers.push(io::Cursor::new(vec![]));
    }
    /// Advances to the next node in pre-order and schedules its expansion.
    fn next_rule(&mut self, nt: NTermId) {
        let nid = NodeId::from(self.i);
        let rule: &'ctx Rule = self.tree.get_rule(nid, self.ctx);
        assert_eq!(nt, rule.nonterm());
        self.i += 1;
        match rule {
            Rule::Plain(r) => self.next_plain(r),
            Rule::Script(r) => self.next_script(r),
            Rule::RegExp(_) => self.next_regexp(self.tree.get_custom_rule_data(nid)),
        }
    }
    /// Pushes a plain rule's children in reverse so they pop in source order.
    fn next_plain(&mut self, r: &'ctx PlainRule) {
        for rule_child in r.children.iter().rev() {
            let op = match rule_child {
                RuleChild::Term(data) => UnparseStep::<'data>::Term(data),
                RuleChild::NTerm(id) => UnparseStep::<'data>::Nonterm(*id),
            };
            self.stack.push(op);
        }
    }
    /// Schedules a script call to run after all of its arguments; each
    /// argument's output gets captured into its own buffer.
    fn next_script(&mut self, r: &ScriptRule) {
        Python::with_gil(|py| {
            self.stack.push(UnparseStep::Script(
                r.nonterms.len(),
                r.script.clone_ref(py),
            ));
        });
        for nterm in r.nonterms.iter().rev() {
            self.stack.push(UnparseStep::Nonterm(*nterm));
            self.stack.push(UnparseStep::PushBuffer());
        }
    }
    /// Regex nodes carry their generated bytes as custom data; emit verbatim.
    fn next_regexp(&mut self, data: &'tree [u8]) {
        self.stack.push(UnparseStep::<'data>::Term(data));
    }
    /// Runs until the stack drains; returns the id one past the last visited node.
    fn unparse(&mut self) -> NodeId {
        while self.unparse_step() {}
        NodeId::from(self.i)
    }
}
/// Common interface over tree-shaped derivations ([`Tree`] and
/// [`TreeMutation`]) that can be unparsed into bytes.
pub trait TreeLike
where
    Self: Sized,
{
    /// The rule id applied at node `n`.
    fn get_rule_id(&self, n: NodeId) -> RuleId;
    /// Total number of nodes.
    fn size(&self) -> usize;
    /// Materializes this view into an owned [`Tree`].
    fn to_tree(&self, _: &Context) -> Tree;
    /// Resolves the rule applied at node `n` through `ctx`.
    fn get_rule<'c>(&self, n: NodeId, ctx: &'c Context) -> &'c Rule;
    /// Raw rule entry (plain id, or id plus custom data) at node `n`.
    fn get_rule_or_custom(&self, n: NodeId) -> &RuleIdOrCustom;
    /// Custom (regex-generated) bytes stored at node `n`.
    fn get_custom_rule_data(&self, n: NodeId) -> &[u8];
    /// Nonterminal derived at node `n`.
    fn get_nonterm_id(&self, n: NodeId, ctx: &Context) -> NTermId {
        self.get_rule(n, ctx).nonterm()
    }
    /// Unparses the subtree rooted at `id` into `w`.
    fn unparse<W: Write>(&self, id: NodeId, ctx: &Context, mut w: &mut W) {
        Unparser::new(id, &mut w, self, ctx).unparse();
    }
    /// Unparses the whole tree (root node `0`) into `w`.
    fn unparse_to<W: Write>(&self, ctx: &Context, w: &mut W) {
        self.unparse(NodeId::from(0), ctx, w);
    }
    /// Unparses the whole tree into a fresh byte vector.
    fn unparse_to_vec(&self, ctx: &Context) -> Vec<u8> {
        self.unparse_node_to_vec(NodeId::from(0), ctx)
    }
    /// Unparses the subtree rooted at `n` into a fresh byte vector.
    fn unparse_node_to_vec(&self, n: NodeId, ctx: &Context) -> Vec<u8> {
        let mut data = vec![];
        self.unparse(n, ctx, &mut data);
        data
    }
    /// Unparses the whole tree to stdout (debugging helper).
    fn unparse_print(&self, ctx: &Context) {
        self.unparse_to(ctx, &mut io::stdout());
    }
}
/// A derivation tree stored as three parallel vectors in pre-order.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Tree {
    // Rule applied at each node (optionally with custom regex data).
    pub rules: Vec<RuleIdOrCustom>,
    // Subtree size (node count, including the node itself) per node.
    pub sizes: Vec<usize>,
    // Parent id per node; the root slot holds `NodeId(0)` (see `get_parent`).
    pub paren: Vec<NodeId>,
}
impl TreeLike for Tree {
    fn get_rule_id(&self, n: NodeId) -> RuleId {
        self.rules[n.to_i()].id()
    }
    fn size(&self) -> usize {
        self.rules.len()
    }
    // A `Tree` is already owned; materializing is just a clone.
    fn to_tree(&self, _ctx: &Context) -> Tree {
        self.clone()
    }
    fn get_rule<'c>(&self, n: NodeId, ctx: &'c Context) -> &'c Rule {
        ctx.get_rule(self.get_rule_id(n))
    }
    fn get_custom_rule_data(&self, n: NodeId) -> &[u8] {
        self.rules[n.to_i()].data()
    }
    fn get_rule_or_custom(&self, n: NodeId) -> &RuleIdOrCustom {
        &self.rules[n.to_i()]
    }
}
impl Tree {
/// Builds a tree from a flat pre-order rule vector, recomputing the subtree
/// sizes and parent links.
#[must_use]
pub fn from_rule_vec(rules: Vec<RuleIdOrCustom>, ctx: &Context) -> Self {
    let node_count = rules.len();
    let mut res = Tree {
        rules,
        sizes: vec![0; node_count],
        paren: vec![NodeId::from(0); node_count],
    };
    if !res.rules.is_empty() {
        res.calc_subtree_sizes_and_parents(ctx);
    }
    res
}
/// The rule id applied at node `n`.
#[must_use]
pub fn get_rule_id(&self, n: NodeId) -> RuleId {
    self.rules[n.to_i()].id()
}
/// Raw rule entry at node `n` (inherent twin of the `TreeLike` method).
fn get_rule_or_custom(&self, n: NodeId) -> &RuleIdOrCustom {
    &self.rules[n.to_i()]
}
/// Number of nodes in the subtree rooted at `n`, including `n` itself.
#[must_use]
pub fn subtree_size(&self, n: NodeId) -> usize {
    self.sizes[n.to_i()]
}
/// Builds a [`TreeMutation`] view that replaces the subtree at `n` with the
/// subtree of `other` rooted at `other_node`, without copying any rules.
#[must_use]
pub fn mutate_replace_from_tree<'a>(
    &'a self,
    n: NodeId,
    other: &'a Tree,
    other_node: NodeId,
) -> TreeMutation<'a> {
    let old_size = self.subtree_size(n);
    let new_size = other.subtree_size(other_node);
    // Tail expression; the explicit `return ...;` was redundant.
    TreeMutation {
        prefix: self.slice(0.into(), n),
        repl: other.slice(other_node, other_node + new_size),
        postfix: self.slice(n + old_size, self.rules.len().into()),
    }
}
/// Recomputes the `paren` and `sizes` vectors from `rules`.
fn calc_subtree_sizes_and_parents(&mut self, ctx: &Context) {
    self.calc_parents(ctx);
    self.calc_sizes();
}
/// Recomputes `paren` by replaying the pre-order derivation: the stack holds
/// the nonterminals still to be expanded, each paired with the node that
/// produced it.
///
/// # Panics
/// Panics when `rules` is not a valid pre-order derivation.
fn calc_parents(&mut self, ctx: &Context) {
    if self.size() == 0 {
        return;
    }
    let mut stack: Vec<(NTermId, NodeId)> = Vec::new();
    stack.push((
        self.get_rule(NodeId::from(0), ctx).nonterm(),
        NodeId::from(0),
    ));
    for i in 0..self.size() {
        let node_id = NodeId::from(i);
        let nonterm = self.get_rule(node_id, ctx).nonterm();
        //sanity check
        let (nterm_id, node) = stack.pop().expect("Not a valid tree for unparsing!");
        if nterm_id == nonterm {
            self.paren[i] = node;
        } else {
            panic!("Not a valid tree for unparsing!");
        }
        let rule = self.get_rule(node_id, ctx);
        // Push children reversed so they are popped in source order.
        for nonterm in rule.nonterms().iter().rev() {
            stack.push((*nonterm, node_id));
        }
    }
}
/// Recomputes `sizes`: each node's subtree node count, accumulated bottom-up.
fn calc_sizes(&mut self) {
    // Every node counts at least itself; `fill` replaces the manual loop.
    self.sizes.fill(1);
    // Walk leaves-to-root: in the pre-order vector, parents precede children,
    // so iterating in reverse sees every child before its parent.
    for i in (1..self.size()).rev() {
        self.sizes[self.paren[i].to_i()] += self.sizes[i];
    }
}
/// Returns the rule entries in the half-open node range `[from, to)`.
fn slice(&self, from: NodeId, to: NodeId) -> &[RuleIdOrCustom] {
    &self.rules[from.into()..to.into()]
}
/// Parent of `n`, or `None` for the root node `0`.
#[must_use]
pub fn get_parent(&self, n: NodeId) -> Option<NodeId> {
    if n == NodeId::from(0) {
        None
    } else {
        Some(self.paren[n.to_i()])
    }
}
/// Empties the tree, keeping the allocated capacity for reuse.
pub fn truncate(&mut self) {
    // `clear` is the idiomatic spelling of `truncate(0)`.
    self.rules.clear();
    self.sizes.clear();
    self.paren.clear();
}
/// Replaces the tree contents with a fresh random derivation of the
/// nonterminal `start`, using at most `len` nodes.
pub fn generate_from_nt<R: Rand>(
    &mut self,
    rand: &mut R,
    start: NTermId,
    len: usize,
    ctx: &Context,
) {
    let ruleid = ctx.get_random_rule_for_nt(rand, start, len);
    self.generate_from_rule(rand, ruleid, len - 1, ctx);
}
/// Replaces the tree contents with a fresh random derivation of `ruleid`,
/// allowing at most `max_len` additional nodes below the root.
pub fn generate_from_rule<R: Rand>(
    &mut self,
    rand: &mut R,
    ruleid: RuleId,
    max_len: usize,
    ctx: &Context,
) {
    match ctx.get_rule(ruleid) {
        Rule::Plain(..) | Rule::Script(..) => {
            self.truncate();
            self.rules.push(RuleIdOrCustom::Rule(ruleid));
            self.sizes.push(0);
            self.paren.push(NodeId::from(0));
            ctx.get_rule(ruleid).generate(rand, self, ctx, max_len);
            // The root's subtree spans the whole tree.
            self.sizes[0] = self.rules.len();
        }
        Rule::RegExp(RegExpRule { hir, .. }) => {
            // Regex rules form a single node; the generated bytes are stored inline.
            let rid = RuleIdOrCustom::Custom(ruleid, regex_mutator::generate(rand, hir));
            self.truncate();
            self.rules.push(rid);
            self.sizes.push(0);
            self.paren.push(NodeId::from(0));
            self.sizes[0] = self.rules.len();
        }
    }
}
/// Collects [`RecursionInfo`] for every nonterminal that occurs recursively in
/// this tree; `None` if the tree has no recursion at all.
#[must_use]
pub fn calc_recursions(&self, ctx: &Context) -> Option<Vec<RecursionInfo>> {
    let mut ret = Vec::new();
    let mut done_nterms = HashSet::new();
    for rule in &self.rules {
        let nterm = ctx.get_nt(rule);
        // `insert` returns `true` only on first sight, replacing the previous
        // `contains` + `insert` double lookup.
        if done_nterms.insert(nterm) {
            if let Some(rec_info) = RecursionInfo::new(self, nterm, ctx) {
                ret.push(rec_info);
            }
        }
    }
    if ret.is_empty() {
        None
    } else {
        Some(ret)
    }
}
/// Collects `(ancestor, descendant)` node pairs that derive the same
/// nonterminal. Work is bounded: only the last 10000 nodes are scanned, and at
/// most 16 ancestor levels are walked per node.
fn find_recursions_iter(&self, ctx: &Context) -> Vec<(NodeId, NodeId)> {
    let mut found_recursions = Vec::new();
    //Only search for iterations for up to 10000 nodes
    for i in 1..cmp::min(self.size(), 10000) {
        // Scan nodes from the back of the pre-order vector.
        let node_id = NodeId::from(self.size() - i);
        let current_nterm: NTermId = self.get_rule(node_id, ctx).nonterm();
        let mut current_node_id = self.paren[node_id.to_i()];
        let mut depth = 0;
        while current_node_id != NodeId::from(0) {
            if self.get_rule(current_node_id, ctx).nonterm() == current_nterm {
                found_recursions.push((current_node_id, node_id));
            }
            current_node_id = self.paren[current_node_id.to_i()];
            if depth > 15 {
                break;
            }
            depth += 1;
        }
    }
    found_recursions
}
}
/// A mutated tree view: three rule slices that are read as one contiguous
/// rule sequence (`prefix`, then `repl`, then `postfix`).
#[derive(Debug)]
pub struct TreeMutation<'a> {
    /// The rules before the replaced region
    pub prefix: &'a [RuleIdOrCustom],
    /// The rules of the replacement region
    pub repl: &'a [RuleIdOrCustom],
    /// The rules after the replaced region
    pub postfix: &'a [RuleIdOrCustom],
}
impl<'a> TreeMutation<'a> {
    /// Returns the [`RuleIdOrCustom`] stored at node `n`, treating `prefix`,
    /// `repl`, and `postfix` as one contiguous sequence.
    ///
    /// # Panics
    /// Panics if `n` lies past the end of all three slices.
    #[must_use]
    pub fn get_at(&self, n: NodeId) -> &'a RuleIdOrCustom {
        // Walk the three segments with a running index instead of
        // precomputing the segment end offsets.
        let mut i = n.to_i();
        if i < self.prefix.len() {
            return &self.prefix[i];
        }
        i -= self.prefix.len();
        if i < self.repl.len() {
            return &self.repl[i];
        }
        i -= self.repl.len();
        if i < self.postfix.len() {
            return &self.postfix[i];
        }
        panic!("index out of bound for rule access");
    }
}
impl<'a> TreeLike for TreeMutation<'a> {
    fn get_rule_id(&self, n: NodeId) -> RuleId {
        self.get_at(n).id()
    }

    fn size(&self) -> usize {
        // Total node count across all three segments.
        [self.prefix, self.repl, self.postfix]
            .iter()
            .map(|s| s.len())
            .sum()
    }

    fn get_rule_or_custom(&self, n: NodeId) -> &RuleIdOrCustom {
        self.get_at(n)
    }

    fn to_tree(&self, ctx: &Context) -> Tree {
        // Materialize the three segments into one owned rule vector.
        let rules = [self.prefix, self.repl, self.postfix].concat();
        Tree::from_rule_vec(rules, ctx)
    }

    fn get_rule<'c>(&self, n: NodeId, ctx: &'c Context) -> &'c Rule {
        ctx.get_rule(self.get_rule_id(n))
    }

    fn get_custom_rule_data(&self, n: NodeId) -> &[u8] {
        self.get_at(n).data()
    }
}
#[cfg(test)]
mod tests {
    use libafl_bolts::rands::StdRand;

    use super::{
        super::{context::Context, newtypes::NodeId},
        *,
    };

    /// Recursive reference implementation of the subtree-size and parent
    /// bookkeeping, used to cross-check the iterative `Tree` versions.
    fn calc_subtree_sizes_and_parents_rec_test(tree: &mut Tree, n: NodeId, ctx: &Context) -> usize {
        let mut cur = n + 1;
        let mut size = 1;
        for _ in 0..tree.get_rule(n, ctx).number_of_nonterms() {
            tree.paren[cur.to_i()] = n;
            let sub_size = calc_subtree_sizes_and_parents_rec_test(tree, cur, ctx);
            cur = cur + sub_size;
            size += sub_size;
        }
        tree.sizes[n.to_i()] = size;
        size
    }

    /// Builds the grammar `C -> B -> A...` shared by the consistency tests
    /// below (previously duplicated in each test).
    fn default_ctx() -> Context {
        let mut ctx = Context::new();
        let _ = ctx.add_rule("C", b"c{B}c3");
        let _ = ctx.add_rule("B", b"b{A}b23");
        let _ = ctx.add_rule("A", b"aasdf {A}");
        let _ = ctx.add_rule("A", b"a2 {A}");
        let _ = ctx.add_rule("A", b"a sdf{A}");
        let _ = ctx.add_rule("A", b"a 34{A}");
        let _ = ctx.add_rule("A", b"adfe {A}");
        let _ = ctx.add_rule("A", b"a32");
        ctx.initialize(50);
        ctx
    }

    #[test]
    fn check_calc_sizes_iter() {
        let mut rand = StdRand::new();
        let ctx = default_ctx();
        let mut tree = Tree::from_rule_vec(vec![], &ctx);
        for _ in 0..100 {
            tree.truncate();
            tree.generate_from_nt(&mut rand, ctx.nt_id("C"), 50, &ctx);
            calc_subtree_sizes_and_parents_rec_test(&mut tree, NodeId::from(0), &ctx);
            let expected = tree.sizes.clone();
            tree.calc_sizes();
            assert_eq!(expected, tree.sizes);
        }
    }

    #[test]
    fn check_calc_paren_iter() {
        let mut rand = StdRand::new();
        let ctx = default_ctx();
        let mut tree = Tree::from_rule_vec(vec![], &ctx);
        for _ in 0..100 {
            tree.truncate();
            tree.generate_from_nt(&mut rand, ctx.nt_id("C"), 50, &ctx);
            calc_subtree_sizes_and_parents_rec_test(&mut tree, NodeId::from(0), &ctx);
            let expected = tree.paren.clone();
            tree.calc_parents(&ctx);
            assert_eq!(expected, tree.paren);
        }
    }

    #[test]
    fn check_unparse_iter() {
        let mut rand = StdRand::new();
        let ctx = default_ctx();
        let mut tree = Tree::from_rule_vec(vec![], &ctx);
        for _ in 0..100 {
            tree.truncate();
            tree.generate_from_nt(&mut rand, ctx.nt_id("C"), 50, &ctx);
            let mut first = vec![];
            let mut second = vec![];
            tree.unparse(NodeId::from(0), &ctx, &mut first);
            tree.unparse(NodeId::from(0), &ctx, &mut second);
            // NOTE(review): both calls are identical, so this only checks that
            // `unparse` is deterministic for a fixed tree — confirm intent.
            assert_eq!(first, second);
        }
    }

    #[test]
    fn check_find_recursions() {
        // This test uses its own, fully recursive grammar (different from
        // `default_ctx` and initialized with a budget of 20).
        let mut rand = StdRand::new();
        let mut ctx = Context::new();
        let _ = ctx.add_rule("C", b"c{B}c");
        let _ = ctx.add_rule("B", b"b{A}b");
        let _ = ctx.add_rule("A", b"a {A}");
        let _ = ctx.add_rule("A", b"a {A}");
        let _ = ctx.add_rule("A", b"a {A}");
        let _ = ctx.add_rule("A", b"a {A}");
        let _ = ctx.add_rule("A", b"a {A}");
        let _ = ctx.add_rule("A", b"a");
        ctx.initialize(20);
        let mut tree = Tree::from_rule_vec(vec![], &ctx);
        let mut some_recursions = false;
        for _ in 0..100 {
            tree.truncate();
            tree.generate_from_nt(&mut rand, ctx.nt_id("C"), 20, &ctx);
            if let Some(recursions) = tree.calc_recursions(&ctx) {
                assert_ne!(recursions.len(), 0);
                for recursion_info in recursions {
                    for offset in 0..recursion_info.get_number_of_recursions() {
                        let tuple = recursion_info.get_recursion_pair_by_offset(offset);
                        some_recursions = true;
                        // The ancestor must come strictly before the node.
                        assert!(tuple.0.to_i() < tuple.1.to_i());
                    }
                }
            }
        }
        assert!(some_recursions);
    }
}

View File

@ -0,0 +1,6 @@
//! LibAFL version of the [`Nautilus`](https://github.com/nautilus-fuzz/nautilus) grammar fuzzer
//!
#![doc = include_str!("README.md")]
/// Grammar handling: contexts, rules, derivation trees, and tree mutations
pub mod grammartec;
/// Random byte-string generation from `regex_syntax` HIR expressions
pub mod regex_mutator;

View File

@ -0,0 +1,142 @@
use alloc::vec::Vec;
use libafl_bolts::rands::Rand;
use regex_syntax::hir::{Class, ClassBytesRange, ClassUnicodeRange, Hir, Literal};
/// Bounds the amount of randomness spent while generating bytes from a regex.
#[derive(Debug)]
pub struct RegexScript {
    /// Randomness budget; once it is 0, `get_mod` stops drawing and returns 0.
    /// NOTE(review): nothing in this file ever decrements this field, so the
    /// budget only distinguishes zero from non-zero — confirm whether a
    /// `self.remaining -= 1` was lost in the port from upstream Nautilus.
    remaining: usize,
}
impl RegexScript {
    /// Creates a new [`RegexScript`] with a randomly drawn decision budget.
    ///
    /// Usually the budget is small (`rand % 2^k` for a random `k < 8`); with
    /// probability 1/256 a large budget below `0xffff` is drawn instead.
    pub fn new<R: Rand>(rand: &mut R) -> Self {
        let remaining = if rand.next() % 256 == 0 {
            rand.next() % 0xffff
        } else {
            let bound = 1 << (rand.next() % 8);
            rand.next() % bound
        };
        RegexScript {
            remaining: remaining as usize,
        }
    }

    /// Draws a value in `0..val`, or returns 0 once the budget is exhausted.
    ///
    /// NOTE(review): `remaining` is never decremented here, so the budget only
    /// distinguishes "zero" from "non-zero" — confirm that is intended.
    pub fn get_mod<R: Rand>(&mut self, rand: &mut R, val: usize) -> usize {
        if self.remaining == 0 {
            return 0;
        }
        rand.below(val)
    }

    /// Draws a value in `min..max` (upper bound exclusive).
    pub fn get_range<R: Rand>(&mut self, rand: &mut R, min: usize, max: usize) -> usize {
        let offset = self.get_mod(rand, max - min);
        offset + min
    }
}
/// Appends the UTF-8 encoding of `chr` to `res`.
fn append_char(res: &mut Vec<u8>, chr: char) {
    let mut encoded = [0_u8; 4];
    res.extend_from_slice(chr.encode_utf8(&mut encoded).as_bytes());
}
/// Appends the literal's raw bytes to `res`.
fn append_lit(res: &mut Vec<u8>, lit: &Literal) {
    res.extend_from_slice(&lit.0[..]);
}
/// Appends one char drawn from the given unicode class range to `res`.
///
/// NOTE(review): the endpoints are reinterpreted as little-endian `u32`s of
/// their UTF-8 encodings (not as code points), and `char::from_u32(..).unwrap()`
/// can panic if the drawn value is not a valid scalar value — confirm this
/// matches the upstream Nautilus behavior.
fn append_unicode_range<R: Rand>(
    rand: &mut R,
    res: &mut Vec<u8>,
    scr: &mut RegexScript,
    cls: ClassUnicodeRange,
) {
    let mut start_buf = [0; 4];
    let mut end_buf = [0; 4];
    cls.start().encode_utf8(&mut start_buf);
    cls.end().encode_utf8(&mut end_buf);
    let lo = u32::from_le_bytes(start_buf);
    let hi = u32::from_le_bytes(end_buf);
    let drawn = scr.get_range(rand, lo as usize, (hi + 1) as usize) as u32;
    append_char(res, std::char::from_u32(drawn).unwrap());
}
/// Appends one byte drawn from the (inclusive) byte class range to `res`.
fn append_byte_range<R: Rand>(
    rand: &mut R,
    res: &mut Vec<u8>,
    scr: &mut RegexScript,
    cls: ClassBytesRange,
) {
    let lo = cls.start() as usize;
    let hi = cls.end() as usize;
    // `get_range` excludes its upper bound, so `hi + 1` makes the end inclusive.
    res.push(scr.get_range(rand, lo, hi + 1) as u8);
}
fn append_class<R: Rand>(rand: &mut R, res: &mut Vec<u8>, scr: &mut RegexScript, cls: &Class) {
use regex_syntax::hir::Class::{Bytes, Unicode};
match cls {
Unicode(cls) => {
let rngs = cls.ranges();
let rng = rngs[scr.get_mod(rand, rngs.len())];
append_unicode_range(rand, res, scr, rng);
}
Bytes(cls) => {
let rngs = cls.ranges();
let rng = rngs[scr.get_mod(rand, rngs.len())];
append_byte_range(rand, res, scr, rng);
}
}
}
/// Draws a random length with a roughly exponential distribution:
/// first a random bit-width in `0..8`, then a value below `2 << width`.
fn get_length<R: Rand>(rand: &mut R, scr: &mut RegexScript) -> usize {
    let width = scr.get_mod(rand, 8);
    scr.get_mod(rand, 2 << width)
}
/// Draws a repetition count for an explicit `{min,max}` repetition.
///
/// `max == None` means "unbounded" (`{min,}`): a random length is added on
/// top of `min`. A bounded `max` is *inclusive*, per regex semantics.
fn get_repetition_range<R: Rand>(
    rand: &mut R,
    min: u32,
    max: Option<u32>,
    scr: &mut RegexScript,
) -> usize {
    match (min, max) {
        (a, None) => get_length(rand, scr) + (a as usize),
        (a, Some(b)) if a == b => a as usize,
        // `get_range` excludes its upper bound (cf. `append_byte_range`, which
        // passes `end + 1`), so pass `b + 1` here too: a regex `{a,b}` must be
        // able to produce exactly `b` repetitions.
        (a, Some(b)) => scr.get_range(rand, a as usize, (b as usize) + 1),
    }
}
/// Maps a repetition operator to a concrete repetition count.
///
/// `?` (`{0,1}`), `*` (`{0,}`), and `+` (`{1,}`) get special treatment; every
/// other case is delegated to [`get_repetition_range`].
fn get_repetitions<R: Rand>(
    rand: &mut R,
    min: u32,
    max: Option<u32>,
    scr: &mut RegexScript,
) -> usize {
    match (min, max) {
        // `?`: zero or one
        (0, Some(1)) => scr.get_mod(rand, 2),
        // `*`: only the *unbounded* zero-or-more case. A bounded `{0,n}` must
        // respect its upper limit and falls through to the range arm below
        // (the previous `(0, _)` arm silently ignored `n`).
        (0, None) => get_length(rand, scr),
        // `+`: only the *unbounded* one-or-more case, same reasoning as above.
        (1, None) => 1 + get_length(rand, scr),
        (min, max) => get_repetition_range(rand, min, max, scr),
    }
}
/// Generates a random byte string matching the given regex [`Hir`].
///
/// Walks the HIR iteratively with an explicit stack; the amount of randomness
/// spent is throttled by a freshly drawn [`RegexScript`] budget.
pub fn generate<R: Rand>(rand: &mut R, hir: &Hir) -> Vec<u8> {
    use regex_syntax::hir::HirKind;
    let mut scr = RegexScript::new(rand);
    let mut stack = vec![hir];
    let mut res = vec![];
    while let Some(cur) = stack.pop() {
        match cur.kind() {
            // Lookaround assertions (`Look`) match the empty string, like `Empty`.
            HirKind::Empty | HirKind::Look(_) => {}
            HirKind::Literal(lit) => append_lit(&mut res, lit),
            HirKind::Class(cls) => append_class(rand, &mut res, &mut scr, cls),
            HirKind::Repetition(rep) => {
                let count = get_repetitions(rand, rep.min, rep.max, &mut scr);
                for _ in 0..count {
                    stack.push(&rep.sub);
                }
            }
            HirKind::Capture(grp) => stack.push(&grp.sub),
            // Push in reverse so the concatenation pops off left-to-right.
            HirKind::Concat(hirs) => hirs.iter().rev().for_each(|sub| stack.push(sub)),
            HirKind::Alternation(hirs) => stack.push(&hirs[scr.get_mod(rand, hirs.len())]),
        }
    }
    res
}

View File

@ -11,11 +11,11 @@ use libafl_bolts::{
};
use nix::unistd::{fork, ForkResult};
use super::super::hooks::ExecutorHooksTuple;
use crate::{
events::{EventFirer, EventRestarter},
executors::{
inprocess_fork::GenericInProcessForkExecutorInner, Executor, ExitKind, HasObservers,
hooks::ExecutorHooksTuple, inprocess_fork::GenericInProcessForkExecutorInner, Executor,
ExitKind, HasObservers,
},
feedbacks::Feedback,
fuzzer::HasObjective,

View File

@ -3,11 +3,11 @@ use alloc::{borrow::Cow, string::String};
use core::{fmt::Debug, marker::PhantomData};
use std::fs::create_dir_all;
use grammartec::{chunkstore::ChunkStore, context::Context};
use libafl_bolts::Named;
use serde::{Deserialize, Serialize};
use crate::{
common::nautilus::grammartec::{chunkstore::ChunkStore, context::Context},
corpus::{Corpus, Testcase},
events::EventFirer,
executors::ExitKind,

View File

@ -147,7 +147,7 @@ where
}
let printables = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz \t\n!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~".as_bytes();
let random_bytes: Vec<u8> = (0..size)
.map(|_| *state.rand_mut().choose(printables))
.map(|_| *state.rand_mut().choose(printables).unwrap())
.collect();
Ok(BytesInput::new(random_bytes))
}

View File

@ -6,10 +6,13 @@ use alloc::{
use core::fmt::Debug;
use std::{fs, io::BufReader, path::Path};
use grammartec::context::Context;
pub use grammartec::newtypes::NTermID;
use libafl_bolts::rands::Rand;
use crate::{generators::Generator, inputs::nautilus::NautilusInput, Error};
pub use crate::common::nautilus::grammartec::newtypes::NTermId;
use crate::{
common::nautilus::grammartec::context::Context, generators::Generator,
inputs::nautilus::NautilusInput, state::HasRand, Error,
};
/// The nautilus context for a generator
pub struct NautilusContext {
@ -104,12 +107,12 @@ impl Debug for NautilusGenerator<'_> {
}
}
impl<'a, S> Generator<NautilusInput, S> for NautilusGenerator<'a> {
fn generate(&mut self, _state: &mut S) -> Result<NautilusInput, Error> {
impl<'a, S: HasRand> Generator<NautilusInput, S> for NautilusGenerator<'a> {
fn generate(&mut self, state: &mut S) -> Result<NautilusInput, Error> {
let nonterm = self.nonterminal("START");
let len = self.ctx.get_random_len_for_nt(&nonterm);
let mut input = NautilusInput::empty();
self.generate_from_nonterminal(&mut input, nonterm, len);
self.generate_from_nonterminal(state.rand_mut(), &mut input, nonterm, len);
Ok(input)
}
}
@ -124,12 +127,20 @@ impl<'a> NautilusGenerator<'a> {
/// Gets the nonterminal from this input
// TODO create from a python grammar
#[must_use]
pub fn nonterminal(&self, name: &str) -> NTermID {
pub fn nonterminal(&self, name: &str) -> NTermId {
self.ctx.nt_id(name)
}
/// Generates a [`NautilusInput`] from a nonterminal
pub fn generate_from_nonterminal(&self, input: &mut NautilusInput, start: NTermID, len: usize) {
input.tree_mut().generate_from_nt(start, len, self.ctx);
pub fn generate_from_nonterminal<R: Rand>(
&self,
rand: &mut R,
input: &mut NautilusInput,
start: NTermId,
len: usize,
) {
input
.tree_mut()
.generate_from_nt(rand, start, len, self.ctx);
}
}

View File

@ -8,15 +8,15 @@ use alloc::{rc::Rc, string::String, vec::Vec};
use core::cell::RefCell;
use std::hash::{Hash, Hasher};
use grammartec::{
newtypes::NodeID,
rule::RuleIDOrCustom,
tree::{Tree, TreeLike},
};
use libafl_bolts::HasLen;
use serde::{Deserialize, Serialize};
use crate::{
common::nautilus::grammartec::{
newtypes::NodeId,
rule::RuleIdOrCustom,
tree::{Tree, TreeLike},
},
generators::nautilus::NautilusContext,
inputs::{BytesInput, Input, InputConverter},
Error,
@ -78,7 +78,7 @@ impl NautilusInput {
/// Generate a `Nautilus` input from the given bytes
pub fn unparse(&self, context: &NautilusContext, bytes: &mut Vec<u8>) {
bytes.clear();
self.tree.unparse(NodeID::from(0), &context.ctx, bytes);
self.tree.unparse(NodeId::from(0), &context.ctx, bytes);
}
/// Get the tree representation of this input
@ -99,11 +99,11 @@ impl Hash for NautilusInput {
self.tree().paren.hash(state);
for r in &self.tree().rules {
match r {
RuleIDOrCustom::Custom(a, b) => {
RuleIdOrCustom::Custom(a, b) => {
a.hash(state);
b.hash(state);
}
RuleIDOrCustom::Rule(a) => a.hash(state),
RuleIdOrCustom::Rule(a) => a.hash(state),
}
}
self.tree().sizes.hash(state);

View File

@ -584,7 +584,7 @@ pub trait Monitor {
}
/// Monitor that print exactly nothing.
/// Not good for debuging, very good for speed.
/// Not good for debugging, very good for speed.
#[derive(Debug, Clone)]
pub struct NopMonitor {
start_time: Duration,

View File

@ -29,7 +29,7 @@ impl<S: HasRand> Mutator<EncodedInput, S> for EncodedRandMutator {
if input.codes().is_empty() {
Ok(MutationResult::Skipped)
} else {
let val = state.rand_mut().choose(input.codes_mut());
let val = state.rand_mut().choose(input.codes_mut()).unwrap();
*val = state.rand_mut().next() as u32;
Ok(MutationResult::Mutated)
}
@ -60,7 +60,7 @@ impl<S: HasRand> Mutator<EncodedInput, S> for EncodedIncMutator {
if input.codes().is_empty() {
Ok(MutationResult::Skipped)
} else {
let val = state.rand_mut().choose(input.codes_mut());
let val = state.rand_mut().choose(input.codes_mut()).unwrap();
*val = val.wrapping_add(1);
Ok(MutationResult::Mutated)
}
@ -91,7 +91,7 @@ impl<S: HasRand> Mutator<EncodedInput, S> for EncodedDecMutator {
if input.codes().is_empty() {
Ok(MutationResult::Skipped)
} else {
let val = state.rand_mut().choose(input.codes_mut());
let val = state.rand_mut().choose(input.codes_mut()).unwrap();
*val = val.wrapping_sub(1);
Ok(MutationResult::Mutated)
}
@ -122,7 +122,7 @@ impl<S: HasRand> Mutator<EncodedInput, S> for EncodedAddMutator {
if input.codes().is_empty() {
Ok(MutationResult::Skipped)
} else {
let val = state.rand_mut().choose(input.codes_mut());
let val = state.rand_mut().choose(input.codes_mut()).unwrap();
let num = 1 + state.rand_mut().below(ARITH_MAX) as u32;
*val = match state.rand_mut().below(2) {
0 => val.wrapping_add(num),

View File

@ -138,7 +138,11 @@ where
meta.map.get(&input.terminals()[insert_at].state).map_or(
Ok(MutationResult::Skipped),
|splice_points| {
let from = *choose(splice_points, rand_num);
let from = if let Some(from) = choose(splice_points, rand_num) {
*from
} else {
return Ok(MutationResult::Skipped);
};
input.terminals_mut().truncate(insert_at);
input
@ -208,7 +212,7 @@ where
return Ok(MutationResult::Skipped);
}
let chosen = *state.rand_mut().choose(&self.states);
let chosen = *state.rand_mut().choose(&self.states).unwrap();
let chosen_nums = self.counters.get(&chosen).unwrap().0;
#[allow(clippy::cast_sign_loss, clippy::pedantic)]

View File

@ -51,8 +51,8 @@ where
{
gap_indices.push(i);
}
let min_idx = *choose(&*gap_indices, rand1);
let max_idx = *choose(&*gap_indices, rand2);
let min_idx = *choose(&*gap_indices, rand1).unwrap();
let max_idx = *choose(&*gap_indices, rand2).unwrap();
let (mut min_idx, max_idx) = (min(min_idx, max_idx), max(min_idx, max_idx));
gap_indices.clear();
@ -73,7 +73,7 @@ where
if let Some(meta) = state.metadata_map().get::<Tokens>() {
if !meta.tokens().is_empty() {
let tok = choose(meta.tokens(), rand1);
let tok = choose(meta.tokens(), rand1).unwrap();
if items.last() != Some(&GeneralizedItem::Gap) {
items.push(GeneralizedItem::Gap);
}
@ -169,7 +169,10 @@ where
) -> Result<MutationResult, Error> {
let mut mutated = MutationResult::Skipped;
let depth = *state.rand_mut().choose(&RECURSIVE_REPLACEMENT_DEPTH);
let depth = *state
.rand_mut()
.choose(&RECURSIVE_REPLACEMENT_DEPTH)
.unwrap();
for _ in 0..depth {
if generalised_meta.generalized_len() >= MAX_RECURSIVE_REPLACEMENT_LEN {
break;
@ -187,7 +190,7 @@ where
if self.gap_indices.is_empty() {
break;
}
let selected = *state.rand_mut().choose(&self.gap_indices);
let selected = *state.rand_mut().choose(&self.gap_indices).unwrap();
self.gap_indices.clear();
self.scratch.extend_from_slice(&gen[selected + 1..]);

View File

@ -129,8 +129,8 @@ where
if input.bytes().is_empty() {
Ok(MutationResult::Skipped)
} else {
let bit = 1 << state.rand_mut().choose(0..8);
let byte = state.rand_mut().choose(input.bytes_mut());
let bit = 1 << state.rand_mut().choose(0..8).unwrap();
let byte = state.rand_mut().choose(input.bytes_mut()).unwrap();
*byte ^= bit;
Ok(MutationResult::Mutated)
}
@ -165,7 +165,7 @@ where
if input.bytes().is_empty() {
Ok(MutationResult::Skipped)
} else {
*state.rand_mut().choose(input.bytes_mut()) ^= 0xff;
*state.rand_mut().choose(input.bytes_mut()).unwrap() ^= 0xff;
Ok(MutationResult::Mutated)
}
}
@ -199,7 +199,7 @@ where
if input.bytes().is_empty() {
Ok(MutationResult::Skipped)
} else {
let byte = state.rand_mut().choose(input.bytes_mut());
let byte = state.rand_mut().choose(input.bytes_mut()).unwrap();
*byte = byte.wrapping_add(1);
Ok(MutationResult::Mutated)
}
@ -234,7 +234,7 @@ where
if input.bytes().is_empty() {
Ok(MutationResult::Skipped)
} else {
let byte = state.rand_mut().choose(input.bytes_mut());
let byte = state.rand_mut().choose(input.bytes_mut()).unwrap();
*byte = byte.wrapping_sub(1);
Ok(MutationResult::Mutated)
}
@ -269,7 +269,7 @@ where
if input.bytes().is_empty() {
Ok(MutationResult::Skipped)
} else {
let byte = state.rand_mut().choose(input.bytes_mut());
let byte = state.rand_mut().choose(input.bytes_mut()).unwrap();
*byte = (!(*byte)).wrapping_add(1);
Ok(MutationResult::Mutated)
}
@ -304,7 +304,7 @@ where
if input.bytes().is_empty() {
Ok(MutationResult::Skipped)
} else {
let byte = state.rand_mut().choose(input.bytes_mut());
let byte = state.rand_mut().choose(input.bytes_mut()).unwrap();
*byte ^= 1 + state.rand_mut().below(254) as u8;
Ok(MutationResult::Mutated)
}
@ -352,7 +352,7 @@ macro_rules! add_mutator_impl {
// choose a random window of bytes (windows overlap) and convert to $size
let (index, bytes) = state
.rand_mut()
.choose(input.bytes().windows(size_of::<$size>()).enumerate());
.choose(input.bytes().windows(size_of::<$size>()).enumerate()).unwrap();
let val = <$size>::from_ne_bytes(bytes.try_into().unwrap());
// mutate
@ -415,8 +415,8 @@ macro_rules! interesting_mutator_impl {
let bytes = input.bytes_mut();
let upper_bound = (bytes.len() + 1 - size_of::<$size>());
let idx = state.rand_mut().below(upper_bound);
let val = *state.rand_mut().choose(&$interesting) as $size;
let new_bytes = match state.rand_mut().choose(&[0, 1]) {
let val = *state.rand_mut().choose(&$interesting).unwrap() as $size;
let new_bytes = match state.rand_mut().choose(&[0, 1]).unwrap() {
0 => val.to_be_bytes(),
_ => val.to_le_bytes(),
};
@ -656,7 +656,7 @@ where
}
let range = rand_range(state, size, min(size, 16));
let val = *state.rand_mut().choose(input.bytes());
let val = *state.rand_mut().choose(input.bytes()).unwrap();
let quantity = range.len();
buffer_set(input.bytes_mut(), range.start, quantity, val);

View File

@ -3,18 +3,22 @@
use alloc::borrow::Cow;
use core::fmt::Debug;
use grammartec::{
context::Context,
mutator::Mutator as BackingMutator,
tree::{Tree, TreeMutation},
use libafl_bolts::{
rands::{Rand, RomuDuoJrRand},
Named,
};
use libafl_bolts::Named;
use crate::{
common::nautilus::grammartec::{
context::Context,
mutator::Mutator as BackingMutator,
tree::{Tree, TreeMutation},
},
feedbacks::NautilusChunksMetadata,
generators::nautilus::NautilusContext,
inputs::nautilus::NautilusInput,
mutators::{MutationResult, Mutator},
prelude::HasRand,
state::HasCorpus,
Error, HasMetadata,
};
@ -31,16 +35,17 @@ impl Debug for NautilusRandomMutator<'_> {
}
}
impl<S> Mutator<NautilusInput, S> for NautilusRandomMutator<'_> {
impl<S: HasRand> Mutator<NautilusInput, S> for NautilusRandomMutator<'_> {
fn mutate(
&mut self,
_state: &mut S,
state: &mut S,
input: &mut NautilusInput,
) -> Result<MutationResult, Error> {
// TODO get rid of tmp
let mut tmp = vec![];
self.mutator
.mut_random::<_, ()>(
.mut_random::<_, _>(
state.rand_mut(),
&input.tree,
self.ctx,
&mut |t: &TreeMutation, _ctx: &Context| {
@ -92,10 +97,10 @@ impl Debug for NautilusRecursionMutator<'_> {
}
}
impl<S> Mutator<NautilusInput, S> for NautilusRecursionMutator<'_> {
impl<S: HasRand> Mutator<NautilusInput, S> for NautilusRecursionMutator<'_> {
fn mutate(
&mut self,
_state: &mut S,
state: &mut S,
input: &mut NautilusInput,
) -> Result<MutationResult, Error> {
// TODO don't calc recursions here
@ -103,7 +108,8 @@ impl<S> Mutator<NautilusInput, S> for NautilusRecursionMutator<'_> {
// TODO get rid of tmp
let mut tmp = vec![];
self.mutator
.mut_random_recursion::<_, ()>(
.mut_random_recursion::<_, _>(
state.rand_mut(),
&input.tree,
recursions,
self.ctx,
@ -157,21 +163,24 @@ impl Debug for NautilusSpliceMutator<'_> {
impl<S> Mutator<NautilusInput, S> for NautilusSpliceMutator<'_>
where
S: HasCorpus<Input = NautilusInput> + HasMetadata,
S: HasCorpus<Input = NautilusInput> + HasMetadata + HasRand,
{
fn mutate(
&mut self,
state: &mut S,
input: &mut NautilusInput,
) -> Result<MutationResult, Error> {
// TODO get rid of tmp
let mut tmp = vec![];
// Create a fast temp mutator to get around borrowing..
let mut rand_cpy = { RomuDuoJrRand::with_seed(state.rand_mut().next()) };
let meta = state
.metadata_map()
.get::<NautilusChunksMetadata>()
.expect("NautilusChunksMetadata not in the state");
// TODO get rid of tmp
let mut tmp = vec![];
self.mutator
.mut_splice::<_, ()>(
.mut_splice::<_, _>(
&mut rand_cpy,
&input.tree,
self.ctx,
&meta.cks,

View File

@ -0,0 +1,134 @@
/*!
Loaded Dice
============
A simple module that implements a random sampler based on the [alias method](https://en.wikipedia.org/wiki/Alias_method). It can be used to sample from discrete probability distributions efficiently (`O(1)` per sample). One uses it by passing a vector of probabilities to the constructor. The constructor builds a data structure in `O(n*n*log(n))` (Note: It would be quite possible to implement this in `O(n*log(n))`, however for a reasonably sized number of values this method is faster than using the more efficient data structures. If the construction is slow in your case, you might consider using min/max heaps instead of resorting the array after each construction step). This data structure can then be used to sample numbers between `0` and `n` with the corresponding probabilities.
Assume we want to sample from the following distribution: `p(0)=0.5, p(1)=0.3, p(2)=0.1, p(3)=0.1`:
```rust
# extern crate libafl_bolts;
use libafl_bolts::rands::{StdRand, loaded_dice::LoadedDiceSampler};
fn main() {
let mut rand = StdRand::new();
let mut sampler = LoadedDiceSampler::new(&[0.5, 0.3, 0.1, 0.1]);
let iter: usize = 100;
for _ in 0..iter {
println!("{}", sampler.sample(&mut rand));
}
}
```
Original code by @eqv, see <https://github.com/eqv/loaded_dice>
*/
use alloc::vec::Vec;
use super::Rand;
/// Helper struct for [`LoadedDiceSampler`]
#[derive(Clone, Debug, PartialEq)]
struct AliasEntry {
    /// The index this entry stands for
    val: usize,
    /// The index returned instead when the coin flip exceeds `prob_of_val`
    alias: usize,
    /// Probability threshold for returning `val` rather than `alias`
    /// (compared against a uniform draw in `[0, 1)` in `sample`)
    prob_of_val: f64,
}
impl AliasEntry {
/// Create a new [`AliasEntry`]
pub fn new(val: usize, alias: usize, prob_of_val: f64) -> Self {
AliasEntry {
val,
alias,
prob_of_val,
}
}
}
/// A simple [`LoadedDiceSampler`]
#[derive(Clone, Debug, PartialEq)]
pub struct LoadedDiceSampler {
    /// One alias-table entry per input probability, built by `construct_table`
    entries: Vec<AliasEntry>,
}
impl LoadedDiceSampler {
    /// Create a new [`LoadedDiceSampler`] with the given probabilities.
    ///
    /// # Panics
    /// Panics (inside `construct_table`) if `probs` is empty or contains
    /// values that cannot be ordered (`NaN`).
    #[must_use]
    pub fn new(probs: &[f64]) -> Self {
        Self {
            entries: LoadedDiceSampler::construct_table(probs),
        }
    }

    /// Get one sample according to the predefined probabilities.
    pub fn sample<R: Rand>(&mut self, rand: &mut R) -> usize {
        // Classic alias-method draw: pick a slot uniformly, then flip a biased
        // coin to decide between the slot's own value and its alias.
        let slot = rand.below(self.entries.len());
        let coin = rand.next_float();
        let entry = &self.entries[slot];
        if coin > entry.prob_of_val {
            entry.alias
        } else {
            entry.val
        }
    }

    /// Create the alias table for this [`LoadedDiceSampler`]
    #[allow(clippy::cast_precision_loss)]
    fn construct_table(probs: &[f64]) -> Vec<AliasEntry> {
        let n = probs.len() as f64;
        let inv_n = 1.0 / probs.len() as f64;
        let mut remaining: Vec<(usize, f64)> = probs.iter().copied().enumerate().collect();
        let mut table = Vec::with_capacity(probs.len());
        while remaining.len() > 1 {
            // eqv: rust sort is optimized for nearly sorted cases, so a
            // priority-queue implementation might actually be slower; if this
            // becomes a bottleneck, replace `remaining` with a min/max heap.
            remaining.sort_by(|&(_, p1), &(_, p2)| p2.partial_cmp(&p1).unwrap()); // [biggest-prob, ..., smallest-prob]
            let (min_i, min_p) = remaining.pop().unwrap();
            let (max_i, max_p) = remaining.first_mut().unwrap();
            table.push(AliasEntry::new(min_i, *max_i, min_p * n));
            // The alias absorbs the `1/n - min_p` mass the smallest bucket lacks.
            *max_p -= inv_n - min_p;
        }
        let (last_i, last_p) = remaining.pop().unwrap();
        // The final bucket should hold exactly 1/n — allow for float error.
        debug_assert!(0.999 < last_p * n && last_p * n < 1.001);
        table.push(AliasEntry::new(last_i, usize::MAX, 1.0));
        table
    }
}
#[cfg(all(test, feature = "std"))]
mod tests {
    use alloc::vec::Vec;

    use super::LoadedDiceSampler;
    use crate::rands::{Rand, StdRand};

    /// Samples a randomly weighted distribution one million times and checks
    /// that each observed frequency matches its target probability to ~1%.
    #[test]
    #[allow(clippy::cast_precision_loss)]
    fn test_loaded_dice() {
        let mut rng = StdRand::with_seed(1337);
        let len = rng.between(3, 9);
        // Draw random weights and normalize them into probabilities.
        let weights: Vec<f64> = (0..len).map(|_| rng.next_float()).collect();
        let total: f64 = weights.iter().sum();
        let base: Vec<f64> = weights.iter().map(|v| v / total).collect();
        let mut sampler = LoadedDiceSampler::new(&base);
        let mut res: Vec<usize> = vec![0; len];
        let iter: usize = 1_000_000;
        for _ in 0..iter {
            res[sampler.sample(&mut rng)] += 1;
        }
        // (Dead `_res_p` debug computation and commented-out print removed.)
        for (i, &count) in res.iter().enumerate() {
            let p_i = count as f64 / iter as f64;
            assert!(base[i] * 0.99 < p_i && base[i] * 1.01 > p_i);
        }
    }
}

View File

@ -8,6 +8,9 @@ use core::{
use serde::{de::DeserializeOwned, Deserialize, Serialize};
#[cfg(feature = "alloc")]
pub mod loaded_dice;
/// Return a pseudo-random seed. For `no_std` environments, a single deterministic sequence is used.
#[must_use]
#[allow(unreachable_code)]
@ -55,10 +58,12 @@ pub type StdRand = RomuDuoJrRand;
/// Choose an item at random from the given iterator, sampling uniformly.
///
/// Will only return `None` for an empty iterator.
///
/// Note: the runtime cost is bound by the iterator's [`nth`][`Iterator::nth`] implementation
/// * For `Vec`, slice, array, this is O(1)
/// * For `HashMap`, `HashSet`, this is O(n)
pub fn choose<I>(from: I, rand: u64) -> I::Item
pub fn choose<I>(from: I, rand: u64) -> Option<I::Item>
where
I: IntoIterator,
I::IntoIter: ExactSizeIterator,
@ -66,14 +71,15 @@ where
// create iterator
let mut iter = from.into_iter();
// make sure there is something to choose from
debug_assert!(iter.len() > 0, "choosing from an empty iterator");
if iter.len() == 0 {
return None;
}
// pick a random, valid index
let index = fast_bound(rand, iter.len());
// return the item chosen
iter.nth(index).unwrap()
Some(iter.nth(index).unwrap())
}
/// Faster and almost unbiased alternative to `rand % n`.
@ -132,12 +138,66 @@ pub trait Rand: Debug + Serialize + DeserializeOwned {
}
/// Convenient variant of [`choose`].
fn choose<I>(&mut self, from: I) -> I::Item
///
/// This method uses [`Iterator::size_hint`] for optimization. With an
/// accurate hint and where [`Iterator::nth`] is a constant-time operation
/// this method can offer `O(1)` performance. Where no size hint is
/// available, complexity is `O(n)` where `n` is the iterator length.
/// Partial hints (where `lower > 0`) also improve performance.
///
/// Copy&paste from [`rand::IteratorRandom`](https://docs.rs/rand/0.8.5/rand/seq/trait.IteratorRandom.html#method.choose)
fn choose<I>(&mut self, from: I) -> Option<I::Item>
where
I: IntoIterator,
I::IntoIter: ExactSizeIterator,
{
choose(from, self.next())
let mut iter = from.into_iter();
let (mut lower, mut upper) = iter.size_hint();
let mut consumed = 0;
let mut result = None;
// Handling for this condition outside the loop allows the optimizer to eliminate the loop
// when the Iterator is an ExactSizeIterator. This has a large performance impact on e.g.
// seq_iter_choose_from_1000.
if upper == Some(lower) {
return if lower == 0 {
None
} else {
iter.nth(self.below(lower))
};
}
// Continue until the iterator is exhausted
loop {
if lower > 1 {
let ix = self.below(lower + consumed);
let skip = if ix < lower {
result = iter.nth(ix);
lower - (ix + 1)
} else {
lower
};
if upper == Some(lower) {
return result;
}
consumed += lower;
if skip > 0 {
iter.nth(skip - 1);
}
} else {
let elem = iter.next();
if elem.is_none() {
return result;
}
consumed += 1;
if self.below(consumed) == 0 {
result = elem;
}
}
let hint = iter.size_hint();
lower = hint.0;
upper = hint.1;
}
}
}

View File

@ -1,12 +1,12 @@
#[cfg(emulation_mode = "systemmode")]
use std::collections::HashSet;
use std::{
collections::HashMap,
fmt::{Debug, Display, Error, Formatter},
rc::Rc,
};
use enum_map::{Enum, EnumMap};
use hashbrown::HashMap;
use libafl::{
executors::ExitKind,
inputs::HasTargetBytes,

View File

@ -8,12 +8,12 @@ use core::{
};
use std::{
cell::{OnceCell, Ref, RefCell, RefMut},
collections::HashMap,
hash::Hash,
ops::Add,
rc::Rc,
};
use hashbrown::HashMap;
use libafl::{
executors::ExitKind,
inputs::HasTargetBytes,

View File

@ -1,9 +1,9 @@
use std::{
collections::HashMap,
fmt::Debug,
sync::atomic::{AtomicU64, Ordering},
};
use hashbrown::HashMap;
use libafl::state::{HasExecutions, State};
use libafl_qemu_sys::GuestPhysAddr;