diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 13c8c78446..4536e410e5 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -189,19 +189,6 @@ jobs:
# Fix me plz
# - name: Test Build libafl_libfuzzer with embed
# run: cargo +nightly test --features=embed-runtime --manifest-path libafl_libfuzzer/Cargo.toml
- ubuntu-check-nightly:
- runs-on: ubuntu-22.04
- needs: ubuntu
- steps:
- - uses: actions/checkout@v3
- - uses: ./.github/workflows/ubuntu-prepare
- - uses: Swatinem/rust-cache@v2
- with: { shared-key: "ubuntu" }
- # ---- build and feature check ----
- # cargo-hack's --feature-powerset would be nice here but libafl has a too many knobs
- - name: Check nightly features
- run: cargo +nightly check --features=agpl && cargo +nightly check --features=nautilus
-
ubuntu-check:
runs-on: ubuntu-22.04
@@ -218,7 +205,6 @@ jobs:
# cargo-hack's --feature-powerset would be nice here but libafl has a too many knobs
- name: Check each feature
# Skipping `python` as it has to be built with the `maturin` tool
- # `agpl`, `nautilus` require nightly
# `sancov_pcguard_edges` is tested seperatelyc
run: python3 ./scripts/parallellize_cargo_check.py ${{ matrix.instance_idx }}
diff --git a/README.md b/README.md
index 39c6f5e2df..bdc1d9e9ae 100644
--- a/README.md
+++ b/README.md
@@ -149,11 +149,3 @@ Unless you explicitly state otherwise, any contribution intentionally submitted
for inclusion in this crate by you, as defined in the Apache-2.0 license, shall
be dual licensed as above, without any additional terms or conditions.
-
-
-
-
-Dependencies under more restrictive licenses, such as GPL or AGPL, can be enabled
-using the respective feature in each crate when it is present, such as the
-'agpl' feature of the libafl crate.
-
diff --git a/fuzzers/baby_fuzzer_nautilus/rust-toolchain b/fuzzers/baby_fuzzer_nautilus/rust-toolchain
deleted file mode 100644
index bf867e0ae5..0000000000
--- a/fuzzers/baby_fuzzer_nautilus/rust-toolchain
+++ /dev/null
@@ -1 +0,0 @@
-nightly
diff --git a/libafl/Cargo.toml b/libafl/Cargo.toml
index be72098072..e872e2e8f1 100644
--- a/libafl/Cargo.toml
+++ b/libafl/Cargo.toml
@@ -130,13 +130,8 @@ llmp_debug = ["std", "libafl_bolts/llmp_debug"]
## Reduces the initial map size for llmp
llmp_small_maps = ["libafl_bolts/llmp_small_maps"] # reduces initial map size for llmp
-#! ## License-Changing Dependencies(!)
-
-## Enables all features hiding dependencies licensed under `AGPL`
-agpl = ["nautilus"]
-
-## Enables the [`Nautilus`](https://wcventure.github.io/FuzzingPaper/Paper/NDSS19_Nautilus.pdf) Grammar Mutator (AGPL-licensed)
-nautilus = ["grammartec", "std", "serde_json/std"]
+## Grammar mutator. Requires nightly.
+nautilus = ["std", "serde_json/std", "pyo3", "rand_trait", "regex-syntax"]
[build-dependencies]
rustversion = "1.0"
@@ -193,16 +188,15 @@ arrayvec = { version = "0.7.4", optional = true, default-features = false } # us
const_format = "0.2.32" # used for providing helpful compiler output
const_panic = "0.2.8" # similarly, for formatting const panic output
+pyo3 = { version = "0.18.3", optional = true } # For nautilus
+regex-syntax = { version = "0.8.3", optional = true } # For nautilus
+
# optional-dev deps (change when target.'cfg(accessible(::std))'.test-dependencies will be stable)
serial_test = { version = "3", optional = true, default-features = false, features = ["logging"] }
# Document all features of this crate (for `cargo doc`)
document-features = { version = "0.2", optional = true }
-# AGPL
-# !!! this create requires nightly
-grammartec = { version = "0.3.1", optional = true }
-
[target.'cfg(unix)'.dependencies]
libc = "0.2" # For (*nix) libc
diff --git a/libafl/build.rs b/libafl/build.rs
index c5d5e43f71..19931d30c5 100644
--- a/libafl/build.rs
+++ b/libafl/build.rs
@@ -6,12 +6,7 @@ fn nightly() {
}
#[rustversion::not(nightly)]
-fn nightly() {
- assert!(
- cfg!(all(not(docrs), not(feature = "nautilus"))),
- "The 'nautilus' feature of libafl requires a nightly compiler"
- );
-}
+fn nightly() {}
fn main() {
println!("cargo:rustc-check-cfg=cfg(nightly)");
diff --git a/libafl/src/common/mod.rs b/libafl/src/common/mod.rs
index 53a85a35cf..c3e75a9498 100644
--- a/libafl/src/common/mod.rs
+++ b/libafl/src/common/mod.rs
@@ -1,8 +1,13 @@
//! This module defines trait shared across different `LibAFL` modules
+#![allow(unused, missing_docs)]
+
use alloc::boxed::Box;
use core::any::type_name;
+#[cfg(feature = "nautilus")]
+pub mod nautilus;
+
use libafl_bolts::{
serdeany::{NamedSerdeAnyMap, SerdeAny, SerdeAnyMap},
Error,
diff --git a/libafl/src/common/nautilus/README.md b/libafl/src/common/nautilus/README.md
new file mode 100644
index 0000000000..a052afad63
--- /dev/null
+++ b/libafl/src/common/nautilus/README.md
@@ -0,0 +1,72 @@
+# Nautilus 2.0 LibAFL Mutator
+
+Nautilus is a coverage guided, grammar-based mutator. You can use it to improve your test coverage and find more bugs. By specifying the grammar of semi-valid inputs, Nautilus is able to perform complex mutation and to uncover more interesting test cases. Many of the ideas behind the original fuzzer are documented in a paper published at NDSS 2019.
+
+
+
+
+
+Version 2.0 has added many improvements to this early prototype.
+Features from version 2.0 we support in LibAFL:
+
+* Support for grammars specified in python
+* Support for non-context free grammars using python scripts to generate inputs from the structure
+* Support for specifying binary protocols/formats
+* Support for specifying regex based terminals that aren't part of the directed mutations
+* Better ability to avoid generating the same very short inputs over and over
+* Helpful error output on invalid grammars
+
+## How Does Nautilus Work?
+
+You specify a grammar using rules such as `EXPR -> EXPR + EXPR` or `EXPR -> NUM` and `NUM -> 1`. From these rules, the fuzzer constructs a tree. This internal representation allows to apply much more complex mutations than raw bytes. This tree is then turned into a real input for the target application. In normal Context Free Grammars, this process is straightforward: all leaves are concatenated. The left tree in the example below would unparse to the input `a=1+2` and the right one to `a=1+1+1+2`. To increase the expressiveness of your grammars, using Nautilus you are able to provide python functions for the unparsing process to allow much more complex specifications.
+
+
+
+
+
+## Examples
+
+Here, we use python to generate a grammar for valid XML-like inputs. Notice the use of a script rule to ensure the opening
+and closing tags match.
+
+```python
+#ctx.rule(NONTERM: string, RHS: string|bytes) adds a rule NONTERM->RHS. We can use {NONTERM} in the RHS to request a recursion.
+ctx.rule("START","{XML_CONTENT}")
+ctx.rule("XML_CONTENT","{XML}{XML_CONTENT}")
+ctx.rule("XML_CONTENT","")
+
+#ctx.script(NONTERM:string, RHS: [string], func) adds a rule NONTERM->func(*RHS).
+# In contrast to normal `rule`, RHS is an array of nonterminals.
+# It's up to the function to combine the values returned for the NONTERMINALS with any fixed content used.
+ctx.script("XML",["TAG","ATTR","XML_CONTENT"], lambda tag,attr,body: b"<%s %s>%s</%s>"%(tag,attr,body,tag) )
+ctx.rule("ATTR","foo=bar")
+ctx.rule("TAG","some_tag")
+ctx.rule("TAG","other_tag")
+
+#sometimes we don't want to explore the set of possible inputs in more detail. For example, if we fuzz a script
+#interpreter, we don't want to spend time on fuzzing all different variable names. In such cases we can use Regex
+#terminals. Regex terminals are only mutated during generation, but not during normal mutation stages, saving a lot of time.
+#The fuzzer still explores different values for the regex, but it won't be able to learn interesting values incrementally.
+#Use this when incremental exploration would most likely waste time.
+
+ctx.regex("TAG","[a-z]+")
+```
+
+To test your [grammars](https://github.com/nautilus-fuzz/nautilus/tree/mit-main/grammars) you can use the generator:
+
+```sh
+$ cargo run --bin generator -- -g grammars/grammar_py_exmaple.py -t 100
+
+```
+
+## Trophies
+
+*
+* (**CVE-2018-10191**)
+* (**CVE-2018-10199**)
+* (**CVE-2018-12248**)
+* (**CVE-2018-11743**)
+* (**CVE-2018-12247**)
+* (**CVE-2018-12249**)
+*
+*
diff --git a/libafl/src/common/nautilus/grammartec/chunkstore.rs b/libafl/src/common/nautilus/grammartec/chunkstore.rs
new file mode 100644
index 0000000000..40c97a8f5d
--- /dev/null
+++ b/libafl/src/common/nautilus/grammartec/chunkstore.rs
@@ -0,0 +1,154 @@
+use alloc::{string::String, vec::Vec};
+use std::{
+ fs::File,
+ io::Write,
+ sync::{atomic::AtomicBool, RwLock},
+};
+
+use hashbrown::{HashMap, HashSet};
+use libafl_bolts::rands::Rand;
+use serde::{Deserialize, Serialize};
+
+use super::{
+ context::Context,
+ newtypes::{NTermId, NodeId, RuleId},
+ rule::RuleIdOrCustom,
+ tree::{Tree, TreeLike},
+};
+
+#[derive(Debug)]
+pub struct ChunkStoreWrapper {
+    pub chunkstore: RwLock<ChunkStore>,
+ pub is_locked: AtomicBool,
+}
+impl ChunkStoreWrapper {
+ #[must_use]
+ pub fn new(work_dir: String) -> Self {
+ ChunkStoreWrapper {
+ chunkstore: RwLock::new(ChunkStore::new(work_dir)),
+ is_locked: AtomicBool::new(false),
+ }
+ }
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+pub struct ChunkStore {
+    nts_to_chunks: HashMap<NTermId, Vec<(usize, NodeId)>>,
+    seen_outputs: HashSet<Vec<u8>>,
+    trees: Vec<Tree>,
+ work_dir: String,
+ number_of_chunks: usize,
+}
+
+impl ChunkStore {
+ #[must_use]
+ pub fn new(work_dir: String) -> Self {
+ ChunkStore {
+ nts_to_chunks: HashMap::new(),
+ seen_outputs: HashSet::new(),
+ trees: vec![],
+ work_dir,
+ number_of_chunks: 0,
+ }
+ }
+
+ pub fn add_tree(&mut self, tree: Tree, ctx: &Context) {
+ let mut buffer = vec![];
+ let id = self.trees.len();
+ let mut contains_new_chunk = false;
+ for i in 0..tree.size() {
+ buffer.truncate(0);
+ if tree.sizes[i] > 30 {
+ continue;
+ }
+ let n = NodeId::from(i);
+ tree.unparse(n, ctx, &mut buffer);
+ if !self.seen_outputs.contains(&buffer) {
+ self.seen_outputs.insert(buffer.clone());
+ self.nts_to_chunks
+ .entry(tree.get_rule(n, ctx).nonterm())
+ .or_insert_with(Vec::new)
+ .push((id, n));
+ let mut file = File::create(format!(
+ "{}/outputs/chunks/chunk_{:09}",
+ self.work_dir, self.number_of_chunks
+ ))
+ .expect("RAND_596689790");
+ self.number_of_chunks += 1;
+ file.write_all(&buffer).expect("RAND_606896756");
+ contains_new_chunk = true;
+ }
+ }
+ if contains_new_chunk {
+ self.trees.push(tree);
+ }
+ }
+
+    pub fn get_alternative_to<R: Rand>(
+ &self,
+ rand: &mut R,
+ r: RuleId,
+ ctx: &Context,
+ ) -> Option<(&Tree, NodeId)> {
+ let chunks = self
+ .nts_to_chunks
+ .get(&ctx.get_nt(&RuleIdOrCustom::Rule(r)));
+ let relevant = chunks.map(|vec| {
+ vec.iter()
+ .filter(move |&&(tid, nid)| self.trees[tid].get_rule_id(nid) != r)
+ });
+ //The unwrap_or is just a quick and dirty fix to catch Errors from the sampler
+ let selected = relevant.and_then(|iter| rand.choose(iter));
+ selected.map(|&(tid, nid)| (&self.trees[tid], nid))
+ }
+
+ #[must_use]
+ pub fn trees(&self) -> usize {
+ self.trees.len()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use alloc::string::ToString;
+ use std::fs;
+
+ use libafl_bolts::rands::StdRand;
+
+ use crate::common::nautilus::grammartec::{
+ chunkstore::ChunkStore, context::Context, tree::TreeLike,
+ };
+
+ #[test]
+ fn chunk_store() {
+ let mut rand = StdRand::new();
+ let mut ctx = Context::new();
+ let r1 = ctx.add_rule("A", b"a {B:a}");
+ let r2 = ctx.add_rule("B", b"b {C:a}");
+ let _ = ctx.add_rule("C", b"c");
+ ctx.initialize(101);
+ let random_size = ctx.get_random_len_for_ruleid(&r1);
+ println!("random_size: {random_size}");
+ let tree = ctx.generate_tree_from_rule(&mut rand, r1, random_size);
+ fs::create_dir_all("/tmp/outputs/chunks").expect("40234068");
+ let mut cks = ChunkStore::new("/tmp/".to_string());
+ cks.add_tree(tree, &ctx);
+ // assert!(cks.seen_outputs.contains("a b c".as_bytes()));
+ // assert!(cks.seen_outputs.contains("b c".as_bytes()));
+ // assert!(cks.seen_outputs.contains("c".as_bytes()));
+ assert_eq!(cks.nts_to_chunks[&ctx.nt_id("A")].len(), 1);
+ let (tree_id, _) = cks.nts_to_chunks[&ctx.nt_id("A")][0];
+ assert_eq!(cks.trees[tree_id].unparse_to_vec(&ctx), "a b c".as_bytes());
+
+ let random_size = ctx.get_random_len_for_ruleid(&r2);
+ let tree = ctx.generate_tree_from_rule(&mut rand, r2, random_size);
+ cks.add_tree(tree, &ctx);
+ // assert_eq!(cks.seen_outputs.len(), 3);
+ // assert_eq!(cks.nts_to_chunks[&ctx.nt_id("B")].len(), 1);
+ let (tree_id, node_id) = cks.nts_to_chunks[&ctx.nt_id("B")][0];
+ assert_eq!(
+ cks.trees[tree_id].unparse_node_to_vec(node_id, &ctx),
+ "b c".as_bytes()
+ );
+ }
+}
diff --git a/libafl/src/common/nautilus/grammartec/context.rs b/libafl/src/common/nautilus/grammartec/context.rs
new file mode 100644
index 0000000000..77128ca573
--- /dev/null
+++ b/libafl/src/common/nautilus/grammartec/context.rs
@@ -0,0 +1,452 @@
+use alloc::{borrow::ToOwned, string::String, vec::Vec};
+
+use hashbrown::HashMap;
+use libafl_bolts::rands::{Rand, RomuDuoJrRand};
+use pyo3::prelude::PyObject;
+
+use super::{
+ newtypes::{NTermId, RuleId},
+ rule::{Rule, RuleIdOrCustom},
+ tree::Tree,
+};
+
+#[derive(Debug, Clone)]
+pub struct Context {
+    rules: Vec<Rule>,
+    nts_to_rules: HashMap<NTermId, Vec<RuleId>>,
+    nt_ids_to_name: HashMap<NTermId, String>,
+    names_to_nt_id: HashMap<String, NTermId>,
+    rules_to_min_size: HashMap<RuleId, usize>,
+
+    nts_to_min_size: HashMap<NTermId, usize>,
+
+    rules_to_num_options: HashMap<RuleId, usize>,
+    nts_to_num_options: HashMap<NTermId, usize>,
+ max_len: usize,
+}
+
+impl Default for Context {
+ fn default() -> Self {
+ Self::new()
+ }
+}
+
+impl Context {
+ #[must_use]
+ pub fn new() -> Self {
+ Context {
+ rules: vec![],
+ nts_to_rules: HashMap::new(),
+ nt_ids_to_name: HashMap::new(),
+ names_to_nt_id: HashMap::new(),
+
+ rules_to_min_size: HashMap::new(),
+ nts_to_min_size: HashMap::new(),
+
+ rules_to_num_options: HashMap::new(),
+ nts_to_num_options: HashMap::new(),
+ max_len: 0,
+ }
+ }
+
+ pub fn initialize(&mut self, max_len: usize) {
+ self.calc_min_len();
+ self.calc_num_options();
+ self.max_len = max_len + 2;
+ }
+
+ #[must_use]
+ pub fn get_rule(&self, r: RuleId) -> &Rule {
+ let id: usize = r.into();
+ &self.rules[id]
+ }
+
+ #[must_use]
+ pub fn get_nt(&self, r: &RuleIdOrCustom) -> NTermId {
+ return self.get_rule(r.id()).nonterm();
+ }
+
+ #[must_use]
+ pub fn get_num_children(&self, r: &RuleIdOrCustom) -> usize {
+ return self.get_rule(r.id()).number_of_nonterms();
+ }
+
+ pub fn add_rule(&mut self, nt: &str, format: &[u8]) -> RuleId {
+ let rid = self.rules.len().into();
+ let rule = Rule::from_format(self, nt, format);
+ let ntid = self.aquire_nt_id(nt);
+ self.rules.push(rule);
+ self.nts_to_rules.entry(ntid).or_default().push(rid);
+ rid
+ }
+
+ pub fn add_script(&mut self, nt: &str, nts: &[String], script: PyObject) -> RuleId {
+ let rid = self.rules.len().into();
+ let rule = Rule::from_script(self, nt, nts, script);
+ let ntid = self.aquire_nt_id(nt);
+ self.rules.push(rule);
+ self.nts_to_rules.entry(ntid).or_default().push(rid);
+ rid
+ }
+
+ pub fn add_regex(&mut self, nt: &str, regex: &str) -> RuleId {
+ let rid = self.rules.len().into();
+ let rule = Rule::from_regex(self, nt, regex);
+ let ntid = self.aquire_nt_id(nt);
+ self.rules.push(rule);
+ self.nts_to_rules.entry(ntid).or_default().push(rid);
+ rid
+ }
+
+ pub fn add_term_rule(&mut self, nt: &str, term: &[u8]) -> RuleId {
+ let rid = self.rules.len().into();
+ let ntid = self.aquire_nt_id(nt);
+ self.rules.push(Rule::from_term(ntid, term));
+ self.nts_to_rules.entry(ntid).or_default().push(rid);
+ rid
+ }
+
+ pub fn aquire_nt_id(&mut self, nt: &str) -> NTermId {
+ let next_id = self.nt_ids_to_name.len().into();
+ let id = self.names_to_nt_id.entry(nt.into()).or_insert(next_id);
+ self.nt_ids_to_name.entry(*id).or_insert(nt.into());
+ *id
+ }
+
+ #[must_use]
+ pub fn nt_id(&self, nt: &str) -> NTermId {
+ return *self
+ .names_to_nt_id
+ .get(nt)
+ .expect(&("no such nonterminal: ".to_owned() + nt));
+ }
+
+ #[must_use]
+ pub fn nt_id_to_s(&self, nt: NTermId) -> String {
+ self.nt_ids_to_name[&nt].clone()
+ }
+
+    fn calc_min_len_for_rule(&self, r: RuleId) -> Option<usize> {
+ let mut res = 1;
+ for nt_id in self.get_rule(r).nonterms() {
+ if let Some(min) = self.nts_to_min_size.get(nt_id) {
+ //println!("Calculating length for Rule(calc_min_len_for_rule): {}, current: {}, adding: {}, because of rule: {}", self.nt_id_to_s(self.get_rule(r).nonterm().clone()), res, min, self.nt_id_to_s(nt_id.clone()));
+ res += *min;
+ } else {
+ return None;
+ }
+ }
+ //println!("Calculated length for Rule(calc_min_len_for_rule): {}, Length: {}", self.nt_id_to_s(self.get_rule(r).nonterm().clone()), res);
+ Some(res)
+ }
+
+ pub fn calc_min_len(&mut self) {
+ let mut something_changed = true;
+ while something_changed {
+ //TODO: find a better solution to prevent consumed_len >= ctx.get_min_len_for_nt(*nt)' Assertions
+            let mut unknown_rules = (0..self.rules.len()).map(RuleId::from).collect::<Vec<_>>();
+ something_changed = false;
+ while !unknown_rules.is_empty() {
+ let last_len = unknown_rules.len();
+ unknown_rules.retain(|rule| {
+ if let Some(min) = self.calc_min_len_for_rule(*rule) {
+ let nt = self.get_rule(*rule).nonterm();
+ //let name = self.nt_id_to_s(nt.clone()); //DEBUGGING
+ let e = self.nts_to_min_size.entry(nt).or_insert(min);
+ if *e > min {
+ *e = min;
+ something_changed = true;
+ }
+ //println!("Calculated length for Rule: {}, Length: {}, Min_length_of_nt: {}", name, min, *e);
+ self.rules_to_min_size.insert(*rule, min);
+ false
+ } else {
+ true
+ }
+ });
+ if last_len == unknown_rules.len() {
+ println!("Found unproductive rules: (missing base/non recursive case?)");
+ for r in unknown_rules {
+ println!("{}", self.get_rule(r).debug_show(self));
+ }
+ panic!("Broken Grammar");
+ }
+ }
+ }
+ self.calc_rule_order();
+ }
+
+ fn calc_num_options_for_rule(&self, r: RuleId) -> usize {
+ let mut res = 1_usize;
+ for nt_id in self.get_rule(r).nonterms() {
+ res = res.saturating_mul(*self.nts_to_num_options.get(nt_id).unwrap_or(&1));
+ }
+ res
+ }
+
+ pub fn calc_num_options(&mut self) {
+ for (nt, rules) in &self.nts_to_rules {
+ self.nts_to_num_options.entry(*nt).or_insert(rules.len());
+ }
+
+ let mut something_changed = true;
+ while something_changed {
+ something_changed = false;
+
+ for rid in (0..self.rules.len()).map(RuleId::from) {
+ let num = self.calc_num_options_for_rule(rid);
+ let nt = self.get_rule(rid).nonterm();
+ let e = self.nts_to_num_options.entry(nt).or_insert(num);
+ if *e < num {
+ *e = num;
+ something_changed = true;
+ }
+ //println!("Calculated length for Rule: {}, Length: {}, Min_length_of_nt: {}", name, min, *e);
+ self.rules_to_num_options.insert(rid, num);
+ }
+ }
+ }
+
+ fn calc_rule_order(&mut self) {
+ let rules_to_min_size = &self.rules_to_min_size;
+ for rules in self.nts_to_rules.values_mut() {
+ (*rules).sort_by(|r1, r2| rules_to_min_size[r1].cmp(&rules_to_min_size[r2]));
+ }
+ }
+
+ #[must_use]
+ pub fn check_if_nterm_has_multiple_possiblities(&self, nt: &NTermId) -> bool {
+ self.get_rules_for_nt(*nt).len() > 1
+ }
+
+    pub fn get_random_len<R: Rand>(rand: &mut R, len: usize, rhs_of_rule: &[NTermId]) -> usize {
+ Self::simple_get_random_len(rand, rhs_of_rule.len(), len)
+ }
+
+ //we need to get maximal sizes for all subtrees. To generate trees fairly, we want to split the
+ //available size fairly to all nodes. (e.g. all children have the same expected size,
+ //regardless of its index in the current rule. We use this version of the algorithm described
+ //here: https://stackoverflow.com/a/8068956 to get the first value.
+    fn simple_get_random_len<R: Rand>(
+ rand: &mut R,
+ number_of_children: usize,
+ total_remaining_len: usize,
+ ) -> usize {
+ let mut res = total_remaining_len;
+ let iters = i32::try_from(number_of_children).unwrap() - 1;
+ for _ in 0..iters {
+ let proposal = rand.between(0, total_remaining_len);
+ if proposal < res {
+ res = proposal;
+ }
+ }
+ res
+ }
+
+ #[must_use]
+ pub fn get_min_len_for_nt(&self, nt: NTermId) -> usize {
+ self.nts_to_min_size[&nt]
+ }
+
+    pub fn get_random_rule_for_nt<R: Rand>(&self, rand: &mut R, nt: NTermId, len: usize) -> RuleId {
+ self.simple_get_random_rule_for_nt(rand, nt, len)
+ }
+
+ pub fn get_applicable_rules<'a, R: Rand>(
+ &'a self,
+ rand: &'a mut R,
+ max_len: usize,
+ nt: NTermId,
+ p_include_short_rules: usize,
+    ) -> impl Iterator<Item = &'a RuleId> + 'a {
+ self.nts_to_rules[&nt]
+ .iter()
+ .take_while(move |r| self.rules_to_min_size[*r] <= max_len)
+ .filter(move |r| {
+ self.rules_to_num_options[*r] > 1 || rand.below(100) <= p_include_short_rules
+ })
+ }
+
+    pub fn choose_applicable_rule<R: Rand>(
+ &self,
+ rand: &mut R,
+ max_len: usize,
+ nt: NTermId,
+ p_include_short_rules: usize,
+    ) -> Option<RuleId> {
+ // Create a tmp rand to get around borrowing. We hardcode the fatest rand here, because why not.
+ let mut rand_cpy = RomuDuoJrRand::with_seed(rand.next());
+ let rules = self.get_applicable_rules(rand, max_len, nt, p_include_short_rules);
+ rand_cpy.choose(rules).copied()
+ }
+
+    fn simple_get_random_rule_for_nt<R: Rand>(
+ &self,
+ rand: &mut R,
+ nt: NTermId,
+ max_len: usize,
+ ) -> RuleId {
+ let p_include_short_rules = 100;
+ /*if self.nts_to_num_options[&nt] < 10 {
+ 100 * 0
+ } else if max_len > 100 {
+ 2 * 0
+ } else if max_len > 20 {
+ 50 * 0
+ } else {
+ 100 * 0;
+ }; */
+
+ if let Some(opt) = self.choose_applicable_rule(rand, max_len, nt, p_include_short_rules) {
+ opt
+ } else if let Some(opt) = self.choose_applicable_rule(rand, max_len, nt, 100) {
+ opt
+ } else {
+ panic!(
+ "there is no way to derive {} within {} steps",
+ self.nt_ids_to_name[&nt], max_len
+ )
+ }
+ }
+
+ #[must_use]
+ pub fn get_random_len_for_ruleid(&self, _rule_id: &RuleId) -> usize {
+ self.max_len //TODO?????
+ }
+
+ #[must_use]
+ pub fn get_random_len_for_nt(&self, _nt: &NTermId) -> usize {
+ self.max_len
+ }
+
+ #[must_use]
+ pub fn get_rules_for_nt(&self, nt: NTermId) -> &Vec {
+ &self.nts_to_rules[&nt]
+ }
+
+    pub fn generate_tree_from_nt<R: Rand>(
+ &self,
+ rand: &mut R,
+ nt: NTermId,
+ max_len: usize,
+ ) -> Tree {
+ let random_rule = self.get_random_rule_for_nt(rand, nt, max_len);
+ self.generate_tree_from_rule(rand, random_rule, max_len - 1)
+ }
+
+    pub fn generate_tree_from_rule<R: Rand>(&self, rand: &mut R, r: RuleId, len: usize) -> Tree {
+ let mut tree = Tree::from_rule_vec(vec![], self);
+ tree.generate_from_rule(rand, r, len, self);
+ tree
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use alloc::{string::String, vec::Vec};
+
+ use libafl_bolts::rands::StdRand;
+
+ use crate::common::nautilus::grammartec::{
+ context::Context,
+ rule::{Rule, RuleChild, RuleIdOrCustom},
+ tree::{Tree, TreeLike},
+ };
+
+ #[test]
+ fn simple_context() {
+ let mut ctx = Context::new();
+ let r = Rule::from_format(&mut ctx, "F", b"foo{A:a}\\{bar\\}{B:b}asd{C}");
+ let soll = vec![
+ RuleChild::from_lit(b"foo"),
+ RuleChild::from_nt("{A:a}", &mut ctx),
+ RuleChild::from_lit(b"{bar}"),
+ RuleChild::from_nt("{B:b}", &mut ctx),
+ RuleChild::from_lit(b"asd"),
+ RuleChild::from_nt("{C}", &mut ctx),
+ ];
+ if let Rule::Plain(rl) = &r {
+ assert_eq!(&rl.children, &soll);
+ } else {
+ unreachable!();
+ }
+ assert_eq!(r.nonterms()[0], ctx.nt_id("A"));
+ assert_eq!(r.nonterms()[1], ctx.nt_id("B"));
+ assert_eq!(r.nonterms()[2], ctx.nt_id("C"));
+ }
+
+ #[test]
+ fn test_context() {
+ let mut rand = StdRand::new();
+ let mut ctx = Context::new();
+ let r0 = ctx.add_rule("C", b"c{B}c");
+ let r1 = ctx.add_rule("B", b"b{A}b");
+ let _ = ctx.add_rule("A", b"a {A}");
+ let _ = ctx.add_rule("A", b"a {A}");
+ let _ = ctx.add_rule("A", b"a {A}");
+ let _ = ctx.add_rule("A", b"a {A}");
+ let _ = ctx.add_rule("A", b"a {A}");
+ let r3 = ctx.add_rule("A", b"a");
+ ctx.initialize(5);
+ assert_eq!(ctx.get_min_len_for_nt(ctx.nt_id("A")), 1);
+ assert_eq!(ctx.get_min_len_for_nt(ctx.nt_id("B")), 2);
+ assert_eq!(ctx.get_min_len_for_nt(ctx.nt_id("C")), 3);
+ let mut tree = Tree::from_rule_vec(vec![], &ctx);
+ tree.generate_from_nt(&mut rand, ctx.nt_id("C"), 3, &ctx);
+ assert_eq!(
+ tree.rules,
+ vec![
+ RuleIdOrCustom::Rule(r0),
+ RuleIdOrCustom::Rule(r1),
+ RuleIdOrCustom::Rule(r3),
+ ]
+ );
+        let mut data: Vec<u8> = vec![];
+ tree.unparse_to(&ctx, &mut data);
+ assert_eq!(String::from_utf8(data).expect("RAND_3377050372"), "cbabc");
+ }
+
+ #[test]
+ fn test_generate_len() {
+ let mut rand = StdRand::new();
+ let mut ctx = Context::new();
+ let r0 = ctx.add_rule("E", b"({E}+{E})");
+ let r1 = ctx.add_rule("E", b"({E}*{E})");
+ let r2 = ctx.add_rule("E", b"({E}-{E})");
+ let r3 = ctx.add_rule("E", b"({E}/{E})");
+ let r4 = ctx.add_rule("E", b"1");
+ ctx.initialize(11);
+ assert_eq!(ctx.get_min_len_for_nt(ctx.nt_id("E")), 1);
+
+ for _ in 0..100 {
+ let mut tree = Tree::from_rule_vec(vec![], &ctx);
+ tree.generate_from_nt(&mut rand, ctx.nt_id("E"), 9, &ctx);
+ assert!(tree.rules.len() < 10);
+ assert!(!tree.rules.is_empty());
+ }
+
+ let rules = [r0, r1, r4, r4, r4]
+ .iter()
+ .map(|x| RuleIdOrCustom::Rule(*x))
+            .collect::<Vec<_>>();
+ let tree = Tree::from_rule_vec(rules, &ctx);
+        let mut data: Vec<u8> = vec![];
+ tree.unparse_to(&ctx, &mut data);
+ assert_eq!(
+ String::from_utf8(data).expect("RAND_3492562908"),
+ "((1*1)+1)"
+ );
+
+ let rules = [r0, r1, r2, r3, r4, r4, r4, r4, r4]
+ .iter()
+ .map(|x| RuleIdOrCustom::Rule(*x))
+            .collect::<Vec<_>>();
+ let tree = Tree::from_rule_vec(rules, &ctx);
+        let mut data: Vec<u8> = vec![];
+ tree.unparse_to(&ctx, &mut data);
+ assert_eq!(
+ String::from_utf8(data).expect("RAND_4245419893"),
+ "((((1/1)-1)*1)+1)"
+ );
+ }
+}
diff --git a/libafl/src/common/nautilus/grammartec/mod.rs b/libafl/src/common/nautilus/grammartec/mod.rs
new file mode 100644
index 0000000000..bf15649903
--- /dev/null
+++ b/libafl/src/common/nautilus/grammartec/mod.rs
@@ -0,0 +1,7 @@
+pub mod chunkstore;
+pub mod context;
+pub mod mutator;
+pub mod newtypes;
+pub mod recursion_info;
+pub mod rule;
+pub mod tree;
diff --git a/libafl/src/common/nautilus/grammartec/mutator.rs b/libafl/src/common/nautilus/grammartec/mutator.rs
new file mode 100644
index 0000000000..e9c3a5242d
--- /dev/null
+++ b/libafl/src/common/nautilus/grammartec/mutator.rs
@@ -0,0 +1,601 @@
+use alloc::vec::Vec;
+use std::{collections::HashSet, mem};
+
+use libafl_bolts::{rands::Rand, Error};
+
+use crate::common::nautilus::grammartec::{
+ chunkstore::ChunkStore,
+ context::Context,
+ newtypes::NodeId,
+ recursion_info::RecursionInfo,
+ rule::RuleIdOrCustom,
+ tree::{Tree, TreeLike, TreeMutation},
+};
+
+#[derive(Debug)]
+pub struct Mutator {
+ scratchpad: Tree,
+}
+
+impl Mutator {
+ #[must_use]
+ pub fn new(ctx: &Context) -> Self {
+ Mutator {
+ scratchpad: Tree::from_rule_vec(vec![], ctx),
+ }
+ }
+
+ //Return value indicates if minimization is complete: true: complete, false: not complete
+ #[allow(clippy::too_many_arguments)]
+    pub fn minimize_tree<F, R: Rand>(
+        &mut self,
+        rand: &mut R,
+        tree: &mut Tree,
+        bits: &HashSet<usize>,
+        ctx: &Context,
+        start_index: usize,
+        end_index: usize,
+        tester: &mut F,
+    ) -> Result<bool, Error>
+    where
+        F: FnMut(&TreeMutation, &HashSet<usize>, &Context) -> Result<bool, Error>,
+ {
+ let mut i = start_index;
+ while i < tree.size() {
+ let n = NodeId::from(i);
+ let nt = tree.get_rule(n, ctx).nonterm();
+ if tree.subtree_size(n) > ctx.get_min_len_for_nt(nt) {
+ self.scratchpad
+ .generate_from_nt(rand, nt, ctx.get_min_len_for_nt(nt), ctx);
+ if let Some(t) = Mutator::test_and_convert(
+ tree,
+ n,
+ &self.scratchpad,
+ NodeId::from(0),
+ ctx,
+ bits,
+ tester,
+ )? {
+ let _ = mem::replace(tree, t);
+ }
+ }
+ i += 1;
+ if i == end_index {
+ return Ok(false);
+ }
+ }
+ Ok(true)
+ }
+
+ //Return value indicates if minimization is complete: true: complete, false: not complete
+    pub fn minimize_rec<F>(
+        &mut self,
+        tree: &mut Tree,
+        bits: &HashSet<usize>,
+        ctx: &Context,
+        start_index: usize,
+        end_index: usize,
+        tester: &mut F,
+    ) -> Result<bool, Error>
+    where
+        F: FnMut(&TreeMutation, &HashSet<usize>, &Context) -> Result<bool, Error>,
+ {
+ let mut i = start_index;
+ while i < tree.size() {
+ let n = NodeId::from(i);
+ if let Some(parent) = Mutator::find_parent_with_nt(tree, n, ctx) {
+ if let Some(t) =
+ Mutator::test_and_convert(tree, parent, tree, n, ctx, bits, tester)?
+ {
+ let _ = mem::replace(tree, t);
+ i = parent.into();
+ }
+ }
+ i += 1;
+ if i == end_index {
+ return Ok(false);
+ }
+ }
+ Ok(true)
+ }
+
+    pub fn mut_rules<F, R: Rand>(
+        &mut self,
+        rand: &mut R,
+        tree: &Tree,
+        ctx: &Context,
+        start_index: usize,
+        end_index: usize,
+        tester: &mut F,
+    ) -> Result<bool, Error>
+    where
+        F: FnMut(&TreeMutation, &Context) -> Result<(), Error>,
+ {
+ for i in start_index..end_index {
+ if i == tree.size() {
+ return Ok(true);
+ }
+ let n = NodeId::from(i);
+ let old_rule_id = tree.get_rule_id(n);
+ let rule_ids = ctx
+ .get_rules_for_nt(ctx.get_nt(&RuleIdOrCustom::Rule(old_rule_id)))
+ .clone(); //TODO: Maybe find a better solution
+ for new_rule_id in rule_ids {
+ if old_rule_id != new_rule_id {
+ let random_size = ctx.get_random_len_for_ruleid(&new_rule_id);
+ self.scratchpad
+ .generate_from_rule(rand, new_rule_id, random_size, ctx);
+ let repl = tree.mutate_replace_from_tree(n, &self.scratchpad, NodeId::from(0));
+ tester(&repl, ctx)?;
+ }
+ }
+ }
+ Ok(false)
+ }
+
+    pub fn mut_splice<F, R: Rand>(
+ &mut self,
+ rand: &mut R,
+ tree: &Tree,
+ ctx: &Context,
+ cks: &ChunkStore,
+ tester: &mut F,
+ ) -> Result<(), Error>
+ where
+ F: FnMut(&TreeMutation, &Context) -> Result<(), Error>,
+ {
+ let n = NodeId::from(rand.below(tree.size()));
+ let old_rule_id = tree.get_rule_id(n);
+ if let Some((repl_tree, repl_node)) = cks.get_alternative_to(rand, old_rule_id, ctx) {
+ let repl = tree.mutate_replace_from_tree(n, repl_tree, repl_node);
+ tester(&repl, ctx)?;
+ }
+ Ok(())
+ }
+
+ //pub fn rec_splice(
+ // &mut self,
+ // tree: &Tree,
+ // ctx: &Context,
+ // cks: &ChunkStore,
+ // tester: &mut F
+ // )-> Result<(), Error>
+ //where
+ // F: FnMut(&TreeMutation, &Context) -> Result<(), Error>,
+ //{
+ // let n = NodeId::from(rand::thread_rng().gen_range(0, tree.size()));
+ // if let Some(old_rule_id) = tree.get_rule_id(n){
+ // let nterm_id = ctx.get_rule(old_rule).nonterm();
+ // if let Some((repl_tree, repl_node)) = cks.get_alternative_to(old_rule_id, ctx) {
+ // let repl = tree.mutate_replace_from_tree(n, repl_tree, repl_node);
+ // tester(&repl, ctx)?;
+ // }
+ // }
+ //
+ // return Ok(());
+ //}
+
+    pub fn mut_random<F, R: Rand>(
+ &mut self,
+ rand: &mut R,
+ tree: &Tree,
+ ctx: &Context,
+ tester: &mut F,
+ ) -> Result<(), Error>
+ where
+ F: FnMut(&TreeMutation, &Context) -> Result<(), Error>,
+ {
+ let n = NodeId::from(rand.below(tree.size()));
+ let nterm = tree.get_rule(n, ctx).nonterm();
+ if ctx.check_if_nterm_has_multiple_possiblities(&nterm) {
+ let len = ctx.get_random_len_for_nt(&nterm);
+ self.scratchpad.generate_from_nt(rand, nterm, len, ctx);
+ let repl = tree.mutate_replace_from_tree(n, &self.scratchpad, NodeId::from(0));
+ tester(&repl, ctx)?;
+ }
+ Ok(())
+ }
+
+    pub fn mut_random_recursion<F, R: Rand>(
+ &mut self,
+ rand: &mut R,
+ tree: &Tree,
+        recursions: &mut Vec<RecursionInfo>,
+ ctx: &Context,
+ tester: &mut F,
+ ) -> Result<(), Error>
+ where
+ F: FnMut(&TreeMutation, &Context) -> Result<(), Error>,
+ {
+ let max_len_of_recursions = 2 << rand.between(1, 10);
+ if let Some(recursion_info) = rand.choose(recursions) {
+ let recursion = recursion_info.get_random_recursion_pair(rand);
+ let recursion_len_pre = recursion.1.to_i() - recursion.0.to_i();
+ let recursion_len_total =
+ tree.subtree_size(recursion.0) - tree.subtree_size(recursion.1);
+ let recursion_len_post = recursion_len_total - recursion_len_pre;
+ let num_of_recursions = max_len_of_recursions / recursion_len_total;
+ //Insert pre recursion
+ let postfix = tree.subtree_size(recursion.1);
+ let mut rules_new = Vec::with_capacity(
+ recursion_len_pre * num_of_recursions
+ + postfix
+ + recursion_len_post * num_of_recursions,
+ );
+ let mut sizes_new = Vec::with_capacity(
+ recursion_len_pre * num_of_recursions
+ + postfix
+ + recursion_len_post * num_of_recursions,
+ );
+ for i in 0..num_of_recursions * recursion_len_pre {
+ rules_new.push(
+ tree.get_rule_or_custom(recursion.0 + (i % recursion_len_pre))
+ .clone(),
+ );
+ sizes_new.push(tree.sizes[recursion.0.to_i() + (i % recursion_len_pre)]);
+ }
+
+ //Append ending of original tree
+ for i in 0..postfix {
+ rules_new.push(tree.get_rule_or_custom(recursion.1 + i).clone());
+ sizes_new.push(tree.sizes[recursion.1.to_i() + i]);
+ }
+
+ //Adjust the sizes
+ for (i, item) in sizes_new
+ .iter_mut()
+ .enumerate()
+ .take(num_of_recursions * recursion_len_pre)
+ {
+ if *item >= recursion_len_pre {
+ *item += (num_of_recursions - i / recursion_len_pre - 1) * recursion_len_total;
+ }
+ }
+
+ //Append post recursion
+ for i in 0..num_of_recursions * recursion_len_post {
+ rules_new.push(
+ tree.get_rule_or_custom(recursion.1 + postfix + (i % recursion_len_post))
+ .clone(),
+ );
+ sizes_new.push(tree.sizes[recursion.1.to_i() + postfix + (i % recursion_len_post)]);
+ }
+
+ let recursion_tree = Tree {
+ rules: rules_new,
+ sizes: sizes_new,
+ paren: Vec::new(), /*paren_new*/
+ };
+ let repl = tree.mutate_replace_from_tree(recursion.1, &recursion_tree, NodeId::from(0));
+
+ tester(&repl, ctx)?;
+ }
+ Ok(())
+ }
+
+    fn find_parent_with_nt(tree: &Tree, mut node: NodeId, ctx: &Context) -> Option<NodeId> {
+ let nt = tree.get_rule(node, ctx).nonterm();
+ while let Some(parent) = tree.get_parent(node) {
+ if tree.get_rule(parent, ctx).nonterm() == nt {
+ return Some(parent);
+ }
+ node = parent;
+ }
+ None
+ }
+
+    fn test_and_convert<F>(
+        tree_a: &Tree,
+        n_a: NodeId,
+        tree_b: &Tree,
+        n_b: NodeId,
+        ctx: &Context,
+        fresh_bits: &HashSet<usize>,
+        tester: &mut F,
+    ) -> Result<Option<Tree>, Error>