Gramatron (#332)
* grammatron random mut
* import String from alloc
* gramatron
* grammar preprocess scripts
* clippy
* fix construct_automata.py
* splice mutator
* fix
* clippy
* recursion mutator
* recursion mut in example
* clippy
* fix
* clippy
* grammars
parent 23edffd4c1
commit 77e0be218a
fuzzers/baby_fuzzer_gramatron/Cargo.toml (new file)
@@ -0,0 +1,23 @@
[package]
name = "baby_fuzzer"
version = "0.6.0"
authors = ["Andrea Fioraldi <andreafioraldi@gmail.com>", "Dominik Maier <domenukk@gmail.com>"]
edition = "2018"

[features]
default = ["std"]
std = []

[profile.dev]
panic = "abort"

[profile.release]
panic = "abort"
lto = true
codegen-units = 1
opt-level = 3
debug = true

[dependencies]
libafl = { path = "../../libafl/" }
serde_json = "1.0.68"
fuzzers/baby_fuzzer_gramatron/auto.json (new file, 1 line)
File diff suppressed because one or more lines are too long
fuzzers/baby_fuzzer_gramatron/src/main.rs (new file)
@@ -0,0 +1,132 @@
use std::io::Read;
use std::{
    fs,
    io::BufReader,
    path::{Path, PathBuf},
};

#[cfg(windows)]
use std::ptr::write_volatile;

use libafl::{
    bolts::{current_nanos, rands::StdRand, tuples::tuple_list},
    corpus::{InMemoryCorpus, OnDiskCorpus, QueueCorpusScheduler},
    events::SimpleEventManager,
    executors::{inprocess::InProcessExecutor, ExitKind},
    feedbacks::{CrashFeedback, MapFeedbackState, MaxMapFeedback},
    fuzzer::{Evaluator, Fuzzer, StdFuzzer},
    generators::{Automaton, GramatronGenerator},
    inputs::GramatronInput,
    mutators::{
        GramatronRandomMutator, GramatronRecursionMutator, GramatronSpliceMutator,
        StdScheduledMutator,
    },
    observers::StdMapObserver,
    stages::mutational::StdMutationalStage,
    state::StdState,
    stats::SimpleStats,
};

/// Coverage map with explicit assignments due to the lack of instrumentation
static mut SIGNALS: [u8; 16] = [0; 16];
/*
/// Assign a signal to the signals map
fn signals_set(idx: usize) {
    unsafe { SIGNALS[idx] = 1 };
}
*/

fn read_automaton_from_file<P: AsRef<Path>>(path: P) -> Automaton {
    let file = fs::File::open(path).unwrap();
    let reader = BufReader::new(file);
    serde_json::from_reader(reader).unwrap()
}

#[allow(clippy::similar_names)]
pub fn main() {
    let mut bytes = vec![];

    // The closure that we want to fuzz
    let mut harness = |input: &GramatronInput| {
        input.unparse(&mut bytes);
        unsafe {
            println!(">>> {}", std::str::from_utf8_unchecked(&bytes));
        }
        ExitKind::Ok
    };

    // Create an observation channel using the signals map
    let observer = StdMapObserver::new("signals", unsafe { &mut SIGNALS });

    // The state of the edges feedback.
    let feedback_state = MapFeedbackState::with_observer(&observer);

    // Feedback to rate the interestingness of an input
    let feedback = MaxMapFeedback::new(&feedback_state, &observer);

    // A feedback to choose if an input is a solution or not
    let objective = CrashFeedback::new();

    // Create a State from scratch
    let mut state = StdState::new(
        // RNG
        StdRand::with_seed(current_nanos()),
        // Corpus that will be evolved, we keep it in memory for performance
        InMemoryCorpus::new(),
        // Corpus in which we store solutions (crashes in this example),
        // on disk so the user can get them after stopping the fuzzer
        OnDiskCorpus::new(PathBuf::from("./crashes")).unwrap(),
        // States of the feedbacks.
        // They are the data related to the feedbacks that you want to persist in the State.
        tuple_list!(feedback_state),
    );

    // The Stats trait defines how the fuzzer stats are reported to the user
    let stats = SimpleStats::new(|s| println!("{}", s));

    // The event manager handles the various events generated during the fuzzing loop
    // such as the notification of the addition of a new item to the corpus
    let mut mgr = SimpleEventManager::new(stats);

    // A queue policy to get test cases from the corpus
    let scheduler = QueueCorpusScheduler::new();

    // A fuzzer with feedbacks and a corpus scheduler
    let mut fuzzer = StdFuzzer::new(scheduler, feedback, objective);

    // Create the executor for an in-process function with just one observer
    let mut executor = InProcessExecutor::new(
        &mut harness,
        tuple_list!(observer),
        &mut fuzzer,
        &mut state,
        &mut mgr,
    )
    .expect("Failed to create the Executor");

    let mut generator =
        GramatronGenerator::new(read_automaton_from_file(PathBuf::from("auto.json")));

    // Generate 8 initial inputs
    state
        .generate_initial_inputs_forced(&mut fuzzer, &mut executor, &mut generator, &mut mgr, 8)
        .expect("Failed to generate the initial corpus");

    // Set up a mutational stage with the Gramatron mutators
    let mutator = StdScheduledMutator::with_max_iterations(
        tuple_list!(
            GramatronRandomMutator::new(&generator),
            GramatronRandomMutator::new(&generator),
            GramatronRandomMutator::new(&generator),
            GramatronSpliceMutator::new(),
            GramatronSpliceMutator::new(),
            GramatronRecursionMutator::new()
        ),
        2,
    );
    let mut stages = tuple_list!(StdMutationalStage::new(mutator));

    fuzzer
        .fuzz_loop(&mut stages, &mut executor, &mut state, &mut mgr)
        .expect("Error in the fuzzing loop");
}
fuzzers/baby_fuzzer_tokens/.gitignore (new file)
@@ -0,0 +1 @@
libpng-*
fuzzers/baby_fuzzer_tokens/README.md (new file)
@@ -0,0 +1,8 @@
# Baby fuzzer

This is a minimalistic example of how to create a LibAFL-based fuzzer.

It runs on a single core until a crash occurs and then exits.

The tested program is a simple Rust function without any instrumentation.
For real fuzzing, you will want to add some sort of coverage or other feedback.
fuzzers/baby_fuzzer_tokens/corpus/new file (new file)
@@ -0,0 +1,4 @@
fn pippo(v) { return "hello world " + v; }
var a = 666;
name = "scozzo" + a;
pippo(name);
libafl/src/generators/gramatron.rs (new file)
@@ -0,0 +1,94 @@
use alloc::{string::String, vec::Vec};
use core::marker::PhantomData;
use serde::{Deserialize, Serialize};

use crate::{
    bolts::rands::Rand,
    generators::Generator,
    inputs::{GramatronInput, Terminal},
    state::HasRand,
    Error,
};

#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct Trigger {
    pub id: String,
    pub dest: usize,
    pub term: String,
}

#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct Automaton {
    pub final_state: usize,
    pub init_state: usize,
    pub pda: Vec<Vec<Trigger>>,
}

#[derive(Clone, Debug)]
/// Generates random inputs from a grammar automaton
pub struct GramatronGenerator<R, S>
where
    R: Rand,
    S: HasRand<R>,
{
    automaton: Automaton,
    phantom: PhantomData<(R, S)>,
}

impl<R, S> Generator<GramatronInput, S> for GramatronGenerator<R, S>
where
    R: Rand,
    S: HasRand<R>,
{
    fn generate(&mut self, state: &mut S) -> Result<GramatronInput, Error> {
        let mut input = GramatronInput::new(vec![]);
        self.append_generated_terminals(&mut input, state);
        Ok(input)
    }

    fn generate_dummy(&self, _state: &mut S) -> GramatronInput {
        GramatronInput::new(vec![])
    }
}

impl<R, S> GramatronGenerator<R, S>
where
    R: Rand,
    S: HasRand<R>,
{
    /// Returns a new [`GramatronGenerator`]
    #[must_use]
    pub fn new(automaton: Automaton) -> Self {
        Self {
            automaton,
            phantom: PhantomData,
        }
    }

    pub fn append_generated_terminals(&self, input: &mut GramatronInput, state: &mut S) -> usize {
        let mut counter = 0;
        let final_state = self.automaton.final_state;
        let mut current_state =
            input
                .terminals()
                .last()
                .map_or(self.automaton.init_state, |last| {
                    let triggers = &self.automaton.pda[last.state];
                    let idx = state.rand_mut().below(triggers.len() as u64) as usize;
                    triggers[idx].dest
                });

        while current_state != final_state {
            let triggers = &self.automaton.pda[current_state];
            let idx = state.rand_mut().below(triggers.len() as u64) as usize;
            let trigger = &triggers[idx];
            input
                .terminals_mut()
                .push(Terminal::new(current_state, idx, trigger.term.clone()));
            current_state = trigger.dest;
            counter += 1;
        }

        counter
    }
}
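To make the walk in append_generated_terminals concrete, here is a minimal, hand-built Automaton (a hypothetical stand-in for the auto.json the example fuzzer loads; the field names come from the structs above, and the id values mimic the trigger naming used by construct_automata.py further down). Every generated input is a random path from init_state to final_state, so this machine produces strings matching a*b:

use libafl::generators::{Automaton, Trigger};

/// Hypothetical two-state machine: from state 0, either emit "a" and stay
/// in state 0, or emit "b" and move on to the final state 1.
fn tiny_automaton() -> Automaton {
    Automaton {
        init_state: 0,
        final_state: 1,
        pda: vec![
            vec![
                Trigger { id: "0_1".into(), dest: 0, term: "a".into() },
                Trigger { id: "0_2".into(), dest: 1, term: "b".into() },
            ],
            vec![], // the final state has no outgoing triggers
        ],
    }
}

Passing such a value to GramatronGenerator::new yields a generator whose inputs are random "a…ab" strings; the real auto.json encodes the same structure, presumably built from a grammar like the JavaScript one at the end of this diff.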
libafl/src/generators/mod.rs
@@ -1,95 +1,128 @@
 //! Generators may generate bytes or, in general, data, for inputs.
 
 use alloc::vec::Vec;
-use core::cmp::min;
+use core::{cmp::min, marker::PhantomData};
 
 use crate::{
     bolts::rands::Rand,
     inputs::{bytes::BytesInput, Input},
+    state::HasRand,
     Error,
 };
 
+pub mod gramatron;
+pub use gramatron::*;
+
 /// The maximum size of dummy bytes generated by _dummy generator methods
 const DUMMY_BYTES_MAX: usize = 64;
 
 /// Generators can generate ranges of bytes.
-pub trait Generator<I, R>
+pub trait Generator<I, S>
 where
     I: Input,
-    R: Rand,
 {
     /// Generate a new input
-    fn generate(&mut self, rand: &mut R) -> Result<I, Error>;
+    fn generate(&mut self, state: &mut S) -> Result<I, Error>;
 
     /// Generate a new dummy input
-    fn generate_dummy(&self) -> I;
+    fn generate_dummy(&self, state: &mut S) -> I;
 }
 
 #[derive(Clone, Debug)]
 /// Generates random bytes
-pub struct RandBytesGenerator {
-    max_size: usize,
-}
-
-impl<R> Generator<BytesInput, R> for RandBytesGenerator
+pub struct RandBytesGenerator<R, S>
 where
     R: Rand,
+    S: HasRand<R>,
 {
-    fn generate(&mut self, rand: &mut R) -> Result<BytesInput, Error> {
-        let mut size = rand.below(self.max_size as u64);
+    max_size: usize,
+    phantom: PhantomData<(R, S)>,
+}
+
+impl<R, S> Generator<BytesInput, S> for RandBytesGenerator<R, S>
+where
+    R: Rand,
+    S: HasRand<R>,
+{
+    fn generate(&mut self, state: &mut S) -> Result<BytesInput, Error> {
+        let mut size = state.rand_mut().below(self.max_size as u64);
         if size == 0 {
             size = 1;
         }
-        let random_bytes: Vec<u8> = (0..size).map(|_| rand.below(256) as u8).collect();
+        let random_bytes: Vec<u8> = (0..size)
+            .map(|_| state.rand_mut().below(256) as u8)
+            .collect();
         Ok(BytesInput::new(random_bytes))
     }
 
     /// Generates up to `DUMMY_BYTES_MAX` non-random dummy bytes (0)
-    fn generate_dummy(&self) -> BytesInput {
+    fn generate_dummy(&self, _state: &mut S) -> BytesInput {
         let size = min(self.max_size, DUMMY_BYTES_MAX);
         BytesInput::new(vec![0; size])
     }
 }
 
-impl RandBytesGenerator {
+impl<R, S> RandBytesGenerator<R, S>
+where
+    R: Rand,
+    S: HasRand<R>,
+{
     /// Returns a new [`RandBytesGenerator`], generating up to `max_size` random bytes.
     #[must_use]
     pub fn new(max_size: usize) -> Self {
-        Self { max_size }
+        Self {
+            max_size,
+            phantom: PhantomData,
+        }
     }
 }
 
 #[derive(Clone, Debug)]
 /// Generates random printable characters
-pub struct RandPrintablesGenerator {
-    max_size: usize,
-}
-
-impl<R> Generator<BytesInput, R> for RandPrintablesGenerator
+pub struct RandPrintablesGenerator<R, S>
 where
     R: Rand,
+    S: HasRand<R>,
 {
-    fn generate(&mut self, rand: &mut R) -> Result<BytesInput, Error> {
-        let mut size = rand.below(self.max_size as u64);
+    max_size: usize,
+    phantom: PhantomData<(R, S)>,
+}
+
+impl<R, S> Generator<BytesInput, S> for RandPrintablesGenerator<R, S>
+where
+    R: Rand,
+    S: HasRand<R>,
+{
+    fn generate(&mut self, state: &mut S) -> Result<BytesInput, Error> {
+        let mut size = state.rand_mut().below(self.max_size as u64);
         if size == 0 {
             size = 1;
         }
         let printables = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz \t\n!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~".as_bytes();
-        let random_bytes: Vec<u8> = (0..size).map(|_| *rand.choose(printables)).collect();
+        let random_bytes: Vec<u8> = (0..size)
+            .map(|_| *state.rand_mut().choose(printables))
+            .collect();
         Ok(BytesInput::new(random_bytes))
     }
 
     /// Generates up to `DUMMY_BYTES_MAX` non-random dummy bytes (0)
-    fn generate_dummy(&self) -> BytesInput {
+    fn generate_dummy(&self, _state: &mut S) -> BytesInput {
         let size = min(self.max_size, DUMMY_BYTES_MAX);
         BytesInput::new(vec![0_u8; size])
     }
 }
 
-impl RandPrintablesGenerator {
+impl<R, S> RandPrintablesGenerator<R, S>
+where
+    R: Rand,
+    S: HasRand<R>,
+{
     /// Creates a new [`RandPrintablesGenerator`], generating up to `max_size` random printable characters.
     #[must_use]
     pub fn new(max_size: usize) -> Self {
-        Self { max_size }
+        Self {
+            max_size,
+            phantom: PhantomData,
+        }
     }
 }
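The reworked trait threads the fuzzer state through generation instead of a bare Rand. A minimal sketch of a custom generator under the new signature, assuming only the trait and bounds shown above (OneByteGenerator is illustrative, not part of the commit):

use core::marker::PhantomData;

use libafl::{
    bolts::rands::Rand, generators::Generator, inputs::BytesInput, state::HasRand, Error,
};

/// Hypothetical generator: one random byte per input, drawn from the state's RNG.
pub struct OneByteGenerator<R, S> {
    phantom: PhantomData<(R, S)>,
}

impl<R, S> OneByteGenerator<R, S> {
    pub fn new() -> Self {
        Self { phantom: PhantomData }
    }
}

impl<R, S> Generator<BytesInput, S> for OneByteGenerator<R, S>
where
    R: Rand,
    S: HasRand<R>,
{
    fn generate(&mut self, state: &mut S) -> Result<BytesInput, Error> {
        // The RNG now comes through the state instead of a `&mut R` parameter
        Ok(BytesInput::new(vec![state.rand_mut().below(256) as u8]))
    }

    fn generate_dummy(&self, _state: &mut S) -> BytesInput {
        BytesInput::new(vec![0])
    }
}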
libafl/src/inputs/gramatron.rs (new file)
@@ -0,0 +1,96 @@
use ahash::AHasher;
use core::hash::Hasher;

use alloc::{rc::Rc, string::String, vec::Vec};
use core::{cell::RefCell, convert::From};
use serde::{Deserialize, Serialize};

use crate::{
    inputs::{HasLen, Input},
    Error,
};

#[derive(Serialize, Deserialize, Clone, Debug, Default, PartialEq, Eq)]
pub struct Terminal {
    pub state: usize,
    pub trigger_idx: usize,
    pub symbol: String,
}

impl Terminal {
    #[must_use]
    pub fn new(state: usize, trigger_idx: usize, symbol: String) -> Self {
        Self {
            state,
            trigger_idx,
            symbol,
        }
    }
}

#[derive(Serialize, Deserialize, Clone, Debug, Default, PartialEq, Eq)]
pub struct GramatronInput {
    /// The input representation as list of terminals
    terms: Vec<Terminal>,
}

impl Input for GramatronInput {
    /// Generate a name for this input
    #[must_use]
    fn generate_name(&self, _idx: usize) -> String {
        let mut hasher = AHasher::new_with_keys(0, 0);
        for term in &self.terms {
            hasher.write(term.symbol.as_bytes());
        }
        format!("{:016x}", hasher.finish())
    }
}

/// Rc Ref-cell from Input
impl From<GramatronInput> for Rc<RefCell<GramatronInput>> {
    fn from(input: GramatronInput) -> Self {
        Rc::new(RefCell::new(input))
    }
}

impl HasLen for GramatronInput {
    #[inline]
    fn len(&self) -> usize {
        self.terms.len()
    }
}

impl GramatronInput {
    /// Creates a new input from the given terminals
    #[must_use]
    pub fn new(terms: Vec<Terminal>) -> Self {
        Self { terms }
    }

    #[must_use]
    pub fn terminals(&self) -> &[Terminal] {
        &self.terms
    }

    #[must_use]
    pub fn terminals_mut(&mut self) -> &mut Vec<Terminal> {
        &mut self.terms
    }

    pub fn unparse(&self, bytes: &mut Vec<u8>) {
        bytes.clear();
        for term in &self.terms {
            bytes.extend_from_slice(term.symbol.as_bytes());
        }
    }

    pub fn crop(&self, from: usize, to: usize) -> Result<Self, Error> {
        if from < to && to <= self.terms.len() {
            let mut terms = vec![];
            terms.extend_from_slice(&self.terms[from..to]);
            Ok(Self { terms })
        } else {
            Err(Error::IllegalArgument("Invalid from or to argument".into()))
        }
    }
}
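A short sketch of how the pieces fit together (values hypothetical, using the tiny a*b automaton from earlier): the mutators rewrite the terminal list, while the harness consumes the bytes produced by unparse.

use libafl::inputs::{GramatronInput, Terminal};

fn main() {
    // One possible walk through the hypothetical a*b automaton: a, a, b
    let input = GramatronInput::new(vec![
        Terminal::new(0, 0, "a".into()),
        Terminal::new(0, 0, "a".into()),
        Terminal::new(0, 1, "b".into()),
    ]);

    let mut bytes = vec![];
    input.unparse(&mut bytes); // clears the buffer, then concatenates the symbols
    assert_eq!(bytes, b"aab");
}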
libafl/src/inputs/mod.rs
@@ -6,6 +6,9 @@ pub use bytes::BytesInput;
 pub mod encoded;
 pub use encoded::*;
 
+pub mod gramatron;
+pub use gramatron::*;
+
 use alloc::{
     string::{String, ToString},
     vec::Vec,
libafl/src/mutators/gramatron.rs (new file)
@@ -0,0 +1,285 @@
use alloc::vec::Vec;
use core::{cmp::max, marker::PhantomData};
use hashbrown::HashMap;
use serde::{Deserialize, Serialize};

use crate::{
    bolts::{rands::Rand, tuples::Named},
    corpus::Corpus,
    generators::GramatronGenerator,
    inputs::{GramatronInput, Terminal},
    mutators::{MutationResult, Mutator},
    state::{HasCorpus, HasMetadata, HasRand},
    Error,
};

pub struct GramatronRandomMutator<'a, R, S>
where
    S: HasRand<R> + HasMetadata,
    R: Rand,
{
    generator: &'a GramatronGenerator<R, S>,
}

impl<'a, R, S> Mutator<GramatronInput, S> for GramatronRandomMutator<'a, R, S>
where
    S: HasRand<R> + HasMetadata,
    R: Rand,
{
    fn mutate(
        &mut self,
        state: &mut S,
        input: &mut GramatronInput,
        _stage_idx: i32,
    ) -> Result<MutationResult, Error> {
        if !input.terminals().is_empty() {
            let size = state.rand_mut().below(input.terminals().len() as u64 + 1) as usize;
            input.terminals_mut().truncate(size);
        }
        if self.generator.append_generated_terminals(input, state) > 0 {
            Ok(MutationResult::Mutated)
        } else {
            Ok(MutationResult::Skipped)
        }
    }
}

impl<'a, R, S> Named for GramatronRandomMutator<'a, R, S>
where
    S: HasRand<R> + HasMetadata,
    R: Rand,
{
    fn name(&self) -> &str {
        "GramatronRandomMutator"
    }
}

impl<'a, R, S> GramatronRandomMutator<'a, R, S>
where
    S: HasRand<R> + HasMetadata,
    R: Rand,
{
    /// Creates a new [`GramatronRandomMutator`].
    #[must_use]
    pub fn new(generator: &'a GramatronGenerator<R, S>) -> Self {
        Self { generator }
    }
}

#[derive(Serialize, Deserialize)]
struct GramatronIdxMapMetadata {
    pub map: HashMap<usize, Vec<usize>>,
}

crate::impl_serdeany!(GramatronIdxMapMetadata);

impl GramatronIdxMapMetadata {
    #[must_use]
    pub fn new(input: &GramatronInput) -> Self {
        let mut map = HashMap::default();
        for i in 0..input.terminals().len() {
            let entry = map.entry(input.terminals()[i].state).or_insert(vec![]);
            (*entry).push(i);
        }
        Self { map }
    }
}

#[derive(Default)]
pub struct GramatronSpliceMutator<C, R, S>
where
    C: Corpus<GramatronInput>,
    S: HasRand<R> + HasCorpus<C, GramatronInput> + HasMetadata,
    R: Rand,
{
    phantom: PhantomData<(C, R, S)>,
}

impl<C, R, S> Mutator<GramatronInput, S> for GramatronSpliceMutator<C, R, S>
where
    C: Corpus<GramatronInput>,
    S: HasRand<R> + HasCorpus<C, GramatronInput> + HasMetadata,
    R: Rand,
{
    fn mutate(
        &mut self,
        state: &mut S,
        input: &mut GramatronInput,
        _stage_idx: i32,
    ) -> Result<MutationResult, Error> {
        if input.terminals().is_empty() {
            return Ok(MutationResult::Skipped);
        }

        let count = state.corpus().count();
        let idx = state.rand_mut().below(count as u64) as usize;

        let insert_at = state.rand_mut().below(input.terminals().len() as u64) as usize;

        let rand_num = state.rand_mut().next() as usize;

        let mut other_testcase = state.corpus().get(idx)?.borrow_mut();
        other_testcase.load_input()?; // Preload the input

        if !other_testcase.has_metadata::<GramatronIdxMapMetadata>() {
            let meta = GramatronIdxMapMetadata::new(other_testcase.input().as_ref().unwrap());
            other_testcase.add_metadata(meta);
        }
        let meta = other_testcase
            .metadata()
            .get::<GramatronIdxMapMetadata>()
            .unwrap();
        let other = other_testcase.input().as_ref().unwrap();

        meta.map.get(&input.terminals()[insert_at].state).map_or(
            Ok(MutationResult::Skipped),
            |splice_points| {
                let from = splice_points[rand_num % splice_points.len()];

                input.terminals_mut().truncate(insert_at);
                input
                    .terminals_mut()
                    .extend_from_slice(&other.terminals()[from..]);

                Ok(MutationResult::Mutated)
            },
        )
    }
}

impl<C, R, S> Named for GramatronSpliceMutator<C, R, S>
where
    C: Corpus<GramatronInput>,
    S: HasRand<R> + HasCorpus<C, GramatronInput> + HasMetadata,
    R: Rand,
{
    fn name(&self) -> &str {
        "GramatronSpliceMutator"
    }
}

impl<C, R, S> GramatronSpliceMutator<C, R, S>
where
    C: Corpus<GramatronInput>,
    S: HasRand<R> + HasCorpus<C, GramatronInput> + HasMetadata,
    R: Rand,
{
    /// Creates a new [`GramatronSpliceMutator`].
    #[must_use]
    pub fn new() -> Self {
        Self {
            phantom: PhantomData,
        }
    }
}

#[derive(Default)]
pub struct GramatronRecursionMutator<R, S>
where
    S: HasRand<R> + HasMetadata,
    R: Rand,
{
    counters: HashMap<usize, (usize, usize, usize)>,
    states: Vec<usize>,
    temp: Vec<Terminal>,
    phantom: PhantomData<(R, S)>,
}

impl<R, S> Mutator<GramatronInput, S> for GramatronRecursionMutator<R, S>
where
    S: HasRand<R> + HasMetadata,
    R: Rand,
{
    fn mutate(
        &mut self,
        state: &mut S,
        input: &mut GramatronInput,
        _stage_idx: i32,
    ) -> Result<MutationResult, Error> {
        if input.terminals().is_empty() {
            return Ok(MutationResult::Skipped);
        }

        self.counters.clear();
        self.states.clear();
        for i in 0..input.terminals().len() {
            let s = input.terminals()[i].state;
            if let Some(entry) = self.counters.get_mut(&s) {
                if entry.0 == 1 {
                    // Keep track only of states with more than one node
                    self.states.push(s);
                }
                entry.0 += 1;
                entry.2 = max(entry.2, i);
            } else {
                self.counters.insert(s, (1, i, i));
            }
        }

        if self.states.is_empty() {
            return Ok(MutationResult::Skipped);
        }

        let chosen = *state.rand_mut().choose(&self.states);
        let chosen_nums = self.counters.get(&chosen).unwrap().0;

        #[allow(clippy::cast_sign_loss, clippy::pedantic)]
        let mut first = state.rand_mut().below(chosen_nums as u64 - 1) as i64;
        #[allow(clippy::cast_sign_loss, clippy::pedantic)]
        let mut second = state
            .rand_mut()
            .between(first as u64 + 1, chosen_nums as u64 - 1) as i64;

        let mut idx_1 = 0;
        let mut idx_2 = 0;
        for i in (self.counters.get(&chosen).unwrap().1)..=(self.counters.get(&chosen).unwrap().2) {
            if input.terminals()[i].state == chosen {
                if first == 0 {
                    idx_1 = i;
                }
                if second == 0 {
                    idx_2 = i;
                    break;
                }
                first -= 1;
                second -= 1;
            }
        }
        debug_assert!(idx_1 < idx_2);

        self.temp.clear();
        self.temp.extend_from_slice(&input.terminals()[idx_1..]);

        // Replay the walk from the first occurrence after the second one,
        // duplicating the recursive feature between idx_1 and idx_2
        input.terminals_mut().truncate(idx_2);
        input.terminals_mut().extend_from_slice(&self.temp);

        Ok(MutationResult::Mutated)
    }
}

impl<R, S> Named for GramatronRecursionMutator<R, S>
where
    S: HasRand<R> + HasMetadata,
    R: Rand,
{
    fn name(&self) -> &str {
        "GramatronRecursionMutator"
    }
}

impl<R, S> GramatronRecursionMutator<R, S>
where
    S: HasRand<R> + HasMetadata,
    R: Rand,
{
    /// Creates a new [`GramatronRecursionMutator`].
    #[must_use]
    pub fn new() -> Self {
        Self {
            counters: HashMap::default(),
            states: vec![],
            temp: vec![],
            phantom: PhantomData,
        }
    }
}
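For intuition (values hypothetical, based on the tiny a*b automaton sketched earlier): an input with terminals a a b has all three terminals in state 0, so the recursion mutator can pick two occurrences, say idx_1 = 0 and idx_2 = 1. Truncating at idx_2 and replaying the tail from idx_1 turns a a b into a a a b, growing the recursive part of the walk while keeping it a valid path through the automaton.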
libafl/src/mutators/mod.rs
@@ -10,6 +10,8 @@ pub mod encoded_mutations;
 pub use encoded_mutations::*;
 pub mod mopt_mutator;
 pub use mopt_mutator::*;
+pub mod gramatron;
+pub use gramatron::*;
 
 use crate::{
     bolts::tuples::{HasLen, Named},
libafl/src/state/mod.rs
@@ -485,13 +485,13 @@
         forced: bool,
     ) -> Result<(), Error>
     where
-        G: Generator<I, R>,
+        G: Generator<I, Self>,
         Z: Evaluator<E, EM, I, Self>,
         EM: EventFirer<I, Self>,
     {
         let mut added = 0;
         for _ in 0..num {
-            let input = generator.generate(self.rand_mut())?;
+            let input = generator.generate(self)?;
             if forced {
                 let _ = fuzzer.add_input(self, executor, manager, input)?;
                 added += 1;
@@ -523,7 +523,7 @@
         num: usize,
     ) -> Result<(), Error>
     where
-        G: Generator<I, R>,
+        G: Generator<I, Self>,
         Z: Evaluator<E, EM, I, Self>,
         EM: EventFirer<I, Self>,
     {
@@ -540,7 +540,7 @@
         num: usize,
     ) -> Result<(), Error>
     where
-        G: Generator<I, R>,
+        G: Generator<I, Self>,
         Z: Evaluator<E, EM, I, Self>,
         EM: EventFirer<I, Self>,
     {
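These three hunks are the other half of the Generator signature change: the state now hands itself to generate(), so a generator reaches the RNG through HasRand (and, in principle, anything else the state exposes) instead of receiving a bare Rand. That is what lets the same GramatronGenerator instance serve both initial corpus generation here and the GramatronRandomMutator above.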
scripts/gramatron/construct_automata.py (new file)
@@ -0,0 +1,351 @@
#!/usr/bin/env python3
# Originally from https://github.com/HexHive/Gramatron
# License: Apache-2
# Copyright 2021 HexHive
# Copyright 2021 AFLplusplus

import sys
import json
import re
from collections import defaultdict
# import pygraphviz as pgv

gram_data = None
state_count = 1
pda = []
worklist = []
state_stacks = {}

# === If user provides upper bound on the stack size during FSA creation ===
# Specifies the upper bound to which the stack is allowed to grow
# If for any generated state, the stack size is >= stack_limit then this
# state is not expanded further.
stack_limit = None
# Holds the set of unexpanded rules owing to the user-passed stack constraint limit
unexpanded_rules = set()


def main(grammar, limit):
    global worklist, gram_data, stack_limit
    current = '0'
    stack_limit = limit
    if stack_limit:
        print('[X] Operating in bounded stack mode')

    with open(grammar, 'r') as fd:
        gram_data = json.load(fd)
    start_symbol = gram_data["Start"][0]
    worklist.append([current, [start_symbol]])
    # print(grammar)
    filename = (grammar.split('/')[-1]).split('.')[0]

    while worklist:
        # Take an element from the worklist
        # print('================')
        # print('Worklist:', worklist)
        element = worklist.pop(0)
        prep_transitions(element)

    pda_file = filename + '_transition.json'
    graph_file = filename + '.png'
    # print('XXXXXXXXXXXXXXXX')
    # print('PDA file:%s Png graph file:%s' % (pda_file, graph_file))
    # XXX Commented out because visualization of current version of PHP causes segfault
    # Create the graph and dump the transitions to a file
    # create_graph(filename)
    transformed = postprocess()
    with open(filename + '_automata.json', 'w+') as fd:
        json.dump(transformed, fd)
    with open(filename + '_transition.json', 'w+') as fd:
        json.dump(pda, fd)
    if not unexpanded_rules:
        print('[X] No unexpanded rules, absolute FSA formed')
        exit(0)
    else:
        print('[X] Certain rules were not expanded due to stack size limit. Inexact approximation has been created and the disallowed rules have been put in {}_disallowed.json'.format(filename))
        print('[X] Number of unexpanded rules:', len(unexpanded_rules))
        with open(filename + '_disallowed.json', 'w+') as fd:
            json.dump(list(unexpanded_rules), fd)


def create_graph(filename):
    '''
    Creates a DOT representation of the PDA
    '''
    global pda
    G = pgv.AGraph(strict=False, directed=True)
    for transition in pda:
        print('Transition:', transition)
        G.add_edge(transition['source'], transition['dest'],
                   label='Term:{}'.format(transition['terminal']))
    G.layout(prog='dot')
    print('Do it up 2')
    G.draw(filename + '.png')


def prep_transitions(element):
    '''
    Generates transitions
    '''
    global gram_data, state_count, pda, worklist, state_stacks, stack_limit, unexpanded_rules
    state = element[0]
    try:
        nonterminal = element[1][0]
    except IndexError:
        # Final state was encountered, pop from worklist without doing anything
        return
    rules = gram_data[nonterminal]
    count = 1
    for rule in rules:
        isRecursive = False
        # print('Current state:', state)
        terminal, ss, termIsRegex = tokenize(rule)
        transition = get_template()
        transition['trigger'] = '_'.join([state, str(count)])
        transition['source'] = state
        transition['dest'] = str(state_count)
        transition['ss'] = ss
        transition['terminal'] = terminal
        transition['rule'] = "{} -> {}".format(nonterminal, rule)
        if termIsRegex:
            transition['termIsRegex'] = True

        # Creating a state stack for the new state
        try:
            state_stack = state_stacks[state][:]
        except KeyError:
            state_stack = []
        if len(state_stack):
            state_stack.pop(0)
        if ss:
            for symbol in ss[::-1]:
                state_stack.insert(0, symbol)
        transition['stack'] = state_stack

        # Check if a recursive transition state being created, if so make a backward
        # edge and don't add anything to the worklist
        # print(state_stacks)
        if state_stacks:
            for state_element, stack in state_stacks.items():
                # print('Stack:', sorted(stack))
                # print('State stack:', sorted(state_stack))
                if sorted(stack) == sorted(state_stack):
                    transition['dest'] = state_element
                    # print('Recursive:', transition)
                    pda.append(transition)
                    count += 1
                    isRecursive = True
                    break
        # If a recursive transition exercised don't add the same transition as a new
        # edge, continue onto the next transitions
        if isRecursive:
            continue

        # If the generated state has a stack size > stack_limit then that state is abandoned
        # and not added to the FSA or the worklist for further expansion
        if stack_limit:
            if (len(transition['stack']) > stack_limit):
                unexpanded_rules.add(transition['rule'])
                continue

        # Create transitions for the non-recursive relations and add to the worklist
        # print('Normal:', transition)
        # print('State2:', state)
        pda.append(transition)
        worklist.append([transition['dest'], transition['stack']])
        state_stacks[transition['dest']] = state_stack
        state_count += 1
        count += 1


def tokenize(rule):
    '''
    Gets the terminal and the corresponding stack symbols from a rule in GNF form
    '''
    pattern = re.compile(r"([r])*\'([\s\S]+)\'([\s\S]*)")
    terminal = None
    ss = None
    termIsRegex = False
    match = pattern.match(rule)
    if match.group(1):
        termIsRegex = True
    if match.group(2):
        terminal = match.group(2)
    else:
        raise AssertionError("Rule is not in GNF form")

    if match.group(3):
        ss = (match.group(3)).split()

    return terminal, ss, termIsRegex


def get_template():
    transition_template = {
        'trigger': None,
        'source': None,
        'dest': None,
        'termIsRegex': False,
        'terminal': None,
        'stack': []
    }
    return transition_template


def postprocess1():
    '''
    Creates a representation to be passed on to the C-module
    '''
    global pda
    final_struct = {}
    # Supporting data structures for if stack limit is imposed
    culled_pda = []
    culled_final = []
    num_transitions = 0  # Keep track of number of transitions

    states, final, initial = _get_states()
    memoized = [[]] * len(states)

    print(initial)
    assert len(initial) == 1, 'More than one init state found'

    # Cull transitions to states which were not expanded owing to the stack limit
    if stack_limit:

        blocklist = []
        for final_state in final:
            for transition in pda:
                if (transition["dest"] == final_state) and (len(transition["stack"]) > 0):
                    blocklist.append(transition["dest"])
                    continue
                else:
                    culled_pda.append(transition)

        culled_final = [state for state in final if state not in blocklist]

        assert len(culled_final) == 1, 'More than one final state found'

        for transition in culled_pda:
            state = transition["source"]
            if transition["dest"] in blocklist:
                continue
            num_transitions += 1
            memoized[int(state)].append((transition["trigger"],
                                         int(transition["dest"]), transition["terminal"]))
        final_struct["init_state"] = int(initial)
        final_struct["final_state"] = int(culled_final[0])
        # The reason we do this is because when states are culled, the indexing is
        # still relative to the actual number of states hence we keep numstates recorded
        # as the original number of states
        print('[X] Actual Number of states:', len(memoized))
        print('[X] Number of transitions:', num_transitions)
        print('[X] Original Number of states:', len(states))
        final_struct["pda"] = memoized
        return final_struct

    # Running FSA construction in exact approximation mode and postprocessing it like so
    for transition in pda:
        state = transition["source"]
        memoized[int(state)].append((transition["trigger"],
                                     int(transition["dest"]), transition["terminal"]))

    final_struct["init_state"] = int(initial)
    final_struct["final_state"] = int(final[0])
    print('[X] Actual Number of states:', len(memoized))
    final_struct["pda"] = memoized
    return final_struct


def postprocess():
    '''
    Creates a representation to be passed on to the C-module
    '''
    global pda
    final_struct = {}
    memoized = defaultdict(list)
    # Supporting data structures for if stack limit is imposed
    culled_pda = []
    culled_final = []
    num_transitions = 0  # Keep track of number of transitions

    states, final, initial = _get_states()

    print(initial)
    assert len(initial) == 1, 'More than one init state found'

    # Cull transitions to states which were not expanded owing to the stack limit
    if stack_limit:

        blocklist = []
        for final_state in final:
            for transition in pda:
                if (transition["dest"] == final_state) and (len(transition["stack"]) > 0):
                    blocklist.append(transition["dest"])
                    continue
                else:
                    culled_pda.append(transition)

        culled_final = [state for state in final if state not in blocklist]

        assert len(culled_final) == 1, 'More than one final state found'

        for transition in culled_pda:
            state = transition["source"]
            if transition["dest"] in blocklist:
                continue
            num_transitions += 1
            memoized[int(state)].append([transition["trigger"], int(transition["dest"]),
                                         transition["terminal"]])

        final_struct["init_state"] = int(initial)
        final_struct["final_state"] = int(culled_final[0])
        # The reason we do this is because when states are culled, the indexing is
        # still relative to the actual number of states hence we keep numstates recorded
        # as the original number of states
        print('[X] Actual Number of states:', len(memoized.keys()))
        print('[X] Number of transitions:', num_transitions)
        print('[X] Original Number of states:', len(states))
        # final_struct["numstates"] = len(states)
        memoized_list = [[]] * len(states)
    else:
        # Running FSA construction in exact approximation mode and postprocessing it like so
        for transition in pda:
            state = transition["source"]
            memoized[int(state)].append([transition["trigger"], int(transition["dest"]),
                                         transition["terminal"]])

        final_struct["init_state"] = int(initial)
        final_struct["final_state"] = int(final[0])
        print('[X] Actual Number of states:', len(memoized.keys()))
        # final_struct["numstates"] = len(memoized.keys())
        memoized_list = [[]] * len(memoized.keys())

    for k in memoized.keys():
        memoized_list[k] = memoized[k]
    final_struct["pda"] = memoized_list
    return final_struct


def _get_states():
    source = set()
    dest = set()
    global pda
    for transition in pda:
        source.add(transition["source"])
        dest.add(transition["dest"])
    source_copy = source.copy()
    source_copy.update(dest)
    return list(source_copy), list(dest.difference(source)), str(''.join(list(source.difference(dest))))


if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='Script to convert GNF grammar to PDA')
    parser.add_argument(
        '--gf',
        type=str,
        help='Location of GNF grammar')
    parser.add_argument(
        '--limit',
        type=int,
        default=None,
        help='Specify the upper bound for the stack size')
    args = parser.parse_args()
    main(args.gf, args.limit)
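For reference, the object postprocess() serializes matches what the Rust Automaton struct in libafl/src/generators/gramatron.rs deserializes: init_state, final_state, and pda, with pda[state] listing the outgoing transitions of each state. Each transition is emitted as a positional [trigger, dest, terminal] triple, which lines up with the Trigger field order (id, dest, term); serde's derived Deserialize accepts such sequences as well as maps.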
scripts/gramatron/gnf_converter.py (new file)
@@ -0,0 +1,309 @@
#!/usr/bin/env python3
# Originally from https://github.com/HexHive/Gramatron
# License: Apache-2
# Copyright 2021 HexHive
# Copyright 2021 AFLplusplus

import sys
import re
import copy
import json
from string import ascii_uppercase
from itertools import combinations
from collections import defaultdict

DEBUG = False
NONTERMINALSET = []
COUNT = 1


def convert_to_gnf(grammar, start):
    if DEBUG:
        with open('debug_preprocess.json', 'w+') as fd:
            json.dump(grammar, fd)
    grammar = remove_unit(grammar)  # eliminates unit productions
    if DEBUG:
        with open('debug_unit.json', 'w+') as fd:
            json.dump(grammar, fd)
    grammar = remove_mixed(grammar)  # eliminate terminals existing with non-terminals
    if DEBUG:
        with open('debug_mixed.json', 'w+') as fd:
            json.dump(grammar, fd)
    grammar = break_rules(grammar)  # eliminate rules with more than two non-terminals
    if DEBUG:
        with open('debug_break.json', 'w+') as fd:
            json.dump(grammar, fd)
    grammar = gnf(grammar)

    # Dump GNF form of the grammar with only reachable rules
    # reachable_grammar = get_reachable(grammar, start)
    # with open('debug_gnf_reachable.json', 'w+') as fd:
    #     json.dump(reachable_grammar, fd)
    if DEBUG:
        with open('debug_gnf.json', 'w+') as fd:
            json.dump(grammar, fd)

    grammar["Start"] = [start]
    return grammar


def get_reachable(grammar, start):
    '''
    Returns a grammar without dead rules
    '''
    reachable_nt = set()
    worklist = list()
    processed = set()
    reachable_grammar = dict()
    worklist.append(start)

    while worklist:
        nt = worklist.pop(0)
        processed.add(nt)
        reachable_grammar[nt] = grammar[nt]
        rules = grammar[nt]
        for rule in rules:
            tokens = gettokens(rule)
            for token in tokens:
                if not isTerminal(token):
                    if token not in processed:
                        worklist.append(token)
    return reachable_grammar


def gettokens(rule):
    pattern = re.compile(r"([^\s\"\']+)|\"([^\"]*)\"|\'([^\']*)\'")
    return [matched.group(0) for matched in pattern.finditer(rule)]


def gnf(grammar):
    old_grammar = copy.deepcopy(grammar)
    new_grammar = defaultdict(list)
    isgnf = False
    while not isgnf:
        for lhs, rules in old_grammar.items():
            for rule in rules:
                tokens = gettokens(rule)
                if len(tokens) == 1 and isTerminal(rule):
                    new_grammar[lhs].append(rule)
                    continue
                startoken = tokens[0]
                endrule = tokens[1:]
                if not isTerminal(startoken):
                    newrules = []
                    extendrules = old_grammar[startoken]
                    for extension in extendrules:
                        temprule = endrule[:]
                        temprule.insert(0, extension)
                        newrules.append(temprule)
                    for newnew in newrules:
                        new_grammar[lhs].append(' '.join(newnew))
                else:
                    new_grammar[lhs].append(rule)
        isgnf = True
        for lhs, rules in new_grammar.items():
            for rule in rules:
                # if "\' \'" or isTerminal(rule):
                tokens = gettokens(rule)
                if len(tokens) == 1 and isTerminal(rule):
                    continue
                startoken = tokens[0]
                if not isTerminal(startoken):
                    isgnf = False
                    break
        if not isgnf:
            old_grammar = copy.deepcopy(new_grammar)
            new_grammar = defaultdict(list)
    return new_grammar


def process_antlr4_grammar(data):
    productions = []
    production = []
    for line in data:
        if line != '\n':
            production.append(line)
        else:
            productions.append(production)
            production = []
    final_rule_set = {}
    for production in productions:
        rules = []
        init = production[0]
        nonterminal = init.split(':')[0]
        rules.append(strip_chars(init.split(':')[1]).strip('| '))
        for production_rule in production[1:]:
            rules.append(strip_chars(production_rule.split('|')[0]))
        final_rule_set[nonterminal] = rules
    # for line in data:
    #     if line != '\n':
    #         production.append(line)
    return final_rule_set


def remove_unit(grammar):
    nounitproductions = False
    old_grammar = copy.deepcopy(grammar)
    new_grammar = defaultdict(list)
    while not nounitproductions:
        for lhs, rules in old_grammar.items():
            for rhs in rules:
                # Checking if the rule is a unit production rule
                if len(gettokens(rhs)) == 1:
                    if not isTerminal(rhs):
                        new_grammar[lhs].extend([rule for rule in old_grammar[rhs]])
                    else:
                        new_grammar[lhs].append(rhs)
                else:
                    new_grammar[lhs].append(rhs)
        # Checking there are no unit productions left in the grammar
        nounitproductions = True
        for lhs, rules in new_grammar.items():
            for rhs in rules:
                if len(gettokens(rhs)) == 1:
                    if not isTerminal(rhs):
                        nounitproductions = False
                        break
            if not nounitproductions:
                break
        # Unit productions are still there in the grammar -- repeat the process
        if not nounitproductions:
            old_grammar = copy.deepcopy(new_grammar)
            new_grammar = defaultdict(list)
    return new_grammar


def isTerminal(rule):
    # pattern = re.compile("([r]*\'[\s\S]+\')")
    pattern = re.compile(r"\'(.*?)\'")
    match = pattern.match(rule)
    if match:
        return True
    else:
        return False


def remove_mixed(grammar):
    '''
    Remove rules where there are terminals mixed in with non-terminals
    '''
    new_grammar = defaultdict(list)
    for lhs, rules in grammar.items():
        for rhs in rules:
            # tokens = rhs.split(' ')
            regen_rule = []
            tokens = gettokens(rhs)
            if len(gettokens(rhs)) == 1:
                new_grammar[lhs].append(rhs)
                continue
            for token in tokens:
                # Identify if there is a terminal in the RHS
                if isTerminal(token):
                    # Check if a corresponding nonterminal already exists
                    nonterminal = terminal_exist(token, new_grammar)
                    if nonterminal:
                        regen_rule.append(nonterminal)
                    else:
                        new_nonterm = get_nonterminal()
                        new_grammar[new_nonterm].append(token)
                        regen_rule.append(new_nonterm)
                else:
                    regen_rule.append(token)
            new_grammar[lhs].append(' '.join(regen_rule))
    return new_grammar


def break_rules(grammar):
    new_grammar = defaultdict(list)
    old_grammar = copy.deepcopy(grammar)
    nomulti = False
    while not nomulti:
        for lhs, rules in old_grammar.items():
            for rhs in rules:
                tokens = gettokens(rhs)
                if len(tokens) > 2 and (not isTerminal(rhs)):
                    split = tokens[:-1]
                    nonterminal = terminal_exist(' '.join(split), new_grammar)
                    if nonterminal:
                        newrule = ' '.join([nonterminal, tokens[-1]])
                        new_grammar[lhs].append(newrule)
                    else:
                        nonterminal = get_nonterminal()
                        new_grammar[nonterminal].append(' '.join(split))
                        newrule = ' '.join([nonterminal, tokens[-1]])
                        new_grammar[lhs].append(newrule)
                else:
                    new_grammar[lhs].append(rhs)
        nomulti = True
        for lhs, rules in new_grammar.items():
            for rhs in rules:
                # tokens = rhs.split(' ')
                tokens = gettokens(rhs)
                if len(tokens) > 2 and (not isTerminal(rhs)):
                    nomulti = False
                    break
        if not nomulti:
            old_grammar = copy.deepcopy(new_grammar)
            new_grammar = defaultdict(list)
    return new_grammar


def strip_chars(rule):
    return rule.strip('\n\t ')


def get_nonterminal():
    global NONTERMINALSET
    if NONTERMINALSET:
        return NONTERMINALSET.pop(0)
    else:
        _repopulate()
        return NONTERMINALSET.pop(0)


def _repopulate():
    global COUNT
    global NONTERMINALSET
    NONTERMINALSET = [''.join(x) for x in list(combinations(ascii_uppercase, COUNT))]
    COUNT += 1


def terminal_exist(token, grammar):
    for nonterminal, rules in grammar.items():
        if token in rules:
            return nonterminal
    return None


def main(grammar_file, out, start):
    grammar = None
    # If grammar file is a preprocessed NT file, then skip preprocessing
    if '.json' in grammar_file:
        with open(grammar_file, 'r') as fd:
            grammar = json.load(fd)
    elif '.g4' in grammar_file:
        with open(grammar_file, 'r') as fd:
            data = fd.readlines()
        grammar = process_antlr4_grammar(data)
    else:
        raise ValueError('Unknown file format passed. Accepts (.g4/.json)')

    grammar = convert_to_gnf(grammar, start)
    with open(out, 'w+') as fd:
        json.dump(grammar, fd)


if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='Script to convert grammar to GNF form')
    parser.add_argument(
        '--gf',
        type=str,
        required=True,
        help='Location of grammar file')
    parser.add_argument(
        '--out',
        type=str,
        required=True,
        help='Location of output file')
    parser.add_argument(
        '--start',
        type=str,
        required=True,
        help='Start token')
    parser.add_argument(
        '--debug',
        action='store_true',
        help='Write intermediate states to debug files')
    args = parser.parse_args()
    DEBUG = args.debug

    main(args.gf, args.out, args.start)
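Together, the two scripts form the grammar-preprocessing pipeline from the commit message: gnf_converter.py rewrites a .g4 or .json grammar into GNF, and construct_automata.py expands that GNF grammar into the automaton JSON which GramatronGenerator loads. A hypothetical end-to-end run (file names illustrative):

    python3 gnf_converter.py --gf JavaScript.g4 --start PROGRAM --out js_gnf.json
    python3 construct_automata.py --gf js_gnf.json --limit 5

construct_automata.py writes its result as <input stem>_automata.json; renamed to auto.json, that is the file the baby fuzzer reads at startup.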
scripts/gramatron/grammars/js/source.json (new file)
@@ -0,0 +1,606 @@
{
    "ARGLIST": [
        "EXPR ',' ARGLIST", "EXPR",
        "EXPR ',' ARGLIST", "EXPR"
    ],
    "ARGS": [
        "'()'", "'(' ARGLIST ')'",
        "'()'", "'(' ARGLIST ')'"
    ],
    "ARITHMETICOPERATION": [
        "EXPR '/' EXPR", "EXPR '*' EXPR", "EXPR '+' EXPR", "EXPR '-' EXPR",
        "EXPR '%' EXPR", "EXPR '**' EXPR", "EXPR '++'"
    ],
    "ARRAY": [
        "'[' ARRAYCONTENT ']'", "'[]'"
    ],
    "ARRAYCONTENT": [
        "EXPR ',' ARRAYCONTENT", "EXPR"
    ],
    "BOOLEAN": [
        "'true'", "'false'"
    ],
    "BYTEWISEOPERATION": [
        "EXPR '&' EXPR", "EXPR '|' EXPR"
    ],
    "COMPARISONOPERATION": [
        "EXPR '<' EXPR"
    ],
    "DECIMALDIGITS": [
        "'20'", "'1234'", "'66'", "'234_9'", "'99999999999999999999'"
    ],
    "DECIMALNUMBER": [
        "DECIMALDIGITS"
    ],
    "EXPR": [
        "'(' EXPR ')'", "VAR", "'delete' SP EXPR", "'new' SP IDENTIFIER ARGS",
        "LITERAL", "IDENTIFIER", "METHODCALL",
        "'(' ARITHMETICOPERATION ')'", "'(' COMPARISONOPERATION ')'",
        "'(' BYTEWISEOPERATION ')'", "'(' LOGICALOPERATION ')'"
    ],
    "IDENTIFIER": [
        "'Object'", "VAR", "'Function'", "'main'", "'opt'", "'Boolean'", "'Symbol'",
        "'JSON'", "'Error'", "'EvalError'", "'RangeError'", "'ReferenceError'",
        "'SyntaxError'", "'TypeError'", "'URIError'", "'this'", "'Number'", "'Math'",
        "'Date'", "'String'", "'RegExp'", "'Array'", "'Int8Array'", "'Uint8Array'",
        "'Uint8ClampedArray'", "'Int16Array'", "'Uint16Array'", "'Int32Array'",
        "'Uint32Array'", "'Float32Array'", "'Float64Array'", "'DataView'",
        "'ArrayBuffer'", "'Map'", "'Set'", "'WeakMap'", "'WeakSet'", "'Promise'",
        "'AsyncFunction'", "'asyncGenerator'", "'Reflect'", "'Proxy'", "'Intl'",
        "'Intl.Collator'", "'Intl.DateTimeFormat'", "'Intl.NumberFormat'",
        "'Intl.PluralRules'", "'WebAssembly'", "'WebAssembly.Module'",
        "'WebAssembly.Instance'", "'WebAssembly.Memory'", "'WebAssembly.Table'",
        "'WebAssembly.CompileError'", "'WebAssembly.LinkError'",
        "'WebAssembly.RuntimeError'", "'arguments'", "'Infinity'", "'NaN'",
        "'undefined'", "'null'", "'console'", "' '"
    ],
    "IDENTIFIERLIST": [
        "IDENTIFIER ',' IDENTIFIERLIST",
        "'(' IDENTIFIERLIST '),' IDENTIFIERLIST",
        "IDENTIFIER"
    ],
    "JSBLOCK": [
        "JSSTATEMENT", "JSSTATEMENT JSBLOCK"
    ],
    "JSSTATEMENT": [
        "STATEMENT NEWLINE"
    ],
    "LITERAL": [
        "'null'", "BOOLEAN", "NUMBER", "ARRAY"
    ],
    "LOGICALOPERATION": [
        "EXPR '&&' EXPR", "EXPR '||' EXPR"
    ],
    "METHODCALL": [
        "OBJECT PROPERTY METHODCALL1"
    ],
    "METHODCALL1": [
        "'.' METHOD_NAME ARGS METHODCALL1", "' '"
    ],
    "METHOD_NAME": [
        "IDENTIFIER", "'print'", "'eval'", "'uneval'", "'isFinite'", "'isNaN'",
        "'parseFloat'", "'parseInt'", "'decodeURI'", "'decodeURIComponent'",
        "'encodeURI'", "'encodeURIComponent'", "'escape'", "'unescape'", "'assign'",
        "'create'", "'defineProperty'", "'defineProperties'", "'entries'", "'freeze'",
        "'getOwnPropertyDescriptor'", "'getOwnPropertyDescriptors'",
        "'getOwnPropertyNames'", "'getOwnPropertySymbols'", "'getPrototypeOf'",
        "'is'", "'isExtensible'", "'isFrozen'", "'isSealed'", "'keys'",
        "'preventExtensions'", "'seal'", "'setPrototypeOf'", "'values'",
        "'__defineGetter__'", "'__defineSetter__'", "'__lookupGetter__'",
        "'__lookupSetter__'", "'hasOwnProperty'", "'isPrototypeOf'",
        "'propertyIsEnumerable'", "'toSource'", "'toLocaleString'", "'toString'",
        "'unwatch'", "'valueOf'", "'watch'", "'apply'", "'bind'", "'call'",
        "'isGenerator'", "'valueOf'", "'for'", "'keyFor'", "'stringify'",
        "'isInteger'", "'isSafeInteger'", "'toInteger'", "'toExponential'",
        "'toFixed'", "'toLocaleString'", "'toPrecision'", "'abs'", "'acos'",
        "'acosh'", "'asin'", "'asinh'", "'atan'", "'atanh'", "'atan2'", "'cbrt'",
        "'ceil'", "'clz32'", "'cos'", "'cosh'", "'exp'", "'expm1'", "'floor'",
        "'fround'", "'hypot'", "'imul'", "'log'", "'log1p'", "'log10'", "'log2'",
        "'max'", "'min'", "'pow'", "'random'", "'round'", "'sign'", "'sin'",
        "'sinh'", "'sqrt'", "'tan'", "'tanh'", "'trunc'", "'now'", "'parse'",
        "'UTC'", "'getDate'", "'getDay'", "'getFullYear'", "'getHours'",
        "'getMilliseconds'", "'getMinutes'", "'getMonth'", "'getSeconds'",
        "'getTime'", "'getTimezoneOffset'", "'getUTCDate'", "'getUTCDay'",
        "'getUTCFullYear'", "'getUTCHours'", "'getUTCMilliseconds'",
        "'getUTCMinutes'", "'getUTCMonth'", "'getUTCSeconds'", "'getYear'",
        "'setDate'", "'setFullYear'", "'setHours'", "'setMilliseconds'",
        "'setMinutes'", "'setMonth'", "'setSeconds'", "'setTime'", "'setUTCDate'",
        "'setUTCFullYear'", "'setUTCHours'", "'setUTCMilliseconds'",
        "'setUTCMinutes'", "'setUTCMonth'", "'setUTCSeconds'", "'setYear'",
        "'toDateString'", "'toISOString'", "'toJSON'", "'toGMTString'",
        "'toLocaleDateString'", "'toLocaleFormat'", "'toLocaleString'",
        "'toLocaleTimeString'", "'toTimeString'", "'toUTCString'", "'indexOf'",
        "'substring'", "'charAt'", "'strcmp'", "'fromCharCode'", "'fromCodePoint'",
        "'raw'", "'charCodeAt'", "'slice'", "'codePointAt'", "'concat'",
        "'includes'", "'endsWith'", "'lastIndexOf'", "'localeCompare'", "'match'",
        "'normalize'", "'padEnd'", "'padStart'", "'quote'", "'repeat'", "'replace'",
        "'search'", "'split'", "'startsWith'", "'substr'", "'toLocaleLowerCase'",
        "'toLocaleUpperCase'", "'toLowerCase'", "'toUpperCase'", "'trim'",
        "'trimleft'", "'trimright'", "'anchor'", "'big'", "'blink'", "'bold'",
        "'fixed'", "'fontcolor'", "'fontsize'", "'italics'", "'link'", "'small'",
        "'strike'", "'sub'", "'sup'", "'compile'", "'exec'", "'test'", "'from'",
        "'isArray'", "'of'", "'copyWithin'", "'fill'", "'pop'", "'push'",
        "'reverse'", "'shift'", "'sort'", "'splice'", "'unshift'", "'concat'",
        "'join'", "'every'", "'filter'", "'findIndex'", "'forEach'", "'map'",
        "'reduce'", "'reduceRight'", "'some'", "'move'", "'getInt8'", "'getUint8'",
        "'getInt16'", "'getUint16'", "'getInt32'", "'getUint32'", "'getFloat32'",
        "'getFloat64'", "'setInt8'", "'setUint8'", "'setInt16'", "'setUint16'",
        "'setInt32'", "'setUint32'", "'setFloat32'", "'setFloat64'", "'isView'",
        "'transfer'", "'clear'", "'get'", "'has'", "'set'", "'add'", "'splat'",
        "'check'", "'extractLane'", "'replaceLane'", "'load'", "'load1'",
        "'load2'", "'load3'", "'store'", "'store1'", "'store2'", "'store3'",
        "'addSaturate'", "'div'", "'mul'", "'neg'", "'reciprocalApproximation'",
        "'reciprocalSqrtApproximation'", "'subSaturate'", "'shuffle'", "'swizzle'",
        "'maxNum'", "'minNum'", "'select'", "'equal'", "'notEqual'", "'lessThan'",
        "'lessThanOrEqual'", "'greaterThan'", "'greaterThanOrEqual'", "'and'",
        "'or'", "'xor'", "'not'", "'shiftLeftByScalar'", "'shiftRightByScalar'",
        "'allTrue'", "'anyTrue'", "'fromFloat32x4'", "'fromFloat32x4Bits'",
        "'fromFloat64x2Bits'", "'fromInt32x4'", "'fromInt32x4Bits'",
        "'fromInt16x8Bits'", "'fromInt8x16Bits'", "'fromUint32x4'",
        "'fromUint32x4Bits'", "'fromUint16x8Bits'", "'fromUint8x16Bits'", "'neg'",
        "'compareExchange'", "'exchange'", "'wait'", "'wake'", "'isLockFree'",
        "'all'", "'race'", "'reject'", "'resolve'", "'catch'", "'then'",
        "'finally'", "'next'", "'throw'", "'close'", "'send'", "'apply'",
        "'construct'", "'deleteProperty'", "'ownKeys'", "'getCanonicalLocales'",
        "'supportedLocalesOf'", "'resolvedOptions'", "'formatToParts'",
        "'resolvedOptions'", "'instantiate'", "'instantiateStreaming'",
        "'compileStreaming'", "'validate'", "'customSections'", "'exports'",
        "'imports'", "'grow'", "'super'",
||||
"'in'",
|
||||
"'instanceof'",
|
||||
"' '"
|
||||
],
|
||||
"NEWLINE": [
|
||||
"'\\n'"
|
||||
],
|
||||
"NUMBER": [
|
||||
"'1/2'",
|
||||
"'1E2'",
|
||||
"'1E02'",
|
||||
"'1E+02'",
|
||||
"'-1'",
|
||||
"'-1.00'",
|
||||
"'-1/2'",
|
||||
"'-1E2'",
|
||||
"'-1E02'",
|
||||
"'-1E+02'",
|
||||
"'1/0'",
|
||||
"'0/0'",
|
||||
"'-2147483648/-1'",
|
||||
"'-9223372036854775808/-1'",
|
||||
"'-0'",
|
||||
"'-0.0'",
|
||||
"'+0'"
|
||||
],
|
||||
"OBJECT": [
|
||||
"IDENTIFIER"
|
||||
],
|
||||
"PROGRAM": [
|
||||
"JSBLOCK"
|
||||
],
|
||||
"PROPERTY": [
|
||||
"'.length' PROPERTY",
|
||||
"'.prototype' PROPERTY",
|
||||
"'.constructor' PROPERTY",
|
||||
"'.__proto__' PROPERTY",
|
||||
"'.__noSuchMethod__' PROPERTY",
|
||||
"'.__count__' PROPERTY",
|
||||
"'.__parent__' PROPERTY",
|
||||
"'.arguments' PROPERTY",
|
||||
"'.arity' PROPERTY",
|
||||
"'.caller' PROPERTY",
|
||||
"'.name' PROPERTY",
|
||||
"'.displayName' PROPERTY",
|
||||
"'.iterator' PROPERTY",
|
||||
"'.asyncIterator' PROPERTY",
|
||||
"'.match' PROPERTY",
|
||||
"'.replace' PROPERTY",
|
||||
"'.search' PROPERTY",
|
||||
"'.split' PROPERTY",
|
||||
"'.hasInstance' PROPERTY",
|
||||
"'.isConcatSpreadable' PROPERTY",
|
||||
"'.unscopables' PROPERTY",
|
||||
"'.species' PROPERTY",
|
||||
"'.toPrimitive' PROPERTY",
|
||||
"'.toStringTag' PROPERTY",
|
||||
"'.fileName' PROPERTY",
|
||||
"'.lineNumber' PROPERTY",
|
||||
"'.columnNumber' PROPERTY",
|
||||
"'.message' PROPERTY",
|
||||
"'.name' PROPERTY",
|
||||
"'.EPSILON' PROPERTY",
|
||||
"'.MAX_SAFE_INTEGER' PROPERTY",
|
||||
"'.MAX_VALUE' PROPERTY",
|
||||
"'.MIN_SAFE_INTEGER' PROPERTY",
|
||||
"'.MIN_VALUE' PROPERTY",
|
||||
"'.NaN' PROPERTY",
|
||||
"'.NEGATIVE_INFINITY' PROPERTY",
|
||||
"'.POSITIVE_INFINITY' PROPERTY",
|
||||
"'.E' PROPERTY",
|
||||
"'.LN2' PROPERTY",
|
||||
"'.LN10' PROPERTY",
|
||||
"'.LOG2E' PROPERTY",
|
||||
"'.LOG10E' PROPERTY",
|
||||
"'.PI' PROPERTY",
|
||||
"'.SQRT1_2' PROPERTY",
|
||||
"'.SQRT2' PROPERTY",
|
||||
"'.flags' PROPERTY",
|
||||
"'.global' PROPERTY",
|
||||
"'.ignoreCase' PROPERTY",
|
||||
"'.multiline' PROPERTY",
|
||||
"'.source' PROPERTY",
|
||||
"'.sticky' PROPERTY",
|
||||
"'.unicode' PROPERTY",
|
||||
"'.buffer' PROPERTY",
|
||||
"'.byteLength' PROPERTY",
|
||||
"'.byteOffset' PROPERTY",
|
||||
"'.BYTES_PER_ELEMENT' PROPERTY",
|
||||
"'.compare' PROPERTY",
|
||||
"'.format' PROPERTY",
|
||||
"'.callee' PROPERTY",
|
||||
"'.caller' PROPERTY",
|
||||
"'.memory' PROPERTY",
|
||||
"'.exports' PROPERTY",
|
||||
"' '"
|
||||
],
|
||||
"SP": [
|
||||
"' '"
|
||||
],
|
||||
"STATEMENT": [
|
||||
"EXPR ';'",
|
||||
"'var' SP VAR '=' EXPR ';'",
|
||||
"'let' SP VAR '=' EXPR ';'",
|
||||
"VAR '=' EXPR ';'",
|
||||
"VAR PROPERTY '=' EXPR ';'",
|
||||
"VAR '[' DECIMALNUMBER ']' '=' EXPR ';'",
|
||||
"'const' SP VAR '=' EXPR ';'",
|
||||
"'typeof' SP EXPR ';'",
|
||||
"'void' SP EXPR ';'",
|
||||
"'return' SP EXPR ';'",
|
||||
"VAR ':'"
|
||||
],
|
||||
"VAR": [
|
||||
"'a'",
|
||||
"'b'",
|
||||
"'c'",
|
||||
"'d'",
|
||||
"'e'",
|
||||
"'f'",
|
||||
"'g'",
|
||||
"'h'"
|
||||
]
|
||||
}
|
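For reference, the grammar format above maps each nonterminal to a list of alternative productions: tokens wrapped in single quotes are terminals (including the single-space terminal "' '"), and bare uppercase tokens are nonterminals to expand. The actual pipeline converts this file into a pushdown automaton for the fuzzer, but a minimal Python sketch of the format's semantics is shown below; it is a hypothetical helper, not one of the preprocessing scripts, and the "source.json" path, the start symbol choice, and the depth cutoff are assumptions.

import json
import random
import re

# A token is either a single-quoted terminal (which may contain spaces,
# e.g. "' '") or a bare nonterminal name such as METHOD_NAME.
TOKEN = re.compile(r"'[^']*'|\S+")

def derive(grammar, symbol, depth=0, max_depth=32):
    """Expand `symbol` into a concrete string by random derivation."""
    if len(symbol) >= 2 and symbol.startswith("'") and symbol.endswith("'"):
        return symbol[1:-1]  # terminal: emit it without the quotes
    if depth >= max_depth:
        return ""  # crude cutoff so recursive rules like PROPERTY stay finite
    production = random.choice(grammar[symbol])
    return "".join(
        derive(grammar, tok, depth + 1, max_depth)
        for tok in TOKEN.findall(production)
    )

if __name__ == "__main__":
    # "source.json" is an assumed path to a grammar file in this format.
    with open("source.json") as f:
        grammar = json.load(f)
    # "PROGRAM" is the start symbol of the JS grammar above.
    print(derive(grammar, "PROGRAM"))

With the STATEMENT and VAR rules above, a derivation such as PROGRAM → ... → "'var' SP VAR '=' EXPR ';'" would emit a string like `var a=1E2;`, since the SP nonterminal supplies the space between glued terminals.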
1
scripts/gramatron/grammars/js/source_automata.json
Normal file
File diff suppressed because one or more lines are too long
8707
scripts/gramatron/grammars/php/source.json
Normal file
File diff suppressed because it is too large
1
scripts/gramatron/grammars/php/source_automata.json
Normal file
File diff suppressed because one or more lines are too long
1195
scripts/gramatron/grammars/ruby/source.json
Normal file
File diff suppressed because it is too large
1
scripts/gramatron/grammars/ruby/source_automata.json
Normal file
File diff suppressed because one or more lines are too long