Port gramatron preprocessing to Rust (#341)
* grammatron random mut
* import String from alloc
* gramatron
* grammar preprocess scripts
* clippy
* fix construct_automata.py
* splice mutator
* fix
* clippy
* recursion mutator
* recursion mut in example
* clippy
* fix
* clippy
* grammars
* fix gramatron
* fmt
This commit is contained in:
parent 7eb293e087
commit 2055eabede
@@ -27,4 +27,5 @@ default-members = [
 exclude = [
     "fuzzers",
     "bindings",
+    "scripts",
 ]
@@ -20,4 +20,4 @@ debug = true

 [dependencies]
 libafl = { path = "../../libafl/" }
-serde_json = "1.0.68"
+postcard = "0.7"
BIN  fuzzers/baby_fuzzer_gramatron/auto.postcard  Normal file
Binary file not shown.
@@ -14,7 +14,7 @@ use libafl::{
     events::SimpleEventManager,
     executors::{inprocess::InProcessExecutor, ExitKind},
     feedbacks::{CrashFeedback, MapFeedbackState, MaxMapFeedback},
-    fuzzer::{Evaluator, Fuzzer, StdFuzzer},
+    fuzzer::{Fuzzer, StdFuzzer},
     generators::{Automaton, GramatronGenerator},
     inputs::GramatronInput,
     mutators::{
@@ -38,8 +38,10 @@ fn signals_set(idx: usize) {

 fn read_automaton_from_file<P: AsRef<Path>>(path: P) -> Automaton {
     let file = fs::File::open(path).unwrap();
-    let reader = BufReader::new(file);
-    serde_json::from_reader(reader).unwrap()
+    let mut reader = BufReader::new(file);
+    let mut buffer = Vec::new();
+    reader.read_to_end(&mut buffer).unwrap();
+    postcard::from_bytes(&buffer).unwrap()
 }

 #[allow(clippy::similar_names)]
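The loader above switches from human-readable JSON to `postcard`, a compact serde binary format. A minimal round-trip sketch, using stand-in types that mirror the fields shown in the `generators` diff further down (not the actual LibAFL definitions):

```
use serde::{Deserialize, Serialize};

// Stand-ins for libafl::generators::gramatron::{Trigger, Automaton}.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq)]
struct Trigger {
    id: String,
    dest: usize,
    term: String,
}

#[derive(Serialize, Deserialize, Debug, PartialEq, Eq)]
struct Automaton {
    final_state: usize,
    init_state: usize,
    pda: Vec<Vec<Trigger>>,
}

fn main() {
    let automaton = Automaton { final_state: 1, init_state: 0, pda: vec![vec![]] };
    // Serialize to a compact byte vector, then parse it back.
    let bytes = postcard::to_allocvec(&automaton).unwrap();
    let parsed: Automaton = postcard::from_bytes(&bytes).unwrap();
    assert_eq!(parsed, automaton);
}
```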
@@ -104,9 +106,28 @@ pub fn main() {
     )
     .expect("Failed to create the Executor");

-    let mut generator =
-        GramatronGenerator::new(read_automaton_from_file(PathBuf::from("auto.json")));
+    let automaton = read_automaton_from_file(PathBuf::from("auto.postcard"));
+    let mut generator = GramatronGenerator::new(&automaton);

+    /// Use this code to profile the generator performance
+    /*
+    use libafl::generators::Generator;
+    use std::collections::HashSet;
+    let mut set = HashSet::new();
+    let st = libafl::bolts::current_milliseconds();
+    let mut b = vec![];
+    let mut c = 0;
+    for _ in 0..100000000 {
+        let i = generator.generate(&mut state).unwrap();
+        i.unparse(&mut b);
+        set.insert(b.clone());
+        c += b.len();
+    }
+    println!("{} / {}", c, libafl::bolts::current_milliseconds() - st);
+    println!("{} / 100000000", set.len());
+
+    return;
+    */
     // Generate 8 initial inputs
     state
         .generate_initial_inputs_forced(&mut fuzzer, &mut executor, &mut generator, &mut mgr, 8)
@@ -134,7 +134,7 @@ where
             .build()
             .launch()?;

-            (self.run_client)(state, mgr, bind_to.id)?;
+            (self.run_client)(state, mgr, bind_to.id).expect("Client closure failed");
             break;
         }
     };
@@ -23,8 +23,8 @@ use alloc::boxed::Box;
 use alloc::string::ToString;
 use core::{marker::PhantomData, time::Duration};

-/// Send a stats update all 3 (or more) seconds
-const STATS_TIMEOUT_DEFAULT: Duration = Duration::from_millis(3 * 1000);
+/// Send a stats update all 15 (or more) seconds
+const STATS_TIMEOUT_DEFAULT: Duration = Duration::from_secs(15);

 /// Holds a scheduler
 pub trait HasCorpusScheduler<CS, I, S>
@@ -10,14 +10,13 @@ use crate::{
     Error,
 };

-#[derive(Serialize, Deserialize, Clone, Debug)]
+#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)]
 pub struct Trigger {
     pub id: String,
     pub dest: usize,
     pub term: String,
 }

-#[derive(Serialize, Deserialize, Clone, Debug)]
+#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)]
 pub struct Automaton {
     pub final_state: usize,
     pub init_state: usize,
@@ -26,16 +25,16 @@ pub struct Automaton {

 #[derive(Clone, Debug)]
 /// Generates random inputs from a grammar automatron
-pub struct GramatronGenerator<R, S>
+pub struct GramatronGenerator<'a, R, S>
 where
     R: Rand,
     S: HasRand<R>,
 {
-    automaton: Automaton,
+    automaton: &'a Automaton,
     phantom: PhantomData<(R, S)>,
 }

-impl<R, S> Generator<GramatronInput, S> for GramatronGenerator<R, S>
+impl<'a, R, S> Generator<GramatronInput, S> for GramatronGenerator<'a, R, S>
 where
     R: Rand,
     S: HasRand<R>,
@@ -51,14 +50,14 @@ where
     }
 }

-impl<R, S> GramatronGenerator<R, S>
+impl<'a, R, S> GramatronGenerator<'a, R, S>
 where
     R: Rand,
     S: HasRand<R>,
 {
     /// Returns a new [`GramatronGenerator`]
     #[must_use]
-    pub fn new(automaton: Automaton) -> Self {
+    pub fn new(automaton: &'a Automaton) -> Self {
         Self {
             automaton,
             phantom: PhantomData,
@@ -18,7 +18,7 @@ where
     S: HasRand<R> + HasMetadata,
     R: Rand,
 {
-    generator: &'a GramatronGenerator<R, S>,
+    generator: &'a GramatronGenerator<'a, R, S>,
 }

 impl<'a, R, S> Mutator<GramatronInput, S> for GramatronRandomMutator<'a, R, S>
@@ -61,7 +61,7 @@ where
 {
     /// Creates a new [`GramatronRandomMutator`].
     #[must_use]
-    pub fn new(generator: &'a GramatronGenerator<R, S>) -> Self {
+    pub fn new(generator: &'a GramatronGenerator<'a, R, S>) -> Self {
         Self { generator }
     }
 }
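The `'a` lifetime threaded through `GramatronGenerator` and `GramatronRandomMutator` lets one `Automaton` be built once and borrowed everywhere, instead of moved into the generator as before. A reduced sketch of the borrow chain with toy types (not the LibAFL API):

```
// automaton <- generator <- mutator(s): nothing is cloned.
struct Automaton {
    init_state: usize,
}

struct Generator<'a> {
    automaton: &'a Automaton,
}

struct RandomMutator<'a> {
    generator: &'a Generator<'a>,
}

fn main() {
    let automaton = Automaton { init_state: 0 };
    let generator = Generator { automaton: &automaton };
    // Several mutators can share the same generator, and thus the same automaton.
    let m1 = RandomMutator { generator: &generator };
    let m2 = RandomMutator { generator: &generator };
    assert_eq!(m1.generator.automaton.init_state, m2.generator.automaton.init_state);
}
```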
17  scripts/gramatron/README.md  Normal file
@@ -0,0 +1,17 @@
# Gramatron preprocessing scripts

This folder contains the scripts to convert a grammar (some examples live in the `grammars/` subfolder) into a serialized Automaton.

First, convert the grammar to Greibach Normal Form (GNF) using the `gnf_converter.py` Python script.

Then use its output as the input to the `construct_automata` crate.

Here is an example using the Ruby grammar:

```
./gnf_converter.py --gf grammars/ruby_grammar.json --out ruby_gnf.json --start PROGRAM
cd construct_automata
RUSTFLAGS="-C target-cpu=native" cargo run --release -- --gf ../ruby_gnf.json --out ../ruby_automaton.postcard
```

You can add the `--limit` flag to limit the stack size, as described in the Gramatron paper.
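On the fuzzer side, the resulting `.postcard` file is loaded as shown in the `baby_fuzzer_gramatron` change earlier in this commit; a standalone sketch of that loader (the path is illustrative):

```
use std::{
    fs,
    io::{BufReader, Read},
    path::Path,
};

use libafl::generators::Automaton;

// Same shape as read_automaton_from_file() in fuzzers/baby_fuzzer_gramatron.
fn read_automaton_from_file<P: AsRef<Path>>(path: P) -> Automaton {
    let file = fs::File::open(path).unwrap();
    let mut reader = BufReader::new(file);
    let mut buffer = Vec::new();
    reader.read_to_end(&mut buffer).unwrap();
    postcard::from_bytes(&buffer).unwrap()
}

fn main() {
    let automaton = read_automaton_from_file("ruby_automaton.postcard");
    println!("initial state: {}", automaton.init_state);
}
```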
14  scripts/gramatron/construct_automata/Cargo.toml  Normal file
@@ -0,0 +1,14 @@
[package]
name = "construct_automata"
version = "0.1.0"
edition = "2018"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
serde_json = "1.0"
regex = "1"
postcard = "0.7"
lazy_static = "1.4.0"
libafl = { path = "../../../libafl" }
clap = { version = "3.0.0-beta.2", features = ["yaml"] }
22  scripts/gramatron/construct_automata/src/clap-config.yaml  Normal file
@@ -0,0 +1,22 @@
name: construct_automata
version: "0.1.0"
author: "Andrea Fioraldi <andreafioraldi@gmail.com>"
about: Generate a serialized Automaton using a json GNF grammar
args:
  - grammar:
      short: g
      long: grammar-file
      value_name: GRAMMAR
      required: true
      takes_value: true
  - output:
      short: o
      long: output
      value_name: OUTPUT
      required: true
      takes_value: true
  - limit:
      short: l
      long: limit
      value_name: LIMIT
      takes_value: true
314  scripts/gramatron/construct_automata/src/main.rs  Normal file
@@ -0,0 +1,314 @@
use clap::{load_yaml, App};
use lazy_static::lazy_static;
use regex::Regex;
use serde_json::Value;
use std::collections::{HashMap, HashSet, VecDeque};
use std::{
    fs,
    io::{BufReader, Write},
    path::Path,
    rc::Rc,
};

use libafl::generators::gramatron::{Automaton, Trigger};

fn read_grammar_from_file<P: AsRef<Path>>(path: P) -> Value {
    let file = fs::File::open(path).unwrap();
    let reader = BufReader::new(file);
    serde_json::from_reader(reader).unwrap()
}

#[derive(Debug)]
struct Element {
    pub state: usize,
    pub items: Rc<VecDeque<String>>,
}

#[derive(Default, Debug, Clone, PartialEq, Eq, Hash)]
struct Transition {
    pub source: usize,
    pub dest: usize,
    pub ss: Vec<String>,
    pub terminal: String,
    pub is_regex: bool,
    pub stack: Rc<VecDeque<String>>,
}

#[derive(Default)]
struct Stacks {
    pub q: HashMap<usize, VecDeque<String>>,
    pub s: HashMap<usize, Vec<String>>,
}

fn tokenize(rule: &str) -> (String, Vec<String>, bool) {
    lazy_static! {
        static ref RE: Regex = Regex::new(r"([r])*'([\s\S]+)'([\s\S]*)").unwrap();
    }
    let cap = RE.captures(rule).unwrap();
    let is_regex = cap.get(1).is_some();
    let terminal = cap.get(2).unwrap().as_str().to_owned();
    let ss = cap.get(3).map_or(vec![], |m| {
        m.as_str()
            .split_whitespace()
            .map(|x| x.to_owned())
            .collect()
    });
    if terminal == "\\n" {
        ("\n".into(), ss, is_regex)
    } else {
        (terminal, ss, is_regex)
    }
}

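`tokenize` splits a GNF right-hand side into the quoted terminal, the remaining stack symbols, and a regex flag (leading `r`). A hypothetical unit test pinning down that behavior:

```
#[cfg(test)]
mod tokenize_tests {
    use super::tokenize;

    #[test]
    fn plain_terminal_with_stack_symbols() {
        // "'if' COND BODY" -> terminal `if`, stack symbols [COND, BODY].
        let (terminal, ss, is_regex) = tokenize("'if' COND BODY");
        assert_eq!(terminal, "if");
        assert_eq!(ss, vec!["COND".to_string(), "BODY".to_string()]);
        assert!(!is_regex);
    }

    #[test]
    fn regex_terminal() {
        // The leading `r` marks the quoted part as a regex terminal.
        let (terminal, ss, is_regex) = tokenize("r'[0-9]+'");
        assert_eq!(terminal, "[0-9]+");
        assert!(ss.is_empty());
        assert!(is_regex);
    }
}
```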
fn prepare_transitions(
    grammar: &Value,
    pda: &mut Vec<Transition>,
    state_stacks: &mut Stacks,
    state_count: &mut usize,
    worklist: &mut VecDeque<Element>,
    element: Element,
    stack_limit: usize,
) {
    if element.items.is_empty() {
        return; // Final state was encountered, pop from worklist without doing anything
    }

    let state = element.state;
    let nonterminal = &element.items[0];
    let rules = grammar[nonterminal].as_array().unwrap();
    // let mut i = 0;
    'rules_loop: for rule in rules {
        let rule = rule.as_str().unwrap();
        let (terminal, ss, is_regex) = tokenize(rule);
        let dest = *state_count;

        // println!("Rule \"{}\", {} over {}", &rule, i, rules.len());

        // Creating a state stack for the new state
        let mut state_stack = state_stacks
            .q
            .get(&state)
            .map_or(VecDeque::new(), |x| x.clone());
        if !state_stack.is_empty() {
            state_stack.pop_front();
        }
        for symbol in ss.iter().rev() {
            state_stack.push_front(symbol.clone());
        }
        let mut state_stack_sorted: Vec<_> = state_stack.iter().cloned().collect();
        state_stack_sorted.sort();

        let mut transition = Transition {
            source: state,
            dest,
            ss,
            terminal,
            is_regex,
            stack: Rc::new(state_stack.clone()),
        };

        // Check if a recursive transition state being created, if so make a backward
        // edge and don't add anything to the worklist
        for (key, val) in state_stacks.s.iter() {
            if state_stack_sorted == *val {
                transition.dest = *key;
                // i += 1;
                pda.push(transition.clone());

                // If a recursive transition exercised don't add the same transition as a new
                // edge, continue onto the next transitions
                continue 'rules_loop;
            }
        }

        // If the generated state has a stack size > stack_limit then that state is abandoned
        // and not added to the FSA or the worklist for further expansion
        if stack_limit > 0 {
            if transition.stack.len() > stack_limit {
                // TODO add to unexpanded_rules
                continue;
            }
        }

        // Create transitions for the non-recursive relations and add to the worklist
        worklist.push_back(Element {
            state: dest,
            items: transition.stack.clone(),
        });
        state_stacks.q.insert(dest, state_stack);
        state_stacks.s.insert(dest, state_stack_sorted);
        pda.push(transition);

        println!("worklist size: {}", worklist.len());

        *state_count += 1;
        // i += 1;
    }
}

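A hypothetical mini-driver for `prepare_transitions`, run to a fixpoint on the two-rule grammar `A -> 'a' A | 'b'` with no stack limit; the recursion check above folds the self-referential expansion back into existing states, so the loop terminates with four transitions (0 -'a'-> 1, 0 -'b'-> 2, and backward edges 1 -'a'-> 1, 1 -'b'-> 2):

```
// Illustrative only, not part of the tool.
fn demo() {
    let grammar: Value = serde_json::json!({ "A": ["'a' A", "'b'"] });
    let mut worklist = VecDeque::new();
    let mut state_stacks = Stacks::default();
    let mut state_count = 1;
    let mut pda = vec![];

    let mut start = VecDeque::new();
    start.push_back("A".to_string());
    worklist.push_back(Element { state: 0, items: Rc::new(start) });

    while let Some(element) = worklist.pop_front() {
        prepare_transitions(
            &grammar, &mut pda, &mut state_stacks, &mut state_count,
            &mut worklist, element, 0,
        );
    }
    assert_eq!(pda.len(), 4);
}
```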
fn get_states(pda: &[Transition]) -> (HashSet<usize>, HashSet<usize>, HashSet<usize>) {
    let mut source = HashSet::new();
    let mut dest = HashSet::new();
    for transition in pda {
        source.insert(transition.source);
        dest.insert(transition.dest);
    }
    let all = source.union(&dest).map(|x| *x).collect();
    (
        all,
        dest.difference(&source).map(|x| *x).collect(),
        source.difference(&dest).map(|x| *x).collect(),
    )
}

fn postprocess(pda: &[Transition], stack_limit: usize) -> Automaton {
    let mut num_transition = 0;
    let (states, finals, initial) = get_states(pda);

    assert!(initial.len() == 1);

    println!("# transitions: {}", pda.len());
    println!("# states: {}", states.len());
    println!("initial state: {:?}", &initial);
    println!("final states: {:?}", &finals);

    let mut memoized = Vec::with_capacity(states.len());
    //let mut memoized_unique = Vec::with_capacity(states.len());

    // if stack_limit ...
    if stack_limit > 0 {
        let mut culled_pda = Vec::with_capacity(pda.len());
        let mut blocklist = HashSet::new();
        //let mut culled_pda_unique = HashSet::new();

        for final_state in &finals {
            pda.iter().for_each(|transition| {
                if transition.dest == *final_state && transition.stack.len() > 0 {
                    blocklist.insert(transition.dest);
                } else {
                    culled_pda.push(transition);
                    //culled_pda_unique.insert(transition);
                }
            });
        }

        // println!("culled_pda size: {} pda size: {}", culled_pda.len(), pda.len());

        let culled_finals: HashSet<usize> = finals.difference(&blocklist).map(|x| *x).collect();
        assert!(culled_finals.len() == 1);

        culled_pda.iter().for_each(|transition| {
            if blocklist.contains(&transition.dest) {
                return;
            }
            num_transition += 1;
            let state = transition.source;
            if state >= memoized.len() {
                memoized.resize(state + 1, vec![]);
            }
            memoized[state].push(Trigger {
                dest: transition.dest,
                term: transition.terminal.clone(),
            });

            if num_transition % 4096 == 0 {
                println!(
                    "processed {} transitions over {}",
                    num_transition,
                    culled_pda.len()
                );
            }
        });

        /*
        culled_pda_unique.iter().for_each(|transition| {
            if blocklist.contains(&transition.dest) {
                return;
            }
            num_transition += 1;
            let state = transition.source;
            if state >= memoized_unique.len() {
                memoized_unique.resize(state +1, vec![]);
            }
            memoized_unique[state].push(Trigger {dest: transition.dest, term: transition.terminal.clone()});
        });
        */

        Automaton {
            init_state: initial.iter().next().cloned().unwrap(),
            final_state: culled_finals.iter().next().cloned().unwrap(),
            pda: memoized,
        }
    } else {
        // Running FSA construction in exact approximation mode and postprocessing it like so
        pda.iter().for_each(|transition| {
            num_transition += 1;
            let state = transition.source;
            if state >= memoized.len() {
                memoized.resize(state + 1, vec![]);
            }
            memoized[state].push(Trigger {
                dest: transition.dest,
                term: transition.terminal.clone(),
            });

            if num_transition % 4096 == 0 {
                println!(
                    "processed {} transitions over {}",
                    num_transition,
                    pda.len()
                );
            }
        });

        Automaton {
            init_state: initial.iter().next().cloned().unwrap(),
            final_state: finals.iter().next().cloned().unwrap(),
            pda: memoized,
        }
    }
}

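`postprocess` flattens the accepted transitions into `pda: Vec<Vec<Trigger>>`, indexed by source state; this memoized form is what the LibAFL generator walks at fuzz time. A rough sketch of such a walk, with the random choice stubbed by a toy LCG (LibAFL's generator uses its own `Rand` abstraction instead):

```
// Illustrative only: emit one string by walking from init_state to final_state.
fn walk(automaton: &Automaton, mut seed: u64) -> String {
    let mut out = String::new();
    let mut state = automaton.init_state;
    while state != automaton.final_state {
        let triggers = &automaton.pda[state];
        // Toy linear congruential generator for the random pick.
        seed = seed
            .wrapping_mul(6364136223846793005)
            .wrapping_add(1442695040888963407);
        let trigger = &triggers[(seed >> 33) as usize % triggers.len()];
        out.push_str(&trigger.term);
        state = trigger.dest;
    }
    out
}
```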
fn main() {
    let yaml = load_yaml!("clap-config.yaml");
    let matches = App::from(yaml).get_matches();

    let grammar_file = matches.value_of("grammar").unwrap();
    let output_file = matches.value_of("output").unwrap();
    let stack_limit = matches.value_of_t::<usize>("limit").unwrap_or(0);

    let mut worklist = VecDeque::new();
    let mut state_count = 1;
    let mut state_stacks = Stacks::default();
    let mut pda = vec![];

    let grammar = read_grammar_from_file(grammar_file);
    let start_symbol = grammar["Start"][0].as_str().unwrap().to_owned();
    let mut start_vec = VecDeque::new();
    start_vec.push_back(start_symbol);
    worklist.push_back(Element {
        state: 0,
        items: Rc::new(start_vec),
    });

    while let Some(element) = worklist.pop_front() {
        prepare_transitions(
            &grammar,
            &mut pda,
            &mut state_stacks,
            &mut state_count,
            &mut worklist,
            element,
            stack_limit,
        );
    }

    state_stacks.q.clear();
    state_stacks.s.clear();

    let transformed = postprocess(&pda, stack_limit);
    let serialized = postcard::to_allocvec(&transformed).unwrap();

    let mut file = fs::File::create(output_file).unwrap();
    file.write_all(&serialized).unwrap();
}
7  scripts/gramatron/gnf_converter.py  Normal file → Executable file
@@ -186,15 +186,20 @@ def remove_mixed(grammar):
         for rhs in rules:
             # tokens = rhs.split(' ')
             regen_rule = []
             # print('---------------------')
             # print(rhs)
+            tokens = gettokens(rhs)
+            if len(gettokens(rhs)) == 1:
+                new_grammar[lhs].append(rhs)
+                continue
             for token in tokens:
                 # print(token, isTerminal(token), regen_rule)
                 # Identify if there is a terminal in the RHS
                 if isTerminal(token):
                     # Check if a corresponding nonterminal already exists
-                    nonterminal = terminal_exist(token, new_grammar)
+                    # nonterminal = terminal_exist(token, new_grammar)
+                    nonterminal = None
+                    # TODO(andrea) disabled ATM, further investigation using the Ruby grammar needed
                     if nonterminal:
                         regen_rule.append(nonterminal)
                     else:
File diff suppressed because one or more lines are too long