Dominik Maier e8b3d33bf4
Update dependencies, removed unused deps, CI fixes (#839)
* update clap, remove unused deps

* update grammartek

* update pyo3

* update pyo3

* undid clap update

* not changing nyx

* updated deps

* Update more deps, fixes

* not needed clippy

* fix windows

* try to enable deprecated pyproto for pyo3

* unused

* moving some things to clap4 after all

* initial move to clap 4

* fix clap

* more clap4, removed accidental file

* fixes, fmt

* fix

* all fix no play

* fix
2022-10-18 20:36:43 +02:00

340 lines
10 KiB
Rust

use std::{
collections::{HashMap, HashSet, VecDeque},
fs,
io::{BufReader, Write},
path::{Path, PathBuf},
rc::Rc,
};
use clap::{self, Parser};
use lazy_static::lazy_static;
use libafl::generators::gramatron::{Automaton, Trigger};
use regex::Regex;
use serde_json::Value;
#[derive(Debug, Parser)]
#[command(
name = "construct_automata",
about = "Generate a serialized Automaton using a json GNF grammar",
author = "Andrea Fioraldi <andreafioraldi@gmail.com>"
)]
struct Opt {
#[arg(
short,
long = "grammar-file",
name = "GRAMMAR",
help = "The grammar to use during fuzzing"
)]
grammar: PathBuf,
#[arg(
short,
long,
name = "LIMIT",
help = "The max stack size after which a generated input is abandoned",
default_value = "0"
)]
limit: usize,
#[arg(short, long, help = "Set the output file", name = "OUTPUT")]
output: PathBuf,
}
fn read_grammar_from_file<P: AsRef<Path>>(path: P) -> Value {
let file = fs::File::open(path).unwrap();
let reader = BufReader::new(file);
serde_json::from_reader(reader).unwrap()
}
#[derive(Debug)]
struct Element {
pub state: usize,
pub items: Rc<VecDeque<String>>,
}
#[derive(Default, Debug, Clone, PartialEq, Eq, Hash)]
struct Transition {
pub source: usize,
pub dest: usize,
pub ss: Vec<String>,
pub terminal: String,
pub is_regex: bool,
pub stack: Rc<VecDeque<String>>,
}
#[derive(Default)]
struct Stacks {
pub q: HashMap<usize, VecDeque<String>>,
pub s: HashMap<usize, Vec<String>>,
}
fn tokenize(rule: &str) -> (String, Vec<String>, bool) {
lazy_static! {
static ref RE: Regex = Regex::new(r"([r])*'([\s\S]+)'([\s\S]*)").unwrap();
}
let cap = RE.captures(rule).unwrap();
let is_regex = cap.get(1).is_some();
let terminal = cap.get(2).unwrap().as_str().to_owned();
let ss = cap.get(3).map_or(vec![], |m| {
m.as_str()
.split_whitespace()
.map(ToOwned::to_owned)
.collect()
});
if terminal == "\\n" {
("\n".into(), ss, is_regex)
} else {
(terminal, ss, is_regex)
}
}
fn prepare_transitions(
grammar: &Value,
pda: &mut Vec<Transition>,
state_stacks: &mut Stacks,
state_count: &mut usize,
worklist: &mut VecDeque<Element>,
element: &Element,
stack_limit: usize,
) {
if element.items.is_empty() {
return; // Final state was encountered, pop from worklist without doing anything
}
let state = element.state;
let nonterminal = &element.items[0];
let rules = grammar[nonterminal].as_array().unwrap();
// let mut i = 0;
'rules_loop: for rule in rules {
let rule = rule.as_str().unwrap();
let (terminal, ss, is_regex) = tokenize(rule);
let dest = *state_count;
// println!("Rule \"{}\", {} over {}", &rule, i, rules.len());
// Creating a state stack for the new state
let mut state_stack = state_stacks
.q
.get(&state)
.map_or(VecDeque::new(), Clone::clone);
if !state_stack.is_empty() {
state_stack.pop_front();
}
for symbol in ss.iter().rev() {
state_stack.push_front(symbol.clone());
}
let mut state_stack_sorted: Vec<_> = state_stack.iter().cloned().collect();
state_stack_sorted.sort();
let mut transition = Transition {
source: state,
dest,
ss,
terminal,
is_regex,
stack: Rc::new(state_stack.clone()),
};
// Check if a recursive transition state being created, if so make a backward
// edge and don't add anything to the worklist
for (key, val) in &state_stacks.s {
if state_stack_sorted == *val {
transition.dest = *key;
// i += 1;
pda.push(transition.clone());
// If a recursive transition exercised don't add the same transition as a new
// edge, continue onto the next transitions
continue 'rules_loop;
}
}
// If the generated state has a stack size > stack_limit then that state is abandoned
// and not added to the FSA or the worklist for further expansion
if stack_limit > 0 && transition.stack.len() > stack_limit {
// TODO add to unexpanded_rules
continue;
}
// Create transitions for the non-recursive relations and add to the worklist
worklist.push_back(Element {
state: dest,
items: transition.stack.clone(),
});
state_stacks.q.insert(dest, state_stack);
state_stacks.s.insert(dest, state_stack_sorted);
pda.push(transition);
println!("worklist size: {}", worklist.len());
*state_count += 1;
// i += 1;
}
}
fn get_states(pda: &[Transition]) -> (HashSet<usize>, HashSet<usize>, HashSet<usize>) {
let mut source = HashSet::new();
let mut dest = HashSet::new();
for transition in pda {
source.insert(transition.source);
dest.insert(transition.dest);
}
let all = source.union(&dest).copied().collect();
(
all,
dest.difference(&source).copied().collect(),
source.difference(&dest).copied().collect(),
)
}
fn postprocess(pda: &[Transition], stack_limit: usize) -> Automaton {
let mut num_transition = 0;
let (states, finals, initial) = get_states(pda);
assert!(initial.len() == 1);
println!("# transitions: {}", pda.len());
println!("# states: {}", states.len());
println!("initial state: {:?}", &initial);
println!("final states: {:?}", &finals);
let mut memoized = Vec::with_capacity(states.len());
//let mut memoized_unique = Vec::with_capacity(states.len());
// if stack_limit ...
if stack_limit > 0 {
let mut culled_pda = Vec::with_capacity(pda.len());
let mut blocklist = HashSet::new();
//let mut culled_pda_unique = HashSet::new();
for final_state in &finals {
for transition in pda.iter() {
if transition.dest == *final_state && transition.stack.len() > 0 {
blocklist.insert(transition.dest);
} else {
culled_pda.push(transition);
//culled_pda_unique.insert(transition);
}
}
}
// println!("culled_pda size: {} pda size: {}", culled_pda.len(), pda.len());
let culled_finals: HashSet<usize> = finals.difference(&blocklist).copied().collect();
assert!(culled_finals.len() == 1);
for transition in &culled_pda {
if blocklist.contains(&transition.dest) {
continue;
}
num_transition += 1;
let state = transition.source;
if state >= memoized.len() {
memoized.resize(state + 1, vec![]);
}
memoized[state].push(Trigger {
dest: transition.dest,
term: transition.terminal.clone(),
});
if num_transition % 4096 == 0 {
println!(
"processed {} transitions over {}",
num_transition,
culled_pda.len()
);
}
}
/*
culled_pda_unique.iter().for_each(|transition| {
if blocklist.contains(&transition.dest) {
return;
}
num_transition += 1;
let state = transition.source;
if state >= memoized_unique.len() {
memoized_unique.resize(state +1, vec![]);
}
memoized_unique[state].push(Trigger {dest: transition.dest, term: transition.terminal.clone()});
});
*/
Automaton {
init_state: initial.iter().next().copied().unwrap(),
final_state: culled_finals.iter().next().copied().unwrap(),
pda: memoized,
}
} else {
// Running FSA construction in exact approximation mode and postprocessing it like so
for transition in pda.iter() {
num_transition += 1;
let state = transition.source;
if state >= memoized.len() {
memoized.resize(state + 1, vec![]);
}
memoized[state].push(Trigger {
dest: transition.dest,
term: transition.terminal.clone(),
});
if num_transition % 4096 == 0 {
println!(
"processed {} transitions over {}",
num_transition,
pda.len()
);
}
}
Automaton {
init_state: initial.iter().next().copied().unwrap(),
final_state: finals.iter().next().copied().unwrap(),
pda: memoized,
}
}
}
fn main() {
let opt = Opt::parse();
let grammar_file = opt.grammar;
let output_file = opt.output;
let stack_limit = opt.limit;
let mut worklist = VecDeque::new();
let mut state_count = 1;
let mut state_stacks = Stacks::default();
let mut pda = vec![];
let grammar = read_grammar_from_file(grammar_file);
let start_symbol = grammar["Start"][0].as_str().unwrap().to_owned();
let mut start_vec = VecDeque::new();
start_vec.push_back(start_symbol);
worklist.push_back(Element {
state: 0,
items: Rc::new(start_vec),
});
while let Some(element) = worklist.pop_front() {
prepare_transitions(
&grammar,
&mut pda,
&mut state_stacks,
&mut state_count,
&mut worklist,
&element,
stack_limit,
);
}
state_stacks.q.clear();
state_stacks.s.clear();
let transformed = postprocess(&pda, stack_limit);
let serialized = postcard::to_allocvec(&transformed).unwrap();
let mut file = fs::File::create(output_file).unwrap();
file.write_all(&serialized).unwrap();
}