Dump Control Flow Graph in AFLCoverage LLVM Pass (#557)

* Allow dumping CFG in AFLCoverage pass

* Consider cases of edges from zeros to entry basic block.

* Expose public structs and traits

* linting

* fix doc

* clippy

* Remove unnecessary dependency

* add missing derive
This commit is contained in:
Chaofan Shou 2022-03-02 02:19:19 -08:00 committed by GitHub
parent 8cb41366ac
commit 4e3091eace
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 451 additions and 4 deletions

View File

@ -21,3 +21,4 @@ which = "4.2.2"
glob = "0.3" glob = "0.3"
[dependencies] [dependencies]
serde = { version = "1.0", default-features = false, features = ["alloc", "derive"] } # serialization lib

View File

@ -29,6 +29,14 @@
#include "common-llvm.h" #include "common-llvm.h"
#include <time.h> #include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <ctype.h>
#include <list> #include <list>
#include <string> #include <string>
@ -40,6 +48,8 @@
#include "llvm/IR/Module.h" #include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h" #include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h" #include "llvm/Support/MathExtras.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/FormatVariadic.h"
#if LLVM_VERSION_MAJOR > 3 || \ #if LLVM_VERSION_MAJOR > 3 || \
(LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4)
@ -69,6 +79,8 @@ static cl::opt<uint32_t> Ngram("ngram", cl::desc("Size of the Ngram instrumentat
static cl::opt<uint32_t> CtxK("ctx_k", cl::desc("Size of the context for K-Ctx context sensitivity (0 to disable)"), cl::init(0), cl::NotHidden); static cl::opt<uint32_t> CtxK("ctx_k", cl::desc("Size of the context for K-Ctx context sensitivity (0 to disable)"), cl::init(0), cl::NotHidden);
static cl::opt<bool> Ctx("ctx", cl::desc("Enable full context sensitive coverage"), cl::init(false), cl::NotHidden); static cl::opt<bool> Ctx("ctx", cl::desc("Enable full context sensitive coverage"), cl::init(false), cl::NotHidden);
static cl::opt<bool> ThreadSafe("thread_safe", cl::desc("Use the thread safe instrumentation"), cl::init(false), cl::NotHidden); static cl::opt<bool> ThreadSafe("thread_safe", cl::desc("Use the thread safe instrumentation"), cl::init(false), cl::NotHidden);
static cl::opt<bool> DumpCFG("dump_afl_cfg", cl::desc("Dump CFG containing AFL-style edge index"), cl::init(false), cl::NotHidden);
static cl::opt<std::string> DumpCFGPath("dump_afl_cfg_path", cl::desc("Path to dump CFG containing AFL-style edge index"), cl::init(".cfg"), cl::NotHidden);
namespace { namespace {
@ -94,8 +106,10 @@ class AFLCoverage : public ModulePass {
#endif #endif
protected: protected:
uint32_t map_size = MAP_SIZE; uint32_t map_size = MAP_SIZE;
uint32_t function_minimum_size = 1; uint32_t function_minimum_size = 1;
DenseMap<BasicBlock *, int32_t> bb_to_cur_loc;
DenseMap<StringRef, BasicBlock *> entry_bb;
}; };
@ -142,7 +156,7 @@ PreservedAnalyses AFLCoverage::run(Module &M, ModuleAnalysisManager &MAM) {
#else #else
bool AFLCoverage::runOnModule(Module &M) { bool AFLCoverage::runOnModule(Module &M) {
#endif #endif
if (Ctx && DumpCFG) FATAL("Does not support dumping CFG with full context sensitive coverage enabled.");
LLVMContext &C = M.getContext(); LLVMContext &C = M.getContext();
IntegerType *Int8Ty = IntegerType::getInt8Ty(C); IntegerType *Int8Ty = IntegerType::getInt8Ty(C);
@ -395,6 +409,7 @@ bool AFLCoverage::runOnModule(Module &M) {
// if (!isInInstrumentList(&F)) { continue; } // if (!isInInstrumentList(&F)) { continue; }
if (F.size() < function_minimum_size) { continue; } if (F.size() < function_minimum_size) { continue; }
if (DumpCFG) entry_bb[F.getName()] = &F.getEntryBlock();
std::list<Value *> todo; std::list<Value *> todo;
for (auto &BB : F) { for (auto &BB : F) {
@ -494,7 +509,7 @@ bool AFLCoverage::runOnModule(Module &M) {
// cur_loc++; // cur_loc++;
cur_loc = RandBelow(map_size); cur_loc = RandBelow(map_size);
if (DumpCFG) bb_to_cur_loc[&BB] = cur_loc;
/* There is a problem with Ubuntu 18.04 and llvm 6.0 (see issue #63). /* There is a problem with Ubuntu 18.04 and llvm 6.0 (see issue #63).
The inline function successors() is not inlined and also not found at runtime The inline function successors() is not inlined and also not found at runtime
:-( As I am unable to detect Ubuntu18.04 heree, the next best thing is to :-( As I am unable to detect Ubuntu18.04 heree, the next best thing is to
@ -737,6 +752,36 @@ bool AFLCoverage::runOnModule(Module &M) {
} }
} }
if (DumpCFG) {
int fd;
if ((fd = open(DumpCFGPath.c_str(), O_WRONLY | O_CREAT | O_APPEND, 0644)) < 0)
FATAL("Could not open/create CFG dump file.");
std::string cfg = "";
for (auto record = entry_bb.begin(); record != entry_bb.end(); record++) {
// Dump function BB entry points
cfg += formatv("$${0}+{1}\n", record->getFirst(), bb_to_cur_loc[record->getSecond()]);
}
for (auto record = bb_to_cur_loc.begin(); record != bb_to_cur_loc.end(); record++) {
// Dump CFG information
auto current_bb = record->getFirst();
Function* calling_func = current_bb->getParent();
if (calling_func) {
auto function_name = calling_func->getName().str();
cfg += formatv("%%{0}", function_name);
}
else
cfg += "%%__";
auto current_cur_loc = record->getSecond();
cfg += formatv("+{0}\n", current_cur_loc);
for (auto bb_successor = succ_begin(current_bb);
bb_successor != succ_end(current_bb); bb_successor++) {
cfg += formatv("->{0}\n", bb_to_cur_loc[*bb_successor]).str();
}
}
if (Debug) errs() << "CFG: \n" << cfg;
if (cfg.size() > 0 && write(fd, cfg.c_str(), cfg.length()) <= 0)
FATAL("Failed to dump CFG.\n");
}
/* Say something nice. */ /* Say something nice. */

399
libafl_cc/src/cfg.rs Normal file
View File

@ -0,0 +1,399 @@
//! LLVM style control flow graph with information of AFL-style index of the each
//! edges, use together with ``AFLCoverage`` pass having --dump-afl-cfg flag enabled.
use core::borrow::Borrow;
use serde::{Deserialize, Serialize};
use std::collections::{BinaryHeap, HashMap, HashSet};
use std::marker::PhantomData;
/// Compute the weight of a [`CfgEdge`]. Lower means shorter distance in the graph.
pub trait HasWeight<T> {
/// Compute the weight of a [`CfgEdge`]. Lower means shorter distance in the graph.
fn compute(metadata: Option<&T>) -> u32;
}
/// An edge in the CFG.
#[derive(Debug, Serialize, Deserialize)]
pub struct CfgEdge<T>
where
T: HasWeight<T>,
{
/// The index of the coverage map AFL inserts to, which is (``prev_loc`` >> 1) ^ ``cur_loc``.
pub xored_loc: usize,
/// The from node's index (i.e., ``prev_loc``) in the edge.
pub top_node_loc: usize,
/// The to node's index (i.e., ``cur_loc``) in the edge.
pub bottom_node_loc: usize,
/// Name of the function that contains such edge. For anonymous function, it is "__".
pub calling_func: String,
/// Indexes of successor block.
pub successor_basic_blocks: Vec<usize>,
/// ``prev_loc`` >> 1 ^ ``cur_loc`` of edges connecting [`CfgEdge.bottom_node_loc`]
/// to successor blocks.
pub successor_edges: Vec<usize>,
/// Custom metadata.
pub metadata: Option<T>,
}
impl<T> CfgEdge<T>
where
T: HasWeight<T>,
{
/// Add a successor for an edge.
pub fn add_successor(&mut self, successor_loc: usize) {
self.successor_basic_blocks.push(successor_loc);
self.successor_edges
.push((self.bottom_node_loc >> 1) ^ successor_loc);
}
/// Calculate the weight of an edge.
pub fn get_weight(&self) -> u32 {
T::compute(self.metadata.as_ref())
}
}
/// An entry basic block of a function.
#[derive(Debug)]
pub struct EntryBasicBlockInfo {
/// Name of the function that contains such basic block. For anonymous function, it is "__".
pub calling_func: String,
/// The node's index (i.e., ``cur_loc``).
pub node_loc: usize,
/// ``prev_loc`` >> 1 ^ ``cur_loc`` of edges connecting [`EntryBasicBlockInfo.node_loc`]
/// to successor blocks.
pub successor_edges: Vec<usize>,
}
impl EntryBasicBlockInfo {
/// Add a successor for an edge.
pub fn add_successor(&mut self, successor_loc: usize) {
self.successor_edges
.push((self.node_loc >> 1) ^ successor_loc);
}
}
/// An LLVM style control flow graph.
/// Note: This does not track across functions.
#[derive(Debug)]
pub struct ControlFlowGraph<T>
where
T: HasWeight<T>,
{
/// List of edges in the control flow graph.
///
/// If there is collision, then only the latest edge would be saved.
edges: Vec<Option<CfgEdge<T>>>,
/// Mapping each function's name to its corresponding entry basic block information.
func_to_entry_bb: HashMap<String, EntryBasicBlockInfo>,
}
impl<T> ControlFlowGraph<T>
where
T: HasWeight<T>,
{
/// Inserts an edge into CFG.
#[must_use]
pub fn new() -> Self {
let map_size = option_env!("LIBAFL_EDGES_MAP_SIZE")
.map_or(Ok(65536), str::parse)
.expect("Could not parse LIBAFL_EDGES_MAP_SIZE");
Self {
edges: (0..map_size).map(|_| None).collect(),
func_to_entry_bb: HashMap::default(),
}
}
/// Inserts an edge into CFG.
fn insert_edge(&mut self, xored_loc: usize, edge: CfgEdge<T>) {
self.edges[xored_loc] = Some(edge);
}
/// Inserts a function and its entry basic block information into CFG.
fn create_func_entry(&mut self, func_name: &str, entry_info: EntryBasicBlockInfo) {
self.func_to_entry_bb
.insert(func_name.to_string(), entry_info);
}
}
/// Helper for reading CFG dump files.
#[derive(Debug)]
struct CfgFileReader<T>
where
T: HasWeight<T>,
{
current_bb: usize,
bb_to_func: HashMap<usize, String>,
bb_to_successors: HashMap<usize, Vec<usize>>,
func_to_entry_bb: HashMap<String, usize>,
phantom: PhantomData<T>,
}
impl<T> CfgFileReader<T>
where
T: HasWeight<T>,
{
pub fn new() -> Self {
Self {
current_bb: 0,
bb_to_func: HashMap::default(),
bb_to_successors: HashMap::default(),
func_to_entry_bb: HashMap::default(),
phantom: PhantomData,
}
}
/// Parse a line in CFG dump files.
pub fn parse_line(&mut self, line: &str) -> bool {
const FAILED_TO_PARSE: &str = "Cannot parsing CFG file at line";
if line.len() < 2 {
return false;
}
let (_, line_content) = line.split_at(2);
match &line[0..2] {
"->" => {
// "->{basic block id}": Map current basic block to its destination basic block.
let successor: usize = line_content.parse().expect(FAILED_TO_PARSE);
match self.bb_to_successors.get_mut(&self.current_bb) {
None => {
self.bb_to_successors
.insert(self.current_bb, vec![successor]);
}
Some(successors) => {
successors.push(successor);
}
}
}
"%%" => {
// "%%{function name}+{index}": Make current basic block to be {index}.
let mut splitter = line_content.split('+');
let func_name = splitter.next().expect(FAILED_TO_PARSE).into();
self.current_bb = splitter.next().expect(FAILED_TO_PARSE).parse().expect("");
self.bb_to_func.insert(self.current_bb, func_name);
}
"$$" => {
// "$${function name}+{index}": Function {function name}'s entry block is {index}.
let mut splitter = line_content.split('+');
let func_name = splitter.next().expect(FAILED_TO_PARSE).into();
let entry_bb: usize = splitter
.next()
.expect(FAILED_TO_PARSE)
.parse()
.expect(FAILED_TO_PARSE);
self.func_to_entry_bb.insert(func_name, entry_bb);
}
_ => {}
}
true
}
/// Convert current state to a [`ControlFlowGraph`].
pub fn to_cfg(&self) -> ControlFlowGraph<T> {
let mut cfg = ControlFlowGraph::new();
let mut entry_bb_locs: Vec<usize> = vec![];
for (func_name, entry_bb) in &self.func_to_entry_bb {
entry_bb_locs.push(*entry_bb);
let mut entry = EntryBasicBlockInfo {
calling_func: func_name.to_string(),
node_loc: *entry_bb,
successor_edges: vec![],
};
if let Some(successors) = self.bb_to_successors.get(entry_bb) {
for successor in successors {
entry.add_successor(*successor);
}
}
cfg.create_func_entry(func_name, entry);
}
// Insert edges from zero to entry basic blocks.
let mut bb_to_successors_with_zero = self.bb_to_successors.clone();
if !entry_bb_locs.is_empty() {
bb_to_successors_with_zero.insert(0, entry_bb_locs);
}
for (bb_loc, successor_locs) in &bb_to_successors_with_zero {
let current_func = match bb_loc {
0 => self.bb_to_func.get(&successor_locs[0]).unwrap(),
_ => self.bb_to_func.get(bb_loc).unwrap(),
};
for successor_loc in successor_locs {
let xored_loc = (*bb_loc >> 1) ^ (*successor_loc);
let mut edge = CfgEdge {
xored_loc,
top_node_loc: *bb_loc,
bottom_node_loc: *successor_loc,
calling_func: current_func.clone(),
successor_basic_blocks: vec![],
successor_edges: vec![],
metadata: None,
};
if let Some(successors_of_successor) = self.bb_to_successors.get(successor_loc) {
for successor_of_successor in successors_of_successor {
edge.add_successor(*successor_of_successor);
}
}
cfg.insert_edge(xored_loc, edge);
}
}
cfg
}
}
impl<T> ControlFlowGraph<T>
where
T: HasWeight<T>,
{
/// Load a CFG from a dump file generated by ``AFLCoverage`` pass.
#[must_use]
pub fn from_file(file_name: &str) -> ControlFlowGraph<T> {
ControlFlowGraph::from_content(
std::fs::read_to_string(file_name)
.expect("file not found!")
.as_str(),
)
}
/// Load a CFG from string generated by ``AFLCoverage`` pass.
#[allow(unused_must_use)]
#[must_use]
pub fn from_content(content: &str) -> ControlFlowGraph<T> {
let mut reader = CfgFileReader::new();
content
.lines()
.map(|line| reader.parse_line(line))
.collect::<Vec<bool>>();
reader.to_cfg()
}
/// Get the edge at the index of the coverage map AFL inserts to.
#[must_use]
pub fn get_edge(&self, xored_loc: usize) -> Option<&CfgEdge<T>> {
self.edges[xored_loc].as_ref()
}
/// Get the mutable edge at the index of the coverage map AFL inserts to.
#[must_use]
pub fn get_edge_mut(&mut self, xored_loc: usize) -> Option<&mut CfgEdge<T>> {
self.edges[xored_loc].as_mut()
}
/// Get entry basic block information of a function.
#[must_use]
pub fn get_entry(&self, func_name: &str) -> Option<&EntryBasicBlockInfo> {
self.func_to_entry_bb.get(func_name)
}
/// Get mutable entry basic block information of a function.
#[must_use]
pub fn get_entry_mut(&mut self, func_name: &str) -> Option<&mut EntryBasicBlockInfo> {
self.func_to_entry_bb.get_mut(func_name)
}
/// Calculate shortest distance from start edge to all other edges
/// in the function containing such ``start``.
///
/// Unreachable edges from ``start`` would not be inserted in the returned hash map.
#[must_use]
pub fn calculate_distances_to_all_edges(&self, start: usize) -> HashMap<usize, u32> {
let mut distances: HashMap<usize, u32> = HashMap::new();
let mut visited = HashSet::new();
let mut to_visit = BinaryHeap::new(); // BinaryHeap<(loc, distance)>
let initial_weight = self
.get_edge(start)
.expect("unknown destination")
.get_weight();
distances.insert(start, initial_weight);
to_visit.push((start, initial_weight));
while let Some((edge, distance)) = to_visit.pop() {
if !visited.insert(edge) {
continue;
}
if let Some(edge_info) = self.get_edge(edge) {
for successor in &edge_info.successor_edges {
let successor_info = self
.get_edge(*successor)
.expect("unknown successor added")
.borrow();
let new_distance = distance + successor_info.get_weight();
let is_shorter = distances
.get(successor)
.map_or(true, |&current| new_distance < current);
if is_shorter {
distances.insert(*successor, new_distance);
to_visit.push((*successor, new_distance));
}
}
}
}
distances
}
}
impl<T> Default for ControlFlowGraph<T>
where
T: HasWeight<T>,
{
fn default() -> Self {
ControlFlowGraph::from_file(".cfg")
}
}
#[cfg(test)]
mod tests {
use crate::cfg::{ControlFlowGraph, HasWeight};
struct TestMetaData {}
impl HasWeight<TestMetaData> for TestMetaData {
fn compute(_metadata: Option<&TestMetaData>) -> u32 {
1
}
}
// ┌────────────┐
// │main (41864)├───────► main (52706)
// └────┬───────┘ ▲
// │ │
// └───────────────► main (26911) ────────────► main (41925)
const TEST_GRAPH_STR: &str = "$$main+41864\n$$_ZN7MyClass1VEi+50306\n%%_ZN7MyClass1VEi+50306\n->19123\n%%main+41864\n->52706\n->26911\n%%main+52706\n%%main+26911\n->52706\n->41925\n";
#[test]
fn test_basic_cfg_from_str() {
let cfg: ControlFlowGraph<TestMetaData> = ControlFlowGraph::from_content(TEST_GRAPH_STR);
let entry = cfg.get_entry("main").unwrap();
assert_eq!(entry.calling_func, "main");
assert_eq!(entry.successor_edges.len(), 2);
assert_eq!(entry.node_loc, 41864);
assert_eq!(entry.successor_edges[0], (41864 >> 1) ^ 52706);
assert_eq!(entry.successor_edges[1], (41864 >> 1) ^ 26911);
let mut edge = cfg.get_edge((50306 >> 1) ^ 19123).unwrap();
assert_eq!(edge.calling_func, "_ZN7MyClass1VEi");
assert_eq!(edge.successor_edges.len(), 0);
assert_eq!(edge.successor_basic_blocks.len(), 0);
edge = cfg.get_edge((26911 >> 1) ^ 52706).unwrap();
assert_eq!(edge.calling_func, "main");
assert_eq!(edge.successor_edges.len(), 0);
assert_eq!(edge.successor_basic_blocks.len(), 0);
edge = cfg.get_edge((41864 >> 1) ^ 26911).unwrap();
assert_eq!(edge.calling_func, "main");
assert_eq!(edge.successor_edges.len(), 2);
assert_eq!(*edge.successor_edges.get(0).unwrap(), (26911 >> 1) ^ 52706);
assert!(cfg.get_edge(26911).is_none());
assert!(cfg.get_edge(41864).is_some());
}
#[test]
fn test_shortest_path() {
let cfg: ControlFlowGraph<TestMetaData> = ControlFlowGraph::from_content(TEST_GRAPH_STR);
let distances = cfg.calculate_distances_to_all_edges((41864 >> 1) ^ 26911);
assert_eq!(*distances.get(&((41864 >> 1) ^ 26911)).unwrap(), 1);
assert_eq!(*distances.get(&((26911 >> 1) ^ 52706)).unwrap(), 2);
assert_eq!(*distances.get(&((26911 >> 1) ^ 41925)).unwrap(), 2);
assert!(distances.get(&((41864 >> 1) ^ 52706)).is_none());
}
}

View File

@ -59,6 +59,8 @@
use std::{convert::Into, path::Path, process::Command, string::String, vec::Vec}; use std::{convert::Into, path::Path, process::Command, string::String, vec::Vec};
pub mod cfg;
pub use cfg::{CfgEdge, ControlFlowGraph, EntryBasicBlockInfo, HasWeight};
pub mod clang; pub mod clang;
pub use clang::{ClangWrapper, LLVMPasses}; pub use clang::{ClangWrapper, LLVMPasses};