From 4e3091eace71652996c73228ebf93938cfd67f29 Mon Sep 17 00:00:00 2001 From: Chaofan Shou Date: Wed, 2 Mar 2022 02:19:19 -0800 Subject: [PATCH] Dump Control Flow Graph in AFLCoverage LLVM Pass (#557) * Allow dumping CFG in AFLCoverage pass * Consider cases of edges from zeros to entry basic block. * Expose public structs and traits * linting * fix doc * clippy * Remove unnecessary dependency * add missing derive --- libafl_cc/Cargo.toml | 1 + libafl_cc/src/afl-coverage-pass.cc | 53 +++- libafl_cc/src/cfg.rs | 399 +++++++++++++++++++++++++++++ libafl_cc/src/lib.rs | 2 + 4 files changed, 451 insertions(+), 4 deletions(-) create mode 100644 libafl_cc/src/cfg.rs diff --git a/libafl_cc/Cargo.toml b/libafl_cc/Cargo.toml index baa90358ce..5863b14b01 100644 --- a/libafl_cc/Cargo.toml +++ b/libafl_cc/Cargo.toml @@ -21,3 +21,4 @@ which = "4.2.2" glob = "0.3" [dependencies] +serde = { version = "1.0", default-features = false, features = ["alloc", "derive"] } # serialization lib diff --git a/libafl_cc/src/afl-coverage-pass.cc b/libafl_cc/src/afl-coverage-pass.cc index 9cd82c6fd4..281072c295 100644 --- a/libafl_cc/src/afl-coverage-pass.cc +++ b/libafl_cc/src/afl-coverage-pass.cc @@ -29,6 +29,14 @@ #include "common-llvm.h" #include +#include +#include +#include +#include +#include +#include +#include +#include #include #include @@ -40,6 +48,8 @@ #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/FormatVariadic.h" #if LLVM_VERSION_MAJOR > 3 || \ (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) @@ -69,6 +79,8 @@ static cl::opt Ngram("ngram", cl::desc("Size of the Ngram instrumentat static cl::opt CtxK("ctx_k", cl::desc("Size of the context for K-Ctx context sensitivity (0 to disable)"), cl::init(0), cl::NotHidden); static cl::opt Ctx("ctx", cl::desc("Enable full context sensitive coverage"), cl::init(false), cl::NotHidden); static cl::opt ThreadSafe("thread_safe", 
cl::desc("Use the thread safe instrumentation"), cl::init(false), cl::NotHidden); +static cl::opt DumpCFG("dump_afl_cfg", cl::desc("Dump CFG containing AFL-style edge index"), cl::init(false), cl::NotHidden); +static cl::opt DumpCFGPath("dump_afl_cfg_path", cl::desc("Path to dump CFG containing AFL-style edge index"), cl::init(".cfg"), cl::NotHidden); namespace { @@ -94,8 +106,10 @@ class AFLCoverage : public ModulePass { #endif protected: - uint32_t map_size = MAP_SIZE; - uint32_t function_minimum_size = 1; + uint32_t map_size = MAP_SIZE; + uint32_t function_minimum_size = 1; + DenseMap bb_to_cur_loc; + DenseMap entry_bb; }; @@ -142,7 +156,7 @@ PreservedAnalyses AFLCoverage::run(Module &M, ModuleAnalysisManager &MAM) { #else bool AFLCoverage::runOnModule(Module &M) { #endif - + if (Ctx && DumpCFG) FATAL("Does not support dumping CFG with full context sensitive coverage enabled."); LLVMContext &C = M.getContext(); IntegerType *Int8Ty = IntegerType::getInt8Ty(C); @@ -395,6 +409,7 @@ bool AFLCoverage::runOnModule(Module &M) { // if (!isInInstrumentList(&F)) { continue; } if (F.size() < function_minimum_size) { continue; } + if (DumpCFG) entry_bb[F.getName()] = &F.getEntryBlock(); std::list todo; for (auto &BB : F) { @@ -494,7 +509,7 @@ bool AFLCoverage::runOnModule(Module &M) { // cur_loc++; cur_loc = RandBelow(map_size); - + if (DumpCFG) bb_to_cur_loc[&BB] = cur_loc; /* There is a problem with Ubuntu 18.04 and llvm 6.0 (see issue #63). 
The inline function successors() is not inlined and also not found at runtime :-( As I am unable to detect Ubuntu18.04 heree, the next best thing is to @@ -737,6 +752,36 @@ bool AFLCoverage::runOnModule(Module &M) { } } + if (DumpCFG) { + int fd; + if ((fd = open(DumpCFGPath.c_str(), O_WRONLY | O_CREAT | O_APPEND, 0644)) < 0) + FATAL("Could not open/create CFG dump file."); + std::string cfg = ""; + for (auto record = entry_bb.begin(); record != entry_bb.end(); record++) { + // Dump function BB entry points + cfg += formatv("$${0}+{1}\n", record->getFirst(), bb_to_cur_loc[record->getSecond()]); + } + for (auto record = bb_to_cur_loc.begin(); record != bb_to_cur_loc.end(); record++) { + // Dump CFG information + auto current_bb = record->getFirst(); + Function* calling_func = current_bb->getParent(); + if (calling_func) { + auto function_name = calling_func->getName().str(); + cfg += formatv("%%{0}", function_name); + } + else + cfg += "%%__"; + auto current_cur_loc = record->getSecond(); + cfg += formatv("+{0}\n", current_cur_loc); + for (auto bb_successor = succ_begin(current_bb); + bb_successor != succ_end(current_bb); bb_successor++) { + cfg += formatv("->{0}\n", bb_to_cur_loc[*bb_successor]).str(); + } + } + if (Debug) errs() << "CFG: \n" << cfg; + if (cfg.size() > 0 && write(fd, cfg.c_str(), cfg.length()) <= 0) + FATAL("Failed to dump CFG.\n"); + } /* Say something nice. */ diff --git a/libafl_cc/src/cfg.rs b/libafl_cc/src/cfg.rs new file mode 100644 index 0000000000..f6d3aa50e1 --- /dev/null +++ b/libafl_cc/src/cfg.rs @@ -0,0 +1,399 @@ +//! LLVM style control flow graph with information of AFL-style index of the each +//! edges, use together with ``AFLCoverage`` pass having --dump-afl-cfg flag enabled. +use core::borrow::Borrow; +use serde::{Deserialize, Serialize}; +use std::collections::{BinaryHeap, HashMap, HashSet}; +use std::marker::PhantomData; + +/// Compute the weight of a [`CfgEdge`]. Lower means shorter distance in the graph. 
+pub trait HasWeight { + /// Compute the weight of a [`CfgEdge`]. Lower means shorter distance in the graph. + fn compute(metadata: Option<&T>) -> u32; +} + +/// An edge in the CFG. +#[derive(Debug, Serialize, Deserialize)] +pub struct CfgEdge +where + T: HasWeight, +{ + /// The index of the coverage map AFL inserts to, which is (``prev_loc`` >> 1) ^ ``cur_loc``. + pub xored_loc: usize, + /// The from node's index (i.e., ``prev_loc``) in the edge. + pub top_node_loc: usize, + /// The to node's index (i.e., ``cur_loc``) in the edge. + pub bottom_node_loc: usize, + /// Name of the function that contains such edge. For anonymous function, it is "__". + pub calling_func: String, + /// Indexes of successor block. + pub successor_basic_blocks: Vec, + /// ``prev_loc`` >> 1 ^ ``cur_loc`` of edges connecting [`CfgEdge.bottom_node_loc`] + /// to successor blocks. + pub successor_edges: Vec, + /// Custom metadata. + pub metadata: Option, +} + +impl CfgEdge +where + T: HasWeight, +{ + /// Add a successor for an edge. + pub fn add_successor(&mut self, successor_loc: usize) { + self.successor_basic_blocks.push(successor_loc); + self.successor_edges + .push((self.bottom_node_loc >> 1) ^ successor_loc); + } + + /// Calculate the weight of an edge. + pub fn get_weight(&self) -> u32 { + T::compute(self.metadata.as_ref()) + } +} + +/// An entry basic block of a function. +#[derive(Debug)] +pub struct EntryBasicBlockInfo { + /// Name of the function that contains such basic block. For anonymous function, it is "__". + pub calling_func: String, + /// The node's index (i.e., ``cur_loc``). + pub node_loc: usize, + /// ``prev_loc`` >> 1 ^ ``cur_loc`` of edges connecting [`EntryBasicBlockInfo.node_loc`] + /// to successor blocks. + pub successor_edges: Vec, +} + +impl EntryBasicBlockInfo { + /// Add a successor for an edge. 
+ pub fn add_successor(&mut self, successor_loc: usize) { + self.successor_edges + .push((self.node_loc >> 1) ^ successor_loc); + } +} + +/// An LLVM style control flow graph. +/// Note: This does not track across functions. +#[derive(Debug)] +pub struct ControlFlowGraph +where + T: HasWeight, +{ + /// List of edges in the control flow graph. + /// + /// If there is collision, then only the latest edge would be saved. + edges: Vec>>, + /// Mapping each function's name to its corresponding entry basic block information. + func_to_entry_bb: HashMap, +} + +impl ControlFlowGraph +where + T: HasWeight, +{ + /// Inserts an edge into CFG. + #[must_use] + pub fn new() -> Self { + let map_size = option_env!("LIBAFL_EDGES_MAP_SIZE") + .map_or(Ok(65536), str::parse) + .expect("Could not parse LIBAFL_EDGES_MAP_SIZE"); + Self { + edges: (0..map_size).map(|_| None).collect(), + func_to_entry_bb: HashMap::default(), + } + } + + /// Inserts an edge into CFG. + fn insert_edge(&mut self, xored_loc: usize, edge: CfgEdge) { + self.edges[xored_loc] = Some(edge); + } + + /// Inserts a function and its entry basic block information into CFG. + fn create_func_entry(&mut self, func_name: &str, entry_info: EntryBasicBlockInfo) { + self.func_to_entry_bb + .insert(func_name.to_string(), entry_info); + } +} + +/// Helper for reading CFG dump files. +#[derive(Debug)] +struct CfgFileReader +where + T: HasWeight, +{ + current_bb: usize, + bb_to_func: HashMap, + bb_to_successors: HashMap>, + func_to_entry_bb: HashMap, + phantom: PhantomData, +} + +impl CfgFileReader +where + T: HasWeight, +{ + pub fn new() -> Self { + Self { + current_bb: 0, + bb_to_func: HashMap::default(), + bb_to_successors: HashMap::default(), + func_to_entry_bb: HashMap::default(), + phantom: PhantomData, + } + } + + /// Parse a line in CFG dump files. 
+ pub fn parse_line(&mut self, line: &str) -> bool { + const FAILED_TO_PARSE: &str = "Cannot parsing CFG file at line"; + if line.len() < 2 { + return false; + } + let (_, line_content) = line.split_at(2); + + match &line[0..2] { + "->" => { + // "->{basic block id}": Map current basic block to its destination basic block. + let successor: usize = line_content.parse().expect(FAILED_TO_PARSE); + match self.bb_to_successors.get_mut(&self.current_bb) { + None => { + self.bb_to_successors + .insert(self.current_bb, vec![successor]); + } + Some(successors) => { + successors.push(successor); + } + } + } + "%%" => { + // "%%{function name}+{index}": Make current basic block to be {index}. + let mut splitter = line_content.split('+'); + let func_name = splitter.next().expect(FAILED_TO_PARSE).into(); + self.current_bb = splitter.next().expect(FAILED_TO_PARSE).parse().expect(""); + self.bb_to_func.insert(self.current_bb, func_name); + } + "$$" => { + // "$${function name}+{index}": Function {function name}'s entry block is {index}. + let mut splitter = line_content.split('+'); + let func_name = splitter.next().expect(FAILED_TO_PARSE).into(); + let entry_bb: usize = splitter + .next() + .expect(FAILED_TO_PARSE) + .parse() + .expect(FAILED_TO_PARSE); + self.func_to_entry_bb.insert(func_name, entry_bb); + } + _ => {} + } + true + } + + /// Convert current state to a [`ControlFlowGraph`]. + pub fn to_cfg(&self) -> ControlFlowGraph { + let mut cfg = ControlFlowGraph::new(); + let mut entry_bb_locs: Vec = vec![]; + for (func_name, entry_bb) in &self.func_to_entry_bb { + entry_bb_locs.push(*entry_bb); + let mut entry = EntryBasicBlockInfo { + calling_func: func_name.to_string(), + node_loc: *entry_bb, + successor_edges: vec![], + }; + if let Some(successors) = self.bb_to_successors.get(entry_bb) { + for successor in successors { + entry.add_successor(*successor); + } + } + cfg.create_func_entry(func_name, entry); + } + + // Insert edges from zero to entry basic blocks. 
+ let mut bb_to_successors_with_zero = self.bb_to_successors.clone(); + if !entry_bb_locs.is_empty() { + bb_to_successors_with_zero.insert(0, entry_bb_locs); + } + + for (bb_loc, successor_locs) in &bb_to_successors_with_zero { + let current_func = match bb_loc { + 0 => self.bb_to_func.get(&successor_locs[0]).unwrap(), + _ => self.bb_to_func.get(bb_loc).unwrap(), + }; + for successor_loc in successor_locs { + let xored_loc = (*bb_loc >> 1) ^ (*successor_loc); + let mut edge = CfgEdge { + xored_loc, + top_node_loc: *bb_loc, + bottom_node_loc: *successor_loc, + calling_func: current_func.clone(), + successor_basic_blocks: vec![], + successor_edges: vec![], + metadata: None, + }; + if let Some(successors_of_successor) = self.bb_to_successors.get(successor_loc) { + for successor_of_successor in successors_of_successor { + edge.add_successor(*successor_of_successor); + } + } + cfg.insert_edge(xored_loc, edge); + } + } + cfg + } +} + +impl ControlFlowGraph +where + T: HasWeight, +{ + /// Load a CFG from a dump file generated by ``AFLCoverage`` pass. + #[must_use] + pub fn from_file(file_name: &str) -> ControlFlowGraph { + ControlFlowGraph::from_content( + std::fs::read_to_string(file_name) + .expect("file not found!") + .as_str(), + ) + } + + /// Load a CFG from string generated by ``AFLCoverage`` pass. + #[allow(unused_must_use)] + #[must_use] + pub fn from_content(content: &str) -> ControlFlowGraph { + let mut reader = CfgFileReader::new(); + content + .lines() + .map(|line| reader.parse_line(line)) + .collect::>(); + reader.to_cfg() + } + /// Get the edge at the index of the coverage map AFL inserts to. + #[must_use] + pub fn get_edge(&self, xored_loc: usize) -> Option<&CfgEdge> { + self.edges[xored_loc].as_ref() + } + + /// Get the mutable edge at the index of the coverage map AFL inserts to. 
+ #[must_use] + pub fn get_edge_mut(&mut self, xored_loc: usize) -> Option<&mut CfgEdge> { + self.edges[xored_loc].as_mut() + } + + /// Get entry basic block information of a function. + #[must_use] + pub fn get_entry(&self, func_name: &str) -> Option<&EntryBasicBlockInfo> { + self.func_to_entry_bb.get(func_name) + } + + /// Get mutable entry basic block information of a function. + #[must_use] + pub fn get_entry_mut(&mut self, func_name: &str) -> Option<&mut EntryBasicBlockInfo> { + self.func_to_entry_bb.get_mut(func_name) + } + + /// Calculate shortest distance from start edge to all other edges + /// in the function containing such ``start``. + /// + /// Unreachable edges from ``start`` would not be inserted in the returned hash map. + #[must_use] + pub fn calculate_distances_to_all_edges(&self, start: usize) -> HashMap { + let mut distances: HashMap = HashMap::new(); + let mut visited = HashSet::new(); + let mut to_visit = BinaryHeap::new(); // BinaryHeap<(loc, distance)> + let initial_weight = self + .get_edge(start) + .expect("unknown destination") + .get_weight(); + distances.insert(start, initial_weight); + to_visit.push((start, initial_weight)); + + while let Some((edge, distance)) = to_visit.pop() { + if !visited.insert(edge) { + continue; + } + if let Some(edge_info) = self.get_edge(edge) { + for successor in &edge_info.successor_edges { + let successor_info = self + .get_edge(*successor) + .expect("unknown successor added") + .borrow(); + let new_distance = distance + successor_info.get_weight(); + let is_shorter = distances + .get(successor) + .map_or(true, |&current| new_distance < current); + + if is_shorter { + distances.insert(*successor, new_distance); + to_visit.push((*successor, new_distance)); + } + } + } + } + distances + } +} + +impl Default for ControlFlowGraph +where + T: HasWeight, +{ + fn default() -> Self { + ControlFlowGraph::from_file(".cfg") + } +} + +#[cfg(test)] +mod tests { + use crate::cfg::{ControlFlowGraph, HasWeight}; + + struct
TestMetaData {} + + impl HasWeight for TestMetaData { + fn compute(_metadata: Option<&TestMetaData>) -> u32 { + 1 + } + } + + // ┌────────────┐ + // │main (41864)├───────► main (52706) + // └────┬───────┘ ▲ + // │ │ + // └───────────────► main (26911) ────────────► main (41925) + const TEST_GRAPH_STR: &str = "$$main+41864\n$$_ZN7MyClass1VEi+50306\n%%_ZN7MyClass1VEi+50306\n->19123\n%%main+41864\n->52706\n->26911\n%%main+52706\n%%main+26911\n->52706\n->41925\n"; + + #[test] + fn test_basic_cfg_from_str() { + let cfg: ControlFlowGraph = ControlFlowGraph::from_content(TEST_GRAPH_STR); + let entry = cfg.get_entry("main").unwrap(); + assert_eq!(entry.calling_func, "main"); + assert_eq!(entry.successor_edges.len(), 2); + assert_eq!(entry.node_loc, 41864); + assert_eq!(entry.successor_edges[0], (41864 >> 1) ^ 52706); + assert_eq!(entry.successor_edges[1], (41864 >> 1) ^ 26911); + + let mut edge = cfg.get_edge((50306 >> 1) ^ 19123).unwrap(); + assert_eq!(edge.calling_func, "_ZN7MyClass1VEi"); + assert_eq!(edge.successor_edges.len(), 0); + assert_eq!(edge.successor_basic_blocks.len(), 0); + + edge = cfg.get_edge((26911 >> 1) ^ 52706).unwrap(); + assert_eq!(edge.calling_func, "main"); + assert_eq!(edge.successor_edges.len(), 0); + assert_eq!(edge.successor_basic_blocks.len(), 0); + + edge = cfg.get_edge((41864 >> 1) ^ 26911).unwrap(); + assert_eq!(edge.calling_func, "main"); + assert_eq!(edge.successor_edges.len(), 2); + assert_eq!(*edge.successor_edges.get(0).unwrap(), (26911 >> 1) ^ 52706); + + assert!(cfg.get_edge(26911).is_none()); + assert!(cfg.get_edge(41864).is_some()); + } + + #[test] + fn test_shortest_path() { + let cfg: ControlFlowGraph = ControlFlowGraph::from_content(TEST_GRAPH_STR); + let distances = cfg.calculate_distances_to_all_edges((41864 >> 1) ^ 26911); + assert_eq!(*distances.get(&((41864 >> 1) ^ 26911)).unwrap(), 1); + assert_eq!(*distances.get(&((26911 >> 1) ^ 52706)).unwrap(), 2); + assert_eq!(*distances.get(&((26911 >> 1) ^ 41925)).unwrap(), 2); + 
assert!(distances.get(&((41864 >> 1) ^ 52706)).is_none()); + } +} diff --git a/libafl_cc/src/lib.rs b/libafl_cc/src/lib.rs index c7a713736d..b591dea7ba 100644 --- a/libafl_cc/src/lib.rs +++ b/libafl_cc/src/lib.rs @@ -59,6 +59,8 @@ use std::{convert::Into, path::Path, process::Command, string::String, vec::Vec}; +pub mod cfg; +pub use cfg::{CfgEdge, ControlFlowGraph, EntryBasicBlockInfo, HasWeight}; pub mod clang; pub use clang::{ClangWrapper, LLVMPasses};