diff --git a/libafl_cc/build.rs b/libafl_cc/build.rs
index 8d44951936..fef32e90bd 100644
--- a/libafl_cc/build.rs
+++ b/libafl_cc/build.rs
@@ -132,25 +132,25 @@ fn build_pass(
     ldflags: &Vec<&str>,
     src_dir: &Path,
     src_file: &str,
+    optional: bool,
 ) {
     let dot_offset = src_file.rfind('.').unwrap();
     let src_stub = &src_file[..dot_offset];
 
     println!("cargo:rerun-if-changed=src/{src_file}");
-    if cfg!(unix) {
-        assert!(Command::new(bindir_path.join("clang++"))
+    let r = if cfg!(unix) {
+        let r = Command::new(bindir_path.join("clang++"))
             .arg("-v")
             .args(cxxflags)
             .arg(src_dir.join(src_file))
             .args(ldflags)
             .arg("-o")
             .arg(out_dir.join(format!("{src_stub}.{}", dll_extension())))
-            .status()
-            .unwrap_or_else(|_| panic!("Failed to compile {src_file}"))
-            .success());
+            .status();
+
+        Some(r)
     } else if cfg!(windows) {
-        println!("{cxxflags:?}");
-        assert!(Command::new(bindir_path.join("clang-cl"))
+        let r = Command::new(bindir_path.join("clang-cl"))
             .arg("-v")
             .args(cxxflags)
             .arg(src_dir.join(src_file))
@@ -162,12 +162,22 @@ fn build_pass(
                     .join(format!("{src_stub}.{}", dll_extension()))
                     .display()
             ))
-            .status()
-            .unwrap_or_else(|_| panic!("Failed to compile {src_file}"))
-            .success());
+            .status();
+        Some(r)
+    } else {
+        None
+    };
+
+    // A successful compile needs no action; a failed mandatory pass aborts the
+    // build; anything else (failed optional pass, unsupported OS) is skipped.
+    match r {
+        Some(Ok(s)) if s.success() => {}
+        Some(_) if !optional => panic!("Failed to compile {src_file}"),
+        _ => println!("cargo:warning=Skipping src/{src_file}"),
     }
 }
 
+#[allow(clippy::single_element_loop)]
 #[allow(clippy::too_many_lines)]
 fn main() {
     let out_dir = env::var_os("OUT_DIR").unwrap();
@@ -318,7 +328,28 @@ pub const LIBAFL_CC_LLVM_VERSION: Option = None;
             "autotokens-pass.cc",
             "coverage-accounting-pass.cc",
         ] {
-            build_pass(bindir_path, out_dir, &cxxflags, &ldflags, src_dir, pass);
+            build_pass(
+                bindir_path,
+                out_dir,
+                &cxxflags,
+                &ldflags,
+                src_dir,
+                pass,
+                false,
+            );
+        }
+
+        // Optional pass
+        for pass in &["dump-cfg-pass.cc"] {
+            build_pass(
+                bindir_path,
+                out_dir,
+                &cxxflags,
+                &ldflags,
+                src_dir,
+                pass,
+                true,
+            );
         }
 
         cc::Build::new()
diff --git a/libafl_cc/src/clang.rs b/libafl_cc/src/clang.rs
index 4433755fb5..9b16369cfe 100644
--- a/libafl_cc/src/clang.rs
+++ b/libafl_cc/src/clang.rs
@@ -38,6 +38,8 @@ pub enum LLVMPasses {
     AutoTokens,
     /// The Coverage Accouting (BB metric) pass
     CoverageAccounting,
+    /// The dump cfg pass
+    DumpCfg,
 }
 
 impl LLVMPasses {
@@ -54,6 +56,9 @@ impl LLVMPasses {
             }
             LLVMPasses::CoverageAccounting => PathBuf::from(env!("OUT_DIR"))
                 .join(format!("coverage-accounting-pass.{}", dll_extension())),
+            LLVMPasses::DumpCfg => {
+                PathBuf::from(env!("OUT_DIR")).join(format!("dump-cfg-pass.{}", dll_extension()))
+            }
         }
     }
 }
diff --git a/libafl_cc/src/dump-cfg-pass.cc b/libafl_cc/src/dump-cfg-pass.cc
new file mode 100644
index 0000000000..f045632b64
--- /dev/null
+++ b/libafl_cc/src/dump-cfg-pass.cc
@@ -0,0 +1,242 @@
+/*
+   LibAFL - DumpCfg LLVM pass
+   --------------------------------------------------
+
+   Written by Dongjia Zhang
+
+   Copyright 2022-2023 AFLplusplus Project. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at:
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+*/
+
+// NOTE(review): the names of the standard-library headers were lost when
+// this patch was extracted; the list below is reconstructed from what the
+// code uses and must be confirmed against the original commit.
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <fstream>
+#include <string>
+#include <vector>
+
+#include "llvm/Config/llvm-config.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/IRBuilder.h"
+
+#if USE_NEW_PM
+  #include "llvm/Passes/PassPlugin.h"
+  #include "llvm/Passes/PassBuilder.h"
+  #include "llvm/IR/PassManager.h"
+#else
+  #include "llvm/IR/LegacyPassManager.h"
+#endif
+
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Pass.h"
+#include "llvm/IR/Constants.h"
+
+#include <nlohmann/json.hpp>
+
+#define FATAL(x...)               \
+  do {                            \
+    fprintf(stderr, "FATAL: " x); \
+    exit(1);                      \
+                                  \
+  } while (0)
+
+using namespace llvm;
+
+namespace {
+
+#if USE_NEW_PM
+class DumpCfgPass : public PassInfoMixin<DumpCfgPass> {
+ public:
+  DumpCfgPass() {
+#else
+class DumpCfgPass : public ModulePass {
+ public:
+  static char ID;
+
+  DumpCfgPass() : ModulePass(ID) {
+#endif
+  }
+
+#if USE_NEW_PM
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
+#else
+  bool runOnModule(Module &M) override;
+#endif
+
+ protected:
+  // Index of every basic block within its own function
+  DenseMap<BasicBlock *, uint32_t> bb_to_cur_loc;
+  // Entry block of every function defined in the module, keyed by name
+  DenseMap<StringRef, BasicBlock *> entry_bb;
+  // Names of the functions called directly from each basic block
+  DenseMap<BasicBlock *, std::vector<StringRef>> calls_in_bb;
+
+ private:
+  // Not interested in these LLVM's functions
+  bool isLLVMIntrinsicFn(const StringRef &n) {
+    return n.startswith("llvm.");
+  }
+};
+
+}  // namespace
+
+#if USE_NEW_PM
+extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK
+llvmGetPassPluginInfo() {
+  return {LLVM_PLUGIN_API_VERSION, "DumpCfgPass", "v0.1",
+          /* lambda to insert our pass into the pass pipeline. */
+          [](PassBuilder &PB) {
+
+  #if LLVM_VERSION_MAJOR <= 13
+            using OptimizationLevel = typename PassBuilder::OptimizationLevel;
+  #endif
+            PB.registerOptimizerLastEPCallback(
+                [](ModulePassManager &MPM, OptimizationLevel OL) {
+                  MPM.addPass(DumpCfgPass());
+                });
+          }};
+}
+#else
+char DumpCfgPass::ID = 0;
+#endif
+
+#if USE_NEW_PM
+PreservedAnalyses DumpCfgPass::run(Module &M, ModuleAnalysisManager &MAM) {
+#else
+bool DumpCfgPass::runOnModule(Module &M) {
+
+#endif
+  auto moduleName = M.getName();
+
+  for (auto &F : M) {
+    // A declaration has no body, so it has no entry block to ask for
+    if (F.isDeclaration()) { continue; }
+
+    unsigned bb_cnt = 0;
+    entry_bb[F.getName()] = &F.getEntryBlock();
+    for (auto &BB : F) {
+      bb_to_cur_loc[&BB] = bb_cnt;
+      bb_cnt++;
+      for (auto &IN : BB) {
+        auto *callBase = dyn_cast<CallBase>(&IN);
+        if (!callBase) { continue; }
+
+        // Null for indirect calls, whose target is not known statically
+        Function *callee = callBase->getCalledFunction();
+        if (!callee) { continue; }
+
+        StringRef fname = callee->getName();
+        if (isLLVMIntrinsicFn(fname)) { continue; }
+
+        calls_in_bb[&BB].push_back(fname);
+      }
+    }
+  }
+
+  nlohmann::json cfg;
+
+  // Dump CFG for this module
+  for (auto record = bb_to_cur_loc.begin(); record != bb_to_cur_loc.end();
+       record++) {
+    auto current_bb = record->getFirst();
+    auto loc = record->getSecond();
+    Function *calling_func = current_bb->getParent();
+    std::string func_name = std::string("");
+
+    if (calling_func) {
+      func_name = std::string(calling_func->getName());
+      // outs() << "Function name: " << calling_func->getName() << "\n";
+    }
+
+    std::vector<uint32_t> outgoing;
+    for (auto bb_successor = succ_begin(current_bb);
+         bb_successor != succ_end(current_bb); bb_successor++) {
+      outgoing.push_back(bb_to_cur_loc[*bb_successor]);
+    }
+    cfg["edges"][func_name][loc] = outgoing;
+  }
+
+  for (auto record = calls_in_bb.begin(); record != calls_in_bb.end();
+       record++) {
+    auto current_bb = record->getFirst();
+    auto loc = bb_to_cur_loc[current_bb];
+    Function *calling_func = current_bb->getParent();
+    std::string func_name = std::string("");
+
+    if (calling_func) {
+      func_name = std::string(calling_func->getName());
+      // outs() << "Function name: " << calling_func->getName() << "\n";
+    }
+
+    std::vector<std::string> outgoing_funcs;
+    for (auto &item : record->getSecond()) {
+      outgoing_funcs.push_back(std::string(item));
+    }
+    if (!outgoing_funcs.empty()) {
+      cfg["calls"][func_name][std::to_string(loc)] = outgoing_funcs;
+    }
+  }
+
+  for (auto record = entry_bb.begin(); record != entry_bb.end(); record++) {
+    cfg["entries"][std::string(record->getFirst())] =
+        bb_to_cur_loc[record->getSecond()];
+  }
+
+  const char *out_dir = getenv("CFG_OUTPUT_PATH");
+  if (!out_dir) { FATAL("CFG_OUTPUT_PATH not set!"); }
+
+  std::string out_path =
+      std::string(out_dir) + "/" + std::string(moduleName) + ".cfg";
+  std::ofstream cfg_out(out_path);
+  if (cfg_out.is_open()) {
+    cfg_out << cfg << "\n";
+  } else {
+    // Do not drop the dump silently when the target cannot be created
+    errs() << "dump-cfg-pass: could not write " << out_path << "\n";
+  }
+
+#if USE_NEW_PM
+  auto PA = PreservedAnalyses::all();
+  return PA;
+#else
+  // The module is only inspected, never changed
+  return false;
+#endif
+}
+
+#if USE_NEW_PM
+
+#else
+static void registerDumpCfgPass(const PassManagerBuilder &,
+                                legacy::PassManagerBase &PM) {
+  PM.add(new DumpCfgPass());
+}
+
+static RegisterPass<DumpCfgPass> X("dumpcfg", "dumpcfg instrumentation pass",
+                                   false, false);
+
+static RegisterStandardPasses RegisterDumpCfgPass(
+    PassManagerBuilder::EP_OptimizerLast, registerDumpCfgPass);
+
+static RegisterStandardPasses RegisterDumpCfgPass0(
+    PassManagerBuilder::EP_EnabledOnOptLevel0, registerDumpCfgPass);
+#endif
diff --git a/libafl_cc/src/lib.rs b/libafl_cc/src/lib.rs
index 57d564c18f..fd94480adb 100644
--- a/libafl_cc/src/lib.rs
+++ b/libafl_cc/src/lib.rs
@@ -168,7 +168,7 @@ pub trait CompilerWrapper {
         let args = self.command()?;
 
         if !self.is_silent() {
-            dbg!("{args:?}");
+            dbg!(&args);
         }
         if args.is_empty() {
             return Err(Error::InvalidArguments(
@@ -180,7 +180,7 @@ pub trait CompilerWrapper {
             Err(e) => return Err(Error::Io(e)),
         };
         if !self.is_silent() {
-            dbg!("{status:?}");
+            dbg!(status);
         }
         Ok(status.code())
     }
diff --git a/utils/cfg_builder/build.py b/utils/cfg_builder/build.py
new file mode 100644
index 0000000000..0a39dd6db6
--- /dev/null
+++ b/utils/cfg_builder/build.py
@@ -0,0 +1,58 @@
+#!/usr/bin/python3
+"""Merge the per-module ``.cfg`` files written by the dump-cfg LLVM pass into
+one networkx digraph holding intra- and inter-procedural edges."""
+
+import json
+import os
+import networkx as nx
+
+cfg = dict()
+
+input_path = os.environ["CFG_OUTPUT_PATH"]
+for dirpath, _, files in os.walk(input_path):
+    for x in files:
+        if x.endswith(".cfg"):
+            # Close every file right away instead of leaking the handle
+            with open(os.path.join(dirpath, x)) as f:
+                cfg[x] = json.load(f)
+
+G = nx.DiGraph()
+
+node_ids = 0
+
+for mname, module in cfg.items():
+    if not module:
+        # A module without function bodies is dumped as JSON null
+        continue
+
+    # Graph node id of basic block 0 of each function defined in this module
+    fn_base = dict()
+
+    # First, add all the intra-procedural edges
+    for (fname, v) in module.get('edges', {}).items():
+        sz = len(v)
+        for idx in range(sz):
+            G.add_node(node_ids + idx,
+                       label=mname + ' ' + fname + ' ' + str(idx))
+        fn_base[fname] = node_ids
+        for src, dsts in enumerate(v):
+            for item in dsts:
+                G.add_edge(node_ids + src, node_ids + item)
+        node_ids += sz
+
+    # Next, build inter-procedural edges; the pass omits "calls" entirely
+    # when the module contains no call site, so every key is read with .get
+    entries = module.get('entries', {})
+    for (fname, calls) in module.get('calls', {}).items():
+        for (idx, target_fns) in calls.items():
+            src = fn_base[fname] + int(idx)
+            for target_fn in target_fns:
+                # Callees without a body in this module are left unresolved
+                if target_fn in fn_base:
+                    dst = fn_base[target_fn] + entries[target_fn]
+
+                    # Now we have 2 index, build the edge
+                    G.add_edge(src, dst)
+
+if "DOT_OUT" in os.environ:
+    nx.nx_agraph.write_dot(G, "cfg.xdot")