Dump whole program's CFG pass (#1226)

* skeleton

* pass

* compiles

* python

* optional pass

* rev

* chg
This commit is contained in:
Dongjia "toka" Zhang 2023-04-24 14:57:24 +02:00 committed by GitHub
parent 8ade809588
commit eab7c32e9f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 363 additions and 13 deletions

View File

@ -132,25 +132,25 @@ fn build_pass(
ldflags: &Vec<&str>,
src_dir: &Path,
src_file: &str,
optional: bool,
) {
let dot_offset = src_file.rfind('.').unwrap();
let src_stub = &src_file[..dot_offset];
println!("cargo:rerun-if-changed=src/{src_file}");
if cfg!(unix) {
assert!(Command::new(bindir_path.join("clang++"))
let r = if cfg!(unix) {
let r = Command::new(bindir_path.join("clang++"))
.arg("-v")
.args(cxxflags)
.arg(src_dir.join(src_file))
.args(ldflags)
.arg("-o")
.arg(out_dir.join(format!("{src_stub}.{}", dll_extension())))
.status()
.unwrap_or_else(|_| panic!("Failed to compile {src_file}"))
.success());
.status();
Some(r)
} else if cfg!(windows) {
println!("{cxxflags:?}");
assert!(Command::new(bindir_path.join("clang-cl"))
let r = Command::new(bindir_path.join("clang-cl"))
.arg("-v")
.args(cxxflags)
.arg(src_dir.join(src_file))
@ -162,12 +162,38 @@ fn build_pass(
.join(format!("{src_stub}.{}", dll_extension()))
.display()
))
.status()
.unwrap_or_else(|_| panic!("Failed to compile {src_file}"))
.success());
.status();
Some(r)
} else {
None
};
match r {
Some(r) => match r {
Ok(s) => {
if !s.success() {
if optional {
println!("cargo:warning=Skipping src/{src_file}");
} else {
panic!("Failed to compile {src_file}");
}
}
}
Err(_) => {
if optional {
println!("cargo:warning=Skipping src/{src_file}");
} else {
panic!("Failed to compile {src_file}");
}
}
},
None => {
println!("cargo:warning=Skipping src/{src_file}");
}
}
}
#[allow(clippy::single_element_loop)]
#[allow(clippy::too_many_lines)]
fn main() {
let out_dir = env::var_os("OUT_DIR").unwrap();
@ -318,7 +344,28 @@ pub const LIBAFL_CC_LLVM_VERSION: Option<usize> = None;
"autotokens-pass.cc",
"coverage-accounting-pass.cc",
] {
build_pass(bindir_path, out_dir, &cxxflags, &ldflags, src_dir, pass);
build_pass(
bindir_path,
out_dir,
&cxxflags,
&ldflags,
src_dir,
pass,
false,
);
}
// Optional pass
for pass in &["dump-cfg-pass.cc"] {
build_pass(
bindir_path,
out_dir,
&cxxflags,
&ldflags,
src_dir,
pass,
true,
);
}
cc::Build::new()

View File

@ -38,6 +38,8 @@ pub enum LLVMPasses {
AutoTokens,
/// The Coverage Accounting (BB metric) pass
CoverageAccounting,
/// The dump cfg pass
DumpCfg,
}
impl LLVMPasses {
@ -54,6 +56,9 @@ impl LLVMPasses {
}
LLVMPasses::CoverageAccounting => PathBuf::from(env!("OUT_DIR"))
.join(format!("coverage-accounting-pass.{}", dll_extension())),
LLVMPasses::DumpCfg => {
PathBuf::from(env!("OUT_DIR")).join(format!("dump-cfg-pass.{}", dll_extension()))
}
}
}
}

View File

@ -0,0 +1,246 @@
/*
LibAFL - DumpCfg LLVM pass
--------------------------------------------------
Written by Dongjia Zhang <toka@aflplus.plus>
Copyright 2022-2023 AFLplusplus Project. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at:
http://www.apache.org/licenses/LICENSE-2.0
*/
#include <stdio.h>
#include <stdlib.h>
#ifndef _WIN32
#include <unistd.h>
#include <sys/time.h>
#else
#include <io.h>
#endif
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <ctype.h>
#include <list>
#include <string>
#include <fstream>
#include <set>
#include "llvm/Config/llvm-config.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/IRBuilder.h"
#if USE_NEW_PM
#include "llvm/Passes/PassPlugin.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/IR/PassManager.h"
#else
#include "llvm/IR/LegacyPassManager.h"
#endif
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Pass.h"
#include "llvm/IR/Constants.h"
#include <iostream>
#include <nlohmann/json.hpp>
/* Print a printf-style message to stderr, prefixed with "FATAL: ", and then
   terminate the process with exit status 1. The first argument must be a
   string literal, because it is concatenated with the "FATAL: " prefix at
   compile time. The do { ... } while (0) wrapper makes the macro behave like a
   single statement. */
#define FATAL(x...) \
do { \
fprintf(stderr, "FATAL: " x); \
exit(1); \
\
} while (0)
using namespace llvm;
namespace {

/* Module pass that records, for every basic block it visits, a per-function
   index, the entry block of each function and the names of the functions
   called from each block. The class header, constructor and entry point
   depend on whether the new or the legacy pass manager is targeted; the
   bookkeeping members are shared. */
#if USE_NEW_PM
class DumpCfgPass : public PassInfoMixin<DumpCfgPass> {
 public:
  DumpCfgPass() {
  }

  PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
#else
class DumpCfgPass : public ModulePass {
 public:
  static char ID;

  DumpCfgPass() : ModulePass(ID) {
  }

  bool runOnModule(Module &M) override;
#endif

 protected:
  // Basic block -> index of that block inside its parent function.
  DenseMap<BasicBlock *, uint32_t> bb_to_cur_loc;
  // Function name -> entry basic block of that function.
  DenseMap<StringRef, BasicBlock *> entry_bb;
  // Basic block -> names of the functions called directly from it.
  DenseMap<BasicBlock *, std::vector<StringRef>> calls_in_bb;

 private:
  // True for names in the "llvm." namespace, which the pass does not record.
  bool isLLVMIntrinsicFn(StringRef &n) {
    return n.startswith("llvm.");
  }
};

}  // namespace
#if USE_NEW_PM
/* Plugin entry point for the new pass manager: describes the plugin and
   supplies the callback that appends DumpCfgPass at the optimizer-last
   extension point of the pipeline. */
extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK
llvmGetPassPluginInfo() {
  /* callback that inserts our pass into the pass pipeline. */
  auto registerCallbacks = [](PassBuilder &PB) {
#if LLVM_VERSION_MAJOR <= 13
    using OptimizationLevel = typename PassBuilder::OptimizationLevel;
#endif
    PB.registerOptimizerLastEPCallback(
        [](ModulePassManager &MPM, OptimizationLevel OL) {
          MPM.addPass(DumpCfgPass());
        });
  };

  return {LLVM_PLUGIN_API_VERSION, "DumpCfgPass", "v0.1", registerCallbacks};
}
#else
// Storage for the legacy pass manager's pass identifier.
char DumpCfgPass::ID = 0;
#endif
/* Collect the control-flow graph of module M and dump it as JSON to
   "$CFG_OUTPUT_PATH/<module name>.cfg".

   The JSON document has up to three top-level keys:
     "edges":   function name -> array indexed by basic-block number, each
                element being the list of successor block numbers;
     "calls":   function name -> { block number (as string) -> list of
                directly called function names }, only for blocks that call
                something;
     "entries": function name -> number of the function's entry block.

   Block numbers are assigned per function, in iteration order, starting at 0.
   The module itself is never modified. The process is terminated through
   FATAL if CFG_OUTPUT_PATH is not set. */
#if USE_NEW_PM
PreservedAnalyses DumpCfgPass::run(Module &M, ModuleAnalysisManager &MAM) {
#else
bool DumpCfgPass::runOnModule(Module &M) {
#endif
  auto moduleName = M.getName();

  for (auto &F : M) {
    // Declarations have no body; asking them for an entry block is invalid.
    if (F.empty()) { continue; }

    unsigned bb_cnt = 0;
    entry_bb[F.getName()] = &F.getEntryBlock();

    for (auto &BB : F) {
      bb_to_cur_loc[&BB] = bb_cnt;
      bb_cnt++;

      for (auto &IN : BB) {
        auto *callBase = dyn_cast<CallBase>(&IN);
        if (!callBase) { continue; }

        // Only calls with a statically known callee are recorded.
        Function *callee = callBase->getCalledFunction();
        if (!callee) { continue; }

        StringRef fname = callee->getName();
        // Not interested in LLVM's own intrinsics.
        if (isLLVMIntrinsicFn(fname)) { continue; }

        calls_in_bb[&BB].push_back(fname);
      }
    }
  }

  nlohmann::json cfg;

  // Dump CFG for this module: intra-procedural edges first.
  for (const auto &record : bb_to_cur_loc) {
    BasicBlock *current_bb = record.first;
    auto        loc = record.second;

    Function   *calling_func = current_bb->getParent();
    std::string func_name;
    if (calling_func) { func_name = calling_func->getName().str(); }

    std::vector<uint32_t> outgoing;
    for (BasicBlock *successor : successors(current_bb)) {
      // lookup() never inserts, so the map is not mutated while we iterate it.
      outgoing.push_back(bb_to_cur_loc.lookup(successor));
    }

    cfg["edges"][func_name][loc] = outgoing;
  }

  // Then the direct calls made from each basic block.
  for (const auto &record : calls_in_bb) {
    BasicBlock *current_bb = record.first;
    auto        loc = bb_to_cur_loc.lookup(current_bb);

    Function   *calling_func = current_bb->getParent();
    std::string func_name;
    if (calling_func) { func_name = calling_func->getName().str(); }

    std::vector<std::string> outgoing_funcs;
    for (const auto &item : record.second) {
      outgoing_funcs.push_back(item.str());
    }

    if (!outgoing_funcs.empty()) {
      cfg["calls"][func_name][std::to_string(loc)] = outgoing_funcs;
    }
  }

  // Finally the entry block of every defined function.
  for (const auto &record : entry_bb) {
    cfg["entries"][record.first.str()] = bb_to_cur_loc.lookup(record.second);
  }

  const char *output_dir = getenv("CFG_OUTPUT_PATH");
  if (output_dir) {
    std::string out_path =
        output_dir + std::string("/") + std::string(moduleName) + ".cfg";
    std::ofstream cfg_out(out_path);
    if (cfg_out.is_open()) {
      cfg_out << cfg << "\n";
    } else {
      // Make a lost dump visible instead of silently producing nothing,
      // e.g. when the module name contains a directory that does not exist.
      errs() << "DumpCfgPass: could not open " << out_path
             << " for writing\n";
    }
  } else {
    FATAL("CFG_OUTPUT_PATH not set!");
  }

#if USE_NEW_PM
  auto PA = PreservedAnalyses::all();
  return PA;
#else
  // The pass only inspects the module, so report it as unmodified.
  return false;
#endif
}
#if USE_NEW_PM
#else
static void registerDumpCfgPass(const PassManagerBuilder &,
legacy::PassManagerBase &PM) {
PM.add(new DumpCfgPass());
}
static RegisterPass<DumpCfgPass> X("dumpcfg", "dumpcfg instrumentation pass",
false, false);
static RegisterStandardPasses RegisterDumpCfgPass(
PassManagerBuilder::EP_OptimizerLast, registerDumpCfgPass);
static RegisterStandardPasses RegisterDumpCfgPass0(
PassManagerBuilder::EP_EnabledOnOptLevel0, registerDumpCfgPass);
#endif

View File

@ -168,7 +168,7 @@ pub trait CompilerWrapper {
let args = self.command()?;
if !self.is_silent() {
dbg!("{args:?}");
dbg!(args.clone());
}
if args.is_empty() {
return Err(Error::InvalidArguments(
@ -180,7 +180,7 @@ pub trait CompilerWrapper {
Err(e) => return Err(Error::Io(e)),
};
if !self.is_silent() {
dbg!("{status:?}");
dbg!(status);
}
Ok(status.code())
}

View File

@ -0,0 +1,52 @@
#!/usr/bin/python3
import json
import os
import networkx as nx
# Merge every per-module ".cfg" JSON dump found under $CFG_OUTPUT_PATH into a
# single directed graph G. Nodes are basic blocks, edges are intra-procedural
# successors plus call edges from a calling block to the callee's entry block.
cfg = dict()

input_path = os.environ["CFG_OUTPUT_PATH"]

for dirpath, _, files in os.walk(input_path):
    for x in files:
        if x.endswith(".cfg"):
            full_path = os.path.join(dirpath, x)
            # Key by the path relative to the input directory so that two
            # dumps with the same file name in different sub-directories do
            # not overwrite each other.
            module_key = os.path.relpath(full_path, input_path)
            with open(full_path) as cfg_file:
                cfg[module_key] = json.load(cfg_file)

G = nx.DiGraph()

node_ids = 0
for mname, module in cfg.items():
    # A dump may be empty (JSON null) or may lack some of the top-level keys,
    # e.g. "calls" is only written when at least one call was recorded.
    if not module:
        continue
    edges = module.get('edges') or {}
    calls = module.get('calls') or {}
    entries = module.get('entries') or {}

    # Function name -> id of the graph node for that function's block 0.
    fn_base = dict()

    # First, add all the intra-procedural edges
    for (fname, v) in edges.items():
        sz = len(v)
        base = node_ids
        for idx in range(base, base + sz):
            G.add_node(idx)
            G.nodes[idx]['label'] = mname + ' ' + \
                fname + ' ' + str(idx - base)
        node_ids += sz
        fn_base[fname] = base

        for src, dsts in enumerate(v):
            for item in dsts or []:
                G.add_edge(base + src, base + item)

    # Next, build inter-procedural edges
    for (fname, fn_calls) in calls.items():
        for (idx, target_fns) in fn_calls.items():
            src = fn_base[fname] + int(idx)
            for target_fn in target_fns:
                # Only functions defined in this module have nodes.
                if target_fn in fn_base:
                    offset = entries.get(target_fn, 0)
                    dst = fn_base[target_fn] + offset

                    # Now we have 2 index, build the edge
                    G.add_edge(src, dst)

if "DOT_OUT" in os.environ:
    nx.nx_agraph.write_dot(G, "cfg.xdot")