diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 9e1a955c57..1615072949 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -44,6 +44,9 @@ jobs: - uses: actions/checkout@v4 - if: runner.os == 'Linux' uses: ./.github/workflows/ubuntu-prepare + - name: Install LLVM + if: runner.os == 'MacOS' + run: brew install llvm@${{env.MAIN_LLVM_VERSION}} - uses: Swatinem/rust-cache@v2 with: { shared-key: "ubuntu" } if: runner.os == 'Linux' @@ -199,7 +202,7 @@ jobs: cargo-fmt: runs-on: ubuntu-24.04 env: - MAIN_LLVM_VERSION: 19 + MAIN_LLVM_VERSION: 20 steps: - uses: actions/checkout@v4 - uses: ./.github/workflows/ubuntu-prepare @@ -585,7 +588,7 @@ jobs: - name: Add nightly clippy run: rustup toolchain install nightly --component clippy --allow-downgrade && rustup default nightly - name: Install deps - run: brew install z3 gtk+3 python + run: brew install z3 gtk+3 python llvm@${{env.MAIN_LLVM_VERSION}} - name: Install cxxbridge run: cargo install cxxbridge-cmd - uses: actions/checkout@v4 @@ -597,17 +600,6 @@ jobs: - name: Clippy run: cargo +nightly clippy --tests --all --exclude libafl_nyx --exclude symcc_runtime --exclude runtime_test - ios: - runs-on: macOS-latest - steps: - - uses: dtolnay/rust-toolchain@stable - - name: install ios - run: rustup target add aarch64-apple-ios - - uses: actions/checkout@v4 - - uses: Swatinem/rust-cache@v2 - - name: Build iOS - run: PYO3_CROSS_PYTHON_VERSION=$(python3 -c "print('{}.{}'.format(__import__('sys').version_info.major, __import__('sys').version_info.minor))") cargo build --target aarch64-apple-ios && cd libafl_frida && cargo build --target aarch64-apple-ios && cd .. - android: runs-on: ubuntu-24.04 steps: diff --git a/libafl_cc/Cargo.toml b/libafl_cc/Cargo.toml index 3723ee2b67..a0ca8fcfbb 100644 --- a/libafl_cc/Cargo.toml +++ b/libafl_cc/Cargo.toml @@ -22,7 +22,6 @@ categories = [ [features] default = [ - "ddg-instr", "function-logging", "cmplog-routines", "autotokens", @@ -30,11 +29,9 @@ default = [ "cmplog-instructions", "ctx", "dump-cfg", - "profiling", ] # llvm passes -ddg-instr = [] function-logging = [] cmplog-routines = [] autotokens = [] @@ -42,7 +39,6 @@ coverage-accounting = [] cmplog-instructions = [] ctx = [] dump-cfg = [] -profiling = [] [build-dependencies] cc = { workspace = true, features = ["parallel"] } diff --git a/libafl_cc/build.rs b/libafl_cc/build.rs index 682ef73a43..187ae2b239 100644 --- a/libafl_cc/build.rs +++ b/libafl_cc/build.rs @@ -1,7 +1,6 @@ use core::str; #[cfg(any( target_vendor = "apple", - feature = "ddg-instr", feature = "function-logging", feature = "cmplog-routines", feature = "autotokens", @@ -9,7 +8,6 @@ use core::str; feature = "cmplog-instructions", feature = "ctx", feature = "dump-cfg", - feature = "profiling", ))] use std::path::PathBuf; use std::{env, fs::File, io::Write, path::Path, process::Command}; @@ -24,11 +22,10 @@ const LLVM_VERSION_MAX: u32 = 33; /// The min version of `LLVM` we're looking for #[cfg(not(target_vendor = "apple"))] -const LLVM_VERSION_MIN: u32 = 6; +const LLVM_VERSION_MIN: u32 = 15; /// Get the extension for a shared object #[cfg(any( - feature = "ddg-instr", feature = "function-logging", feature = "cmplog-routines", feature = "autotokens", @@ -36,7 +33,6 @@ const LLVM_VERSION_MIN: u32 = 6; feature = "cmplog-instructions", feature = "ctx", feature = "dump-cfg", - feature = "profiling", ))] fn dll_extension<'a>() -> &'a str { if let Ok(vendor) = env::var("CARGO_CFG_TARGET_VENDOR") { @@ -162,7 +158,6 @@ fn find_llvm_version() -> Option { } #[cfg(any( - feature = "ddg-instr", feature = "function-logging", feature = "cmplog-routines", feature = "autotokens", @@ -170,7 +165,6 @@ fn find_llvm_version() -> Option { feature = "cmplog-instructions", feature = "ctx", feature = "dump-cfg", - feature = "profiling", ))] #[expect(clippy::too_many_arguments)] fn build_pass( @@ -279,7 +273,6 @@ fn main() { println!("cargo:rerun-if-env-changed=LLVM_VERSION"); println!("cargo:rerun-if-env-changed=LIBAFL_EDGES_MAP_DEFAULT_SIZE"); println!("cargo:rerun-if-env-changed=LIBAFL_ACCOUNTING_MAP_SIZE"); - println!("cargo:rerun-if-env-changed=LIBAFL_DDG_MAP_SIZE"); println!("cargo:rerun-if-changed=src/common-llvm.h"); println!("cargo:rerun-if-changed=build.rs"); @@ -387,19 +380,8 @@ pub const LIBAFL_CC_LLVM_VERSION: Option = None; .expect("Could not parse LIBAFL_ACCOUNTING_MAP_SIZE"); cxxflags.push(format!("-DACCOUNTING_MAP_SIZE={acc_map_size}")); - let ddg_map_size: usize = option_env!("LIBAFL_DDG_MAP_SIZE") - .map_or(Ok(65_536), str::parse) - .expect("Could not parse LIBAFL_DDG_MAP_SIZE"); - cxxflags.push(format!("-DDDG_MAP_SIZE={ddg_map_size}")); - let llvm_version = find_llvm_version(); - if let Some(ver) = llvm_version { - if ver >= 14 { - cxxflags.push(String::from("-DUSE_NEW_PM")); - } - } - write!( clang_constants_file, "// These constants are autogenerated by build.rs @@ -419,9 +401,6 @@ pub const LIBAFL_CC_LLVM_VERSION: Option = None; /// The size of the accounting maps pub const ACCOUNTING_MAP_SIZE: usize = {acc_map_size}; - /// The size of the ddg maps - pub const DDG_MAP_SIZE: usize = {acc_map_size}; - /// The llvm version used to build llvm passes pub const LIBAFL_CC_LLVM_VERSION: Option = {llvm_version:?}; ", @@ -481,18 +460,6 @@ pub const LIBAFL_CC_LLVM_VERSION: Option = None; ldflags.push(&sdk_path); } - #[cfg(feature = "ddg-instr")] - build_pass( - bindir_path, - out_dir, - &cxxflags, - &ldflags, - src_dir, - "ddg-instr.cc", - Some(&vec!["ddg-utils.cc"]), - true, - ); - #[cfg(feature = "function-logging")] build_pass( bindir_path, @@ -577,18 +544,6 @@ pub const LIBAFL_CC_LLVM_VERSION: Option = None; false, ); - #[cfg(feature = "profiling")] - build_pass( - bindir_path, - out_dir, - &cxxflags, - &ldflags, - src_dir, - "profiling-pass.cc", - None, - false, - ); - cc::Build::new() .file(src_dir.join("no-link-rt.c")) .compile("no-link-rt"); diff --git a/libafl_cc/src/autotokens-pass.cc b/libafl_cc/src/autotokens-pass.cc index db6664674e..a343531a42 100644 --- a/libafl_cc/src/autotokens-pass.cc +++ b/libafl_cc/src/autotokens-pass.cc @@ -35,22 +35,6 @@ #include "common-llvm.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/IR/IRBuilder.h" - -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/DebugInfo.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/Verifier.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/Pass.h" -#include "llvm/IR/Constants.h" - #ifndef O_DSYNC #define O_DSYNC O_SYNC #endif @@ -67,24 +51,12 @@ using namespace llvm; namespace { -#if USE_NEW_PM class AutoTokensPass : public PassInfoMixin { public: AutoTokensPass() { -#else -class AutoTokensPass : public ModulePass { - public: - static char ID; - - AutoTokensPass() : ModulePass(ID) { -#endif } -#if USE_NEW_PM PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); -#else - bool runOnModule(Module &M) override; -#endif protected: private: @@ -93,7 +65,6 @@ class AutoTokensPass : public ModulePass { } // namespace -#if USE_NEW_PM extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK llvmGetPassPluginInfo() { return {LLVM_PLUGIN_API_VERSION, "AutoTokensPass", "v0.1", @@ -101,16 +72,13 @@ llvmGetPassPluginInfo() { [](PassBuilder &PB) { PB.registerOptimizerLastEPCallback( [](ModulePassManager &MPM, OptimizationLevel OL - #if LLVM_VERSION_MAJOR >= 20 +#if LLVM_VERSION_MAJOR >= 20 , ThinOrFullLTOPhase Phase - #endif +#endif ) { MPM.addPass(AutoTokensPass()); }); }}; } -#else -char AutoTokensPass::ID = 0; -#endif void dict2file(int fd, uint8_t *mem, uint32_t len) { uint32_t i, j, binary = 0; @@ -144,12 +112,7 @@ void dict2file(int fd, uint8_t *mem, uint32_t len) { #endif } -#if USE_NEW_PM PreservedAnalyses AutoTokensPass::run(Module &M, ModuleAnalysisManager &MAM) { -#else -bool AutoTokensPass::runOnModule(Module &M) { -#endif - DenseMap valueMap; char *ptr; int fd, found = 0; @@ -547,12 +510,8 @@ bool AutoTokensPass::runOnModule(Module &M) { if (use_file) { close(fd); -#if USE_NEW_PM auto PA = PreservedAnalyses::all(); return PA; -#else - return true; -#endif } LLVMContext &Ctx = M.getContext(); @@ -602,29 +561,6 @@ bool AutoTokensPass::runOnModule(Module &M) { } } -#if USE_NEW_PM auto PA = PreservedAnalyses::all(); return PA; -#else - return true; -#endif -} - -#if USE_NEW_PM - -#else -static void registerAutoTokensPass(const PassManagerBuilder &, - legacy::PassManagerBase &PM) { - PM.add(new AutoTokensPass()); -} - -static RegisterPass X("autotokens", - "autotokens instrumentation pass", false, - false); - -static RegisterStandardPasses RegisterAutoTokensPass( - PassManagerBuilder::EP_OptimizerLast, registerAutoTokensPass); - -static RegisterStandardPasses RegisterAutoTokensPass0( - PassManagerBuilder::EP_EnabledOnOptLevel0, registerAutoTokensPass); -#endif +} \ No newline at end of file diff --git a/libafl_cc/src/clang.rs b/libafl_cc/src/clang.rs index 44bcf802b1..372c5fa2c9 100644 --- a/libafl_cc/src/clang.rs +++ b/libafl_cc/src/clang.rs @@ -39,10 +39,6 @@ pub enum LLVMPasses { Ctx, /// Function logging FunctionLogging, - /// Profiling - Profiling, - /// Data dependency instrumentation - DDG, } impl LLVMPasses { @@ -69,12 +65,6 @@ impl LLVMPasses { LLVMPasses::FunctionLogging => { PathBuf::from(env!("OUT_DIR")).join(format!("function-logging.{}", dll_extension())) } - LLVMPasses::Profiling => { - PathBuf::from(env!("OUT_DIR")).join(format!("profiling.{}", dll_extension())) - } - LLVMPasses::DDG => { - PathBuf::from(env!("OUT_DIR")).join(format!("ddg-instr.{}", dll_extension())) - } } } } @@ -97,7 +87,6 @@ pub struct ClangWrapper { bit_mode: u32, need_libafl_arg: bool, has_libafl_arg: bool, - use_new_pm: bool, output: Option, configurations: Vec, @@ -413,38 +402,20 @@ impl ToolWrapper for ClangWrapper { return Ok(args); } - if !self.passes.is_empty() { - if self.use_new_pm { - if let Some(ver) = LIBAFL_CC_LLVM_VERSION { - if ver < 16 { - args.push("-fexperimental-new-pass-manager".into()); - } - } - } else { - args.push("-flegacy-pass-manager".into()); - } - } for pass in &self.passes { use_pass = true; - if self.use_new_pm { - // https://github.com/llvm/llvm-project/issues/56137 - // Need this -Xclang -load -Xclang -.so thing even with the new PM - // to pass the arguments to LLVM Passes - args.push("-Xclang".into()); - args.push("-load".into()); - args.push("-Xclang".into()); - args.push(pass.path().into_os_string().into_string().unwrap()); - args.push("-Xclang".into()); - args.push(format!( - "-fpass-plugin={}", - pass.path().into_os_string().into_string().unwrap() - )); - } else { - args.push("-Xclang".into()); - args.push("-load".into()); - args.push("-Xclang".into()); - args.push(pass.path().into_os_string().into_string().unwrap()); - } + // https://github.com/llvm/llvm-project/issues/56137 + // Need this -Xclang -load -Xclang -.so thing even with the new PM + // to pass the arguments to LLVM Passes + args.push("-Xclang".into()); + args.push("-load".into()); + args.push("-Xclang".into()); + args.push(pass.path().into_os_string().into_string().unwrap()); + args.push("-Xclang".into()); + args.push(format!( + "-fpass-plugin={}", + pass.path().into_os_string().into_string().unwrap() + )); } if !self.is_asm && !self.passes.is_empty() { for passes_arg in &self.passes_args { @@ -551,14 +522,6 @@ impl ClangWrapper { /// Create a new Clang Wrapper #[must_use] pub fn new() -> Self { - #[cfg(unix)] - let use_new_pm = match LIBAFL_CC_LLVM_VERSION { - Some(ver) => ver >= 14, - None => false, - }; - #[cfg(not(unix))] - let use_new_pm = false; - Self { optimize: true, wrapped_cc: CLANG_PATH.into(), @@ -572,7 +535,6 @@ impl ClangWrapper { bit_mode: 0, need_libafl_arg: false, has_libafl_arg: false, - use_new_pm, output: None, configurations: vec![crate::Configuration::Default], ignoring_configurations: false, @@ -646,12 +608,6 @@ impl ClangWrapper { self.need_libafl_arg = value; self } - - /// Set if use new llvm pass manager. - pub fn use_new_pm(&mut self, value: bool) -> &'_ mut Self { - self.use_new_pm = value; - self - } } #[cfg(test)] diff --git a/libafl_cc/src/cmplog-instructions-pass.cc b/libafl_cc/src/cmplog-instructions-pass.cc index 27bd1c88cc..a4a0fe42b9 100644 --- a/libafl_cc/src/cmplog-instructions-pass.cc +++ b/libafl_cc/src/cmplog-instructions-pass.cc @@ -37,15 +37,8 @@ #include "llvm/Pass.h" #include "llvm/Analysis/ValueTracking.h" -#if LLVM_VERSION_MAJOR > 3 || \ - (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) - #include "llvm/IR/Verifier.h" - #include "llvm/IR/DebugInfo.h" -#else - #include "llvm/Analysis/Verifier.h" - #include "llvm/DebugInfo.h" - #define nullptr 0 -#endif +#include "llvm/IR/Verifier.h" +#include "llvm/IR/DebugInfo.h" #include @@ -55,33 +48,12 @@ static cl::opt CmplogExtended("cmplog_instructions_extended", cl::init(false), cl::NotHidden); namespace { -#if USE_NEW_PM class CmpLogInstructions : public PassInfoMixin { public: CmpLogInstructions() { } -#else -class CmpLogInstructions : public ModulePass { - public: - static char ID; - CmpLogInstructions() : ModulePass(ID) { - } -#endif - -#if USE_NEW_PM PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); -#else - bool runOnModule(Module &M) override; - - #if LLVM_VERSION_MAJOR < 4 - const char *getPassName() const override { - #else - StringRef getPassName() const override { - #endif - return "cmplog instructions"; - } -#endif private: bool hookInstrs(Module &M); @@ -90,28 +62,20 @@ class CmpLogInstructions : public ModulePass { } // namespace -#if USE_NEW_PM extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK llvmGetPassPluginInfo() { return {LLVM_PLUGIN_API_VERSION, "CmpLogInstructions", "v0.1", [](PassBuilder &PB) { - #if LLVM_VERSION_MAJOR >= 16 - #if LLVM_VERSION_MAJOR >= 20 +#if LLVM_VERSION_MAJOR >= 20 PB.registerPipelineStartEPCallback( - #else +#else PB.registerOptimizerEarlyEPCallback( - #endif - #else - PB.registerOptimizerLastEPCallback( - #endif +#endif [](ModulePassManager &MPM, OptimizationLevel OL) { MPM.addPass(CmpLogInstructions()); }); }}; } -#else -char CmpLogInstructions::ID = 0; -#endif template Iterator Unique(Iterator first, Iterator last) { @@ -286,17 +250,11 @@ bool CmpLogInstructions::hookInstrs(Module &M) { continue; } -#if (LLVM_VERSION_MAJOR >= 12) vector_cnt = tt->getElementCount().getKnownMinValue(); ty0 = tt->getElementType(); -#endif } - if (ty0->isHalfTy() -#if LLVM_VERSION_MAJOR >= 11 - || ty0->isBFloatTy() -#endif - ) + if (ty0->isHalfTy() || ty0->isBFloatTy()) max_size = 16; else if (ty0->isFloatTy()) max_size = 32; @@ -306,11 +264,9 @@ bool CmpLogInstructions::hookInstrs(Module &M) { max_size = 80; else if (ty0->isFP128Ty() || ty0->isPPC_FP128Ty()) max_size = 128; -#if (LLVM_VERSION_MAJOR >= 12) else if (ty0->getTypeID() != llvm::Type::PointerTyID && !be_quiet) fprintf(stderr, "Warning: unsupported cmp type for cmplog: %u!\n", ty0->getTypeID()); -#endif attr += 8; is_fp = 1; @@ -318,7 +274,6 @@ bool CmpLogInstructions::hookInstrs(Module &M) { } else { if (ty0->isVectorTy()) { -#if (LLVM_VERSION_MAJOR >= 12) VectorType *tt = dyn_cast(ty0); if (!tt) { fprintf(stderr, "Warning: cmplog cmp vector is not a vector!\n"); @@ -327,7 +282,6 @@ bool CmpLogInstructions::hookInstrs(Module &M) { vector_cnt = tt->getElementCount().getKnownMinValue(); ty1 = ty0 = tt->getElementType(); -#endif } intTyOp0 = dyn_cast(ty0); @@ -339,13 +293,10 @@ bool CmpLogInstructions::hookInstrs(Module &M) { : intTyOp1->getBitWidth(); } else { -#if (LLVM_VERSION_MAJOR >= 12) if (ty0->getTypeID() != llvm::Type::PointerTyID && !be_quiet) { fprintf(stderr, "Warning: unsupported cmp type for cmplog: %u\n", ty0->getTypeID()); } - -#endif } } @@ -624,42 +575,12 @@ bool CmpLogInstructions::hookInstrs(Module &M) { return true; } -#if USE_NEW_PM PreservedAnalyses CmpLogInstructions::run(Module &M, ModuleAnalysisManager &MAM) { -#else -bool CmpLogInstructions::runOnModule(Module &M) { -#endif hookInstrs(M); -#if USE_NEW_PM auto PA = PreservedAnalyses::all(); -#endif verifyModule(M); -#if USE_NEW_PM return PA; -#else - return true; -#endif } - -#if USE_NEW_PM -#else -static void registerCmpLogInstructionsPass(const PassManagerBuilder &, - legacy::PassManagerBase &PM) { - auto p = new CmpLogInstructions(); - PM.add(p); -} - -static RegisterStandardPasses RegisterCmpLogInstructionsPass( - PassManagerBuilder::EP_OptimizerLast, registerCmpLogInstructionsPass); - -static RegisterStandardPasses RegisterCmpLogInstructionsPass0( - PassManagerBuilder::EP_EnabledOnOptLevel0, registerCmpLogInstructionsPass); - -static RegisterStandardPasses RegisterCmpLogInstructionsPassLTO( - PassManagerBuilder::EP_FullLinkTimeOptimizationLast, - registerCmpLogInstructionsPass); - -#endif diff --git a/libafl_cc/src/cmplog-routines-pass.cc b/libafl_cc/src/cmplog-routines-pass.cc index 953edbf598..52b67b932e 100644 --- a/libafl_cc/src/cmplog-routines-pass.cc +++ b/libafl_cc/src/cmplog-routines-pass.cc @@ -28,25 +28,6 @@ #include "common-llvm.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Pass.h" -#include "llvm/Analysis/ValueTracking.h" - -#if LLVM_VERSION_MAJOR > 3 || \ - (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) - #include "llvm/IR/Verifier.h" - #include "llvm/IR/DebugInfo.h" -#else - #include "llvm/Analysis/Verifier.h" - #include "llvm/DebugInfo.h" - #define nullptr 0 -#endif - #include using namespace llvm; @@ -55,32 +36,12 @@ static cl::opt CmplogExtended("cmplog_routines_extended", cl::init(false), cl::NotHidden); namespace { -#if USE_NEW_PM class CmpLogRoutines : public PassInfoMixin { public: CmpLogRoutines() { -#else - -class CmpLogRoutines : public ModulePass { - public: - static char ID; - CmpLogRoutines() : ModulePass(ID) { -#endif } -#if USE_NEW_PM PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); -#else - bool runOnModule(Module &M) override; - - #if LLVM_VERSION_MAJOR < 4 - const char *getPassName() const override { - #else - StringRef getPassName() const override { - #endif - return "cmplog routines"; - } -#endif private: bool hookRtns(Module &M); @@ -88,24 +49,20 @@ class CmpLogRoutines : public ModulePass { } // namespace -#if USE_NEW_PM extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK llvmGetPassPluginInfo() { return {LLVM_PLUGIN_API_VERSION, "CmpLogRoutines", "v0.1", [](PassBuilder &PB) { PB.registerOptimizerLastEPCallback( [](ModulePassManager &MPM, OptimizationLevel OL - #if LLVM_VERSION_MAJOR >= 20 +#if LLVM_VERSION_MAJOR >= 20 , ThinOrFullLTOPhase Phase - #endif +#endif ) { MPM.addPass(CmpLogRoutines()); }); }}; } -#else -char CmpLogRoutines::ID = 0; -#endif #include bool CmpLogRoutines::hookRtns(Module &M) { @@ -514,41 +471,11 @@ bool CmpLogRoutines::hookRtns(Module &M) { return true; } -#if USE_NEW_PM PreservedAnalyses CmpLogRoutines::run(Module &M, ModuleAnalysisManager &MAM) { -#else -bool CmpLogRoutines::runOnModule(Module &M) { -#endif hookRtns(M); -#if USE_NEW_PM auto PA = PreservedAnalyses::all(); -#endif verifyModule(M); -#if USE_NEW_PM return PA; -#else - return true; -#endif -} - -#if USE_NEW_PM -#else -static void registerCmpLogRoutinesPass(const PassManagerBuilder &, - legacy::PassManagerBase &PM) { - auto p = new CmpLogRoutines(); - PM.add(p); -} - -static RegisterStandardPasses RegisterCmpLogRoutinesPass( - PassManagerBuilder::EP_OptimizerLast, registerCmpLogRoutinesPass); - -static RegisterStandardPasses RegisterCmpLogRoutinesPass0( - PassManagerBuilder::EP_EnabledOnOptLevel0, registerCmpLogRoutinesPass); - -static RegisterStandardPasses RegisterCmpLogRoutinesPassLTO( - PassManagerBuilder::EP_FullLinkTimeOptimizationLast, - registerCmpLogRoutinesPass); - -#endif \ No newline at end of file +} \ No newline at end of file diff --git a/libafl_cc/src/common-llvm.h b/libafl_cc/src/common-llvm.h index 294ab22448..5832f03b82 100644 --- a/libafl_cc/src/common-llvm.h +++ b/libafl_cc/src/common-llvm.h @@ -5,35 +5,25 @@ #include #include "llvm/Config/llvm-config.h" -#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 5 -typedef long double max_align_t; -#endif - -#if LLVM_VERSION_MAJOR >= 7 /* use new pass manager */ -// #define USE_NEW_PM 1 -#endif /* #if LLVM_VERSION_STRING >= "4.0.1" */ -#if LLVM_VERSION_MAJOR > 4 || \ - (LLVM_VERSION_MAJOR == 4 && LLVM_VERSION_PATCH >= 1) - #define HAVE_VECTOR_INTRINSICS 1 -#endif +#define HAVE_VECTOR_INTRINSICS 1 -#if LLVM_VERSION_MAJOR >= 16 - #include +#include constexpr std::nullopt_t None = std::nullopt; -#endif -#ifdef USE_NEW_PM - #include "llvm/Passes/PassPlugin.h" - #include "llvm/Passes/PassBuilder.h" - #include "llvm/IR/PassManager.h" -#else - #include "llvm/IR/LegacyPassManager.h" - #include "llvm/Transforms/IPO/PassManagerBuilder.h" -#endif - -#include "llvm/IR/Function.h" +// all llvm includes and friends +#include "llvm/Support/CommandLine.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Verifier.h" +#include "llvm/IR/CFG.h" +#include "llvm/Passes/PassPlugin.h" +#include "llvm/Passes/PassBuilder.h" +#include "llvm/IR/PassManager.h" #define FATAL(...) \ do { \ @@ -45,22 +35,6 @@ static uint32_t RandBelow(uint32_t max) { return (uint32_t)rand() % (max + 1); } -/* needed up to 3.9.0 */ -#if LLVM_VERSION_MAJOR == 3 && \ - (LLVM_VERSION_MINOR < 9 || \ - (LLVM_VERSION_MINOR == 9 && LLVM_VERSION_PATCH < 1)) -static uint64_t PowerOf2Ceil(unsigned in) { - uint64_t in64 = in - 1; - in64 |= (in64 >> 1); - in64 |= (in64 >> 2); - in64 |= (in64 >> 4); - in64 |= (in64 >> 8); - in64 |= (in64 >> 16); - in64 |= (in64 >> 32); - return in64 + 1; -} -#endif - /* Function that we never instrument or analyze */ /* Note: this ignore check is also called in isInInstrumentList() */ static inline bool isIgnoreFunction(const llvm::Function *F) { diff --git a/libafl_cc/src/coverage-accounting-pass.cc b/libafl_cc/src/coverage-accounting-pass.cc index 292ee6fc94..bb4fa7b8ab 100644 --- a/libafl_cc/src/coverage-accounting-pass.cc +++ b/libafl_cc/src/coverage-accounting-pass.cc @@ -22,27 +22,6 @@ #include #include -#include "llvm/Support/CommandLine.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/MathExtras.h" - -// Without this, Can't build with llvm-14 & old PM -#if LLVM_VERSION_MAJOR >= 14 && !defined(USE_NEW_PM) - #include "llvm/Pass.h" -#endif - -#if LLVM_VERSION_MAJOR > 3 || \ - (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) - #include "llvm/IR/DebugInfo.h" - #include "llvm/IR/CFG.h" -#else - #include "llvm/DebugInfo.h" - #include "llvm/Support/CFG.h" -#endif - typedef uint32_t prev_loc_t; #define MAP_SIZE ACCOUNTING_MAP_SIZE @@ -180,16 +159,9 @@ bool isSecuritySensitiveFunction(Function *F) { return 0; } -#ifdef USE_NEW_PM class AFLCoverage : public PassInfoMixin { public: AFLCoverage() { -#else -class AFLCoverage : public ModulePass { - public: - static char ID; - AFLCoverage() : ModulePass(ID) { -#endif granularity = StringSwitch(GranularityStr) .Case("BB", BB_GRAN) .Case("FUNC", FUNC_GRAN) @@ -197,11 +169,7 @@ class AFLCoverage : public ModulePass { // initInstrumentList(); } -#ifdef USE_NEW_PM PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); -#else - bool runOnModule(Module &M) override; -#endif protected: uint32_t map_size = MAP_SIZE; @@ -211,23 +179,22 @@ class AFLCoverage : public ModulePass { } // namespace -#ifdef USE_NEW_PM extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK llvmGetPassPluginInfo() { return {LLVM_PLUGIN_API_VERSION, "AFLCoverageAccounting", "v0.1", /* lambda to insert our pass into the pass pipeline. */ [](PassBuilder &PB) { - #if 1 +#if 1 PB.registerOptimizerLastEPCallback( [](ModulePassManager &MPM, OptimizationLevel OL - #if LLVM_VERSION_MAJOR >= 20 + #if LLVM_VERSION_MAJOR >= 20 , ThinOrFullLTOPhase Phase - #endif + #endif ) { MPM.addPass(AFLCoverage()); }); - /* TODO LTO registration */ - #else +/* TODO LTO registration */ +#else using PipelineElement = typename PassBuilder::PipelineElement; PB.registerPipelineParsingCallback([](StringRef Name, ModulePassManager &MPM, @@ -239,29 +206,18 @@ llvmGetPassPluginInfo() { return false; } }); - #endif +#endif }}; } -#else -char AFLCoverage::ID = 0; -#endif - -#ifdef USE_NEW_PM PreservedAnalyses AFLCoverage::run(Module &M, ModuleAnalysisManager &MAM) { -#else -bool AFLCoverage::runOnModule(Module &M) { -#endif - LLVMContext &C = M.getContext(); IntegerType *Int32Ty = IntegerType::getInt32Ty(C); uint32_t rand_seed; unsigned int cur_loc = 0; -#ifdef USE_NEW_PM auto PA = PreservedAnalyses::all(); -#endif /* Setup random() so we get Actually Random(TM) */ rand_seed = time(NULL); @@ -337,35 +293,21 @@ bool AFLCoverage::runOnModule(Module &M) { /* Load prev_loc */ - LoadInst *PrevLoc = IRB.CreateLoad( -#if LLVM_VERSION_MAJOR >= 14 - Int32Ty, -#endif - AFLPrevLoc); + LoadInst *PrevLoc = IRB.CreateLoad(Int32Ty, AFLPrevLoc); PrevLoc->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); /* Load SHM pointer */ - LoadInst *MemReadPtr = IRB.CreateLoad( -#if LLVM_VERSION_MAJOR >= 14 - PointerType::get(Int32Ty, 0), -#endif - AFLMemOpPtr); + LoadInst *MemReadPtr = + IRB.CreateLoad(PointerType::get(Int32Ty, 0), AFLMemOpPtr); MemReadPtr->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); - Value *MemReadPtrIdx = IRB.CreateGEP( -#if LLVM_VERSION_MAJOR >= 14 - Int32Ty, -#endif - MemReadPtr, IRB.CreateXor(PrevLoc, CurLoc)); + Value *MemReadPtrIdx = + IRB.CreateGEP(Int32Ty, MemReadPtr, IRB.CreateXor(PrevLoc, CurLoc)); /* Update bitmap */ - LoadInst *MemReadCount = IRB.CreateLoad( -#if LLVM_VERSION_MAJOR >= 14 - Int32Ty, -#endif - MemReadPtrIdx); + LoadInst *MemReadCount = IRB.CreateLoad(Int32Ty, MemReadPtrIdx); MemReadCount->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); Value *MemReadIncr = @@ -391,22 +333,5 @@ bool AFLCoverage::runOnModule(Module &M) { (unsigned)InstRatio); } -#ifdef USE_NEW_PM return PA; -#else - return true; -#endif -} - -#ifndef USE_NEW_PM -static void registerAFLPass(const PassManagerBuilder &, - legacy::PassManagerBase &PM) { - PM.add(new AFLCoverage()); -} - -static RegisterStandardPasses RegisterAFLPass( - PassManagerBuilder::EP_OptimizerLast, registerAFLPass); - -static RegisterStandardPasses RegisterAFLPass0( - PassManagerBuilder::EP_EnabledOnOptLevel0, registerAFLPass); -#endif +} \ No newline at end of file diff --git a/libafl_cc/src/ctx-pass.cc b/libafl_cc/src/ctx-pass.cc index 3657312df1..80fbe97b76 100644 --- a/libafl_cc/src/ctx-pass.cc +++ b/libafl_cc/src/ctx-pass.cc @@ -34,32 +34,6 @@ #include #include -#include "llvm/Config/llvm-config.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/IR/IRBuilder.h" - -#if USE_NEW_PM - #include "llvm/Passes/PassPlugin.h" - #include "llvm/Passes/PassBuilder.h" - #include "llvm/IR/PassManager.h" -#else - #include "llvm/IR/LegacyPassManager.h" - #include "llvm/Transforms/IPO/PassManagerBuilder.h" -#endif - -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/DebugInfo.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/Verifier.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/Pass.h" -#include "llvm/IR/Constants.h" - #include using namespace llvm; @@ -68,24 +42,12 @@ using namespace llvm; namespace { -#if USE_NEW_PM class CtxPass : public PassInfoMixin { public: CtxPass() { -#else -class CtxPass : public ModulePass { - public: - static char ID; - - CtxPass() : ModulePass(ID) { -#endif } -#if USE_NEW_PM PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); -#else - bool runOnModule(Module &M) override; -#endif protected: uint32_t map_size = MAP_SIZE; @@ -107,7 +69,6 @@ class CtxPass : public ModulePass { } // namespace -#if USE_NEW_PM extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK llvmGetPassPluginInfo() { return {LLVM_PLUGIN_API_VERSION, "CtxPass", "v0.1", @@ -115,23 +76,15 @@ llvmGetPassPluginInfo() { [](PassBuilder &PB) { PB.registerOptimizerLastEPCallback([](ModulePassManager &MPM, OptimizationLevel OL - #if LLVM_VERSION_MAJOR >= 20 +#if LLVM_VERSION_MAJOR >= 20 , ThinOrFullLTOPhase Phase - #endif +#endif ) { MPM.addPass(CtxPass()); }); }}; } -#else -char CtxPass::ID = 0; -#endif -#if USE_NEW_PM PreservedAnalyses CtxPass::run(Module &M, ModuleAnalysisManager &MAM) { -#else -bool CtxPass::runOnModule(Module &M) { - -#endif LLVMContext &C = M.getContext(); auto moduleName = M.getName(); IntegerType *Int8Ty = IntegerType::getInt8Ty(C); @@ -201,27 +154,6 @@ bool CtxPass::runOnModule(Module &M) { } } -#if USE_NEW_PM auto PA = PreservedAnalyses::all(); return PA; -#else - return true; -#endif -} - -#if USE_NEW_PM - -#else -static void registerCtxPass(const PassManagerBuilder &, - legacy::PassManagerBase &PM) { - PM.add(new CtxPass()); -} - -static RegisterPass X("ctx", "ctx instrumentation pass", false, false); - -static RegisterStandardPasses RegisterCtxPass( - PassManagerBuilder::EP_OptimizerLast, registerCtxPass); - -static RegisterStandardPasses RegisterCtxPass0( - PassManagerBuilder::EP_EnabledOnOptLevel0, registerCtxPass); -#endif +} \ No newline at end of file diff --git a/libafl_cc/src/ddg-instr.cc b/libafl_cc/src/ddg-instr.cc deleted file mode 100644 index 2e27c21dbf..0000000000 --- a/libafl_cc/src/ddg-instr.cc +++ /dev/null @@ -1,784 +0,0 @@ -#include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/PassManager.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/Twine.h" -#include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/CFG.h" -#include "llvm/BinaryFormat/MachO.h" -#include "llvm/IR/Argument.h" -#include "llvm/IR/Attributes.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Comdat.h" -#include "llvm/IR/Constant.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DIBuilder.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/DebugLoc.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Dominators.h" -#include "llvm/Analysis/PostDominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/GlobalAlias.h" -#include "llvm/IR/GlobalValue.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InlineAsm.h" -#include "llvm/IR/InstVisitor.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/MDBuilder.h" -#include "llvm/IR/Metadata.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/Use.h" -#include "llvm/IR/Value.h" -#include "llvm/IR/Verifier.h" -#include "llvm/IR/DebugInfo.h" -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/MC/MCSectionMachO.h" -#include "llvm/Pass.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/ScopedPrinter.h" -#include "llvm/Support/raw_ostream.h" -#include -#include "llvm/Transforms/Utils/ASanStackFrameLayout.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/ModuleUtils.h" -#include "llvm/Transforms/Utils/PromoteMemToReg.h" - -// #include "WPA/WPAPass.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "ddg-utils.h" -#include "common-llvm.h" - -#define MAX_DEPTH 3 -#define MIN_FCN_SIZE 1 -#define VAR_NAME_LEN 264 - -#define MAP_SIZE DDG_MAP_SIZE -// #define MAP_SIZE 65536 -#define ALL_BIT_SET (MAP_SIZE - 1) -// #define MAP_SIZE 255 - -// #define INTERPROCEDURAL 1 // unset if you want only intraprocedural ret -// values management BUT #define LOAD_INSTR // considers loads as -// stores - -// #define DEBUG 1 // set if you want debug prints enabled - -#define AFL_SR(s) (srandom(s)) -#define AFL_R(x) (random() % (x)) - -#ifdef DEBUG - #define DEBUG(X) \ - do { \ - X; \ - } while (false) -#else - #define DEBUG(X) ((void)0) -#endif - -using namespace llvm; -// using namespace svf; - -class DDGInstrModulePass : public PassInfoMixin { - private: - void InsertDataFlow(Value *Operand, Value *Res) { - std::map>::iterator it = - this->DataFlowTracker.begin(); - while (it != this->DataFlowTracker.end()) { - std::vector Slice = it->second; - std::vector::iterator jt; - for (jt = Slice.begin(); jt != Slice.end(); ++jt) { - if (Operand == *jt) { - this->DataFlowTracker[it->first].push_back(Res); - break; - } - } - it++; - } - } - - void RetrieveDataFlow(Value *V, std::vector *Dependencies) { - std::map>::iterator it = - this->DataFlowTracker.begin(); - while (it != this->DataFlowTracker.end()) { - std::vector Slice = it->second; - std::vector::iterator jt; - for (jt = Slice.begin(); jt != Slice.end(); ++jt) { - if (V == *jt) { - Dependencies->push_back(it->first); - break; - } - } - it++; - } - } - - bool isSourceCodeVariable(Value *Variable) { - std::map>::iterator it = - this->DataFlowTracker.find(Variable); - return it != this->DataFlowTracker.end(); - } - - bool isLLVMVariable(Value *Variable, - std::map *LLVMVariables) { - std::map::iterator it = - LLVMVariables->find(Variable); - return it != LLVMVariables->end(); - } - - void CreateDataFlow(Value *Variable) { - std::map>::iterator it = - this->DataFlowTracker.find(Variable); - if (it == this->DataFlowTracker.end()) { - this->DataFlowTracker[Variable].push_back(Variable); - } - } - - // When we have `Store A, B`, we want to know that exactly B reperensents. In - // the default case, it is a source code variable and so we're done. BUT, in - // many cases B could represent the field of a struct, or a location whithin a - // buffer. So, we need to recover what B represents to be more precise when we - // define the dependency relationship. - void RetrieveAccessedVariable(Value *Variable, std::vector *Flows, - std::map *LLVMVariables, - Value **ActualSrcVariable) { - if (isLLVMVariable(Variable, LLVMVariables)) { - // If it is an LLVM variable (mostly for struct fields), we have it - // tracked down in the LLVMVariables list, so we just need to parse the - // GEP inst - Instruction *DefiningInstruction = (*LLVMVariables)[Variable]; - // For now we only handle the GEP instructions, maybe in future - // it could be useful to implement other instructions - if (auto GEP = dyn_cast(DefiningInstruction)) { - Value *PtrOperand = GEP->getPointerOperand(); - Variable = PtrOperand; - *ActualSrcVariable = PtrOperand; - if (isSourceCodeVariable(PtrOperand)) { - // We finally could connect an LLVM variable to an actual Source code - // Variable! - for (unsigned int i = 1; i < DefiningInstruction->getNumOperands(); - i++) { // Starts from 1, since 0 is thr PtrOperand - Value *Op = DefiningInstruction->getOperand(i); - if (!isa(Op)) { RetrieveDataFlow(Op, Flows); } - } - return; - } else { - // Re-itereate the Variable analysis - RetrieveAccessedVariable(Variable, Flows, LLVMVariables, - ActualSrcVariable); - } - for (unsigned int i = 1; i < DefiningInstruction->getNumOperands(); - i++) { // Starts from 1, since 0 is thr PtrOperand - Value *Op = DefiningInstruction->getOperand(i); - if (!isa(Op)) { RetrieveDataFlow(Op, Flows); } - } - } - } else { - // If it is not a GEP-defined llvm variable, we basically use the DataFlow - // Tracker, to retrieve the dependency of this variable. The idea is that, - // if this llvm variable is not GEP-depending, it should be easier to - // retrieve what it does represent - std::vector TmpFlow; - RetrieveDataFlow(Variable, &TmpFlow); - if (TmpFlow.size() == 1) { - *ActualSrcVariable = TmpFlow[0]; - // We found a Source Code variable (Variable->getName()) - return; - } else if (TmpFlow.size() > 1) { - *ActualSrcVariable = TmpFlow[0]; - DEBUG(errs() << "[Warning] multiple flows for the same GEP access, " - "choosing the first one\n"); - } else { - return; - } - } - } - - public: - static char ID; - FunctionCallee logger; - Type *VoidTy; - std::map> DataFlowTracker; - - PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM) { - LLVMContext &C = M.getContext(); - - auto &FAM = - MAM.getResult(M).getManager(); - auto DTCallback = [&FAM](Function &F) -> DominatorTree * { - return &FAM.getResult(F); - }; - - auto PDTCallback = [&FAM](Function &F) -> PostDominatorTree * { - return &FAM.getResult(F); - }; - - auto LICallback = [&FAM](Function &F) -> LoopInfo * { - return &FAM.getResult(F); - }; - - IntegerType *Int16Ty = IntegerType::getInt16Ty(C); - IntegerType *Int8Ty = IntegerType::getInt8Ty(C); - // IntegerType *Int32Ty = IntegerType::getInt32Ty(C); - ConstantInt *Zero = ConstantInt::get(Int8Ty, 0); - ConstantInt *One = ConstantInt::get(Int8Ty, 1); - unsigned int instrumentedLocations = 0; - - std::map BlocksLocs; - std::map VisitedBlocks; - ConstantInt *Visited = ConstantInt::get(Int16Ty, 0xff); - ConstantInt *NonVisited = ConstantInt::get(Int16Ty, 0); - ConstantInt *CurLoc; - char *name = nullptr; - unsigned BBCounter = 0; - - unsigned bb_count = 0; - unsigned int cur_loc = 0; - uint32_t map_size = MAP_SIZE; - - struct timeval tv; - struct timezone tz; - unsigned int rand_seed; - - /* Setup random() so we get Actually Random(TM) outputs from AFL_R() */ - gettimeofday(&tv, &tz); - rand_seed = tv.tv_sec ^ tv.tv_usec ^ getpid(); - AFL_SR(rand_seed); - - GlobalVariable *DDGMapPtr = M.getGlobalVariable("__ddg_area_ptr"); - if (DDGMapPtr == nullptr) - DDGMapPtr = - new GlobalVariable(M, PointerType::get(Int8Ty, 0), false, - GlobalValue::ExternalLinkage, 0, "__ddg_area_ptr"); - -#ifdef INTERPROCEDURAL - // For each function we store the return Values - std::map> ReturnValues; - - for (auto &F : M) { - if (F.size() < MIN_FCN_SIZE) continue; - - for (auto &BB : F) { - for (auto &I : BB) { - if (auto RI = dyn_cast(&I)) { - Value *RetVal = RI->getReturnValue(); - if (RetVal) { ReturnValues[&F].push_back(RI); } - } - } - } - } -#endif - - for (auto &F : M) { - if (F.size() < MIN_FCN_SIZE) continue; - - std::map> - Stores; // Represents the nodes of our DataDep Graph - std::vector> - StoreEdges; // Contains the edges of the DDG - std::map> - IncomingEdges; // Map s.t. key is a BB and value is a set of BBs - // whose data flow reaches the key - std::map - LLVMVariables; // LLVM IR Variables which are used as Operands for - // the store (for instance, the ones resulting from a - // GEP) - - BasicBlock &EntryBB = F.getEntryBlock(); - Instruction *FirstInst = &*EntryBB.getFirstNonPHIOrDbg(); - - // First we add the function params to track the dataflow - for (Function::arg_iterator arg_it = F.arg_begin(); arg_it != F.arg_end(); - arg_it++) { - Argument *Arg = arg_it; - if (Value *ArgVariable = dyn_cast(Arg)) { - CreateDataFlow(ArgVariable); - FlowWriteInstruction *MyStore = - new FlowWriteInstruction(&EntryBB, FirstInst, declaration); - Stores[ArgVariable].push_back(MyStore); - } - } - - LoopInfo *LI = LICallback(F); - DominatorTree *DT = DTCallback(F); - PostDominatorTree *PT = PDTCallback(F); - - // We basically want to track data flow between memory instructions - // and call instructions (i.e., the arguments) - - // Here we extract the data dependence info for function F - for (auto &BB : F) { - BBCounter += 1; - for (auto &I : BB) { - // We track all variables "Alloca" derived and we add them to the - // RootNode - if (auto AI = dyn_cast(&I)) { - Value *Variable = static_cast(AI); - CreateDataFlow(Variable); - } - - if (auto LOI = dyn_cast(&I)) { - Value *Variable = LOI->getPointerOperand(); - CreateDataFlow(Variable); -#ifdef LOAD_INSTR - std::vector Flows; - RetrieveDataFlow(Variable, &Flows); - - // If `Variable` does not directly represent a Src code variable, we - // fetch what it represents (e.g., the field of a struct) - if (!isSourceCodeVariable(Variable)) { - Value *ActualSrcVariable = nullptr; - RetrieveAccessedVariable(Variable, &Flows, &LLVMVariables, - &ActualSrcVariable); - if (ActualSrcVariable) Variable = ActualSrcVariable; - } - - for (std::vector::iterator it = Flows.begin(); - it != Flows.end(); ++it) { - Value *Dependency = *it; - - // First we find the edges between the current store and the - // previous ones (i.e., when we wrote into `c` and `b` if the - // current store is `a = c + b`) - std::vector AllStoresPerVariable = - Stores[Dependency]; - unsigned ConsideredStores = 0; - bool *ReachingStores = isReachableByStore( - &AllStoresPerVariable, LOI, &DT, &LI, &ConsideredStores); - - // ReachingStores[0] refers to the last Store instruction that we - // met (i.e., the last in `AllStoresPerVariable` This is why we - // iterate the vector in a reverse way BUT the array in the - // forward - unsigned i = 0; - for (std::vector::reverse_iterator it = - AllStoresPerVariable.rbegin(); - it != AllStoresPerVariable.rend(); it++) { - if (ReachingStores[i] && (i < ConsideredStores)) { - Instruction *Src = (*it)->I; - if (Src == - LOI) // Already managed in the `reachableByStores` method - continue; - if (Src->getParent() != LOI->getParent()) { - StoreEdges.push_back(edge); - IncomingEdges[LOI->getParent()].insert(LOI->getParent()); - DEBUG(errs() << "+++++++++++\nAdding edge\n"); - DEBUG(debug_instruction(Src)); - DEBUG(debug_instruction(LOI)); - DEBUG(errs() << "-----------\n"); - } - } - i++; - } - - delete[] ReachingStores; - } - // Then we insert the new Store in our map that contains all the - // stores, so we build forward deps - FlowWriteInstruction *MyStore = - new FlowWriteInstruction(LOI->getParent(), LOI, declaration); - Stores[Variable].push_back(MyStore); -#endif - } - - if (auto GEP = dyn_cast( - &I)) { // We dedicate an list for GEPs defined llvm vars. - Value *Var = static_cast( - &I); // For other LLVM variables, we use the DataflowTracker - LLVMVariables[Var] = GEP; - } - - // We propagate the dependency info - Value *Result = static_cast(&I); - if (Result and - !isa( - I)) { // We exclude CallInst, as they're managed separately - // (Not excluding them now, would introduce a double - // dependency leading to the same value) - for (unsigned int i = 0; i < I.getNumOperands(); i++) { - Value *Op = I.getOperand(i); - if (!isa(Op)) InsertDataFlow(Op, Result); - } - } -#ifdef INTERPROCEDURAL - else if (Result and isa(I)) { - CallInst *CI = dyn_cast(&I); - Function *CalledFunction = CI->getCalledFunction(); - std::map>::iterator it = - ReturnValues.find(CalledFunction); - if (it != ReturnValues.end()) { - std::vector RetValsInstrs = it->second; - for (std::vector::iterator jt = - RetValsInstrs.begin(); - jt != RetValsInstrs.end(); jt++) { - Instruction *In = *jt; - ReturnInst *Ret = static_cast(In); - Value *RV = Ret->getReturnValue(); - CreateDataFlow(RV); - InsertDataFlow(RV, Result); // We indicate dependency between - // retval and call site - Stores[RV].push_back(new FlowWriteInstruction( - Ret->getParent(), Ret, declaration)); - } - } - } -#endif - // We create the actual DDG depending on mem accesses and Call - // instructions - if (auto ST = dyn_cast(&I)) { - Value *Variable = ST->getPointerOperand(); // Where we're writing - Value *Access = ST->getValueOperand(); // What we're writing, this - // gives us the dependencies - // The current Store is writing `Access` into `Variable` - - std::vector Flows; - RetrieveDataFlow(Access, &Flows); - - // If `Variable` does not directly represent a Src code variable, we - // fetch what it represents (e.g., the field of a struct) - if (!isSourceCodeVariable(Variable)) { - Value *ActualSrcVariable = nullptr; - RetrieveAccessedVariable(Variable, &Flows, &LLVMVariables, - &ActualSrcVariable); - if (ActualSrcVariable) Variable = ActualSrcVariable; - } - - StoreType Type = declaration; // Usually we have `a = c + b` - for (std::vector::iterator it = Flows.begin(); - it != Flows.end(); ++it) { - Value *Dependency = *it; - if (Dependency == Variable) // If we fall into `a += c + b`, we - // manage differently - Type = modification; // Probably we dont need this distinction - // anymore, but keep it for future - // experiments - - // First we find the edges between the current store and the - // previous ones (i.e., when we wrote into `c` and `b` if the - // current store is `a = c + b`) - std::vector AllStoresPerVariable = - Stores[Dependency]; - unsigned ConsideredStores = 0; - bool *ReachingStores = isReachableByStore( - &AllStoresPerVariable, ST, DT, LI, &ConsideredStores); - - // ReachingStores[0] refers to the last Store instruction that we - // met (i.e., the last in `AllStoresPerVariable` This is why we - // iterate the vector in a reverse way BUT the array in the - // forward - unsigned i = 0; - for (std::vector::reverse_iterator it = - AllStoresPerVariable.rbegin(); - it != AllStoresPerVariable.rend(); it++) { - if (ReachingStores[i] && (i < ConsideredStores)) { - Instruction *Src = (*it)->I; - if (Src == - ST) // Already managed in the `reachableByStores` method - continue; - if (isPredecessorBB(Src, - ST)) // Already managed by edge coverage - continue; -#if LLVM_VERSION_MAJOR == 9 - BasicBlock *SrcParent = Src->getParent(); - BasicBlock *STParent = ST->getParent(); - if (PT->dominates(SrcParent, STParent)) -#else - if (PT->dominates(Src, ST)) -#endif - continue; - if (Src->getParent() != ST->getParent()) { - std::tuple edge = - decltype(edge){Src->getParent(), ST->getParent()}; - StoreEdges.push_back(edge); - IncomingEdges[ST->getParent()].insert(Src->getParent()); - DEBUG(errs() << "+++++++++++\nAdding edge\n"); - DEBUG(debug_instruction(Src)); - DEBUG(debug_instruction(ST)); - DEBUG(errs() << "-----------\n"); - } - } - i++; - } - - delete[] ReachingStores; - } - // Then we insert the new Store in our map that contains all the - // stores, so we build forward deps - FlowWriteInstruction *MyStore = - new FlowWriteInstruction(ST->getParent(), ST, Type); - Stores[Variable].push_back(MyStore); - - } - // Three major cases: - // 1) a = foo(x) => a depends on the result of foo() applied - // on x and x depends on its previous values and return value 2) - // memcpy(src, dst, N) => dst depends on src and N && the triple src, - // dst, N depends on their previous value (memcpy or any other API) 3) - // foo(x, out_y, out_z) => out_x, out_y are writen within foo - // depending on x. Thus here the dependency is managed internally to - // the function when passing on it - else if (CallInst *Call = dyn_cast(&I)) { - FlowWriteInstruction *MyStore = nullptr; - Value *Variable = nullptr; - Function *FC = Call->getCalledFunction(); - // DEBUG(errs() << "Looking for dependencies when calling " << - // FC->getName() << "\n"); - int argStart = - 0; // In some cases, we dont want to track dependencies for - // each argument. For instance, for memcpy(src, dst, n), we - // can ignore previous `src` dependencies, since it is being - // written. Rather, for this specific case, we generate a - // FlowWriteInstruction object to save the fact that `src` - // internal value has been modified according to `dst` and - // `n` - - if (FC == nullptr) continue; - if (FC->isIntrinsic()) { - switch (FC->getIntrinsicID()) { - case Intrinsic::memcpy: { - Variable = Call->getArgOperand(0); - std::vector Flows; - RetrieveDataFlow(Variable, &Flows); - if (Flows.size() != 0) Variable = Flows[0]; - MyStore = new FlowWriteInstruction(Call->getParent(), Call, - declaration); - argStart = 1; - break; - } - case Intrinsic::memset: { - // memset does not produce a real dataflow - // errs() << "memset to implement\n"; - break; - } - case Intrinsic::memmove: { - Variable = Call->getArgOperand(0); - std::vector Flows; - RetrieveDataFlow(Variable, &Flows); - if (Flows.size() != 0) Variable = Flows[0]; - MyStore = new FlowWriteInstruction(Call->getParent(), Call, - declaration); - argStart = 1; - break; - } - default: { - // errs() << "Not implemented/interesting intrinsic for data - // flow\n"; - break; - } - } - } - for (unsigned int i = argStart; i < Call->arg_size(); i++) { - Value *ArgOp = Call->getArgOperand(i); - if (!isa(ArgOp)) { - std::vector Flows; - RetrieveDataFlow(ArgOp, &Flows); - - for (std::vector::iterator it = Flows.begin(); - it != Flows.end(); ++it) { - Value *Dependency = *it; - // DEBUG(errs() << "Call depending on: {" << - // Dependency->getName() << "}\n"); - std::vector AllStoresPerVariable = - Stores[Dependency]; - unsigned ConsideredStores = 0; - bool *ReachingStores = isReachableByStore( - &AllStoresPerVariable, Call, DT, LI, &ConsideredStores); - unsigned i = 0; - for (std::vector::reverse_iterator - it = AllStoresPerVariable.rbegin(); - it != AllStoresPerVariable.rend(); it++) { - if (ReachingStores[i] && (i < ConsideredStores)) { - Instruction *Src = (*it)->I; - if (Src == Call) // Already managed in the - // `reachableByStores` method - continue; - if (isPredecessorBB(Src, Call)) continue; -#if LLVM_VERSION_MAJOR == 9 - BasicBlock *SrcParent = Src->getParent(); - BasicBlock *CallParent = Call->getParent(); - if (PT->dominates(SrcParent, CallParent)) -#else - if (PT->dominates(Src, Call)) -#endif - continue; - if (Src->getParent() != Call->getParent()) { - std::tuple edge = - decltype(edge){Src->getParent(), Call->getParent()}; - StoreEdges.push_back(edge); - IncomingEdges[Call->getParent()].insert( - Src->getParent()); - DEBUG(errs() << "+++++++++++\nAdding edge\n"); - DEBUG(debug_instruction(Src)); - DEBUG(debug_instruction(Call)); - DEBUG(errs() << "-----------\n"); - } - } - i++; - } - } - } - } - if (Variable != nullptr && MyStore != nullptr) { - Stores[Variable].push_back(MyStore); - } - } else - continue; - } - } - - // Instrument the locations in the function - BasicBlock::iterator IP = EntryBB.getFirstInsertionPt(); - IRBuilder<> IRB(&(*IP)); - Value *IsCurrentBlockVisited; - - for (auto &BB : F) { - bb_count++; - name = new char[VAR_NAME_LEN]; - memset(name, 0, VAR_NAME_LEN); - snprintf(name, VAR_NAME_LEN, "my_var_%d", BBCounter++); - AllocaInst *AllocaIsCurrentlyBlockVisited = - IRB.CreateAlloca(Int16Ty, nullptr, StringRef(name)); - AllocaIsCurrentlyBlockVisited->setMetadata(M.getMDKindID("nosanitize"), - MDNode::get(C, None)); - IsCurrentBlockVisited = - static_cast(AllocaIsCurrentlyBlockVisited); - StoreInst *InitializeVisited; - if (&EntryBB == &BB) - InitializeVisited = IRB.CreateStore(Visited, IsCurrentBlockVisited); - else - InitializeVisited = - IRB.CreateStore(NonVisited, IsCurrentBlockVisited); - - if (InitializeVisited) - InitializeVisited->setMetadata(M.getMDKindID("nosanitize"), - MDNode::get(C, None)); - - VisitedBlocks[&BB] = IsCurrentBlockVisited; - - // errs() << "MAP SIZE " << std::to_string(map_size) << "\n"; - cur_loc = AFL_R(map_size); - CurLoc = ConstantInt::get(Int16Ty, cur_loc); - BlocksLocs[&BB] = CurLoc; - } - - for (auto &BB : F) { - if (&BB == &EntryBB) continue; - - IP = BB.getFirstInsertionPt(); - IRBuilder<> IRB(&(*IP)); - IsCurrentBlockVisited = VisitedBlocks[&BB]; - - StoreInst *StoreIsVisited = - IRB.CreateStore(Visited, IsCurrentBlockVisited); - StoreIsVisited->setMetadata(M.getMDKindID("nosanitize"), - MDNode::get(C, None)); - - Value *HashedLoc = nullptr; - if (IncomingEdges[&BB].size() <= 1) continue; - for (std::set::iterator it = IncomingEdges[&BB].begin(); - it != IncomingEdges[&BB].end(); ++it) { - Value *isVisited = VisitedBlocks[*it]; - ConstantInt *PotentiallyPreviousLoc = BlocksLocs[*it]; - if (!isVisited or !PotentiallyPreviousLoc) continue; - LoadInst *LoadIsVisited = - IRB.CreateLoad(isVisited->getType(), isVisited); - LoadIsVisited->setMetadata(M.getMDKindID("nosanitize"), - MDNode::get(C, None)); - - Value *PrevLocIfVisited = - IRB.CreateAnd(LoadIsVisited, PotentiallyPreviousLoc); - CurLoc = BlocksLocs[&BB]; - if (HashedLoc == nullptr) - HashedLoc = IRB.CreateXor(CurLoc, PrevLocIfVisited); - else - HashedLoc = IRB.CreateXor(HashedLoc, PrevLocIfVisited); - } - if (HashedLoc == nullptr) continue; - - HashedLoc = IRB.CreateZExt(HashedLoc, IRB.getInt16Ty()); - - LoadInst *MapPtr = - IRB.CreateLoad(PointerType::get(Int8Ty, 0), DDGMapPtr); - MapPtr->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); - - Value *MapPtrIdx = IRB.CreateGEP(Int8Ty, MapPtr, HashedLoc); - LoadInst *Counter = IRB.CreateLoad(Int8Ty, MapPtrIdx); - Counter->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); - - Value *Incr = IRB.CreateAdd(Counter, One); - auto cf = IRB.CreateICmpEQ(Incr, Zero); - auto carry = IRB.CreateZExt(cf, Int8Ty); - Incr = IRB.CreateAdd(Incr, carry); - - StoreInst *StoreMapPtr = IRB.CreateStore(Incr, MapPtrIdx); - StoreMapPtr->setMetadata(M.getMDKindID("nosanitize"), - MDNode::get(C, None)); - - instrumentedLocations++; - } - } - - errs() << "DDG - Instrumented " << instrumentedLocations - << " locations over a total of " << bb_count << " \t\n"; - - auto PA = PreservedAnalyses::all(); - return PA; - } -}; - -extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK -llvmGetPassPluginInfo() { - return {LLVM_PLUGIN_API_VERSION, "DDGInstrPass", "v0.1", - /* lambda to insert our pass into the pass pipeline. */ - [](PassBuilder &PB) { - PB.registerOptimizerLastEPCallback( - [](ModulePassManager &MPM, OptimizationLevel OL -#if LLVM_VERSION_MAJOR >= 20 - , - ThinOrFullLTOPhase Phase -#endif - ) { MPM.addPass(DDGInstrModulePass()); }); - }}; -} diff --git a/libafl_cc/src/ddg-utils.cc b/libafl_cc/src/ddg-utils.cc deleted file mode 100644 index 98c9afe954..0000000000 --- a/libafl_cc/src/ddg-utils.cc +++ /dev/null @@ -1,113 +0,0 @@ -#include "llvm/Analysis/CFG.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/ScopedPrinter.h" -#include "llvm/Support/raw_ostream.h" -#include -#include "llvm/Transforms/Utils/ASanStackFrameLayout.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/ModuleUtils.h" -#include "llvm/Transforms/Utils/PromoteMemToReg.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "ddg-utils.h" - -#define BB_THRESHOLD 16 - -void debug_instruction(Instruction *I) { - DILocation *D = I->getDebugLoc(); - - if (D != NULL) { - errs() << "Line: " << D->getLine() << "\n"; - return; - } - errs() << "[DEBUG] No dbg info recovered\n"; -} - -// void debug_DDG(std::map> graph) { -// std::map>::iterator it = -// graph.begin(); while(it != graph.end()) { -// CustomDDGNode* src = it->first; -// std::vector sinks = it->second; -// -// it++; -// } -// } - -// Checks if Src is in the predecessor BB of To -bool isPredecessorBB(Instruction *Src, Instruction *To) { - BasicBlock *ToParent = To->getParent(); - BasicBlock *SrcParent = Src->getParent(); - for (auto it = pred_begin(ToParent); it != pred_end(ToParent); ++it) { - BasicBlock *predecessor = *it; - if (predecessor == SrcParent) return true; - } - return false; -} - -bool *isReachableByStore(std::vector *From, - Instruction *To, DominatorTree *DT, LoopInfo *LI, - unsigned *ConsideredStores) { - size_t NumberOfStores = From->size(); - unsigned bb_threshold = - NumberOfStores < BB_THRESHOLD ? NumberOfStores : BB_THRESHOLD; - *ConsideredStores = bb_threshold; - FlowWriteInstruction *TopNstores[bb_threshold]; - bool *ReachingStores = new bool[bb_threshold]; - SmallPtrSet ExclusionSet; - unsigned idx = 0; - for (std::vector::reverse_iterator it = - From->rbegin(); - it != From->rend(); it++) { - FlowWriteInstruction *MyStore = *it; - // TopNStores contains the last N stores, which are the ones that we check - // if are reachable. These are put in reverse order, i.e., the position `0` - // (TopNstores[0]) is the last store that we met (which is the last in the - // vector From) - TopNstores[idx] = MyStore; - ExclusionSet.insert(MyStore->BB); - idx++; - if (idx >= bb_threshold) break; - } - - // We need the ExclusionSet to be complete, before startintg with the actual - // check loop - for (int i = 0; i < bb_threshold; i++) { - Instruction *FromInstruction = TopNstores[i]->I; - if (TopNstores[i]->BB == To->getParent()) { - // If the two BBs are the same, we discard this flow. It is not - // interesting since if we reach the BB we cover it - ReachingStores[i] = false; - // continue; // RE-ENABLE THIS WHEN NO DEBUGGING IS NEEDED; - } - ExclusionSet.erase(TopNstores[i]->BB); - if (FromInstruction != To) { - bool r = - isPotentiallyReachable(FromInstruction, To, &ExclusionSet, DT, LI); - // errs() << "isPotentiallyReachable " << r << "\n"; - ReachingStores[i] = r; - } else - ReachingStores[i] = false; // Same instruction not reachable by itself - ExclusionSet.insert(TopNstores[i]->BB); - } - // ReachingStores[0] refers to the last Store instruction that we met - - return ReachingStores; -} \ No newline at end of file diff --git a/libafl_cc/src/ddg-utils.h b/libafl_cc/src/ddg-utils.h deleted file mode 100644 index 9de3bf1df4..0000000000 --- a/libafl_cc/src/ddg-utils.h +++ /dev/null @@ -1,119 +0,0 @@ - -#include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/PassManager.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/Twine.h" -#include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/CFG.h" -#include "llvm/BinaryFormat/MachO.h" -#include "llvm/IR/Argument.h" -#include "llvm/IR/Attributes.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Comdat.h" -#include "llvm/IR/Constant.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DIBuilder.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/DebugLoc.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/GlobalAlias.h" -#include "llvm/IR/GlobalValue.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InlineAsm.h" -#include "llvm/IR/InstVisitor.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/MDBuilder.h" -#include "llvm/IR/Metadata.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/Use.h" -#include "llvm/IR/Value.h" -#include "llvm/IR/Verifier.h" -#include "llvm/IR/DebugInfo.h" -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/MC/MCSectionMachO.h" -#include "llvm/Pass.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/ScopedPrinter.h" -#include "llvm/Support/raw_ostream.h" -#include -#include "llvm/Transforms/Utils/ASanStackFrameLayout.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/ModuleUtils.h" -#include "llvm/Transforms/Utils/PromoteMemToReg.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace llvm; - -enum StoreType { declaration, modification }; - -struct FlowWriteInstruction { - BasicBlock *BB; - Instruction *I; - // Value* WrittenVar; - // Value* WhatWeAreWriting; - // std::vector* WhatWeAreDepending; - StoreType Type; - - FlowWriteInstruction(BasicBlock *_BB, Instruction *_I, StoreType _T) { - this->BB = _BB; - this->I = _I; - this->Type = _T; - } - - FlowWriteInstruction(struct FlowWriteInstruction *S) { - this->BB = S->BB; - this->I = S->I; - this->Type = S->Type; - } -}; - -// Debug - -void debug_instruction(Instruction *I); -// void debug_DDG(std::map> graph); - -// Other util methods - -bool *isReachableByStore(std::vector *From, - Instruction *To, DominatorTree *DT, LoopInfo *LI, - unsigned *ConsideredStores); -bool isPredecessorBB(Instruction *Src, Instruction *To); diff --git a/libafl_cc/src/dump-cfg-pass.cc b/libafl_cc/src/dump-cfg-pass.cc index 37bc91a96f..9903515ef5 100644 --- a/libafl_cc/src/dump-cfg-pass.cc +++ b/libafl_cc/src/dump-cfg-pass.cc @@ -33,32 +33,7 @@ #include #include -#include "llvm/Config/llvm-config.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/IR/IRBuilder.h" - -#if USE_NEW_PM - #include "llvm/Passes/PassPlugin.h" - #include "llvm/Passes/PassBuilder.h" - #include "llvm/IR/PassManager.h" -#else - #include "llvm/IR/LegacyPassManager.h" - #include "llvm/Transforms/IPO/PassManagerBuilder.h" -#endif - -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/DebugInfo.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/Verifier.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/Pass.h" -#include "llvm/IR/Constants.h" - +#include "common-llvm.h" #include #include @@ -74,24 +49,12 @@ using namespace llvm; namespace { -#if USE_NEW_PM class DumpCfgPass : public PassInfoMixin { public: DumpCfgPass() { -#else -class DumpCfgPass : public ModulePass { - public: - static char ID; - - DumpCfgPass() : ModulePass(ID) { -#endif } -#if USE_NEW_PM PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); -#else - bool runOnModule(Module &M) override; -#endif protected: DenseMap bb_to_cur_loc; @@ -115,7 +78,6 @@ class DumpCfgPass : public ModulePass { } // namespace -#if USE_NEW_PM extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK llvmGetPassPluginInfo() { return {LLVM_PLUGIN_API_VERSION, "DumpCfgPass", "v0.1", @@ -123,24 +85,16 @@ llvmGetPassPluginInfo() { [](PassBuilder &PB) { PB.registerOptimizerLastEPCallback( [](ModulePassManager &MPM, OptimizationLevel OL - #if LLVM_VERSION_MAJOR >= 20 +#if LLVM_VERSION_MAJOR >= 20 , ThinOrFullLTOPhase Phase - #endif +#endif ) { MPM.addPass(DumpCfgPass()); }); }}; } -#else -char DumpCfgPass::ID = 0; -#endif -#if USE_NEW_PM PreservedAnalyses DumpCfgPass::run(Module &M, ModuleAnalysisManager &MAM) { -#else -bool DumpCfgPass::runOnModule(Module &M) { - -#endif LLVMContext &Ctx = M.getContext(); auto moduleName = M.getName(); @@ -222,28 +176,6 @@ bool DumpCfgPass::runOnModule(Module &M) { FATAL("CFG_OUTPUT_PATH not set!"); } -#if USE_NEW_PM auto PA = PreservedAnalyses::all(); return PA; -#else - return true; -#endif -} - -#if USE_NEW_PM - -#else -static void registerDumpCfgPass(const PassManagerBuilder &, - legacy::PassManagerBase &PM) { - PM.add(new DumpCfgPass()); -} - -static RegisterPass X("dumpcfg", "dumpcfg instrumentation pass", - false, false); - -static RegisterStandardPasses RegisterDumpCfgPass( - PassManagerBuilder::EP_OptimizerLast, registerDumpCfgPass); - -static RegisterStandardPasses RegisterDumpCfgPass0( - PassManagerBuilder::EP_EnabledOnOptLevel0, registerDumpCfgPass); -#endif +} \ No newline at end of file diff --git a/libafl_cc/src/function-logging.cc b/libafl_cc/src/function-logging.cc index ca2f81d957..28da42cbe5 100644 --- a/libafl_cc/src/function-logging.cc +++ b/libafl_cc/src/function-logging.cc @@ -38,14 +38,9 @@ #include "llvm/ADT/Statistic.h" #include "llvm/IR/IRBuilder.h" -#if USE_NEW_PM - #include "llvm/Passes/PassPlugin.h" - #include "llvm/Passes/PassBuilder.h" - #include "llvm/IR/PassManager.h" -#else - #include "llvm/IR/LegacyPassManager.h" - #include "llvm/Transforms/IPO/PassManagerBuilder.h" -#endif +#include "llvm/Passes/PassPlugin.h" +#include "llvm/Passes/PassBuilder.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Module.h" @@ -68,24 +63,12 @@ using namespace llvm; namespace { -#if USE_NEW_PM class FunctionLogging : public PassInfoMixin { public: FunctionLogging() { -#else -class FunctionLogging : public ModulePass { - public: - static char ID; - - FunctionLogging() : ModulePass(ID) { -#endif } -#if USE_NEW_PM PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); -#else - bool runOnModule(Module &M) override; -#endif protected: uint32_t map_size = MAP_SIZE; @@ -107,7 +90,6 @@ class FunctionLogging : public ModulePass { } // namespace -#if USE_NEW_PM extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK llvmGetPassPluginInfo() { return {LLVM_PLUGIN_API_VERSION, "FunctionLoggingPass", "v0.1", @@ -115,23 +97,15 @@ llvmGetPassPluginInfo() { [](PassBuilder &PB) { PB.registerOptimizerLastEPCallback( [](ModulePassManager &MPM, OptimizationLevel OL - #if LLVM_VERSION_MAJOR >= 20 +#if LLVM_VERSION_MAJOR >= 20 , ThinOrFullLTOPhase Phase - #endif +#endif ) { MPM.addPass(FunctionLogging()); }); }}; } -#else -char FunctionLogging::ID = 0; -#endif -#if USE_NEW_PM PreservedAnalyses FunctionLogging::run(Module &M, ModuleAnalysisManager &MAM) { -#else -bool FunctionLogging::runOnModule(Module &M) { - -#endif LLVMContext &C = M.getContext(); auto moduleName = M.getName(); Type *VoidTy = Type::getVoidTy(C); @@ -163,28 +137,6 @@ bool FunctionLogging::runOnModule(Module &M) { IRB.CreateCall(callHook, args); } -#if USE_NEW_PM auto PA = PreservedAnalyses::all(); return PA; -#else - return true; -#endif -} - -#if USE_NEW_PM - -#else -static void registerFunctionLoggingPass(const PassManagerBuilder &, - legacy::PassManagerBase &PM) { - PM.add(new FunctionLoggingPass()); -} - -static RegisterPass X("function-logging", - "function logging pass", false, false); - -static RegisterStandardPasses RegisterFunctionLogging( - PassManagerBuilder::EP_OptimizerLast, registerFunctionLoggingPass); - -static RegisterStandardPasses RegisterFunctionLogging0( - PassManagerBuilder::EP_EnabledOnOptLevel0, registerFunctionLoggingPass); -#endif +} \ No newline at end of file diff --git a/libafl_cc/src/profiling-pass.cc b/libafl_cc/src/profiling-pass.cc deleted file mode 100644 index 1191f239da..0000000000 --- a/libafl_cc/src/profiling-pass.cc +++ /dev/null @@ -1,795 +0,0 @@ -/* - LibAFL - Profiling LLVM pass - -------------------------------------------------- - - Written by Dongjia Zhang - - Copyright 2022-2023 AFLplusplus Project. All rights reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at: - - http://www.apache.org/licenses/LICENSE-2.0 - -*/ - -// This llvm pass is for conducting static analysis. - -#include -#include -#include -#ifndef _WIN32 - #include - #include -#else - #include -#endif -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -// LLVM Includes - -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Config/llvm-config.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/IRBuilder.h" -#if USE_NEW_PM - #include "llvm/IR/PassManager.h" - #include "llvm/Passes/PassBuilder.h" - #include "llvm/Passes/PassPlugin.h" -#else - #include "llvm/IR/LegacyPassManager.h" -#endif -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DebugInfo.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Verifier.h" -#include "llvm/Pass.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Support/FileSystem.h" - -// Other includes -#include -#include -#include -#include -#include -#include - -using namespace llvm; - -namespace { - -#if USE_NEW_PM -class AnalysisPass : public PassInfoMixin { - public: - AnalysisPass() { -#else -class AnalysisPass : public ModulePass { - public: - static char ID; - - AnalysisPass() : ModulePass(ID) { -#endif - } - -#if USE_NEW_PM - PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); -#else - bool runOnModule(Module &M) override; -#endif - - protected: - DenseMap bb_to_cur_loc; - DenseMap entry_bb; - DenseMap> calls_in_bb; - // DenseMap> structDesc; - // The type name is not in the memory, so create std::strign impromptu - - private: - uint32_t travereScope(DIScope *bottom) { - uint32_t level = 0; - for (auto scope = bottom; !isa(scope); - scope = scope->getScope()) { - level += 1; - } - - return level; - } - - std::string typeWriter(Type *typ) { - // Because there's no string object for the type in the memory - // I have to build the string myself - std::string type_str; - llvm::raw_string_ostream rso(type_str); - typ->print(rso); - return rso.str(); - } - - bool isMemCmp(Module &M, CallBase *cb) { - auto FT = cb->getCalledFunction()->getFunctionType(); - auto FuncName = cb->getCalledFunction()->getName().str(); - - bool isMemcmp = (!FuncName.compare("memcmp") || !FuncName.compare("bcmp") || - !FuncName.compare("CRYPTO_memcmp") || - !FuncName.compare("OPENSSL_memcmp") || - !FuncName.compare("memcmp_const_time") || - !FuncName.compare("memcmpct")); - isMemcmp &= FT->getNumParams() == 3 && - FT->getReturnType()->isIntegerTy(32) && - FT->getParamType(0)->isPointerTy() && - FT->getParamType(1)->isPointerTy() && - FT->getParamType(2)->isIntegerTy(); - return isMemcmp; - } - - bool isStrcmp(Module &M, CallBase *cb) { - auto FT = cb->getCalledFunction()->getFunctionType(); - auto FuncName = cb->getCalledFunction()->getName().str(); - - bool isStrcmp = - (!FuncName.compare("strcmp") || !FuncName.compare("xmlStrcmp") || - !FuncName.compare("xmlStrEqual") || !FuncName.compare("g_strcmp0") || - !FuncName.compare("curl_strequal") || - !FuncName.compare("strcsequal") || !FuncName.compare("strcasecmp") || - !FuncName.compare("stricmp") || !FuncName.compare("ap_cstr_casecmp") || - !FuncName.compare("OPENSSL_strcasecmp") || - !FuncName.compare("xmlStrcasecmp") || - !FuncName.compare("g_strcasecmp") || - !FuncName.compare("g_ascii_strcasecmp") || - !FuncName.compare("Curl_strcasecompare") || - !FuncName.compare("Curl_safe_strcasecompare") || - !FuncName.compare("cmsstrcasecmp") || !FuncName.compare("strstr") || - !FuncName.compare("g_strstr_len") || - !FuncName.compare("ap_strcasestr") || !FuncName.compare("xmlStrstr") || - !FuncName.compare("xmlStrcasestr") || - !FuncName.compare("g_str_has_prefix") || - !FuncName.compare("g_str_has_suffix")); - isStrcmp &= FT->getNumParams() == 2 && - FT->getReturnType()->isIntegerTy(32) && - FT->getParamType(0) == FT->getParamType(1) && - FT->getParamType(0) == - IntegerType::getInt8Ty(M.getContext())->getPointerTo(0); - return isStrcmp; - } - - bool isStrncmp(Module &M, CallBase *cb) { - auto FT = cb->getCalledFunction()->getFunctionType(); - auto FuncName = cb->getCalledFunction()->getName().str(); - - bool isStrncmp = - (!FuncName.compare("strncmp") || !FuncName.compare("xmlStrncmp") || - !FuncName.compare("curl_strnequal") || - !FuncName.compare("strncasecmp") || !FuncName.compare("strnicmp") || - !FuncName.compare("ap_cstr_casecmpn") || - !FuncName.compare("OPENSSL_strncasecmp") || - !FuncName.compare("xmlStrncasecmp") || - !FuncName.compare("g_ascii_strncasecmp") || - !FuncName.compare("Curl_strncasecompare") || - !FuncName.compare("g_strncasecmp")); - isStrncmp &= FT->getNumParams() == 3 && - FT->getReturnType()->isIntegerTy(32) && - FT->getParamType(0) == FT->getParamType(1) && - FT->getParamType(0) == - IntegerType::getInt8Ty(M.getContext())->getPointerTo(0) && - FT->getParamType(2)->isIntegerTy(); - return isStrncmp; - } - - bool isGccStdStringStdString(Module &M, CallBase *cb) { - auto FT = cb->getCalledFunction()->getFunctionType(); - auto Callee = cb->getCalledFunction(); - bool isGccStdStringStdString = - Callee->getName().find("__is_charIT_EE7__value") != std::string::npos && - Callee->getName().find("St7__cxx1112basic_stringIS2_St11char_traits") != - std::string::npos && - FT->getNumParams() >= 2 && FT->getParamType(0) == FT->getParamType(1) && - FT->getParamType(0)->isPointerTy(); - return isGccStdStringStdString; - } - - bool isGccStdStringCString(Module &M, CallBase *cb) { - auto FT = cb->getCalledFunction()->getFunctionType(); - auto Callee = cb->getCalledFunction(); - - bool isGccStdStringCString = - Callee->getName().find( - "St7__cxx1112basic_stringIcSt11char_" - "traitsIcESaIcEE7compareEPK") != std::string::npos && - FT->getNumParams() >= 2 && FT->getParamType(0)->isPointerTy() && - FT->getParamType(1)->isPointerTy(); - return isGccStdStringCString; - } - - bool isLlvmStdStringStdString(Module &M, CallBase *cb) { - auto FT = cb->getCalledFunction()->getFunctionType(); - auto Callee = cb->getCalledFunction(); - - bool isLlvmStdStringStdString = - Callee->getName().find("_ZNSt3__1eqI") != std::string::npos && - Callee->getName().find("_12basic_stringI") != std::string::npos && - Callee->getName().find("_11char_traits") != std::string::npos && - FT->getNumParams() >= 2 && FT->getParamType(0)->isPointerTy() && - FT->getParamType(1)->isPointerTy(); - return isLlvmStdStringStdString; - } - - bool isLlvmStdStringCString(Module &M, CallBase *cb) { - auto FT = cb->getCalledFunction()->getFunctionType(); - auto Callee = cb->getCalledFunction(); - - bool isLlvmStdStringCString = - Callee->getName().find("_ZNSt3__1eqI") != std::string::npos && - Callee->getName().find("_12basic_stringI") != std::string::npos && - FT->getNumParams() >= 2 && FT->getParamType(0)->isPointerTy() && - FT->getParamType(1)->isPointerTy(); - - return isLlvmStdStringCString; - } - - bool isLLVMIntrinsicFn(StringRef &n) { - // Not interested in these LLVM's functions - if (n.starts_with("llvm.")) { - return true; - } else { - return false; - } - } - - bool isMemorySensitiveFn(StringRef &n) { - if (n.equals_insensitive("write") || n.equals_insensitive("read") || - n.equals_insensitive("fgets") || n.equals_insensitive("memcmp") || - n.equals_insensitive("memcpy") || n.equals_insensitive("mempcpy") || - n.equals_insensitive("memmove") || n.equals_insensitive("memset") || - n.equals_insensitive("memchr") || n.equals_insensitive("memrchr") || - n.equals_insensitive("memmem") || n.equals_insensitive("bzero") || - n.equals_insensitive("explicit_bzero") || - n.equals_insensitive("bcmp") || n.equals_insensitive("strchr") || - n.equals_insensitive("strrchr") || n.equals_insensitive("strcasecmp") || - n.equals_insensitive("strncat") || n.equals_insensitive("strerror") || - n.equals_insensitive("strncasecmp") || n.equals_insensitive("strcat") || - n.equals_insensitive("strcmp") || n.equals_insensitive("strspn") || - n.equals_insensitive("strncmp") || n.equals_insensitive("strcpy") || - n.equals_insensitive("strncpy") || n.equals_insensitive("strcoll") || - n.equals_insensitive("stpcpy") || n.equals_insensitive("strdup") || - n.equals_insensitive("strlen") || n.equals_insensitive("strxfrm") || - n.equals_insensitive("strtok") || n.equals_insensitive("strnlen") || - n.equals_insensitive("strstr") || n.equals_insensitive("strcasestr") || - n.equals_insensitive("strscpn") || n.equals_insensitive("strpbrk") || - n.equals_insensitive("atoi") || n.equals_insensitive("atol") || - n.equals_insensitive("atoll") || n.equals_insensitive("wcslen") || - n.equals_insensitive("wcscpy") || n.equals_insensitive("wcscmp")) { - return true; - } else { - return false; - } - } - - bool isMallocFn(StringRef &n) { - if (n.equals_insensitive("malloc") || n.equals_insensitive("calloc") || - n.equals_insensitive("realloc") || - n.equals_insensitive("reallocarray") || - n.equals_insensitive("memalign") || - n.equals_insensitive("__libc_memalign") || - n.equals_insensitive("aligned_alloc") || - n.equals_insensitive("posix_memalign") || - n.equals_insensitive("valloc") || n.equals_insensitive("pvalloc") || - n.equals_insensitive("mmap")) { - return true; - } else { - return false; - } - } - - bool isFreeFn(StringRef &n) { - if (n.equals_insensitive("free") || n.equals_insensitive("cfree") || - n.equals_insensitive("munmap")) { - return true; - } else { - return false; - } - } - - bool isCppNewFn(StringRef &n) { - // operator new[](unsigned long) - // operator new[](unsigned long, std::nothrow_t const&) - // operator new[](unsigned long, std::align_val_t) - // operator new[](unsigned long, std::align_val_t, std::nothrow_t const&) - // operator new(unsigned long) - // operator new(unsigned long, std::nothrow_t const&) - // operator new(unsigned long, std::align_val_t) - // operator new(unsigned long, std::align_val_t, std::nothrow_t const&) - - if (n.equals_insensitive("_Znam") || - n.equals_insensitive("_ZnamRKSt9nothrow_t") || - n.equals_insensitive("_ZnamSt11align_val_t") || - n.equals_insensitive("_ZnamSt11align_val_tRKSt9nothrow_t") || - n.equals_insensitive("_Znwm") || - n.equals_insensitive("_ZnwmRKSt9nothrow_t") || - n.equals_insensitive("_ZnwmSt11align_val_t") || - n.equals_insensitive("_ZnwmSt11align_val_tRKSt9nothrow_t")) { - return true; - } else { - return false; - } - } - - bool isCppDelete(StringRef &n) { - // operator delete[](void*) - // operator delete[](void*, unsigned long) - // operator delete[](void*, unsigned long, std::align_val_t) - // operator delete[](void*, std::nothrow_t const&) - // operator delete[](void*, std::align_val_t) - // operator delete[](void*, std::align_val_t, std::nothrow_t const&) - // operator delete(void*) - // operator delete(void*, unsigned long) - // operator delete(void*, unsigned long, std::align_val_t) - // operator delete(void*, std::nothrow_t const&) - // operator delete(void*, std::align_val_t) - // operator delete(void*, std::align_val_t, std::nothrow_t const&) - - if (n.equals_insensitive("_ZdaPv") || n.equals_insensitive("_ZdaPvm") || - n.equals_insensitive("_ZdaPvmSt11align_val_t") || - n.equals_insensitive("_ZdaPvRKSt9nothrow_t") || - n.equals_insensitive("_ZdaPvSt11align_val_t") || - n.equals_insensitive("_ZdaPvSt11align_val_tRKSt9nothrow_t") || - n.equals_insensitive("_ZdlPv") || n.equals_insensitive("_ZdlPvm") || - n.equals_insensitive("_ZdlPvmSt11align_val_t") || - n.equals_insensitive("_ZdlPvRKSt9nothrow_t") || - n.equals_insensitive("_ZdlPvSt11align_val_t") || - n.equals_insensitive("_ZdlPvSt11align_val_tRKSt9nothrow_t") - - ) { - return true; - } else { - return false; - } - } -}; - -} // namespace - -inline bool file_exist(const std::string &name) { - std::ifstream f(name.c_str()); - return f.good(); -} - -#if USE_NEW_PM -extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK -llvmGetPassPluginInfo() { - return {LLVM_PLUGIN_API_VERSION, "AnalysisPass", "v0.1", - /* lambda to insert our pass into the pass pipeline. */ - [](PassBuilder &PB) { - PB.registerOptimizerLastEPCallback( - [](ModulePassManager &MPM, OptimizationLevel OL - #if LLVM_VERSION_MAJOR >= 20 - , - ThinOrFullLTOPhase Phase - #endif - - ) { MPM.addPass(AnalysisPass()); }); - }}; -} -#else -char AnalysisPass::ID = 0; -#endif - -#if USE_NEW_PM -PreservedAnalyses AnalysisPass::run(Module &M, ModuleAnalysisManager &MAM) { -#else -bool AnalysisPass::runOnModule(Module &M) { - -#endif - - std::string relFilename = M.getSourceFileName(); - llvm::SmallString<128> FilenameVec = StringRef(relFilename); - llvm::SmallString<128> RealPath; - llvm::sys::fs::real_path(FilenameVec, RealPath); - std::filesystem::path fp{std::string(RealPath)}; - std::string genericFilePath = fp.generic_string(); - - std::replace(genericFilePath.begin(), genericFilePath.end(), '/', '#'); - - /* - std::ifstream ifs; - ifs.open("/out/whitelist.txt"); - - if (ifs.fail()) { abort(); } - std::string srcfile; - std::vector srcList; - while (ifs >> srcfile) { - srcList.push_back(srcfile); - } - - bool run = false; - - for (std::string S : srcList) { - if (S == Filename) { - outs() << "Accept " << Filename << "\n"; - run = true; - } - } - */ - bool run = true; - - std::string output_dir; - const char *path = std::getenv("ANALYSIS_OUTPUT"); - if (path != nullptr) { - output_dir = path; - if (std::filesystem::exists(output_dir) && - std::filesystem::is_directory(output_dir)) { - // good - } else { - std::cerr << "Output path is empty!" << std::endl; - } - // Use the output_dir string here - } else { - std::cerr << "Output path not set!" << std::endl; - } - bool done_already = - file_exist(output_dir + std::string("/") + genericFilePath + ".json"); - if (done_already) { - run = false; - } else { - std::ofstream out_lock(output_dir + std::string("/") + genericFilePath + - ".json"); - } - - if (run) { - outs() << "Analysis on " + genericFilePath << "\n"; - LLVMContext &Ctx = M.getContext(); - auto moduleName = M.getName().str(); - nlohmann::json res; - - for (auto &F : M) { - if (F.isDeclaration()) { continue; } - - DenseMap APIcalls; - DenseMap heapAPIs; - DenseMap memoryAPIs; - std::unordered_map nestedLevel; - std::unordered_map cmpGlobals; - std::unordered_map cmpNonZeros; - DenseMap structWrites; - std::unordered_map structArgs; - std::unordered_map cmpTypes; - std::unordered_map callArgTypes; - std::unordered_map storeTypes; - std::unordered_map loadTypes; - std::unordered_map allocaTypes; - std::unordered_map cmpComplexity; - - unsigned bb_cnt = 0; - unsigned inst_cnt = 0; - unsigned edges_cnt = 0; - - unsigned call_cnt = 0; - unsigned cmp_cnt = 0; - unsigned load_cnt = 0; - unsigned store_cnt = 0; - unsigned alloca_cnt = 0; - unsigned branch_cnt = 0; - unsigned binary_op_cnt = 0; - - entry_bb[F.getName()] = &F.getEntryBlock(); - // now we get the sha256sum for this function. (mangled function name - // should be unique else it will result in linker error) by this we make a - // map ( |-> ) - std::size_t hashed = std::hash{}(F.getName().str()); - // cast again as string, it's json, key has to be a string - std::string function_id = std::to_string(hashed); - - for (auto &BB : F) { - bb_to_cur_loc[&BB] = bb_cnt; - bb_cnt++; - for (auto &IN : BB) { - /// Check data types - - auto meta = IN.getMetadata(0); - if (meta) { - DILocation *diloc = nullptr; - if ((diloc = dyn_cast(meta))) { - auto scope = diloc->getScope(); - uint32_t nested_level = travereScope(scope); - nestedLevel[nested_level] += 1; - } - } - - CallBase *callBase = nullptr; - CmpInst *cmpInst = nullptr; - LoadInst *loadInst = nullptr; - StoreInst *storeInst = nullptr; - AllocaInst *allocaInst = nullptr; - BranchInst *branchInst = nullptr; - BinaryOperator *binaryOp = nullptr; - - if ((binaryOp = dyn_cast(&IN))) { - binary_op_cnt++; - } else if ((branchInst = dyn_cast(&IN))) { - branch_cnt++; - } else if ((callBase = dyn_cast(&IN))) { - // What type of call is this? - auto F = callBase->getCalledFunction(); - if (F) { - StringRef name = F->getName(); - if (isLLVMIntrinsicFn(name)) { - // just ignore - continue; - } - APIcalls[name]++; - call_cnt++; - - calls_in_bb[&BB].push_back(name); - // Check memory related calls - if (isMallocFn(name)) { - heapAPIs["malloc"]++; - } else if (isFreeFn(name)) { - heapAPIs["free"]++; - } else if (isCppNewFn(name)) { - heapAPIs["new"]++; - } else if (isCppDelete(name)) { - heapAPIs["delete"]++; - } - - if (isMemorySensitiveFn(name)) { memoryAPIs[name]++; } - - if (isMemCmp(M, callBase)) { - cmpComplexity["mem cmp"]++; - } else if (isStrcmp(M, callBase) || isStrncmp(M, callBase) || - isGccStdStringCString(M, callBase) || - isGccStdStringStdString(M, callBase) || - isLlvmStdStringCString(M, callBase) || - isLlvmStdStringStdString(M, callBase)) { - cmpComplexity["str cmp"]++; - } - - for (auto arg = F->arg_begin(); arg != F->arg_end(); arg++) { - auto arg_ty = arg->getType(); - std::string type_str = typeWriter(arg_ty); - callArgTypes[type_str]++; - } - } - } else if ((cmpInst = dyn_cast(&IN))) { - FCmpInst *fcmp = nullptr; - ICmpInst *icmp = nullptr; - - if ((icmp = dyn_cast(cmpInst))) { - cmpComplexity["int cmp"]++; - } else if ((fcmp = dyn_cast(cmpInst))) { - cmpComplexity["float cmp"]++; - } - auto typ = cmpInst->getOperand(0)->getType(); - - auto op0 = cmpInst->getOperand(0); - auto op1 = cmpInst->getOperand(1); - uint32_t num_constants = 0; - uint32_t non_zero_constants = 0; - - Constant *c1 = nullptr; - Constant *c2 = nullptr; - - if ((c1 = dyn_cast(op0))) { - if (!c1->isZeroValue()) { non_zero_constants++; } - num_constants++; - } - - if ((c2 = dyn_cast(op1))) { - if (c2->isZeroValue()) { non_zero_constants++; } - num_constants++; - } - - cmpGlobals[num_constants]++; - cmpNonZeros[num_constants]++; - cmpTypes[typeWriter(typ)]++; - cmp_cnt++; - } else if ((loadInst = dyn_cast(&IN))) { - auto typ = loadInst->getType(); - loadTypes[typeWriter(typ)]++; - load_cnt++; - } else if ((storeInst = dyn_cast(&IN))) { - auto typ = storeInst->getValueOperand()->getType(); - storeTypes[typeWriter(typ)]++; - // Here check writes into structs - // check where storeInst stores into - auto op = storeInst->getPointerOperand(); - GetElementPtrInst *gep = nullptr; - if ((gep = dyn_cast(op))) { - // If this is a gep? - auto typ = gep->getSourceElementType(); - - if (typ->isStructTy()) { structWrites[typ->getStructName()]++; } - } - - store_cnt++; - } else if ((allocaInst = dyn_cast(&IN))) { - auto typ = allocaInst->getAllocatedType(); - allocaTypes[typeWriter(typ)]++; - alloca_cnt++; - } - - inst_cnt++; - } - - auto term = BB.getTerminator(); - edges_cnt += term->getNumSuccessors(); - - // Dump everything in this Fn - } - - std::string fnname = std::string(F.getName()); - - res[function_id]["name"] = fnname; - - if (bb_cnt) { res[function_id]["# BBs"] = bb_cnt; } - - if (inst_cnt) { res[function_id]["# insts"] = inst_cnt; } - - if (edges_cnt) { res[function_id]["# edges"] = edges_cnt; } - - if (binary_op_cnt) { res[function_id]["# binaryOp"] = binary_op_cnt; } - - if (call_cnt) { res[function_id]["# call"] = call_cnt; } - - if (cmp_cnt) { res[function_id]["# cmp"] = cmp_cnt; } - - if (load_cnt) { res[function_id]["# load"] = load_cnt; } - - if (store_cnt) { res[function_id]["# store"] = store_cnt; } - - if (alloca_cnt) { res[function_id]["# alloca"] = alloca_cnt; } - - if (branch_cnt) { res[function_id]["# branch"] = branch_cnt; } - - res[function_id]["ABC metric"] = - sqrt(alloca_cnt * alloca_cnt + branch_cnt * branch_cnt + - call_cnt * call_cnt); - res[function_id]["cyclomatic"] = edges_cnt - bb_cnt + 2; - - // outs() << "APIs:\n"; - for (auto record = APIcalls.begin(); record != APIcalls.end(); record++) { - auto key = record->getFirst(); - if (!isLLVMIntrinsicFn(key)) { - res[function_id]["AP"][std::string(key)] = APIcalls[key]; - // outs() << key << " " << APIcalls[key] << "\n"; - } - } - // outs() << "\n"; - - // outs() << "memoryAPIs:\n"; - for (auto record = heapAPIs.begin(); record != heapAPIs.end(); record++) { - auto key = record->getFirst(); - res[function_id]["h AP"][std::string(key)] = heapAPIs[key]; - // outs() << key << " " << heapAPIs[key] << "\n"; - } - // outs() << "\n"; - - for (auto record = memoryAPIs.begin(); record != memoryAPIs.end(); - record++) { - auto key = record->getFirst(); - res[function_id]["m AP"][std::string(key)] = memoryAPIs[key]; - // outs() << key << " " << memoryAPIs[key] << "\n"; - } - - for (auto record = nestedLevel.begin(); record != nestedLevel.end(); - record++) { - auto key = record->first; - res[function_id]["ne lv"][std::to_string(key)] = nestedLevel[key]; - // outs() << key << " " << memoryAPIs[key] << "\n"; - } - - for (auto record = cmpGlobals.begin(); record != cmpGlobals.end(); - record++) { - auto key = record->first; - res[function_id]["cm gl"][std::to_string(key)] = cmpGlobals[key]; - // outs() << key << " " << memoryAPIs[key] << "\n"; - } - - for (auto record = cmpNonZeros.begin(); record != cmpNonZeros.end(); - record++) { - auto key = record->first; - res[function_id]["cm nz"][std::to_string(key)] = cmpNonZeros[key]; - // outs() << key << " " << memoryAPIs[key] << "\n"; - } - - // outs() << "writesIntoStructs:\n"; - for (auto record = structWrites.begin(); record != structWrites.end(); - record++) { - auto key = record->getFirst(); - // Some are nameless struct - res[function_id]["wr st"][std::string(key)] = structWrites[key]; - // outs() << key << " " << structWrites[key] << "\n"; - } - // outs() << "\n"; - - // outs() << "StructsInArgs:\n"; - for (auto record = structArgs.begin(); record != structArgs.end(); - record++) { - auto key = record->first; - res[function_id]["str arg"][std::string(key)] = record->second; - // outs() << key << " " << record->second << "\n"; - } - // outs() << "\n"; - - // outs() << "CmpTypes:\n"; - for (auto record = cmpTypes.begin(); record != cmpTypes.end(); record++) { - res[function_id]["cm ty"][record->first] = record->second; - // outs() << record->first << " " << record->second << "\n"; - } - // outs() << "\n"; - - for (auto record = cmpComplexity.begin(); record != cmpComplexity.end(); - record++) { - res[function_id]["cm cm"][record->first] = record->second; - // outs() << record->first << " " << record->second << "\n"; - } - - // outs() << "CallArgTypes:\n"; - for (auto record = callArgTypes.begin(); record != callArgTypes.end(); - record++) { - res[function_id]["ar ty"][record->first] = record->second; - // outs() << record->first << " " << record->second << "\n"; - } - // outs() << "\n"; - - // outs() << "storeTypes:\n"; - for (auto record = storeTypes.begin(); record != storeTypes.end(); - record++) { - res[function_id]["st ty"][record->first] = record->second; - // outs() << record->first << " " << record->second << "\n"; - } - // outs() << "\n"; - - // outs() << "loadTypes:\n"; - for (auto record = loadTypes.begin(); record != loadTypes.end(); - record++) { - res[function_id]["l ty"][record->first] = record->second; - // outs() << record->first << " " << record->second << "\n"; - } - // outs() << "\n"; - - // outs() << "allocaTypes:\n"; - for (auto record = allocaTypes.begin(); record != allocaTypes.end(); - record++) { - res[function_id]["al ty"][record->first] = record->second; - // outs() << record->first << " " << record->second << "\n"; - } - // outs() << "\n"; - - if (getenv("ANALYSIS_OUTPUT")) { - if (std::ofstream(getenv("ANALYSIS_OUTPUT") + std::string("/") + - genericFilePath + ".json") - << res << "\n") { - } else { - errs() << "Failed to write the data" - << "\n"; - } - } else { - errs() << "Failed to write the data, output path not set!" - << "\n"; - } - } - } - -#if USE_NEW_PM - auto PA = PreservedAnalyses::all(); - return PA; -#else - return true; -#endif -} diff --git a/libafl_qemu/src/qemu/mod.rs b/libafl_qemu/src/qemu/mod.rs index d545f1f26d..fd6297b98e 100644 --- a/libafl_qemu/src/qemu/mod.rs +++ b/libafl_qemu/src/qemu/mod.rs @@ -1270,7 +1270,7 @@ pub mod pybind { extern "C" fn py_generic_hook_wrapper(idx: u64, _pc: GuestAddr) { let obj = unsafe { let hooks = &raw mut PY_GENERIC_HOOKS; - &(*hooks)[idx as usize].1 + &(&(*hooks))[idx as usize].1 }; Python::with_gil(|py| { obj.call0(py).expect("Error in the hook"); diff --git a/libafl_targets/build.rs b/libafl_targets/build.rs index 46ad61d818..721a2e4726 100644 --- a/libafl_targets/build.rs +++ b/libafl_targets/build.rs @@ -52,10 +52,6 @@ fn main() { .map_or(Ok(SIXTY_FIVE_KB), str::parse) .expect("Could not parse LIBAFL_ACCOUNTING_MAP_SIZE"); - let ddg_map_size: usize = option_env!("LIBAFL_DDG_MAP_SIZE") - .map_or(Ok(SIXTY_FIVE_KB), str::parse) - .expect("Could not parse LIBAFL_DDG_MAP_SIZE"); - assert!(edges_map_default_size <= edges_map_allocated_size); assert!(edges_map_default_size.is_power_of_two()); @@ -75,8 +71,6 @@ fn main() { pub const CMPLOG_MAP_H: usize = {cmplog_map_h}; /// The size of the accounting maps pub const ACCOUNTING_MAP_SIZE: usize = {acc_map_size}; - /// The size of the accounting maps - pub const DDG_MAP_SIZE: usize = {ddg_map_size}; " ) .expect("Could not write file"); @@ -89,7 +83,6 @@ fn main() { println!("cargo:rerun-if-env-changed=LIBAFL_CMPLOG_MAP_W"); println!("cargo:rerun-if-env-changed=LIBAFL_CMPLOG_MAP_H"); println!("cargo:rerun-if-env-changed=LIBAFL_ACCOUNTING_MAP_SIZE"); - println!("cargo:rerun-if-env-changed=LIBAFL_DDG_MAP_SIZE"); #[cfg(feature = "common")] { @@ -200,7 +193,6 @@ fn main() { Some(&*format!("{edges_map_allocated_size}")), ) .define("ACCOUNTING_MAP_SIZE", Some(&*format!("{acc_map_size}"))) - .define("DDG_MAP_SIZE", Some(&*format!("{ddg_map_size}"))) .compile("coverage"); } diff --git a/libafl_targets/src/coverage.c b/libafl_targets/src/coverage.c index 2ddba3963e..5668749445 100644 --- a/libafl_targets/src/coverage.c +++ b/libafl_targets/src/coverage.c @@ -11,9 +11,6 @@ typedef uint32_t prev_loc_t; extern uint8_t __afl_area_ptr_local[EDGES_MAP_ALLOCATED_SIZE]; uint8_t *__afl_area_ptr = __afl_area_ptr_local; -extern uint8_t __ddg_area_ptr_local[DDG_MAP_SIZE]; -uint8_t *__ddg_area_ptr = __ddg_area_ptr_local; - extern uint32_t __afl_acc_memop_ptr_local[ACCOUNTING_MAP_SIZE]; uint32_t *__afl_acc_memop_ptr = __afl_acc_memop_ptr_local; diff --git a/libafl_targets/src/coverage.rs b/libafl_targets/src/coverage.rs index 2a7e764da9..965a2769c6 100644 --- a/libafl_targets/src/coverage.rs +++ b/libafl_targets/src/coverage.rs @@ -12,7 +12,7 @@ use alloc::borrow::Cow; #[cfg(any(target_os = "linux", target_vendor = "apple"))] use libafl::{Error, mutators::Tokens}; -use crate::{ACCOUNTING_MAP_SIZE, DDG_MAP_SIZE, EDGES_MAP_ALLOCATED_SIZE, EDGES_MAP_DEFAULT_SIZE}; +use crate::{ACCOUNTING_MAP_SIZE, EDGES_MAP_ALLOCATED_SIZE, EDGES_MAP_DEFAULT_SIZE}; /// The map for edges. #[unsafe(no_mangle)] @@ -20,12 +20,6 @@ use crate::{ACCOUNTING_MAP_SIZE, DDG_MAP_SIZE, EDGES_MAP_ALLOCATED_SIZE, EDGES_M pub static mut __afl_area_ptr_local: [u8; EDGES_MAP_ALLOCATED_SIZE] = [0; EDGES_MAP_ALLOCATED_SIZE]; pub use __afl_area_ptr_local as EDGES_MAP; -/// The map for data dependency -#[unsafe(no_mangle)] -#[allow(non_upper_case_globals)] // expect breaks here for some reason -pub static mut __ddg_area_ptr_local: [u8; DDG_MAP_SIZE] = [0; DDG_MAP_SIZE]; -pub use __ddg_area_ptr_local as DDG_MAP; - /// The map for accounting mem writes. #[unsafe(no_mangle)] #[allow(non_upper_case_globals)] // expect breaks here for some reason @@ -42,9 +36,6 @@ unsafe extern "C" { /// The area pointer points to the edges map. pub static mut __afl_area_ptr: *mut u8; - /// The area pointer points to the data flow map - pub static mut __ddg_area_ptr: *mut u8; - /// The area pointer points to the accounting mem operations map. pub static mut __afl_acc_memop_ptr: *mut u32; @@ -58,7 +49,6 @@ unsafe extern "C" { } pub use __afl_acc_memop_ptr as ACCOUNTING_MEMOP_MAP_PTR; pub use __afl_area_ptr as EDGES_MAP_PTR; -pub use __ddg_area_ptr as DDG_MAP_PTR; /// Return Tokens from the compile-time token section #[cfg(any(target_os = "linux", target_vendor = "apple"))] diff --git a/libafl_targets/src/sancov_8bit.rs b/libafl_targets/src/sancov_8bit.rs index 96100d3ebe..9ce2fa4ab6 100644 --- a/libafl_targets/src/sancov_8bit.rs +++ b/libafl_targets/src/sancov_8bit.rs @@ -207,7 +207,7 @@ mod observers { let elem = self.intervals.query(idx..=idx).next().unwrap(); let i = elem.value; let j = idx - elem.interval.start; - unsafe { (*counters_maps_ptr())[*i].as_slice()[j] } + unsafe { (&(*counters_maps_ptr()))[*i].as_slice()[j] } } #[inline] @@ -215,7 +215,7 @@ mod observers { let elem = self.intervals.query_mut(idx..=idx).next().unwrap(); let i = elem.value; let j = idx - elem.interval.start; - unsafe { (*counters_maps_ptr_mut())[*i].as_slice_mut()[j] = val }; + unsafe { (&mut (*counters_maps_ptr_mut()))[*i].as_slice_mut()[j] = val }; } #[inline] diff --git a/utils/libafl_repo_tools/src/main.rs b/utils/libafl_repo_tools/src/main.rs index a080e64f33..ddc7f72023 100644 --- a/utils/libafl_repo_tools/src/main.rs +++ b/utils/libafl_repo_tools/src/main.rs @@ -84,7 +84,7 @@ use tokio::{process::Command, task::JoinSet}; use walkdir::{DirEntry, WalkDir}; use which::which; -const REF_LLVM_VERSION: u32 = 19; +const REF_LLVM_VERSION: u32 = 20; fn is_workspace_toml(path: &Path) -> bool { for line in read_to_string(path).unwrap().lines() {