From 0b38fabeb009a201cce87cc0e56a57f5ea4199dc Mon Sep 17 00:00:00 2001 From: "Dongjia \"toka\" Zhang" Date: Sat, 14 Oct 2023 11:47:16 +0200 Subject: [PATCH] CmpLog {Instruction, Switches} pass (#1612) * switch and instruction pass copy paste * FMT * add cb * put things in the functions * fi * working * oops * options * no 128bit int on windows * no 128bit on windows * oops * unix only * windows * PACKED * let's stop using it, it's ugly * fmt * adjust --------- Co-authored-by: Andrea Fioraldi --- libafl_cc/build.rs | 2 + libafl_cc/src/clang.rs | 12 + libafl_cc/src/cmplog-instructions-pass.cc | 618 ++++++++++++++++++++++ libafl_cc/src/cmplog-switches-pass.cc | 394 ++++++++++++++ libafl_cc/src/no-link-rt.c | 71 +++ libafl_targets/src/cmplog.c | 129 ++++- libafl_targets/src/cmplog.h | 34 ++ libafl_targets/src/cmps/mod.rs | 16 + libafl_targets/src/common.h | 5 + 9 files changed, 1280 insertions(+), 1 deletion(-) create mode 100644 libafl_cc/src/cmplog-instructions-pass.cc create mode 100644 libafl_cc/src/cmplog-switches-pass.cc diff --git a/libafl_cc/build.rs b/libafl_cc/build.rs index c1deeb5fd9..a5d7cc0e3a 100644 --- a/libafl_cc/build.rs +++ b/libafl_cc/build.rs @@ -397,6 +397,8 @@ pub const LIBAFL_CC_LLVM_VERSION: Option = None; "afl-coverage-pass.cc", "autotokens-pass.cc", "coverage-accounting-pass.cc", + "cmplog-instructions-pass.cc", + "cmplog-switches-pass.cc", ] { build_pass( bindir_path, diff --git a/libafl_cc/src/clang.rs b/libafl_cc/src/clang.rs index f81ea62582..24954e9101 100644 --- a/libafl_cc/src/clang.rs +++ b/libafl_cc/src/clang.rs @@ -41,6 +41,12 @@ pub enum LLVMPasses { CoverageAccounting, /// The dump cfg pass DumpCfg, + #[cfg(unix)] + /// The CmpLog Instruction pass + CmpLogInstructions, + #[cfg(unix)] + /// The CmpLog Switch pass + CmpLogSwitches, } impl LLVMPasses { @@ -60,6 +66,12 @@ impl LLVMPasses { LLVMPasses::DumpCfg => { PathBuf::from(env!("OUT_DIR")).join(format!("dump-cfg-pass.{}", dll_extension())) } + #[cfg(unix)] + 
LLVMPasses::CmpLogInstructions => PathBuf::from(env!("OUT_DIR")) + .join(format!("cmplog-instructions-pass.{}", dll_extension())), + #[cfg(unix)] + LLVMPasses::CmpLogSwitches => PathBuf::from(env!("OUT_DIR")) + .join(format!("cmplog-switches-pass.{}", dll_extension())), } } } diff --git a/libafl_cc/src/cmplog-instructions-pass.cc b/libafl_cc/src/cmplog-instructions-pass.cc new file mode 100644 index 0000000000..0a47c3309d --- /dev/null +++ b/libafl_cc/src/cmplog-instructions-pass.cc @@ -0,0 +1,618 @@ +/* + american fuzzy lop++ - LLVM CmpLog instrumentation + -------------------------------------------------- + + Written by Andrea Fioraldi + + Copyright 2015, 2016 Google Inc. All rights reserved. + Copyright 2019-2020 AFLplusplus Project. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at: + + http://www.apache.org/licenses/LICENSE-2.0 + +*/ + +#include +#include +#ifndef _WIN32 + #include + #include +#endif + +#include +#include +#include +#include "llvm/Config/llvm-config.h" + +#if USE_NEW_PM + #include "llvm/Passes/PassPlugin.h" + #include "llvm/Passes/PassBuilder.h" + #include "llvm/IR/PassManager.h" +#else + #include "llvm/IR/LegacyPassManager.h" + #include "llvm/Transforms/IPO/PassManagerBuilder.h" +#endif + +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/ValueTracking.h" + +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) + #include "llvm/IR/Verifier.h" + #include "llvm/IR/DebugInfo.h" +#else + #include "llvm/Analysis/Verifier.h" + #include "llvm/DebugInfo.h" + #define nullptr 0 +#endif + +#include + +using namespace llvm; +static cl::opt 
CmplogExtended("cmplog_instructions_extended", + cl::desc("Uses extended header"), + cl::init(false), cl::NotHidden); +namespace { + +/* Function that we never instrument or analyze */ +/* Note: this ignore check is also called in isInInstrumentList() */ + +/* Function that we never instrument or analyze */ +/* Note: this ignore check is also called in isInInstrumentList() */ +bool isIgnoreFunction(const llvm::Function *F) { + // Starting from "LLVMFuzzer" these are functions used in libfuzzer based + // fuzzing campaign installations, e.g. oss-fuzz + + static constexpr const char *ignoreList[] = { + + "asan.", + "llvm.", + "sancov.", + "__ubsan", + "ign.", + "__afl", + "_fini", + "__libc_", + "__asan", + "__msan", + "__cmplog", + "__sancov", + "__san", + "__cxx_", + "__decide_deferred", + "_GLOBAL", + "_ZZN6__asan", + "_ZZN6__lsan", + "msan.", + "LLVMFuzzerM", + "LLVMFuzzerC", + "LLVMFuzzerI", + "maybe_duplicate_stderr", + "discard_output", + "close_stdout", + "dup_and_close_stderr", + "maybe_close_fd_mask", + "ExecuteFilesOnyByOne" + + }; + + for (auto const &ignoreListFunc : ignoreList) { + if (F->getName().startswith(ignoreListFunc)) { return true; } + } + + static constexpr const char *ignoreSubstringList[] = { + + "__asan", "__msan", "__ubsan", "__lsan", + "__san", "__sanitize", "__cxx", "_GLOBAL__", + "DebugCounter", "DwarfDebug", "DebugLoc" + + }; + + for (auto const &ignoreListFunc : ignoreSubstringList) { + // hexcoder: F->getName().contains() not avaiilable in llvm 3.8.0 + if (StringRef::npos != F->getName().find(ignoreListFunc)) { return true; } + } + + return false; +} + +#if USE_NEW_PM +class CmpLogInstructions : public PassInfoMixin { + public: + CmpLogInstructions() { + } +#else + +class CmpLogInstructions : public ModulePass { + public: + static char ID; + CmpLogInstructions() : ModulePass(ID) { + } +#endif + +#if USE_NEW_PM + PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); +#else + bool runOnModule(Module &M) override; + + #if 
LLVM_VERSION_MAJOR < 4 + const char *getPassName() const override { + #else + StringRef getPassName() const override { + #endif + return "cmplog instructions"; + } +#endif + + private: + bool hookInstrs(Module &M); + bool be_quiet = true; +}; + +} // namespace + +#if USE_NEW_PM +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { + return {LLVM_PLUGIN_API_VERSION, "CmpLogInstructions", "v0.1", + [](PassBuilder &PB) { + #if LLVM_VERSION_MAJOR <= 13 + using OptimizationLevel = typename PassBuilder::OptimizationLevel; + #endif + PB.registerOptimizerLastEPCallback( + [](ModulePassManager &MPM, OptimizationLevel OL) { + MPM.addPass(CmpLogInstructions()); + }); + }}; +} +#else +char CmpLogInstructions::ID = 0; +#endif + +template +Iterator Unique(Iterator first, Iterator last) { + while (first != last) { + Iterator next(first); + last = std::remove(++next, last, *first); + first = next; + } + + return last; +} + +bool CmpLogInstructions::hookInstrs(Module &M) { + std::vector icomps; + LLVMContext &C = M.getContext(); + + Type *VoidTy = Type::getVoidTy(C); + IntegerType *Int8Ty = IntegerType::getInt8Ty(C); + IntegerType *Int16Ty = IntegerType::getInt16Ty(C); + IntegerType *Int32Ty = IntegerType::getInt32Ty(C); + IntegerType *Int64Ty = IntegerType::getInt64Ty(C); + IntegerType *Int128Ty = IntegerType::getInt128Ty(C); + + FunctionCallee cmplogHookIns1; + FunctionCallee cmplogHookIns2; + FunctionCallee cmplogHookIns4; + FunctionCallee cmplogHookIns8; +#ifndef _WIN32 + FunctionCallee cmplogHookIns16; + FunctionCallee cmplogHookInsN; +#endif + if (CmplogExtended) { + cmplogHookIns1 = M.getOrInsertFunction("__cmplog_ins_hook1_extended", + VoidTy, Int8Ty, Int8Ty, Int8Ty); + } else { + cmplogHookIns1 = M.getOrInsertFunction("__cmplog_ins_hook1", VoidTy, Int8Ty, + Int8Ty, Int8Ty); + } + + if (CmplogExtended) { + cmplogHookIns2 = M.getOrInsertFunction("__cmplog_ins_hook2_extended", + VoidTy, Int16Ty, Int16Ty, Int8Ty); + } else { + cmplogHookIns2 
= M.getOrInsertFunction("__cmplog_ins_hook2", VoidTy, + Int16Ty, Int16Ty, Int8Ty); + } + + if (CmplogExtended) { + cmplogHookIns4 = M.getOrInsertFunction("__cmplog_ins_hook4_extended", + VoidTy, Int32Ty, Int32Ty, Int8Ty); + } else { + cmplogHookIns4 = M.getOrInsertFunction("__cmplog_ins_hook4", VoidTy, + Int32Ty, Int32Ty, Int8Ty); + } + + if (CmplogExtended) { + cmplogHookIns8 = M.getOrInsertFunction("__cmplog_ins_hook8_extended", + VoidTy, Int64Ty, Int64Ty, Int8Ty); + } else { + cmplogHookIns8 = M.getOrInsertFunction("__cmplog_ins_hook8", VoidTy, + Int64Ty, Int64Ty, Int8Ty); + } + +#ifndef _WIN32 + if (CmplogExtended) { + cmplogHookIns16 = M.getOrInsertFunction("__cmplog_ins_hook16_extended", + VoidTy, Int128Ty, Int128Ty, Int8Ty); + } else { + cmplogHookIns16 = M.getOrInsertFunction("__cmplog_ins_hook16", VoidTy, + Int128Ty, Int128Ty, Int8Ty); + } + + if (CmplogExtended) { + cmplogHookInsN = M.getOrInsertFunction("__cmplog_ins_hookN_extended", + VoidTy, Int128Ty, Int128Ty, Int8Ty); + } else { + cmplogHookInsN = M.getOrInsertFunction("__cmplog_ins_hookN", VoidTy, + Int128Ty, Int128Ty, Int8Ty); + } +#endif + + Constant *Null = Constant::getNullValue(PointerType::get(Int8Ty, 0)); + + /* iterate over all functions, bbs and instruction and add suitable calls */ + for (auto &F : M) { + if (!isIgnoreFunction(&F)) continue; + + for (auto &BB : F) { + for (auto &IN : BB) { + CmpInst *selectcmpInst = nullptr; + if ((selectcmpInst = dyn_cast(&IN))) { + icomps.push_back(selectcmpInst); + } + } + } + } + + if (icomps.size()) { + // if (!be_quiet) errs() << "Hooking " << icomps.size() << + // " cmp instructions\n"; + + for (auto &selectcmpInst : icomps) { + IRBuilder<> IRB(selectcmpInst->getParent()); + IRB.SetInsertPoint(selectcmpInst); + + Value *op0 = selectcmpInst->getOperand(0); + Value *op1 = selectcmpInst->getOperand(1); + Value *op0_saved = op0, *op1_saved = op1; + auto ty0 = op0->getType(); + auto ty1 = op1->getType(); + + IntegerType *intTyOp0 = NULL; + IntegerType 
*intTyOp1 = NULL; + unsigned max_size = 0, cast_size = 0; + unsigned attr = 0, vector_cnt = 0, is_fp = 0; + CmpInst *cmpInst = dyn_cast(selectcmpInst); + + if (!cmpInst) { continue; } + + switch (cmpInst->getPredicate()) { + case CmpInst::ICMP_NE: + case CmpInst::FCMP_UNE: + case CmpInst::FCMP_ONE: + break; + case CmpInst::ICMP_EQ: + case CmpInst::FCMP_UEQ: + case CmpInst::FCMP_OEQ: + attr += 1; + break; + case CmpInst::ICMP_UGT: + case CmpInst::ICMP_SGT: + case CmpInst::FCMP_OGT: + case CmpInst::FCMP_UGT: + attr += 2; + break; + case CmpInst::ICMP_UGE: + case CmpInst::ICMP_SGE: + case CmpInst::FCMP_OGE: + case CmpInst::FCMP_UGE: + attr += 3; + break; + case CmpInst::ICMP_ULT: + case CmpInst::ICMP_SLT: + case CmpInst::FCMP_OLT: + case CmpInst::FCMP_ULT: + attr += 4; + break; + case CmpInst::ICMP_ULE: + case CmpInst::ICMP_SLE: + case CmpInst::FCMP_OLE: + case CmpInst::FCMP_ULE: + attr += 5; + break; + default: + break; + } + + if (selectcmpInst->getOpcode() == Instruction::FCmp) { + if (ty0->isVectorTy()) { + VectorType *tt = dyn_cast(ty0); + if (!tt) { + fprintf(stderr, "Warning: cmplog cmp vector is not a vector!\n"); + continue; + } + +#if (LLVM_VERSION_MAJOR >= 12) + vector_cnt = tt->getElementCount().getKnownMinValue(); + ty0 = tt->getElementType(); +#endif + } + + if (ty0->isHalfTy() +#if LLVM_VERSION_MAJOR >= 11 + || ty0->isBFloatTy() +#endif + ) + max_size = 16; + else if (ty0->isFloatTy()) + max_size = 32; + else if (ty0->isDoubleTy()) + max_size = 64; + else if (ty0->isX86_FP80Ty()) + max_size = 80; + else if (ty0->isFP128Ty() || ty0->isPPC_FP128Ty()) + max_size = 128; +#if (LLVM_VERSION_MAJOR >= 12) + else if (ty0->getTypeID() != llvm::Type::PointerTyID && !be_quiet) + fprintf(stderr, "Warning: unsupported cmp type for cmplog: %u!\n", + ty0->getTypeID()); +#endif + + attr += 8; + is_fp = 1; + // fprintf(stderr, "HAVE FP %u!\n", vector_cnt); + + } else { + if (ty0->isVectorTy()) { +#if (LLVM_VERSION_MAJOR >= 12) + VectorType *tt = dyn_cast(ty0); + if (!tt) 
{ + fprintf(stderr, "Warning: cmplog cmp vector is not a vector!\n"); + continue; + } + + vector_cnt = tt->getElementCount().getKnownMinValue(); + ty1 = ty0 = tt->getElementType(); +#endif + } + + intTyOp0 = dyn_cast(ty0); + intTyOp1 = dyn_cast(ty1); + + if (intTyOp0 && intTyOp1) { + max_size = intTyOp0->getBitWidth() > intTyOp1->getBitWidth() + ? intTyOp0->getBitWidth() + : intTyOp1->getBitWidth(); + + } else { +#if (LLVM_VERSION_MAJOR >= 12) + if (ty0->getTypeID() != llvm::Type::PointerTyID && !be_quiet) { + fprintf(stderr, "Warning: unsupported cmp type for cmplog: %u\n", + ty0->getTypeID()); + } + +#endif + } + } + + if (!max_size || max_size < 16) { + // fprintf(stderr, "too small\n"); + continue; + } + + if (max_size % 8) { max_size = (((max_size / 8) + 1) * 8); } + + if (max_size > 128) { + if (!be_quiet) { + fprintf(stderr, + "Cannot handle this compare bit size: %u (truncating)\n", + max_size); + } + + max_size = 128; + } + + // do we need to cast? + switch (max_size) { + case 8: + case 16: + case 32: + case 64: + case 128: + cast_size = max_size; + break; + default: + cast_size = 128; + } + + // XXX FIXME BUG TODO + if (is_fp && vector_cnt) { continue; } + + uint64_t cur = 0, last_val0 = 0, last_val1 = 0, cur_val; + + while (1) { + std::vector args; + bool skip = false; + + if (vector_cnt) { + op0 = IRB.CreateExtractElement(op0_saved, cur); + op1 = IRB.CreateExtractElement(op1_saved, cur); + /* + std::string errMsg; + raw_string_ostream os(errMsg); + op0_saved->print(os); + fprintf(stderr, "X: %s\n", os.str().c_str()); + */ + if (is_fp) { + /* + ConstantFP *i0 = dyn_cast(op0); + ConstantFP *i1 = dyn_cast(op1); + // BUG FIXME TODO: this is null ... but why? 
+ // fprintf(stderr, "%p %p\n", i0, i1); + if (i0) { + + cur_val = (uint64_t)i0->getValue().convertToDouble(); + if (last_val0 && last_val0 == cur_val) { skip = true; + + } last_val0 = cur_val; + + } + + if (i1) { + + cur_val = (uint64_t)i1->getValue().convertToDouble(); + if (last_val1 && last_val1 == cur_val) { skip = true; + + } last_val1 = cur_val; + + } + + */ + + } else { + ConstantInt *i0 = dyn_cast(op0); + ConstantInt *i1 = dyn_cast(op1); + if (i0 && i0->uge(0xffffffffffffffff) == false) { + cur_val = i0->getZExtValue(); + if (last_val0 && last_val0 == cur_val) { skip = true; } + last_val0 = cur_val; + } + + if (i1 && i1->uge(0xffffffffffffffff) == false) { + cur_val = i1->getZExtValue(); + if (last_val1 && last_val1 == cur_val) { skip = true; } + last_val1 = cur_val; + } + } + } + + if (!skip) { + // errs() << "[CMPLOG] cmp " << *cmpInst << "(in function " << + // cmpInst->getFunction()->getName() << ")\n"; + + // first bitcast to integer type of the same bitsize as the original + // type (this is a nop, if already integer) + Value *op0_i = IRB.CreateBitCast( + op0, IntegerType::get(C, ty0->getPrimitiveSizeInBits())); + // then create a int cast, which does zext, trunc or bitcast. 
In our + // case usually zext to the next larger supported type (this is a nop + // if already the right type) + Value *V0 = + IRB.CreateIntCast(op0_i, IntegerType::get(C, cast_size), false); + args.push_back(V0); + Value *op1_i = IRB.CreateBitCast( + op1, IntegerType::get(C, ty1->getPrimitiveSizeInBits())); + Value *V1 = + IRB.CreateIntCast(op1_i, IntegerType::get(C, cast_size), false); + args.push_back(V1); + + // errs() << "[CMPLOG] casted parameters:\n0: " << *V0 << "\n1: " << + // *V1 + // << "\n"; + + if (CmplogExtended) { + // Only do this when using extended header + ConstantInt *attribute = ConstantInt::get(Int8Ty, attr); + args.push_back(attribute); + } +#ifndef _WIN32 + if (cast_size != max_size) { + ConstantInt *bitsize = ConstantInt::get(Int8Ty, (max_size / 8) - 1); + args.push_back(bitsize); + } +#endif + + // fprintf(stderr, "_ExtInt(%u) castTo %u with attr %u didcast %u\n", + // max_size, cast_size, attr); + + switch (cast_size) { + case 8: + IRB.CreateCall(cmplogHookIns1, args); + break; + case 16: + IRB.CreateCall(cmplogHookIns2, args); + break; + case 32: + IRB.CreateCall(cmplogHookIns4, args); + break; + case 64: + IRB.CreateCall(cmplogHookIns8, args); + break; +#ifndef _WIN32 + case 128: + if (max_size == 128) { + IRB.CreateCall(cmplogHookIns16, args); + + } else { + IRB.CreateCall(cmplogHookInsN, args); + } + + break; +#endif + } + } + + /* else fprintf(stderr, "skipped\n"); */ + + ++cur; + if (cur >= vector_cnt) { break; } + } + } + } + + if (icomps.size()) { + return true; + } else { + return false; + } +} + +#if USE_NEW_PM +PreservedAnalyses CmpLogInstructions::run(Module &M, + ModuleAnalysisManager &MAM) { +#else +bool CmpLogInstructions::runOnModule(Module &M) { +#endif + hookInstrs(M); + +#if USE_NEW_PM + auto PA = PreservedAnalyses::all(); +#endif + verifyModule(M); + +#if USE_NEW_PM + return PA; +#else + return true; +#endif +} + +#if USE_NEW_PM +#else +static void registerCmpLogInstructionsPass(const PassManagerBuilder &, + 
legacy::PassManagerBase &PM) { + auto p = new CmpLogInstructions(); + PM.add(p); +} + +static RegisterStandardPasses RegisterCmpLogInstructionsPass( + PassManagerBuilder::EP_OptimizerLast, registerCmpLogInstructionsPass); + +static RegisterStandardPasses RegisterCmpLogInstructionsPass0( + PassManagerBuilder::EP_EnabledOnOptLevel0, registerCmpLogInstructionsPass); + +static RegisterStandardPasses RegisterCmpLogInstructionsPassLTO( + PassManagerBuilder::EP_FullLinkTimeOptimizationLast, + registerCmpLogInstructionsPass); + +#endif diff --git a/libafl_cc/src/cmplog-switches-pass.cc b/libafl_cc/src/cmplog-switches-pass.cc new file mode 100644 index 0000000000..2d6c64b580 --- /dev/null +++ b/libafl_cc/src/cmplog-switches-pass.cc @@ -0,0 +1,394 @@ +/* + american fuzzy lop++ - LLVM CmpLog instrumentation + -------------------------------------------------- + + Written by Andrea Fioraldi + + Copyright 2015, 2016 Google Inc. All rights reserved. + Copyright 2019-2020 AFLplusplus Project. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at: + + http://www.apache.org/licenses/LICENSE-2.0 + +*/ + +#include +#include +#ifndef _WIN32 + #include + #include +#endif + +#include +#include +#include +#include "llvm/Config/llvm-config.h" + +#if USE_NEW_PM + #include "llvm/Passes/PassPlugin.h" + #include "llvm/Passes/PassBuilder.h" + #include "llvm/IR/PassManager.h" +#else + #include "llvm/IR/LegacyPassManager.h" + #include "llvm/Transforms/IPO/PassManagerBuilder.h" +#endif + +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/ValueTracking.h" + +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) + #include "llvm/IR/Verifier.h" + #include "llvm/IR/DebugInfo.h" +#else + #include "llvm/Analysis/Verifier.h" + #include "llvm/DebugInfo.h" + #define nullptr 0 +#endif + +#include + +using namespace llvm; + +static cl::opt CmplogExtended("cmplog_switches_extended", + cl::desc("Uses extended header"), + cl::init(false), cl::NotHidden); +namespace { + +/* Function that we never instrument or analyze */ +/* Note: this ignore check is also called in isInInstrumentList() */ +bool isIgnoreFunction(const llvm::Function *F) { + // Starting from "LLVMFuzzer" these are functions used in libfuzzer based + // fuzzing campaign installations, e.g. 
oss-fuzz + + static constexpr const char *ignoreList[] = { + + "asan.", + "llvm.", + "sancov.", + "__ubsan", + "ign.", + "__afl", + "_fini", + "__libc_", + "__asan", + "__msan", + "__cmplog", + "__sancov", + "__san", + "__cxx_", + "__decide_deferred", + "_GLOBAL", + "_ZZN6__asan", + "_ZZN6__lsan", + "msan.", + "LLVMFuzzerM", + "LLVMFuzzerC", + "LLVMFuzzerI", + "maybe_duplicate_stderr", + "discard_output", + "close_stdout", + "dup_and_close_stderr", + "maybe_close_fd_mask", + "ExecuteFilesOnyByOne" + + }; + + for (auto const &ignoreListFunc : ignoreList) { + if (F->getName().startswith(ignoreListFunc)) { return true; } + } + + static constexpr const char *ignoreSubstringList[] = { + + "__asan", "__msan", "__ubsan", "__lsan", + "__san", "__sanitize", "__cxx", "_GLOBAL__", + "DebugCounter", "DwarfDebug", "DebugLoc" + + }; + + for (auto const &ignoreListFunc : ignoreSubstringList) { + // hexcoder: F->getName().contains() not avaiilable in llvm 3.8.0 + if (StringRef::npos != F->getName().find(ignoreListFunc)) { return true; } + } + + return false; +} + +#if USE_NEW_PM +class CmpLogSwitches : public PassInfoMixin { + public: + CmpLogSwitches() { +#else + +class CmpLogSwitches : public ModulePass { + public: + static char ID; + CmpLogSwitches() : ModulePass(ID) { +#endif + } + +#if USE_NEW_PM + PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); +#else + bool runOnModule(Module &M) override; + + #if LLVM_VERSION_MAJOR < 4 + const char *getPassName() const override { + #else + StringRef getPassName() const override { + #endif + return "cmplog switches"; + } +#endif + + private: + bool hookInstrs(Module &M); +}; + +} // namespace + +#if USE_NEW_PM +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { + return {LLVM_PLUGIN_API_VERSION, "CmpLogSwitches", "v0.1", + [](PassBuilder &PB) { + #if LLVM_VERSION_MAJOR <= 13 + using OptimizationLevel = typename PassBuilder::OptimizationLevel; + #endif + PB.registerOptimizerLastEPCallback( + 
[](ModulePassManager &MPM, OptimizationLevel OL) { + MPM.addPass(CmpLogSwitches()); + }); + }}; +} +#else +char CmpLogSwitches::ID = 0; +#endif + +template +Iterator Unique(Iterator first, Iterator last) { + while (first != last) { + Iterator next(first); + last = std::remove(++next, last, *first); + first = next; + } + + return last; +} + +bool CmpLogSwitches::hookInstrs(Module &M) { + std::vector switches; + LLVMContext &C = M.getContext(); + + Type *VoidTy = Type::getVoidTy(C); + IntegerType *Int8Ty = IntegerType::getInt8Ty(C); + IntegerType *Int16Ty = IntegerType::getInt16Ty(C); + IntegerType *Int32Ty = IntegerType::getInt32Ty(C); + IntegerType *Int64Ty = IntegerType::getInt64Ty(C); + + FunctionCallee cmplogHookIns1; + FunctionCallee cmplogHookIns2; + FunctionCallee cmplogHookIns4; + FunctionCallee cmplogHookIns8; + + if (CmplogExtended) { + cmplogHookIns1 = M.getOrInsertFunction("__cmplog_ins_hook1_extended", + VoidTy, Int8Ty, Int8Ty, Int8Ty); + } else { + cmplogHookIns1 = M.getOrInsertFunction("__cmplog_ins_hook1", VoidTy, Int8Ty, + Int8Ty, Int8Ty); + } + + if (CmplogExtended) { + cmplogHookIns2 = M.getOrInsertFunction("__cmplog_ins_hook2_extended", + VoidTy, Int16Ty, Int16Ty, Int8Ty); + } else { + cmplogHookIns2 = M.getOrInsertFunction("__cmplog_ins_hook2", VoidTy, + Int16Ty, Int16Ty, Int8Ty); + } + + if (CmplogExtended) { + cmplogHookIns4 = M.getOrInsertFunction("__cmplog_ins_hook4_extended", + VoidTy, Int32Ty, Int32Ty, Int8Ty); + } else { + cmplogHookIns4 = M.getOrInsertFunction("__cmplog_ins_hook4", VoidTy, + Int32Ty, Int32Ty, Int8Ty); + } + + if (CmplogExtended) { + cmplogHookIns8 = M.getOrInsertFunction("__cmplog_ins_hook8_extended", + VoidTy, Int64Ty, Int64Ty, Int8Ty); + } else { + cmplogHookIns8 = M.getOrInsertFunction("__cmplog_ins_hook8", VoidTy, + Int64Ty, Int64Ty, Int8Ty); + } + + for (auto &F : M) { + if (!isIgnoreFunction(&F)) { continue; } + + for (auto &BB : F) { + SwitchInst *switchInst = nullptr; + if ((switchInst = 
dyn_cast(BB.getTerminator()))) { + if (switchInst->getNumCases() > 1) { switches.push_back(switchInst); } + } + } + } + + switches.erase(Unique(switches.begin(), switches.end()), switches.end()); + + if (switches.size()) { + for (auto &SI : switches) { + Value *Val = SI->getCondition(); + unsigned int max_size = Val->getType()->getIntegerBitWidth(); + unsigned int cast_size; + unsigned char do_cast = 0; + + if (!SI->getNumCases() || max_size < 16) { + // skipping trivial switch + continue; + } + + if (max_size % 8) { + max_size = (((max_size / 8) + 1) * 8); + do_cast = 1; + } + + if (max_size > 128) { + // can't handle this + + max_size = 128; + do_cast = 1; + } + + IRBuilder<> IRB(SI->getParent()); + IRB.SetInsertPoint(SI); + + switch (max_size) { + case 8: + case 16: + case 32: + case 64: + case 128: + cast_size = max_size; + break; + default: + cast_size = 128; + do_cast = 1; + } + + // The predicate of the switch clause + Value *CompareTo = Val; + if (do_cast) { + CompareTo = + IRB.CreateIntCast(CompareTo, IntegerType::get(C, cast_size), false); + } + + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; + ++i) { + // Who uses LLVM Major < 5?? :p + ConstantInt *cint = i->getCaseValue(); + + if (cint) { + std::vector args; + args.push_back(CompareTo); + + Value *new_param = cint; + if (do_cast) { + new_param = + IRB.CreateIntCast(cint, IntegerType::get(C, cast_size), false); + } + + if (new_param) { + args.push_back(new_param); + if (CmplogExtended) { + ConstantInt *attribute = ConstantInt::get(Int8Ty, 1); + args.push_back(attribute); + } + if (cast_size != max_size) { + // not 8, 16, 32, 64, 128. 
+ ConstantInt *bitsize = + ConstantInt::get(Int8Ty, (max_size / 8) - 1); + args.push_back(bitsize); // we have the arg for size in hookinsN + } + + switch (cast_size) { + case 8: + IRB.CreateCall(cmplogHookIns1, args); + break; + case 16: + IRB.CreateCall(cmplogHookIns2, args); + break; + case 32: + IRB.CreateCall(cmplogHookIns4, args); + break; + case 64: + IRB.CreateCall(cmplogHookIns8, args); + break; + case 128: +#ifdef WORD_SIZE_64 + if (max_size == 128) { + IRB.CreateCall(cmplogHookIns16, args); + + } else { + IRB.CreateCall(cmplogHookInsN, args); + } + +#endif + break; + default: + break; + } + } + } + } + } + } +} + +#if USE_NEW_PM +PreservedAnalyses CmpLogSwitches::run(Module &M, ModuleAnalysisManager &MAM) { +#else +bool CmpLogSwitches::runOnModule(Module &M) { +#endif + hookInstrs(M); + +#if USE_NEW_PM + auto PA = PreservedAnalyses::all(); +#endif + verifyModule(M); + +#if USE_NEW_PM + return PA; +#else + return true; +#endif +} + +#if USE_NEW_PM +#else +static void registerCmpLogSwitchesPass(const PassManagerBuilder &, + legacy::PassManagerBase &PM) { + auto p = new CmpLogSwitches(); + PM.add(p); +} + +static RegisterStandardPasses RegisterCmpLogSwitchesPass( + PassManagerBuilder::EP_OptimizerLast, registerCmpLogSwitchesPass); + +static RegisterStandardPasses RegisterCmpLogSwitchesPass0( + PassManagerBuilder::EP_EnabledOnOptLevel0, registerCmpLogSwitchesPass); + +static RegisterStandardPasses RegisterCmpLogSwitchesPassLTO( + PassManagerBuilder::EP_FullLinkTimeOptimizationLast, + registerCmpLogSwitchesPass); + +#endif \ No newline at end of file diff --git a/libafl_cc/src/no-link-rt.c b/libafl_cc/src/no-link-rt.c index c2a972311a..e410405a06 100644 --- a/libafl_cc/src/no-link-rt.c +++ b/libafl_cc/src/no-link-rt.c @@ -1,5 +1,10 @@ #include +#ifndef _WIN32 +typedef unsigned __int128 uint128_t; +typedef uint128_t u128; +#endif + uint8_t *__afl_area_ptr; uint8_t *__afl_acc_memop_ptr; @@ -11,6 +16,72 @@ void __libafl_targets_cmplog_instructions(uintptr_t k, 
uint8_t shape, (void)arg2; } +void __cmplog_ins_hook1_extended(uint8_t arg1, uint8_t arg2, uint8_t attr) { + (void)arg1; + (void)arg2; + (void)attr; +} +void __cmplog_ins_hook1(uint8_t arg1, uint8_t arg2) { + (void)arg1; + (void)arg2; +} + +void __cmplog_ins_hook2_extended(uint16_t arg1, uint16_t arg2, uint8_t attr) { + (void)attr; + (void)arg1; + (void)arg2; +} +void __cmplog_ins_hook2(uint16_t arg1, uint16_t arg2) { + (void)arg1; + (void)arg2; +} + +void __cmplog_ins_hook4_extended(uint32_t arg1, uint32_t arg2, uint8_t attr) { + (void)attr; + (void)arg1; + (void)arg2; +} +void __cmplog_ins_hook4(uint32_t arg1, uint32_t arg2) { + (void)arg1; + (void)arg2; +} + +void __cmplog_ins_hook8_extended(uint64_t arg1, uint64_t arg2, uint8_t attr) { + (void)attr; + (void)arg1; + (void)arg2; +} +void __cmplog_ins_hook8(uint64_t arg1, uint64_t arg2) { + (void)arg1; + (void)arg2; +} + +#ifndef _WIN32 +void __cmplog_ins_hook16_extended(uint128_t arg1, uint128_t arg2, + uint8_t attr) { + (void)attr; + (void)arg1; + (void)arg2; +} +void __cmplog_ins_hook16(uint128_t arg1, uint128_t arg2) { + (void)arg1; + (void)arg2; +} + +void __cmplog_ins_hookN_extended(uint128_t arg1, uint128_t arg2, uint8_t attr, + uint8_t size) { + (void)attr; + (void)size; + (void)arg1; + (void)arg2; +} +void __cmplog_ins_hookN(uint128_t arg1, uint128_t arg2, uint8_t size) { + (void)arg1; + (void)arg2; + (void)size; +} +#endif + void __cmplog_rtn_hook(uint8_t *ptr1, uint8_t *ptr2) { (void)ptr1; (void)ptr2; diff --git a/libafl_targets/src/cmplog.c b/libafl_targets/src/cmplog.c index a0c29e6379..a4146f012b 100644 --- a/libafl_targets/src/cmplog.c +++ b/libafl_targets/src/cmplog.c @@ -37,7 +37,8 @@ __attribute__((weak)) void *__asan_region_is_poisoned(const void *beg, #endif -CmpLogMap *libafl_cmplog_map_ptr = &libafl_cmplog_map; +CmpLogMap *libafl_cmplog_map_ptr = &libafl_cmplog_map; +CmpLogMapExtended *libafl_cmplog_map_extended_ptr = &libafl_cmplog_map_extended; void 
__libafl_targets_cmplog_instructions(uintptr_t k, uint8_t shape, uint64_t arg1, uint64_t arg2) { @@ -63,6 +64,32 @@ void __libafl_targets_cmplog_instructions(uintptr_t k, uint8_t shape, libafl_cmplog_enabled = true; } +void __libafl_targets_cmplog_instructions_extended(uintptr_t k, uint8_t shape, + uint64_t arg1, uint64_t arg2, + uint8_t attr) { + if (!libafl_cmplog_enabled) { return; } + libafl_cmplog_enabled = false; + + uint16_t hits; + if (libafl_cmplog_map_extended_ptr->headers[k].type != CMPLOG_KIND_INS) { + libafl_cmplog_map_extended_ptr->headers[k].type = CMPLOG_KIND_INS; + libafl_cmplog_map_extended_ptr->headers[k].hits = 1; + libafl_cmplog_map_extended_ptr->headers[k].shape = shape; + hits = 0; + } else { + hits = libafl_cmplog_map_extended_ptr->headers[k].hits++; + if (libafl_cmplog_map_extended_ptr->headers[k].shape < shape) { + libafl_cmplog_map_extended_ptr->headers[k].shape = shape; + } + } + + hits &= CMPLOG_MAP_H - 1; + libafl_cmplog_map_extended_ptr->vals.operands[k][hits].v0 = arg1; + libafl_cmplog_map_extended_ptr->vals.operands[k][hits].v1 = arg2; + libafl_cmplog_map_extended_ptr->headers[k].attribute = attr; + libafl_cmplog_enabled = true; +} + // POSIX shenanigan to see if an area is mapped. 
// If it is mapped as X-only, we have a problem, so maybe we should add a check // to avoid to call it on .text addresses @@ -161,6 +188,106 @@ void __libafl_targets_cmplog_routines_len(uintptr_t k, const uint8_t *ptr1, __libafl_targets_cmplog_routines_checked(k, ptr1, ptr2, len); } +/* + CMPLOG Callback for instructions +*/ +#include +void __cmplog_ins_hook1_extended(uint8_t arg1, uint8_t arg2, uint8_t attr) { + uintptr_t k = RETADDR; + k = (k >> 4) ^ (k << 8); + k &= CMPLOG_MAP_W - 1; + + __libafl_targets_cmplog_instructions_extended(k, 0, arg1, arg2, attr); +} +void __cmplog_ins_hook1(uint8_t arg1, uint8_t arg2) { + uintptr_t k = RETADDR; + k = (k >> 4) ^ (k << 8); + k &= CMPLOG_MAP_W - 1; + + __libafl_targets_cmplog_instructions(k, 1, arg1, arg2); +} + +void __cmplog_ins_hook2_extended(uint16_t arg1, uint16_t arg2, uint8_t attr) { + uintptr_t k = RETADDR; + k = (k >> 4) ^ (k << 8); + k &= CMPLOG_MAP_W - 1; + + __libafl_targets_cmplog_instructions_extended(k, 1, arg1, arg2, attr); +} +void __cmplog_ins_hook2(uint16_t arg1, uint16_t arg2) { + uintptr_t k = RETADDR; + k = (k >> 4) ^ (k << 8); + k &= CMPLOG_MAP_W - 1; + + __libafl_targets_cmplog_instructions(k, 2, arg1, arg2); +} + +void __cmplog_ins_hook4_extended(uint32_t arg1, uint32_t arg2, uint8_t attr) { + uintptr_t k = RETADDR; + k = (k >> 4) ^ (k << 8); + k &= CMPLOG_MAP_W - 1; + + __libafl_targets_cmplog_instructions_extended(k, 3, arg1, arg2, attr); +} +void __cmplog_ins_hook4(uint32_t arg1, uint32_t arg2) { + uintptr_t k = RETADDR; + k = (k >> 4) ^ (k << 8); + k &= CMPLOG_MAP_W - 1; + + __libafl_targets_cmplog_instructions(k, 4, arg1, arg2); +} + +void __cmplog_ins_hook8_extended(uint64_t arg1, uint64_t arg2, uint8_t attr) { + uintptr_t k = RETADDR; + k = (k >> 4) ^ (k << 8); + k &= CMPLOG_MAP_W - 1; + + __libafl_targets_cmplog_instructions_extended(k, 7, arg1, arg2, attr); +} +void __cmplog_ins_hook8(uint64_t arg1, uint64_t arg2) { + uintptr_t k = RETADDR; + k = (k >> 4) ^ (k << 8); + k &= CMPLOG_MAP_W 
- 1; + + __libafl_targets_cmplog_instructions(k, 8, arg1, arg2); +} + +#ifndef _WIN32 +void __cmplog_ins_hook16_extended(uint128_t arg1, uint128_t arg2, + uint8_t attr) { + uintptr_t k = RETADDR; + k = (k >> 4) ^ (k << 8); + k &= CMPLOG_MAP_W - 1; + + __libafl_targets_cmplog_instructions_extended(k, 15, arg1, arg2, attr); +} +void __cmplog_ins_hook16(uint128_t arg1, uint128_t arg2) { + uintptr_t k = RETADDR; + k = (k >> 4) ^ (k << 8); + k &= CMPLOG_MAP_W - 1; + + __libafl_targets_cmplog_instructions(k, 16, arg1, arg2); +} + +void __cmplog_ins_hookN_extended(uint128_t arg1, uint128_t arg2, uint8_t attr, + uint8_t size) { + uintptr_t k = RETADDR; + k = (k >> 4) ^ (k << 8); + k &= CMPLOG_MAP_W - 1; + + __libafl_targets_cmplog_instructions_extended(k, size - 1, arg1, arg2, attr); +} +void __cmplog_ins_hookN(uint128_t arg1, uint128_t arg2, uint8_t size) { + uintptr_t k = RETADDR; + k = (k >> 4) ^ (k << 8); + k &= CMPLOG_MAP_W - 1; + + __libafl_targets_cmplog_instructions(k, size, arg1, arg2); +} +#endif +/* + CMPLOG Callback for routines +*/ void __cmplog_rtn_hook(const uint8_t *ptr1, const uint8_t *ptr2) { uintptr_t k = RETADDR; diff --git a/libafl_targets/src/cmplog.h b/libafl_targets/src/cmplog.h index 72eb8bb86c..75a91ab893 100644 --- a/libafl_targets/src/cmplog.h +++ b/libafl_targets/src/cmplog.h @@ -25,6 +25,29 @@ typedef struct CmpLogHeader { uint8_t kind; } CmpLogHeader; +#ifndef _WIN32 +typedef struct CmpLogHeaderExtended { + unsigned hits : 24; + unsigned id : 24; + unsigned shape : 5; + unsigned type : 2; + unsigned attribute : 4; + unsigned overflow : 1; + unsigned reserved : 4; +} __attribute__((packed)) CmpLogHeaderExtended; +#else +__pragma(pack(push, 1)) typedef struct CmpLogHeaderExtended { + unsigned hits : 24; + unsigned id : 24; + unsigned shape : 5; + unsigned type : 2; + unsigned attribute : 4; + unsigned overflow : 1; + unsigned reserved : 4; +} CmpLogHeaderExtended; +__pragma(pack(pop)) +#endif + typedef struct CmpLogInstruction { uint64_t v0; 
uint64_t v1; @@ -43,9 +66,20 @@ typedef struct CmpLogMap { } vals; } CmpLogMap; +typedef struct CmpLogMapExtended { + CmpLogHeaderExtended headers[CMPLOG_MAP_W]; + union { + CmpLogInstruction operands[CMPLOG_MAP_W][CMPLOG_MAP_H]; + CmpLogRoutine routines[CMPLOG_MAP_W][CMPLOG_MAP_RTN_H]; + } vals; +} CmpLogMapExtended; + extern CmpLogMap libafl_cmplog_map; extern CmpLogMap *libafl_cmplog_map_ptr; +extern CmpLogMapExtended libafl_cmplog_map_extended; +extern CmpLogMapExtended *libafl_cmplog_map_extended_ptr; + extern uint8_t libafl_cmplog_enabled; void __libafl_targets_cmplog_instructions(uintptr_t k, uint8_t shape, diff --git a/libafl_targets/src/cmps/mod.rs b/libafl_targets/src/cmps/mod.rs index 5f13908ec4..8d4987bbdc 100644 --- a/libafl_targets/src/cmps/mod.rs +++ b/libafl_targets/src/cmps/mod.rs @@ -405,7 +405,23 @@ pub static mut libafl_cmplog_map: CmpLogMap = CmpLogMap { }, }; +/// The global `CmpLog` map, aflpp style +#[no_mangle] +#[allow(clippy::large_stack_arrays)] +pub static mut libafl_cmplog_map_extended: AFLppCmpLogMap = AFLppCmpLogMap { + headers: [AFLppCmpLogHeader { data: [0; 8] }; CMPLOG_MAP_W], + vals: AFLppCmpLogVals { + operands: [[AFLppCmpLogOperands { + v0: 0, + v1: 0, + v0_128: 0, + v1_128: 0, + }; CMPLOG_MAP_H]; CMPLOG_MAP_W], + }, +}; + pub use libafl_cmplog_map as CMPLOG_MAP; +pub use libafl_cmplog_map_extended as CMPLOG_MAP_EXTENDED; #[derive(Debug, Clone, Copy)] #[repr(C, packed)] diff --git a/libafl_targets/src/common.h b/libafl_targets/src/common.h index 0960b9719e..59be9e7dee 100644 --- a/libafl_targets/src/common.h +++ b/libafl_targets/src/common.h @@ -6,6 +6,11 @@ #define true 1 #define false 0 +#ifndef _WIN32 +typedef unsigned __int128 uint128_t; +typedef uint128_t u128; +#endif + #define STATIC_ASSERT(pred) \ switch (0) { \ case 0: \