From 62614ce1016c86e3f00f35b56399292ceabd486b Mon Sep 17 00:00:00 2001 From: Dongjia Zhang Date: Thu, 27 Jan 2022 03:23:04 +0900 Subject: [PATCH] LLVM AutoTokens (#470) * posix dict2file llvm pass * new PM * working * clean up * fmt * fix * silence clippy * bring the println back * early return * rename * weak symbols * linux onky * fuzzbench change * only linux * linux only * cfg * cfg * fix * fix * fix * why * fix * bug fix * rename * rename * macros & rename * add_from_autotokens * fix fuzzbench * std -> core * builder pattern? * clippy * wrong cfg * cfgstd * fuzzbench fmt * no unsafe * update fuzzbench_text * use TokenSectiopn Co-authored-by: Andrea Fioraldi --- fuzzers/fuzzbench/src/bin/libafl_cc.rs | 4 + fuzzers/fuzzbench/src/lib.rs | 21 +- fuzzers/fuzzbench_text/src/bin/libafl_cc.rs | 4 + fuzzers/fuzzbench_text/src/lib.rs | 37 +- fuzzers/libfuzzer_libpng_ctx/Makefile | 6 +- libafl/src/mutators/token_mutations.rs | 100 ++- libafl_cc/build.rs | 11 + libafl_cc/src/autotokens-pass.cc | 737 ++++++++++++++++++++ libafl_cc/src/clang.rs | 5 + libafl_targets/src/common.h | 9 + libafl_targets/src/coverage.c | 14 + libafl_targets/src/coverage.rs | 15 + 12 files changed, 947 insertions(+), 16 deletions(-) create mode 100644 libafl_cc/src/autotokens-pass.cc diff --git a/fuzzers/fuzzbench/src/bin/libafl_cc.rs b/fuzzers/fuzzbench/src/bin/libafl_cc.rs index 8c9e37f638..68a84b0333 100644 --- a/fuzzers/fuzzbench/src/bin/libafl_cc.rs +++ b/fuzzers/fuzzbench/src/bin/libafl_cc.rs @@ -16,6 +16,10 @@ pub fn main() { dir.pop(); let mut cc = ClangWrapper::new(); + + #[cfg(target_os = "linux")] + cc.add_pass(LLVMPasses::AutoTokens); + if let Some(code) = cc .cpp(is_cpp) // silence the compiler wrapper output, needed for some configure scripts. 
diff --git a/fuzzers/fuzzbench/src/lib.rs b/fuzzers/fuzzbench/src/lib.rs index 7af42e89c1..ad9de01327 100644 --- a/fuzzers/fuzzbench/src/lib.rs +++ b/fuzzers/fuzzbench/src/lib.rs @@ -38,7 +38,7 @@ use libafl::{ monitors::SimpleMonitor, mutators::{ scheduled::havoc_mutations, token_mutations::I2SRandReplace, tokens_mutations, - StdMOptMutator, StdScheduledMutator, Tokens, + StdMOptMutator, StdScheduledMutator, TokenSection, Tokens, }, observers::{HitcountsMapObserver, StdMapObserver, TimeObserver}, stages::{ @@ -54,6 +54,9 @@ use libafl_targets::{ MAX_EDGES_NUM, }; +#[cfg(target_os = "linux")] +use libafl_targets::token_section; + /// The fuzzer main (as `no_mangle` C function) #[no_mangle] pub fn libafl_main() { @@ -352,9 +355,19 @@ fn fuzz( let mut stages = tuple_list!(calibration, tracing, i2s, power); // Read tokens - if let Some(tokenfile) = tokenfile { - if state.metadata().get::().is_none() { - state.add_metadata(Tokens::from_tokens_file(tokenfile)?); + if state.metadata().get::().is_none() { + let mut toks = Tokens::default(); + if let Some(tokenfile) = tokenfile { + toks = toks.parse_tokens_file(vec![tokenfile])?; + } + #[cfg(target_os = "linux")] + { + let token_section = TokenSection::new(token_section()); + toks = toks.parse_autotokens(token_section)?; + } + + if !toks.tokens().is_empty() { + state.add_metadata(toks); } } diff --git a/fuzzers/fuzzbench_text/src/bin/libafl_cc.rs b/fuzzers/fuzzbench_text/src/bin/libafl_cc.rs index 8c9e37f638..68a84b0333 100644 --- a/fuzzers/fuzzbench_text/src/bin/libafl_cc.rs +++ b/fuzzers/fuzzbench_text/src/bin/libafl_cc.rs @@ -16,6 +16,10 @@ pub fn main() { dir.pop(); let mut cc = ClangWrapper::new(); + + #[cfg(target_os = "linux")] + cc.add_pass(LLVMPasses::AutoTokens); + if let Some(code) = cc .cpp(is_cpp) // silence the compiler wrapper output, needed for some configure scripts. 
diff --git a/fuzzers/fuzzbench_text/src/lib.rs b/fuzzers/fuzzbench_text/src/lib.rs index 99fef86fe4..4be2e33846 100644 --- a/fuzzers/fuzzbench_text/src/lib.rs +++ b/fuzzers/fuzzbench_text/src/lib.rs @@ -44,7 +44,7 @@ use libafl::{ }, scheduled::havoc_mutations, token_mutations::I2SRandReplace, - tokens_mutations, StdMOptMutator, StdScheduledMutator, Tokens, + tokens_mutations, StdMOptMutator, StdScheduledMutator, TokenSection, Tokens, }, observers::{HitcountsMapObserver, StdMapObserver, TimeObserver}, stages::{ @@ -60,6 +60,9 @@ use libafl_targets::{ MAX_EDGES_NUM, }; +#[cfg(target_os = "linux")] +use libafl_targets::token_section; + /// The fuzzer main (as `no_mangle` C function) #[no_mangle] pub fn libafl_main() { @@ -413,9 +416,19 @@ fn fuzz_binary( let mut stages = tuple_list!(calibration, tracing, i2s, power); // Read tokens - if let Some(tokenfile) = tokenfile { - if state.metadata().get::().is_none() { - state.add_metadata(Tokens::from_tokens_file(tokenfile)?); + if state.metadata().get::().is_none() { + let mut toks = Tokens::default(); + if let Some(tokenfile) = tokenfile { + toks = toks.parse_tokens_file(vec![tokenfile])?; + } + #[cfg(target_os = "linux")] + { + let token_section = TokenSection::new(token_section()); + toks = toks.parse_autotokens(token_section)?; + } + + if !toks.tokens().is_empty() { + state.add_metadata(toks); } } @@ -623,9 +636,19 @@ fn fuzz_text( let mut stages = tuple_list!(generalization, calibration, tracing, i2s, power, grimoire); // Read tokens - if let Some(tokenfile) = tokenfile { - if state.metadata().get::().is_none() { - state.add_metadata(Tokens::from_tokens_file(tokenfile)?); + if state.metadata().get::().is_none() { + let mut toks = Tokens::default(); + if let Some(tokenfile) = tokenfile { + toks = toks.parse_tokens_file(vec![tokenfile])?; + } + #[cfg(target_os = "linux")] + { + let token_section = TokenSection::new(token_section()); + toks = toks.parse_autotokens(token_section)?; + } + + if !toks.tokens().is_empty() { + 
state.add_metadata(toks); } } diff --git a/fuzzers/libfuzzer_libpng_ctx/Makefile b/fuzzers/libfuzzer_libpng_ctx/Makefile index ca0a514241..e7ed9d10c5 100644 --- a/fuzzers/libfuzzer_libpng_ctx/Makefile +++ b/fuzzers/libfuzzer_libpng_ctx/Makefile @@ -40,11 +40,11 @@ clean: $(MAKE) -C libpng-1.6.37 clean run: all - ./$(FUZZER_NAME) --cores 0 & + ./$(FUZZER_NAME) --cores 0 --input ./corpus & short_test: all rm -rf libafl_unix_shmem_server || true - timeout 10s ./$(FUZZER_NAME) --cores 0 & + timeout 10s ./$(FUZZER_NAME) --cores 0 --input ./corpus & test: all - timeout 60s ./$(FUZZER_NAME) --cores 0 & + timeout 60s ./$(FUZZER_NAME) --cores 0 --input ./corpus & diff --git a/libafl/src/mutators/token_mutations.rs b/libafl/src/mutators/token_mutations.rs index e7f3e389cf..ba3a4d16c3 100644 --- a/libafl/src/mutators/token_mutations.rs +++ b/libafl/src/mutators/token_mutations.rs @@ -22,8 +22,27 @@ use crate::{ Error, }; +#[derive(Debug, Clone, Copy)] +/// Struct for token start and end +pub struct TokenSection { + start: *const u8, + stop: *const u8, +} + +impl TokenSection { + /// Init + #[must_use] + pub fn new(section: (*const u8, *const u8)) -> Self { + Self { + start: section.0, + stop: section.1, + } + } +} + /// A state metadata holding a list of tokens -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Default, Serialize, Deserialize)] +#[allow(clippy::unsafe_derive_deserialize)] pub struct Tokens { token_vec: Vec>, } @@ -38,6 +57,83 @@ impl Tokens { Self { token_vec } } + #[must_use] + /// Build tokens from vec + pub fn parse_vec(mut self, vec: Vec>) -> Self { + self.token_vec = vec; + self + } + + /// Build tokens from files + #[cfg(feature = "std")] + pub fn parse_tokens_file

<P>(mut self, files: Vec<P>

) -> Result + where + P: AsRef, + { + for file in files { + self.add_tokens_from_file(file)?; + } + Ok(self) + } + + /// Build tokens from autotokens + pub fn parse_autotokens(mut self, autotoken: TokenSection) -> Result { + unsafe { + self.add_from_autotokens(autotoken)?; + } + Ok(self) + } + + /// Reads from an autotokens section, returning the count of new entries read + pub unsafe fn add_from_autotokens(&mut self, autotoken: TokenSection) -> Result { + if cfg!(target_os = "linux") { + let mut entries = 0; + let token_start = autotoken.start; + let token_stop = autotoken.stop; + let section_size: usize = token_stop.offset_from(token_start).try_into().unwrap(); + // println!("size: {}", section_size); + let slice = core::slice::from_raw_parts(token_start, section_size); + + let mut head = 0; + + // Now we know the beginning and the end of the token section.. let's parse them into tokens + loop { + if head >= section_size { + // Sanity Check + assert!(head == section_size); + break; + } + let size = slice[head] as usize; + head += 1; + if size > 0 { + self.add_token(&slice[head..head + size].to_vec()); + #[cfg(feature = "std")] + println!( + "Token size: {} content: {:x?}", + size, + &slice[head..head + size].to_vec() + ); + head += size; + entries += 1; + } + } + + Ok(entries) + } else { + // TODO: Autodict for OSX and windows + Ok(0) + } + } + + /// Creates a new token from autotokens + pub fn from_autotokens(autotoken: TokenSection) -> Result { + let mut ret = Self::new(vec![]); + unsafe { + ret.add_from_autotokens(autotoken)?; + } + Ok(ret) + } + /// Creates a new instance from a file #[cfg(feature = "std")] pub fn from_tokens_file

<P>(file: P) -> Result @@ -62,7 +158,7 @@ impl Tokens { /// Reads a tokens file, returning the count of new entries read #[cfg(feature = "std")] - pub fn add_tokens_from_file

<P>(&mut self, file: P) -> Result + pub fn add_tokens_from_file

(&mut self, file: P) -> Result where P: AsRef, { diff --git a/libafl_cc/build.rs b/libafl_cc/build.rs index dd52f6452f..cecc1e5a10 100644 --- a/libafl_cc/build.rs +++ b/libafl_cc/build.rs @@ -142,6 +142,7 @@ fn main() { println!("cargo:rerun-if-changed=src/cmplog-routines-pass.cc"); println!("cargo:rerun-if-changed=src/afl-coverage-pass.cc"); + println!("cargo:rerun-if-changed=src/autotokens-pass.cc"); let _ = Command::new(llvm_bindir.join("clang++")) .args(&cxxflags) @@ -162,6 +163,16 @@ fn main() { .arg(out_dir.join(format!("afl-coverage-pass.{}", dll_extension()))) .status() .expect("Failed to compile afl-coverage-pass.cc"); + + let _ = Command::new(llvm_bindir.join("clang++")) + .args(&cxxflags) + .args(&custom_flags) + .arg(src_dir.join("autotokens-pass.cc")) + .args(&ldflags) + .args(&["-fPIC", "-shared", "-o"]) + .arg(out_dir.join(format!("autotokens-pass.{}", dll_extension()))) + .status() + .expect("Failed to compile autotokens-pass.cc"); } else { write!( &mut clang_constants_file, diff --git a/libafl_cc/src/autotokens-pass.cc b/libafl_cc/src/autotokens-pass.cc new file mode 100644 index 0000000000..ab17fad60b --- /dev/null +++ b/libafl_cc/src/autotokens-pass.cc @@ -0,0 +1,737 @@ +/* + american fuzzy lop++ - LLVM LTO instrumentation pass + ---------------------------------------------------- + + Written by Marc Heuse + + Copyright 2019-2020 AFLplusplus Project. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at: + + http://www.apache.org/licenses/LICENSE-2.0 + + This library is plugged into LLVM when invoking clang through afl-clang-lto. 
+ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "llvm/Config/llvm-config.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/IRBuilder.h" + +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" + +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Pass.h" +#include "llvm/IR/Constants.h" + +#ifndef O_DSYNC + #define O_DSYNC O_SYNC +#endif + +// The max length of a token +#define MAX_AUTO_EXTRA 32 + +#define USE_AUTO_EXTRAS 4096 +#define MAX_AUTO_EXTRAS (USE_AUTO_EXTRAS * 8) + +#include + +#define FATAL(x...) \ + do { \ + \ + fprintf(stderr, "FATAL: " x); \ + exit(1); \ + \ + } while (0) + +using namespace llvm; + +namespace { + +/* Function that we never instrument or analyze */ +/* Note: this ignore check is also called in isInInstrumentList() */ +bool isIgnoreFunction(const llvm::Function *F) { + + // Starting from "LLVMFuzzer" these are functions used in libfuzzer based + // fuzzing campaign installations, e.g. 
oss-fuzz + + static constexpr const char *ignoreList[] = { + + "asan.", + "llvm.", + "sancov.", + "__ubsan", + "ign.", + "__afl", + "_fini", + "__libc_", + "__asan", + "__msan", + "__cmplog", + "__sancov", + "__san", + "__cxx_", + "__decide_deferred", + "_GLOBAL", + "_ZZN6__asan", + "_ZZN6__lsan", + "msan.", + "LLVMFuzzerM", + "LLVMFuzzerC", + "LLVMFuzzerI", + "maybe_duplicate_stderr", + "discard_output", + "close_stdout", + "dup_and_close_stderr", + "maybe_close_fd_mask", + "ExecuteFilesOnyByOne" + + }; + + for (auto const &ignoreListFunc : ignoreList) { + + if (F->getName().startswith(ignoreListFunc)) { return true; } + + } + + static constexpr const char *ignoreSubstringList[] = { + + "__asan", "__msan", "__ubsan", "__lsan", + "__san", "__sanitize", "__cxx", "_GLOBAL__", + "DebugCounter", "DwarfDebug", "DebugLoc" + + }; + + for (auto const &ignoreListFunc : ignoreSubstringList) { + + // hexcoder: F->getName().contains() not avaiilable in llvm 3.8.0 + if (StringRef::npos != F->getName().find(ignoreListFunc)) { return true; } + + } + + return false; + +} + +class AutoTokensPass : public ModulePass { + + public: + static char ID; + + AutoTokensPass() : ModulePass(ID) { + + + } + + bool runOnModule(Module &M) override; + + protected: + + private: + std::vector dictionary; + +}; + +} // namespace + +char AutoTokensPass::ID = 0; + + +void dict2file(int fd, uint8_t *mem, uint32_t len) { + uint32_t i, j, binary = 0; + char line[MAX_AUTO_EXTRA * 8], tmp[8]; + + strcpy(line, "\""); + j = 1; + for (i = 0; i < len; i++) { + + if (isprint(mem[i]) && mem[i] != '\\' && mem[i] != '"') { + + line[j++] = mem[i]; + + } else { + + if (i + 1 != len || mem[i] != 0 || binary || len == 4 || len == 8) { + + line[j] = 0; + sprintf(tmp, "\\x%02x", (uint8_t)mem[i]); + strcat(line, tmp); + j = strlen(line); + + } + + binary = 1; + + } + + } + + line[j] = 0; + strcat(line, "\"\n"); + if (write(fd, line, strlen(line)) <= 0) + FATAL("Could not write to dictionary file"); + fsync(fd); + +} + 
+bool AutoTokensPass::runOnModule(Module &M) { + + DenseMap valueMap; + char * ptr; + int fd, found = 0; + bool use_file = true; + + /* Show a banner */ + setvbuf(stdout, NULL, _IONBF, 0); + + ptr = getenv("AFL_LLVM_DICT2FILE"); + + if (!ptr || *ptr != '/') { + fprintf(stderr, "AFL_LLVM_DICT2FILE is not set to an absolute path: %s\n", ptr); + fprintf(stderr, "Writing tokens into libafl_tokens section\n"); + + use_file = false; + } + + if(use_file) { + if ((fd = open(ptr, O_WRONLY | O_APPEND | O_CREAT | O_DSYNC, 0644)) < 0) + FATAL("Could not open/create %s.", ptr); + } + + + /* Instrument all the things! */ + + for (auto &F : M) { + + if (isIgnoreFunction(&F)) continue; + + /* Some implementation notes. + * + * We try to handle 3 cases: + * - memcmp("foo", arg, 3) <- literal string + * - static char globalvar[] = "foo"; + * memcmp(globalvar, arg, 3) <- global variable + * - char localvar[] = "foo"; + * memcmp(locallvar, arg, 3) <- local variable + * + * The local variable case is the hardest. We can only detect that + * case if there is no reassignment or change in the variable. + * And it might not work across llvm version. + * What we do is hooking the initializer function for local variables + * (llvm.memcpy.p0i8.p0i8.i64) and note the string and the assigned + * variable. And if that variable is then used in a compare function + * we use that noted string. + * This seems not to work for tokens that have a size <= 4 :-( + * + * - if the compared length is smaller than the string length we + * save the full string. This is likely better for fuzzing but + * might be wrong in a few cases depending on optimizers + * + * - not using StringRef because there is a bug in the llvm 11 + * checkout I am using which sometimes points to wrong strings + * + * Over and out. Took me a full day. damn. 
mh/vh + */ + + for (auto &BB : F) { + + for (auto &IN : BB) { + + CallInst *callInst = nullptr; + CmpInst * cmpInst = nullptr; + + if ((cmpInst = dyn_cast(&IN))) { + + Value * op = cmpInst->getOperand(1); + ConstantInt *ilen = dyn_cast(op); + + /* We skip > 64 bit integers. why? first because their value is + difficult to obtain, and second because clang does not support + literals > 64 bit (as of llvm 12) */ + + if (ilen && ilen->uge(0xffffffffffffffff) == false) { + + uint64_t val2 = 0, val = ilen->getZExtValue(); + uint32_t len = 0; + if (val > 0x10000 && val < 0xffffffff) len = 4; + if (val > 0x100000001 && val < 0xffffffffffffffff) len = 8; + + if (len) { + + auto c = cmpInst->getPredicate(); + + switch (c) { + + case CmpInst::FCMP_OGT: // fall through + case CmpInst::FCMP_OLE: // fall through + case CmpInst::ICMP_SLE: // fall through + case CmpInst::ICMP_SGT: + + // signed comparison and it is a negative constant + if ((len == 4 && (val & 80000000)) || + (len == 8 && (val & 8000000000000000))) { + + if ((val & 0xffff) != 1) val2 = val - 1; + break; + + } + + // fall through + + case CmpInst::FCMP_UGT: // fall through + case CmpInst::FCMP_ULE: // fall through + case CmpInst::ICMP_UGT: // fall through + case CmpInst::ICMP_ULE: + if ((val & 0xffff) != 0xfffe) val2 = val + 1; + break; + + case CmpInst::FCMP_OLT: // fall through + case CmpInst::FCMP_OGE: // fall through + case CmpInst::ICMP_SLT: // fall through + case CmpInst::ICMP_SGE: + + // signed comparison and it is a negative constant + if ((len == 4 && (val & 80000000)) || + (len == 8 && (val & 8000000000000000))) { + + if ((val & 0xffff) != 1) val2 = val - 1; + break; + + } + + // fall through + + case CmpInst::FCMP_ULT: // fall through + case CmpInst::FCMP_UGE: // fall through + case CmpInst::ICMP_ULT: // fall through + case CmpInst::ICMP_UGE: + if ((val & 0xffff) != 1) val2 = val - 1; + break; + + default: + val2 = 0; + + } + + if(use_file) { + dict2file(fd, (uint8_t *)&val, len); + } + else{ + 
dictionary.push_back(std::string((char *)&val, len)); + } + + found++; + if (val2) { + + if(use_file) { + dict2file(fd, (uint8_t *)&val2, len); + } + else{ + dictionary.push_back(std::string((char *)&val2, len)); + } + found++; + + } + + } + + } + + } + + if ((callInst = dyn_cast(&IN))) { + + bool isStrcmp = true; + bool isMemcmp = true; + bool isStrncmp = true; + bool isStrcasecmp = true; + bool isStrncasecmp = true; + bool isIntMemcpy = true; + bool isStdString = true; + bool addedNull = false; + size_t optLen = 0; + + Function *Callee = callInst->getCalledFunction(); + if (!Callee) continue; + if (callInst->getCallingConv() != llvm::CallingConv::C) continue; + std::string FuncName = Callee->getName().str(); + isStrcmp &= !FuncName.compare("strcmp"); + isMemcmp &= + (!FuncName.compare("memcmp") || !FuncName.compare("bcmp")); + isStrncmp &= !FuncName.compare("strncmp"); + isStrcasecmp &= !FuncName.compare("strcasecmp"); + isStrncasecmp &= !FuncName.compare("strncasecmp"); + isIntMemcpy &= !FuncName.compare("llvm.memcpy.p0i8.p0i8.i64"); + isStdString &= ((FuncName.find("basic_string") != std::string::npos && + FuncName.find("compare") != std::string::npos) || + (FuncName.find("basic_string") != std::string::npos && + FuncName.find("find") != std::string::npos)); + + if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp && + !isStrncasecmp && !isIntMemcpy && !isStdString) + continue; + + /* Verify the strcmp/memcmp/strncmp/strcasecmp/strncasecmp function + * prototype */ + FunctionType *FT = Callee->getFunctionType(); + + isStrcmp &= + FT->getNumParams() == 2 && FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0) == FT->getParamType(1) && + FT->getParamType(0) == IntegerType::getInt8PtrTy(M.getContext()); + isStrcasecmp &= + FT->getNumParams() == 2 && FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0) == FT->getParamType(1) && + FT->getParamType(0) == IntegerType::getInt8PtrTy(M.getContext()); + isMemcmp &= FT->getNumParams() == 3 && + 
FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0)->isPointerTy() && + FT->getParamType(1)->isPointerTy() && + FT->getParamType(2)->isIntegerTy(); + isStrncmp &= FT->getNumParams() == 3 && + FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0) == FT->getParamType(1) && + FT->getParamType(0) == + IntegerType::getInt8PtrTy(M.getContext()) && + FT->getParamType(2)->isIntegerTy(); + isStrncasecmp &= FT->getNumParams() == 3 && + FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0) == FT->getParamType(1) && + FT->getParamType(0) == + IntegerType::getInt8PtrTy(M.getContext()) && + FT->getParamType(2)->isIntegerTy(); + isStdString &= FT->getNumParams() >= 2 && + FT->getParamType(0)->isPointerTy() && + FT->getParamType(1)->isPointerTy(); + + if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp && + !isStrncasecmp && !isIntMemcpy && !isStdString) + continue; + + /* is a str{n,}{case,}cmp/memcmp, check if we have + * str{case,}cmp(x, "const") or str{case,}cmp("const", x) + * strn{case,}cmp(x, "const", ..) or strn{case,}cmp("const", x, ..) + * memcmp(x, "const", ..) or memcmp("const", x, ..) 
*/ + Value *Str1P = callInst->getArgOperand(0), + *Str2P = callInst->getArgOperand(1); + std::string Str1, Str2; + StringRef TmpStr; + bool HasStr1; + getConstantStringInfo(Str1P, TmpStr); + + if (TmpStr.empty()) { + + HasStr1 = false; + + } else { + + HasStr1 = true; + Str1 = TmpStr.str(); + + } + + bool HasStr2; + getConstantStringInfo(Str2P, TmpStr); + if (TmpStr.empty()) { + + HasStr2 = false; + + } else { + + HasStr2 = true; + Str2 = TmpStr.str(); + + } + + // we handle the 2nd parameter first because of llvm memcpy + if (!HasStr2) { + + auto *Ptr = dyn_cast(Str2P); + if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) { + + if (auto *Var = dyn_cast(Ptr->getOperand(0))) { + + if (Var->hasInitializer()) { + + if (auto *Array = + dyn_cast(Var->getInitializer())) { + + HasStr2 = true; + Str2 = Array->getRawDataValues().str(); + + } + + } + + } + + } + + } + + // for the internal memcpy routine we only care for the second + // parameter and are not reporting anything. + if (isIntMemcpy == true) { + + if (HasStr2 == true) { + + Value * op2 = callInst->getArgOperand(2); + ConstantInt *ilen = dyn_cast(op2); + if (ilen) { + + uint64_t literalLength = Str2.length(); + uint64_t optLength = ilen->getZExtValue(); + if (literalLength + 1 == optLength) { + + Str2.append("\0", 1); // add null byte + + } + + if (optLength > Str2.length()) { optLength = Str2.length(); } + + } + + valueMap[Str1P] = new std::string(Str2); + continue; + + } + + continue; + + } + + // Neither a literal nor a global variable? 
+ // maybe it is a local variable that we saved + if (!HasStr2) { + + std::string *strng = valueMap[Str2P]; + if (strng && !strng->empty()) { + + Str2 = *strng; + HasStr2 = true; + + } + + } + + if (!HasStr1) { + + auto Ptr = dyn_cast(Str1P); + + if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) { + + if (auto *Var = dyn_cast(Ptr->getOperand(0))) { + + if (Var->hasInitializer()) { + + if (auto *Array = + dyn_cast(Var->getInitializer())) { + + HasStr1 = true; + Str1 = Array->getRawDataValues().str(); + + } + + } + + } + + } + + } + + // Neither a literal nor a global variable? + // maybe it is a local variable that we saved + if (!HasStr1) { + + std::string *strng = valueMap[Str1P]; + if (strng && !strng->empty()) { + + Str1 = *strng; + HasStr1 = true; + + } + + } + + /* handle cases of one string is const, one string is variable */ + if (!(HasStr1 ^ HasStr2)) continue; + + std::string thestring; + + if (HasStr1) + thestring = Str1; + else + thestring = Str2; + + optLen = thestring.length(); + + if (optLen < 2 || (optLen == 2 && !thestring[1])) { continue; } + + if (isMemcmp || isStrncmp || isStrncasecmp) { + + Value * op2 = callInst->getArgOperand(2); + ConstantInt *ilen = dyn_cast(op2); + + if (ilen) { + + uint64_t literalLength = optLen; + optLen = ilen->getZExtValue(); + if (optLen > thestring.length()) { optLen = thestring.length(); } + if (optLen < 2) { continue; } + if (literalLength + 1 == optLen) { // add null byte + thestring.append("\0", 1); + addedNull = true; + + } + + } + + } + + // add null byte if this is a string compare function and a null + // was not already added + if (!isMemcmp) { + + if (addedNull == false && thestring[optLen - 1] != '\0') { + + thestring.append("\0", 1); // add null byte + optLen++; + + } + + if (!isStdString) { + + // ensure we do not have garbage + size_t offset = thestring.find('\0', 0); + if (offset + 1 < optLen) optLen = offset + 1; + thestring = thestring.substr(0, optLen); + + } + + } + + // we take the longer string, 
even if the compare was to a + // shorter part. Note that depending on the optimizer of the + // compiler this can be wrong, but it is more likely that this + // is helping the fuzzer + if (optLen != thestring.length()) optLen = thestring.length(); + if (optLen > MAX_AUTO_EXTRA) optLen = MAX_AUTO_EXTRA; + if (optLen < 3) // too short? skip + continue; + + ptr = (char *)thestring.c_str(); + + if(use_file){ + dict2file(fd, (uint8_t *)ptr, optLen); + } + else{ + dictionary.push_back(thestring.substr(0, optLen)); + } + found++; + + } + + } + + } + + } + + if(use_file){ + close(fd); + return true; + } + + LLVMContext &Ctx = M.getContext(); + + if (dictionary.size()) { + + size_t memlen = 0, count = 0, offset = 0; + + // sort and unique the dictionary + std::sort(dictionary.begin(), dictionary.end()); + auto last = std::unique(dictionary.begin(), dictionary.end()); + dictionary.erase(last, dictionary.end()); + + for (auto token : dictionary) { + + memlen += token.length(); + count++; + + } + if (count) { + + auto ptrhld = std::unique_ptr(new char[memlen + count]); + + count = 0; + + for (auto token : dictionary) { + + if (offset + token.length() < 0xfffff0 && count < MAX_AUTO_EXTRAS) { + + // This lenght is guranteed to be < MAX_AUTO_EXTRA + ptrhld.get()[offset++] = (uint8_t)token.length(); + memcpy(ptrhld.get() + offset, token.c_str(), token.length()); + offset += token.length(); + count++; + } + } + + // Type + ArrayType* arrayTy = ArrayType::get(IntegerType::get(Ctx, 8), offset); + + // The actual dict + GlobalVariable *dict = new GlobalVariable(M, arrayTy, true, GlobalVariable::ExternalLinkage, ConstantDataArray::get(Ctx, *(new ArrayRef(ptrhld.get(), offset))), "libafl_dictionary_" + M.getName()); + dict->setSection("libafl_token"); + } + } + + return true; +} + + +static void registerAutoTokensPass(const PassManagerBuilder &, + legacy::PassManagerBase &PM) { + + PM.add(new AutoTokensPass()); + +} + +static RegisterPass X("autotokens", + "autotokens instrumentation 
pass", + false, false); + +static RegisterStandardPasses RegisterAutoTokensPass( + PassManagerBuilder::EP_OptimizerLast, registerAutoTokensPass); + +static RegisterStandardPasses RegisterAutoTokensPass0( + PassManagerBuilder::EP_EnabledOnOptLevel0, registerAutoTokensPass); diff --git a/libafl_cc/src/clang.rs b/libafl_cc/src/clang.rs index b6d77dbdf8..5f926f831b 100644 --- a/libafl_cc/src/clang.rs +++ b/libafl_cc/src/clang.rs @@ -31,6 +31,8 @@ pub enum LLVMPasses { CmpLogRtn, /// The AFL coverage pass AFLCoverage, + /// The Autotoken pass + AutoTokens, } impl LLVMPasses { @@ -42,6 +44,9 @@ impl LLVMPasses { .join(format!("cmplog-routines-pass.{}", dll_extension())), LLVMPasses::AFLCoverage => PathBuf::from(env!("OUT_DIR")) .join(format!("afl-coverage-pass.{}", dll_extension())), + LLVMPasses::AutoTokens => { + PathBuf::from(env!("OUT_DIR")).join(format!("autotokens-pass.{}", dll_extension())) + } } } } diff --git a/libafl_targets/src/common.h b/libafl_targets/src/common.h index 97489b4ce9..46e8b5ef57 100644 --- a/libafl_targets/src/common.h +++ b/libafl_targets/src/common.h @@ -133,6 +133,10 @@ #define EXT_FUNC_IMPL(NAME, RETURN_TYPE, FUNC_SIG, WARN) \ __attribute__((weak, visibility("default"))) RETURN_TYPE NAME FUNC_SIG + // Weakly defined globals + #define EXT_VAR(NAME, TYPE) \ + TYPE __attribute__((weak, visibility("default"))) NAME + #else #define EXT_FUNC_IMPL(NAME, RETURN_TYPE, FUNC_SIG, WARN) \ @@ -141,6 +145,11 @@ // Declare these symbols as weak to allow them to be optionally defined. 
#define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN) \ __attribute__((weak, visibility("default"))) RETURN_TYPE NAME FUNC_SIG + +// Weakly defined globals +#define EXT_VAR(NAME, TYPE) \ + TYPE __attribute__((weak, visibility("default"))) NAME + #endif #define CHECK_WEAK_FN(Name) (Name != NULL) diff --git a/libafl_targets/src/coverage.c b/libafl_targets/src/coverage.c index cf28f51e51..cf9dbd6616 100644 --- a/libafl_targets/src/coverage.c +++ b/libafl_targets/src/coverage.c @@ -9,8 +9,22 @@ typedef uint32_t prev_loc_t; #define CTX_MAX_K 32U extern uint8_t __afl_area_ptr_local[EDGES_MAP_SIZE]; + uint8_t* __afl_area_ptr = __afl_area_ptr_local; + +// Weak symbols, LLVM Passes overwrites them if we really use it +#ifdef __linux__ +extern EXT_VAR(__start_libafl_token, uint8_t); +extern EXT_VAR(__stop_libafl_token, uint8_t); + +// Expose the start of libafl_token section as C symbols +uint8_t* __token_start = &__start_libafl_token; +uint8_t* __token_stop = &__stop_libafl_token; +#endif + + + //#if defined(__ANDROID__) || defined(__HAIKU__) MAYBE_THREAD_LOCAL prev_loc_t __afl_prev_loc[NGRAM_SIZE_MAX]; MAYBE_THREAD_LOCAL prev_loc_t __afl_prev_caller[CTX_MAX_K]; diff --git a/libafl_targets/src/coverage.rs b/libafl_targets/src/coverage.rs index feb2fb5b91..58e3eb99d2 100644 --- a/libafl_targets/src/coverage.rs +++ b/libafl_targets/src/coverage.rs @@ -13,9 +13,24 @@ pub static mut MAX_EDGES_NUM: usize = 0; extern "C" { /// The area pointer points to the edges map. pub static mut __afl_area_ptr: *mut u8; + + /// Start of libafl token section + #[cfg(target_os = "linux")] + pub static __token_start: *const u8; + + /// End of libafl token section + #[cfg(target_os = "linux")] + pub static __token_stop: *const u8; } pub use __afl_area_ptr as EDGES_MAP_PTR; +/// Return token section's start and end as a tuple +#[cfg(target_os = "linux")] +#[must_use] +pub fn token_section() -> (*const u8, *const u8) { + unsafe { (__token_start, __token_stop) } +} + /// The size of the map for edges. 
#[no_mangle] pub static mut __afl_map_size: usize = EDGES_MAP_SIZE;