diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 5033423348..60617edf5a 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -333,6 +333,7 @@ jobs: - ./fuzzers/libfuzzer_stb_image - ./fuzzers/fuzzbench_forkserver # - ./fuzzers/libfuzzer_windows_asan + # - ./fuzzers/dynamic_analysis - ./fuzzers/baby_fuzzer_minimizing - ./fuzzers/frida_executable_libpng - ./fuzzers/tutorial diff --git a/.github/workflows/fuzzer-tester-prepare/action.yml b/.github/workflows/fuzzer-tester-prepare/action.yml index c161a74607..899cd500ab 100644 --- a/.github/workflows/fuzzer-tester-prepare/action.yml +++ b/.github/workflows/fuzzer-tester-prepare/action.yml @@ -36,7 +36,7 @@ runs: version: 17 - name: Install deps shell: bash - run: sudo apt update && sudo apt install -y nasm ninja-build gcc-arm-linux-gnueabi g++-arm-linux-gnueabi gcc-aarch64-linux-gnu g++-aarch64-linux-gnu gcc-mipsel-linux-gnu g++-mipsel-linux-gnu gcc-powerpc-linux-gnu g++-powerpc-linux-gnu libc6-dev-i386-cross libc6-dev libc6-dev-i386 lib32gcc-11-dev lib32stdc++-11-dev libgtk-3-dev pax-utils libz3-dev + run: sudo apt update && sudo apt install -y nasm nlohmann-json3-dev ninja-build gcc-arm-linux-gnueabi g++-arm-linux-gnueabi gcc-aarch64-linux-gnu g++-aarch64-linux-gnu gcc-mipsel-linux-gnu g++-mipsel-linux-gnu gcc-powerpc-linux-gnu g++-powerpc-linux-gnu libc6-dev-i386-cross libc6-dev libc6-dev-i386 lib32gcc-11-dev lib32stdc++-11-dev libgtk-3-dev pax-utils libz3-dev - name: pip install shell: bash run: python3 -m pip install msgpack jinja2 find_libpython diff --git a/fuzzers/dynamic_analysis/Cargo.toml b/fuzzers/dynamic_analysis/Cargo.toml new file mode 100644 index 0000000000..35235b25f1 --- /dev/null +++ b/fuzzers/dynamic_analysis/Cargo.toml @@ -0,0 +1,41 @@ +[package] +name = "fuzzbench" +version = "0.12.0" +authors = ["Andrea Fioraldi ", "Dominik Maier "] +edition = "2021" + +[features] +default = ["std"] +std = [] +no_link_main = ["libafl_targets/libfuzzer_no_link_main"] + +[profile.release] +lto = true +codegen-units = 1 +opt-level = 3 +debug = true + +[profile.release-fuzzbench] +inherits = "release" +debug = false +strip = true + +[build-dependencies] +cc = { version = "1.0", features = ["parallel"] } +which = "4.4" + +[dependencies] +env_logger = "0.10" +once_cell = "1.19" +libafl = { path = "../../libafl/" } +libafl_bolts = { path = "../../libafl_bolts/" } +libafl_targets = { path = "../../libafl_targets/", features = ["sancov_pcguard_hitcounts", "sancov_cmplog", "libfuzzer", "function-logging"] } +# TODO Include it only when building cc +libafl_cc = { path = "../../libafl_cc/" } +clap = { version = "4.0", features = ["default"] } +nix = { version = "0.27", features = ["fs"] } +mimalloc = { version = "*", default-features = false } + +[lib] +name = "fuzzbench" +crate-type = ["staticlib"] diff --git a/fuzzers/dynamic_analysis/Makefile.toml b/fuzzers/dynamic_analysis/Makefile.toml new file mode 100644 index 0000000000..036c83e5f2 --- /dev/null +++ b/fuzzers/dynamic_analysis/Makefile.toml @@ -0,0 +1,108 @@ +[env] +PROJECT_DIR = { script = ["pwd"] } +CARGO_TARGET_DIR = { value = "${PROJECT_DIR}/target", condition = { env_not_set = ["CARGO_TARGET_DIR"] } } +FUZZER_NAME="fuzzer" +PROFILE = { value = "release", condition = {env_not_set = ["PROFILE"]} } +PROFILE_DIR = {value = "release", condition = {env_not_set = ["PROFILE_DIR"] }} + +[tasks.unsupported] +script_runner="@shell" +script=''' +echo "Cargo-make not integrated yet on this" +''' + +# Compilers 
+[tasks.cxx] +linux_alias = "cxx_unix" +mac_alias = "cxx_unix" +windows_alias = "unsupported" + +[tasks.cxx_unix] +command = "cargo" +args = ["build", "--profile", "${PROFILE}"] + +[tasks.cc] +linux_alias = "cc_unix" +mac_alias = "cc_unix" +windows_alias = "unsupported" + +[tasks.cc_unix] +command = "cargo" +args = ["build", "--profile", "${PROFILE}"] + +# fuzz.o File +[tasks.fuzz_o] +linux_alias = "fuzz_o_unix" +mac_alias = "fuzz_o_unix" +windows_alias = "unsupported" + +[tasks.fuzz_o_unix] +command = "${CARGO_TARGET_DIR}/${PROFILE_DIR}/libafl_cxx" +args = ["--libafl-no-link", "-O3","-I", "./Little-CMS/include", "-c", "cms_transform_fuzzer.cc", "-o", "cms_transform_fuzzer.o"] +dependencies = ["cc", "cxx"] + +# Fuzzer +[tasks.fuzzer] +linux_alias = "fuzzer_unix" +mac_alias = "fuzzer_unix" +windows_alias = "unsupported" + +[tasks.fuzzer_unix] +command = "${CARGO_TARGET_DIR}/${PROFILE_DIR}/libafl_cxx" +args = ["--libafl", "cms_transform_fuzzer.o", "./Little-CMS/src/.libs/liblcms2.a", "-o", "${FUZZER_NAME}", "-lm", "-lz"] +dependencies = ["cc", "cxx", "fuzz_o"] + +# Run +[tasks.run] +linux_alias = "run_unix" +mac_alias = "run_unix" +windows_alias = "unsupported" + +[tasks.run_unix] +script_runner="@shell" +script=''' +rm -rf libafl_unix_shmem_server || true +mkdir in || true +echo a > in/a +./${FUZZER_NAME} -o out -i in +''' +dependencies = ["fuzzer"] + + +# Test +[tasks.test] +linux_alias = "test_unix" +mac_alias = "test_unix" +windows_alias = "unsupported" + +[tasks.test_unix] +script_runner="@shell" +script=''' +rm -rf libafl_unix_shmem_server || true +mkdir in || true +echo a > in/a +# Allow sigterm as exit code +timeout 31s ./${FUZZER_NAME} -o out -i in | tee fuzz_stdout.log || true +if grep -qa "objectives: 1" fuzz_stdout.log; then + echo "Fuzzer is working" +else + echo "Fuzzer does not generate any testcases or any crashes" + exit 1 +fi +rm -rf out || true +rm -rf in || true +''' +dependencies = ["fuzzer"] + +# Clean +[tasks.clean] +linux_alias = "clean_unix" +mac_alias = "clean_unix" +windows_alias = "unsupported" + +[tasks.clean_unix] +script_runner="@shell" +script=''' +rm ./${FUZZER_NAME} || true +rm fuzz.o || true +''' diff --git a/fuzzers/dynamic_analysis/README.md b/fuzzers/dynamic_analysis/README.md new file mode 100644 index 0000000000..9da06f3f4d --- /dev/null +++ b/fuzzers/dynamic_analysis/README.md @@ -0,0 +1,11 @@ +# Dynamic Analysis Fuzzer +This fuzzer is to show how you can collect runtime analysis information during fuzzing using LibAFL. We use the Little-CMS project for the example. +First, this fuzzer requires `nlohmann-json3-dev` to work. + +To run the fuzzer, +0. Compile the fuzzer with `cargo build --release` +1. `mkdir analysis` and run `build.sh`. This will compile Little-CMS to extract the analysis information and generate a json file for each module. +2. run `python3 concatenator.py analysis`. This will concatenate all the json into one single file. This json file maps a function id to its analysis information. +3. Compile the fuzzer with `cargo make fuzzer`. This will instrument the fuzzer at every function entry point. Therefore, whenever we reach the entry of any function, we +can log its id and logs what functions we executed. +4. Run the fuzzer `RUST_LOG=info ./fuzzer --input ./corpus --output ./out`. 
You'll see a stream of analysis data \ No newline at end of file diff --git a/fuzzers/dynamic_analysis/build.rs b/fuzzers/dynamic_analysis/build.rs new file mode 100644 index 0000000000..863d3cb258 --- /dev/null +++ b/fuzzers/dynamic_analysis/build.rs @@ -0,0 +1,25 @@ +use std::{env, process::Command}; + +fn main() { + let current_dir = env::current_dir().unwrap(); + let lcms_dir = current_dir.join("Little-CMS"); + if !lcms_dir.exists() { + println!("cargo:warning=Downloading Little-CMS"); + // Clone the Little-CMS repository if the directory doesn't exist + let status = Command::new("git") + .args(&[ + "clone", + "https://github.com/mm2/Little-CMS", + lcms_dir.to_str().unwrap(), + ]) + .status() + .expect("Failed to clone Little-CMS repository"); + + if !status.success() { + panic!("Failed to clone Little-CMS repository"); + } + } + + // Tell Cargo that if the given file changes, to rerun this build script + println!("cargo:rerun-if-changed=build.rs"); +} diff --git a/fuzzers/dynamic_analysis/build.sh b/fuzzers/dynamic_analysis/build.sh new file mode 100755 index 0000000000..a4ffe50bdb --- /dev/null +++ b/fuzzers/dynamic_analysis/build.sh @@ -0,0 +1,14 @@ +export CC=$(pwd)/target/release/libafl_cc +export CXX=$(pwd)/target/release/libafl_cxx +export CXXFLAGS='--libafl' +export CFLAGS='--libafl' +export LDFLAGS='--libafl' +export ANALYSIS_OUTPUT=`pwd`/analysis +cd Little-CMS +./autogen.sh +./configure + + +make -j $(nproc) + +$CXX $CXXFLAGS ../cms_transform_fuzzer.cc -I include/ src/.libs/liblcms2.a -o ../fuzzer diff --git a/fuzzers/dynamic_analysis/clean.sh b/fuzzers/dynamic_analysis/clean.sh new file mode 100755 index 0000000000..34d2c16fcb --- /dev/null +++ b/fuzzers/dynamic_analysis/clean.sh @@ -0,0 +1,5 @@ +export ANALYSIS_OUTPUT=`pwd`/analysis +rm -rf analysis/* +pushd Little-CMS +make clean +popd diff --git a/fuzzers/dynamic_analysis/cms_transform_fuzzer.cc b/fuzzers/dynamic_analysis/cms_transform_fuzzer.cc new file mode 100644 index 0000000000..b6433267d4 --- /dev/null +++ b/fuzzers/dynamic_analysis/cms_transform_fuzzer.cc @@ -0,0 +1,63 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
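+// Fuzz harness for Little-CMS: the input bytes are treated as an ICC profile,
+// a color transform from that profile to a freshly created sRGB profile is
+// built, a single pixel is pushed through it, and all handles are released.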
+ +#include + +#include "lcms2.h" + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + cmsHPROFILE srcProfile = cmsOpenProfileFromMem(data, size); + if (!srcProfile) return 0; + + cmsHPROFILE dstProfile = cmsCreate_sRGBProfile(); + if (!dstProfile) { + cmsCloseProfile(srcProfile); + return 0; + } + + cmsColorSpaceSignature srcCS = cmsGetColorSpace(srcProfile); + cmsUInt32Number nSrcComponents = cmsChannelsOf(srcCS); + cmsUInt32Number srcFormat; + if (srcCS == cmsSigLabData) { + srcFormat = + COLORSPACE_SH(PT_Lab) | CHANNELS_SH(nSrcComponents) | BYTES_SH(0); + } else { + srcFormat = + COLORSPACE_SH(PT_ANY) | CHANNELS_SH(nSrcComponents) | BYTES_SH(1); + } + + cmsUInt32Number intent = 0; + cmsUInt32Number flags = 0; + cmsHTRANSFORM hTransform = cmsCreateTransform( + srcProfile, srcFormat, dstProfile, TYPE_BGR_8, intent, flags); + cmsCloseProfile(srcProfile); + cmsCloseProfile(dstProfile); + if (!hTransform) return 0; + + uint8_t output[4]; + if (T_BYTES(srcFormat) == 0) { // 0 means double + double input[nSrcComponents]; + for (uint32_t i = 0; i < nSrcComponents; i++) + input[i] = 0.5f; + cmsDoTransform(hTransform, input, output, 1); + } else { + uint8_t input[nSrcComponents]; + for (uint32_t i = 0; i < nSrcComponents; i++) + input[i] = 128; + cmsDoTransform(hTransform, input, output, 1); + } + cmsDeleteTransform(hTransform); + + return 0; +} diff --git a/fuzzers/dynamic_analysis/concatenator.py b/fuzzers/dynamic_analysis/concatenator.py new file mode 100755 index 0000000000..72f09f56c6 --- /dev/null +++ b/fuzzers/dynamic_analysis/concatenator.py @@ -0,0 +1,36 @@ +#!/usr/bin/python3 + +import os +import json +import sys + +def concatenate_json_files(input_dir): + json_files = [] + for root, dirs, files in os.walk(input_dir): + for file in files: + if file.endswith('.json'): + json_files.append(os.path.join(root, file)) + + data = dict() + for json_file in json_files: + with open(json_file, 'r') as file: + if os.stat(json_file).st_size == 0: + # skip empty file else json.load() fails + continue + json_data = json.load(file) + print(type(json_data), file) + data = data | json_data + + output_file = os.path.join(os.getcwd(), 'concatenated.json') + with open(output_file, 'w') as file: + json.dump([data], file) + + print(f"JSON files concatenated successfully! Output file: {output_file}") + +if __name__ == '__main__': + if len(sys.argv) != 2: + print("Usage: python script.py ") + sys.exit(1) + + input_directory = sys.argv[1] + concatenate_json_files(input_directory) diff --git a/fuzzers/dynamic_analysis/src/bin/libafl_cc.rs b/fuzzers/dynamic_analysis/src/bin/libafl_cc.rs new file mode 100644 index 0000000000..c0b8e478de --- /dev/null +++ b/fuzzers/dynamic_analysis/src/bin/libafl_cc.rs @@ -0,0 +1,47 @@ +use std::env; + +use libafl_cc::{ClangWrapper, CompilerWrapper, LLVMPasses, ToolWrapper}; + +pub fn main() { + let mut args: Vec = env::args().collect(); + if args.len() > 1 { + let mut dir = env::current_exe().unwrap(); + let wrapper_name = dir.file_name().unwrap().to_str().unwrap(); + + let is_cpp = match wrapper_name[wrapper_name.len()-2..].to_lowercase().as_str() { + "cc" => false, + "++" | "pp" | "xx" => true, + _ => panic!("Could not figure out if c or c++ wrapper was called. 
Expected {dir:?} to end with c or cxx"), + }; + + dir.pop(); + + // Must be always present, even without --libafl + args.push("-fsanitize-coverage=trace-pc-guard,trace-cmp".into()); + + let mut cc = ClangWrapper::new(); + + #[cfg(any(target_os = "linux", target_vendor = "apple"))] + cc.add_pass(LLVMPasses::AutoTokens); + + if let Some(code) = cc + .cpp(is_cpp) + // silence the compiler wrapper output, needed for some configure scripts. + .silence(true) + // add arguments only if --libafl or --libafl-no-link are present + .need_libafl_arg(true) + .parse_args(&args) + .expect("Failed to parse the command line") + .link_staticlib(&dir, "fuzzbench") + .add_pass(LLVMPasses::CmpLogRtn) + .add_pass(LLVMPasses::FunctionLogging) + .add_pass(LLVMPasses::Profiling) + .run() + .expect("Failed to run the wrapped compiler") + { + std::process::exit(code); + } + } else { + panic!("LibAFL CC: No Arguments given"); + } +} diff --git a/fuzzers/dynamic_analysis/src/bin/libafl_cxx.rs b/fuzzers/dynamic_analysis/src/bin/libafl_cxx.rs new file mode 100644 index 0000000000..dabd22971a --- /dev/null +++ b/fuzzers/dynamic_analysis/src/bin/libafl_cxx.rs @@ -0,0 +1,5 @@ +pub mod libafl_cc; + +fn main() { + libafl_cc::main(); +} diff --git a/fuzzers/dynamic_analysis/src/lib.rs b/fuzzers/dynamic_analysis/src/lib.rs new file mode 100644 index 0000000000..472d8b4e36 --- /dev/null +++ b/fuzzers/dynamic_analysis/src/lib.rs @@ -0,0 +1,406 @@ +//! A singlethreaded libfuzzer-like fuzzer that can auto-restart. +use mimalloc::MiMalloc; +#[global_allocator] +static GLOBAL: MiMalloc = MiMalloc; +use core::{cell::RefCell, time::Duration}; +#[cfg(unix)] +use std::os::unix::io::{AsRawFd, FromRawFd}; +use std::{ + env, + fs::{self, File, OpenOptions}, + io::{self, Read, Write}, + path::PathBuf, + process, +}; + +use clap::{Arg, Command}; +use libafl::{ + corpus::{Corpus, InMemoryOnDiskCorpus, OnDiskCorpus}, + events::SimpleRestartingEventManager, + executors::{inprocess::HookableInProcessExecutor, ExitKind}, + feedback_or, + feedbacks::{CrashFeedback, MaxMapFeedback, TimeFeedback}, + fuzzer::{Fuzzer, StdFuzzer}, + inputs::{BytesInput, HasTargetBytes}, + monitors::SimpleMonitor, + mutators::{ + scheduled::havoc_mutations, token_mutations::I2SRandReplace, tokens_mutations, + StdMOptMutator, StdScheduledMutator, Tokens, + }, + observers::{CanTrack, HitcountsMapObserver, ProfilingObserver, TimeObserver}, + schedulers::{ + powersched::PowerSchedule, IndexesLenTimeMinimizerScheduler, StdWeightedScheduler, + }, + stages::{ + calibrate::CalibrationStage, power::StdPowerMutationalStage, StdMutationalStage, + TracingStage, + }, + state::{HasCorpus, StdState}, + Error, HasMetadata, +}; +use libafl_bolts::{ + current_time, + os::dup2, + ownedref::OwnedMutPtr, + rands::StdRand, + shmem::{ShMemProvider, StdShMemProvider}, + tuples::{tuple_list, Merge}, + AsSlice, +}; +#[cfg(any(target_os = "linux", target_vendor = "apple"))] +use libafl_targets::autotokens; +use libafl_targets::{ + libfuzzer_initialize, libfuzzer_test_one_input, std_edges_map_observer, CallHook, + CmpLogObserver, FUNCTION_LIST, +}; +#[cfg(unix)] +use nix::unistd::dup; +use once_cell::sync::Lazy; + +/// The fuzzer main (as `no_mangle` C function) +#[no_mangle] +pub extern "C" fn libafl_main() { + // Registry the metadata types used in this fuzzer + // Needed only on no_std + // unsafe { RegistryBuilder::register::(); } + env_logger::init(); + let res = match Command::new(env!("CARGO_PKG_NAME")) + .version(env!("CARGO_PKG_VERSION")) + .author("AFLplusplus team") + 
.about("LibAFL-based fuzzer for Fuzzbench") + .arg( + Arg::new("out") + .short('o') + .long("output") + .help("The directory to place finds in ('corpus')"), + ) + .arg( + Arg::new("in") + .short('i') + .long("input") + .help("The directory to read initial inputs from ('seeds')"), + ) + .arg( + Arg::new("tokens") + .short('x') + .long("tokens") + .help("A file to read tokens from, to be used during fuzzing"), + ) + .arg( + Arg::new("logfile") + .short('l') + .long("logfile") + .help("Duplicates all output to this file") + .default_value("libafl.log"), + ) + .arg( + Arg::new("timeout") + .short('t') + .long("timeout") + .help("Timeout for each individual execution, in milliseconds") + .default_value("1200"), + ) + .arg(Arg::new("remaining")) + .try_get_matches() + { + Ok(res) => res, + Err(err) => { + println!( + "Syntax: {}, [-x dictionary] -o corpus_dir -i seed_dir\n{:?}", + env::current_exe() + .unwrap_or_else(|_| "fuzzer".into()) + .to_string_lossy(), + err, + ); + return; + } + }; + + println!( + "Workdir: {:?}", + env::current_dir().unwrap().to_string_lossy().to_string() + ); + + if let Some(filenames) = res.get_many::("remaining") { + let filenames: Vec<&str> = filenames.map(String::as_str).collect(); + if !filenames.is_empty() { + run_testcases(&filenames); + return; + } + } + + // For fuzzbench, crashes and finds are inside the same `corpus` directory, in the "queue" and "crashes" subdir. + let mut out_dir = PathBuf::from( + res.get_one::("out") + .expect("The --output parameter is missing") + .to_string(), + ); + if fs::create_dir(&out_dir).is_err() { + println!("Out dir at {:?} already exists.", &out_dir); + if !out_dir.is_dir() { + println!("Out dir at {:?} is not a valid directory!", &out_dir); + return; + } + } + let mut crashes = out_dir.clone(); + crashes.push("crashes"); + out_dir.push("queue"); + + let in_dir = PathBuf::from( + res.get_one::("in") + .expect("The --input parameter is missing") + .to_string(), + ); + if !in_dir.is_dir() { + println!("In dir at {:?} is not a valid directory!", &in_dir); + return; + } + + let tokens = res.get_one::("tokens").map(PathBuf::from); + + let logfile = PathBuf::from(res.get_one::("logfile").unwrap().to_string()); + + let timeout = Duration::from_millis( + res.get_one::("timeout") + .unwrap() + .to_string() + .parse() + .expect("Could not parse timeout in milliseconds"), + ); + + fuzz(out_dir, crashes, &in_dir, tokens, &logfile, timeout) + .expect("An error occurred while fuzzing"); +} + +fn run_testcases(filenames: &[&str]) { + // The actual target run starts here. + // Call LLVMFUzzerInitialize() if present. 
+ let args: Vec = env::args().collect(); + if libfuzzer_initialize(&args) == -1 { + println!("Warning: LLVMFuzzerInitialize failed with -1"); + } + + println!( + "You are not fuzzing, just executing {} testcases", + filenames.len() + ); + for fname in filenames { + println!("Executing {fname}"); + + let mut file = File::open(fname).expect("No file found"); + let mut buffer = vec![]; + file.read_to_end(&mut buffer).expect("Buffer overflow"); + + libfuzzer_test_one_input(&buffer); + } +} + +/// The actual fuzzer +#[allow(clippy::too_many_lines)] +fn fuzz( + corpus_dir: PathBuf, + objective_dir: PathBuf, + seed_dir: &PathBuf, + tokenfile: Option, + logfile: &PathBuf, + timeout: Duration, +) -> Result<(), Error> { + let log = RefCell::new(OpenOptions::new().append(true).create(true).open(logfile)?); + + #[cfg(unix)] + let mut stdout_cpy = unsafe { + let new_fd = dup(io::stdout().as_raw_fd())?; + File::from_raw_fd(new_fd) + }; + #[cfg(unix)] + let file_null = File::open("/dev/null")?; + + // 'While the monitor are state, they are usually used in the broker - which is likely never restarted + let monitor = SimpleMonitor::new(|s| { + #[cfg(unix)] + writeln!(&mut stdout_cpy, "{s}").unwrap(); + #[cfg(windows)] + println!("{s}"); + writeln!(log.borrow_mut(), "{:?} {s}", current_time()).unwrap(); + }); + + // We need a shared map to store our state before a crash. + // This way, we are able to continue fuzzing afterwards. + let mut shmem_provider = StdShMemProvider::new()?; + + let (state, mut mgr) = match SimpleRestartingEventManager::launch(monitor, &mut shmem_provider) + { + // The restarting state will spawn the same process again as child, then restarted it each time it crashes. + Ok(res) => res, + Err(err) => match err { + Error::ShuttingDown => { + return Ok(()); + } + _ => { + panic!("Failed to setup the restarter: {err}"); + } + }, + }; + + // Create an observation channel using the coverage map + // We don't use the hitcounts (see the Cargo.toml, we use pcguard_edges) + let edges_observer = + HitcountsMapObserver::new(unsafe { std_edges_map_observer("edges") }).track_indices(); + + // Create an observation channel to keep track of the execution time + let time_observer = TimeObserver::new("time"); + + let func_list = unsafe { OwnedMutPtr::from_raw_mut(Lazy::force_mut(&mut FUNCTION_LIST)) }; + let profiling_observer = ProfilingObserver::new("concatenated.json", func_list)?; + let callhook = CallHook::new(); + + let cmplog_observer = CmpLogObserver::new("cmplog", true); + + let map_feedback = MaxMapFeedback::new(&edges_observer); + + let calibration = CalibrationStage::new(&map_feedback); + + // Feedback to rate the interestingness of an input + // This one is composed by two Feedbacks in OR + let mut feedback = feedback_or!( + // New maximization map feedback linked to the edges observer and the feedback state + map_feedback, + // Time feedback, this one does not need a feedback state + TimeFeedback::new(&time_observer) + ); + + // A feedback to choose if an input is a solution or not + let mut objective = CrashFeedback::new(); + + // If not restarting, create a State from scratch + let mut state = state.unwrap_or_else(|| { + StdState::new( + // RNG + StdRand::new(), + // Corpus that will be evolved, we keep it in memory for performance + InMemoryOnDiskCorpus::new(corpus_dir).unwrap(), + // Corpus in which we store solutions (crashes in this example), + // on disk so the user can get them after stopping the fuzzer + OnDiskCorpus::new(objective_dir).unwrap(), + // States of the feedbacks. 
+ // The feedbacks can report the data that should persist in the State. + &mut feedback, + // Same for objective feedbacks + &mut objective, + ) + .unwrap() + }); + + println!("Let's fuzz :)"); + + // The actual target run starts here. + // Call LLVMFUzzerInitialize() if present. + let args: Vec = env::args().collect(); + if libfuzzer_initialize(&args) == -1 { + println!("Warning: LLVMFuzzerInitialize failed with -1"); + } + + // Setup a randomic Input2State stage + let i2s = StdMutationalStage::new(StdScheduledMutator::new(tuple_list!(I2SRandReplace::new()))); + + // Setup a MOPT mutator + let mutator = StdMOptMutator::new( + &mut state, + havoc_mutations().merge(tokens_mutations()), + 7, + 5, + )?; + + let power = StdPowerMutationalStage::new(mutator); + + // A minimization+queue policy to get testcasess from the corpus + let scheduler = IndexesLenTimeMinimizerScheduler::new( + &edges_observer, + StdWeightedScheduler::with_schedule(&mut state, &edges_observer, Some(PowerSchedule::FAST)), + ); + + // A fuzzer with feedbacks and a corpus scheduler + let mut fuzzer = StdFuzzer::new(scheduler, feedback, objective); + + // The wrapped harness function, calling out to the LLVM-style harness + let mut harness = |input: &BytesInput| { + let target = input.target_bytes(); + let buf = target.as_slice(); + libfuzzer_test_one_input(buf); + ExitKind::Ok + }; + + let mut tracing_harness = harness; + + // Create the executor for an in-process function with one observer for edge coverage and one for the execution time + let mut executor = HookableInProcessExecutor::with_timeout_generic( + tuple_list!(callhook.clone()), + &mut harness, + tuple_list!(edges_observer, time_observer, profiling_observer), + &mut fuzzer, + &mut state, + &mut mgr, + timeout, + )?; + + // Setup a tracing stage in which we log comparisons + let tracing = TracingStage::new( + HookableInProcessExecutor::with_timeout_generic( + tuple_list!(callhook), + &mut tracing_harness, + tuple_list!(cmplog_observer), + &mut fuzzer, + &mut state, + &mut mgr, + timeout * 10, + )?, + // Give it more time! + ); + + // The order of the stages matter! 
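+    // Calibration runs first to collect per-testcase stats for the scheduler;
+    // the tracing stage then re-executes the input under the cmplog executor so
+    // that the following input-to-state stage can splice in recorded comparison
+    // operands; the power-scheduled mutational stage runs last.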
+ let mut stages = tuple_list!(calibration, tracing, i2s, power); + + // Read tokens + if state.metadata_map().get::().is_none() { + let mut toks = Tokens::default(); + if let Some(tokenfile) = tokenfile { + toks.add_from_file(tokenfile)?; + } + #[cfg(any(target_os = "linux", target_vendor = "apple"))] + { + toks += autotokens()?; + } + + if !toks.is_empty() { + state.add_metadata(toks); + } + } + + // In case the corpus is empty (on first run), reset + if state.must_load_initial_inputs() { + state + .load_initial_inputs(&mut fuzzer, &mut executor, &mut mgr, &[seed_dir.clone()]) + .unwrap_or_else(|_| { + println!("Failed to load initial corpus at {:?}", &seed_dir); + process::exit(0); + }); + println!("We imported {} inputs from disk.", state.corpus().count()); + } + + // Remove target output (logs still survive) + #[cfg(unix)] + { + let null_fd = file_null.as_raw_fd(); + dup2(null_fd, io::stdout().as_raw_fd())?; + if std::env::var("LIBAFL_FUZZBENCH_DEBUG").is_err() { + // dup2(null_fd, io::stderr().as_raw_fd())?; + } + } + // reopen file to make sure we're at the end + log.replace(OpenOptions::new().append(true).create(true).open(logfile)?); + + fuzzer.fuzz_loop(&mut stages, &mut executor, &mut state, &mut mgr)?; + + // Never reached + Ok(()) +} diff --git a/fuzzers/dynamic_analysis/stub_rt.c b/fuzzers/dynamic_analysis/stub_rt.c new file mode 100644 index 0000000000..825d6780af --- /dev/null +++ b/fuzzers/dynamic_analysis/stub_rt.c @@ -0,0 +1,34 @@ +#include + +__attribute__((weak)) void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, + uint32_t *stop) { +} + +__attribute__((weak)) void __sanitizer_cov_trace_pc_guard(uint32_t *guard) { +} + +__attribute__((weak)) void __cmplog_rtn_hook(uint8_t *ptr1, uint8_t *ptr2) { +} + +__attribute__((weak)) void __cmplog_rtn_gcc_stdstring_cstring( + uint8_t *stdstring, uint8_t *cstring) { +} + +__attribute__((weak)) void __cmplog_rtn_gcc_stdstring_stdstring( + uint8_t *stdstring1, uint8_t *stdstring2) { +} + +__attribute__((weak)) void __cmplog_rtn_llvm_stdstring_cstring( + uint8_t *stdstring, uint8_t *cstring) { +} + +__attribute__((weak)) void __cmplog_rtn_llvm_stdstring_stdstring( + uint8_t *stdstring1, uint8_t *stdstring2) { +} + +extern void libafl_main(void); + +int main(int argc, char **argv) { + libafl_main(); + return 0; +} diff --git a/libafl/src/observers/mod.rs b/libafl/src/observers/mod.rs index ea68117adf..d27ff4860a 100644 --- a/libafl/src/observers/mod.rs +++ b/libafl/src/observers/mod.rs @@ -14,6 +14,12 @@ pub mod stacktrace; #[cfg(feature = "regex")] pub use stacktrace::*; +/// Profiler observer +#[cfg(feature = "std")] +pub mod profiling; +#[cfg(feature = "std")] +pub use profiling::*; + pub mod concolic; pub mod map; pub use map::*; diff --git a/libafl/src/observers/profiling.rs b/libafl/src/observers/profiling.rs new file mode 100644 index 0000000000..c6c0e9bee9 --- /dev/null +++ b/libafl/src/observers/profiling.rs @@ -0,0 +1,146 @@ +use alloc::{borrow::Cow, string::String}; +use std::{fs::File, io::BufReader, path::Path}; + +use hashbrown::HashMap; +use libafl_bolts::{ownedref::OwnedMutPtr, Named}; +use serde::{Deserialize, Serialize}; + +use crate::{inputs::UsesInput, observers::Observer, state::State, Error}; +#[derive(Debug, Serialize, Deserialize)] +/// The json data +pub struct FunctionData { + #[serde(rename = "name")] + name: String, + #[serde(rename = "# BBs")] + bb_count: Option, + #[serde(rename = "# insts")] + inst_count: Option, + #[serde(rename = "# edges")] + edge_count: Option, + #[serde(rename = "# 
binaryOp")] + binary_op_count: Option, + #[serde(rename = "# call")] + call_count: Option, + #[serde(rename = "# cmp")] + cmp_count: Option, + #[serde(rename = "# load")] + load_count: Option, + #[serde(rename = "# store")] + store_count: Option, + #[serde(rename = "# alloca")] + alloca_count: Option, + #[serde(rename = "# branch")] + branch_count: Option, + #[serde(rename = "ABC metric")] + abc_metric: Option, + cyclomatic: Option, + #[serde(rename = "AP")] + api_calls: Option>, + #[serde(rename = "h AP")] + heap_apis: Option>, + #[serde(rename = "m AP")] + memory_apis: Option>, + #[serde(rename = "ne lv")] + nested_level: Option>, + #[serde(rename = "cm gl")] + cmp_globals: Option>, + #[serde(rename = "cm nz")] + cmp_non_zeros: Option>, + #[serde(rename = "wr st")] + struct_writes: Option>, + #[serde(rename = "str arg")] + struct_args: Option>, + #[serde(rename = "cm ty")] + cmp_types: Option>, + #[serde(rename = "cm cm")] + cmp_complexity: Option>, + #[serde(rename = "ar ty")] + call_arg_types: Option>, + #[serde(rename = "st ty")] + store_types: Option>, + #[serde(rename = "l ty")] + load_types: Option>, + #[serde(rename = "al ty")] + alloca_types: Option>, +} + +#[derive(Debug, Default, Serialize, Deserialize)] +struct AnalysisData { + data: HashMap, +} + +/// The observer to lookup the static analysis data at runtime +#[derive(Debug, Serialize, Deserialize)] +pub struct ProfilingObserver { + /// The name of the observer. + pub name: Cow<'static, str>, + db: AnalysisData, + /// The map + map: OwnedMutPtr>, +} + +impl ProfilingObserver { + /// The constructor + pub fn new
<P>
(json_path: P, map: OwnedMutPtr>) -> Result + where + P: AsRef, + { + let f = File::open(json_path)?; + let reader = BufReader::new(f); + let analysis_data: AnalysisData = serde_json::from_reader(reader)?; + // debug + /* + for record in &analysis_data.data { + for (key, _value) in record.iter() { + log::info!("Record {} found!", key); + } + } + */ + + Ok(Self { + name: Cow::from("profiling"), + db: analysis_data, + map, + }) + } + + /// Get the map + #[must_use] + pub fn map(&self) -> &HashMap { + self.map.as_ref() + } + + /// lookup the data through db + #[must_use] + pub fn lookup(&self, function_id: usize) -> Option<&FunctionData> { + let item = self.db.data.get(&function_id); + item + } +} + +impl Named for ProfilingObserver { + fn name(&self) -> &Cow<'static, str> { + &self.name + } +} + +impl Observer for ProfilingObserver +where + S: State, +{ + fn post_exec( + &mut self, + _state: &mut S, + _input: &::Input, + _exit_kind: &crate::executors::ExitKind, + ) -> Result<(), Error> { + // in reality, this should be done in a stage + // but here just for poc + for (key, _item) in self.map() { + let found = self.lookup(*key); + log::info!("key: {}, data: {:#?}", key, found); + } + log::info!(""); + Ok(()) + } +} diff --git a/libafl_cc/build.rs b/libafl_cc/build.rs index 3b63ed86de..57e83e3ec8 100644 --- a/libafl_cc/build.rs +++ b/libafl_cc/build.rs @@ -428,6 +428,7 @@ pub const LIBAFL_CC_LLVM_VERSION: Option = None; ); for pass in &[ + "function-logging.cc", "cmplog-routines-pass.cc", "autotokens-pass.cc", "coverage-accounting-pass.cc", @@ -447,7 +448,7 @@ pub const LIBAFL_CC_LLVM_VERSION: Option = None; } // Optional pass - for pass in &["dump-cfg-pass.cc"] { + for pass in &["dump-cfg-pass.cc", "profiling.cc"] { build_pass( bindir_path, out_dir, diff --git a/libafl_cc/src/clang.rs b/libafl_cc/src/clang.rs index 8151f9e6a6..56bad90685 100644 --- a/libafl_cc/src/clang.rs +++ b/libafl_cc/src/clang.rs @@ -41,6 +41,10 @@ pub enum LLVMPasses { CmpLogInstructions, /// Instrument caller for sancov coverage Ctx, + /// Function logging + FunctionLogging, + /// Profiling + Profiling, /// Data dependency instrumentation DDG, } @@ -66,6 +70,12 @@ impl LLVMPasses { LLVMPasses::Ctx => { PathBuf::from(env!("OUT_DIR")).join(format!("ctx-pass.{}", dll_extension())) } + LLVMPasses::FunctionLogging => { + PathBuf::from(env!("OUT_DIR")).join(format!("function-logging.{}", dll_extension())) + } + LLVMPasses::Profiling => { + PathBuf::from(env!("OUT_DIR")).join(format!("profiling.{}", dll_extension())) + } LLVMPasses::DDG => { PathBuf::from(env!("OUT_DIR")).join(format!("ddg-instr.{}", dll_extension())) } diff --git a/libafl_cc/src/function-logging.cc b/libafl_cc/src/function-logging.cc new file mode 100644 index 0000000000..b67641f5b6 --- /dev/null +++ b/libafl_cc/src/function-logging.cc @@ -0,0 +1,191 @@ +/* + LibAFL - Function Logging LLVM pass + -------------------------------------------------- + + Written by Dongjia Zhang + + Copyright 2022-2023 AFLplusplus Project. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at: + + http://www.apache.org/licenses/LICENSE-2.0 + +*/ + +#include +#include +#include "common-llvm.h" +#ifndef _WIN32 + #include + #include +#else + #include +#endif +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "llvm/Config/llvm-config.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/IRBuilder.h" + +#if USE_NEW_PM + #include "llvm/Passes/PassPlugin.h" + #include "llvm/Passes/PassBuilder.h" + #include "llvm/IR/PassManager.h" +#else + #include "llvm/IR/LegacyPassManager.h" + #include "llvm/Transforms/IPO/PassManagerBuilder.h" +#endif + +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Pass.h" +#include "llvm/IR/Constants.h" + +#include + +using namespace llvm; + +#define MAP_SIZE EDGES_MAP_SIZE_IN_USE + +namespace { + +#if USE_NEW_PM +class FunctionLogging : public PassInfoMixin { + public: + FunctionLogging() { +#else +class FunctionLogging : public ModulePass { + public: + static char ID; + + FunctionLogging() : ModulePass(ID) { +#endif + } + +#if USE_NEW_PM + PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); +#else + bool runOnModule(Module &M) override; +#endif + + protected: + uint32_t map_size = MAP_SIZE; + + private: + bool isLLVMIntrinsicFn(StringRef &n) { + // Not interested in these LLVM's functions +#if LLVM_VERSION_MAJOR >= 18 + if (n.starts_with("llvm.")) { +#else + if (n.startswith("llvm.")) { +#endif + return true; + } else { + return false; + } + } +}; + +} // namespace + +#if USE_NEW_PM +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { + return {LLVM_PLUGIN_API_VERSION, "FunctionLoggingPass", "v0.1", + /* lambda to insert our pass into the pass pipeline. 
*/ + [](PassBuilder &PB) { + + #if LLVM_VERSION_MAJOR <= 13 + using OptimizationLevel = typename PassBuilder::OptimizationLevel; + #endif + PB.registerOptimizerLastEPCallback( + [](ModulePassManager &MPM, OptimizationLevel OL) { + MPM.addPass(FunctionLogging()); + }); + }}; +} +#else +char FunctionLogging::ID = 0; +#endif + +#if USE_NEW_PM +PreservedAnalyses FunctionLogging::run(Module &M, ModuleAnalysisManager &MAM) { +#else +bool FunctionLogging::runOnModule(Module &M) { + +#endif + LLVMContext &C = M.getContext(); + auto moduleName = M.getName(); + Type *VoidTy = Type::getVoidTy(C); + IntegerType *Int8Ty = IntegerType::getInt8Ty(C); + IntegerType *Int32Ty = IntegerType::getInt32Ty(C); + IntegerType *Int64Ty = IntegerType::getInt64Ty(C); + FunctionCallee callHook; + callHook = + M.getOrInsertFunction("__libafl_target_call_hook", VoidTy, Int64Ty); + uint32_t rand_seed; + + rand_seed = time(NULL); + srand(rand_seed); + + for (auto &F : M) { + int has_calls = 0; + + if (isIgnoreFunction(&F)) { continue; } + if (F.size() < 1) { continue; } + // instrument the first basic block of this fn + BasicBlock &entry = F.front(); + std::size_t function_id = std::hash{}(F.getName().str()); + IRBuilder<> IRB(&entry); + IRB.SetInsertPoint(&entry.front()); + std::vector args; + llvm::Value *value = llvm::ConstantInt::get( + llvm::Type::getInt64Ty(F.getContext()), function_id); + args.push_back(value); + IRB.CreateCall(callHook, args); + } + +#if USE_NEW_PM + auto PA = PreservedAnalyses::all(); + return PA; +#else + return true; +#endif +} + +#if USE_NEW_PM + +#else +static void registerFunctionLoggingPass(const PassManagerBuilder &, + legacy::PassManagerBase &PM) { + PM.add(new FunctionLoggingPass()); +} + +static RegisterPass X("function-logging", + "function logging pass", false, false); + +static RegisterStandardPasses RegisterFunctionLogging( + PassManagerBuilder::EP_OptimizerLast, registerFunctionLoggingPass); + +static RegisterStandardPasses RegisterFunctionLogging0( + PassManagerBuilder::EP_EnabledOnOptLevel0, registerFunctionLoggingPass); +#endif diff --git a/libafl_cc/src/profiling.cc b/libafl_cc/src/profiling.cc index 68b82ef914..1e9c2891b3 100644 --- a/libafl_cc/src/profiling.cc +++ b/libafl_cc/src/profiling.cc @@ -61,7 +61,6 @@ #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Support/FileSystem.h" @@ -97,11 +96,10 @@ class AnalysisPass : public ModulePass { #endif protected: - DenseMap bb_to_cur_loc; - DenseMap entry_bb; - DenseMap> calls_in_bb; - DenseMap> structLinks; - DenseMap> structDesc; + DenseMap bb_to_cur_loc; + DenseMap entry_bb; + DenseMap> calls_in_bb; + // DenseMap> structDesc; // The type name is not in the memory, so create std::strign impromptu private: @@ -163,11 +161,11 @@ class AnalysisPass : public ModulePass { !FuncName.compare("xmlStrcasestr") || !FuncName.compare("g_str_has_prefix") || !FuncName.compare("g_str_has_suffix")); - isStrcmp &= - FT->getNumParams() == 2 && FT->getReturnType()->isIntegerTy(32) && - FT->getParamType(0) == FT->getParamType(1) && - FT->getParamType(0) == IntegerType::getInt8PtrTy(M.getContext()); - + isStrcmp &= FT->getNumParams() == 2 && + FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0) == FT->getParamType(1) && + FT->getParamType(0) == + IntegerType::getInt8Ty(M.getContext())->getPointerTo(0); return isStrcmp; } @@ -185,11 +183,12 @@ class AnalysisPass : public ModulePass 
{ !FuncName.compare("g_ascii_strncasecmp") || !FuncName.compare("Curl_strncasecompare") || !FuncName.compare("g_strncasecmp")); - isStrncmp &= - FT->getNumParams() == 3 && FT->getReturnType()->isIntegerTy(32) && - FT->getParamType(0) == FT->getParamType(1) && - FT->getParamType(0) == IntegerType::getInt8PtrTy(M.getContext()) && - FT->getParamType(2)->isIntegerTy(); + isStrncmp &= FT->getNumParams() == 3 && + FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0) == FT->getParamType(1) && + FT->getParamType(0) == + IntegerType::getInt8Ty(M.getContext())->getPointerTo(0) && + FT->getParamType(2)->isIntegerTy(); return isStrncmp; } @@ -246,7 +245,7 @@ class AnalysisPass : public ModulePass { bool isLLVMIntrinsicFn(StringRef &n) { // Not interested in these LLVM's functions - if (n.startswith("llvm.")) { + if (n.starts_with("llvm.")) { return true; } else { return false; @@ -411,37 +410,33 @@ bool AnalysisPass::runOnModule(Module &M) { */ bool run = true; - bool done_already = file_exist("/out/." + genericFilePath + ".json"); + std::string output_dir; + const char *path = std::getenv("ANALYSIS_OUTPUT"); + if (path != nullptr) { + output_dir = path; + if (std::filesystem::exists(output_dir) && + std::filesystem::is_directory(output_dir)) { + // good + } else { + std::cerr << "Output path is empty!" << std::endl; + } + // Use the output_dir string here + } else { + std::cerr << "Output path not set!" << std::endl; + } + bool done_already = + file_exist(output_dir + std::string("/") + genericFilePath + ".json"); if (done_already) { run = false; } else { - std::ofstream out_lock("/out/." + genericFilePath + ".json"); + std::ofstream out_lock(output_dir + std::string("/") + genericFilePath + + ".json"); } if (run) { outs() << "Analysis on " + genericFilePath << "\n"; - LLVMContext &Ctx = M.getContext(); - auto moduleName = M.getName().str(); - // printf("Hello\n"); - for (auto ST : M.getIdentifiedStructTypes()) { - std::unordered_map types; - for (auto T : ST->elements()) { - types[T->getTypeID()] += 1; - auto ty = T; - while (true) { - // Recursive - if (ty->isPointerTy()) { - ty = ty->getPointerElementType(); - continue; - } else if (ty->isStructTy()) { - structLinks[ST->getStructName()].push_back(ty->getStructName()); - } - break; - } - } - - structDesc[ST->getStructName()] = types; - } + LLVMContext &Ctx = M.getContext(); + auto moduleName = M.getName().str(); nlohmann::json res; for (auto &F : M) { @@ -475,6 +470,13 @@ bool AnalysisPass::runOnModule(Module &M) { unsigned binary_op_cnt = 0; entry_bb[F.getName()] = &F.getEntryBlock(); + // now we get the sha256sum for this function. 
(mangled function name + // should be unique else it will result in linker error) by this we make a + // map ( |-> ) + std::size_t hashed = std::hash{}(F.getName().str()); + // cast again as string, it's json, key has to be a string + std::string function_id = std::to_string(hashed); + for (auto &BB : F) { bb_to_cur_loc[&BB] = bb_cnt; bb_cnt++; @@ -543,18 +545,6 @@ bool AnalysisPass::runOnModule(Module &M) { auto arg_ty = arg->getType(); std::string type_str = typeWriter(arg_ty); callArgTypes[type_str]++; - - auto ty = arg_ty; - while (true) { - // recursive - if (ty->isPointerTy()) { - ty = ty->getPointerElementType(); - continue; - } else if (ty->isStructTy()) { - structArgs[type_str]++; - } - break; - } } } } else if ((cmpInst = dyn_cast(&IN))) { @@ -625,36 +615,39 @@ bool AnalysisPass::runOnModule(Module &M) { } std::string fnname = std::string(F.getName()); - if (bb_cnt) { res[fnname]["# BBs"] = bb_cnt; } - if (inst_cnt) { res[fnname]["# insts"] = inst_cnt; } + res[function_id]["name"] = fnname; - if (edges_cnt) { res[fnname]["# edges"] = edges_cnt; } + if (bb_cnt) { res[function_id]["# BBs"] = bb_cnt; } - if (binary_op_cnt) { res[fnname]["# binaryOp"] = binary_op_cnt; } + if (inst_cnt) { res[function_id]["# insts"] = inst_cnt; } - if (call_cnt) { res[fnname]["# call"] = call_cnt; } + if (edges_cnt) { res[function_id]["# edges"] = edges_cnt; } - if (cmp_cnt) { res[fnname]["# cmp"] = cmp_cnt; } + if (binary_op_cnt) { res[function_id]["# binaryOp"] = binary_op_cnt; } - if (load_cnt) { res[fnname]["# load"] = load_cnt; } + if (call_cnt) { res[function_id]["# call"] = call_cnt; } - if (store_cnt) { res[fnname]["# store"] = store_cnt; } + if (cmp_cnt) { res[function_id]["# cmp"] = cmp_cnt; } - if (alloca_cnt) { res[fnname]["# alloca"] = alloca_cnt; } + if (load_cnt) { res[function_id]["# load"] = load_cnt; } - if (branch_cnt) { res[fnname]["# branch"] = branch_cnt; } + if (store_cnt) { res[function_id]["# store"] = store_cnt; } - res[fnname]["ABC metric"] = + if (alloca_cnt) { res[function_id]["# alloca"] = alloca_cnt; } + + if (branch_cnt) { res[function_id]["# branch"] = branch_cnt; } + + res[function_id]["ABC metric"] = sqrt(alloca_cnt * alloca_cnt + branch_cnt * branch_cnt + call_cnt * call_cnt); - res[fnname]["cyclomatic"] = edges_cnt - bb_cnt + 2; + res[function_id]["cyclomatic"] = edges_cnt - bb_cnt + 2; // outs() << "APIs:\n"; for (auto record = APIcalls.begin(); record != APIcalls.end(); record++) { auto key = record->getFirst(); if (!isLLVMIntrinsicFn(key)) { - res[fnname]["AP"][std::string(key)] = APIcalls[key]; + res[function_id]["AP"][std::string(key)] = APIcalls[key]; // outs() << key << " " << APIcalls[key] << "\n"; } } @@ -663,7 +656,7 @@ bool AnalysisPass::runOnModule(Module &M) { // outs() << "memoryAPIs:\n"; for (auto record = heapAPIs.begin(); record != heapAPIs.end(); record++) { auto key = record->getFirst(); - res[fnname]["h AP"][std::string(key)] = heapAPIs[key]; + res[function_id]["h AP"][std::string(key)] = heapAPIs[key]; // outs() << key << " " << heapAPIs[key] << "\n"; } // outs() << "\n"; @@ -671,28 +664,28 @@ bool AnalysisPass::runOnModule(Module &M) { for (auto record = memoryAPIs.begin(); record != memoryAPIs.end(); record++) { auto key = record->getFirst(); - res[fnname]["m AP"][std::string(key)] = memoryAPIs[key]; + res[function_id]["m AP"][std::string(key)] = memoryAPIs[key]; // outs() << key << " " << memoryAPIs[key] << "\n"; } for (auto record = nestedLevel.begin(); record != nestedLevel.end(); record++) { auto key = record->first; - res[fnname]["ne 
lv"][std::to_string(key)] = nestedLevel[key]; + res[function_id]["ne lv"][std::to_string(key)] = nestedLevel[key]; // outs() << key << " " << memoryAPIs[key] << "\n"; } for (auto record = cmpGlobals.begin(); record != cmpGlobals.end(); record++) { auto key = record->first; - res[fnname]["cm gl"][std::to_string(key)] = cmpGlobals[key]; + res[function_id]["cm gl"][std::to_string(key)] = cmpGlobals[key]; // outs() << key << " " << memoryAPIs[key] << "\n"; } for (auto record = cmpNonZeros.begin(); record != cmpNonZeros.end(); record++) { auto key = record->first; - res[fnname]["cm nz"][std::to_string(key)] = cmpNonZeros[key]; + res[function_id]["cm nz"][std::to_string(key)] = cmpNonZeros[key]; // outs() << key << " " << memoryAPIs[key] << "\n"; } @@ -701,7 +694,7 @@ bool AnalysisPass::runOnModule(Module &M) { record++) { auto key = record->getFirst(); // Some are nameless struct - res[fnname]["wr st"][std::string(key)] = structWrites[key]; + res[function_id]["wr st"][std::string(key)] = structWrites[key]; // outs() << key << " " << structWrites[key] << "\n"; } // outs() << "\n"; @@ -710,28 +703,28 @@ bool AnalysisPass::runOnModule(Module &M) { for (auto record = structArgs.begin(); record != structArgs.end(); record++) { auto key = record->first; - res[fnname]["str arg"][std::string(key)] = record->second; + res[function_id]["str arg"][std::string(key)] = record->second; // outs() << key << " " << record->second << "\n"; } // outs() << "\n"; // outs() << "CmpTypes:\n"; for (auto record = cmpTypes.begin(); record != cmpTypes.end(); record++) { - res[fnname]["cm ty"][record->first] = record->second; + res[function_id]["cm ty"][record->first] = record->second; // outs() << record->first << " " << record->second << "\n"; } // outs() << "\n"; for (auto record = cmpComplexity.begin(); record != cmpComplexity.end(); record++) { - res[fnname]["cm cm"][record->first] = record->second; + res[function_id]["cm cm"][record->first] = record->second; // outs() << record->first << " " << record->second << "\n"; } // outs() << "CallArgTypes:\n"; for (auto record = callArgTypes.begin(); record != callArgTypes.end(); record++) { - res[fnname]["ar ty"][record->first] = record->second; + res[function_id]["ar ty"][record->first] = record->second; // outs() << record->first << " " << record->second << "\n"; } // outs() << "\n"; @@ -739,7 +732,7 @@ bool AnalysisPass::runOnModule(Module &M) { // outs() << "storeTypes:\n"; for (auto record = storeTypes.begin(); record != storeTypes.end(); record++) { - res[fnname]["st ty"][record->first] = record->second; + res[function_id]["st ty"][record->first] = record->second; // outs() << record->first << " " << record->second << "\n"; } // outs() << "\n"; @@ -747,7 +740,7 @@ bool AnalysisPass::runOnModule(Module &M) { // outs() << "loadTypes:\n"; for (auto record = loadTypes.begin(); record != loadTypes.end(); record++) { - res[fnname]["l ty"][record->first] = record->second; + res[function_id]["l ty"][record->first] = record->second; // outs() << record->first << " " << record->second << "\n"; } // outs() << "\n"; @@ -755,121 +748,24 @@ bool AnalysisPass::runOnModule(Module &M) { // outs() << "allocaTypes:\n"; for (auto record = allocaTypes.begin(); record != allocaTypes.end(); record++) { - res[fnname]["al ty"][record->first] = record->second; + res[function_id]["al ty"][record->first] = record->second; // outs() << record->first << " " << record->second << "\n"; } // outs() << "\n"; - if (getenv("ANALYSIS_OUTPUT_PATH")) { - if (std::ofstream(getenv("ANALYSIS_OUTPUT_PATH") + 
std::string("/") + + if (getenv("ANALYSIS_OUTPUT")) { + if (std::ofstream(getenv("ANALYSIS_OUTPUT") + std::string("/") + genericFilePath + ".json") << res << "\n") { } else { - abort(); + errs() << "Failed to write the data" + << "\n"; } } else { - errs() << "output path not set!" + errs() << "Failed to write the data, output path not set!" << "\n"; } } - - nlohmann::json struct_links; - // outs() << "StructLinks:\n"; - for (auto record = structLinks.begin(); record != structLinks.end(); - record++) { - StringRef key = record->getFirst(); - // outs() << "struct: " << key << "\t"; - std::vector links{}; - // outs() << "links: "; - for (auto item = structLinks[key].begin(); item != structLinks[key].end(); - item++) { - links.push_back(std::string(*item)); - // outs() << *item << " "; - } - struct_links[moduleName][std::string(key)]["lks"] = links; - // outs() << "\n"; - } - - for (auto record = structDesc.begin(); record != structDesc.end(); - record++) { - auto key = record->getFirst(); - struct_links[moduleName][std::string(key)]["desc"] = record->second; - } - - // outs() << "\n"; - - if (getenv("ANALYSIS_OUTPUT_PATH")) { - if (std::ofstream(getenv("ANALYSIS_OUTPUT_PATH") + std::string("/") + - genericFilePath + ".lks") - << struct_links << "\n") { - } else { - abort(); - } - } else { - errs() << "output path not set!" - << "\n"; - } - - nlohmann::json cfg; - - for (auto record = bb_to_cur_loc.begin(); record != bb_to_cur_loc.end(); - record++) { - auto current_bb = record->getFirst(); - auto loc = record->getSecond(); - Function *calling_func = current_bb->getParent(); - std::string func_name = std::string(""); - - if (calling_func) { - func_name = std::string(calling_func->getName()); - // outs() << "Function name: " << calling_func->getName() << "\n"; - } - - std::vector outgoing; - for (auto bb_successor = succ_begin(current_bb); - bb_successor != succ_end(current_bb); bb_successor++) { - outgoing.push_back(bb_to_cur_loc[*bb_successor]); - } - cfg["edges"][func_name][loc] = outgoing; - } - - for (auto record = calls_in_bb.begin(); record != calls_in_bb.end(); - record++) { - auto current_bb = record->getFirst(); - auto loc = bb_to_cur_loc[current_bb]; - Function *calling_func = current_bb->getParent(); - std::string func_name = std::string(""); - - if (calling_func) { - func_name = std::string(calling_func->getName()); - // outs() << "Function name: " << calling_func->getName() << "\n"; - } - - std::vector outgoing_funcs; - for (auto &item : record->getSecond()) { - outgoing_funcs.push_back(std::string(item)); - } - if (!outgoing_funcs.empty()) { - cfg["calls"][func_name][std::to_string(loc)] = outgoing_funcs; - } - } - - for (auto record = entry_bb.begin(); record != entry_bb.end(); record++) { - cfg["entries"][std::string(record->getFirst())] = - bb_to_cur_loc[record->getSecond()]; - } - - if (getenv("ANALYSIS_OUTPUT_PATH")) { - if (std::ofstream(getenv("ANALYSIS_OUTPUT_PATH") + std::string("/") + - genericFilePath + ".cfg") - << cfg << "\n") { - } else { - abort(); - } - - } else { - errs() << "output path not set!" 
- << "\n"; - } } #if USE_NEW_PM diff --git a/libafl_targets/Cargo.toml b/libafl_targets/Cargo.toml index 7ffbacc4ce..ccffa4eb24 100644 --- a/libafl_targets/Cargo.toml +++ b/libafl_targets/Cargo.toml @@ -58,6 +58,7 @@ forkserver = ["common"] # Compile C code for forkserver support windows_asan = ["common"] # Compile C code for ASAN on Windows whole_archive = [] # use +whole-archive to ensure the presence of weak symbols cmplog_extended_instrumentation = [] # support for aflpp cmplog map, we will remove this once aflpp and libafl cmplog shares the same LLVM passes. +function-logging = ["common"] [build-dependencies] bindgen = "0.69.4" @@ -68,6 +69,8 @@ rustversion = "1.0" libafl = { path = "../libafl", version = "0.12.0", default-features = false, features = [] } libafl_bolts = { path = "../libafl_bolts", version = "0.12.0", default-features = false, features = [] } libc = "0.2" +hashbrown = "0.14" +once_cell = "1.19" log = "0.4.20" rustversion = "1.0" diff --git a/libafl_targets/src/call.rs b/libafl_targets/src/call.rs new file mode 100644 index 0000000000..581a3f2fcd --- /dev/null +++ b/libafl_targets/src/call.rs @@ -0,0 +1,48 @@ +use core::marker::PhantomData; + +use hashbrown::HashMap; +use libafl::{ + executors::{hooks::ExecutorHook, HasObservers}, + inputs::UsesInput, +}; +use once_cell::sync::Lazy; +/// The list of functions that this execution has observed +pub static mut FUNCTION_LIST: Lazy> = Lazy::new(HashMap::new); + +#[no_mangle] +/// The runtime code inserted at every callinst invokation (if you used the function-logging.cc) +/// # Safety +/// unsafe because it touches pub static mut +pub unsafe extern "C" fn __libafl_target_call_hook(id: usize) { + *FUNCTION_LIST.entry(id).or_insert(0) += 1; +} + +/// The empty struct to clear the `FUNCTION_LIST` before the execution +#[derive(Debug, Clone, Copy, Default)] +pub struct CallHook { + phantom: PhantomData, +} + +impl CallHook { + /// The constructor + #[must_use] + pub fn new() -> Self { + Self { + phantom: PhantomData, + } + } +} + +impl ExecutorHook for CallHook +where + S: UsesInput, +{ + fn init(&mut self, _state: &mut S) {} + + fn pre_exec(&mut self, _state: &mut S, _input: &::Input) { + // clear it before the execution + unsafe { FUNCTION_LIST.clear() } + } + + fn post_exec(&mut self, _state: &mut S, _input: &::Input) {} +} diff --git a/libafl_targets/src/lib.rs b/libafl_targets/src/lib.rs index 7928c595d0..12a6ee648c 100644 --- a/libafl_targets/src/lib.rs +++ b/libafl_targets/src/lib.rs @@ -121,6 +121,12 @@ pub use coverage::*; pub mod value_profile; pub use value_profile::*; +/// The module to hook call instructions +#[cfg(feature = "function-logging")] +pub mod call; +#[cfg(feature = "function-logging")] +pub use call::*; + /// runtime related to comparisons pub mod cmps; pub use cmps::*;
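For reference, the file consumed by `ProfilingObserver` is the `concatenated.json` produced by `concatenator.py`: a one-element JSON array whose object maps each stringified function id (the `std::hash` of the mangled function name, as written by the profiling pass) to that function's metrics. Below is a minimal, standalone sketch of inspecting that file outside the fuzzer; it is illustrative only and assumes the `serde_json` crate, which is not a dependency added by this patch.

```rust
use std::{env, fs::File, io::BufReader};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Path to the merged analysis file written by concatenator.py.
    let path = env::args()
        .nth(1)
        .unwrap_or_else(|| "concatenated.json".into());
    let root: serde_json::Value =
        serde_json::from_reader(BufReader::new(File::open(path)?))?;

    // concatenator.py wraps the merged map in a single-element array.
    let functions = root
        .get(0)
        .and_then(|v| v.as_object())
        .ok_or("unexpected JSON layout")?;

    // Keys are stringified function ids (std::hash of the mangled name),
    // the same ids __libafl_target_call_hook reports at runtime.
    for (function_id, metrics) in functions.iter().take(5) {
        println!(
            "fn {function_id}: name={:?} #BBs={:?} cyclomatic={:?}",
            metrics.get("name"),
            metrics.get("# BBs"),
            metrics.get("cyclomatic"),
        );
    }
    Ok(())
}
```

The ids printed here are the same values that `ProfilingObserver::lookup` receives from `FUNCTION_LIST` after each execution.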