LibAFL Dynamic Analysis (#2208)

* aa

* push

* push

* push

* claude ai

* push

* add

* add stuff

* upd

* rdm

* fix

* ci

* fix

* fix

* fixing

* feature

* revert

* no submodules

* ci
This commit is contained in:
Dongjia "toka" Zhang 2024-05-24 14:43:27 +02:00 committed by GitHub
parent 07dca4b59b
commit e6eb6c48d2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
23 changed files with 1286 additions and 183 deletions

View File

@ -333,6 +333,7 @@ jobs:
- ./fuzzers/libfuzzer_stb_image
- ./fuzzers/fuzzbench_forkserver
# - ./fuzzers/libfuzzer_windows_asan
# - ./fuzzers/dynamic_analysis
- ./fuzzers/baby_fuzzer_minimizing
- ./fuzzers/frida_executable_libpng
- ./fuzzers/tutorial

View File

@ -36,7 +36,7 @@ runs:
version: 17
- name: Install deps
shell: bash
run: sudo apt update && sudo apt install -y nasm ninja-build gcc-arm-linux-gnueabi g++-arm-linux-gnueabi gcc-aarch64-linux-gnu g++-aarch64-linux-gnu gcc-mipsel-linux-gnu g++-mipsel-linux-gnu gcc-powerpc-linux-gnu g++-powerpc-linux-gnu libc6-dev-i386-cross libc6-dev libc6-dev-i386 lib32gcc-11-dev lib32stdc++-11-dev libgtk-3-dev pax-utils libz3-dev
run: sudo apt update && sudo apt install -y nasm nlohmann-json3-dev ninja-build gcc-arm-linux-gnueabi g++-arm-linux-gnueabi gcc-aarch64-linux-gnu g++-aarch64-linux-gnu gcc-mipsel-linux-gnu g++-mipsel-linux-gnu gcc-powerpc-linux-gnu g++-powerpc-linux-gnu libc6-dev-i386-cross libc6-dev libc6-dev-i386 lib32gcc-11-dev lib32stdc++-11-dev libgtk-3-dev pax-utils libz3-dev
- name: pip install
shell: bash
run: python3 -m pip install msgpack jinja2 find_libpython

View File

@ -0,0 +1,41 @@
# Cargo manifest for the dynamic-analysis example fuzzer.
# The crate is named "fuzzbench" because it reuses the fuzzbench harness layout.
[package]
name = "fuzzbench"
version = "0.12.0"
authors = ["Andrea Fioraldi <andreafioraldi@gmail.com>", "Dominik Maier <domenukk@gmail.com>"]
edition = "2021"

[features]
default = ["std"]
std = []
# Forward to libafl_targets: do not link libFuzzer's own main().
no_link_main = ["libafl_targets/libfuzzer_no_link_main"]

# Release profile tuned for fuzzing throughput; keeps debug info for crash triage.
[profile.release]
lto = true
codegen-units = 1
opt-level = 3
debug = true

# Stripped release variant (no debug info) for benchmark submissions.
[profile.release-fuzzbench]
inherits = "release"
debug = false
strip = true

[build-dependencies]
cc = { version = "1.0", features = ["parallel"] }
which = "4.4"

[dependencies]
env_logger = "0.10"
once_cell = "1.19"
libafl = { path = "../../libafl/" }
libafl_bolts = { path = "../../libafl_bolts/" }
# "function-logging" enables the LLVM pass that instruments every function entry.
libafl_targets = { path = "../../libafl_targets/", features = ["sancov_pcguard_hitcounts", "sancov_cmplog", "libfuzzer", "function-logging"] }
# TODO Include it only when building cc
libafl_cc = { path = "../../libafl_cc/" }
clap = { version = "4.0", features = ["default"] }
nix = { version = "0.27", features = ["fs"] }
mimalloc = { version = "*", default-features = false }

# The harness links against this crate as a static library.
[lib]
name = "fuzzbench"
crate-type = ["staticlib"]

View File

@ -0,0 +1,108 @@
# cargo-make build configuration for the dynamic-analysis fuzzer.
[env]
# Directory of this crate; used to derive the default target dir below.
PROJECT_DIR = { script = ["pwd"] }
CARGO_TARGET_DIR = { value = "${PROJECT_DIR}/target", condition = { env_not_set = ["CARGO_TARGET_DIR"] } }
FUZZER_NAME="fuzzer"
PROFILE = { value = "release", condition = {env_not_set = ["PROFILE"]} }
PROFILE_DIR = {value = "release", condition = {env_not_set = ["PROFILE_DIR"] }}

# Fallback task for platforms where these tasks are not implemented (Windows).
[tasks.unsupported]
script_runner="@shell"
script='''
echo "Cargo-make not integrated yet on this"
'''

# Compilers
# Build the libafl_cc/libafl_cxx compiler wrappers (same cargo invocation for both).
[tasks.cxx]
linux_alias = "cxx_unix"
mac_alias = "cxx_unix"
windows_alias = "unsupported"

[tasks.cxx_unix]
command = "cargo"
args = ["build", "--profile", "${PROFILE}"]

[tasks.cc]
linux_alias = "cc_unix"
mac_alias = "cc_unix"
windows_alias = "unsupported"

[tasks.cc_unix]
command = "cargo"
args = ["build", "--profile", "${PROFILE}"]

# fuzz.o File
# Compile the C++ harness to an object file with the wrapper (no linking yet).
[tasks.fuzz_o]
linux_alias = "fuzz_o_unix"
mac_alias = "fuzz_o_unix"
windows_alias = "unsupported"

[tasks.fuzz_o_unix]
command = "${CARGO_TARGET_DIR}/${PROFILE_DIR}/libafl_cxx"
args = ["--libafl-no-link", "-O3","-I", "./Little-CMS/include", "-c", "cms_transform_fuzzer.cc", "-o", "cms_transform_fuzzer.o"]
dependencies = ["cc", "cxx"]

# Fuzzer
# Link the harness object with the Little-CMS static library into the fuzzer binary.
[tasks.fuzzer]
linux_alias = "fuzzer_unix"
mac_alias = "fuzzer_unix"
windows_alias = "unsupported"

[tasks.fuzzer_unix]
command = "${CARGO_TARGET_DIR}/${PROFILE_DIR}/libafl_cxx"
args = ["--libafl", "cms_transform_fuzzer.o", "./Little-CMS/src/.libs/liblcms2.a", "-o", "${FUZZER_NAME}", "-lm", "-lz"]
dependencies = ["cc", "cxx", "fuzz_o"]

# Run
# Run the fuzzer with a minimal seed corpus.
[tasks.run]
linux_alias = "run_unix"
mac_alias = "run_unix"
windows_alias = "unsupported"

[tasks.run_unix]
script_runner="@shell"
script='''
rm -rf libafl_unix_shmem_server || true
mkdir in || true
echo a > in/a
./${FUZZER_NAME} -o out -i in
'''
dependencies = ["fuzzer"]

# Test
# CI smoke test: run for 31s and check that at least one objective was found.
[tasks.test]
linux_alias = "test_unix"
mac_alias = "test_unix"
windows_alias = "unsupported"

[tasks.test_unix]
script_runner="@shell"
script='''
rm -rf libafl_unix_shmem_server || true
mkdir in || true
echo a > in/a
# Allow sigterm as exit code
timeout 31s ./${FUZZER_NAME} -o out -i in | tee fuzz_stdout.log || true
if grep -qa "objectives: 1" fuzz_stdout.log; then
    echo "Fuzzer is working"
else
    echo "Fuzzer does not generate any testcases or any crashes"
    exit 1
fi
rm -rf out || true
rm -rf in || true
'''
dependencies = ["fuzzer"]

# Clean
# Remove the built fuzzer and intermediate objects.
[tasks.clean]
linux_alias = "clean_unix"
mac_alias = "clean_unix"
windows_alias = "unsupported"

[tasks.clean_unix]
script_runner="@shell"
script='''
rm ./${FUZZER_NAME} || true
rm fuzz.o || true
'''

View File

@ -0,0 +1,11 @@
# Dynamic Analysis Fuzzer
This fuzzer shows how you can collect runtime analysis information during fuzzing using LibAFL. We use the Little-CMS project as the example target.
First, this fuzzer requires `nlohmann-json3-dev` to work.
To run the fuzzer,
0. Compile the fuzzer with `cargo build --release`
1. `mkdir analysis` and run `build.sh`. This will compile Little-CMS to extract the analysis information and generate a json file for each module.
2. run `python3 concatenator.py analysis`. This will concatenate all the json into one single file. This json file maps a function id to its analysis information.
3. Compile the fuzzer with `cargo make fuzzer`. This will instrument the target at every function entry point, so that whenever execution reaches the entry of any function, we
can log its id and record which functions were executed.
4. Run the fuzzer `RUST_LOG=info ./fuzzer --input ./corpus --output ./out`. You'll see a stream of analysis data

View File

@ -0,0 +1,25 @@
use std::{env, process::Command};
/// Build script: ensures the Little-CMS sources are present next to the
/// crate before compilation, cloning them from GitHub when missing.
fn main() {
    let lcms_dir = env::current_dir().unwrap().join("Little-CMS");

    if !lcms_dir.exists() {
        println!("cargo:warning=Downloading Little-CMS");
        // The checkout is missing: fetch it with git.
        let status = Command::new("git")
            .arg("clone")
            .arg("https://github.com/mm2/Little-CMS")
            .arg(lcms_dir.to_str().unwrap())
            .status()
            .expect("Failed to clone Little-CMS repository");
        if !status.success() {
            panic!("Failed to clone Little-CMS repository");
        }
    }

    // Tell Cargo that if the given file changes, to rerun this build script
    println!("cargo:rerun-if-changed=build.rs");
}

View File

@ -0,0 +1,14 @@
#!/bin/bash
# Build Little-CMS with the LibAFL compiler wrappers and link the fuzzer.
# Abort on the first failing step instead of silently continuing.
set -e

# Use the LibAFL compiler wrappers built by `cargo build --release`.
export CC="$(pwd)/target/release/libafl_cc"
export CXX="$(pwd)/target/release/libafl_cxx"
export CXXFLAGS='--libafl'
export CFLAGS='--libafl'
export LDFLAGS='--libafl'
# The analysis passes write one JSON file per module into this directory.
export ANALYSIS_OUTPUT="$(pwd)/analysis"

cd Little-CMS
./autogen.sh
./configure
make -j "$(nproc)"
# Link the harness against the instrumented static library.
$CXX $CXXFLAGS ../cms_transform_fuzzer.cc -I include/ src/.libs/liblcms2.a -o ../fuzzer

View File

@ -0,0 +1,5 @@
#!/bin/bash
# Remove all generated analysis data and clean the Little-CMS build tree.
# (pushd/popd require bash, hence the explicit shebang.)
export ANALYSIS_OUTPUT="$(pwd)/analysis"
rm -rf analysis/*
pushd Little-CMS
make clean
popd

View File

@ -0,0 +1,63 @@
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Fuzz harness for Little-CMS: interprets the input bytes as an ICC profile,
// builds a transform from it to sRGB, and converts one sample pixel.
#include <stdint.h>

#include "lcms2.h"

extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
  // Parse the raw fuzz input as an in-memory ICC profile.
  cmsHPROFILE srcProfile = cmsOpenProfileFromMem(data, size);
  if (!srcProfile) return 0;

  cmsHPROFILE dstProfile = cmsCreate_sRGBProfile();
  if (!dstProfile) {
    cmsCloseProfile(srcProfile);
    return 0;
  }

  cmsColorSpaceSignature srcCS = cmsGetColorSpace(srcProfile);
  cmsUInt32Number nSrcComponents = cmsChannelsOf(srcCS);
  cmsUInt32Number srcFormat;
  // Lab profiles use double channels (BYTES_SH(0)); everything else 8-bit.
  if (srcCS == cmsSigLabData) {
    srcFormat =
        COLORSPACE_SH(PT_Lab) | CHANNELS_SH(nSrcComponents) | BYTES_SH(0);
  } else {
    srcFormat =
        COLORSPACE_SH(PT_ANY) | CHANNELS_SH(nSrcComponents) | BYTES_SH(1);
  }
  // Default rendering intent, no special flags.
  cmsUInt32Number intent = 0;
  cmsUInt32Number flags = 0;

  cmsHTRANSFORM hTransform = cmsCreateTransform(
      srcProfile, srcFormat, dstProfile, TYPE_BGR_8, intent, flags);
  // The profiles may be closed once the transform has been created.
  cmsCloseProfile(srcProfile);
  cmsCloseProfile(dstProfile);
  if (!hTransform) return 0;

  uint8_t output[4];
  if (T_BYTES(srcFormat) == 0) {  // 0 means double
    // One pixel with every channel at mid-range.
    double input[nSrcComponents];
    for (uint32_t i = 0; i < nSrcComponents; i++)
      input[i] = 0.5f;
    cmsDoTransform(hTransform, input, output, 1);
  } else {
    uint8_t input[nSrcComponents];
    for (uint32_t i = 0; i < nSrcComponents; i++)
      input[i] = 128;
    cmsDoTransform(hTransform, input, output, 1);
  }
  cmsDeleteTransform(hTransform);
  return 0;
}

View File

@ -0,0 +1,36 @@
#!/usr/bin/python3
import os
import json
import sys
def concatenate_json_files(input_dir):
    """Merge every ``*.json`` file found under ``input_dir`` (recursively)
    into a single ``concatenated.json`` file in the current directory.

    The output is a one-element JSON array containing the merged mapping,
    which is the layout the fuzzer's profiling observer reads.
    """
    json_files = []
    for root, _dirs, files in os.walk(input_dir):
        for file_name in files:
            if file_name.endswith('.json'):
                json_files.append(os.path.join(root, file_name))

    data = dict()
    # Sort for a deterministic merge order (os.walk order is filesystem-dependent).
    for json_file in sorted(json_files):
        if os.stat(json_file).st_size == 0:
            # skip empty file else json.load() fails
            continue
        with open(json_file, 'r') as fp:
            json_data = json.load(fp)
        # Later files win on duplicate keys.
        data = data | json_data

    output_file = os.path.join(os.getcwd(), 'concatenated.json')
    with open(output_file, 'w') as fp:
        json.dump([data], fp)

    print(f"JSON files concatenated successfully! Output file: {output_file}")
if __name__ == '__main__':
    # Expect exactly one argument: the directory containing the JSON files.
    if len(sys.argv) != 2:
        print("Usage: python script.py <directory_path>")
        sys.exit(1)
    input_directory = sys.argv[1]
    concatenate_json_files(input_directory)

View File

@ -0,0 +1,47 @@
use std::env;
use libafl_cc::{ClangWrapper, CompilerWrapper, LLVMPasses, ToolWrapper};
/// Entry point of the `libafl_cc`/`libafl_cxx` compiler wrapper.
///
/// Determines from the wrapper binary's name whether the C or C++ mode was
/// invoked, then forwards the command line to the wrapped clang, adding the
/// sancov instrumentation flag and the LibAFL LLVM passes.
pub fn main() {
    let mut args: Vec<String> = env::args().collect();
    if args.len() <= 1 {
        panic!("LibAFL CC: No Arguments given");
    }

    let mut dir = env::current_exe().unwrap();
    let wrapper_name = dir.file_name().unwrap().to_str().unwrap();
    // The last two characters of the wrapper binary's name select the language.
    let is_cpp = match wrapper_name[wrapper_name.len() - 2..]
        .to_lowercase()
        .as_str()
    {
        "cc" => false,
        "++" | "pp" | "xx" => true,
        _ => panic!("Could not figure out if c or c++ wrapper was called. Expected {dir:?} to end with c or cxx"),
    };
    // From here on only the directory containing the wrapper is needed.
    dir.pop();

    // Must be always present, even without --libafl
    args.push("-fsanitize-coverage=trace-pc-guard,trace-cmp".into());

    let mut cc = ClangWrapper::new();

    #[cfg(any(target_os = "linux", target_vendor = "apple"))]
    cc.add_pass(LLVMPasses::AutoTokens);

    let exit_code = cc
        .cpp(is_cpp)
        // silence the compiler wrapper output, needed for some configure scripts.
        .silence(true)
        // add arguments only if --libafl or --libafl-no-link are present
        .need_libafl_arg(true)
        .parse_args(&args)
        .expect("Failed to parse the command line")
        .link_staticlib(&dir, "fuzzbench")
        .add_pass(LLVMPasses::CmpLogRtn)
        .add_pass(LLVMPasses::FunctionLogging)
        .add_pass(LLVMPasses::Profiling)
        .run()
        .expect("Failed to run the wrapped compiler");

    if let Some(code) = exit_code {
        std::process::exit(code);
    }
}

View File

@ -0,0 +1,5 @@
//! Binary shim: the actual wrapper logic lives in the `libafl_cc` module.
pub mod libafl_cc;

fn main() {
    // Delegate straight to the shared compiler-wrapper implementation.
    libafl_cc::main();
}

View File

@ -0,0 +1,406 @@
//! A singlethreaded libfuzzer-like fuzzer that can auto-restart.
use mimalloc::MiMalloc;
#[global_allocator]
static GLOBAL: MiMalloc = MiMalloc;
use core::{cell::RefCell, time::Duration};
#[cfg(unix)]
use std::os::unix::io::{AsRawFd, FromRawFd};
use std::{
env,
fs::{self, File, OpenOptions},
io::{self, Read, Write},
path::PathBuf,
process,
};
use clap::{Arg, Command};
use libafl::{
corpus::{Corpus, InMemoryOnDiskCorpus, OnDiskCorpus},
events::SimpleRestartingEventManager,
executors::{inprocess::HookableInProcessExecutor, ExitKind},
feedback_or,
feedbacks::{CrashFeedback, MaxMapFeedback, TimeFeedback},
fuzzer::{Fuzzer, StdFuzzer},
inputs::{BytesInput, HasTargetBytes},
monitors::SimpleMonitor,
mutators::{
scheduled::havoc_mutations, token_mutations::I2SRandReplace, tokens_mutations,
StdMOptMutator, StdScheduledMutator, Tokens,
},
observers::{CanTrack, HitcountsMapObserver, ProfilingObserver, TimeObserver},
schedulers::{
powersched::PowerSchedule, IndexesLenTimeMinimizerScheduler, StdWeightedScheduler,
},
stages::{
calibrate::CalibrationStage, power::StdPowerMutationalStage, StdMutationalStage,
TracingStage,
},
state::{HasCorpus, StdState},
Error, HasMetadata,
};
use libafl_bolts::{
current_time,
os::dup2,
ownedref::OwnedMutPtr,
rands::StdRand,
shmem::{ShMemProvider, StdShMemProvider},
tuples::{tuple_list, Merge},
AsSlice,
};
#[cfg(any(target_os = "linux", target_vendor = "apple"))]
use libafl_targets::autotokens;
use libafl_targets::{
libfuzzer_initialize, libfuzzer_test_one_input, std_edges_map_observer, CallHook,
CmpLogObserver, FUNCTION_LIST,
};
#[cfg(unix)]
use nix::unistd::dup;
use once_cell::sync::Lazy;
/// The fuzzer main (as `no_mangle` C function)
///
/// Parses the command line; if extra positional arguments are given they are
/// replayed once as testcases (no fuzzing). Otherwise the in/out directories
/// are validated and the actual fuzzing loop is started via [`fuzz`].
#[no_mangle]
pub extern "C" fn libafl_main() {
    // Registry the metadata types used in this fuzzer
    // Needed only on no_std
    // unsafe { RegistryBuilder::register::<Tokens>(); }
    env_logger::init();

    let res = match Command::new(env!("CARGO_PKG_NAME"))
        .version(env!("CARGO_PKG_VERSION"))
        .author("AFLplusplus team")
        .about("LibAFL-based fuzzer for Fuzzbench")
        .arg(
            Arg::new("out")
                .short('o')
                .long("output")
                .help("The directory to place finds in ('corpus')"),
        )
        .arg(
            Arg::new("in")
                .short('i')
                .long("input")
                .help("The directory to read initial inputs from ('seeds')"),
        )
        .arg(
            Arg::new("tokens")
                .short('x')
                .long("tokens")
                .help("A file to read tokens from, to be used during fuzzing"),
        )
        .arg(
            Arg::new("logfile")
                .short('l')
                .long("logfile")
                .help("Duplicates all output to this file")
                .default_value("libafl.log"),
        )
        .arg(
            Arg::new("timeout")
                .short('t')
                .long("timeout")
                .help("Timeout for each individual execution, in milliseconds")
                .default_value("1200"),
        )
        .arg(Arg::new("remaining"))
        .try_get_matches()
    {
        Ok(res) => res,
        Err(err) => {
            // Argument parsing failed: print usage plus the clap error and bail.
            println!(
                "Syntax: {}, [-x dictionary] -o corpus_dir -i seed_dir\n{:?}",
                env::current_exe()
                    .unwrap_or_else(|_| "fuzzer".into())
                    .to_string_lossy(),
                err,
            );
            return;
        }
    };

    println!(
        "Workdir: {:?}",
        env::current_dir().unwrap().to_string_lossy().to_string()
    );

    // Any remaining positional arguments are testcases to replay, not fuzz.
    if let Some(filenames) = res.get_many::<String>("remaining") {
        let filenames: Vec<&str> = filenames.map(String::as_str).collect();
        if !filenames.is_empty() {
            run_testcases(&filenames);
            return;
        }
    }

    // For fuzzbench, crashes and finds are inside the same `corpus` directory, in the "queue" and "crashes" subdir.
    let mut out_dir = PathBuf::from(
        res.get_one::<String>("out")
            .expect("The --output parameter is missing")
            .to_string(),
    );
    // A pre-existing out dir is fine as long as it really is a directory.
    if fs::create_dir(&out_dir).is_err() {
        println!("Out dir at {:?} already exists.", &out_dir);
        if !out_dir.is_dir() {
            println!("Out dir at {:?} is not a valid directory!", &out_dir);
            return;
        }
    }
    let mut crashes = out_dir.clone();
    crashes.push("crashes");
    out_dir.push("queue");

    let in_dir = PathBuf::from(
        res.get_one::<String>("in")
            .expect("The --input parameter is missing")
            .to_string(),
    );
    if !in_dir.is_dir() {
        println!("In dir at {:?} is not a valid directory!", &in_dir);
        return;
    }

    let tokens = res.get_one::<String>("tokens").map(PathBuf::from);

    let logfile = PathBuf::from(res.get_one::<String>("logfile").unwrap().to_string());

    let timeout = Duration::from_millis(
        res.get_one::<String>("timeout")
            .unwrap()
            .to_string()
            .parse()
            .expect("Could not parse timeout in milliseconds"),
    );

    fuzz(out_dir, crashes, &in_dir, tokens, &logfile, timeout)
        .expect("An error occurred while fuzzing");
}
/// Replay each of the given testcase files once, without fuzzing.
fn run_testcases(filenames: &[&str]) {
    // The actual target run starts here.
    // Call LLVMFUzzerInitialize() if present.
    let args: Vec<String> = env::args().collect();
    if libfuzzer_initialize(&args) == -1 {
        println!("Warning: LLVMFuzzerInitialize failed with -1");
    }

    println!(
        "You are not fuzzing, just executing {} testcases",
        filenames.len()
    );

    for fname in filenames {
        println!("Executing {fname}");

        // Read the whole testcase and feed it to the harness once.
        let mut buffer = vec![];
        File::open(fname)
            .expect("No file found")
            .read_to_end(&mut buffer)
            .expect("Buffer overflow");

        libfuzzer_test_one_input(&buffer);
    }
}
/// The actual fuzzer
///
/// Sets up the restarting event manager, the observers (edges, time,
/// profiling), the feedbacks, the mutational stages and the in-process
/// executor, then runs the fuzzing loop until killed.
///
/// # Errors
/// Returns an [`Error`] if any of the setup steps (shmem, log file,
/// observers, stages, executors) fails.
#[allow(clippy::too_many_lines)]
fn fuzz(
    corpus_dir: PathBuf,
    objective_dir: PathBuf,
    seed_dir: &PathBuf,
    tokenfile: Option<PathBuf>,
    logfile: &PathBuf,
    timeout: Duration,
) -> Result<(), Error> {
    let log = RefCell::new(OpenOptions::new().append(true).create(true).open(logfile)?);

    #[cfg(unix)]
    let mut stdout_cpy = unsafe {
        // Duplicate stdout so the monitor can keep printing after stdout is
        // redirected to /dev/null further below.
        let new_fd = dup(io::stdout().as_raw_fd())?;
        File::from_raw_fd(new_fd)
    };
    #[cfg(unix)]
    let file_null = File::open("/dev/null")?;

    // 'While the monitor are state, they are usually used in the broker - which is likely never restarted
    let monitor = SimpleMonitor::new(|s| {
        #[cfg(unix)]
        writeln!(&mut stdout_cpy, "{s}").unwrap();
        #[cfg(windows)]
        println!("{s}");
        writeln!(log.borrow_mut(), "{:?} {s}", current_time()).unwrap();
    });

    // We need a shared map to store our state before a crash.
    // This way, we are able to continue fuzzing afterwards.
    let mut shmem_provider = StdShMemProvider::new()?;

    let (state, mut mgr) = match SimpleRestartingEventManager::launch(monitor, &mut shmem_provider)
    {
        // The restarting state will spawn the same process again as child, then restarted it each time it crashes.
        Ok(res) => res,
        Err(err) => match err {
            Error::ShuttingDown => {
                return Ok(());
            }
            _ => {
                panic!("Failed to setup the restarter: {err}");
            }
        },
    };

    // Create an observation channel using the coverage map
    // We don't use the hitcounts (see the Cargo.toml, we use pcguard_edges)
    let edges_observer =
        HitcountsMapObserver::new(unsafe { std_edges_map_observer("edges") }).track_indices();

    // Create an observation channel to keep track of the execution time
    let time_observer = TimeObserver::new("time");

    // NOTE(review): takes a raw pointer into the global FUNCTION_LIST, which
    // the call hook presumably fills at runtime — assumes nothing else
    // mutates it concurrently.
    let func_list = unsafe { OwnedMutPtr::from_raw_mut(Lazy::force_mut(&mut FUNCTION_LIST)) };
    // Observer that maps executed function ids to the static-analysis JSON
    // database produced by concatenator.py.
    let profiling_observer = ProfilingObserver::new("concatenated.json", func_list)?;
    // Executor hook paired with the function-logging instrumentation.
    let callhook = CallHook::new();

    let cmplog_observer = CmpLogObserver::new("cmplog", true);

    let map_feedback = MaxMapFeedback::new(&edges_observer);

    let calibration = CalibrationStage::new(&map_feedback);

    // Feedback to rate the interestingness of an input
    // This one is composed by two Feedbacks in OR
    let mut feedback = feedback_or!(
        // New maximization map feedback linked to the edges observer and the feedback state
        map_feedback,
        // Time feedback, this one does not need a feedback state
        TimeFeedback::new(&time_observer)
    );

    // A feedback to choose if an input is a solution or not
    let mut objective = CrashFeedback::new();

    // If not restarting, create a State from scratch
    let mut state = state.unwrap_or_else(|| {
        StdState::new(
            // RNG
            StdRand::new(),
            // Corpus that will be evolved, we keep it in memory for performance
            InMemoryOnDiskCorpus::new(corpus_dir).unwrap(),
            // Corpus in which we store solutions (crashes in this example),
            // on disk so the user can get them after stopping the fuzzer
            OnDiskCorpus::new(objective_dir).unwrap(),
            // States of the feedbacks.
            // The feedbacks can report the data that should persist in the State.
            &mut feedback,
            // Same for objective feedbacks
            &mut objective,
        )
        .unwrap()
    });

    println!("Let's fuzz :)");

    // The actual target run starts here.
    // Call LLVMFUzzerInitialize() if present.
    let args: Vec<String> = env::args().collect();
    if libfuzzer_initialize(&args) == -1 {
        println!("Warning: LLVMFuzzerInitialize failed with -1");
    }

    // Setup a randomic Input2State stage
    let i2s = StdMutationalStage::new(StdScheduledMutator::new(tuple_list!(I2SRandReplace::new())));

    // Setup a MOPT mutator
    let mutator = StdMOptMutator::new(
        &mut state,
        havoc_mutations().merge(tokens_mutations()),
        7,
        5,
    )?;

    let power = StdPowerMutationalStage::new(mutator);

    // A minimization+queue policy to get testcasess from the corpus
    let scheduler = IndexesLenTimeMinimizerScheduler::new(
        &edges_observer,
        StdWeightedScheduler::with_schedule(&mut state, &edges_observer, Some(PowerSchedule::FAST)),
    );

    // A fuzzer with feedbacks and a corpus scheduler
    let mut fuzzer = StdFuzzer::new(scheduler, feedback, objective);

    // The wrapped harness function, calling out to the LLVM-style harness
    let mut harness = |input: &BytesInput| {
        let target = input.target_bytes();
        let buf = target.as_slice();
        libfuzzer_test_one_input(buf);
        ExitKind::Ok
    };

    let mut tracing_harness = harness;

    // Create the executor for an in-process function with one observer for edge coverage and one for the execution time
    let mut executor = HookableInProcessExecutor::with_timeout_generic(
        tuple_list!(callhook.clone()),
        &mut harness,
        tuple_list!(edges_observer, time_observer, profiling_observer),
        &mut fuzzer,
        &mut state,
        &mut mgr,
        timeout,
    )?;

    // Setup a tracing stage in which we log comparisons
    let tracing = TracingStage::new(
        HookableInProcessExecutor::with_timeout_generic(
            tuple_list!(callhook),
            &mut tracing_harness,
            tuple_list!(cmplog_observer),
            &mut fuzzer,
            &mut state,
            &mut mgr,
            timeout * 10,
        )?,
        // Give it more time!
    );

    // The order of the stages matter!
    let mut stages = tuple_list!(calibration, tracing, i2s, power);

    // Read tokens
    if state.metadata_map().get::<Tokens>().is_none() {
        let mut toks = Tokens::default();
        if let Some(tokenfile) = tokenfile {
            toks.add_from_file(tokenfile)?;
        }
        #[cfg(any(target_os = "linux", target_vendor = "apple"))]
        {
            toks += autotokens()?;
        }

        if !toks.is_empty() {
            state.add_metadata(toks);
        }
    }

    // In case the corpus is empty (on first run), reset
    if state.must_load_initial_inputs() {
        state
            .load_initial_inputs(&mut fuzzer, &mut executor, &mut mgr, &[seed_dir.clone()])
            .unwrap_or_else(|_| {
                println!("Failed to load initial corpus at {:?}", &seed_dir);
                process::exit(0);
            });
        println!("We imported {} inputs from disk.", state.corpus().count());
    }

    // Remove target output (logs still survive)
    #[cfg(unix)]
    {
        let null_fd = file_null.as_raw_fd();
        dup2(null_fd, io::stdout().as_raw_fd())?;
        if std::env::var("LIBAFL_FUZZBENCH_DEBUG").is_err() {
            // dup2(null_fd, io::stderr().as_raw_fd())?;
        }
    }

    // reopen file to make sure we're at the end
    log.replace(OpenOptions::new().append(true).create(true).open(logfile)?);

    fuzzer.fuzz_loop(&mut stages, &mut executor, &mut state, &mut mgr)?;

    // Never reached
    Ok(())
}

View File

@ -0,0 +1,34 @@
/* Standalone entry point plus weak stubs for the instrumentation callbacks.
 *
 * The weak definitions below let the binary link even when the runtime that
 * provides the strong definitions is not linked in; at link time any strong
 * versions win over these empty stubs. */
#include <stdint.h>

/* SanitizerCoverage pc-guard callbacks (empty fallbacks). */
__attribute__((weak)) void __sanitizer_cov_trace_pc_guard_init(uint32_t *start,
                                                               uint32_t *stop) {
}

__attribute__((weak)) void __sanitizer_cov_trace_pc_guard(uint32_t *guard) {
}

/* CmpLog routine hooks for comparison-operand logging (empty fallbacks). */
__attribute__((weak)) void __cmplog_rtn_hook(uint8_t *ptr1, uint8_t *ptr2) {
}

__attribute__((weak)) void __cmplog_rtn_gcc_stdstring_cstring(
    uint8_t *stdstring, uint8_t *cstring) {
}

__attribute__((weak)) void __cmplog_rtn_gcc_stdstring_stdstring(
    uint8_t *stdstring1, uint8_t *stdstring2) {
}

__attribute__((weak)) void __cmplog_rtn_llvm_stdstring_cstring(
    uint8_t *stdstring, uint8_t *cstring) {
}

__attribute__((weak)) void __cmplog_rtn_llvm_stdstring_stdstring(
    uint8_t *stdstring1, uint8_t *stdstring2) {
}

/* Provided by the Rust side (see the fuzzer's `libafl_main`). */
extern void libafl_main(void);

int main(int argc, char **argv) {
  libafl_main();
  return 0;
}

View File

@ -14,6 +14,12 @@ pub mod stacktrace;
#[cfg(feature = "regex")]
pub use stacktrace::*;
/// Profiler observer
#[cfg(feature = "std")]
pub mod profiling;
#[cfg(feature = "std")]
pub use profiling::*;
pub mod concolic;
pub mod map;
pub use map::*;

View File

@ -0,0 +1,146 @@
use alloc::{borrow::Cow, string::String};
use std::{fs::File, io::BufReader, path::Path};
use hashbrown::HashMap;
use libafl_bolts::{ownedref::OwnedMutPtr, Named};
use serde::{Deserialize, Serialize};
use crate::{inputs::UsesInput, observers::Observer, state::State, Error};
#[derive(Debug, Serialize, Deserialize)]
/// Static-analysis record for one function, deserialized from the JSON
/// database produced by the analysis pass and merged by `concatenator.py`.
/// All fields except the name are optional since a record may omit metrics.
pub struct FunctionData {
    /// The function's (mangled) name.
    #[serde(rename = "name")]
    name: String,
    /// Number of basic blocks.
    #[serde(rename = "# BBs")]
    bb_count: Option<u32>,
    /// Number of instructions.
    #[serde(rename = "# insts")]
    inst_count: Option<u32>,
    /// Number of edges.
    #[serde(rename = "# edges")]
    edge_count: Option<u32>,
    /// Number of binary operations.
    #[serde(rename = "# binaryOp")]
    binary_op_count: Option<u32>,
    /// Number of calls.
    #[serde(rename = "# call")]
    call_count: Option<u32>,
    /// Number of comparisons.
    #[serde(rename = "# cmp")]
    cmp_count: Option<u32>,
    /// Number of loads.
    #[serde(rename = "# load")]
    load_count: Option<u32>,
    /// Number of stores.
    #[serde(rename = "# store")]
    store_count: Option<u32>,
    /// Number of allocas.
    #[serde(rename = "# alloca")]
    alloca_count: Option<u32>,
    /// Number of branches.
    #[serde(rename = "# branch")]
    branch_count: Option<u32>,
    /// ABC size metric (assumed Assignment-Branch-Condition — confirm with the analysis pass).
    #[serde(rename = "ABC metric")]
    abc_metric: Option<f64>,
    /// Cyclomatic complexity.
    cyclomatic: Option<u32>,
    /// API calls, keyed by callee name.
    #[serde(rename = "AP")]
    api_calls: Option<HashMap<String, u32>>,
    /// Heap-related API calls.
    #[serde(rename = "h AP")]
    heap_apis: Option<HashMap<String, u32>>,
    /// Memory-related API calls.
    #[serde(rename = "m AP")]
    memory_apis: Option<HashMap<String, u32>>,
    /// Nesting-level statistics.
    #[serde(rename = "ne lv")]
    nested_level: Option<HashMap<String, u32>>,
    /// Comparisons involving globals.
    #[serde(rename = "cm gl")]
    cmp_globals: Option<HashMap<String, u32>>,
    /// Comparisons against non-zero values.
    #[serde(rename = "cm nz")]
    cmp_non_zeros: Option<HashMap<String, u32>>,
    /// Writes to structs.
    #[serde(rename = "wr st")]
    struct_writes: Option<HashMap<String, u32>>,
    /// Struct-typed arguments.
    #[serde(rename = "str arg")]
    struct_args: Option<HashMap<String, u32>>,
    /// Comparison operand types.
    #[serde(rename = "cm ty")]
    cmp_types: Option<HashMap<String, u32>>,
    /// Comparison complexity statistics.
    #[serde(rename = "cm cm")]
    cmp_complexity: Option<HashMap<String, u32>>,
    /// Call argument types.
    #[serde(rename = "ar ty")]
    call_arg_types: Option<HashMap<String, u32>>,
    /// Stored value types.
    #[serde(rename = "st ty")]
    store_types: Option<HashMap<String, u32>>,
    /// Loaded value types.
    #[serde(rename = "l ty")]
    load_types: Option<HashMap<String, u32>>,
    /// Alloca types.
    #[serde(rename = "al ty")]
    alloca_types: Option<HashMap<String, u32>>,
}
/// The whole analysis database: maps a function id to its [`FunctionData`].
#[derive(Debug, Default, Serialize, Deserialize)]
struct AnalysisData {
    data: HashMap<usize, FunctionData>,
}
/// The observer to lookup the static analysis data at runtime
#[derive(Debug, Serialize, Deserialize)]
pub struct ProfilingObserver {
    /// The name of the observer.
    pub name: Cow<'static, str>,
    /// The analysis database, loaded from JSON at construction time.
    db: AnalysisData,
    /// The map of function id -> counter filled at runtime (only the keys
    /// are read here; presumably the values are call counts — confirm in
    /// the runtime's call hook).
    map: OwnedMutPtr<HashMap<usize, usize>>,
}
impl ProfilingObserver {
    /// Create a new [`ProfilingObserver`].
    ///
    /// Loads the static-analysis database from the JSON file at `json_path`
    /// and keeps the pointer to the runtime function map.
    ///
    /// # Errors
    /// Returns an [`Error`] if the file cannot be opened or does not parse
    /// as the expected JSON layout.
    pub fn new<P>(json_path: P, map: OwnedMutPtr<HashMap<usize, usize>>) -> Result<Self, Error>
    where
        P: AsRef<Path>,
    {
        let f = File::open(json_path)?;
        let reader = BufReader::new(f);
        let analysis_data: AnalysisData = serde_json::from_reader(reader)?;

        Ok(Self {
            name: Cow::from("profiling"),
            db: analysis_data,
            map,
        })
    }

    /// Get the map
    #[must_use]
    pub fn map(&self) -> &HashMap<usize, usize> {
        self.map.as_ref()
    }

    /// lookup the data through db
    #[must_use]
    pub fn lookup(&self, function_id: usize) -> Option<&FunctionData> {
        // Fixed clippy::let_and_return: return the lookup result directly.
        self.db.data.get(&function_id)
    }
}
impl Named for ProfilingObserver {
    /// Return the observer's name (set to `"profiling"` at construction).
    fn name(&self) -> &Cow<'static, str> {
        &self.name
    }
}
impl<S> Observer<S> for ProfilingObserver
where
    S: State,
{
    /// After each execution, log the analysis data for every function id
    /// recorded in the runtime map.
    fn post_exec(
        &mut self,
        _state: &mut S,
        _input: &<S as UsesInput>::Input,
        _exit_kind: &crate::executors::ExitKind,
    ) -> Result<(), Error> {
        // in reality, this should be done in a stage
        // but here just for poc
        for key in self.map().keys() {
            log::info!("key: {}, data: {:#?}", key, self.lookup(*key));
        }
        log::info!("");
        Ok(())
    }
}

View File

@ -428,6 +428,7 @@ pub const LIBAFL_CC_LLVM_VERSION: Option<usize> = None;
);
for pass in &[
"function-logging.cc",
"cmplog-routines-pass.cc",
"autotokens-pass.cc",
"coverage-accounting-pass.cc",
@ -447,7 +448,7 @@ pub const LIBAFL_CC_LLVM_VERSION: Option<usize> = None;
}
// Optional pass
for pass in &["dump-cfg-pass.cc"] {
for pass in &["dump-cfg-pass.cc", "profiling.cc"] {
build_pass(
bindir_path,
out_dir,

View File

@ -41,6 +41,10 @@ pub enum LLVMPasses {
CmpLogInstructions,
/// Instrument caller for sancov coverage
Ctx,
/// Function logging
FunctionLogging,
/// Profiling
Profiling,
/// Data dependency instrumentation
DDG,
}
@ -66,6 +70,12 @@ impl LLVMPasses {
LLVMPasses::Ctx => {
PathBuf::from(env!("OUT_DIR")).join(format!("ctx-pass.{}", dll_extension()))
}
LLVMPasses::FunctionLogging => {
PathBuf::from(env!("OUT_DIR")).join(format!("function-logging.{}", dll_extension()))
}
LLVMPasses::Profiling => {
PathBuf::from(env!("OUT_DIR")).join(format!("profiling.{}", dll_extension()))
}
LLVMPasses::DDG => {
PathBuf::from(env!("OUT_DIR")).join(format!("ddg-instr.{}", dll_extension()))
}

View File

@ -0,0 +1,191 @@
/*
LibAFL - Function Logging LLVM pass
--------------------------------------------------
Written by Dongjia Zhang <toka@aflplus.plus>
Copyright 2022-2023 AFLplusplus Project. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at:
http://www.apache.org/licenses/LICENSE-2.0
*/
#include <stdio.h>
#include <stdlib.h>
#include "common-llvm.h"
#ifndef _WIN32
#include <unistd.h>
#include <sys/time.h>
#else
#include <io.h>
#endif
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <ctype.h>
#include <list>
#include <string>
#include <fstream>
#include <set>
#include "llvm/Config/llvm-config.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/IRBuilder.h"
#if USE_NEW_PM
#include "llvm/Passes/PassPlugin.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/IR/PassManager.h"
#else
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#endif
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Pass.h"
#include "llvm/IR/Constants.h"
#include <iostream>
using namespace llvm;
#define MAP_SIZE EDGES_MAP_SIZE_IN_USE
namespace {

// LLVM module pass that instruments every function entry with a call into
// the LibAFL runtime (see run()/runOnModule() below). Built either against
// the new pass manager (USE_NEW_PM) or the legacy one.
#if USE_NEW_PM
class FunctionLogging : public PassInfoMixin<FunctionLogging> {
 public:
  FunctionLogging() {
#else
class FunctionLogging : public ModulePass {
 public:
  static char ID;
  FunctionLogging() : ModulePass(ID) {
#endif
  }

#if USE_NEW_PM
  PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
#else
  bool runOnModule(Module &M) override;
#endif

 protected:
  // NOTE(review): not read by this pass; appears kept for parity with the
  // other LibAFL passes — confirm before removing.
  uint32_t map_size = MAP_SIZE;

 private:
  // Not interested in these LLVM's functions: true for compiler intrinsics
  // (names starting with "llvm."); the StringRef API changed in LLVM 18.
  bool isLLVMIntrinsicFn(StringRef &n) {
#if LLVM_VERSION_MAJOR >= 18
    if (n.starts_with("llvm.")) {
#else
    if (n.startswith("llvm.")) {
#endif
      return true;
    } else {
      return false;
    }
  }
};

}  // namespace
#if USE_NEW_PM
// New pass manager: register FunctionLogging at the end of the optimizer
// pipeline so it instruments (mostly) final IR.
extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK
llvmGetPassPluginInfo() {
  return {LLVM_PLUGIN_API_VERSION, "FunctionLoggingPass", "v0.1",
          /* lambda to insert our pass into the pass pipeline. */
          [](PassBuilder &PB) {
#if LLVM_VERSION_MAJOR <= 13
            using OptimizationLevel = typename PassBuilder::OptimizationLevel;
#endif
            PB.registerOptimizerLastEPCallback(
                [](ModulePassManager &MPM, OptimizationLevel OL) {
                  MPM.addPass(FunctionLogging());
                });
          }};
}
#else
// Legacy pass manager requires the static pass ID to be defined.
char FunctionLogging::ID = 0;
#endif
#if USE_NEW_PM
PreservedAnalyses FunctionLogging::run(Module &M, ModuleAnalysisManager &MAM) {
#else
bool FunctionLogging::runOnModule(Module &M) {
#endif
  // Inject, at the entry of every function with a body, a call to
  //   void __libafl_target_call_hook(uint64_t function_id)
  // so the runtime can record which functions were executed.
  // Removed dead code from the original: unused has_calls counter, unused
  // Int8Ty/Int32Ty/moduleName locals, and a srand(time(NULL)) seeding a RNG
  // that was never used.
  LLVMContext &C = M.getContext();

  Type        *VoidTy = Type::getVoidTy(C);
  IntegerType *Int64Ty = IntegerType::getInt64Ty(C);

  FunctionCallee callHook =
      M.getOrInsertFunction("__libafl_target_call_hook", VoidTy, Int64Ty);

  for (auto &F : M) {
    // Skip ignored functions and declarations without a body.
    if (isIgnoreFunction(&F)) { continue; }
    if (F.size() < 1) { continue; }

    // instrument the first basic block of this fn
    BasicBlock &entry = F.front();

    // Function id: std::hash of the (mangled) name — presumably matched by
    // the ids in the analysis JSON database; note std::hash is
    // implementation-defined, so compiler and runtime must agree.
    std::size_t function_id = std::hash<std::string>{}(F.getName().str());

    IRBuilder<> IRB(&entry);
    IRB.SetInsertPoint(&entry.front());

    std::vector<Value *> args;
    llvm::Value *value = llvm::ConstantInt::get(
        llvm::Type::getInt64Ty(F.getContext()), function_id);
    args.push_back(value);
    IRB.CreateCall(callHook, args);
  }

#if USE_NEW_PM
  auto PA = PreservedAnalyses::all();
  return PA;
#else
  return true;
#endif
}
#if USE_NEW_PM
#else
static void registerFunctionLoggingPass(const PassManagerBuilder &,
legacy::PassManagerBase &PM) {
PM.add(new FunctionLoggingPass());
}
static RegisterPass<FunctionLogging> X("function-logging",
"function logging pass", false, false);
static RegisterStandardPasses RegisterFunctionLogging(
PassManagerBuilder::EP_OptimizerLast, registerFunctionLoggingPass);
static RegisterStandardPasses RegisterFunctionLogging0(
PassManagerBuilder::EP_EnabledOnOptLevel0, registerFunctionLoggingPass);
#endif

View File

@ -61,7 +61,6 @@
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Support/FileSystem.h"
@ -97,11 +96,10 @@ class AnalysisPass : public ModulePass {
#endif
protected:
DenseMap<BasicBlock *, uint32_t> bb_to_cur_loc;
DenseMap<StringRef, BasicBlock *> entry_bb;
DenseMap<BasicBlock *, std::vector<StringRef>> calls_in_bb;
DenseMap<StringRef, std::vector<StringRef>> structLinks;
DenseMap<StringRef, std::unordered_map<int, int>> structDesc;
DenseMap<BasicBlock *, uint32_t> bb_to_cur_loc;
DenseMap<StringRef, BasicBlock *> entry_bb;
DenseMap<BasicBlock *, std::vector<StringRef>> calls_in_bb;
// DenseMap<StringRef, std::unordered_map<int, int>> structDesc;
// The type name is not in the memory, so create a std::string impromptu
private:
@ -163,11 +161,11 @@ class AnalysisPass : public ModulePass {
!FuncName.compare("xmlStrcasestr") ||
!FuncName.compare("g_str_has_prefix") ||
!FuncName.compare("g_str_has_suffix"));
isStrcmp &=
FT->getNumParams() == 2 && FT->getReturnType()->isIntegerTy(32) &&
FT->getParamType(0) == FT->getParamType(1) &&
FT->getParamType(0) == IntegerType::getInt8PtrTy(M.getContext());
isStrcmp &= FT->getNumParams() == 2 &&
FT->getReturnType()->isIntegerTy(32) &&
FT->getParamType(0) == FT->getParamType(1) &&
FT->getParamType(0) ==
IntegerType::getInt8Ty(M.getContext())->getPointerTo(0);
return isStrcmp;
}
@ -185,11 +183,12 @@ class AnalysisPass : public ModulePass {
!FuncName.compare("g_ascii_strncasecmp") ||
!FuncName.compare("Curl_strncasecompare") ||
!FuncName.compare("g_strncasecmp"));
isStrncmp &=
FT->getNumParams() == 3 && FT->getReturnType()->isIntegerTy(32) &&
FT->getParamType(0) == FT->getParamType(1) &&
FT->getParamType(0) == IntegerType::getInt8PtrTy(M.getContext()) &&
FT->getParamType(2)->isIntegerTy();
isStrncmp &= FT->getNumParams() == 3 &&
FT->getReturnType()->isIntegerTy(32) &&
FT->getParamType(0) == FT->getParamType(1) &&
FT->getParamType(0) ==
IntegerType::getInt8Ty(M.getContext())->getPointerTo(0) &&
FT->getParamType(2)->isIntegerTy();
return isStrncmp;
}
@ -246,7 +245,7 @@ class AnalysisPass : public ModulePass {
bool isLLVMIntrinsicFn(StringRef &n) {
// Not interested in these LLVM's functions
if (n.startswith("llvm.")) {
if (n.starts_with("llvm.")) {
return true;
} else {
return false;
@ -411,37 +410,33 @@ bool AnalysisPass::runOnModule(Module &M) {
*/
bool run = true;
bool done_already = file_exist("/out/." + genericFilePath + ".json");
std::string output_dir;
const char *path = std::getenv("ANALYSIS_OUTPUT");
if (path != nullptr) {
output_dir = path;
if (std::filesystem::exists(output_dir) &&
std::filesystem::is_directory(output_dir)) {
// good
} else {
std::cerr << "Output path is empty!" << std::endl;
}
// Use the output_dir string here
} else {
std::cerr << "Output path not set!" << std::endl;
}
bool done_already =
file_exist(output_dir + std::string("/") + genericFilePath + ".json");
if (done_already) {
run = false;
} else {
std::ofstream out_lock("/out/." + genericFilePath + ".json");
std::ofstream out_lock(output_dir + std::string("/") + genericFilePath +
".json");
}
if (run) {
outs() << "Analysis on " + genericFilePath << "\n";
LLVMContext &Ctx = M.getContext();
auto moduleName = M.getName().str();
// printf("Hello\n");
for (auto ST : M.getIdentifiedStructTypes()) {
std::unordered_map<int, int> types;
for (auto T : ST->elements()) {
types[T->getTypeID()] += 1;
auto ty = T;
while (true) {
// Recursive
if (ty->isPointerTy()) {
ty = ty->getPointerElementType();
continue;
} else if (ty->isStructTy()) {
structLinks[ST->getStructName()].push_back(ty->getStructName());
}
break;
}
}
structDesc[ST->getStructName()] = types;
}
LLVMContext &Ctx = M.getContext();
auto moduleName = M.getName().str();
nlohmann::json res;
for (auto &F : M) {
@ -475,6 +470,13 @@ bool AnalysisPass::runOnModule(Module &M) {
unsigned binary_op_cnt = 0;
entry_bb[F.getName()] = &F.getEntryBlock();
// now we get the sha256sum for this function. (mangled function name
// should be unique else it will result in linker error) by this we make a
// map (<fn name> |-> <analysis data>)
std::size_t hashed = std::hash<std::string>{}(F.getName().str());
// cast again as string, it's json, key has to be a string
std::string function_id = std::to_string(hashed);
for (auto &BB : F) {
bb_to_cur_loc[&BB] = bb_cnt;
bb_cnt++;
@ -543,18 +545,6 @@ bool AnalysisPass::runOnModule(Module &M) {
auto arg_ty = arg->getType();
std::string type_str = typeWriter(arg_ty);
callArgTypes[type_str]++;
auto ty = arg_ty;
while (true) {
// recursive
if (ty->isPointerTy()) {
ty = ty->getPointerElementType();
continue;
} else if (ty->isStructTy()) {
structArgs[type_str]++;
}
break;
}
}
}
} else if ((cmpInst = dyn_cast<CmpInst>(&IN))) {
@ -625,36 +615,39 @@ bool AnalysisPass::runOnModule(Module &M) {
}
std::string fnname = std::string(F.getName());
if (bb_cnt) { res[fnname]["# BBs"] = bb_cnt; }
if (inst_cnt) { res[fnname]["# insts"] = inst_cnt; }
res[function_id]["name"] = fnname;
if (edges_cnt) { res[fnname]["# edges"] = edges_cnt; }
if (bb_cnt) { res[function_id]["# BBs"] = bb_cnt; }
if (binary_op_cnt) { res[fnname]["# binaryOp"] = binary_op_cnt; }
if (inst_cnt) { res[function_id]["# insts"] = inst_cnt; }
if (call_cnt) { res[fnname]["# call"] = call_cnt; }
if (edges_cnt) { res[function_id]["# edges"] = edges_cnt; }
if (cmp_cnt) { res[fnname]["# cmp"] = cmp_cnt; }
if (binary_op_cnt) { res[function_id]["# binaryOp"] = binary_op_cnt; }
if (load_cnt) { res[fnname]["# load"] = load_cnt; }
if (call_cnt) { res[function_id]["# call"] = call_cnt; }
if (store_cnt) { res[fnname]["# store"] = store_cnt; }
if (cmp_cnt) { res[function_id]["# cmp"] = cmp_cnt; }
if (alloca_cnt) { res[fnname]["# alloca"] = alloca_cnt; }
if (load_cnt) { res[function_id]["# load"] = load_cnt; }
if (branch_cnt) { res[fnname]["# branch"] = branch_cnt; }
if (store_cnt) { res[function_id]["# store"] = store_cnt; }
res[fnname]["ABC metric"] =
if (alloca_cnt) { res[function_id]["# alloca"] = alloca_cnt; }
if (branch_cnt) { res[function_id]["# branch"] = branch_cnt; }
res[function_id]["ABC metric"] =
sqrt(alloca_cnt * alloca_cnt + branch_cnt * branch_cnt +
call_cnt * call_cnt);
res[fnname]["cyclomatic"] = edges_cnt - bb_cnt + 2;
res[function_id]["cyclomatic"] = edges_cnt - bb_cnt + 2;
// outs() << "APIs:\n";
for (auto record = APIcalls.begin(); record != APIcalls.end(); record++) {
auto key = record->getFirst();
if (!isLLVMIntrinsicFn(key)) {
res[fnname]["AP"][std::string(key)] = APIcalls[key];
res[function_id]["AP"][std::string(key)] = APIcalls[key];
// outs() << key << " " << APIcalls[key] << "\n";
}
}
@ -663,7 +656,7 @@ bool AnalysisPass::runOnModule(Module &M) {
// outs() << "memoryAPIs:\n";
for (auto record = heapAPIs.begin(); record != heapAPIs.end(); record++) {
auto key = record->getFirst();
res[fnname]["h AP"][std::string(key)] = heapAPIs[key];
res[function_id]["h AP"][std::string(key)] = heapAPIs[key];
// outs() << key << " " << heapAPIs[key] << "\n";
}
// outs() << "\n";
@ -671,28 +664,28 @@ bool AnalysisPass::runOnModule(Module &M) {
for (auto record = memoryAPIs.begin(); record != memoryAPIs.end();
record++) {
auto key = record->getFirst();
res[fnname]["m AP"][std::string(key)] = memoryAPIs[key];
res[function_id]["m AP"][std::string(key)] = memoryAPIs[key];
// outs() << key << " " << memoryAPIs[key] << "\n";
}
for (auto record = nestedLevel.begin(); record != nestedLevel.end();
record++) {
auto key = record->first;
res[fnname]["ne lv"][std::to_string(key)] = nestedLevel[key];
res[function_id]["ne lv"][std::to_string(key)] = nestedLevel[key];
// outs() << key << " " << memoryAPIs[key] << "\n";
}
for (auto record = cmpGlobals.begin(); record != cmpGlobals.end();
record++) {
auto key = record->first;
res[fnname]["cm gl"][std::to_string(key)] = cmpGlobals[key];
res[function_id]["cm gl"][std::to_string(key)] = cmpGlobals[key];
// outs() << key << " " << memoryAPIs[key] << "\n";
}
for (auto record = cmpNonZeros.begin(); record != cmpNonZeros.end();
record++) {
auto key = record->first;
res[fnname]["cm nz"][std::to_string(key)] = cmpNonZeros[key];
res[function_id]["cm nz"][std::to_string(key)] = cmpNonZeros[key];
// outs() << key << " " << memoryAPIs[key] << "\n";
}
@ -701,7 +694,7 @@ bool AnalysisPass::runOnModule(Module &M) {
record++) {
auto key = record->getFirst();
// Some are nameless struct
res[fnname]["wr st"][std::string(key)] = structWrites[key];
res[function_id]["wr st"][std::string(key)] = structWrites[key];
// outs() << key << " " << structWrites[key] << "\n";
}
// outs() << "\n";
@ -710,28 +703,28 @@ bool AnalysisPass::runOnModule(Module &M) {
for (auto record = structArgs.begin(); record != structArgs.end();
record++) {
auto key = record->first;
res[fnname]["str arg"][std::string(key)] = record->second;
res[function_id]["str arg"][std::string(key)] = record->second;
// outs() << key << " " << record->second << "\n";
}
// outs() << "\n";
// outs() << "CmpTypes:\n";
for (auto record = cmpTypes.begin(); record != cmpTypes.end(); record++) {
res[fnname]["cm ty"][record->first] = record->second;
res[function_id]["cm ty"][record->first] = record->second;
// outs() << record->first << " " << record->second << "\n";
}
// outs() << "\n";
for (auto record = cmpComplexity.begin(); record != cmpComplexity.end();
record++) {
res[fnname]["cm cm"][record->first] = record->second;
res[function_id]["cm cm"][record->first] = record->second;
// outs() << record->first << " " << record->second << "\n";
}
// outs() << "CallArgTypes:\n";
for (auto record = callArgTypes.begin(); record != callArgTypes.end();
record++) {
res[fnname]["ar ty"][record->first] = record->second;
res[function_id]["ar ty"][record->first] = record->second;
// outs() << record->first << " " << record->second << "\n";
}
// outs() << "\n";
@ -739,7 +732,7 @@ bool AnalysisPass::runOnModule(Module &M) {
// outs() << "storeTypes:\n";
for (auto record = storeTypes.begin(); record != storeTypes.end();
record++) {
res[fnname]["st ty"][record->first] = record->second;
res[function_id]["st ty"][record->first] = record->second;
// outs() << record->first << " " << record->second << "\n";
}
// outs() << "\n";
@ -747,7 +740,7 @@ bool AnalysisPass::runOnModule(Module &M) {
// outs() << "loadTypes:\n";
for (auto record = loadTypes.begin(); record != loadTypes.end();
record++) {
res[fnname]["l ty"][record->first] = record->second;
res[function_id]["l ty"][record->first] = record->second;
// outs() << record->first << " " << record->second << "\n";
}
// outs() << "\n";
@ -755,121 +748,24 @@ bool AnalysisPass::runOnModule(Module &M) {
// outs() << "allocaTypes:\n";
for (auto record = allocaTypes.begin(); record != allocaTypes.end();
record++) {
res[fnname]["al ty"][record->first] = record->second;
res[function_id]["al ty"][record->first] = record->second;
// outs() << record->first << " " << record->second << "\n";
}
// outs() << "\n";
if (getenv("ANALYSIS_OUTPUT_PATH")) {
if (std::ofstream(getenv("ANALYSIS_OUTPUT_PATH") + std::string("/") +
if (getenv("ANALYSIS_OUTPUT")) {
if (std::ofstream(getenv("ANALYSIS_OUTPUT") + std::string("/") +
genericFilePath + ".json")
<< res << "\n") {
} else {
abort();
errs() << "Failed to write the data"
<< "\n";
}
} else {
errs() << "output path not set!"
errs() << "Failed to write the data, output path not set!"
<< "\n";
}
}
nlohmann::json struct_links;
// outs() << "StructLinks:\n";
for (auto record = structLinks.begin(); record != structLinks.end();
record++) {
StringRef key = record->getFirst();
// outs() << "struct: " << key << "\t";
std::vector<std::string> links{};
// outs() << "links: ";
for (auto item = structLinks[key].begin(); item != structLinks[key].end();
item++) {
links.push_back(std::string(*item));
// outs() << *item << " ";
}
struct_links[moduleName][std::string(key)]["lks"] = links;
// outs() << "\n";
}
for (auto record = structDesc.begin(); record != structDesc.end();
record++) {
auto key = record->getFirst();
struct_links[moduleName][std::string(key)]["desc"] = record->second;
}
// outs() << "\n";
if (getenv("ANALYSIS_OUTPUT_PATH")) {
if (std::ofstream(getenv("ANALYSIS_OUTPUT_PATH") + std::string("/") +
genericFilePath + ".lks")
<< struct_links << "\n") {
} else {
abort();
}
} else {
errs() << "output path not set!"
<< "\n";
}
nlohmann::json cfg;
for (auto record = bb_to_cur_loc.begin(); record != bb_to_cur_loc.end();
record++) {
auto current_bb = record->getFirst();
auto loc = record->getSecond();
Function *calling_func = current_bb->getParent();
std::string func_name = std::string("");
if (calling_func) {
func_name = std::string(calling_func->getName());
// outs() << "Function name: " << calling_func->getName() << "\n";
}
std::vector<uint32_t> outgoing;
for (auto bb_successor = succ_begin(current_bb);
bb_successor != succ_end(current_bb); bb_successor++) {
outgoing.push_back(bb_to_cur_loc[*bb_successor]);
}
cfg["edges"][func_name][loc] = outgoing;
}
for (auto record = calls_in_bb.begin(); record != calls_in_bb.end();
record++) {
auto current_bb = record->getFirst();
auto loc = bb_to_cur_loc[current_bb];
Function *calling_func = current_bb->getParent();
std::string func_name = std::string("");
if (calling_func) {
func_name = std::string(calling_func->getName());
// outs() << "Function name: " << calling_func->getName() << "\n";
}
std::vector<std::string> outgoing_funcs;
for (auto &item : record->getSecond()) {
outgoing_funcs.push_back(std::string(item));
}
if (!outgoing_funcs.empty()) {
cfg["calls"][func_name][std::to_string(loc)] = outgoing_funcs;
}
}
for (auto record = entry_bb.begin(); record != entry_bb.end(); record++) {
cfg["entries"][std::string(record->getFirst())] =
bb_to_cur_loc[record->getSecond()];
}
if (getenv("ANALYSIS_OUTPUT_PATH")) {
if (std::ofstream(getenv("ANALYSIS_OUTPUT_PATH") + std::string("/") +
genericFilePath + ".cfg")
<< cfg << "\n") {
} else {
abort();
}
} else {
errs() << "output path not set!"
<< "\n";
}
}
#if USE_NEW_PM

View File

@ -58,6 +58,7 @@ forkserver = ["common"] # Compile C code for forkserver support
windows_asan = ["common"] # Compile C code for ASAN on Windows
whole_archive = [] # use +whole-archive to ensure the presence of weak symbols
cmplog_extended_instrumentation = [] # support for aflpp cmplog map, we will remove this once aflpp and libafl cmplog shares the same LLVM passes.
function-logging = ["common"]
[build-dependencies]
bindgen = "0.69.4"
@ -68,6 +69,8 @@ rustversion = "1.0"
libafl = { path = "../libafl", version = "0.12.0", default-features = false, features = [] }
libafl_bolts = { path = "../libafl_bolts", version = "0.12.0", default-features = false, features = [] }
libc = "0.2"
hashbrown = "0.14"
once_cell = "1.19"
log = "0.4.20"
rustversion = "1.0"

View File

@ -0,0 +1,48 @@
use core::marker::PhantomData;
use hashbrown::HashMap;
use libafl::{
executors::{hooks::ExecutorHook, HasObservers},
inputs::UsesInput,
};
use once_cell::sync::Lazy;
/// The list of functions that this execution has observed.
///
/// Maps `std::hash` of the mangled function name (the id emitted by the
/// `function-logging.cc` LLVM pass) to the number of times that function's
/// entry block was executed during the current run.
///
/// NOTE(review): `static mut` with no synchronization — this assumes the
/// target executes single-threaded; confirm before using with in-process
/// threads.
pub static mut FUNCTION_LIST: Lazy<HashMap<usize, usize>> = Lazy::new(HashMap::new);

#[no_mangle]
/// The runtime code inserted at every callinst invocation (if you used the function-logging.cc)
/// # Safety
/// unsafe because it touches pub static mut
pub unsafe extern "C" fn __libafl_target_call_hook(id: usize) {
    *FUNCTION_LIST.entry(id).or_insert(0) += 1;
}
/// Zero-sized executor hook whose job is to reset `FUNCTION_LIST`
/// ahead of each execution of the target.
#[derive(Debug, Clone, Copy, Default)]
pub struct CallHook<S> {
    phantom: PhantomData<S>,
}

impl<S> CallHook<S> {
    /// Creates a new [`CallHook`].
    #[must_use]
    pub fn new() -> Self {
        Self { phantom: PhantomData }
    }
}
// Executor hook wiring: the only work happens in `pre_exec`, which wipes the
// global call log so `FUNCTION_LIST` reflects exactly one execution.
impl<S> ExecutorHook<S> for CallHook<S>
where
    S: UsesInput,
{
    // Nothing to set up: the hook only touches the global FUNCTION_LIST.
    fn init<E: HasObservers>(&mut self, _state: &mut S) {}
    fn pre_exec(&mut self, _state: &mut S, _input: &<S as UsesInput>::Input) {
        // clear it before the execution
        unsafe { FUNCTION_LIST.clear() }
    }
    // The map is left populated for consumers to read after the run.
    fn post_exec(&mut self, _state: &mut S, _input: &<S as UsesInput>::Input) {}
}

View File

@ -121,6 +121,12 @@ pub use coverage::*;
pub mod value_profile;
pub use value_profile::*;
/// The module to hook call instructions
#[cfg(feature = "function-logging")]
pub mod call;
#[cfg(feature = "function-logging")]
pub use call::*;
/// runtime related to comparisons
pub mod cmps;
pub use cmps::*;