From 0f2cf80085a83e7d0c88ed2a0f16c6e2f443a875 Mon Sep 17 00:00:00 2001 From: van Hauser Date: Tue, 9 Jan 2024 19:56:19 +0100 Subject: [PATCH] libafl_qemu injections (#1743) * nits * first steps * different approach * fixes * remove temps * remove temp * initial import * more tests * bug hunt * cleanup * yaml function target 0x.... support * final * update doc * other work * Clippy, fmt * Removed lazystatic dependency * More small cleanups * optimize to_lowercase * move funtionality to libafl_qemu * add missing file * ready * remove qemu_injections * move test files to test directory * doc update * add todos * fixes * add file comment * add test and other platform support * fix clippy * Replace Emulator::new_empty by Emulator::get. Fix visibility identifier. * clippy * let's try this * cpu_target? * fmt * cleanup build system, enable missing fuzzers * fix qemu_launcher * enable hexagon in qemu_launcher * Removed useless `any` predicate in cfg attribute. Replaced wrong types in `syscall_hook` signature. * format * move to read_function_argument * add hexagon injections support * enable injections fuzzing everywhere * unify error msg * Fix build, add initial toml support * intermediate push, wip * fix build * More WIP * Fix build * Clippy * fix qemu * Fix arm * fix more wrong things * fix testcase * try to fix it again? * more release? * make makefile independent of dev/release * trying more fix? 
* More ugly more works * more trying to fix the testcase * allow yml as filename too * more docs --------- Co-authored-by: Dominik Maier Co-authored-by: Romain Malmain Co-authored-by: Dominik Maier --- fuzzers/qemu_launcher/Cargo.toml | 10 +- fuzzers/qemu_launcher/Makefile.toml | 23 +- fuzzers/qemu_launcher/README.md | 3 + fuzzers/qemu_launcher/build.rs | 4 +- fuzzers/qemu_launcher/injection_test/Makefile | 13 + .../qemu_launcher/injection_test/README.md | 10 + .../qemu_launcher/injection_test/example.db | Bin 0 -> 8192 bytes .../qemu_launcher/injection_test/sqltest.c | 63 +++ fuzzers/qemu_launcher/injections.toml | 63 +++ fuzzers/qemu_launcher/injections.yaml | 79 +++ fuzzers/qemu_launcher/src/client.rs | 66 ++- fuzzers/qemu_launcher/src/harness.rs | 4 +- fuzzers/qemu_launcher/src/instance.rs | 18 +- fuzzers/qemu_launcher/src/options.rs | 11 +- fuzzers/qemu_launcher/src/version.rs | 10 +- libafl/src/fuzzer/mod.rs | 2 +- libafl/src/mutators/token_mutations.rs | 2 +- libafl_qemu/Cargo.toml | 34 +- libafl_qemu/src/aarch64.rs | 18 +- libafl_qemu/src/arm.rs | 17 +- libafl_qemu/src/emu.rs | 10 +- libafl_qemu/src/hexagon.rs | 16 +- libafl_qemu/src/hooks.rs | 2 + libafl_qemu/src/i386.rs | 2 +- libafl_qemu/src/injections.rs | 479 ++++++++++++++++++ libafl_qemu/src/lib.rs | 10 + libafl_qemu/src/mips.rs | 17 +- libafl_qemu/src/ppc.rs | 18 +- libafl_qemu/src/x86_64.rs | 18 +- 29 files changed, 951 insertions(+), 71 deletions(-) create mode 100644 fuzzers/qemu_launcher/injection_test/Makefile create mode 100644 fuzzers/qemu_launcher/injection_test/README.md create mode 100644 fuzzers/qemu_launcher/injection_test/example.db create mode 100644 fuzzers/qemu_launcher/injection_test/sqltest.c create mode 100644 fuzzers/qemu_launcher/injections.toml create mode 100644 fuzzers/qemu_launcher/injections.yaml create mode 100644 libafl_qemu/src/injections.rs diff --git a/fuzzers/qemu_launcher/Cargo.toml b/fuzzers/qemu_launcher/Cargo.toml index e6c36be33c..7f06f96dd1 100644 --- 
a/fuzzers/qemu_launcher/Cargo.toml +++ b/fuzzers/qemu_launcher/Cargo.toml @@ -5,15 +5,23 @@ authors = ["Andrea Fioraldi ", "Dominik Maier in/a +timeout 10s "$(find ${TARGET_DIR} -name 'qemu_launcher')" -o out -i in -j ../injections.toml -v -- ./static >/dev/null 2>fuzz.log || true +if [ -z "$(grep -Ei "found.*injection" fuzz.log)" ]; then + echo "Fuzzer does not generate any testcases or any crashes" + echo "Logs:" + tail fuzz.log + exit 1 +else + echo "Fuzzer is working" +fi +make clean +#rm -rf in out fuzz.log || true ''' +dependencies = ["build_unix"] [tasks.clean] linux_alias = "clean_unix" diff --git a/fuzzers/qemu_launcher/README.md b/fuzzers/qemu_launcher/README.md index c955e83f73..a173b5a2f8 100644 --- a/fuzzers/qemu_launcher/README.md +++ b/fuzzers/qemu_launcher/README.md @@ -11,6 +11,9 @@ The following architectures are supported: * mips * ppc +Note that the injection feature `-j` is currently only supported on x86_64 +and aarch64. + ## Prerequisites ```bash sudo apt install \ diff --git a/fuzzers/qemu_launcher/build.rs b/fuzzers/qemu_launcher/build.rs index 16317b1560..6f93e95067 100644 --- a/fuzzers/qemu_launcher/build.rs +++ b/fuzzers/qemu_launcher/build.rs @@ -22,7 +22,7 @@ fn main() { .emit() .unwrap(); - assert_unique_feature!("arm", "aarch64", "i386", "x86_64", "mips", "ppc"); + assert_unique_feature!("arm", "aarch64", "i386", "x86_64", "mips", "ppc", "hexagon"); let cpu_target = if cfg!(feature = "x86_64") { "x86_64".to_string() @@ -36,6 +36,8 @@ fn main() { "mips".to_string() } else if cfg!(feature = "ppc") { "ppc".to_string() + } else if cfg!(feature = "hexagon") { + "hexagon".to_string() } else { println!("cargo:warning=No architecture specified defaulting to x86_64..."); println!("cargo:rustc-cfg=feature=\"x86_64\""); diff --git a/fuzzers/qemu_launcher/injection_test/Makefile b/fuzzers/qemu_launcher/injection_test/Makefile new file mode 100644 index 0000000000..d566aa2899 --- /dev/null +++ b/fuzzers/qemu_launcher/injection_test/Makefile @@ 
-0,0 +1,13 @@ +all: static sqltest + +sqltest: sqltest.c + gcc -g -o sqltest sqltest.c -l sqlite3 -lm + +static: sqltest.c + gcc -g -o static sqltest.c -l sqlite3 -lm -static + +fuzz: sqltest.c + afl-clang-fast -o fuzz sqltest.c -l sqlite3 + +clean: + rm -f sqltest static fuzz diff --git a/fuzzers/qemu_launcher/injection_test/README.md b/fuzzers/qemu_launcher/injection_test/README.md new file mode 100644 index 0000000000..30ca9be958 --- /dev/null +++ b/fuzzers/qemu_launcher/injection_test/README.md @@ -0,0 +1,10 @@ +# Injection test setup + +To build the injection test target: +`make` + +To run qemu_launcher with the injection detection activated: + +``` +target/release/qemu_launcher -j injections.yaml -i in -o out -- injection_test/static +``` diff --git a/fuzzers/qemu_launcher/injection_test/example.db b/fuzzers/qemu_launcher/injection_test/example.db new file mode 100644 index 0000000000000000000000000000000000000000..a32800c79b91815957f588bc4723a8b308f0c1fb GIT binary patch literal 8192 zcmeI#F$%&k6b9gzRNPA4I&^uXf(We_uu2A@4r&J1sue_~(pJQSc_DA$4NRm3H(iDQ zk>uqknZE5U=gGdxc$XKawBjLkNC+A+6Om)KG`lHbF0Ce*Zu7TtXs{8k`6awNnT3J? z1Rwwb2tWV=5P$##AOHafK;Tz_sc8E_AP!Z!K4!C<{%CTtP_b65<8h+6e&gz{}fB*y_009U<00Izz00bcLrvk2YeBaLhk)8Y_ KSJW}IF?<2YJSs~7 literal 0 HcmV?d00001 diff --git a/fuzzers/qemu_launcher/injection_test/sqltest.c b/fuzzers/qemu_launcher/injection_test/sqltest.c new file mode 100644 index 0000000000..94b14de292 --- /dev/null +++ b/fuzzers/qemu_launcher/injection_test/sqltest.c @@ -0,0 +1,63 @@ +#include +#include +#include +#include +#include + +static int callback(void *NotUsed, int argc, char **argv, char **azColName) { + int i; + for (i = 0; i < argc; i++) { + printf("%s=%s ", azColName[i], argv[i] ? 
argv[i] : "NULL"); + } + printf("\n"); + return 0; +} + +int LLVMFuzzerTestOneInput(char *data, size_t len) { + sqlite3 *db; + char *err_msg = 0, query[1024]; + + if (data[0] % 2) { + + int rc = sqlite3_open_v2("example.db", &db, SQLITE_OPEN_READONLY, 0); + if (rc != SQLITE_OK) { + fprintf(stderr, "Cannot open database: %s\n", sqlite3_errmsg(db)); + sqlite3_close(db); + return 1; + } + + snprintf( + query, sizeof(query), + "SELECT * FROM MyTable where user = \"user1\" and password = \"%s\"", + data); + + rc = sqlite3_exec(db, query, callback, 0, &err_msg); + + if (rc != SQLITE_OK) { + sqlite3_free(err_msg); + } + + sqlite3_close(db); + + } else { + snprintf(query, sizeof(query), "/usr/bin/id \"%s\"", data); + system(query); + } + + return 0; +} + +int main(int argc, char **argv) { + char pw[16]; + ssize_t len = 1; + + memset(pw, 0, sizeof(pw)); + if (argc > 1) { + if ((len = read(0, pw, sizeof(pw) - 1)) < 4) { + fprintf(stderr, "Error: short read from stdin\n"); + return -1; + } + } + + return LLVMFuzzerTestOneInput(pw, (size_t)len + 1); +} diff --git a/fuzzers/qemu_launcher/injections.toml b/fuzzers/qemu_launcher/injections.toml new file mode 100644 index 0000000000..69789e17ae --- /dev/null +++ b/fuzzers/qemu_launcher/injections.toml @@ -0,0 +1,63 @@ +# The TOML Structure: +# +# You can specify multiple different injection types if you want. +# [name] # any name you want, it is not important +# tokens = ["a string", ...] # an injection string to add to the tokens list +# matches = ["a string", ...] # if one of these substrings (case insensitive) is found +# # in the parameter of the function then crash! +# # note that this is not a regex. +# +# [name.functions] +# # multiple function targets to hook can be defined +# function_name = # name of the function you want to hook. +# # if the function name starts with 0x then +# # this is the QEMU Guest address of a +# # function you want to hook that does not +# # have a symbol. 
+# {param = number} # which parameter to the function contains the string +# # 0 = first, 1 = second, ... 0-5 are supported (depending on architecture) + +[sql] +tokens = [ "'\"\"'\"\n", "\"1\" OR '1'=\"1\"" ] +matches = [ "'\"\"'\"", "1\" OR '1'=\"1" ] + +[sql.functions] +sqlite3_exec = {param = 1} +PQexec = {param = 1} +PQexecParams = {param = 1} +mysql_query = {param = 1} +mysql_send_query = {param = 1} + + +# Command injection. Note that for most you will need a libc with debug symbols +# We do not need this as we watch the SYS_execve syscall, this is just an +# example. +[cmd] +tokens = [ + "'\"FUZZ\"'", + "\";FUZZ;\"", + "';FUZZ;'", + "$(FUZZ)", +] +matches = ["'\"FUZZ\"'"] + +[cmd.functions] +popen = {param = 0} +system = {param = 0} + +# LDAP injection tests +[ldap] +tokens = ["*)(FUZZ=*))(|"] +matches = ["*)(FUZZ=*))(|"] + +[ldap.functions] +ldap_search_ext = {param = 3} +ldap_search_ext_s = {param = 3} + +# XSS injection tests +# This is a minimal example that only checks for libxml2 +[xss] +tokens = ["'\">, StdRand, OnDiskCorpus>; @@ -42,11 +46,11 @@ impl<'a> Client<'a> { Ok(args) } - fn env(&self) -> Result, Error> { - let env = env::vars() + #[allow(clippy::unused_self)] // Api should look the same as args above + fn env(&self) -> Vec<(String, String)> { + env::vars() .filter(|(k, _v)| k != "LD_LIBRARY_PATH") - .collect::>(); - Ok(env) + .collect::>() } fn start_pc(emu: &Emulator) -> Result { @@ -59,6 +63,7 @@ impl<'a> Client<'a> { Ok(start_pc) } + #[allow(clippy::similar_names)] // elf != self fn coverage_filter( &self, emu: &Emulator, @@ -105,7 +110,7 @@ impl<'a> Client<'a> { let mut args = self.args()?; log::debug!("ARGS: {:#?}", args); - let mut env = self.env()?; + let mut env = self.env(); log::debug!("ENV: {:#?}", env); let (emu, mut asan) = { @@ -120,6 +125,29 @@ impl<'a> Client<'a> { let start_pc = Self::start_pc(&emu)?; log::debug!("start_pc @ {start_pc:#x}"); + #[cfg(not(feature = "injections"))] + let extra_tokens = None; + + #[cfg(feature 
= "injections")] + let injection_helper = self + .options + .injections + .as_ref() + .map(|injections_file| { + let lower = injections_file.to_lowercase(); + if lower.ends_with("yaml") || lower.ends_with("yml") { + QemuInjectionHelper::from_yaml(injections_file) + } else if lower.ends_with("toml") { + QemuInjectionHelper::from_toml(injections_file) + } else { + todo!("No injections given, what to do?"); + } + }) + .unwrap() + .unwrap(); + #[cfg(feature = "injections")] + let extra_tokens = Some(injection_helper.tokens.clone()); + emu.entry_break(start_pc); let ret_addr: GuestAddr = emu @@ -137,25 +165,51 @@ impl<'a> Client<'a> { .options(self.options) .emu(&emu) .mgr(mgr) - .core_id(core_id); + .core_id(core_id) + .extra_tokens(extra_tokens); if is_asan && is_cmplog { + #[cfg(not(feature = "injections"))] let helpers = tuple_list!( edge_coverage_helper, QemuCmpLogHelper::default(), QemuAsanHelper::default(asan.take().unwrap()), ); + #[cfg(feature = "injections")] + let helpers = tuple_list!( + edge_coverage_helper, + QemuCmpLogHelper::default(), + QemuAsanHelper::default(asan.take().unwrap()), + injection_helper, + ); instance.build().run(helpers, state) } else if is_asan { + #[cfg(not(feature = "injections"))] let helpers = tuple_list!( edge_coverage_helper, QemuAsanHelper::default(asan.take().unwrap()), ); + #[cfg(feature = "injections")] + let helpers = tuple_list!( + edge_coverage_helper, + QemuAsanHelper::default(asan.take().unwrap()), + injection_helper, + ); instance.build().run(helpers, state) } else if is_cmplog { + #[cfg(not(feature = "injections"))] let helpers = tuple_list!(edge_coverage_helper, QemuCmpLogHelper::default(),); + #[cfg(feature = "injections")] + let helpers = tuple_list!( + edge_coverage_helper, + QemuCmpLogHelper::default(), + injection_helper, + ); instance.build().run(helpers, state) } else { + #[cfg(not(feature = "injections"))] let helpers = tuple_list!(edge_coverage_helper,); + #[cfg(feature = "injections")] + let helpers = 
tuple_list!(edge_coverage_helper, injection_helper,); instance.build().run(helpers, state) } } diff --git a/fuzzers/qemu_launcher/src/harness.rs b/fuzzers/qemu_launcher/src/harness.rs index 68bb7fcda2..43b2aabb33 100644 --- a/fuzzers/qemu_launcher/src/harness.rs +++ b/fuzzers/qemu_launcher/src/harness.rs @@ -14,7 +14,7 @@ pub struct Harness<'a> { ret_addr: GuestAddr, } -pub const MAX_INPUT_SIZE: usize = 1048576; // 1MB +pub const MAX_INPUT_SIZE: usize = 1_048_576; // 1MB impl<'a> Harness<'a> { pub fn new(emu: &Emulator) -> Result { @@ -24,7 +24,7 @@ impl<'a> Harness<'a> { let pc: GuestReg = emu .read_reg(Regs::Pc) - .map_err(|e| Error::unknown(format!("Failed to read PC: {e:}")))?; + .map_err(|e| Error::unknown(format!("Failed to read PC: {e:}")))?; let stack_ptr: GuestAddr = emu .read_reg(Regs::Sp) diff --git a/fuzzers/qemu_launcher/src/instance.rs b/fuzzers/qemu_launcher/src/instance.rs index 26c28c4408..27a18e59ce 100644 --- a/fuzzers/qemu_launcher/src/instance.rs +++ b/fuzzers/qemu_launcher/src/instance.rs @@ -52,6 +52,7 @@ pub struct Instance<'a> { emu: &'a Emulator, mgr: ClientMgr, core_id: CoreId, + extra_tokens: Option>, } impl<'a> Instance<'a> { @@ -119,12 +120,21 @@ impl<'a> Instance<'a> { let observers = tuple_list!(edges_observer, time_observer); - if let Some(tokenfile) = &self.options.tokens { - if state.metadata_map().get::().is_none() { - state.add_metadata(Tokens::from_file(tokenfile)?); + let mut tokens = Tokens::new(); + + if let Some(extra_tokens) = &self.extra_tokens { + for token in extra_tokens { + let bytes = token.as_bytes().to_vec(); + let _ = tokens.add_token(&bytes); } } + if let Some(tokenfile) = &self.options.tokens { + tokens.add_from_file(tokenfile)?; + } + + state.add_metadata(tokens); + let harness = Harness::new(self.emu)?; let mut harness = |input: &BytesInput| harness.run(input); @@ -213,7 +223,7 @@ impl<'a> Instance<'a> { state .load_initial_inputs(fuzzer, executor, &mut self.mgr, &corpus_dirs) .unwrap_or_else(|_| { - 
println!("Failed to load initial corpus at {:?}", corpus_dirs); + println!("Failed to load initial corpus at {corpus_dirs:?}"); process::exit(0); }); println!("We imported {} inputs from disk.", state.corpus().count()); diff --git a/fuzzers/qemu_launcher/src/options.rs b/fuzzers/qemu_launcher/src/options.rs index da7a64fd41..7ba40fdfa1 100644 --- a/fuzzers/qemu_launcher/src/options.rs +++ b/fuzzers/qemu_launcher/src/options.rs @@ -11,6 +11,7 @@ use crate::version::Version; #[readonly::make] #[derive(Parser, Debug)] #[clap(author, version, about, long_about = None)] +#[allow(clippy::module_name_repetitions)] #[command( name = format!("qemu_coverage-{}",env!("CPU_TARGET")), version = Version::default(), @@ -24,9 +25,17 @@ pub struct FuzzerOptions { #[arg(short, long, help = "Output directory")] pub output: String, - #[arg(long, help = "Tokens file")] + #[arg(short = 'x', long, help = "Tokens file")] pub tokens: Option, + #[cfg(feature = "injections")] + #[arg( + short = 'j', + long, + help = "Injections TOML or YAML file definition. Filename must end in .toml or .yaml/.yml." + )] + pub injections: Option, + #[arg(long, help = "Log file")] pub log: Option, diff --git a/fuzzers/qemu_launcher/src/version.rs b/fuzzers/qemu_launcher/src/version.rs index 264501ace3..31fe17b61c 100644 --- a/fuzzers/qemu_launcher/src/version.rs +++ b/fuzzers/qemu_launcher/src/version.rs @@ -1,4 +1,4 @@ -use std::env; +use std::{env, fmt::Write}; use clap::builder::Str; @@ -21,8 +21,12 @@ impl From for Str { ("Cargo Target Triple", env!("VERGEN_CARGO_TARGET_TRIPLE")), ] .iter() - .map(|(k, v)| format!("{k:25}: {v}\n")) - .collect::(); + .fold(String::new(), |mut output, (k, v)| { + // Note that write!-ing into a String can never fail, despite the return type of write! being std::fmt::Result, so it can be safely ignored or unwrapped. 
+ // See https://rust-lang.github.io/rust-clippy/master/index.html#/format_collect + let _ = writeln!(output, "{k:25}: {v}"); + output + }); format!("\n{version:}").into() } diff --git a/libafl/src/fuzzer/mod.rs b/libafl/src/fuzzer/mod.rs index 83b8aed00a..ad0cc53332 100644 --- a/libafl/src/fuzzer/mod.rs +++ b/libafl/src/fuzzer/mod.rs @@ -221,7 +221,7 @@ where // If we would assume the fuzzer loop will always exit after this, we could do this here: // manager.on_restart(state)?; // But as the state may grow to a few megabytes, - // for now we won' and the user has to do it (unless we find a way to do this on `Drop`). + // for now we won't, and the user has to do it (unless we find a way to do this on `Drop`). Ok(ret.unwrap()) } diff --git a/libafl/src/mutators/token_mutations.rs b/libafl/src/mutators/token_mutations.rs index f2b487afba..ca76c927c4 100644 --- a/libafl/src/mutators/token_mutations.rs +++ b/libafl/src/mutators/token_mutations.rs @@ -105,7 +105,7 @@ impl Tokens { /// /// # Safety /// The caller must ensure that the region between `token_start` and `token_stop` - /// is a valid region, containing autotokens in the exepcted format. + /// is a valid region, containing autotokens in the expected format. #[cfg(any(target_os = "linux", target_vendor = "apple"))] pub unsafe fn from_mut_ptrs( token_start: *const u8, diff --git a/libafl_qemu/Cargo.toml b/libafl_qemu/Cargo.toml index 5aeb103253..017fb91872 100644 --- a/libafl_qemu/Cargo.toml +++ b/libafl_qemu/Cargo.toml @@ -12,17 +12,30 @@ edition = "2021" categories = ["development-tools::testing", "emulators", "embedded", "os", "no-std"] [package.metadata.docs.rs] +features = ["document-features"] all-features = true rustdoc-args = ["--cfg", "docsrs"] [features] default = ["fork", "build_libqasan", "serdeany_autoreg"] +clippy = [] # special feature for clippy, don't use in normal projects§ +document-features = ["dep:document-features"] + +#! # Feature Flags +#! 
### General Features +## Find injections during fuzzing +injections = ["serde_yaml", "toml"] +## Python bindings support python = ["pyo3", "pyo3-build-config"] +## Fork support fork = ["libafl/fork"] +## Build libqasan for address sanitization build_libqasan = [] -# The following architecture features are mutually exclusive. -x86_64 = ["libafl_qemu_sys/x86_64"] # build qemu for x86_64 (default) +#! ## The following architecture features are mutually exclusive. + +## build qemu for x86_64 (default) +x86_64 = ["libafl_qemu_sys/x86_64"] i386 = ["libafl_qemu_sys/i386"] # build qemu for i386 arm = ["libafl_qemu_sys/arm"] # build qemu for arm aarch64 = ["libafl_qemu_sys/aarch64"] # build qemu for aarch64 @@ -30,18 +43,22 @@ mips = ["libafl_qemu_sys/mips"] # build qemu for mips (el, use with the 'be' fea ppc = ["libafl_qemu_sys/ppc"] # build qemu for powerpc hexagon = ["libafl_qemu_sys/hexagon"] # build qemu for hexagon +## Big Endian mode be = ["libafl_qemu_sys/be"] +## Usermode (mutually exclusive to Systemmode) usermode = ["libafl_qemu_sys/usermode"] +## Systemmode (mutually exclusive to Usermode) systemmode = ["libafl_qemu_sys/systemmode"] -# SerdeAny features -serdeany_autoreg = ["libafl_bolts/serdeany_autoreg"] # Automatically register all `#[derive(SerdeAny)]` types at startup. +#! ## SerdeAny features +## Automatically register all `#[derive(SerdeAny)]` types at startup. +serdeany_autoreg = ["libafl_bolts/serdeany_autoreg"] + +## Automatically register all `#[derive(SerdeAny)]` types at startup. 
slirp = [ "systemmode", "libafl_qemu_sys/slirp" ] # build qemu with host libslirp (for user networking) -clippy = [] # special feature for clippy, don't use in normal projects§ - [dependencies] libafl = { path = "../libafl", version = "0.11.2", default-features = false, features = ["std", "derive", "regex"] } libafl_bolts = { path = "../libafl_bolts", version = "0.11.2", default-features = false, features = ["std", "derive"] } @@ -67,8 +84,11 @@ addr2line = "0.21" typed-arena = "2.0" paste = "1" enum-map = "2.7" - +serde_yaml = { version = "0.8", optional = true } # For parsing the injections yaml file +toml = { version = "0.4.2", optional = true } # For parsing the injections toml file pyo3 = { version = "0.18", optional = true } +# Document all features of this crate (for `cargo doc`) +document-features = { version = "0.2", optional = true } [build-dependencies] pyo3-build-config = { version = "0.18", optional = true } diff --git a/libafl_qemu/src/aarch64.rs b/libafl_qemu/src/aarch64.rs index 877a9319e6..4489b57171 100644 --- a/libafl_qemu/src/aarch64.rs +++ b/libafl_qemu/src/aarch64.rs @@ -105,7 +105,7 @@ impl crate::ArchExtras for crate::CPU { self.write_reg(Regs::Lr, val) } - fn read_function_argument(&self, conv: CallingConvention, idx: i32) -> Result + fn read_function_argument(&self, conv: CallingConvention, idx: u8) -> Result where T: From, { @@ -113,11 +113,17 @@ impl crate::ArchExtras for crate::CPU { return Err(format!("Unsupported calling convention: {conv:#?}")); } - match idx { - 0 => self.read_reg(Regs::X0), - 1 => self.read_reg(Regs::X1), - _ => Err(format!("Unsupported argument: {idx:}")), - } + let reg_id = match idx { + 0 => Regs::X0, + 1 => Regs::X1, + 2 => Regs::X2, + 3 => Regs::X3, + 4 => Regs::X4, + 5 => Regs::X5, + r => return Err(format!("Unsupported argument: {r:}")), + }; + + self.read_reg(reg_id) } fn write_function_argument( diff --git a/libafl_qemu/src/arm.rs b/libafl_qemu/src/arm.rs index 5220f60e6b..926f8bef89 100644 --- 
a/libafl_qemu/src/arm.rs +++ b/libafl_qemu/src/arm.rs @@ -102,7 +102,7 @@ impl crate::ArchExtras for crate::CPU { self.write_reg(Regs::Lr, val) } - fn read_function_argument(&self, conv: CallingConvention, idx: i32) -> Result + fn read_function_argument(&self, conv: CallingConvention, idx: u8) -> Result where T: From, { @@ -110,11 +110,16 @@ impl crate::ArchExtras for crate::CPU { return Err(format!("Unsupported calling convention: {conv:#?}")); } - match idx { - 0 => self.read_reg(Regs::R0), - 1 => self.read_reg(Regs::R1), - _ => Err(format!("Unsupported argument: {idx:}")), - } + let reg_id = match idx { + 0 => Regs::R0, + 1 => Regs::R1, + 2 => Regs::R2, + 3 => Regs::R3, + // 4.. would be on the stack, let's not do this for now + r => return Err(format!("Unsupported argument: {r:}")), + }; + + self.read_reg(reg_id) } fn write_function_argument( diff --git a/libafl_qemu/src/emu.rs b/libafl_qemu/src/emu.rs index 002563ac37..301d5667e5 100644 --- a/libafl_qemu/src/emu.rs +++ b/libafl_qemu/src/emu.rs @@ -491,7 +491,7 @@ pub trait ArchExtras { fn write_return_address(&self, val: T) -> Result<(), String> where T: Into; - fn read_function_argument(&self, conv: CallingConvention, idx: i32) -> Result + fn read_function_argument(&self, conv: CallingConvention, idx: u8) -> Result where T: From; fn write_function_argument( @@ -1004,7 +1004,7 @@ impl Emulator { /// Should not be used if `Emulator::new` has never been used before (otherwise QEMU will not be initialized). /// Prefer `Emulator::get` for a safe version of this method. 
#[must_use] - unsafe fn new_empty() -> Emulator { + pub unsafe fn new_empty() -> Emulator { Emulator { _private: () } } @@ -1640,7 +1640,7 @@ impl ArchExtras for Emulator { .write_return_address::(val) } - fn read_function_argument(&self, conv: CallingConvention, idx: i32) -> Result + fn read_function_argument(&self, conv: CallingConvention, idx: u8) -> Result where T: From, { @@ -1676,7 +1676,7 @@ pub mod pybind { static mut PY_GENERIC_HOOKS: Vec<(GuestAddr, PyObject)> = vec![]; extern "C" fn py_syscall_hook_wrapper( - data: u64, + _data: u64, sys_num: i32, a0: u64, a1: u64, @@ -1774,7 +1774,7 @@ pub mod pybind { fn run(&self) { unsafe { - self.emu.run(); + self.emu.run().unwrap(); } } diff --git a/libafl_qemu/src/hexagon.rs b/libafl_qemu/src/hexagon.rs index e44885e1b8..726c3b0a54 100644 --- a/libafl_qemu/src/hexagon.rs +++ b/libafl_qemu/src/hexagon.rs @@ -106,7 +106,7 @@ impl crate::ArchExtras for crate::CPU { self.write_reg(Regs::Lr, val) } - fn read_function_argument(&self, conv: CallingConvention, idx: i32) -> Result + fn read_function_argument(&self, conv: CallingConvention, idx: u8) -> Result where T: From, { @@ -114,8 +114,18 @@ impl crate::ArchExtras for crate::CPU { return Err(format!("Unsupported calling convention: {conv:#?}")); } - // TODO - Err(format!("Unsupported argument: {idx:}")) + // Note that 64 bit values may be passed in two registers (and may have padding), then this mapping is off. 
+ let reg_id = match idx { + 0 => Regs::R0, + 1 => Regs::R1, + 2 => Regs::R2, + 3 => Regs::R3, + 4 => Regs::R4, + 5 => Regs::R5, + r => return Err(format!("Unsupported argument: {r:}")), + }; + + self.read_reg(reg_id) } fn write_function_argument( diff --git a/libafl_qemu/src/hooks.rs b/libafl_qemu/src/hooks.rs index 6183d55f48..feee6ae9ab 100644 --- a/libafl_qemu/src/hooks.rs +++ b/libafl_qemu/src/hooks.rs @@ -567,6 +567,7 @@ where } } + #[allow(clippy::similar_names)] pub fn reads( &self, generation_hook: Hook< @@ -730,6 +731,7 @@ where write_3_exec_hook_wrapper::, extern "C" fn(&mut HookState<5>, id: u64, addr: GuestAddr) ); + #[allow(clippy::similar_names)] let execn = get_raw_hook!( execution_hook_n, write_4_exec_hook_wrapper::, diff --git a/libafl_qemu/src/i386.rs b/libafl_qemu/src/i386.rs index a6c5c9bc30..44bbd0ca92 100644 --- a/libafl_qemu/src/i386.rs +++ b/libafl_qemu/src/i386.rs @@ -88,7 +88,7 @@ impl crate::ArchExtras for crate::CPU { Ok(()) } - fn read_function_argument(&self, conv: CallingConvention, idx: i32) -> Result + fn read_function_argument(&self, conv: CallingConvention, idx: u8) -> Result where T: From, { diff --git a/libafl_qemu/src/injections.rs b/libafl_qemu/src/injections.rs new file mode 100644 index 0000000000..bf9e7233ac --- /dev/null +++ b/libafl_qemu/src/injections.rs @@ -0,0 +1,479 @@ +//! 
Detect injection vulnerabilities + +/* + * TODOs: + * - read in export addresses of shared libraries to resolve functions + * + * Maybe: + * - return code analysis support (not needed currently) + * - regex support (not needed currently) + * - std::string and Rust String support (would need such target functions added) + * + */ + +use std::{ffi::CStr, fmt::Display, fs, os::raw::c_char, path::Path}; + +use hashbrown::HashMap; +use libafl::{inputs::UsesInput, Error}; +use serde::{Deserialize, Serialize}; + +use crate::{ + elf::EasyElf, emu::ArchExtras, CallingConvention, Emulator, GuestAddr, Hook, QemuHelper, + QemuHelperTuple, QemuHooks, SYS_execve, SyscallHookResult, +}; + +/// Parses `injections.yaml` +fn parse_yaml + Display>(path: P) -> Result, Error> { + serde_yaml::from_str(&fs::read_to_string(&path)?) + .map_err(|e| Error::serialize(format!("Failed to deserialize yaml at {path}: {e}"))) +} + +/// Parses `injections.toml` +fn parse_toml + Display>( + path: P, +) -> Result, Error> { + toml::from_str(&fs::read_to_string(&path)?) 
+ .map_err(|e| Error::serialize(format!("Failed to deserialize toml at {path}: {e}"))) +} + +/// Converts the injects.yaml format to the internal toml-like format +fn yaml_entries_to_definition( + yaml_entries: &Vec, +) -> Result, Error> { + let mut ret = HashMap::new(); + + for entry in yaml_entries { + let mut functions = HashMap::new(); + for function in &entry.functions { + functions.insert( + function.function.clone(), + FunctionDescription { + param: function.parameter, + }, + ); + } + + let mut matches = Vec::new(); + let mut tokens = Vec::new(); + for test in &entry.tests { + matches.push(test.match_value.clone()); + tokens.push(test.input_value.clone()); + } + + if ret + .insert( + entry.name.clone(), + InjectionDefinition { + tokens, + matches, + functions, + }, + ) + .is_some() + { + return Err(Error::illegal_argument(format!( + "Entry {} was multiply defined!", + entry.name + ))); + } + } + Ok(ret) +} + +#[derive(Debug, Clone)] +struct LibInfo { + name: String, + off: GuestAddr, +} + +impl LibInfo { + fn add_unique(libs: &mut Vec, new_lib: LibInfo) { + if !libs.iter().any(|lib| lib.name == new_lib.name) { + libs.push(new_lib); + } + } +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +struct Test { + input_value: String, + match_value: String, +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +struct Functions { + function: String, + parameter: u8, +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +struct YamlInjectionEntry { + name: String, + functions: Vec, + tests: Vec, +} + +#[derive(Serialize, Deserialize, Debug, Clone, Copy)] +struct FunctionDescription { + param: u8, +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct InjectionDefinition { + tokens: Vec, + matches: Vec, + functions: HashMap, +} + +#[derive(Clone, Debug)] +pub struct Matches { + id: usize, + lib_name: String, + matches: Vec, +} + +#[derive(Clone, Debug)] +pub struct Match { + bytes_lower: Vec, + original_value: String, +} + +#[derive(Debug)] +pub struct 
QemuInjectionHelper { + pub tokens: Vec, + definitions: HashMap, + matches_list: Vec, +} + +impl QemuInjectionHelper { + /// Creates the helper from an injections YAML file: the entries are parsed, + /// converted to definitions and then handed to `new`. + pub fn from_yaml + Display>(yaml_file: P) -> Result { + let yaml_entries = parse_yaml(yaml_file)?; + let definition = yaml_entries_to_definition(&yaml_entries)?; + Self::new(definition) + } + + /// Creates the helper from an injections TOML file: the parsed definitions + /// are handed to `new`. + pub fn from_toml + Display>(toml_file: P) -> Result { + let definition = parse_toml(toml_file)?; + Self::new(definition) + } + + pub fn new(definitions: HashMap) -> Result { + let tokens = definitions + .iter() + .flat_map(|(_lib_name, definition)| &definition.tokens) + .map(ToString::to_string) + .collect(); + + let mut matches_list = Vec::with_capacity(definitions.len()); + + for (lib_name, definition) in &definitions { + let matches: Vec = definition + .matches + .iter() + .map(|match_str| { + let mut bytes_lower = match_str.as_bytes().to_vec(); + bytes_lower.make_ascii_lowercase(); + + Match { + original_value: match_str.clone(), + bytes_lower, + } + }) + .collect(); + + let id = matches_list.len(); + matches_list.push(Matches { + lib_name: lib_name.clone(), + id, + matches, + }); + } + + Ok(Self { + tokens, + definitions, + matches_list, + }) + } + + fn on_call_check>( + hooks: &mut QemuHooks, + id: usize, + parameter: u8, + ) { + let emu = hooks.emulator(); + let reg: GuestAddr = emu + .current_cpu() + .unwrap() + .read_function_argument(CallingConvention::Cdecl, parameter) + .unwrap_or_default(); + + let helper = hooks.helpers_mut().match_first_type_mut::().unwrap(); + let matches = &helper.matches_list[id]; + + //println!("reg value = {:x}", reg); + + if reg != 0x00 { + let mut query = unsafe { + let c_str_ptr = reg as *const c_char; + let c_str = CStr::from_ptr(c_str_ptr); + c_str.to_bytes().to_vec() + }; + 
query.make_ascii_lowercase(); + + //println!("query={}", query); + log::trace!("Checking {}", matches.lib_name); + + for match_value in &matches.matches { + if match_value.bytes_lower.is_empty() || match_value.bytes_lower.len() > query.len() { + continue; // an empty needle would make `windows(0)` panic, an oversized one can never match + } + + // "crash" if we found the right value + assert!( + find_subsequence(&query, &match_value.bytes_lower).is_none(), + "Found value \"{}\" for {query:?} in {}", + match_value.original_value, + matches.lib_name + ); + } + } + } +} + +impl QemuHelper for QemuInjectionHelper +where + S: UsesInput, +{ + fn init_hooks(&self, hooks: &QemuHooks) + where + QT: QemuHelperTuple, + { + hooks.syscalls(Hook::Function(syscall_hook::)); + } + + fn first_exec(&self, hooks: &QemuHooks) + where + QT: QemuHelperTuple, + { + let emu = hooks.emulator(); + let mut libs: Vec = Vec::new(); + + for region in emu.mappings() { + if let Some(path) = region.path().map(ToOwned::to_owned) { + if !path.is_empty() { + LibInfo::add_unique( + &mut libs, + LibInfo { + name: path.clone(), + off: region.start(), + }, + ); + } + } + } + + for matches in &self.matches_list { + let id = matches.id; + let lib_name = &matches.lib_name; + + for (name, func_definition) in &self.definitions[lib_name].functions { + let hook_addrs = if name.to_lowercase().starts_with(&"0x".to_string()) { + let func_pc = u64::from_str_radix(&name[2..], 16) + .map_err(|e| { + Error::illegal_argument(format!( + "Failed to parse hex string {name} from definition for {lib_name}: {e}" + )) + }) + .unwrap() as GuestAddr; + log::info!("Injections: Hooking hardcoded function {func_pc:#x}"); + vec![func_pc] + } else { + libs.iter() + .filter_map(|lib| find_function(emu, &lib.name, name, lib.off).unwrap()) + .map(|func_pc| { + log::info!("Injections: Function {name} found at {func_pc:#x}",); + func_pc + }) + .collect() + }; + + if hook_addrs.is_empty() { + log::warn!("Injections: Function not found for {lib_name}: {name}",); + } + + let param = func_definition.param; + + for hook_addr in hook_addrs { + 
hooks.instruction( + hook_addr, + Hook::Closure(Box::new(move |hooks, _state, _guest_addr| { + Self::on_call_check(hooks, id, param); + })), + true, + ); + } + } + } + } +} + +fn syscall_hook( + hooks: &mut QemuHooks, // our instantiated QemuHooks + _state: Option<&mut S>, + syscall: i32, // syscall number + x0: GuestAddr, // registers ... + x1: GuestAddr, + _x2: GuestAddr, + _x3: GuestAddr, + _x4: GuestAddr, + _x5: GuestAddr, + _x6: GuestAddr, + _x7: GuestAddr, +) -> SyscallHookResult +where + QT: QemuHelperTuple, + S: UsesInput, +{ + log::trace!("syscall_hook {syscall} {SYS_execve}"); + debug_assert!(i32::try_from(SYS_execve).is_ok()); + if syscall == SYS_execve as i32 { + let _helper = hooks + .helpers_mut() + .match_first_type_mut::() + .unwrap(); + if x0 > 0 && x1 > 0 { + let c_array = x1 as *const *const c_char; + let cmd = unsafe { + let c_str_ptr = x0 as *const c_char; + CStr::from_ptr(c_str_ptr).to_string_lossy() + }; + assert_ne!( + cmd.to_lowercase(), + "fuzz", + "Found verified command injection!" 
/// Returns the index of the first occurrence of `needle` within `haystack`.
///
/// Returns `None` when `needle` does not occur, which includes the case where
/// it is longer than `haystack`.
///
/// An empty `needle` is reported as not found: `slice::windows` panics on a
/// window size of zero, and treating the empty sequence as a hit would make an
/// empty configured match string flag every checked value.
fn find_subsequence(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    if needle.is_empty() {
        return None;
    }

    haystack
        .windows(needle.len())
        .position(|window| window == needle)
}
= {param = 3} + ldap_search_ext_s = {param = 3} + + # XSS injection tests + # This is a minimal example that only checks for libxml2 + [xss] + tokens = ["'\">(&self, conv: CallingConvention, idx: i32) -> Result + fn read_function_argument(&self, conv: CallingConvention, idx: u8) -> Result where T: From, { @@ -110,11 +110,16 @@ impl crate::ArchExtras for crate::CPU { return Err(format!("Unsupported calling convention: {conv:#?}")); } - match idx { - 0 => self.read_reg(Regs::A0), - 1 => self.read_reg(Regs::A1), - _ => Err(format!("Unsupported argument: {idx:}")), - } + let reg_id = match idx { + 0 => Regs::A0, + 1 => Regs::A1, + 2 => Regs::A2, + 3 => Regs::A3, + // 4.. would be on the stack, let's not do this for now + r => return Err(format!("Unsupported argument: {r:}")), + }; + + self.read_reg(reg_id) } fn write_function_argument( diff --git a/libafl_qemu/src/ppc.rs b/libafl_qemu/src/ppc.rs index 3722277d96..3f4bbcdccc 100644 --- a/libafl_qemu/src/ppc.rs +++ b/libafl_qemu/src/ppc.rs @@ -142,7 +142,7 @@ impl crate::ArchExtras for crate::CPU { self.write_reg(Regs::Lr, val) } - fn read_function_argument(&self, conv: CallingConvention, idx: i32) -> Result + fn read_function_argument(&self, conv: CallingConvention, idx: u8) -> Result where T: From, { @@ -150,11 +150,17 @@ impl crate::ArchExtras for crate::CPU { return Err(format!("Unsupported calling convention: {conv:#?}")); } - match idx { - 0 => self.read_reg(Regs::R3), - 1 => self.read_reg(Regs::R4), - _ => Err(format!("Unsupported argument: {idx:}")), - } + let reg_id = match idx { + 0 => Regs::R3, + 1 => Regs::R4, + 2 => Regs::R5, + 3 => Regs::R6, + 4 => Regs::R7, + 5 => Regs::R8, + r => return Err(format!("Unsupported argument: {r:}")), + }; + + self.read_reg(reg_id) } fn write_function_argument( diff --git a/libafl_qemu/src/x86_64.rs b/libafl_qemu/src/x86_64.rs index 1a48c1e32e..3003a922f1 100644 --- a/libafl_qemu/src/x86_64.rs +++ b/libafl_qemu/src/x86_64.rs @@ -97,7 +97,7 @@ impl crate::ArchExtras for 
crate::CPU { Ok(()) } - fn read_function_argument(&self, conv: CallingConvention, idx: i32) -> Result + fn read_function_argument(&self, conv: CallingConvention, idx: u8) -> Result where T: From, { @@ -105,11 +105,17 @@ impl crate::ArchExtras for crate::CPU { return Err(format!("Unsupported calling convention: {conv:#?}")); } - match idx { - 0 => self.read_reg(Regs::Rdi), - 1 => self.read_reg(Regs::Rsi), - _ => Err(format!("Unsupported argument: {idx:}")), - } + let reg_id = match idx { + 0 => Regs::Rdi, + 1 => Regs::Rsi, + 2 => Regs::Rdx, + 3 => Regs::Rcx, + 4 => Regs::R8, + 5 => Regs::R9, + r => return Err(format!("Unsupported argument: {r:}")), + }; + + self.read_reg(reg_id) } fn write_function_argument(