Commandline args handling for binary only target on libafl_qemu (#3098)

* POC

* POC

* pin the obj

* add map_input_to_memory

* still trying to get things to work

* Justfile

* lol

* nah bad idea

* done

* revert

* revert

* lol

* Move to libafl_qemu

* a

* add

* add

* lol

* clp

* a

* tmate

* Thank you bash I love you

* aaaaaaaa

* a

* bbb
This commit is contained in:
Dongjia "toka" Zhang 2025-03-21 18:43:08 +01:00 committed by GitHub
parent 9195245998
commit 60d0ccb8cb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 233 additions and 7 deletions

4
.gitignore vendored
View File

@ -5,8 +5,8 @@ vendor
# cargo lockfiles except from binaries
**/Cargo.lock
!fuzzers/**/Cargo.lock
!utils/**/Cargo.lock
# !fuzzers/**/Cargo.lock
# !utils/**/Cargo.lock
.DS_Store

View File

@ -24,7 +24,8 @@ fuzzer: cxx
[macos]
run: fuzzer
#!/bin/bash
./{{FUZZER}} &
./{{FUZZER_NAME}} &
./{{FUZZER_NAME}}
sleep 0.2
[windows]

View File

@ -24,7 +24,7 @@ fuzzer: cxx
[macos]
run: fuzzer
#!/bin/bash
./{{FUZZER}} &
./{{FUZZER_NAME}} &
sleep 0.2
[windows]
@ -37,9 +37,7 @@ test: fuzzer
#!/bin/bash
success=0
rm -rf libafl_unix_shmem_server || true
(timeout 5s ./{{FUZZER_NAME}} >fuzz_stdout.log 2>/dev/null || true) &
sleep 0.2
timeout 5s ./{{FUZZER_NAME}} >/dev/null 2>/dev/null || true
timeout 5s ./{{FUZZER_NAME}} >fuzz_stdout.log
while read -r line; do
corpus_number=$(echo "$line" | cut -d' ' -f2)
if (( corpus_number > 50 )); then

222
libafl_bolts/src/cargs.rs Normal file
View File

@ -0,0 +1,222 @@
//! Parse command line argument like AFL, then put it in a C-compatible way
use alloc::{borrow::ToOwned, boxed::Box, ffi::CString, vec::Vec};
use core::{
ffi::{c_char, c_int},
pin::Pin,
};
use std::{
ffi::{OsStr, OsString},
os::unix::ffi::OsStrExt,
path::Path,
};
use crate::{Error, fs::get_unique_std_input_file};
/// Builder that collects a program path and its arguments, and from which a
/// C-compatible [`CMainArgs`] can be built.
#[derive(Debug)]
pub struct CMainArgsBuilder {
/// Whether the test case is passed via stdin; set by `parse_afl_cmdline` when no
/// input file has been configured.
use_stdin: bool,
/// The harness program; becomes the first entry of the built `argv`.
/// `build` fails if this is still `None`.
program: Option<OsString>,
/// The file name that was registered as the input file via `arg_input_file`, if any.
input_filename: Option<OsString>,
/// The arguments that follow the program in the built `argv`, in insertion order.
args: Vec<OsString>,
}
impl Default for CMainArgsBuilder {
fn default() -> Self {
Self::new()
}
}
impl CMainArgsBuilder {
/// Constructor
#[must_use]
pub fn new() -> Self {
Self {
program: None,
use_stdin: false,
input_filename: None,
args: Vec::new(),
}
}
/// The harness
#[must_use]
pub fn program<O>(mut self, program: O) -> Self
where
O: AsRef<OsStr>,
{
self.program = Some(program.as_ref().to_owned());
self
}
/// Adds an argument to the harness's commandline
///
/// You may want to use `parse_afl_cmdline` if you're going to pass `@@`
/// represents the input file generated by the fuzzer (similar to the `afl-fuzz` command line).
#[must_use]
pub fn arg<O>(mut self, arg: O) -> Self
where
O: AsRef<OsStr>,
{
self.args.push(arg.as_ref().to_owned());
self
}
/// Adds arguments to the harness's commandline
///
/// You may want to use `parse_afl_cmdline` if you're going to pass `@@`
/// represents the input file generated by the fuzzer (similar to the `afl-fuzz` command line).
#[must_use]
pub fn args<IT, O>(mut self, args: IT) -> Self
where
IT: IntoIterator<Item = O>,
O: AsRef<OsStr>,
{
let mut res = vec![];
for arg in args {
res.push(arg.as_ref().to_owned());
}
self.args.append(&mut res);
self
}
/// Place the input at this position and set the filename for the input.
///
/// Note: If you use this, you should ensure that there is only one instance using this
/// file at any given time.
#[must_use]
pub fn arg_input_file<P: AsRef<Path>>(self, path: P) -> Self {
let mut moved = self.arg(path.as_ref());
let path_as_string = path.as_ref().as_os_str().to_os_string();
assert!(
// It's only save to set the input_filename, if it does not overwrite an existing one.
(moved.input_filename.is_none() || moved.input_filename.unwrap() == path_as_string),
"Already specified an input file under a different name. This is not supported"
);
moved.input_filename = Some(path_as_string);
moved
}
/// Place the input at this position and set the default filename for the input.
#[must_use]
/// The filename includes the PID of the fuzzer to ensure that no two fuzzers write to the same file
pub fn arg_input_file_std(self) -> Self {
self.arg_input_file(get_unique_std_input_file())
}
#[must_use]
/// Parse afl style command line
///
/// Replaces `@@` with the path to the input file generated by the fuzzer. If `@@` is omitted,
/// `stdin` is used to pass the test case instead.
///
/// Interprets the first argument as the path to the program as long as it is not set yet.
/// You have to omit the program path in case you have set it already. Otherwise
/// it will be interpreted as a regular argument, leading to probably unintended results.
pub fn parse_afl_cmdline<IT, O>(self, args: IT) -> Self
where
IT: IntoIterator<Item = O>,
O: AsRef<OsStr>,
{
let mut moved = self;
let mut use_arg_0_as_program = false;
if moved.program.is_none() {
use_arg_0_as_program = true;
}
for item in args {
if use_arg_0_as_program {
moved = moved.program(item);
// After the program has been set, unset `use_arg_0_as_program` to treat all
// subsequent arguments as regular arguments
use_arg_0_as_program = false;
} else if item.as_ref() == "@@" {
match &moved.input_filename.clone() {
Some(name) => {
// If the input file name has been modified, use this one
moved = moved.arg_input_file(name);
}
_ => {
moved = moved.arg_input_file_std();
}
}
} else {
moved = moved.arg(item);
}
}
// If we have not set an input file, use stdin as it is AFLs default
moved.use_stdin = moved.input_filename.is_none();
moved
}
/// Build it
pub fn build(&self) -> Result<CMainArgs, Error> {
let mut argv: Vec<Pin<Box<CString>>> = Vec::new();
if let Some(program) = &self.program {
argv.push(Box::pin(CString::new(program.as_bytes()).unwrap()));
} else {
return Err(Error::illegal_argument("Program not specified"));
}
for args in &self.args {
argv.push(Box::pin(CString::new(args.as_bytes()).unwrap()));
}
let mut argv_ptr: Vec<*const c_char> = argv.iter().map(|arg| arg.as_ptr()).collect();
argv_ptr.push(core::ptr::null());
Ok(CMainArgs {
use_stdin: self.use_stdin,
argv,
argv_ptr,
})
}
}
/// Owns a C-compatible `argc`/`argv` pair that can be handed to a C-style `main`.
#[derive(Debug)]
// `argv` is never read directly; it only keeps the argument strings alive.
#[allow(dead_code)]
pub struct CMainArgs {
    /// Whether the test case is passed via stdin.
    use_stdin: bool,
    /// The owned argument strings. They have to be stored here, otherwise Rust would free them.
    argv: Vec<Pin<Box<CString>>>,
    /// The pointer array that is handed out as the C `argv`.
    argv_ptr: Vec<*const c_char>,
}

// From https://gist.github.com/TrinityCoder/793c097b5a4ab25b8fabf5cd67e92f05
impl CMainArgs {
    /// Whether stdin is used to pass the test case.
    #[must_use]
    pub fn use_stdin(&self) -> bool {
        self.use_stdin
    }

    /// Returns the C language's `argv` (`*const *const c_char`).
    /// On x64 you would pass this to Rsi before starting emulation
    /// Like: `qemu.write_reg(Regs::Rsi, main_args.argv() as u64).unwrap();`
    #[must_use]
    pub fn argv(&self) -> *const *const c_char {
        let pointers: &[*const c_char] = &self.argv_ptr;
        pointers.as_ptr()
    }

    /// Returns the C language's `argv[0]` (`*const c_char`).
    #[must_use]
    pub fn argv0(&self) -> *const c_char {
        self.argv_ptr[0]
    }

    /// Gets total number of args.
    /// On x64 you would pass this to Rdi before starting emulation
    /// Like: `qemu.write_reg(Regs::Rdi, main_args.argc() as u64).unwrap();`
    #[must_use]
    pub fn argc(&self) -> c_int {
        // The last entry of `argv_ptr` is not counted as an argument.
        let count = self.argv_ptr.len() - 1;
        count.try_into().unwrap()
    }
}

View File

@ -112,6 +112,11 @@ pub mod subrange;
#[cfg(any(feature = "xxh3", feature = "alloc"))]
pub mod tuples;
#[cfg(all(feature = "std", unix))]
pub mod cargs;
#[cfg(all(feature = "std", unix))]
pub use cargs::*;
/// The purpose of this module is to alleviate imports of the bolts by adding a glob import.
#[cfg(feature = "prelude")]
pub mod bolts_prelude {