From 9d38fff662f4647117dff841fb9e8b30bc7ae5fe Mon Sep 17 00:00:00 2001 From: Dongjia Zhang Date: Thu, 10 Feb 2022 18:27:51 +0900 Subject: [PATCH] Autodict forkserver (#525) * Builder for ForkserverExecutor * add * clippy warnings * comment * stash * tmp * change * revert * use_shmem_feature field * change the harness back * wip * wip * revert * works * clippy * Makefile fix * doc * clippy * rename to program * rename, fix, envs * lifetime * arg_input_file * stash * read autodict from forkserver * works * clippy & fmt * fmt * fix * fix * fmt * better harness * arg_input_file_std * rename * fix --- fuzzers/forkserver_simple/src/main.rs | 12 ++- fuzzers/forkserver_simple/src/program.c | 11 ++- libafl/src/executors/forkserver.rs | 110 +++++++++++++++++++++--- libafl/src/mutators/token_mutations.rs | 46 +++++----- 4 files changed, 138 insertions(+), 41 deletions(-) diff --git a/fuzzers/forkserver_simple/src/main.rs b/fuzzers/forkserver_simple/src/main.rs index 9c5f6e0b89..53f45ce334 100644 --- a/fuzzers/forkserver_simple/src/main.rs +++ b/fuzzers/forkserver_simple/src/main.rs @@ -4,7 +4,7 @@ use libafl::{ current_nanos, rands::StdRand, shmem::{ShMem, ShMemProvider, StdShMemProvider}, - tuples::tuple_list, + tuples::{tuple_list, Merge}, AsMutSlice, }, corpus::{ @@ -18,10 +18,10 @@ use libafl::{ fuzzer::{Fuzzer, StdFuzzer}, inputs::BytesInput, monitors::SimpleMonitor, - mutators::scheduled::{havoc_mutations, StdScheduledMutator}, + mutators::{scheduled::havoc_mutations, tokens_mutations, StdScheduledMutator, Tokens}, observers::{ConstMapObserver, HitcountsMapObserver, TimeObserver}, stages::mutational::StdMutationalStage, - state::{HasCorpus, StdState}, + state::{HasCorpus, HasMetadata, StdState}, }; use std::path::PathBuf; @@ -145,11 +145,13 @@ pub fn main() { None => [].to_vec(), }; + let mut tokens = Tokens::new(); let forkserver = ForkserverExecutor::builder() .program(res.value_of("executable").unwrap().to_string()) .args(&args) .debug_child(debug_child) .shmem_provider(&mut shmem_provider) + .autotokens(&mut tokens) .build(tuple_list!(time_observer, edges_observer)) .unwrap(); @@ -178,8 +180,10 @@ pub fn main() { println!("We imported {} inputs from disk.", state.corpus().count()); } + state.add_metadata(tokens); + // Setup a mutational stage with a basic bytes mutator - let mutator = StdScheduledMutator::new(havoc_mutations()); + let mutator = StdScheduledMutator::new(havoc_mutations().merge(tokens_mutations())); let mut stages = tuple_list!(StdMutationalStage::new(mutator)); fuzzer diff --git a/fuzzers/forkserver_simple/src/program.c b/fuzzers/forkserver_simple/src/program.c index e868a66dc8..de3bec4124 100644 --- a/fuzzers/forkserver_simple/src/program.c +++ b/fuzzers/forkserver_simple/src/program.c @@ -1,10 +1,17 @@ #include #include - +#include // The following line is needed for shared memeory testcase fuzzing __AFL_FUZZ_INIT(); +void vuln(char *buf) { + if(strcmp(buf, "vuln") == 0) { + abort(); + } +} + + int main(int argc, char **argv){ FILE* file = stdin; @@ -24,7 +31,6 @@ int main(int argc, char **argv){ printf("input: %s\n", buf); - if(buf[0] == 'b'){ if(buf[1] == 'a'){ if(buf[2] == 'd'){ @@ -32,6 +38,7 @@ int main(int argc, char **argv){ } } } + vuln(buf); return 0; } diff --git a/libafl/src/executors/forkserver.rs b/libafl/src/executors/forkserver.rs index 18ba783a0b..30088aed56 100644 --- a/libafl/src/executors/forkserver.rs +++ b/libafl/src/executors/forkserver.rs @@ -15,13 +15,14 @@ use std::{ use crate::{ bolts::{ - fs::OutFile, + fs::{OutFile, OUTFILE_STD}, os::{dup2, pipes::Pipe}, shmem::{ShMem, ShMemProvider, StdShMemProvider}, AsMutSlice, AsSlice, }, executors::{Executor, ExitKind, HasObservers}, inputs::{HasTargetBytes, Input}, + mutators::Tokens, observers::{get_asan_runtime_flags_with_log_path, ASANBacktraceObserver, ObserversTuple}, Error, }; @@ -40,6 +41,8 @@ const FORKSRV_FD: i32 = 198; const FS_OPT_ENABLED: i32 = 0x80000001_u32 as i32; #[allow(clippy::cast_possible_wrap)] const FS_OPT_SHDMEM_FUZZ: i32 = 0x01000000_u32 as i32; +#[allow(clippy::cast_possible_wrap)] +const FS_OPT_AUTODICT: i32 = 0x10000000_u32 as i32; const SHMEM_FUZZ_HDR_SIZE: usize = 4; const MAX_FILE: usize = 1024 * 1024; @@ -268,6 +271,14 @@ impl Forkserver { Ok((rlen, val)) } + /// Read bytes of any length from the st pipe + pub fn read_st_size(&mut self, size: usize) -> Result<(usize, Vec), Error> { + let mut buf = vec![0; size]; + + let rlen = self.st_pipe.read(&mut buf)?; + Ok((rlen, buf)) + } + /// Write to the ctl pipe pub fn write_ctl(&mut self, val: i32) -> Result { let slen = self.ctl_pipe.write(&val.to_ne_bytes())?; @@ -536,13 +547,15 @@ pub struct ForkserverExecutorBuilder<'a, SP> { program: Option, arguments: Vec, envs: Vec<(OsString, OsString)>, - out_filename: Option, debug_child: bool, + autotokens: Option<&'a mut Tokens>, + out_filename: Option, shmem_provider: Option<&'a mut SP>, } impl<'a, SP> ForkserverExecutorBuilder<'a, SP> { /// Builds `ForkserverExecutor`. + #[allow(clippy::pedantic)] pub fn build( &mut self, observers: OT, @@ -560,11 +573,23 @@ impl<'a, SP> ForkserverExecutorBuilder<'a, SP> { }; for item in &self.arguments { + // need special handling for @@ if item == "@@" && use_stdin { use_stdin = false; args.push(out_filename.clone()); } else { - args.push(item.clone()); + // if the filename set by arg_input_file matches the item, then set use_stdin to false + if let Some(name) = &self.out_filename { + if name == item && use_stdin { + use_stdin = false; + args.push(out_filename.clone()); + } else { + args.push(item.clone()); + } + } else { + // default case, just push item into the arguments. + args.push(item.clone()); + } } } @@ -614,24 +639,65 @@ impl<'a, SP> ForkserverExecutorBuilder<'a, SP> { println!("All right - fork server is up."); // If forkserver is responding, we then check if there's any option enabled. if status & FS_OPT_ENABLED == FS_OPT_ENABLED { - if (status & FS_OPT_SHDMEM_FUZZ == FS_OPT_SHDMEM_FUZZ) & map.is_some() { - println!("Using SHARED MEMORY FUZZING feature."); - let send_status = FS_OPT_ENABLED | FS_OPT_SHDMEM_FUZZ; + let mut send_status = FS_OPT_ENABLED; - let send_len = forkserver.write_ctl(send_status)?; - if send_len != 4 { + if (status & FS_OPT_SHDMEM_FUZZ == FS_OPT_SHDMEM_FUZZ) && map.is_some() { + println!("Using SHARED MEMORY FUZZING feature."); + send_status = send_status | FS_OPT_SHDMEM_FUZZ; + } + + if (status & FS_OPT_AUTODICT == FS_OPT_AUTODICT) && self.autotokens.is_some() { + println!("Using AUTODICT feature"); + send_status = send_status | FS_OPT_AUTODICT; + } + + let send_len = forkserver.write_ctl(send_status)?; + if send_len != 4 { + return Err(Error::Forkserver( + "Writing to forkserver failed.".to_string(), + )); + } + + if (send_status & FS_OPT_AUTODICT) == FS_OPT_AUTODICT { + let (read_len, dict_size) = forkserver.read_st()?; + if read_len != 4 { return Err(Error::Forkserver( - "Writing to forkserver failed.".to_string(), + "Reading from forkserver failed.".to_string(), )); } + + if dict_size < 2 || dict_size > 0xffffff { + return Err(Error::Forkserver( + "Dictionary has an illegal size".to_string(), + )); + } + + println!("Autodict size {:x}", dict_size); + + let (rlen, buf) = forkserver.read_st_size(dict_size as usize)?; + + if rlen != dict_size as usize { + return Err(Error::Forkserver( + "Failed to load autodictionary".to_string(), + )); + } + + if let Some(t) = &mut self.autotokens { + t.parse_autodict(&buf, dict_size as usize); + } } } else { println!("Forkserver Options are not available."); } + println!( + "ForkserverExecutor: program: {:?}, arguments: {:?}, use_stdin: {:?}", + target, args, use_stdin + ); + Ok(ForkserverExecutor { target, - args, + args: self.arguments.clone(), out_file, forkserver, observers, @@ -654,19 +720,20 @@ impl<'a> ForkserverExecutorBuilder<'a, StdShMemProvider> { program: None, arguments: vec![], envs: vec![], - out_filename: None, debug_child: false, + autotokens: None, + out_filename: None, shmem_provider: None, } } /// The harness #[must_use] - pub fn program(mut self, target: O) -> Self + pub fn program(mut self, program: O) -> Self where O: AsRef, { - self.program = Some(target.as_ref().to_owned()); + self.program = Some(program.as_ref().to_owned()); self } @@ -731,6 +798,13 @@ impl<'a> ForkserverExecutorBuilder<'a, StdShMemProvider> { moved } + #[must_use] + /// Place the input at this position and set the default filename for the input. + pub fn arg_input_file_std(self) -> Self { + let moved = self.arg_input_file(OUTFILE_STD); + moved + } + #[must_use] /// If `debug_child` is set, the child will print to `stdout`/`stderr`. pub fn debug_child(mut self, debug_child: bool) -> Self { @@ -738,6 +812,13 @@ impl<'a> ForkserverExecutorBuilder<'a, StdShMemProvider> { self } + /// Use autodict? + #[must_use] + pub fn autotokens(mut self, tokens: &'a mut Tokens) -> Self { + self.autotokens = Some(tokens); + self + } + /// Shmem provider for forkserver's shared memory testcase feature. pub fn shmem_provider( self, @@ -747,8 +828,9 @@ impl<'a> ForkserverExecutorBuilder<'a, StdShMemProvider> { program: self.program, arguments: self.arguments, envs: self.envs, - out_filename: self.out_filename, debug_child: self.debug_child, + autotokens: self.autotokens, + out_filename: self.out_filename, shmem_provider: Some(shmem_provider), } } diff --git a/libafl/src/mutators/token_mutations.rs b/libafl/src/mutators/token_mutations.rs index b8f8bb87b6..bf0d8b7ffc 100644 --- a/libafl/src/mutators/token_mutations.rs +++ b/libafl/src/mutators/token_mutations.rs @@ -76,6 +76,30 @@ impl Tokens { Ok(self) } + /// Parse autodict section + pub fn parse_autodict(&mut self, slice: &[u8], size: usize) { + let mut head = 0; + loop { + if head >= size { + // Sanity Check + assert!(head == size); + break; + } + let size = slice[head] as usize; + head += 1; + if size > 0 { + self.add_token(&slice[head..head + size].to_vec()); + #[cfg(feature = "std")] + println!( + "Token size: {} content: {:x?}", + size, + &slice[head..head + size].to_vec() + ); + head += size; + } + } + } + /// Create a token section from a start and an end pointer /// Reads from an autotokens section, returning the count of new entries read #[must_use] @@ -95,28 +119,8 @@ impl Tokens { // println!("size: {}", section_size); let slice = from_raw_parts(token_start, section_size); - let mut head = 0; - // Now we know the beginning and the end of the token section.. let's parse them into tokens - loop { - if head >= section_size { - // Sanity Check - assert!(head == section_size); - break; - } - let size = slice[head] as usize; - head += 1; - if size > 0 { - ret.add_token(&slice[head..head + size].to_vec()); - /* #[cfg(feature = "std")] - println!( - "Token size: {} content: {:x?}", - size, - &slice[head..head + size].to_vec() - ); */ - head += size; - } - } + ret.parse_autodict(slice, section_size); Ok(ret) }