From 31357aa7e2f1aba78d99db6cd3ae3243202ddb25 Mon Sep 17 00:00:00 2001 From: Dominik Maier Date: Tue, 28 Feb 2023 16:36:04 +0100 Subject: [PATCH] Track parent testcase id, tuneable stage probabilistic settings (#1081) * Added local event handlers * clippy * move tuned mutator to pow2 * Tunable updates * parent ids * no_std, etc * windows * remove local event manager handler * maybe fix win * win: * win docs * docs * ASAN -> ASan --- libafl/src/bolts/cli.rs | 82 ++++++++++++++-------------- libafl/src/bolts/mod.rs | 25 ++++++++- libafl/src/bolts/shmem.rs | 2 +- libafl/src/corpus/testcase.rs | 49 ++++++++++++++--- libafl/src/events/llmp.rs | 6 +- libafl/src/events/simple.rs | 2 +- libafl/src/executors/command.rs | 2 +- libafl/src/executors/inprocess.rs | 32 ++++++----- libafl/src/fuzzer/mod.rs | 22 ++++++-- libafl/src/mutators/tuneable.rs | 60 +++++++++++++++----- libafl/src/stages/push/mutational.rs | 29 ++++++++-- libafl/src/state/mod.rs | 32 +++++++++++ libafl_cc/build.rs | 2 +- libafl_qemu/src/executor.rs | 12 +++- libafl_targets/src/windows_asan.rs | 22 +++++--- 15 files changed, 276 insertions(+), 103 deletions(-) diff --git a/libafl/src/bolts/cli.rs b/libafl/src/bolts/cli.rs index d9855c9428..180eea5dfa 100644 --- a/libafl/src/bolts/cli.rs +++ b/libafl/src/bolts/cli.rs @@ -113,47 +113,47 @@ fn parse_instrumentation_location( )] #[allow(clippy::struct_excessive_bools)] pub struct FuzzerOptions { - /// timeout for each target execution (milliseconds) + /// Timeout for each target execution (milliseconds) #[arg(short, long, default_value = "1000", value_parser = parse_timeout, help_heading = "Fuzz Options")] pub timeout: Duration, - /// whether or not to print debug info + /// Whether or not to print debug info #[arg(short, long)] pub verbose: bool, - /// file to which all client output should be written + /// File to which all client output should be written #[arg(short, long, default_value = "/dev/null")] pub stdout: String, - /// the name of the configuration 
to use + /// The name of the configuration to use #[arg(long, default_value = "default configuration")] pub configuration: String, - /// enable Address Sanitizer (ASAN) + /// Enable Address Sanitizer (`ASan`) #[arg(short = 'A', long, help_heading = "Fuzz Options")] pub asan: bool, - /// Enable ASAN on each of the provided cores. Use 'all' to select all available + /// Enable `ASan` on each of the provided cores. Use 'all' to select all available /// cores. 'none' to run a client without binding to any core. /// ex: '1,2-4,6' selects the cores 1, 2, 3, 4, and 6. #[cfg(feature = "frida_cli")] - #[arg(long, default_value = "0", value_parser = Cores::from_cmdline, help_heading = "Cores that should use ASAN")] + #[arg(long, default_value = "0", value_parser = Cores::from_cmdline, help_heading = "Cores that should use ASan")] pub asan_cores: Cores, - /// number of fuzz iterations to perform + /// Number of fuzz iterations to perform #[arg(short = 'I', long, help_heading = "Fuzz Options", default_value = "0")] pub iterations: usize, - /// path to the harness + /// Path to the harness #[arg(short = 'H', long, help_heading = "Fuzz Options")] pub harness: Option, - /// trailing arguments (after "--"); can be passed directly to the harness + /// Trailing arguments (after "`--`"); can be passed directly to the harness #[cfg(not(feature = "qemu_cli"))] #[arg(last = true, value_name = "HARNESS_ARGS")] pub harness_args: Vec, - /// harness function to call + /// Harness function to call #[cfg(feature = "frida_cli")] #[arg( short = 'F', @@ -163,12 +163,12 @@ pub struct FuzzerOptions { )] pub harness_function: String, - /// additional libraries to instrument + /// Additional libraries to instrument #[cfg(feature = "frida_cli")] #[arg(short, long, help_heading = "Frida Options")] pub libs_to_instrument: Vec, - /// enable CmpLog instrumentation + /// Enable `CmpLog` instrumentation #[cfg_attr( feature = "frida_cli", arg(short = 'C', long, help_heading = "Frida Options") @@ -179,85 
+179,85 @@ pub struct FuzzerOptions { )] pub cmplog: bool, - /// Enable CmpLog on each of the provided cores. Use 'all' to select all available + /// Enable `CmpLog` on each of the provided cores. Use 'all' to select all available /// cores. 'none' to run a client without binding to any core. /// ex: '1,2-4,6' selects the cores 1, 2, 3, 4, and 6. #[cfg(feature = "frida_cli")] #[arg(long, default_value = "0", value_parser = Cores::from_cmdline, help_heading = "Frida Options")] pub cmplog_cores: Cores, - /// enable ASAN leak detection + /// Enable `ASan` leak detection #[cfg(feature = "frida_cli")] - #[arg(short, long, help_heading = "ASAN Options")] + #[arg(short, long, help_heading = "ASan Options")] pub detect_leaks: bool, - /// instruct ASAN to continue after a memory error is detected + /// Instruct `ASan` to continue after a memory error is detected #[cfg(feature = "frida_cli")] - #[arg(long, help_heading = "ASAN Options")] + #[arg(long, help_heading = "ASan Options")] pub continue_on_error: bool, - /// instruct ASAN to gather (and report) allocation-/free-site backtraces + /// Instruct `ASan` to gather (and report) allocation-/free-site backtraces #[cfg(feature = "frida_cli")] - #[arg(long, help_heading = "ASAN Options")] + #[arg(long, help_heading = "ASan Options")] pub allocation_backtraces: bool, - /// the maximum size that the ASAN allocator should allocate + /// The maximum size that the `ASan` allocator should allocate #[cfg(feature = "frida_cli")] #[arg( short, long, default_value = "1073741824", // 1_usize << 30 - help_heading = "ASAN Options" + help_heading = "ASan Options" )] pub max_allocation: usize, - /// the maximum total allocation size that the ASAN allocator should allocate + /// The maximum total allocation size that the `ASan` allocator should allocate #[cfg(feature = "frida_cli")] #[arg( short = 'M', long, default_value = "4294967296", // 1_usize << 32 - help_heading = "ASAN Options" + help_heading = "ASan Options" )] pub 
max_total_allocation: usize, - /// instruct ASAN to panic if the max ASAN allocation size is exceeded + /// Instruct `ASan` to panic if the max `ASan` allocation size is exceeded #[cfg(feature = "frida_cli")] - #[arg(long, help_heading = "ASAN Options")] + #[arg(long, help_heading = "ASan Options")] pub max_allocation_panics: bool, - /// disable coverage + /// Disable coverage #[cfg(feature = "frida_cli")] #[arg(long, help_heading = "Frida Options")] pub disable_coverage: bool, - /// enable DrCov (aarch64 only) + /// Enable `DrCov` (aarch64 only) #[cfg(feature = "frida_cli")] #[arg(long, help_heading = "Frida Options")] pub drcov: bool, - /// disable stalker.exclude() if true - /// It's better to disable this on windows or your harness uses c++ exception handling - /// See https://github.com/AFLplusplus/LibAFL/issues/830 + /// Disable `stalker.exclude()` if `true` + /// It's better to disable this on Windows or your harness uses c++ exception handling + /// See <https://github.com/AFLplusplus/LibAFL/issues/830> #[cfg(feature = "frida_cli")] #[arg(long, help_heading = "Frida Options")] pub disable_excludes: bool, - /// locations which will not be instrumented for ASAN or coverage purposes (ex: mod_name@0x12345) + /// Locations which will not be instrumented for `ASan` or coverage purposes (ex: `mod_name@0x12345`) #[cfg(feature = "frida_cli")] #[arg(short = 'D', long, help_heading = "Frida Options", value_parser = parse_instrumentation_location)] pub dont_instrument: Vec<(String, usize)>, - /// trailing arguments (after "--"); can be passed directly to QEMU + /// Trailing arguments (after "`--`"); can be passed directly to QEMU #[cfg(feature = "qemu_cli")] #[arg(last = true)] pub qemu_args: Vec, - /// paths to fuzzer token files (aka 'dictionaries') + /// Paths to fuzzer token files (aka 'dictionaries') #[arg(short = 'x', long, help_heading = "Fuzz Options")] pub tokens: Vec, - /// input corpus directories + /// Input corpus directories #[arg( short, long, @@ -266,7 +266,7 @@ pub struct FuzzerOptions { )] pub input: 
Vec, - /// output solutions directory + /// Output solutions directory #[arg( short, long, @@ -281,15 +281,15 @@ pub struct FuzzerOptions { #[arg(short = 'c', long, default_value = "0", value_parser = Cores::from_cmdline)] pub cores: Cores, - /// port on which the broker should listen + /// Port on which the broker should listen #[arg(short = 'p', long, default_value = "1337", value_name = "PORT")] pub broker_port: u16, - /// ip:port where a remote broker is already listening + /// `ip:port` where a remote broker is already listening #[arg(short = 'a', long, value_name = "REMOTE")] pub remote_broker_addr: Option, - /// path to file that should be sent to the harness for crash reproduction + /// Path to file that should be sent to the harness for crash reproduction #[arg(short, long, help_heading = "Replay Options")] pub replay: Option, @@ -305,7 +305,7 @@ pub struct FuzzerOptions { } impl FuzzerOptions { - /// given an `App`, add it to `FuzzerOptions` as a subcommand and return the resulting `App` + /// Given an `App`, add it to `FuzzerOptions` as a subcommand and return the resulting `App` /// /// # Examples /// @@ -354,7 +354,7 @@ impl FuzzerOptions { /// Parse from `std::env::args_os()`, exit on error /// -/// for more information, see the [cli](super::cli) documentation +/// For more information, see the [cli](super::cli) documentation #[must_use] pub fn parse_args() -> FuzzerOptions { FuzzerOptions::parse() diff --git a/libafl/src/bolts/mod.rs b/libafl/src/bolts/mod.rs index dd4afaa1f5..048adb800a 100644 --- a/libafl/src/bolts/mod.rs +++ b/libafl/src/bolts/mod.rs @@ -30,7 +30,7 @@ pub mod staterestore; pub mod tuples; use alloc::{string::String, vec::Vec}; -use core::{iter::Iterator, time}; +use core::{iter::Iterator, ops::AddAssign, time}; #[cfg(feature = "std")] use std::time::{SystemTime, UNIX_EPOCH}; @@ -216,6 +216,29 @@ pub fn format_duration_hms(duration: &time::Duration) -> String { format!("{}h-{}m-{}s", (secs / 60) / 60, (secs / 60) % 60, secs % 60) } 
+/// Calculates the cumulative sum for a slice, in-place. +/// The values are useful for example for cumulative probabilities. +/// +/// So, to give an example: +/// ```rust +/// use libafl::bolts::calculate_cumulative_sum_in_place; +/// +/// let mut value = [2, 4, 1, 3]; +/// calculate_cumulative_sum_in_place(&mut value); +/// assert_eq!(&[2, 6, 7, 10], &value); +/// ``` +pub fn calculate_cumulative_sum_in_place(mut_slice: &mut [T]) +where + T: Default + AddAssign + Copy, +{ + let mut acc = T::default(); + + for val in mut_slice { + acc += *val; + *val = acc; + } +} + /// A simple logger struct that logs to stderr when used with [`log::set_logger`]. #[derive(Debug)] #[cfg(feature = "std")] diff --git a/libafl/src/bolts/shmem.rs b/libafl/src/bolts/shmem.rs index 0960933afe..3b1f0ca9e3 100644 --- a/libafl/src/bolts/shmem.rs +++ b/libafl/src/bolts/shmem.rs @@ -1213,7 +1213,7 @@ pub mod win32_shmem { }, }; - /// The default Sharedmap impl for windows using shmctl & shmget + /// The default [`ShMem`] impl for Windows using `shmctl` & `shmget` #[derive(Clone)] pub struct Win32ShMem { id: ShMemId, diff --git a/libafl/src/corpus/testcase.rs b/libafl/src/corpus/testcase.rs index 24e2bfb829..837d02a313 100644 --- a/libafl/src/corpus/testcase.rs +++ b/libafl/src/corpus/testcase.rs @@ -8,6 +8,7 @@ use serde::{Deserialize, Serialize}; use crate::{ bolts::{serdeany::SerdeAnyMap, HasLen}, + corpus::CorpusId, inputs::Input, state::HasMetadata, Error, @@ -36,6 +37,8 @@ where fuzz_level: usize, /// If it has been fuzzed fuzzed: bool, + /// Parent [`CorpusId`], if known + parent_id: Option, } impl HasMetadata for Testcase @@ -183,20 +186,30 @@ where /// Create a new Testcase instance given an input #[inline] - pub fn new(input: I) -> Self { - let mut slf = Testcase { + pub fn new(mut input: I) -> Self { + input.wrapped_as_testcase(); + Self { input: Some(input), ..Testcase::default() - }; - slf.input.as_mut().unwrap().wrapped_as_testcase(); - slf + } + } + + /// Creates a testcase, 
attaching the id of the parent + /// that this [`Testcase`] was derived from on creation + pub fn with_parent_id(mut input: I, parent_id: CorpusId) -> Self { + input.wrapped_as_testcase(); + Self { + input: Some(input), + parent_id: Some(parent_id), + ..Testcase::default() + } } /// Create a new Testcase instance given an [`Input`] and a `filename` #[inline] pub fn with_filename(mut input: I, filename: String) -> Self { input.wrapped_as_testcase(); - Testcase { + Self { input: Some(input), filename: Some(filename), ..Testcase::default() @@ -207,12 +220,28 @@ where #[inline] pub fn with_executions(mut input: I, executions: usize) -> Self { input.wrapped_as_testcase(); - Testcase { + Self { input: Some(input), executions, ..Testcase::default() } } + + /// Get the id of the parent, that this testcase was derived from + #[must_use] + pub fn parent_id(&self) -> Option { + self.parent_id + } + + /// Sets the id of the parent, that this testcase was derived from + pub fn set_parent_id(&mut self, parent_id: CorpusId) { + self.parent_id = Some(parent_id); + } + + /// Sets the id of the parent, that this testcase was derived from + pub fn set_parent_id_optional(&mut self, parent_id: Option) { + self.parent_id = parent_id; + } } impl Default for Testcase @@ -231,6 +260,7 @@ where fuzz_level: 0, executions: 0, fuzzed: false, + parent_id: None, } } } @@ -450,6 +480,11 @@ pub mod pybind { *self.inner.as_ref().executions() } + #[getter] + fn parent_id(&self) -> Option { + self.inner.as_ref().parent_id().map(|x| x.0) + } + #[getter] fn fuzz_level(&self) -> usize { self.inner.as_ref().fuzz_level() diff --git a/libafl/src/events/llmp.rs b/libafl/src/events/llmp.rs index c1d923b6ee..91c7022414 100644 --- a/libafl/src/events/llmp.rs +++ b/libafl/src/events/llmp.rs @@ -309,11 +309,15 @@ where S: UsesInput, SP: ShMemProvider + 'static, { + /// The llmp client for inter process communication llmp: LlmpClient, /// The custom buf handler custom_buf_handlers: Vec>>, #[cfg(feature = 
"llmp_compression")] compressor: GzipCompressor, + /// The configuration defines this specific fuzzer. + /// A node will not re-use the observer values sent over `LLMP` + /// from nodes with other configurations. configuration: EventConfig, phantom: PhantomData, } @@ -1015,7 +1019,7 @@ where } }; - // On windows (or in any case without fork), we spawn ourself again + // On Windows (or in any case without fork), we spawn ourself again #[cfg(any(windows, not(feature = "fork")))] let child_status = startable_self()?.status()?; #[cfg(all(unix, not(feature = "fork")))] diff --git a/libafl/src/events/simple.rs b/libafl/src/events/simple.rs index 5c2f36d6e8..80db06a182 100644 --- a/libafl/src/events/simple.rs +++ b/libafl/src/events/simple.rs @@ -499,7 +499,7 @@ where } }; - // On windows (or in any case without forks), we spawn ourself again + // On Windows (or in any case without forks), we spawn ourself again #[cfg(any(windows, not(feature = "fork")))] let child_status = startable_self()?.status()?; #[cfg(all(unix, not(feature = "fork")))] diff --git a/libafl/src/executors/command.rs b/libafl/src/executors/command.rs index 1b86f3708a..0fed13c669 100644 --- a/libafl/src/executors/command.rs +++ b/libafl/src/executors/command.rs @@ -113,7 +113,7 @@ impl CommandConfigurator for StdCommandConfigurator { debug_assert_eq!(arg, "DUMMY"); #[cfg(unix)] cmd.arg(OsStr::from_bytes(input.target_bytes().as_slice())); - // There is an issue here that the chars on windows are 16 bit wide. + // There is an issue here that the chars on Windows are 16 bit wide. // I can't really test it. Please open a PR if this goes wrong. 
#[cfg(not(unix))] cmd.arg(OsString::from_vec(input.target_bytes().as_vec())); diff --git a/libafl/src/executors/inprocess.rs b/libafl/src/executors/inprocess.rs index e3d97025ca..cb33f56175 100644 --- a/libafl/src/executors/inprocess.rs +++ b/libafl/src/executors/inprocess.rs @@ -50,7 +50,7 @@ use crate::{ fuzzer::HasObjective, inputs::UsesInput, observers::{ObserversTuple, UsesObservers}, - state::{HasClientPerfMonitor, HasSolutions, UsesState}, + state::{HasClientPerfMonitor, HasFuzzedCorpusId, HasSolutions, UsesState}, Error, }; @@ -167,7 +167,7 @@ where H: FnMut(&::Input) -> ExitKind + ?Sized, HB: BorrowMut, OT: ObserversTuple, - S: HasSolutions + HasClientPerfMonitor, + S: HasSolutions + HasClientPerfMonitor + HasFuzzedCorpusId, { /// Create a new in mem executor. /// Caution: crash and restart in one of them will lead to odd behavior if multiple are used, @@ -344,7 +344,7 @@ impl InProcessHandlers { E: Executor + HasObservers, EM: EventFirer + EventRestarter, OF: Feedback, - E::State: HasSolutions + HasClientPerfMonitor, + E::State: HasSolutions + HasClientPerfMonitor + HasFuzzedCorpusId, Z: HasObjective, { #[cfg(unix)] @@ -543,7 +543,7 @@ pub fn run_observers_and_save_state( E: HasObservers, EM: EventFirer + EventRestarter, OF: Feedback, - E::State: HasSolutions + HasClientPerfMonitor, + E::State: HasSolutions + HasClientPerfMonitor + HasFuzzedCorpusId, Z: HasObjective, { let observers = executor.observers_mut(); @@ -559,6 +559,7 @@ pub fn run_observers_and_save_state( if interesting { let mut new_testcase = Testcase::new(input.clone()); + new_testcase.set_parent_id_optional(state.fuzzed_corpus_id()); new_testcase.add_metadata(exitkind); fuzzer .objective_mut() @@ -578,6 +579,9 @@ pub fn run_observers_and_save_state( .expect("Could not save state in run_observers_and_save_state"); } + // We will start mutators from scratch after restart. 
+ state.clear_fuzzed_corpus_id(); + event_mgr.on_restart(state).unwrap(); log::info!("Waiting for broker..."); @@ -608,7 +612,7 @@ mod unix_signal_handler { feedbacks::Feedback, fuzzer::HasObjective, inputs::UsesInput, - state::{HasClientPerfMonitor, HasSolutions}, + state::{HasClientPerfMonitor, HasFuzzedCorpusId, HasSolutions}, }; pub(crate) type HandlerFuncPtr = @@ -667,7 +671,7 @@ mod unix_signal_handler { E: HasObservers, EM: EventFirer + EventRestarter, OF: Feedback, - E::State: HasSolutions + HasClientPerfMonitor, + E::State: HasSolutions + HasClientPerfMonitor + HasFuzzedCorpusId, Z: HasObjective, { let old_hook = panic::take_hook(); @@ -708,7 +712,7 @@ mod unix_signal_handler { E: HasObservers, EM: EventFirer + EventRestarter, OF: Feedback, - E::State: HasSolutions + HasClientPerfMonitor, + E::State: HasSolutions + HasClientPerfMonitor + HasFuzzedCorpusId, Z: HasObjective, { if !data.is_valid() { @@ -751,7 +755,7 @@ mod unix_signal_handler { E: Executor + HasObservers, EM: EventFirer + EventRestarter, OF: Feedback, - E::State: HasSolutions + HasClientPerfMonitor, + E::State: HasSolutions + HasClientPerfMonitor + HasFuzzedCorpusId, Z: HasObjective, { #[cfg(all(target_os = "android", target_arch = "aarch64"))] @@ -859,7 +863,7 @@ pub mod windows_asan_handler { feedbacks::Feedback, fuzzer::HasObjective, inputs::UsesInput, - state::{HasClientPerfMonitor, HasSolutions}, + state::{HasClientPerfMonitor, HasFuzzedCorpusId, HasSolutions}, }; /// # Safety @@ -869,7 +873,7 @@ pub mod windows_asan_handler { E: Executor + HasObservers, EM: EventFirer + EventRestarter, OF: Feedback, - E::State: HasSolutions + HasClientPerfMonitor, + E::State: HasSolutions + HasClientPerfMonitor + HasFuzzedCorpusId, Z: HasObjective, { let mut data = &mut GLOBAL_STATE; @@ -968,7 +972,7 @@ mod windows_exception_handler { feedbacks::Feedback, fuzzer::HasObjective, inputs::UsesInput, - state::{HasClientPerfMonitor, HasSolutions}, + state::{HasClientPerfMonitor, HasFuzzedCorpusId, 
HasSolutions}, }; pub(crate) type HandlerFuncPtr = @@ -1007,7 +1011,7 @@ mod windows_exception_handler { E: HasObservers, EM: EventFirer + EventRestarter, OF: Feedback, - E::State: HasSolutions + HasClientPerfMonitor, + E::State: HasSolutions + HasClientPerfMonitor + HasFuzzedCorpusId, Z: HasObjective, { let old_hook = panic::take_hook(); @@ -1066,7 +1070,7 @@ mod windows_exception_handler { E: HasObservers, EM: EventFirer + EventRestarter, OF: Feedback, - E::State: HasSolutions + HasClientPerfMonitor, + E::State: HasSolutions + HasClientPerfMonitor + HasFuzzedCorpusId, Z: HasObjective, { let data: &mut InProcessExecutorHandlerData = @@ -1127,7 +1131,7 @@ mod windows_exception_handler { E: Executor + HasObservers, EM: EventFirer + EventRestarter, OF: Feedback, - E::State: HasSolutions + HasClientPerfMonitor, + E::State: HasSolutions + HasClientPerfMonitor + HasFuzzedCorpusId, Z: HasObjective, { // Have we set a timer_before? diff --git a/libafl/src/fuzzer/mod.rs b/libafl/src/fuzzer/mod.rs index 4143481e69..08c9be0afd 100644 --- a/libafl/src/fuzzer/mod.rs +++ b/libafl/src/fuzzer/mod.rs @@ -23,7 +23,10 @@ use crate::{ schedulers::Scheduler, stages::StagesTuple, start_timer, - state::{HasClientPerfMonitor, HasCorpus, HasExecutions, HasMetadata, HasSolutions, UsesState}, + state::{ + HasClientPerfMonitor, HasCorpus, HasExecutions, HasFuzzedCorpusId, HasMetadata, + HasSolutions, UsesState, + }, Error, }; @@ -327,7 +330,7 @@ where F: Feedback, OF: Feedback, OT: ObserversTuple + Serialize + DeserializeOwned, - CS::State: HasCorpus + HasSolutions + HasClientPerfMonitor + HasExecutions, + CS::State: HasCorpus + HasSolutions + HasClientPerfMonitor + HasExecutions + HasFuzzedCorpusId, { /// Evaluate if a set of observation channels has an interesting state fn process_execution( @@ -384,6 +387,7 @@ where // Add the input to the main corpus let mut testcase = Testcase::with_executions(input.clone(), *state.executions()); + 
testcase.set_parent_id_optional(state.fuzzed_corpus_id()); self.feedback_mut() .append_metadata(state, observers, &mut testcase)?; let idx = state.corpus_mut().add(testcase)?; @@ -417,6 +421,7 @@ where // The input is a solution, add it to the respective corpus let mut testcase = Testcase::with_executions(input, *state.executions()); + testcase.set_parent_id_optional(state.fuzzed_corpus_id()); self.objective_mut() .append_metadata(state, observers, &mut testcase)?; state.solutions_mut().add(testcase)?; @@ -442,7 +447,7 @@ where OT: ObserversTuple + Serialize + DeserializeOwned, F: Feedback, OF: Feedback, - CS::State: HasCorpus + HasSolutions + HasClientPerfMonitor + HasExecutions, + CS::State: HasCorpus + HasSolutions + HasClientPerfMonitor + HasExecutions + HasFuzzedCorpusId, { /// Process one input, adding to the respective corpora if needed and firing the right events #[inline] @@ -475,7 +480,7 @@ where F: Feedback, OF: Feedback, OT: ObserversTuple + Serialize + DeserializeOwned, - CS::State: HasCorpus + HasSolutions + HasClientPerfMonitor + HasExecutions, + CS::State: HasCorpus + HasSolutions + HasClientPerfMonitor + HasExecutions + HasFuzzedCorpusId, { /// Process one input, adding to the respective corpora if needed and firing the right events #[inline] @@ -513,6 +518,7 @@ where // Add the input to the main corpus let mut testcase = Testcase::with_executions(input.clone(), *state.executions()); + testcase.set_parent_id_optional(state.fuzzed_corpus_id()); self.feedback_mut() .append_metadata(state, observers, &mut testcase)?; let idx = state.corpus_mut().add(testcase)?; @@ -546,7 +552,7 @@ where EM: ProgressReporter + EventProcessor, F: Feedback, OF: Feedback, - CS::State: HasClientPerfMonitor + HasExecutions + HasMetadata, + CS::State: HasClientPerfMonitor + HasExecutions + HasMetadata + HasFuzzedCorpusId, ST: StagesTuple, { fn fuzz_one( @@ -571,9 +577,15 @@ where #[cfg(feature = "introspection")] state.introspection_monitor_mut().reset_stage_index(); + // 
Set the parent id - all new testcases will have this id as parent in the following stages. + state.set_fuzzed_corpus_id(idx); + // Execute all stages stages.perform_all(self, executor, state, manager, idx)?; + // Reset the parent id + state.clear_fuzzed_corpus_id(); + // Init timer for manager #[cfg(feature = "introspection")] state.introspection_monitor_mut().start_timer(); diff --git a/libafl/src/mutators/tuneable.rs b/libafl/src/mutators/tuneable.rs index 72043c4b43..2bb0acc9c3 100644 --- a/libafl/src/mutators/tuneable.rs +++ b/libafl/src/mutators/tuneable.rs @@ -12,7 +12,7 @@ use serde::{Deserialize, Serialize}; pub use crate::mutators::{mutations::*, token_mutations::*}; use crate::{ - bolts::rands::Rand, + bolts::{calculate_cumulative_sum_in_place, rands::Rand}, impl_serdeany, mutators::{ ComposedByMutations, MutationId, MutationResult, Mutator, MutatorsTuple, ScheduledMutator, @@ -33,10 +33,12 @@ pub struct TuneableScheduledMutatorMetadata { /// Will not be used when `mutation_ids` are set. /// Clear to fall back to random. pub mutation_probabilities_cumulative: Vec, - /// The count of total mutations to perform. + /// The count of mutations to stack. /// If `mutation_ids` is of length `10`, and this number is `20`, /// the mutations will be iterated through twice. pub iters: Option, + /// The probability of each number of mutations to stack. 
+ pub iter_probabilities_pow_cumulative: Vec, } impl Default for TuneableScheduledMutatorMetadata { @@ -46,6 +48,7 @@ impl Default for TuneableScheduledMutatorMetadata { next_id: 0.into(), mutation_probabilities_cumulative: Vec::default(), iters: None, + iter_probabilities_pow_cumulative: Vec::default(), } } } @@ -138,11 +141,27 @@ where { /// Compute the number of iterations used to apply stacked mutations fn iterations(&self, state: &mut S, _: &I) -> u64 { - if let Some(iters) = TuneableScheduledMutator::get_iters(state) { - iters + let metadata = TuneableScheduledMutatorMetadata::get_mut(state).unwrap(); + + if metadata.iter_probabilities_pow_cumulative.is_empty() { + if let Some(iters) = metadata.iters { + iters + } else { + // fall back to random + 1 << (1 + state.rand_mut().below(self.max_stack_pow)) + } } else { - // fall back to random - 1 << (1 + state.rand_mut().below(self.max_stack_pow)) + // We will sample using the mutation probabilities. + // Doing this outside of the original if branch to make the borrow checker happy. + #[allow(clippy::cast_precision_loss)] + let coin = state.rand_mut().next() as f32 / u64::MAX as f32; + + let metadata = TuneableScheduledMutatorMetadata::get_mut(state).unwrap(); + 1 << (1 + metadata + .iter_probabilities_pow_cumulative + .iter() + .position(|i| *i >= coin) + .unwrap()) } } @@ -229,6 +248,27 @@ where pub fn set_iters(state: &mut S, iters: u64) { let metadata = Self::metadata_mut(state); metadata.iters = Some(iters); + metadata.iter_probabilities_pow_cumulative.clear(); + } + + /// Sets the probability of next iteration counts, + /// i.e., how many times the input is likely to get mutated. + /// + /// So, setting the `iter_probabilities` to `vec![0.1, 0.7, 0.2]` + /// would apply 2^1 mutations with a likelihood of 10%, 2^2 mutations with + /// a probability of 70% (0.7), and 2^3 mutations with a likelihood of 20%. + /// These will be applied for each call of this `mutate` function. 
+ /// + /// Setting this function will unset everything previously set in `set_iters`. + pub fn set_iter_probabilities_pow(state: &mut S, iter_probabilities_pow: Vec) { + let metadata = Self::metadata_mut(state); + metadata.iters = None; + + // we precalculate the cumulative probability to be faster when sampling later. + let mut iter_probabilities_pow_cumulative = iter_probabilities_pow; + calculate_cumulative_sum_in_place(&mut iter_probabilities_pow_cumulative); + + metadata.iter_probabilities_pow_cumulative = iter_probabilities_pow_cumulative; } /// Gets the set amount of iterations @@ -254,13 +294,7 @@ where // we precalculate the cumulative probability to be faster when sampling later. let mut mutation_probabilities_cumulative = mutation_probabilities; - let mut acc = 0.0; - - for probability in &mut mutation_probabilities_cumulative { - let l = *probability; - *probability += acc; - acc += l; - } + calculate_cumulative_sum_in_place(&mut mutation_probabilities_cumulative); metadata.mutation_probabilities_cumulative = mutation_probabilities_cumulative; } diff --git a/libafl/src/stages/push/mutational.rs b/libafl/src/stages/push/mutational.rs index bd352d6c2e..39b22374e2 100644 --- a/libafl/src/stages/push/mutational.rs +++ b/libafl/src/stages/push/mutational.rs @@ -21,7 +21,9 @@ use crate::{ observers::ObserversTuple, schedulers::Scheduler, start_timer, - state::{HasClientPerfMonitor, HasCorpus, HasExecutions, HasMetadata, HasRand}, + state::{ + HasClientPerfMonitor, HasCorpus, HasExecutions, HasFuzzedCorpusId, HasMetadata, HasRand, + }, Error, EvaluatorObservers, ExecutionProcessor, HasScheduler, }; @@ -88,8 +90,14 @@ where EM: EventFirer + EventRestarter + HasEventManagerId + ProgressReporter, M: Mutator, OT: ObserversTuple, - CS::State: - HasClientPerfMonitor + HasCorpus + HasRand + HasExecutions + HasMetadata + Clone + Debug, + CS::State: HasFuzzedCorpusId + + HasClientPerfMonitor + + HasCorpus + + HasRand + + HasExecutions + + HasMetadata + + Clone + + 
Debug, Z: ExecutionProcessor + EvaluatorObservers + HasScheduler, @@ -136,6 +144,8 @@ where return None; } + let idx = self.current_corpus_idx.unwrap(); + state.set_fuzzed_corpus_id(idx); start_timer!(state); let mut input = state .corpus() @@ -169,7 +179,7 @@ where last_input: ::Input, exit_kind: ExitKind, ) -> Result<(), Error> { - // todo: isintersting, etc. + // todo: is_interesting, etc. fuzzer.process_execution(state, event_mgr, last_input, observers, &exit_kind, true)?; @@ -178,6 +188,7 @@ where .post_exec(state, self.stage_idx, self.current_corpus_idx)?; mark_feature_time!(state, PerfFeature::MutatePostExec); self.testcases_done += 1; + state.clear_fuzzed_corpus_id(); Ok(()) } @@ -201,8 +212,14 @@ where EM: EventFirer + EventRestarter + HasEventManagerId + ProgressReporter, M: Mutator, OT: ObserversTuple, - CS::State: - HasClientPerfMonitor + HasCorpus + HasRand + HasExecutions + HasMetadata + Clone + Debug, + CS::State: HasClientPerfMonitor + + HasCorpus + + HasRand + + HasExecutions + + HasMetadata + + Clone + + Debug + + HasFuzzedCorpusId, Z: ExecutionProcessor + EvaluatorObservers + HasScheduler, diff --git a/libafl/src/state/mod.rs b/libafl/src/state/mod.rs index 3d0da72fb4..06ee006c04 100644 --- a/libafl/src/state/mod.rs +++ b/libafl/src/state/mod.rs @@ -60,6 +60,21 @@ pub trait HasCorpus: UsesInput { fn corpus_mut(&mut self) -> &mut Self::Corpus; } +/// Trait for a state that has information about which [`CorpusId`] +/// is currently being fuzzed. +/// When a new interesting input was found, this value becomes the `parent_id`. +pub trait HasFuzzedCorpusId { + /// The currently fuzzed [`CorpusId`], if known. + /// If a new interesting testcase was found, this should usually become the `parent_id`. + fn fuzzed_corpus_id(&self) -> Option; + + /// Sets the currently fuzzed [`CorpusId`]. + fn set_fuzzed_corpus_id(&mut self, corpus_id: CorpusId); + + /// Resets the currently fuzzed [`CorpusId`]. 
+ fn clear_fuzzed_corpus_id(&mut self); +} + /// Interact with the maximum size pub trait HasMaxSize { /// The maximum size hint for items and mutations returned @@ -192,6 +207,8 @@ pub struct StdState { named_metadata: NamedSerdeAnyMap, /// MaxSize testcase size for mutators that appreciate it max_size: usize, + /// The [`CorpusId`] of the testcase we're currently fuzzing. + fuzzed_corpus_id: Option, /// Performance statistics for this fuzzer #[cfg(feature = "introspection")] introspection_monitor: ClientPerfMonitor, @@ -257,6 +274,20 @@ where } } +impl HasFuzzedCorpusId for StdState { + fn fuzzed_corpus_id(&self) -> Option { + self.fuzzed_corpus_id + } + + fn set_fuzzed_corpus_id(&mut self, fuzzed_corpus_id: CorpusId) { + self.fuzzed_corpus_id = Some(fuzzed_corpus_id); + } + + fn clear_fuzzed_corpus_id(&mut self) { + self.fuzzed_corpus_id = None; + } +} + impl HasSolutions for StdState where I: Input, @@ -694,6 +725,7 @@ where corpus, solutions, max_size: DEFAULT_MAX_SIZE, + fuzzed_corpus_id: None, #[cfg(feature = "introspection")] introspection_monitor: ClientPerfMonitor::new(), #[cfg(feature = "std")] diff --git a/libafl_cc/build.rs b/libafl_cc/build.rs index a9dc5cae31..551d812f87 100644 --- a/libafl_cc/build.rs +++ b/libafl_cc/build.rs @@ -266,7 +266,7 @@ pub const LIBAFL_CC_LLVM_VERSION: Option = None; if cfg!(windows) { cxxflags.push(String::from("-fuse-ld=lld")); cxxflags.push(String::from("/LD")); - /* clang on windows links against the libcmt.lib runtime + /* clang on Windows links against the libcmt.lib runtime * however, the distributed binaries are compiled against msvcrt.lib * we need to also use msvcrt.lib instead of libcmt.lib when building the optimization passes * first, we tell clang-cl (and indirectly link) to ignore libcmt.lib via -nodefaultlib:libcmt diff --git a/libafl_qemu/src/executor.rs b/libafl_qemu/src/executor.rs index 8f83d25335..27094ed991 100644 --- a/libafl_qemu/src/executor.rs +++ b/libafl_qemu/src/executor.rs @@ -13,7 +13,10 @@ 
use libafl::{ fuzzer::HasObjective, inputs::UsesInput, observers::{ObserversTuple, UsesObservers}, - state::{HasClientPerfMonitor, HasCorpus, HasExecutions, HasSolutions, State, UsesState}, + state::{ + HasClientPerfMonitor, HasCorpus, HasExecutions, HasFuzzedCorpusId, HasSolutions, State, + UsesState, + }, Error, }; @@ -64,7 +67,12 @@ where where EM: EventFirer + EventRestarter, OF: Feedback, - S: State + HasExecutions + HasCorpus + HasSolutions + HasClientPerfMonitor, + S: State + + HasExecutions + + HasCorpus + + HasSolutions + + HasClientPerfMonitor + + HasFuzzedCorpusId, Z: HasObjective, { Ok(Self { diff --git a/libafl_targets/src/windows_asan.rs b/libafl_targets/src/windows_asan.rs index 7667850faf..a9044a8eb2 100644 --- a/libafl_targets/src/windows_asan.rs +++ b/libafl_targets/src/windows_asan.rs @@ -4,7 +4,7 @@ use libafl::{ events::{EventFirer, EventRestarter}, executors::{inprocess::windows_asan_handler::asan_death_handler, Executor, HasObservers}, feedbacks::Feedback, - state::{HasClientPerfMonitor, HasSolutions}, + state::{HasClientPerfMonitor, HasFuzzedCorpusId, HasSolutions}, HasObjective, }; @@ -15,20 +15,24 @@ extern "C" { fn __sanitizer_set_death_callback(cb: CB); } +/// Setup `ASan` callback on Windows +/// +/// This is needed to intercept `ASan` error exit. +/// +/// When we use `AddressSanitizer` on Windows, the crash handler is not called when `ASan` detects an error +/// This is because, on Linux, `ASan` runtime raises `SIGABRT` so we can rely on the signal handler +/// but on Windows it simply calls `TerminateProcess`. +/// so we need to call the API by `ASan` to register the callback when `ASan` is about to finish the process. +/// See <https://github.com/AFLplusplus/LibAFL/issues/769>. 
+/// /// # Safety -/// Setup asan callback on windows -// See https://github.com/AFLplusplus/LibAFL/issues/769 -// This is needed to intercept asan error exit -// When we use AddressSanitizer on windows, the crash handler is not called when ASAN detects an error -// This is because, on linux, ASAN runtime raises SIGABRT so we can rely on the signal handler -// but on windows it simply calls TerminateProcess. -// so we need to call the api by asan to register the callback when asan is about to finish the process. +/// Calls the unsafe `__sanitizer_set_death_callback` symbol, but should be safe to call otherwise. pub unsafe fn setup_asan_callback(_executor: &E, _event_mgr: &EM, _fuzzer: &Z) where E: Executor + HasObservers, EM: EventFirer + EventRestarter, OF: Feedback, - E::State: HasSolutions + HasClientPerfMonitor, + E::State: HasSolutions + HasClientPerfMonitor + HasFuzzedCorpusId, Z: HasObjective, { __sanitizer_set_death_callback(asan_death_handler::);