Corpus pruning stage (#2399)

* push

* upd

* add last found time

* add common as prerequisite

* clp

* aa

* more clp

* fix how to get corpus id

* pruning

* aa

* no std

* fix
This commit is contained in:
Dongjia "toka" Zhang 2024-07-16 18:04:23 +02:00 committed by GitHub
parent d8e53d5b65
commit f00470ddaa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 200 additions and 8 deletions

View File

@ -276,6 +276,7 @@ jobs:
needs:
- ubuntu
- fuzzers-preflight
- common
strategy:
fail-fast: true
matrix:
@ -366,7 +367,8 @@ jobs:
- 'fuzzers/*qemu*/**'
fuzzers-qemu:
needs: changes
needs:
- common
if: ${{ needs.changes.outputs.qemu == 'true' }}
strategy:
matrix:

View File

@ -18,8 +18,8 @@ use crate::{
stages::{HasCurrentStage, StagesTuple},
start_timer,
state::{
HasCorpus, HasCurrentTestcase, HasExecutions, HasImported, HasLastReportTime, HasSolutions,
Stoppable, UsesState,
HasCorpus, HasCurrentTestcase, HasExecutions, HasLastFoundTime, HasLastReportTime,
HasSolutions, Stoppable, UsesState,
},
Error, HasMetadata,
};
@ -378,7 +378,6 @@ where
+ HasSolutions
+ HasExecutions
+ HasCorpus
+ HasImported
+ HasCurrentTestcase<<Self::State as UsesInput>::Input>
+ HasCurrentCorpusId,
{
@ -597,7 +596,7 @@ where
OT: ObserversTuple<Self::State> + Serialize + DeserializeOwned,
F: Feedback<Self::State>,
OF: Feedback<Self::State>,
CS::State: HasCorpus + HasSolutions + HasExecutions + HasImported,
CS::State: HasCorpus + HasSolutions + HasExecutions,
{
/// Process one input, adding to the respective corpora if needed and firing the right events
#[inline]
@ -630,7 +629,7 @@ where
F: Feedback<Self::State>,
OF: Feedback<Self::State>,
OT: ObserversTuple<Self::State> + Serialize + DeserializeOwned,
CS::State: HasCorpus + HasSolutions + HasExecutions + HasImported,
CS::State: HasCorpus + HasSolutions + HasExecutions + HasLastFoundTime,
{
/// Process one input, adding to the respective corpora if needed and firing the right events
#[inline]
@ -663,6 +662,8 @@ where
manager: &mut EM,
input: <Self::State as UsesInput>::Input,
) -> Result<CorpusId, Error> {
*state.last_found_time_mut() = current_time();
let exit_kind = self.execute_input(state, executor, manager, &input)?;
let observers = executor.observers();
// Always consider this to be "interesting"
@ -670,7 +671,7 @@ where
// Maybe a solution
#[cfg(not(feature = "introspection"))]
let is_solution =
let is_solution: bool =
self.objective_mut()
.is_interesting(state, manager, &input, &*observers, &exit_kind)?;
@ -766,7 +767,6 @@ where
+ HasMetadata
+ HasCorpus
+ HasTestcase
+ HasImported
+ HasLastReportTime
+ HasCurrentCorpusId
+ HasCurrentStage,

View File

@ -80,6 +80,8 @@ pub mod tuneable;
#[cfg(feature = "unicode")]
pub mod unicode;
pub mod pruning;
/// A stage is one step in the fuzzing process.
/// Multiple stages will be scheduled one by one for each input.
pub trait Stage<E, EM, Z>: UsesState

View File

@ -0,0 +1,162 @@
//! Corpus pruning stage
use core::marker::PhantomData;
use libafl_bolts::{rands::Rand, Error};
use crate::{
corpus::Corpus,
stages::Stage,
state::{HasCorpus, HasRand, UsesState},
};
#[cfg(feature = "std")]
use crate::{events::EventRestarter, state::Stoppable};
/// A stage that probabilistically disables corpus entries.
///
/// This stage should be wrapped in an `if` stage and run only when the fuzzer performs a restart.
/// The idea comes from `https://mschloegel.me/paper/schiller2023fuzzerrestarts.pdf`
#[derive(Debug)]
pub struct CorpusPruning<EM> {
    /// The pruning threshold, meant to be in the range `0.0..=1.0`.
    ///
    /// NOTE(review): `perform` keeps an entry only when `prob * 100` is strictly smaller than a
    /// value drawn with `below(100)`, so a *larger* `prob` keeps *fewer* entries. This field used
    /// to be documented as "the chance of retaining this corpus", which does not match that
    /// comparison - confirm which of the two is intended.
    prob: f64,
    /// Ties the stage to the event manager type that provides its `State`.
    phantom: PhantomData<EM>,
}

impl<EM> CorpusPruning<EM> {
    /// Creates a new [`CorpusPruning`] stage using `prob` as its pruning threshold.
    ///
    /// The value is stored as-is; no range check is performed.
    #[must_use]
    pub fn new(prob: f64) -> Self {
        Self {
            prob,
            phantom: PhantomData,
        }
    }
}

impl<EM> Default for CorpusPruning<EM> {
    /// Creates a [`CorpusPruning`] stage with a threshold of `0.05`.
    fn default() -> Self {
        Self::new(0.05)
    }
}
impl<EM> UsesState for CorpusPruning<EM>
where
    EM: UsesState,
{
    /// The stage operates on the same state type as its event manager.
    type State = <EM as UsesState>::State;
}
impl<E, EM, Z> Stage<E, EM, Z> for CorpusPruning<EM>
where
    EM: UsesState,
    E: UsesState<State = Self::State>,
    Z: UsesState<State = Self::State>,
    Self::State: HasCorpus + HasRand,
{
    /// Randomly moves enabled corpus entries into the disabled set.
    ///
    /// For every currently enabled entry a value is drawn with `below(100)`; the entry is kept
    /// only when `prob * 100` is strictly smaller than that value. If no entry would be kept,
    /// one enabled entry is picked at random and kept, so the enabled corpus never becomes empty.
    ///
    /// Already disabled entries are left untouched, and an empty corpus is a no-op.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the corpus while removing or re-adding a testcase.
    // The cast only ever sees values produced by `below(100)`, so no precision is lost.
    #[allow(clippy::cast_precision_loss)]
    fn perform(
        &mut self,
        _fuzzer: &mut Z,
        _executor: &mut E,
        state: &mut Self::State,
        _manager: &mut EM,
    ) -> Result<(), Error> {
        // Only enabled entries can be pruned; with none there is nothing to do (and drawing an
        // index for the "retain at least one" fallback would index into an empty list).
        let n_enabled = state.corpus().count();
        if n_enabled == 0 {
            return Ok(());
        }

        // Decide for every enabled entry whether it survives.
        // NOTE(review): with this comparison a larger `prob` retains fewer entries - confirm
        // that this matches the intended meaning of `prob`.
        let mut do_retain = vec![false; n_enabled];
        let mut retain_any = false;
        for flag in &mut do_retain {
            let r = state.rand_mut().below(100) as f64;
            *flag = self.prob * 100_f64 < r;
            retain_any |= *flag;
        }

        // Make sure that at least something is left in the enabled corpus.
        if !retain_any {
            let r = state.rand_mut().below(n_enabled);
            do_retain[r] = true;
        }

        // Resolve positions to ids *before* touching the corpus: every removal shifts the
        // position of all later entries, so looking positions up lazily would hit the wrong
        // testcases.
        let mut to_disable = vec![];
        for (i_th, retain) in do_retain.iter().enumerate() {
            if !retain {
                to_disable.push(state.corpus().nth(i_th));
            }
        }

        let corpus = state.corpus_mut();
        for corpus_id in to_disable {
            let removed = corpus.remove(corpus_id)?;
            corpus.add_disabled(removed)?;
        }

        Ok(())
    }

    /// Always reports `Ok(true)`.
    fn should_restart(&mut self, _state: &mut Self::State) -> Result<bool, Error> {
        // Not executing the target, so restart safety is not needed
        Ok(true)
    }

    /// Does nothing; this stage keeps no progress information.
    fn clear_progress(&mut self, _state: &mut Self::State) -> Result<(), Error> {
        // Not executing the target, so restart safety is not needed
        Ok(())
    }
}
/// A stage for conditional restart.
///
/// When performed, it calls `on_restart` on the event manager and then requests the fuzzer to
/// stop. It is only compiled when the `std` feature is enabled.
#[derive(Debug, Default)]
#[cfg(feature = "std")]
pub struct RestartStage<E, EM, Z> {
// Marker only: ties the stage to its executor, event manager and fuzzer types.
phantom: PhantomData<(E, EM, Z)>,
}
#[cfg(feature = "std")]
impl<E, EM, Z> UsesState for RestartStage<E, EM, Z>
where
    E: UsesState,
{
    /// The stage operates on the same state type as its executor.
    type State = <E as UsesState>::State;
}
#[cfg(feature = "std")]
impl<E, EM, Z> Stage<E, EM, Z> for RestartStage<E, EM, Z>
where
    E: UsesState,
    EM: UsesState<State = Self::State> + EventRestarter,
    Z: UsesState<State = Self::State>,
{
    /// Notifies the event manager of the restart and asks the fuzzer to stop.
    ///
    /// # Errors
    ///
    /// Returns the error reported by the event manager's `on_restart`; in that case no stop is
    /// requested.
    fn perform(
        &mut self,
        _fuzzer: &mut Z,
        _executor: &mut E,
        state: &mut Self::State,
        manager: &mut EM,
    ) -> Result<(), Error> {
        // Propagate a failing restart notification to the caller instead of panicking.
        manager.on_restart(state)?;
        state.request_stop();
        Ok(())
    }

    /// Always reports `Ok(true)`.
    fn should_restart(&mut self, _state: &mut Self::State) -> Result<bool, Error> {
        Ok(true)
    }

    /// Does nothing; this stage keeps no progress information.
    fn clear_progress(&mut self, _state: &mut Self::State) -> Result<(), Error> {
        Ok(())
    }
}
#[cfg(feature = "std")]
impl<E, EM, Z> RestartStage<E, EM, Z>
where
    E: UsesState,
{
    /// Creates a new [`RestartStage`].
    ///
    /// The stage takes no configuration; it only carries its type parameters.
    #[must_use]
    pub fn new() -> Self {
        let phantom = PhantomData;
        Self { phantom }
    }
}

View File

@ -187,6 +187,15 @@ pub trait HasStartTime {
fn start_time_mut(&mut self) -> &mut Duration;
}
/// Trait for the last found time, i.e. the last time this node found something by itself
pub trait HasLastFoundTime {
/// The last time we found something by ourselves
fn last_found_time(&self) -> &Duration;
/// The last time we found something by ourselves (mutable)
fn last_found_time_mut(&mut self) -> &mut Duration;
}
/// Trait for the last report time, the last time this node reported progress
pub trait HasLastReportTime {
/// The last time we reported progress,if available/used.
@ -260,6 +269,8 @@ pub struct StdState<I, C, R, SC> {
/// The last time we reported progress (if available/used).
/// This information is used by fuzzer `maybe_report_progress`.
last_report_time: Option<Duration>,
/// The last time something was added to the corpus
last_found_time: Duration,
/// The current index of the corpus; used to record for resumable fuzzing.
corpus_id: Option<CorpusId>,
/// Request the fuzzer to stop at the start of the next stage
@ -424,6 +435,20 @@ impl<I, C, R, SC> HasImported for StdState<I, C, R, SC> {
}
}
impl<I, C, R, SC> HasLastFoundTime for StdState<I, C, R, SC> {
/// Return the last time something was found, as stored in the `last_found_time` field
#[inline]
fn last_found_time(&self) -> &Duration {
&self.last_found_time
}
/// Return a mutable reference to the stored last found time, so callers can update it
#[inline]
fn last_found_time_mut(&mut self) -> &mut Duration {
&mut self.last_found_time
}
}
impl<I, C, R, SC> HasLastReportTime for StdState<I, C, R, SC> {
/// The last time we reported progress,if available/used.
/// This information is used by fuzzer `maybe_report_progress`.
@ -1127,6 +1152,7 @@ where
#[cfg(feature = "std")]
dont_reenter: None,
last_report_time: None,
last_found_time: libafl_bolts::current_time(),
corpus_id: None,
stage_stack: StageStack::default(),
phantom: PhantomData,