Fix OOM restarts with LlmpShouldSaveState (#1974)
* LlmpSaveState and OOM restarts * clippy * clippy * rename
This commit is contained in:
parent
69d7d7b1e3
commit
45d47214c2
@ -9,10 +9,13 @@ static GLOBAL: MiMalloc = MiMalloc;
|
||||
use core::time::Duration;
|
||||
use std::{env, net::SocketAddr, path::PathBuf};
|
||||
|
||||
use clap::{self, Parser};
|
||||
use clap::Parser;
|
||||
use libafl::{
|
||||
corpus::{Corpus, InMemoryOnDiskCorpus, OnDiskCorpus},
|
||||
events::{launcher::Launcher, EventConfig, EventRestarter, LlmpRestartingEventManager},
|
||||
events::{
|
||||
launcher::Launcher, llmp::LlmpShouldSaveState, EventConfig, EventRestarter,
|
||||
LlmpRestartingEventManager,
|
||||
},
|
||||
executors::{inprocess::InProcessExecutor, ExitKind},
|
||||
feedback_or, feedback_or_fast,
|
||||
feedbacks::{CrashFeedback, MaxMapFeedback, TimeFeedback, TimeoutFeedback},
|
||||
@ -279,7 +282,11 @@ pub extern "C" fn libafl_main() {
|
||||
.broker_port(broker_port)
|
||||
.remote_broker_addr(opt.remote_broker_addr)
|
||||
.stdout_file(Some("/dev/null"))
|
||||
.serialize_state(!opt.reload_corpus)
|
||||
.serialize_state(if opt.reload_corpus {
|
||||
LlmpShouldSaveState::OOMSafeNever
|
||||
} else {
|
||||
LlmpShouldSaveState::OOMSafeOnRestart
|
||||
})
|
||||
.build()
|
||||
.launch()
|
||||
{
|
||||
|
@ -49,7 +49,7 @@ use crate::events::{CentralizedEventManager, CentralizedLlmpEventBroker};
|
||||
#[cfg(feature = "std")]
|
||||
use crate::{
|
||||
events::{
|
||||
llmp::{LlmpRestartingEventManager, ManagerKind, RestartingMgr},
|
||||
llmp::{LlmpRestartingEventManager, LlmpShouldSaveState, ManagerKind, RestartingMgr},
|
||||
EventConfig,
|
||||
},
|
||||
monitors::Monitor,
|
||||
@ -126,8 +126,8 @@ where
|
||||
#[builder(default = DEFAULT_CLIENT_TIMEOUT_SECS)]
|
||||
client_timeout: Duration,
|
||||
/// Tell the manager to serialize or not the state on restart
|
||||
#[builder(default = true)]
|
||||
serialize_state: bool,
|
||||
#[builder(default = LlmpShouldSaveState::OnRestart)]
|
||||
serialize_state: LlmpShouldSaveState,
|
||||
#[builder(setter(skip), default = PhantomData)]
|
||||
phantom_data: PhantomData<(&'a S, &'a SP, EMH)>,
|
||||
}
|
||||
@ -493,8 +493,8 @@ where
|
||||
#[builder(default = true)]
|
||||
spawn_broker: bool,
|
||||
/// Tell the manager to serialize or not the state on restart
|
||||
#[builder(default = true)]
|
||||
serialize_state: bool,
|
||||
#[builder(default = LlmpShouldSaveState::OnRestart)]
|
||||
serialize_state: LlmpShouldSaveState,
|
||||
/// The duration for the llmp client timeout
|
||||
#[builder(default = DEFAULT_CLIENT_TIMEOUT_SECS)]
|
||||
client_timeout: Duration,
|
||||
|
@ -892,6 +892,41 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
/// Specify if the State must be persistent over restarts
|
||||
#[derive(Debug, Clone, Copy, PartialEq)]
|
||||
pub enum LlmpShouldSaveState {
|
||||
/// Always save and restore the state on restart (not OOM resistant)
|
||||
OnRestart,
|
||||
/// Never save the state (not OOM resistant)
|
||||
Never,
|
||||
/// Best-effort save and restore the state on restart (OOM safe)
|
||||
/// This adds additional runtime costs when processing events
|
||||
OOMSafeOnRestart,
|
||||
/// Never save the state (OOM safe)
|
||||
/// This adds additional runtime costs when processing events
|
||||
OOMSafeNever,
|
||||
}
|
||||
|
||||
impl LlmpShouldSaveState {
|
||||
/// Check if the state must be saved `on_restart()`
|
||||
#[must_use]
|
||||
pub fn on_restart(&self) -> bool {
|
||||
matches!(
|
||||
self,
|
||||
LlmpShouldSaveState::OnRestart | LlmpShouldSaveState::OOMSafeOnRestart
|
||||
)
|
||||
}
|
||||
|
||||
/// Check if the policy is OOM safe
|
||||
#[must_use]
|
||||
pub fn oom_safe(&self) -> bool {
|
||||
matches!(
|
||||
self,
|
||||
LlmpShouldSaveState::OOMSafeOnRestart | LlmpShouldSaveState::OOMSafeNever
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// A manager that can restart on the fly, storing states in-between (in `on_restart`)
|
||||
#[cfg(feature = "std")]
|
||||
#[derive(Debug)]
|
||||
@ -906,7 +941,7 @@ where
|
||||
/// The staterestorer to serialize the state for the next runner
|
||||
staterestorer: StateRestorer<SP>,
|
||||
/// Decide if the state restorer must save the serialized state
|
||||
save_state: bool,
|
||||
save_state: LlmpShouldSaveState,
|
||||
}
|
||||
|
||||
#[cfg(all(feature = "std", feature = "adaptive_serialization"))]
|
||||
@ -980,7 +1015,9 @@ where
|
||||
event: Event<<Self::State as UsesInput>::Input>,
|
||||
) -> Result<(), Error> {
|
||||
// Check if we are going to crash in the event, in which case we store our current state for the next runner
|
||||
self.llmp_mgr.fire(state, event)
|
||||
self.llmp_mgr.fire(state, event)?;
|
||||
self.intermediate_save()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn serialize_observers<OT>(&mut self, observers: &OT) -> Result<Option<Vec<u8>>, Error>
|
||||
@ -1016,7 +1053,11 @@ where
|
||||
// First, reset the page to 0 so the next iteration can read read from the beginning of this page
|
||||
self.staterestorer.reset();
|
||||
self.staterestorer.save(&(
|
||||
if self.save_state { Some(state) } else { None },
|
||||
if self.save_state.on_restart() {
|
||||
Some(state)
|
||||
} else {
|
||||
None
|
||||
},
|
||||
&self.llmp_mgr.describe()?,
|
||||
))?;
|
||||
|
||||
@ -1044,7 +1085,9 @@ where
|
||||
Z: EvaluatorObservers<E::Observers, State = S> + ExecutionProcessor<E::Observers>, //CE: CustomEvent<I>,
|
||||
{
|
||||
fn process(&mut self, fuzzer: &mut Z, state: &mut S, executor: &mut E) -> Result<usize, Error> {
|
||||
self.llmp_mgr.process(fuzzer, state, executor)
|
||||
let res = self.llmp_mgr.process(fuzzer, state, executor)?;
|
||||
self.intermediate_save()?;
|
||||
Ok(res)
|
||||
}
|
||||
}
|
||||
|
||||
@ -1089,7 +1132,7 @@ where
|
||||
Self {
|
||||
llmp_mgr,
|
||||
staterestorer,
|
||||
save_state: true,
|
||||
save_state: LlmpShouldSaveState::OnRestart,
|
||||
}
|
||||
}
|
||||
|
||||
@ -1097,7 +1140,7 @@ where
|
||||
pub fn with_save_state(
|
||||
llmp_mgr: LlmpEventManager<EMH, S, SP>,
|
||||
staterestorer: StateRestorer<SP>,
|
||||
save_state: bool,
|
||||
save_state: LlmpShouldSaveState,
|
||||
) -> Self {
|
||||
Self {
|
||||
llmp_mgr,
|
||||
@ -1115,6 +1158,17 @@ where
|
||||
pub fn staterestorer_mut(&mut self) -> &mut StateRestorer<SP> {
|
||||
&mut self.staterestorer
|
||||
}
|
||||
|
||||
/// Save LLMP state and empty state in staterestorer
|
||||
pub fn intermediate_save(&mut self) -> Result<(), Error> {
|
||||
// First, reset the page to 0 so the next iteration can read read from the beginning of this page
|
||||
if self.save_state.oom_safe() {
|
||||
self.staterestorer.reset();
|
||||
self.staterestorer
|
||||
.save(&(None::<S>, &self.llmp_mgr.describe()?))?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// The kind of manager we're creating right now
|
||||
@ -1202,8 +1256,8 @@ where
|
||||
#[builder(default = None)]
|
||||
exit_cleanly_after: Option<NonZeroUsize>,
|
||||
/// Tell the manager to serialize or not the state on restart
|
||||
#[builder(default = true)]
|
||||
serialize_state: bool,
|
||||
#[builder(default = LlmpShouldSaveState::OnRestart)]
|
||||
serialize_state: LlmpShouldSaveState,
|
||||
/// The timeout duration used for llmp client timeout
|
||||
#[builder(default = DEFAULT_CLIENT_TIMEOUT_SECS)]
|
||||
client_timeout: Duration,
|
||||
@ -1380,7 +1434,7 @@ where
|
||||
compiler_fence(Ordering::SeqCst);
|
||||
|
||||
#[allow(clippy::manual_assert)]
|
||||
if !staterestorer.has_content() && self.serialize_state {
|
||||
if !staterestorer.has_content() && !self.serialize_state.oom_safe() {
|
||||
#[cfg(unix)]
|
||||
if child_status == 137 {
|
||||
// Out of Memory, see https://tldp.org/LDP/abs/html/exitcodes.html
|
||||
@ -1450,7 +1504,11 @@ where
|
||||
)
|
||||
};
|
||||
// We reset the staterestorer, the next staterestorer and receiver (after crash) will reuse the page from the initial message.
|
||||
if self.serialize_state.oom_safe() {
|
||||
mgr.intermediate_save()?;
|
||||
} else {
|
||||
mgr.staterestorer.reset();
|
||||
}
|
||||
|
||||
/* TODO: Not sure if this is needed
|
||||
// We commit an empty NO_RESTART message to this buf, against infinite loops,
|
||||
|
Loading…
x
Reference in New Issue
Block a user