Fix OOM restarts with LlmpShouldSaveState (#1974)
* LlmpSaveState and OOM restarts * clippy * clippy * rename
This commit is contained in:
parent
69d7d7b1e3
commit
45d47214c2
@ -9,10 +9,13 @@ static GLOBAL: MiMalloc = MiMalloc;
|
|||||||
use core::time::Duration;
|
use core::time::Duration;
|
||||||
use std::{env, net::SocketAddr, path::PathBuf};
|
use std::{env, net::SocketAddr, path::PathBuf};
|
||||||
|
|
||||||
use clap::{self, Parser};
|
use clap::Parser;
|
||||||
use libafl::{
|
use libafl::{
|
||||||
corpus::{Corpus, InMemoryOnDiskCorpus, OnDiskCorpus},
|
corpus::{Corpus, InMemoryOnDiskCorpus, OnDiskCorpus},
|
||||||
events::{launcher::Launcher, EventConfig, EventRestarter, LlmpRestartingEventManager},
|
events::{
|
||||||
|
launcher::Launcher, llmp::LlmpShouldSaveState, EventConfig, EventRestarter,
|
||||||
|
LlmpRestartingEventManager,
|
||||||
|
},
|
||||||
executors::{inprocess::InProcessExecutor, ExitKind},
|
executors::{inprocess::InProcessExecutor, ExitKind},
|
||||||
feedback_or, feedback_or_fast,
|
feedback_or, feedback_or_fast,
|
||||||
feedbacks::{CrashFeedback, MaxMapFeedback, TimeFeedback, TimeoutFeedback},
|
feedbacks::{CrashFeedback, MaxMapFeedback, TimeFeedback, TimeoutFeedback},
|
||||||
@ -279,7 +282,11 @@ pub extern "C" fn libafl_main() {
|
|||||||
.broker_port(broker_port)
|
.broker_port(broker_port)
|
||||||
.remote_broker_addr(opt.remote_broker_addr)
|
.remote_broker_addr(opt.remote_broker_addr)
|
||||||
.stdout_file(Some("/dev/null"))
|
.stdout_file(Some("/dev/null"))
|
||||||
.serialize_state(!opt.reload_corpus)
|
.serialize_state(if opt.reload_corpus {
|
||||||
|
LlmpShouldSaveState::OOMSafeNever
|
||||||
|
} else {
|
||||||
|
LlmpShouldSaveState::OOMSafeOnRestart
|
||||||
|
})
|
||||||
.build()
|
.build()
|
||||||
.launch()
|
.launch()
|
||||||
{
|
{
|
||||||
|
@ -49,7 +49,7 @@ use crate::events::{CentralizedEventManager, CentralizedLlmpEventBroker};
|
|||||||
#[cfg(feature = "std")]
|
#[cfg(feature = "std")]
|
||||||
use crate::{
|
use crate::{
|
||||||
events::{
|
events::{
|
||||||
llmp::{LlmpRestartingEventManager, ManagerKind, RestartingMgr},
|
llmp::{LlmpRestartingEventManager, LlmpShouldSaveState, ManagerKind, RestartingMgr},
|
||||||
EventConfig,
|
EventConfig,
|
||||||
},
|
},
|
||||||
monitors::Monitor,
|
monitors::Monitor,
|
||||||
@ -126,8 +126,8 @@ where
|
|||||||
#[builder(default = DEFAULT_CLIENT_TIMEOUT_SECS)]
|
#[builder(default = DEFAULT_CLIENT_TIMEOUT_SECS)]
|
||||||
client_timeout: Duration,
|
client_timeout: Duration,
|
||||||
/// Tell the manager whether to serialize the state on restart
|
/// Tell the manager whether to serialize the state on restart
|
||||||
#[builder(default = true)]
|
#[builder(default = LlmpShouldSaveState::OnRestart)]
|
||||||
serialize_state: bool,
|
serialize_state: LlmpShouldSaveState,
|
||||||
#[builder(setter(skip), default = PhantomData)]
|
#[builder(setter(skip), default = PhantomData)]
|
||||||
phantom_data: PhantomData<(&'a S, &'a SP, EMH)>,
|
phantom_data: PhantomData<(&'a S, &'a SP, EMH)>,
|
||||||
}
|
}
|
||||||
@ -493,8 +493,8 @@ where
|
|||||||
#[builder(default = true)]
|
#[builder(default = true)]
|
||||||
spawn_broker: bool,
|
spawn_broker: bool,
|
||||||
/// Tell the manager whether to serialize the state on restart
|
/// Tell the manager whether to serialize the state on restart
|
||||||
#[builder(default = true)]
|
#[builder(default = LlmpShouldSaveState::OnRestart)]
|
||||||
serialize_state: bool,
|
serialize_state: LlmpShouldSaveState,
|
||||||
/// The duration for the llmp client timeout
|
/// The duration for the llmp client timeout
|
||||||
#[builder(default = DEFAULT_CLIENT_TIMEOUT_SECS)]
|
#[builder(default = DEFAULT_CLIENT_TIMEOUT_SECS)]
|
||||||
client_timeout: Duration,
|
client_timeout: Duration,
|
||||||
|
@ -892,6 +892,41 @@ where
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Specify if the State must be persistent over restarts
///
/// Each variant combines two choices: whether the state is saved for the next
/// run, and whether the policy is OOM safe.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum LlmpShouldSaveState {
    /// Always save and restore the state on restart (not OOM resistant)
    OnRestart,
    /// Never save the state (not OOM resistant)
    Never,
    /// Best-effort save and restore the state on restart (OOM safe)
    ///
    /// This adds additional runtime costs when processing events
    OOMSafeOnRestart,
    /// Never save the state (OOM safe)
    ///
    /// This adds additional runtime costs when processing events
    OOMSafeNever,
}
|
||||||
|
|
||||||
|
impl LlmpShouldSaveState {
|
||||||
|
/// Check if the state must be saved `on_restart()`
|
||||||
|
#[must_use]
|
||||||
|
pub fn on_restart(&self) -> bool {
|
||||||
|
matches!(
|
||||||
|
self,
|
||||||
|
LlmpShouldSaveState::OnRestart | LlmpShouldSaveState::OOMSafeOnRestart
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check if the policy is OOM safe
|
||||||
|
#[must_use]
|
||||||
|
pub fn oom_safe(&self) -> bool {
|
||||||
|
matches!(
|
||||||
|
self,
|
||||||
|
LlmpShouldSaveState::OOMSafeOnRestart | LlmpShouldSaveState::OOMSafeNever
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// A manager that can restart on the fly, storing states in-between (in `on_restart`)
|
/// A manager that can restart on the fly, storing states in-between (in `on_restart`)
|
||||||
#[cfg(feature = "std")]
|
#[cfg(feature = "std")]
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
@ -906,7 +941,7 @@ where
|
|||||||
/// The staterestorer to serialize the state for the next runner
|
/// The staterestorer to serialize the state for the next runner
|
||||||
staterestorer: StateRestorer<SP>,
|
staterestorer: StateRestorer<SP>,
|
||||||
/// Decide if the state restorer must save the serialized state
|
/// Decide if the state restorer must save the serialized state
|
||||||
save_state: bool,
|
save_state: LlmpShouldSaveState,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(all(feature = "std", feature = "adaptive_serialization"))]
|
#[cfg(all(feature = "std", feature = "adaptive_serialization"))]
|
||||||
@ -980,7 +1015,9 @@ where
|
|||||||
event: Event<<Self::State as UsesInput>::Input>,
|
event: Event<<Self::State as UsesInput>::Input>,
|
||||||
) -> Result<(), Error> {
|
) -> Result<(), Error> {
|
||||||
// Check if we are going to crash in the event, in which case we store our current state for the next runner
|
// Check if we are going to crash in the event, in which case we store our current state for the next runner
|
||||||
self.llmp_mgr.fire(state, event)
|
self.llmp_mgr.fire(state, event)?;
|
||||||
|
self.intermediate_save()?;
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn serialize_observers<OT>(&mut self, observers: &OT) -> Result<Option<Vec<u8>>, Error>
|
fn serialize_observers<OT>(&mut self, observers: &OT) -> Result<Option<Vec<u8>>, Error>
|
||||||
@ -1016,7 +1053,11 @@ where
|
|||||||
// First, reset the page to 0 so the next iteration can read from the beginning of this page
|
// First, reset the page to 0 so the next iteration can read from the beginning of this page
|
||||||
self.staterestorer.reset();
|
self.staterestorer.reset();
|
||||||
self.staterestorer.save(&(
|
self.staterestorer.save(&(
|
||||||
if self.save_state { Some(state) } else { None },
|
if self.save_state.on_restart() {
|
||||||
|
Some(state)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
},
|
||||||
&self.llmp_mgr.describe()?,
|
&self.llmp_mgr.describe()?,
|
||||||
))?;
|
))?;
|
||||||
|
|
||||||
@ -1044,7 +1085,9 @@ where
|
|||||||
Z: EvaluatorObservers<E::Observers, State = S> + ExecutionProcessor<E::Observers>, //CE: CustomEvent<I>,
|
Z: EvaluatorObservers<E::Observers, State = S> + ExecutionProcessor<E::Observers>, //CE: CustomEvent<I>,
|
||||||
{
|
{
|
||||||
fn process(&mut self, fuzzer: &mut Z, state: &mut S, executor: &mut E) -> Result<usize, Error> {
|
fn process(&mut self, fuzzer: &mut Z, state: &mut S, executor: &mut E) -> Result<usize, Error> {
|
||||||
self.llmp_mgr.process(fuzzer, state, executor)
|
let res = self.llmp_mgr.process(fuzzer, state, executor)?;
|
||||||
|
self.intermediate_save()?;
|
||||||
|
Ok(res)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1089,7 +1132,7 @@ where
|
|||||||
Self {
|
Self {
|
||||||
llmp_mgr,
|
llmp_mgr,
|
||||||
staterestorer,
|
staterestorer,
|
||||||
save_state: true,
|
save_state: LlmpShouldSaveState::OnRestart,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1097,7 +1140,7 @@ where
|
|||||||
pub fn with_save_state(
|
pub fn with_save_state(
|
||||||
llmp_mgr: LlmpEventManager<EMH, S, SP>,
|
llmp_mgr: LlmpEventManager<EMH, S, SP>,
|
||||||
staterestorer: StateRestorer<SP>,
|
staterestorer: StateRestorer<SP>,
|
||||||
save_state: bool,
|
save_state: LlmpShouldSaveState,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
Self {
|
Self {
|
||||||
llmp_mgr,
|
llmp_mgr,
|
||||||
@ -1115,6 +1158,17 @@ where
|
|||||||
pub fn staterestorer_mut(&mut self) -> &mut StateRestorer<SP> {
|
pub fn staterestorer_mut(&mut self) -> &mut StateRestorer<SP> {
|
||||||
&mut self.staterestorer
|
&mut self.staterestorer
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Save LLMP state and empty state in staterestorer
|
||||||
|
pub fn intermediate_save(&mut self) -> Result<(), Error> {
|
||||||
|
// First, reset the page to 0 so the next iteration can read read from the beginning of this page
|
||||||
|
if self.save_state.oom_safe() {
|
||||||
|
self.staterestorer.reset();
|
||||||
|
self.staterestorer
|
||||||
|
.save(&(None::<S>, &self.llmp_mgr.describe()?))?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The kind of manager we're creating right now
|
/// The kind of manager we're creating right now
|
||||||
@ -1202,8 +1256,8 @@ where
|
|||||||
#[builder(default = None)]
|
#[builder(default = None)]
|
||||||
exit_cleanly_after: Option<NonZeroUsize>,
|
exit_cleanly_after: Option<NonZeroUsize>,
|
||||||
/// Tell the manager whether to serialize the state on restart
|
/// Tell the manager whether to serialize the state on restart
|
||||||
#[builder(default = true)]
|
#[builder(default = LlmpShouldSaveState::OnRestart)]
|
||||||
serialize_state: bool,
|
serialize_state: LlmpShouldSaveState,
|
||||||
/// The timeout duration used for llmp client timeout
|
/// The timeout duration used for llmp client timeout
|
||||||
#[builder(default = DEFAULT_CLIENT_TIMEOUT_SECS)]
|
#[builder(default = DEFAULT_CLIENT_TIMEOUT_SECS)]
|
||||||
client_timeout: Duration,
|
client_timeout: Duration,
|
||||||
@ -1380,7 +1434,7 @@ where
|
|||||||
compiler_fence(Ordering::SeqCst);
|
compiler_fence(Ordering::SeqCst);
|
||||||
|
|
||||||
#[allow(clippy::manual_assert)]
|
#[allow(clippy::manual_assert)]
|
||||||
if !staterestorer.has_content() && self.serialize_state {
|
if !staterestorer.has_content() && !self.serialize_state.oom_safe() {
|
||||||
#[cfg(unix)]
|
#[cfg(unix)]
|
||||||
if child_status == 137 {
|
if child_status == 137 {
|
||||||
// Out of Memory, see https://tldp.org/LDP/abs/html/exitcodes.html
|
// Out of Memory, see https://tldp.org/LDP/abs/html/exitcodes.html
|
||||||
@ -1450,7 +1504,11 @@ where
|
|||||||
)
|
)
|
||||||
};
|
};
|
||||||
// We reset the staterestorer, the next staterestorer and receiver (after crash) will reuse the page from the initial message.
|
// We reset the staterestorer, the next staterestorer and receiver (after crash) will reuse the page from the initial message.
|
||||||
mgr.staterestorer.reset();
|
if self.serialize_state.oom_safe() {
|
||||||
|
mgr.intermediate_save()?;
|
||||||
|
} else {
|
||||||
|
mgr.staterestorer.reset();
|
||||||
|
}
|
||||||
|
|
||||||
/* TODO: Not sure if this is needed
|
/* TODO: Not sure if this is needed
|
||||||
// We commit an empty NO_RESTART message to this buf, against infinite loops,
|
// We commit an empty NO_RESTART message to this buf, against infinite loops,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user