Real OnDiskCorpus (#1096)
* Real OnDiskCorpus * clippy * python * docs * clippy * docs * move to reuse cachedinmem corpus * fmt
This commit is contained in:
parent
d36296c654
commit
fbe8cce1b8
@ -2,7 +2,7 @@
|
||||
|
||||
LibAFL offers a standard mechanism for message passing between processes and machines with a low overhead.
|
||||
We use message passing to inform the other connected clients/fuzzers/nodes about new testcases, metadata, and statistics about the current run.
|
||||
Depending on individual needs, LibAFL can also write testcase contents to disk, while still using events to notify other fuzzers, using an `OnDiskCorpus`.
|
||||
Depending on individual needs, LibAFL can also write testcase contents to disk, while still using events to notify other fuzzers, using the `CachedOnDiskCorpus` or similar.
|
||||
|
||||
In our tests, message passing scales very well to share new testcases and metadata between multiple running fuzzer instances for multi-core fuzzing.
|
||||
Specifically, it scales _a lot_ better than using memory locks on a shared corpus, and _a lot_ better than sharing the testcases via the filesystem, as AFL traditionally does.
|
||||
@ -12,7 +12,7 @@ The `EventManager` interface is used to send Events over the wire using `Low Lev
|
||||
|
||||
## Low Level Message Passing (LLMP)
|
||||
|
||||
LibAFL comes with a reasonably lock-free message passing mechanism that scales well across cores and, using its *broker2broker* mechanism, even to connected machines via TCP.
|
||||
LibAFL comes with a reasonably lock-free message passing mechanism that scales well across cores and, using its _broker2broker_ mechanism, even to connected machines via TCP.
|
||||
Most example fuzzers use this mechanism, and it is the best `EventManager` if you want to fuzz on more than a single core.
|
||||
In the following, we will describe the inner workings of `LLMP`.
|
||||
|
||||
|
@ -56,7 +56,7 @@ pub fn main() -> Result<(), Error> {
|
||||
// RNG
|
||||
StdRand::with_seed(current_nanos()),
|
||||
// Corpus that will be evolved, we keep it in memory for performance
|
||||
OnDiskCorpus::new(&corpus_dir).unwrap(),
|
||||
InMemoryOnDiskCorpus::new(&corpus_dir).unwrap(),
|
||||
// Corpus in which we store solutions (crashes in this example),
|
||||
// on disk so the user can get them after stopping the fuzzer
|
||||
OnDiskCorpus::new(&solution_dir).unwrap(),
|
||||
@ -108,7 +108,7 @@ pub fn main() -> Result<(), Error> {
|
||||
|
||||
let mut state = StdState::new(
|
||||
StdRand::with_seed(current_nanos()),
|
||||
OnDiskCorpus::new(&minimized_dir).unwrap(),
|
||||
InMemoryOnDiskCorpus::new(&minimized_dir).unwrap(),
|
||||
InMemoryCorpus::new(),
|
||||
&mut (),
|
||||
&mut (),
|
||||
|
@ -4,7 +4,7 @@ use std::ptr::write_volatile;
|
||||
|
||||
use libafl::{
|
||||
bolts::{current_nanos, rands::StdRand, tuples::tuple_list},
|
||||
corpus::{InMemoryCorpus, OnDiskCorpus},
|
||||
corpus::{InMemoryCorpus, InMemoryOnDiskCorpus, OnDiskCorpus},
|
||||
events::SimpleEventManager,
|
||||
executors::{inprocess::InProcessExecutor, ExitKind},
|
||||
feedback_or,
|
||||
|
@ -11,7 +11,7 @@ use libafl::monitors::tui::TuiMonitor;
|
||||
use libafl::monitors::SimpleMonitor;
|
||||
use libafl::{
|
||||
bolts::{current_nanos, rands::StdRand, tuples::tuple_list, AsSlice},
|
||||
corpus::{Corpus, InMemoryCorpus, OnDiskCorpus},
|
||||
corpus::{Corpus, InMemoryCorpus, InMemoryOnDiskCorpus},
|
||||
events::SimpleEventManager,
|
||||
executors::{inprocess::InProcessExecutor, DiffExecutor, ExitKind},
|
||||
feedbacks::{CrashFeedback, MaxMapFeedback},
|
||||
@ -180,7 +180,7 @@ pub fn main() {
|
||||
InMemoryCorpus::new(),
|
||||
// Corpus in which we store solutions (crashes in this example),
|
||||
// on disk so the user can get them after stopping the fuzzer
|
||||
OnDiskCorpus::new(PathBuf::from("./crashes")).unwrap(),
|
||||
InMemoryOnDiskCorpus::new(PathBuf::from("./crashes")).unwrap(),
|
||||
// States of the feedbacks.
|
||||
// The feedbacks can report the data that should persist in the State.
|
||||
&mut feedback,
|
||||
|
@ -21,7 +21,7 @@ use libafl::{
|
||||
tuples::{tuple_list, Merge},
|
||||
AsMutSlice, AsSlice,
|
||||
},
|
||||
corpus::{Corpus, OnDiskCorpus},
|
||||
corpus::{Corpus, InMemoryOnDiskCorpus, OnDiskCorpus},
|
||||
events::SimpleRestartingEventManager,
|
||||
executors::{ExitKind, ShadowExecutor},
|
||||
feedback_or,
|
||||
@ -268,7 +268,7 @@ fn fuzz(
|
||||
// RNG
|
||||
StdRand::with_seed(current_nanos()),
|
||||
// Corpus that will be evolved, we keep it in memory for performance
|
||||
OnDiskCorpus::new(corpus_dir).unwrap(),
|
||||
InMemoryOnDiskCorpus::new(corpus_dir).unwrap(),
|
||||
// Corpus in which we store solutions (crashes in this example),
|
||||
// on disk so the user can get them after stopping the fuzzer
|
||||
OnDiskCorpus::new(objective_dir).unwrap(),
|
||||
|
@ -16,7 +16,7 @@ use libafl::{
|
||||
tuples::{tuple_list, Merge},
|
||||
AsMutSlice,
|
||||
},
|
||||
corpus::{Corpus, OnDiskCorpus},
|
||||
corpus::{Corpus, InMemoryOnDiskCorpus, OnDiskCorpus},
|
||||
events::SimpleEventManager,
|
||||
executors::forkserver::{ForkserverExecutor, TimeoutForkserverExecutor},
|
||||
feedback_or,
|
||||
@ -272,7 +272,7 @@ fn fuzz(
|
||||
// RNG
|
||||
StdRand::with_seed(current_nanos()),
|
||||
// Corpus that will be evolved, we keep it in memory for performance
|
||||
OnDiskCorpus::<BytesInput>::new(corpus_dir).unwrap(),
|
||||
InMemoryOnDiskCorpus::<BytesInput>::new(corpus_dir).unwrap(),
|
||||
// Corpus in which we store solutions (crashes in this example),
|
||||
// on disk so the user can get them after stopping the fuzzer
|
||||
OnDiskCorpus::new(objective_dir).unwrap(),
|
||||
|
@ -21,7 +21,7 @@ use libafl::{
|
||||
tuples::{tuple_list, Merge},
|
||||
AsSlice,
|
||||
},
|
||||
corpus::{Corpus, OnDiskCorpus},
|
||||
corpus::{Corpus, InMemoryOnDiskCorpus, OnDiskCorpus},
|
||||
events::SimpleRestartingEventManager,
|
||||
executors::{ExitKind, ShadowExecutor, TimeoutExecutor},
|
||||
feedback_or,
|
||||
@ -280,7 +280,7 @@ fn fuzz(
|
||||
// RNG
|
||||
StdRand::with_seed(current_nanos()),
|
||||
// Corpus that will be evolved, we keep it in memory for performance
|
||||
OnDiskCorpus::new(corpus_dir).unwrap(),
|
||||
InMemoryOnDiskCorpus::new(corpus_dir).unwrap(),
|
||||
// Corpus in which we store solutions (crashes in this example),
|
||||
// on disk so the user can get them after stopping the fuzzer
|
||||
OnDiskCorpus::new(objective_dir).unwrap(),
|
||||
|
@ -25,7 +25,7 @@ use libafl::{
|
||||
tuples::{tuple_list, Merge},
|
||||
AsSlice,
|
||||
},
|
||||
corpus::{Corpus, OnDiskCorpus},
|
||||
corpus::{Corpus, InMemoryOnDiskCorpus, OnDiskCorpus},
|
||||
events::SimpleRestartingEventManager,
|
||||
executors::{inprocess::InProcessExecutor, ExitKind, TimeoutExecutor},
|
||||
feedback_or,
|
||||
@ -332,7 +332,7 @@ fn fuzz_binary(
|
||||
// RNG
|
||||
StdRand::with_seed(current_nanos()),
|
||||
// Corpus that will be evolved, we keep it in memory for performance
|
||||
OnDiskCorpus::new(corpus_dir).unwrap(),
|
||||
InMemoryOnDiskCorpus::new(corpus_dir).unwrap(),
|
||||
// Corpus in which we store solutions (crashes in this example),
|
||||
// on disk so the user can get them after stopping the fuzzer
|
||||
OnDiskCorpus::new(objective_dir).unwrap(),
|
||||
@ -536,7 +536,7 @@ fn fuzz_text(
|
||||
// RNG
|
||||
StdRand::with_seed(current_nanos()),
|
||||
// Corpus that will be evolved, we keep it in memory for performance
|
||||
OnDiskCorpus::new(corpus_dir).unwrap(),
|
||||
InMemoryOnDiskCorpus::new(corpus_dir).unwrap(),
|
||||
// Corpus in which we store solutions (crashes in this example),
|
||||
// on disk so the user can get them after stopping the fuzzer
|
||||
OnDiskCorpus::new(objective_dir).unwrap(),
|
||||
|
@ -112,7 +112,7 @@ fn fuzz(corpus_dirs: &[PathBuf], objective_dir: PathBuf, broker_port: u16) -> Re
|
||||
// RNG
|
||||
StdRand::with_seed(current_nanos()),
|
||||
// Corpus that will be evolved, we keep it in memory for performance
|
||||
InMemoryCorpus::new(),
|
||||
OnDiskCorpus::new("corpus_out").unwrap(),
|
||||
// Corpus in which we store solutions (crashes in this example),
|
||||
// on disk so the user can get them after stopping the fuzzer
|
||||
OnDiskCorpus::new(objective_dir).unwrap(),
|
||||
|
@ -1,4 +1,4 @@
|
||||
//! The cached ondisk corpus stores testcases to disk keeping a part of them in memory.
|
||||
//! The [`CachedOnDiskCorpus`] stores [`Testcase`]s to disk, keeping a subset of them in memory/cache, evicting in a FIFO manner.
|
||||
|
||||
use alloc::collections::vec_deque::VecDeque;
|
||||
use core::cell::RefCell;
|
||||
@ -8,14 +8,16 @@ use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::{
|
||||
corpus::{
|
||||
ondisk::{OnDiskCorpus, OnDiskMetadataFormat},
|
||||
Corpus, CorpusId, Testcase,
|
||||
inmemory_ondisk::InMemoryOnDiskCorpus, ondisk::OnDiskMetadataFormat, Corpus, CorpusId,
|
||||
Testcase,
|
||||
},
|
||||
inputs::{Input, UsesInput},
|
||||
Error,
|
||||
};
|
||||
|
||||
/// A corpus that keeps a maximum number of [`Testcase`]s in memory. The eviction policy is FIFO.
|
||||
/// A corpus that keeps a maximum number of [`Testcase`]s in memory
|
||||
/// and loads them from disk when they are being used.
|
||||
/// The eviction policy is FIFO.
|
||||
#[cfg(feature = "std")]
|
||||
#[derive(Default, Serialize, Deserialize, Clone, Debug)]
|
||||
#[serde(bound = "I: serde::de::DeserializeOwned")]
|
||||
@ -23,7 +25,7 @@ pub struct CachedOnDiskCorpus<I>
|
||||
where
|
||||
I: Input,
|
||||
{
|
||||
inner: OnDiskCorpus<I>,
|
||||
inner: InMemoryOnDiskCorpus<I>,
|
||||
cached_indexes: RefCell<VecDeque<CorpusId>>,
|
||||
cache_max_len: usize,
|
||||
}
|
||||
@ -148,7 +150,7 @@ where
|
||||
where
|
||||
P: AsRef<Path>,
|
||||
{
|
||||
Self::_new(OnDiskCorpus::new(dir_path)?, cache_max_len)
|
||||
Self::_new(InMemoryOnDiskCorpus::new(dir_path)?, cache_max_len)
|
||||
}
|
||||
|
||||
/// Creates a [`CachedOnDiskCorpus`] that does not store [`Testcase`] metadata to disk.
|
||||
@ -156,7 +158,7 @@ where
|
||||
where
|
||||
P: AsRef<Path>,
|
||||
{
|
||||
Self::_new(OnDiskCorpus::no_meta(dir_path)?, cache_max_len)
|
||||
Self::_new(InMemoryOnDiskCorpus::no_meta(dir_path)?, cache_max_len)
|
||||
}
|
||||
|
||||
/// Creates the [`CachedOnDiskCorpus`] specifying the format in which `Metadata` will be saved to disk.
|
||||
@ -171,13 +173,13 @@ where
|
||||
P: AsRef<Path>,
|
||||
{
|
||||
Self::_new(
|
||||
OnDiskCorpus::with_meta_format(dir_path, meta_format)?,
|
||||
InMemoryOnDiskCorpus::with_meta_format(dir_path, meta_format)?,
|
||||
cache_max_len,
|
||||
)
|
||||
}
|
||||
|
||||
/// Internal constructor `fn`
|
||||
fn _new(on_disk_corpus: OnDiskCorpus<I>, cache_max_len: usize) -> Result<Self, Error> {
|
||||
fn _new(on_disk_corpus: InMemoryOnDiskCorpus<I>, cache_max_len: usize) -> Result<Self, Error> {
|
||||
if cache_max_len == 0 {
|
||||
return Err(Error::illegal_argument(
|
||||
"The max cache len in CachedOnDiskCorpus cannot be 0",
|
||||
|
315
libafl/src/corpus/inmemory_ondisk.rs
Normal file
315
libafl/src/corpus/inmemory_ondisk.rs
Normal file
@ -0,0 +1,315 @@
|
||||
//! The [`InMemoryOnDiskCorpus`] stores [`Testcase`]s to disk.
|
||||
//! Additionally, _all_ of them are kept in memory.
|
||||
//! For a lower memory footprint, consider using [`crate::corpus::CachedOnDiskCorpus`]
|
||||
//! which only stores a certain number of [`Testcase`]s and removes additional ones in a FIFO manner.
|
||||
|
||||
use core::{cell::RefCell, time::Duration};
|
||||
#[cfg(feature = "std")]
|
||||
use std::{fs, fs::File, io::Write};
|
||||
use std::{
|
||||
fs::OpenOptions,
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::ondisk::{OnDiskMetadata, OnDiskMetadataFormat};
|
||||
#[cfg(feature = "gzip")]
|
||||
use crate::bolts::compress::GzipCompressor;
|
||||
use crate::{
|
||||
bolts::serdeany::SerdeAnyMap,
|
||||
corpus::{Corpus, CorpusId, InMemoryCorpus, Testcase},
|
||||
inputs::{Input, UsesInput},
|
||||
state::HasMetadata,
|
||||
Error,
|
||||
};
|
||||
|
||||
/// Borrowed view of the [`Testcase`] data that is meant to be serialized to disk as metadata
// NOTE(review): nothing in this file constructs this type; `save_testcase` serializes an
// `OnDiskMetadata` from `super::ondisk` instead. Confirm whether this struct is still needed.
#[cfg(feature = "std")]
#[derive(Serialize, Debug)]
pub struct InMemoryOnDiskMetadata<'a> {
    /// The dynamic metadata [`SerdeAnyMap`] of the [`Testcase`]
    metadata: &'a SerdeAnyMap,
    /// The exec time of the [`Testcase`], if one is set
    exec_time: &'a Option<Duration>,
    /// The amount of executions of the [`Testcase`]
    executions: &'a usize,
}
|
||||
|
||||
/// A [`Corpus`] that writes every [`Testcase`] to disk and additionally keeps all of them in memory.
///
/// By default, metadata is written to a `.<filename>.metadata` file in the same folder.
#[cfg(feature = "std")]
#[derive(Default, Serialize, Deserialize, Clone, Debug)]
#[serde(bound = "I: serde::de::DeserializeOwned")]
pub struct InMemoryOnDiskCorpus<I: Input> {
    /// The in-memory corpus holding all testcases; lookups and bookkeeping are forwarded to it
    inner: InMemoryCorpus<I>,
    /// The directory in which testcases, their lockfiles, and their metadata are created
    dir_path: PathBuf,
    /// The format used for metadata files; `None` means no metadata files are written
    meta_format: Option<OnDiskMetadataFormat>,
}
|
||||
|
||||
impl<I> UsesInput for InMemoryOnDiskCorpus<I>
|
||||
where
|
||||
I: Input,
|
||||
{
|
||||
type Input = I;
|
||||
}
|
||||
|
||||
impl<I> Corpus for InMemoryOnDiskCorpus<I>
|
||||
where
|
||||
I: Input,
|
||||
{
|
||||
/// Returns the number of elements
|
||||
#[inline]
|
||||
fn count(&self) -> usize {
|
||||
self.inner.count()
|
||||
}
|
||||
|
||||
/// Add an entry to the corpus and return its index
|
||||
#[inline]
|
||||
fn add(&mut self, testcase: Testcase<I>) -> Result<CorpusId, Error> {
|
||||
let idx = self.inner.add(testcase)?;
|
||||
self.save_testcase(&mut self.get(idx).unwrap().borrow_mut(), idx)?;
|
||||
Ok(idx)
|
||||
}
|
||||
|
||||
/// Replaces the testcase at the given idx
|
||||
#[inline]
|
||||
fn replace(&mut self, idx: CorpusId, testcase: Testcase<I>) -> Result<Testcase<I>, Error> {
|
||||
let entry = self.inner.replace(idx, testcase)?;
|
||||
self.remove_testcase(&entry)?;
|
||||
self.save_testcase(&mut self.get(idx).unwrap().borrow_mut(), idx)?;
|
||||
Ok(entry)
|
||||
}
|
||||
|
||||
/// Removes an entry from the corpus, returning it if it was present.
|
||||
#[inline]
|
||||
fn remove(&mut self, idx: CorpusId) -> Result<Testcase<I>, Error> {
|
||||
let entry = self.inner.remove(idx)?;
|
||||
self.remove_testcase(&entry)?;
|
||||
Ok(entry)
|
||||
}
|
||||
|
||||
/// Get by id
|
||||
#[inline]
|
||||
fn get(&self, idx: CorpusId) -> Result<&RefCell<Testcase<I>>, Error> {
|
||||
self.inner.get(idx)
|
||||
}
|
||||
|
||||
/// Current testcase scheduled
|
||||
#[inline]
|
||||
fn current(&self) -> &Option<CorpusId> {
|
||||
self.inner.current()
|
||||
}
|
||||
|
||||
/// Current testcase scheduled (mutable)
|
||||
#[inline]
|
||||
fn current_mut(&mut self) -> &mut Option<CorpusId> {
|
||||
self.inner.current_mut()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn next(&self, idx: CorpusId) -> Option<CorpusId> {
|
||||
self.inner.next(idx)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn prev(&self, idx: CorpusId) -> Option<CorpusId> {
|
||||
self.inner.prev(idx)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn first(&self) -> Option<CorpusId> {
|
||||
self.inner.first()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn last(&self) -> Option<CorpusId> {
|
||||
self.inner.last()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn nth(&self, nth: usize) -> CorpusId {
|
||||
self.inner.nth(nth)
|
||||
}
|
||||
}
|
||||
|
||||
impl<I> InMemoryOnDiskCorpus<I>
|
||||
where
|
||||
I: Input,
|
||||
{
|
||||
/// Creates an [`InMemoryOnDiskCorpus`].
|
||||
///
|
||||
/// This corpus stores all testcases to disk, and keeps all of them in memory, as well.
|
||||
///
|
||||
/// By default, it stores metadata for each [`Testcase`] as prettified json.
|
||||
/// Metadata will be written to a file named `.<testcase>.metadata`
|
||||
/// The metadata may include objective reason, specific information for a fuzz job, and more.
|
||||
///
|
||||
/// If you don't want metadata, use [`InMemoryOnDiskCorpus::no_meta`].
|
||||
/// To pick a different metadata format, use [`InMemoryOnDiskCorpus::with_meta_format`].
|
||||
///
|
||||
/// Will error, if [`std::fs::create_dir_all()`] failed for `dir_path`.
|
||||
pub fn new<P>(dir_path: P) -> Result<Self, Error>
|
||||
where
|
||||
P: AsRef<Path>,
|
||||
{
|
||||
Self::_new(dir_path.as_ref(), Some(OnDiskMetadataFormat::JsonPretty))
|
||||
}
|
||||
|
||||
/// Creates the [`InMemoryOnDiskCorpus`] specifying the format in which `Metadata` will be saved to disk.
|
||||
///
|
||||
/// Will error, if [`std::fs::create_dir_all()`] failed for `dir_path`.
|
||||
pub fn with_meta_format<P>(
|
||||
dir_path: P,
|
||||
meta_format: OnDiskMetadataFormat,
|
||||
) -> Result<Self, Error>
|
||||
where
|
||||
P: AsRef<Path>,
|
||||
{
|
||||
Self::_new(dir_path.as_ref(), Some(meta_format))
|
||||
}
|
||||
|
||||
/// Creates an [`InMemoryOnDiskCorpus`] that will not store .metadata files
|
||||
///
|
||||
/// Will error, if [`std::fs::create_dir_all()`] failed for `dir_path`.
|
||||
pub fn no_meta<P>(dir_path: P) -> Result<Self, Error>
|
||||
where
|
||||
P: AsRef<Path>,
|
||||
{
|
||||
Self::_new(dir_path.as_ref(), None)
|
||||
}
|
||||
|
||||
/// Private fn to crate a new corpus at the given (non-generic) path with the given optional `meta_format`
|
||||
fn _new(dir_path: &Path, meta_format: Option<OnDiskMetadataFormat>) -> Result<Self, Error> {
|
||||
fs::create_dir_all(dir_path)?;
|
||||
Ok(InMemoryOnDiskCorpus {
|
||||
inner: InMemoryCorpus::new(),
|
||||
dir_path: dir_path.into(),
|
||||
meta_format,
|
||||
})
|
||||
}
|
||||
|
||||
fn save_testcase(&self, testcase: &mut Testcase<I>, idx: CorpusId) -> Result<(), Error> {
|
||||
if testcase.filename().is_none() {
|
||||
// TODO walk entry metadata to ask for pieces of filename (e.g. :havoc in AFL)
|
||||
let file_orig = testcase.input().as_ref().unwrap().generate_name(idx.0);
|
||||
let mut file = file_orig.clone();
|
||||
|
||||
let mut ctr = 2;
|
||||
let filename = loop {
|
||||
let lockfile = format!(".{file}.lafl_lock");
|
||||
// try to create lockfile.
|
||||
|
||||
if OpenOptions::new()
|
||||
.write(true)
|
||||
.create_new(true)
|
||||
.open(self.dir_path.join(lockfile))
|
||||
.is_ok()
|
||||
{
|
||||
break self.dir_path.join(file);
|
||||
}
|
||||
|
||||
file = format!("{file_orig}-{ctr}");
|
||||
ctr += 1;
|
||||
};
|
||||
|
||||
let filename_str = filename.to_str().expect("Invalid Path");
|
||||
testcase.set_filename(filename_str.into());
|
||||
};
|
||||
if self.meta_format.is_some() {
|
||||
let mut filename = PathBuf::from(testcase.filename().as_ref().unwrap());
|
||||
filename.set_file_name(format!(
|
||||
".{}.metadata",
|
||||
filename.file_name().unwrap().to_string_lossy()
|
||||
));
|
||||
let mut tmpfile_name = PathBuf::from(&filename);
|
||||
tmpfile_name.set_file_name(format!(
|
||||
".{}.tmp",
|
||||
tmpfile_name.file_name().unwrap().to_string_lossy()
|
||||
));
|
||||
|
||||
let ondisk_meta = OnDiskMetadata {
|
||||
metadata: testcase.metadata(),
|
||||
exec_time: testcase.exec_time(),
|
||||
executions: testcase.executions(),
|
||||
};
|
||||
|
||||
let mut tmpfile = File::create(&tmpfile_name)?;
|
||||
|
||||
let serialized = match self.meta_format.as_ref().unwrap() {
|
||||
OnDiskMetadataFormat::Postcard => postcard::to_allocvec(&ondisk_meta)?,
|
||||
OnDiskMetadataFormat::Json => serde_json::to_vec(&ondisk_meta)?,
|
||||
OnDiskMetadataFormat::JsonPretty => serde_json::to_vec_pretty(&ondisk_meta)?,
|
||||
#[cfg(feature = "gzip")]
|
||||
OnDiskMetadataFormat::JsonGzip => GzipCompressor::new(0)
|
||||
.compress(&serde_json::to_vec_pretty(&ondisk_meta)?)?
|
||||
.unwrap(),
|
||||
};
|
||||
tmpfile.write_all(&serialized)?;
|
||||
fs::rename(&tmpfile_name, &filename)?;
|
||||
}
|
||||
testcase
|
||||
.store_input()
|
||||
.expect("Could not save testcase to disk");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn remove_testcase(&self, testcase: &Testcase<I>) -> Result<(), Error> {
|
||||
if let Some(filename) = testcase.filename() {
|
||||
fs::remove_file(filename)?;
|
||||
}
|
||||
if self.meta_format.is_some() {
|
||||
let mut filename = PathBuf::from(testcase.filename().as_ref().unwrap());
|
||||
filename.set_file_name(format!(
|
||||
".{}.metadata",
|
||||
filename.file_name().unwrap().to_string_lossy()
|
||||
));
|
||||
fs::remove_file(filename)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// `InMemoryOnDiskCorpus` Python bindings
#[cfg(feature = "python")]
pub mod pybind {
    use alloc::string::String;
    use std::path::PathBuf;

    use pyo3::prelude::*;
    use serde::{Deserialize, Serialize};

    use crate::{
        corpus::{pybind::PythonCorpus, InMemoryOnDiskCorpus},
        inputs::BytesInput,
    };

    /// Python class for InMemoryOnDiskCorpus
    #[pyclass(unsendable, name = "InMemoryOnDiskCorpus")]
    #[allow(clippy::unsafe_derive_deserialize)]
    #[derive(Serialize, Deserialize, Debug, Clone)]
    pub struct PythonInMemoryOnDiskCorpus {
        /// Rust wrapped InMemoryOnDiskCorpus object
        pub inner: InMemoryOnDiskCorpus<BytesInput>,
    }

    #[pymethods]
    impl PythonInMemoryOnDiskCorpus {
        // Constructor exposed to Python: builds the wrapped corpus in the directory `path`.
        // Panics if the corpus cannot be created.
        #[new]
        fn new(path: String) -> Self {
            let corpus_dir = PathBuf::from(path);
            let inner = InMemoryOnDiskCorpus::new(corpus_dir).unwrap();
            Self { inner }
        }

        // Wraps this object into the generic `PythonCorpus` binding type.
        fn as_corpus(slf: Py<Self>) -> PythonCorpus {
            PythonCorpus::new_in_memory_on_disk(slf)
        }
    }

    /// Register the classes to the python module
    pub fn register(_py: Python, m: &PyModule) -> PyResult<()> {
        m.add_class::<PythonInMemoryOnDiskCorpus>()?;
        Ok(())
    }
}
|
@ -6,6 +6,11 @@ pub use testcase::{SchedulerTestcaseMetaData, Testcase};
|
||||
pub mod inmemory;
|
||||
pub use inmemory::InMemoryCorpus;
|
||||
|
||||
#[cfg(feature = "std")]
|
||||
pub mod inmemory_ondisk;
|
||||
#[cfg(feature = "std")]
|
||||
pub use inmemory_ondisk::InMemoryOnDiskCorpus;
|
||||
|
||||
#[cfg(feature = "std")]
|
||||
pub mod ondisk;
|
||||
#[cfg(feature = "std")]
|
||||
@ -173,6 +178,7 @@ pub mod pybind {
|
||||
use crate::{
|
||||
corpus::{
|
||||
cached::pybind::PythonCachedOnDiskCorpus, inmemory::pybind::PythonInMemoryCorpus,
|
||||
inmemory_ondisk::pybind::PythonInMemoryOnDiskCorpus,
|
||||
ondisk::pybind::PythonOnDiskCorpus, testcase::pybind::PythonTestcaseWrapper, Corpus,
|
||||
CorpusId, Testcase,
|
||||
},
|
||||
@ -185,6 +191,7 @@ pub mod pybind {
|
||||
InMemory(Py<PythonInMemoryCorpus>),
|
||||
CachedOnDisk(Py<PythonCachedOnDiskCorpus>),
|
||||
OnDisk(Py<PythonOnDiskCorpus>),
|
||||
InMemoryOnDisk(Py<PythonInMemoryOnDiskCorpus>),
|
||||
}
|
||||
|
||||
/// Corpus Trait binding
|
||||
@ -204,6 +211,7 @@ pub mod pybind {
|
||||
PythonCorpusWrapper,
|
||||
{
|
||||
InMemory,
|
||||
InMemoryOnDisk,
|
||||
CachedOnDisk,
|
||||
OnDisk
|
||||
}
|
||||
@ -220,6 +228,7 @@ pub mod pybind {
|
||||
PythonCorpusWrapper,
|
||||
{
|
||||
InMemory,
|
||||
InMemoryOnDisk,
|
||||
CachedOnDisk,
|
||||
OnDisk
|
||||
}
|
||||
@ -253,6 +262,16 @@ pub mod pybind {
|
||||
}
|
||||
}
|
||||
|
||||
#[staticmethod]
|
||||
#[must_use]
|
||||
pub fn new_in_memory_on_disk(
|
||||
py_in_memory_on_disk_corpus: Py<PythonInMemoryOnDiskCorpus>,
|
||||
) -> Self {
|
||||
Self {
|
||||
wrapper: PythonCorpusWrapper::InMemoryOnDisk(py_in_memory_on_disk_corpus),
|
||||
}
|
||||
}
|
||||
|
||||
#[pyo3(name = "count")]
|
||||
fn pycount(&self) -> usize {
|
||||
self.count()
|
||||
|
@ -1,40 +1,35 @@
|
||||
//! The ondisk corpus stores [`Testcase`]s to disk.
|
||||
//! Additionally, all of them are kept in memory.
|
||||
//! For a lower memory footprint, consider using [`crate::corpus::CachedOnDiskCorpus`]
|
||||
//! which only stores a certain number of testcases and removes additional ones in a FIFO manner.
|
||||
//! The ondisk corpus stores all [`Testcase`]s to disk.
|
||||
//! It never keeps any of them in memory.
|
||||
//! This is a good fit for solutions that are never reused, and for very memory-constrained environments.
|
||||
//! For any other occasions, consider using [`crate::corpus::CachedOnDiskCorpus`]
|
||||
//! which stores a certain number of testcases in memory and removes additional ones in a FIFO manner.
|
||||
|
||||
use core::{cell::RefCell, time::Duration};
|
||||
#[cfg(feature = "std")]
|
||||
use std::{fs, fs::File, io::Write};
|
||||
use std::{
|
||||
fs::OpenOptions,
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[cfg(feature = "gzip")]
|
||||
use crate::bolts::compress::GzipCompressor;
|
||||
use super::CachedOnDiskCorpus;
|
||||
use crate::{
|
||||
bolts::serdeany::SerdeAnyMap,
|
||||
corpus::{Corpus, CorpusId, InMemoryCorpus, Testcase},
|
||||
corpus::{Corpus, CorpusId, Testcase},
|
||||
inputs::{Input, UsesInput},
|
||||
state::HasMetadata,
|
||||
Error,
|
||||
};
|
||||
|
||||
/// Options for the format of the on-disk metadata
|
||||
#[cfg(feature = "std")]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[derive(Default, Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum OnDiskMetadataFormat {
|
||||
/// A binary-encoded postcard
|
||||
Postcard,
|
||||
/// JSON
|
||||
Json,
|
||||
/// JSON formatted for readability
|
||||
#[default]
|
||||
JsonPretty,
|
||||
#[cfg(feature = "gzip")]
|
||||
/// The same as [`OnDiskMetadataFormat::JsonPretty`], but compressed
|
||||
#[cfg(feature = "gzip")]
|
||||
JsonGzip,
|
||||
}
|
||||
|
||||
@ -42,9 +37,12 @@ pub enum OnDiskMetadataFormat {
|
||||
#[cfg(feature = "std")]
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct OnDiskMetadata<'a> {
|
||||
metadata: &'a SerdeAnyMap,
|
||||
exec_time: &'a Option<Duration>,
|
||||
executions: &'a usize,
|
||||
/// The dynamic metadata [`SerdeAnyMap`] stored to disk
|
||||
pub metadata: &'a SerdeAnyMap,
|
||||
/// The exec time for this [`Testcase`]
|
||||
pub exec_time: &'a Option<Duration>,
|
||||
/// The amount of executions for this [`Testcase`]
|
||||
pub executions: &'a usize,
|
||||
}
|
||||
|
||||
/// A corpus able to store [`Testcase`]s to disk, and load them from disk, when they are being used.
|
||||
@ -57,9 +55,10 @@ pub struct OnDiskCorpus<I>
|
||||
where
|
||||
I: Input,
|
||||
{
|
||||
inner: InMemoryCorpus<I>,
|
||||
/// The root directory backing this corpus
|
||||
dir_path: PathBuf,
|
||||
meta_format: Option<OnDiskMetadataFormat>,
|
||||
/// We wrap a cached corpus and set its size to 1.
|
||||
inner: CachedOnDiskCorpus<I>,
|
||||
}
|
||||
|
||||
impl<I> UsesInput for OnDiskCorpus<I>
|
||||
@ -82,26 +81,19 @@ where
|
||||
/// Add an entry to the corpus and return its index
|
||||
#[inline]
|
||||
fn add(&mut self, testcase: Testcase<I>) -> Result<CorpusId, Error> {
|
||||
let idx = self.inner.add(testcase)?;
|
||||
self.save_testcase(&mut self.get(idx).unwrap().borrow_mut(), idx)?;
|
||||
Ok(idx)
|
||||
self.inner.add(testcase)
|
||||
}
|
||||
|
||||
/// Replaces the testcase at the given idx
|
||||
#[inline]
|
||||
fn replace(&mut self, idx: CorpusId, testcase: Testcase<I>) -> Result<Testcase<I>, Error> {
|
||||
let entry = self.inner.replace(idx, testcase)?;
|
||||
self.remove_testcase(&entry)?;
|
||||
self.save_testcase(&mut self.get(idx).unwrap().borrow_mut(), idx)?;
|
||||
Ok(entry)
|
||||
self.inner.replace(idx, testcase)
|
||||
}
|
||||
|
||||
/// Removes an entry from the corpus, returning it if it was present.
|
||||
#[inline]
|
||||
fn remove(&mut self, idx: CorpusId) -> Result<Testcase<I>, Error> {
|
||||
let entry = self.inner.remove(idx)?;
|
||||
self.remove_testcase(&entry)?;
|
||||
Ok(entry)
|
||||
self.inner.remove(idx)
|
||||
}
|
||||
|
||||
/// Get by id
|
||||
@ -154,13 +146,12 @@ where
|
||||
{
|
||||
/// Creates an [`OnDiskCorpus`].
|
||||
///
|
||||
/// This corpus stores all testcases to disk, and keeps all of them in memory, as well.
|
||||
/// This corpus stores all testcases to disk.
|
||||
///
|
||||
/// By default, it stores metadata for each [`Testcase`] as prettified json.
|
||||
/// Metadata will be written to a file named `.<testcase>.metadata`
|
||||
/// The metadata may include objective reason, specific information for a fuzz job, and more.
|
||||
///
|
||||
/// If you don't want metadata, use [`OnDiskCorpus::no_meta`].
|
||||
/// To pick a different metadata format, use [`OnDiskCorpus::with_meta_format`].
|
||||
///
|
||||
/// Will error, if [`std::fs::create_dir_all()`] failed for `dir_path`.
|
||||
@ -168,7 +159,7 @@ where
|
||||
where
|
||||
P: AsRef<Path>,
|
||||
{
|
||||
Self::_new(dir_path.as_ref(), Some(OnDiskMetadataFormat::JsonPretty))
|
||||
Self::_new(dir_path.as_ref(), OnDiskMetadataFormat::JsonPretty)
|
||||
}
|
||||
|
||||
/// Creates the [`OnDiskCorpus`] specifying the format in which `Metadata` will be saved to disk.
|
||||
@ -181,108 +172,16 @@ where
|
||||
where
|
||||
P: AsRef<Path>,
|
||||
{
|
||||
Self::_new(dir_path.as_ref(), Some(meta_format))
|
||||
}
|
||||
|
||||
/// Creates an [`OnDiskCorpus`] that will not store .metadata files
|
||||
///
|
||||
/// Will error, if [`std::fs::create_dir_all()`] failed for `dir_path`.
|
||||
pub fn no_meta<P>(dir_path: P) -> Result<Self, Error>
|
||||
where
|
||||
P: AsRef<Path>,
|
||||
{
|
||||
Self::_new(dir_path.as_ref(), None)
|
||||
Self::_new(dir_path.as_ref(), meta_format)
|
||||
}
|
||||
|
||||
/// Private fn to create a new corpus at the given (non-generic) path with the given optional `meta_format`
|
||||
fn _new(dir_path: &Path, meta_format: Option<OnDiskMetadataFormat>) -> Result<Self, Error> {
|
||||
fs::create_dir_all(dir_path)?;
|
||||
fn _new(dir_path: &Path, meta_format: OnDiskMetadataFormat) -> Result<Self, Error> {
|
||||
Ok(OnDiskCorpus {
|
||||
inner: InMemoryCorpus::new(),
|
||||
dir_path: dir_path.into(),
|
||||
meta_format,
|
||||
inner: CachedOnDiskCorpus::with_meta_format(dir_path, 1, meta_format)?,
|
||||
})
|
||||
}
|
||||
|
||||
fn save_testcase(&self, testcase: &mut Testcase<I>, idx: CorpusId) -> Result<(), Error> {
|
||||
if testcase.filename().is_none() {
|
||||
// TODO walk entry metadata to ask for pieces of filename (e.g. :havoc in AFL)
|
||||
let file_orig = testcase.input().as_ref().unwrap().generate_name(idx.0);
|
||||
let mut file = file_orig.clone();
|
||||
|
||||
let mut ctr = 2;
|
||||
let filename = loop {
|
||||
let lockfile = format!(".{file}.lafl_lock");
|
||||
// try to create lockfile.
|
||||
|
||||
if OpenOptions::new()
|
||||
.write(true)
|
||||
.create_new(true)
|
||||
.open(self.dir_path.join(lockfile))
|
||||
.is_ok()
|
||||
{
|
||||
break self.dir_path.join(file);
|
||||
}
|
||||
|
||||
file = format!("{file_orig}-{ctr}");
|
||||
ctr += 1;
|
||||
};
|
||||
|
||||
let filename_str = filename.to_str().expect("Invalid Path");
|
||||
testcase.set_filename(filename_str.into());
|
||||
};
|
||||
if self.meta_format.is_some() {
|
||||
let mut filename = PathBuf::from(testcase.filename().as_ref().unwrap());
|
||||
filename.set_file_name(format!(
|
||||
".{}.metadata",
|
||||
filename.file_name().unwrap().to_string_lossy()
|
||||
));
|
||||
let mut tmpfile_name = PathBuf::from(&filename);
|
||||
tmpfile_name.set_file_name(format!(
|
||||
".{}.tmp",
|
||||
tmpfile_name.file_name().unwrap().to_string_lossy()
|
||||
));
|
||||
|
||||
let ondisk_meta = OnDiskMetadata {
|
||||
metadata: testcase.metadata(),
|
||||
exec_time: testcase.exec_time(),
|
||||
executions: testcase.executions(),
|
||||
};
|
||||
|
||||
let mut tmpfile = File::create(&tmpfile_name)?;
|
||||
|
||||
let serialized = match self.meta_format.as_ref().unwrap() {
|
||||
OnDiskMetadataFormat::Postcard => postcard::to_allocvec(&ondisk_meta)?,
|
||||
OnDiskMetadataFormat::Json => serde_json::to_vec(&ondisk_meta)?,
|
||||
OnDiskMetadataFormat::JsonPretty => serde_json::to_vec_pretty(&ondisk_meta)?,
|
||||
#[cfg(feature = "gzip")]
|
||||
OnDiskMetadataFormat::JsonGzip => GzipCompressor::new(0)
|
||||
.compress(&serde_json::to_vec_pretty(&ondisk_meta)?)?
|
||||
.unwrap(),
|
||||
};
|
||||
tmpfile.write_all(&serialized)?;
|
||||
fs::rename(&tmpfile_name, &filename)?;
|
||||
}
|
||||
testcase
|
||||
.store_input()
|
||||
.expect("Could not save testcase to disk");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn remove_testcase(&self, testcase: &Testcase<I>) -> Result<(), Error> {
|
||||
if let Some(filename) = testcase.filename() {
|
||||
fs::remove_file(filename)?;
|
||||
}
|
||||
if self.meta_format.is_some() {
|
||||
let mut filename = PathBuf::from(testcase.filename().as_ref().unwrap());
|
||||
filename.set_file_name(format!(
|
||||
".{}.metadata",
|
||||
filename.file_name().unwrap().to_string_lossy()
|
||||
));
|
||||
fs::remove_file(filename)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "python")]
|
||||
|
Loading…
x
Reference in New Issue
Block a user