Deduplicate OnDisk Corpus (#2827)
* testcase name logic * implement locking * implement logic for removing testcase * minor modifications * minor modifications to remove_testcase() * extract generate_name() from trait Input (broken) * Revert "extract generate_name() from trait Input (broken)" This reverts commit 9e217be2810a8c33970846cdd380f542975cc05b. * fix ci errors * remove CorpusId from generate_name() calls * toml formatting * write from file instead of fs * fmt and clippy * fix windows clippy * handle renaming of testcase * fix failing cmplog test * overwrite lockfile on remove testcase * format * bring back corpus id in generate_name * missed windows executors hook * fix failing tests * some more errors --------- Co-authored-by: Dongjia "toka" Zhang <tokazerkje@outlook.com>
This commit is contained in:
parent
5bd6a6f754
commit
72986fc129
@ -124,7 +124,7 @@ uuid = { version = "1.10.0", features = ["serde", "v4"] }
|
||||
which = "6.0.3"
|
||||
windows = "0.59.0"
|
||||
z3 = "0.12.1"
|
||||
|
||||
fs2 = "0.4.3" # Used by OnDisk Corpus for file locking
|
||||
|
||||
[workspace.lints.rust]
|
||||
# Deny
|
||||
|
@ -2,10 +2,7 @@
|
||||
use std::hash::Hash;
|
||||
|
||||
use lain::prelude::*;
|
||||
use libafl::{
|
||||
corpus::CorpusId,
|
||||
inputs::{HasTargetBytes, Input},
|
||||
};
|
||||
use libafl::inputs::{HasTargetBytes, Input};
|
||||
use libafl_bolts::{ownedref::OwnedSlice, HasLen};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
@ -48,15 +45,7 @@ pub enum PacketType {
|
||||
Reset = 0x2,
|
||||
}
|
||||
|
||||
impl Input for PacketData {
|
||||
fn generate_name(&self, id: Option<CorpusId>) -> String {
|
||||
if let Some(id) = id {
|
||||
format!("id_{}", id.0)
|
||||
} else {
|
||||
"id_unknown".into()
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Input for PacketData {}
|
||||
|
||||
impl HasTargetBytes for PacketData {
|
||||
#[inline]
|
||||
|
@ -2,14 +2,13 @@ use core::num::NonZeroUsize;
|
||||
use std::{borrow::Cow, hash::Hash};
|
||||
|
||||
use libafl::{
|
||||
corpus::CorpusId,
|
||||
generators::{Generator, RandBytesGenerator},
|
||||
inputs::{BytesInput, HasTargetBytes, Input},
|
||||
mutators::{MutationResult, Mutator},
|
||||
state::HasRand,
|
||||
Error, SerdeAny,
|
||||
};
|
||||
use libafl_bolts::{generic_hash_std, rands::Rand, Named};
|
||||
use libafl_bolts::{rands::Rand, Named};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// The custom [`Input`] type used in this example, consisting of a byte array part, a byte array that is not always present, and a boolean
|
||||
@ -28,11 +27,7 @@ pub struct CustomInput {
|
||||
}
|
||||
|
||||
/// Hash-based implementation
|
||||
impl Input for CustomInput {
|
||||
fn generate_name(&self, _id: Option<CorpusId>) -> String {
|
||||
format!("{:016x}", generic_hash_std(self))
|
||||
}
|
||||
}
|
||||
impl Input for CustomInput {}
|
||||
|
||||
impl CustomInput {
|
||||
/// Returns a mutable reference to the byte array
|
||||
|
@ -60,6 +60,7 @@ std = [
|
||||
"libafl_bolts/std",
|
||||
"typed-builder",
|
||||
"fastbloom",
|
||||
"fs2",
|
||||
]
|
||||
|
||||
## Tracks the Feedbacks and the Objectives that were interesting for a Testcase
|
||||
@ -290,6 +291,8 @@ const_panic = { version = "0.2.9", default-features = false } # similarly, for f
|
||||
pyo3 = { workspace = true, optional = true }
|
||||
regex-syntax = { version = "0.8.4", optional = true } # For nautilus
|
||||
|
||||
fs2 = { workspace = true, optional = true } # used by OnDisk Corpus for file locking
|
||||
|
||||
# optional-dev deps (change when target.'cfg(accessible(::std))'.test-dependencies will be stable)
|
||||
serial_test = { workspace = true, optional = true, default-features = false, features = [
|
||||
"logging",
|
||||
|
@ -4,16 +4,17 @@
|
||||
//! For a lower memory footprint, consider using [`crate::corpus::CachedOnDiskCorpus`]
|
||||
//! which only stores a certain number of [`Testcase`]s and removes additional ones in a FIFO manner.
|
||||
|
||||
use alloc::string::String;
|
||||
use alloc::string::{String, ToString};
|
||||
use core::cell::{Ref, RefCell, RefMut};
|
||||
use std::{
|
||||
fs,
|
||||
fs::{File, OpenOptions},
|
||||
io,
|
||||
io::Write,
|
||||
io::{Read, Seek, SeekFrom, Write},
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
use fs2::FileExt;
|
||||
#[cfg(feature = "gzip")]
|
||||
use libafl_bolts::compress::GzipCompressor;
|
||||
use serde::{Deserialize, Serialize};
|
||||
@ -33,7 +34,11 @@ use crate::{
|
||||
/// If the create fails for _any_ reason, including, but not limited to, a preexisting existing file of that name,
|
||||
/// it will instead return the respective [`io::Error`].
|
||||
fn create_new<P: AsRef<Path>>(path: P) -> Result<File, io::Error> {
|
||||
OpenOptions::new().write(true).create_new(true).open(path)
|
||||
OpenOptions::new()
|
||||
.write(true)
|
||||
.read(true)
|
||||
.create_new(true)
|
||||
.open(path)
|
||||
}
|
||||
|
||||
/// Tries to create the given `path` and returns `None` _only_ if the file already existed.
|
||||
@ -85,7 +90,7 @@ where
|
||||
fn add(&mut self, testcase: Testcase<I>) -> Result<CorpusId, Error> {
|
||||
let id = self.inner.add(testcase)?;
|
||||
let testcase = &mut self.get(id).unwrap().borrow_mut();
|
||||
self.save_testcase(testcase, id)?;
|
||||
self.save_testcase(testcase, Some(id))?;
|
||||
*testcase.input_mut() = None;
|
||||
Ok(id)
|
||||
}
|
||||
@ -95,7 +100,7 @@ where
|
||||
fn add_disabled(&mut self, testcase: Testcase<I>) -> Result<CorpusId, Error> {
|
||||
let id = self.inner.add_disabled(testcase)?;
|
||||
let testcase = &mut self.get_from_all(id).unwrap().borrow_mut();
|
||||
self.save_testcase(testcase, id)?;
|
||||
self.save_testcase(testcase, Some(id))?;
|
||||
*testcase.input_mut() = None;
|
||||
Ok(id)
|
||||
}
|
||||
@ -106,7 +111,7 @@ where
|
||||
let entry = self.inner.replace(id, testcase)?;
|
||||
self.remove_testcase(&entry)?;
|
||||
let testcase = &mut self.get(id).unwrap().borrow_mut();
|
||||
self.save_testcase(testcase, id)?;
|
||||
self.save_testcase(testcase, Some(id))?;
|
||||
*testcase.input_mut() = None;
|
||||
Ok(entry)
|
||||
}
|
||||
@ -309,12 +314,19 @@ impl<I> InMemoryOnDiskCorpus<I> {
|
||||
|
||||
/// Sets the filename for a [`Testcase`].
|
||||
/// If an error gets returned from the corpus (i.e., file exists), we'll have to retry with a different filename.
|
||||
/// Renaming testcases will most likely cause duplicate testcases to not be handled correctly
|
||||
/// if testcases with the same input are not given the same filename.
|
||||
/// Only rename when you know what you are doing.
|
||||
#[inline]
|
||||
pub fn rename_testcase(
|
||||
&self,
|
||||
testcase: &mut Testcase<I>,
|
||||
filename: String,
|
||||
) -> Result<(), Error> {
|
||||
id: Option<CorpusId>,
|
||||
) -> Result<(), Error>
|
||||
where
|
||||
I: Input,
|
||||
{
|
||||
if testcase.filename().is_some() {
|
||||
// We are renaming!
|
||||
|
||||
@ -327,36 +339,10 @@ impl<I> InMemoryOnDiskCorpus<I> {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if self.locking {
|
||||
let new_lock_filename = format!(".{new_filename}.lafl_lock");
|
||||
|
||||
// Try to create lock file for new testcases
|
||||
if let Err(err) = create_new(self.dir_path.join(&new_lock_filename)) {
|
||||
*testcase.filename_mut() = Some(old_filename);
|
||||
return Err(Error::illegal_state(format!(
|
||||
"Unable to create lock file {new_lock_filename} for new testcase: {err}"
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
||||
let new_file_path = self.dir_path.join(&new_filename);
|
||||
|
||||
fs::rename(testcase.file_path().as_ref().unwrap(), &new_file_path)?;
|
||||
|
||||
let new_metadata_path = {
|
||||
if let Some(old_metadata_path) = testcase.metadata_path() {
|
||||
// We have metadata. Let's rename it.
|
||||
let new_metadata_path = self.dir_path.join(format!(".{new_filename}.metadata"));
|
||||
fs::rename(old_metadata_path, &new_metadata_path)?;
|
||||
|
||||
Some(new_metadata_path)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
};
|
||||
|
||||
*testcase.metadata_path_mut() = new_metadata_path;
|
||||
self.remove_testcase(testcase)?;
|
||||
*testcase.filename_mut() = Some(new_filename);
|
||||
self.save_testcase(testcase, id)?;
|
||||
*testcase.file_path_mut() = Some(new_file_path);
|
||||
|
||||
Ok(())
|
||||
@ -367,42 +353,54 @@ impl<I> InMemoryOnDiskCorpus<I> {
|
||||
}
|
||||
}
|
||||
|
||||
fn save_testcase(&self, testcase: &mut Testcase<I>, id: CorpusId) -> Result<(), Error>
|
||||
fn save_testcase(&self, testcase: &mut Testcase<I>, id: Option<CorpusId>) -> Result<(), Error>
|
||||
where
|
||||
I: Input,
|
||||
{
|
||||
let file_name_orig = testcase.filename_mut().take().unwrap_or_else(|| {
|
||||
let file_name = testcase.filename_mut().take().unwrap_or_else(|| {
|
||||
// TODO walk entry metadata to ask for pieces of filename (e.g. :havoc in AFL)
|
||||
testcase.input().as_ref().unwrap().generate_name(Some(id))
|
||||
testcase.input().as_ref().unwrap().generate_name(id)
|
||||
});
|
||||
|
||||
// New testcase, we need to save it.
|
||||
let mut file_name = file_name_orig.clone();
|
||||
|
||||
let mut ctr = 2;
|
||||
let file_name = if self.locking {
|
||||
loop {
|
||||
let lockfile_name = format!(".{file_name}.lafl_lock");
|
||||
let mut ctr = String::new();
|
||||
if self.locking {
|
||||
let lockfile_name = format!(".{file_name}");
|
||||
let lockfile_path = self.dir_path.join(lockfile_name);
|
||||
|
||||
if try_create_new(lockfile_path)?.is_some() {
|
||||
break file_name;
|
||||
}
|
||||
let mut lockfile = try_create_new(&lockfile_path)?.unwrap_or(
|
||||
OpenOptions::new()
|
||||
.write(true)
|
||||
.read(true)
|
||||
.open(&lockfile_path)?,
|
||||
);
|
||||
lockfile.lock_exclusive()?;
|
||||
|
||||
file_name = format!("{file_name_orig}-{ctr}");
|
||||
ctr += 1;
|
||||
}
|
||||
lockfile.read_to_string(&mut ctr)?;
|
||||
ctr = if ctr.is_empty() {
|
||||
String::from("1")
|
||||
} else {
|
||||
file_name
|
||||
(ctr.trim().parse::<u32>()? + 1).to_string()
|
||||
};
|
||||
|
||||
lockfile.seek(SeekFrom::Start(0))?;
|
||||
lockfile.write_all(ctr.as_bytes())?;
|
||||
}
|
||||
|
||||
if testcase.file_path().is_none() {
|
||||
*testcase.file_path_mut() = Some(self.dir_path.join(&file_name));
|
||||
}
|
||||
*testcase.filename_mut() = Some(file_name);
|
||||
|
||||
if self.meta_format.is_some() {
|
||||
let metafile_name = format!(".{}.metadata", testcase.filename().as_ref().unwrap());
|
||||
let metafile_name = if self.locking {
|
||||
format!(
|
||||
".{}_{}.metadata",
|
||||
testcase.filename().as_ref().unwrap(),
|
||||
ctr
|
||||
)
|
||||
} else {
|
||||
format!(".{}.metadata", testcase.filename().as_ref().unwrap())
|
||||
};
|
||||
let metafile_path = self.dir_path.join(&metafile_name);
|
||||
let mut tmpfile_path = metafile_path.clone();
|
||||
tmpfile_path.set_file_name(format!(".{metafile_name}.tmp",));
|
||||
@ -445,15 +443,36 @@ impl<I> InMemoryOnDiskCorpus<I> {
|
||||
|
||||
fn remove_testcase(&self, testcase: &Testcase<I>) -> Result<(), Error> {
|
||||
if let Some(filename) = testcase.filename() {
|
||||
let mut ctr = String::new();
|
||||
if self.locking {
|
||||
let lockfile_path = self.dir_path.join(format!(".{filename}"));
|
||||
let mut lockfile = OpenOptions::new()
|
||||
.write(true)
|
||||
.read(true)
|
||||
.open(&lockfile_path)?;
|
||||
|
||||
lockfile.lock_exclusive()?;
|
||||
lockfile.read_to_string(&mut ctr)?;
|
||||
ctr = ctr.trim().to_string();
|
||||
|
||||
if ctr == "1" {
|
||||
FileExt::unlock(&lockfile)?;
|
||||
drop(fs::remove_file(lockfile_path));
|
||||
} else {
|
||||
lockfile.seek(SeekFrom::Start(0))?;
|
||||
lockfile.write_all(&(ctr.parse::<u32>()? - 1).to_le_bytes())?;
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
|
||||
fs::remove_file(self.dir_path.join(filename))?;
|
||||
if self.meta_format.is_some() {
|
||||
if self.locking {
|
||||
fs::remove_file(self.dir_path.join(format!(".{filename}_{ctr}.metadata")))?;
|
||||
} else {
|
||||
fs::remove_file(self.dir_path.join(format!(".{filename}.metadata")))?;
|
||||
}
|
||||
// also try to remove the corresponding `.lafl_lock` file if it still exists
|
||||
// (even though it shouldn't exist anymore, at this point in time)
|
||||
drop(fs::remove_file(
|
||||
self.dir_path.join(format!(".{filename}.lafl_lock")),
|
||||
));
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
@ -264,6 +264,8 @@ impl<I> Testcase<I> {
|
||||
}
|
||||
|
||||
/// Create a new Testcase instance given an input and a `filename`
|
||||
/// If locking is enabled, make sure that testcases with the same input have the same filename
|
||||
/// to prevent ending up with duplicate testcases
|
||||
#[inline]
|
||||
pub fn with_filename(input: I, filename: String) -> Self {
|
||||
Self {
|
||||
|
@ -28,21 +28,23 @@ pub mod nautilus;
|
||||
|
||||
use alloc::{
|
||||
boxed::Box,
|
||||
string::{String, ToString},
|
||||
string::String,
|
||||
vec::{Drain, Splice, Vec},
|
||||
};
|
||||
use core::{
|
||||
clone::Clone,
|
||||
fmt::Debug,
|
||||
hash::Hash,
|
||||
marker::PhantomData,
|
||||
ops::{DerefMut, RangeBounds},
|
||||
};
|
||||
#[cfg(feature = "std")]
|
||||
use std::{fs::File, hash::Hash, io::Read, path::Path};
|
||||
use std::{fs::File, io::Read, path::Path};
|
||||
|
||||
#[cfg(feature = "std")]
|
||||
use libafl_bolts::fs::write_file_atomic;
|
||||
use libafl_bolts::{
|
||||
generic_hash_std,
|
||||
ownedref::{OwnedMutSlice, OwnedSlice},
|
||||
subrange::{SubRangeMutSlice, SubRangeSlice},
|
||||
Error, HasLen,
|
||||
@ -55,7 +57,7 @@ use crate::corpus::CorpusId;
|
||||
|
||||
/// An input for the target
|
||||
#[cfg(not(feature = "std"))]
|
||||
pub trait Input: Clone + Serialize + serde::de::DeserializeOwned + Debug {
|
||||
pub trait Input: Clone + Serialize + serde::de::DeserializeOwned + Debug + Hash {
|
||||
/// Write this input to the file
|
||||
fn to_file<P>(&self, _path: P) -> Result<(), Error> {
|
||||
Err(Error::not_implemented("Not supported in no_std"))
|
||||
@ -67,12 +69,14 @@ pub trait Input: Clone + Serialize + serde::de::DeserializeOwned + Debug {
|
||||
}
|
||||
|
||||
/// Generate a name for this input
|
||||
fn generate_name(&self, id: Option<CorpusId>) -> String;
|
||||
fn generate_name(&self, _id: Option<CorpusId>) -> String {
|
||||
format!("{:016x}", generic_hash_std(self))
|
||||
}
|
||||
}
|
||||
|
||||
/// An input for the target
|
||||
#[cfg(feature = "std")]
|
||||
pub trait Input: Clone + Serialize + serde::de::DeserializeOwned + Debug {
|
||||
pub trait Input: Clone + Serialize + serde::de::DeserializeOwned + Debug + Hash {
|
||||
/// Write this input to the file
|
||||
fn to_file<P>(&self, path: P) -> Result<(), Error>
|
||||
where
|
||||
@ -93,7 +97,9 @@ pub trait Input: Clone + Serialize + serde::de::DeserializeOwned + Debug {
|
||||
}
|
||||
|
||||
/// Generate a name for this input, the user is responsible for making each name of testcase unique.
|
||||
fn generate_name(&self, id: Option<CorpusId>) -> String;
|
||||
fn generate_name(&self, _id: Option<CorpusId>) -> String {
|
||||
format!("{:016x}", generic_hash_std(self))
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert between two input types with a state
|
||||
@ -127,12 +133,7 @@ impl NopInput {
|
||||
}
|
||||
}
|
||||
|
||||
impl Input for NopInput {
|
||||
fn generate_name(&self, _id: Option<CorpusId>) -> String {
|
||||
"nop-input".to_string()
|
||||
}
|
||||
}
|
||||
|
||||
impl Input for NopInput {}
|
||||
impl HasTargetBytes for NopInput {
|
||||
fn target_bytes(&self) -> OwnedSlice<u8> {
|
||||
OwnedSlice::from(vec![0])
|
||||
|
@ -16,7 +16,7 @@ use crate::{corpus::CorpusId, inputs::Input};
|
||||
|
||||
/// An input composed of multiple parts. Use in situations where subcomponents are not necessarily
|
||||
/// related, or represent distinct parts of the input.
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
#[derive(Clone, Debug, Serialize, Deserialize, Hash)]
|
||||
pub struct MultipartInput<I> {
|
||||
parts: Vec<I>,
|
||||
names: Vec<String>,
|
||||
|
@ -1,7 +1,7 @@
|
||||
//! Input for the [`Nautilus`](https://github.com/RUB-SysSec/nautilus) grammar fuzzer methods
|
||||
//!
|
||||
//!
|
||||
use alloc::{rc::Rc, string::String, vec::Vec};
|
||||
use alloc::{rc::Rc, vec::Vec};
|
||||
use core::cell::RefCell;
|
||||
use std::hash::{Hash, Hasher};
|
||||
|
||||
@ -15,7 +15,6 @@ use crate::{
|
||||
rule::RuleIdOrCustom,
|
||||
tree::{Tree, TreeLike},
|
||||
},
|
||||
corpus::CorpusId,
|
||||
generators::nautilus::NautilusContext,
|
||||
inputs::{BytesInput, Input, InputConverter},
|
||||
Error,
|
||||
@ -28,23 +27,7 @@ pub struct NautilusInput {
|
||||
pub tree: Tree,
|
||||
}
|
||||
|
||||
impl Input for NautilusInput {
|
||||
/// Generate a name for this input
|
||||
#[must_use]
|
||||
fn generate_name(&self, id: Option<CorpusId>) -> String {
|
||||
/*let mut hasher = AHasher::new_with_keys(0, 0);
|
||||
for term in &self.terms {
|
||||
hasher.write(term.symbol.as_bytes());
|
||||
}
|
||||
format!("{:016x}", hasher.finish())*/
|
||||
|
||||
if let Some(id) = id {
|
||||
format!("id_{}", id.0)
|
||||
} else {
|
||||
"id_unknown".into()
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Input for NautilusInput {}
|
||||
|
||||
/// Rc Ref-cell from Input
|
||||
impl From<NautilusInput> for Rc<RefCell<NautilusInput>> {
|
||||
|
@ -1,9 +1,9 @@
|
||||
//! Newtype pattern style wrapper for [`super::Input`]s
|
||||
|
||||
use alloc::{string::String, vec::Vec};
|
||||
use alloc::vec::Vec;
|
||||
use core::{fmt::Debug, hash::Hash};
|
||||
|
||||
use libafl_bolts::{generic_hash_std, rands::Rand};
|
||||
use libafl_bolts::rands::Rand;
|
||||
use serde::{Deserialize, Serialize};
|
||||
#[cfg(feature = "std")]
|
||||
use {
|
||||
@ -12,7 +12,7 @@ use {
|
||||
};
|
||||
|
||||
use super::Input;
|
||||
use crate::{corpus::CorpusId, mutators::numeric::Numeric};
|
||||
use crate::mutators::numeric::Numeric;
|
||||
|
||||
/// Newtype pattern wrapper around an underlying structure to implement inputs
|
||||
///
|
||||
@ -56,11 +56,7 @@ impl<I: Copy> Copy for ValueInput<I> {}
|
||||
macro_rules! impl_input_for_value_input {
|
||||
($($t:ty => $name:ident),+ $(,)?) => {
|
||||
$(
|
||||
impl Input for ValueInput<$t> {
|
||||
fn generate_name(&self, _id: Option<CorpusId>) -> String {
|
||||
format!("{:016x}", generic_hash_std(self))
|
||||
}
|
||||
}
|
||||
impl Input for ValueInput<$t> {}
|
||||
|
||||
/// Input wrapping a <$t>
|
||||
pub type $name = ValueInput<$t>;
|
||||
@ -86,10 +82,6 @@ impl_input_for_value_input!(
|
||||
|
||||
/// manually implemented because files can be written more efficiently
|
||||
impl Input for ValueInput<Vec<u8>> {
|
||||
fn generate_name(&self, _id: Option<CorpusId>) -> String {
|
||||
format!("{:016x}", generic_hash_std(self))
|
||||
}
|
||||
|
||||
/// Write this input to the file
|
||||
#[cfg(feature = "std")]
|
||||
fn to_file<P>(&self, path: P) -> Result<(), Error>
|
||||
|
@ -255,7 +255,7 @@ fn parse_path(s: &str) -> PathBuf {
|
||||
let s = s.trim();
|
||||
|
||||
// If first and last character is a quote, let's remove them
|
||||
let s = if s.starts_with('\"') && s.ends_with('\"'){
|
||||
let s = if s.starts_with('\"') && s.ends_with('\"') {
|
||||
&s[1..s.len() - 1]
|
||||
} else {
|
||||
s
|
||||
|
Loading…
x
Reference in New Issue
Block a user