From 55c4b0c7783b5add35829627fdfd616569669ca9 Mon Sep 17 00:00:00 2001 From: Dominik Maier Date: Mon, 13 Sep 2021 15:38:28 +0200 Subject: [PATCH] added write_file_atomic against ondisk corpus races (#294) * fix ondisk corpus race condition * move metadata name to be a dotfile * note ExitKind for crashes and timeouts in inprocess executor * potential fix for windows * added write_file_atomic * no_std fixes * no_std testcase fix * typo fix, windows * clippy * more no_std testing --- .github/workflows/build_and_test.yml | 2 + .gitignore | 2 + docs/src/design/metadata.md | 2 +- fuzzers/frida_libpng/src/fuzzer.rs | 2 +- libafl/src/bolts/fs.rs | 55 ++++++++++++++++++++++++++ libafl/src/bolts/mod.rs | 9 +++-- libafl/src/bolts/serdeany.rs | 1 - libafl/src/bolts/staterestore.rs | 1 - libafl/src/corpus/ondisk.rs | 53 +++++++++++++++++++------ libafl/src/executors/inprocess.rs | 8 +++- libafl/src/executors/mod.rs | 2 + libafl/src/inputs/bytes.rs | 15 ++----- libafl/src/inputs/mod.rs | 16 +++----- libafl/src/lib.rs | 8 ++++ libafl_concolic/symcc_runtime/build.rs | 1 + 15 files changed, 133 insertions(+), 44 deletions(-) create mode 100644 libafl/src/bolts/fs.rs diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index ebe51d77ff..b3d913e324 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -108,6 +108,8 @@ jobs: run: cd ./fuzzers/baby_no_std && cargo +nightly build -Zbuild-std=core,alloc --target aarch64-unknown-none -v --release && cd ../.. - name: run x86_64 until panic! run: cd ./fuzzers/baby_no_std && cargo +nightly run || test $? -eq 1 || exit 1 + - name: no_std tests + run: cd ./libafl && cargo test --no-default-features build-docker: runs-on: ubuntu-latest steps: diff --git a/.gitignore b/.gitignore index a6aa4df4cc..91bbb8039a 100644 --- a/.gitignore +++ b/.gitignore @@ -30,3 +30,5 @@ AFLplusplus # Ignore common dummy and logfiles *.log a + +forkserver_test \ No newline at end of file diff --git a/docs/src/design/metadata.md b/docs/src/design/metadata.md index 9eac4eae7b..9230f2551f 100644 --- a/docs/src/design/metadata.md +++ b/docs/src/design/metadata.md @@ -32,7 +32,7 @@ By default, Testcase and State implement it and hold a SerdeAnyMap testcase. We are interested to store State's Metadata to not lose them in case of crash or stop of a fuzzer. To do that, they must be serialized and unserialized using Serde. -As Metadata are stored in a SerdeAnyMap as trait objects, they cannot be deserialized using Serde by default. +As Metadata is stored in a SerdeAnyMap as trait objects, they cannot be deserialized using Serde by default. To cope with this problem, in LibAFL each SerdeAny struct must be registered in a global registry that keeps track of types and allows the (de)serialization of the registered types. diff --git a/fuzzers/frida_libpng/src/fuzzer.rs b/fuzzers/frida_libpng/src/fuzzer.rs index 107ca21aec..a907b6a495 100644 --- a/fuzzers/frida_libpng/src/fuzzer.rs +++ b/fuzzers/frida_libpng/src/fuzzer.rs @@ -16,7 +16,7 @@ use libafl::{ ondisk::OnDiskMetadataFormat, Corpus, IndexesLenTimeMinimizerCorpusScheduler, OnDiskCorpus, QueueCorpusScheduler, }, - events::{llmp::LlmpRestartingEventManager, EventConfig, HasEventManagerId}, + events::{llmp::LlmpRestartingEventManager, EventConfig}, executors::{ inprocess::InProcessExecutor, timeout::TimeoutExecutor, Executor, ExitKind, HasObservers, ShadowExecutor, diff --git a/libafl/src/bolts/fs.rs b/libafl/src/bolts/fs.rs new file mode 100644 index 0000000000..4fd755836b --- /dev/null +++ b/libafl/src/bolts/fs.rs @@ -0,0 +1,55 @@ +//! `LibAFL` functionality for filesystem interaction + +use std::{ + fs::{self, OpenOptions}, + io::Write, + path::Path, +}; + +use crate::Error; + +/// Creates a `.{file_name}.tmp` file, and writes all bytes to it. +/// After all bytes have been written, the tmp-file is moved to it's original `path`. +/// This way, on the majority of operating systems, the final file will never be incomplete or racey. +/// It will overwrite existing files with the same filename. +/// +/// # Errors +/// Can error if the file doesn't exist, or if the `.{file-name}.tmp` file already exists. +pub fn write_file_atomic

(path: P, bytes: &[u8]) -> Result<(), Error> +where + P: AsRef, +{ + fn inner(path: &Path, bytes: &[u8]) -> Result<(), Error> { + let mut tmpfile_name = path.to_path_buf(); + tmpfile_name.set_file_name(format!( + ".{}.tmp", + tmpfile_name.file_name().unwrap().to_string_lossy() + )); + + let mut tmpfile = OpenOptions::new() + .write(true) + .create_new(true) + .open(&tmpfile_name)?; + + tmpfile.write_all(bytes)?; + fs::rename(&tmpfile_name, path)?; + Ok(()) + } + inner(path.as_ref(), bytes) +} + +#[cfg(test)] +mod test { + use crate::bolts::fs::write_file_atomic; + use std::fs; + + #[test] + fn test_atomic_file_write() { + let path = "atomic_file_testfile"; + + write_file_atomic(&path, b"test").unwrap(); + let content = fs::read_to_string(&path).unwrap(); + fs::remove_file(&path).unwrap(); + assert_eq!(content, "test"); + } +} diff --git a/libafl/src/bolts/mod.rs b/libafl/src/bolts/mod.rs index a78d9817f0..9a11689142 100644 --- a/libafl/src/bolts/mod.rs +++ b/libafl/src/bolts/mod.rs @@ -1,7 +1,11 @@ //! Bolts are no conceptual fuzzing elements, but they keep libafl-based fuzzers together. pub mod bindings; +#[cfg(feature = "llmp_compression")] +pub mod compress; pub mod cpu; +#[cfg(feature = "std")] +pub mod fs; pub mod launcher; pub mod llmp; pub mod os; @@ -13,9 +17,6 @@ pub mod shmem; pub mod staterestore; pub mod tuples; -#[cfg(feature = "llmp_compression")] -pub mod compress; - use core::time; #[cfg(feature = "std")] use std::time::{SystemTime, UNIX_EPOCH}; @@ -40,7 +41,7 @@ pub fn current_time() -> time::Duration { /// which is linked into the binary and called from here. #[cfg(not(feature = "std"))] extern "C" { - #[no_mangle] + //#[no_mangle] fn external_current_millis() -> u64; } diff --git a/libafl/src/bolts/serdeany.rs b/libafl/src/bolts/serdeany.rs index 7d93967cc0..16d0e843a0 100644 --- a/libafl/src/bolts/serdeany.rs +++ b/libafl/src/bolts/serdeany.rs @@ -2,7 +2,6 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer}; -use alloc; use alloc::boxed::Box; use core::any::{Any, TypeId}; diff --git a/libafl/src/bolts/staterestore.rs b/libafl/src/bolts/staterestore.rs index d67340d74f..b006603447 100644 --- a/libafl/src/bolts/staterestore.rs +++ b/libafl/src/bolts/staterestore.rs @@ -2,7 +2,6 @@ /// Uses a [`ShMem`] up to a threshold, then write to disk. use ahash::AHasher; use core::{hash::Hasher, marker::PhantomData, mem::size_of, ptr, slice}; -use postcard; use serde::{de::DeserializeOwned, Serialize}; use std::{ env::temp_dir, diff --git a/libafl/src/corpus/ondisk.rs b/libafl/src/corpus/ondisk.rs index e538a65bdf..be5cf68a47 100644 --- a/libafl/src/corpus/ondisk.rs +++ b/libafl/src/corpus/ondisk.rs @@ -3,7 +3,7 @@ use alloc::vec::Vec; use core::cell::RefCell; use serde::{Deserialize, Serialize}; -use std::path::PathBuf; +use std::{fs::OpenOptions, path::PathBuf}; #[cfg(feature = "std")] use std::{fs, fs::File, io::Write}; @@ -50,27 +50,56 @@ where #[inline] fn add(&mut self, mut testcase: Testcase) -> Result { if testcase.filename().is_none() { - // TODO walk entry metadata to ask for pices of filename (e.g. :havoc in AFL) - let filename = self.dir_path.join( - testcase - .input() - .as_ref() - .unwrap() - .generate_name(self.entries.len()), - ); + // TODO walk entry metadata to ask for pieces of filename (e.g. :havoc in AFL) + let file_orig = testcase + .input() + .as_ref() + .unwrap() + .generate_name(self.entries.len()); + let mut file = file_orig.clone(); + + let mut ctr = 2; + let filename = loop { + let lockfile = format!(".{}.lafl_lock", file); + // try to create lockfile. + + if OpenOptions::new() + .write(true) + .create_new(true) + .open(self.dir_path.join(lockfile)) + .is_ok() + { + break self.dir_path.join(file); + } + + file = format!("{}-{}", &file_orig, ctr); + ctr += 1; + }; + let filename_str = filename.to_str().expect("Invalid Path"); testcase.set_filename(filename_str.into()); }; if self.meta_format.is_some() { - let filename = testcase.filename().as_ref().unwrap().clone() + ".metadata"; - let mut file = File::create(filename)?; + let mut filename = PathBuf::from(testcase.filename().as_ref().unwrap()); + filename.set_file_name(format!( + ".{}.metadata", + filename.file_name().unwrap().to_string_lossy() + )); + let mut tmpfile_name = PathBuf::from(&filename); + tmpfile_name.set_file_name(format!( + ".{}.tmp", + tmpfile_name.file_name().unwrap().to_string_lossy() + )); + + let mut tmpfile = File::create(&tmpfile_name)?; let serialized = match self.meta_format.as_ref().unwrap() { OnDiskMetadataFormat::Postcard => postcard::to_allocvec(testcase.metadata())?, OnDiskMetadataFormat::Json => serde_json::to_vec(testcase.metadata())?, OnDiskMetadataFormat::JsonPretty => serde_json::to_vec_pretty(testcase.metadata())?, }; - file.write_all(&serialized)?; + tmpfile.write_all(&serialized)?; + fs::rename(&tmpfile_name, &filename)?; } testcase .store_input() diff --git a/libafl/src/executors/inprocess.rs b/libafl/src/executors/inprocess.rs index 1953a4401f..751b9b5ef9 100644 --- a/libafl/src/executors/inprocess.rs +++ b/libafl/src/executors/inprocess.rs @@ -306,7 +306,7 @@ mod unix_signal_handler { fuzzer::HasObjective, inputs::Input, observers::ObserversTuple, - state::{HasClientPerfStats, HasSolutions}, + state::{HasClientPerfStats, HasMetadata, HasSolutions}, }; pub type HandlerFuncPtr = @@ -397,6 +397,7 @@ mod unix_signal_handler { if interesting { let mut new_testcase = Testcase::new(input.clone()); + new_testcase.add_metadata(ExitKind::Timeout); fuzzer .objective_mut() .append_metadata(state, &mut new_testcase) @@ -560,6 +561,7 @@ mod unix_signal_handler { if interesting { let new_input = input.clone(); let mut new_testcase = Testcase::new(new_input); + new_testcase.add_metadata(ExitKind::Crash); fuzzer .objective_mut() .append_metadata(state, &mut new_testcase) @@ -617,7 +619,7 @@ mod windows_exception_handler { fuzzer::HasObjective, inputs::Input, observers::ObserversTuple, - state::{HasClientPerfStats, HasSolutions}, + state::{HasClientPerfStats, HasMetadata, HasSolutions}, }; pub type HandlerFuncPtr = @@ -686,6 +688,7 @@ mod windows_exception_handler { if interesting { let mut new_testcase = Testcase::new(input.clone()); + new_testcase.add_metadata(ExitKind::Timeout); fuzzer .objective_mut() .append_metadata(state, &mut new_testcase) @@ -765,6 +768,7 @@ mod windows_exception_handler { if interesting { let new_input = input.clone(); let mut new_testcase = Testcase::new(new_input); + new_testcase.add_metadata(ExitKind::Crash); fuzzer .objective_mut() .append_metadata(state, &mut new_testcase) diff --git a/libafl/src/executors/mod.rs b/libafl/src/executors/mod.rs index f90f6b63bc..db7f2d9823 100644 --- a/libafl/src/executors/mod.rs +++ b/libafl/src/executors/mod.rs @@ -50,6 +50,8 @@ pub enum ExitKind { // Custom(Box), } +crate::impl_serdeany!(ExitKind); + /// Holds a tuple of Observers pub trait HasObservers where diff --git a/libafl/src/inputs/bytes.rs b/libafl/src/inputs/bytes.rs index c07c2b509d..5006abef2b 100644 --- a/libafl/src/inputs/bytes.rs +++ b/libafl/src/inputs/bytes.rs @@ -2,20 +2,15 @@ //! (As opposed to other, more abstract, imputs, like an Grammar-Based AST Input) use ahash::AHasher; -use core::hash::Hasher; - use alloc::{borrow::ToOwned, rc::Rc, string::String, vec::Vec}; +use core::hash::Hasher; use core::{cell::RefCell, convert::From}; use serde::{Deserialize, Serialize}; #[cfg(feature = "std")] -use std::{ - fs::File, - io::{Read, Write}, - path::Path, -}; +use std::{fs::File, io::Read, path::Path}; #[cfg(feature = "std")] -use crate::Error; +use crate::{bolts::fs::write_file_atomic, Error}; use crate::{ bolts::ownedref::OwnedSlice, inputs::{HasBytesVec, HasLen, HasTargetBytes, Input}, @@ -35,9 +30,7 @@ impl Input for BytesInput { where P: AsRef, { - let mut file = File::create(path)?; - file.write_all(&self.bytes)?; - Ok(()) + write_file_atomic(path, &self.bytes) } /// Load the contents of this input from a file diff --git a/libafl/src/inputs/mod.rs b/libafl/src/inputs/mod.rs index 0c364b9540..8a544f29aa 100644 --- a/libafl/src/inputs/mod.rs +++ b/libafl/src/inputs/mod.rs @@ -11,15 +11,12 @@ use alloc::{ vec::Vec, }; use core::{clone::Clone, fmt::Debug}; -#[cfg(feature = "std")] -use std::{ - fs::File, - io::{Read, Write}, - path::Path, -}; - use serde::{Deserialize, Serialize}; +#[cfg(feature = "std")] +use std::{fs::File, io::Read, path::Path}; +#[cfg(feature = "std")] +use crate::bolts::fs::write_file_atomic; use crate::{bolts::ownedref::OwnedSlice, Error}; /// An input for the target @@ -30,10 +27,7 @@ pub trait Input: Clone + serde::Serialize + serde::de::DeserializeOwned + Debug where P: AsRef, { - let mut file = File::create(path)?; - let serialized = postcard::to_allocvec(self)?; - file.write_all(&serialized)?; - Ok(()) + write_file_atomic(path, &postcard::to_allocvec(self)?) } #[cfg(not(feature = "std"))] diff --git a/libafl/src/lib.rs b/libafl/src/lib.rs index d8f5871292..cbbdc91be5 100644 --- a/libafl/src/lib.rs +++ b/libafl/src/lib.rs @@ -242,3 +242,11 @@ mod tests { assert_eq!(state.corpus().count(), corpus_deserialized.count()); } } + +#[cfg(all(test, not(feature = "std")))] +/// Provide custom time in `no_std` tests. +#[no_mangle] +pub extern "C" fn external_current_millis() -> u64 { + // TODO: use "real" time here + 1000 +} diff --git a/libafl_concolic/symcc_runtime/build.rs b/libafl_concolic/symcc_runtime/build.rs index 26b9e19582..7b2eba1739 100644 --- a/libafl_concolic/symcc_runtime/build.rs +++ b/libafl_concolic/symcc_runtime/build.rs @@ -200,6 +200,7 @@ fn write_symcc_rename_header(rename_header_path: &Path, cpp_bindings: &bindgen:: ) .unwrap(); + #[allow(clippy::filter_map)] cpp_bindings .to_string() .lines()