Cargo feature to avoid regex dependency (#1102)

`regex` is a large crate that is only used in a few specific places. Users should be able to avoid this transitive dependency when they do not use the features that need it.
2023-03-03 10:00:49 -05:00 · 2023-03-03 10:00:49 -05:00 · 35e5b87188
commit 35e5b87188
parent 5b4ae61cdd
4 changed files with 20 additions and 15 deletions
--- a/libafl/Cargo.toml
+++ b/libafl/Cargo.toml
@ -12,8 +12,8 @@ edition = "2021"
 categories = ["development-tools::testing", "emulators", "embedded", "os", "no-std"]
 [features]
-default = ["std", "derive", "llmp_compression", "llmp_small_maps", "llmp_broker_timeouts", "rand_trait", "fork", "prelude", "gzip"]
+default = ["std", "derive", "llmp_compression", "llmp_small_maps", "llmp_broker_timeouts", "rand_trait", "fork", "prelude", "gzip", "regex"]
-std = ["serde_json", "serde_json/std", "hostname", "nix", "serde/std", "bincode", "wait-timeout", "regex", "byteorder", "once_cell", "uuid", "tui_monitor", "ctor", "backtrace", "uds"] # print, env, launcher ... support
+std = ["serde_json", "serde_json/std", "hostname", "nix", "serde/std", "bincode", "wait-timeout", "byteorder", "once_cell", "uuid", "tui_monitor", "ctor", "backtrace", "uds"] # print, env, launcher ... support
 derive = ["libafl_derive"] # provide derive(SerdeAny) macro.
 fork = [] # uses the fork() syscall to spawn children, instead of launching a new command, if supported by the OS (has no effect on Windows, no_std).
 rand_trait = ["rand_core"] # If set, libafl's rand implementations will implement `rand::Rng`
@ -31,6 +31,7 @@ errors_backtrace = ["backtrace"] # Create backtraces at Error creation
 cmin = ["z3"] # for corpus minimisation
 corpus_btreemap = [] # Switches from HashMap to BTreeMap for CorpusId
 gzip = ["miniz_oxide"] # Enables gzip compression in certain parts of the lib
+regex = ["std", "dep:regex"] # enables the NaiveTokenizer and StacktraceObserver
 # features hiding dependencies licensed under GPL
 gpl = []
--- a/libafl/src/executors/forkserver.rs
+++ b/libafl/src/executors/forkserver.rs
@ -35,14 +35,14 @@ use crate::{
    executors::{Executor, ExitKind, HasObservers},
    inputs::{HasTargetBytes, Input, UsesInput},
    mutators::Tokens,
-    observers::{
+    observers::{MapObserver, Observer, ObserversTuple, UsesObservers},
-        get_asan_runtime_flags_with_log_path, AsanBacktraceObserver, MapObserver, Observer,
-        ObserversTuple, UsesObservers,
-    },
    state::UsesState,
    Error,
 };
+#[cfg(feature = "regex")]
+use crate::observers::{get_asan_runtime_flags_with_log_path, AsanBacktraceObserver};
 const FORKSRV_FD: i32 = 198;
 #[allow(clippy::cast_possible_wrap)]
 const FS_OPT_ENABLED: i32 = 0x80000001_u32 as i32;
@ -232,9 +232,11 @@ impl Forkserver {
            command.env("__AFL_DEFER_FORKSRV", "1");
        }
+        #[cfg(feature = "regex")]
+        command.env("ASAN_OPTIONS", get_asan_runtime_flags_with_log_path());
        match command
            .env("LD_BIND_NOW", "1")
-            .env("ASAN_OPTIONS", get_asan_runtime_flags_with_log_path())
            .envs(envs)
            .setlimit(memlimit)
            .setsid()
@ -1129,6 +1131,7 @@ where
        if libc::WIFSIGNALED(self.forkserver.status()) {
            exit_kind = ExitKind::Crash;
+            #[cfg(feature = "regex")]
            if self.has_asan_observer.is_none() {
                self.has_asan_observer = Some(
                    self.observers()
@ -1136,6 +1139,7 @@ where
                        .is_some(),
                );
            }
+            #[cfg(feature = "regex")]
            if self.has_asan_observer.unwrap() {
                self.observers_mut()
                    .match_name_mut::<AsanBacktraceObserver>("AsanBacktraceObserver")
--- a/libafl/src/inputs/encoded.rs
+++ b/libafl/src/inputs/encoded.rs
@ -15,7 +15,7 @@ use core::{
 use ahash::RandomState;
 use hashbrown::HashMap;
-#[cfg(feature = "std")]
+#[cfg(feature = "regex")]
 use regex::Regex;
 use serde::{Deserialize, Serialize};
@ -108,7 +108,7 @@ impl Default for TokenInputEncoderDecoder {
 }
 /// A naive tokenizer struct
-#[cfg(feature = "std")]
+#[cfg(feature = "regex")]
 #[derive(Clone, Debug)]
 pub struct NaiveTokenizer {
    /// Ident regex
@ -119,7 +119,7 @@ pub struct NaiveTokenizer {
    string_re: Regex,
 }
-#[cfg(feature = "std")]
+#[cfg(feature = "regex")]
 impl NaiveTokenizer {
    /// Creates a new [`NaiveTokenizer`]
    #[must_use]
@ -132,7 +132,7 @@ impl NaiveTokenizer {
    }
 }
-#[cfg(feature = "std")]
+#[cfg(feature = "regex")]
 impl Default for NaiveTokenizer {
    fn default() -> Self {
        Self {
@ -146,7 +146,7 @@ impl Default for NaiveTokenizer {
    }
 }
-#[cfg(feature = "std")]
+#[cfg(feature = "regex")]
 impl Tokenizer for NaiveTokenizer {
    fn tokenize(&self, bytes: &[u8]) -> Result<Vec<String>, Error> {
        let mut tokens = vec![];
@ -259,7 +259,7 @@ impl EncodedInput {
    }
 }
-#[cfg(feature = "std")]
+#[cfg(feature = "regex")]
 #[cfg(test)]
 mod tests {
    use alloc::borrow::ToOwned;
--- a/libafl/src/observers/mod.rs
+++ b/libafl/src/observers/mod.rs
@ -11,9 +11,9 @@ pub mod stdio;
 #[cfg(feature = "std")]
 pub use stdio::{StdErrObserver, StdOutObserver};
-#[cfg(feature = "std")]
+#[cfg(feature = "regex")]
 pub mod stacktrace;
-#[cfg(feature = "std")]
+#[cfg(feature = "regex")]
 pub use stacktrace::*;
 pub mod concolic;