From 4ae6f34ab4597f77d27debf2bb99d54ba4ba270a Mon Sep 17 00:00:00 2001
From: Henry Chu <henrytech@outlook.com>
Date: Mon, 5 May 2025 15:11:20 +0800
Subject: [PATCH] Split nautilus Python dependencies into separate feature
 (#3191)

* Split nautilus Python dependencies into separate feature

* Fix Nautilus import order and tidy formatting

* Refactor tree generation to reduce code duplication

* Make plain_or_script_rule mutable
---
 libafl/Cargo.toml                             | 11 ++----
 .../src/common/nautilus/grammartec/context.rs |  2 ++
 libafl/src/common/nautilus/grammartec/mod.rs  |  2 +-
 libafl/src/common/nautilus/grammartec/rule.rs | 13 ++++++-
 libafl/src/common/nautilus/grammartec/tree.rs | 35 ++++++++++++++-----
 libafl/src/generators/nautilus.rs             | 25 +++++++++----
 6 files changed, 62 insertions(+), 26 deletions(-)

diff --git a/libafl/Cargo.toml b/libafl/Cargo.toml
index fc9172798c..471f6cebf0 100644
--- a/libafl/Cargo.toml
+++ b/libafl/Cargo.toml
@@ -187,14 +187,9 @@ llmp_small_maps = [
 ] # reduces initial map size for llmp
 
 ## Grammar mutator. Requires nightly.
-nautilus = [
-  "std",
-  "serde_json/std",
-  "dep:pyo3",
-  "rand_trait",
-  "regex-syntax",
-  "regex",
-]
+nautilus = ["std", "serde_json/std", "rand_trait", "regex-syntax", "regex"]
+
+nautilus_py = ["nautilus", "dep:pyo3"]
 
 ## Use the best SIMD implementation by our benchmark
 simd = ["libafl_bolts/simd"]
diff --git a/libafl/src/common/nautilus/grammartec/context.rs b/libafl/src/common/nautilus/grammartec/context.rs
index cd4068d014..1912ddb18e 100644
--- a/libafl/src/common/nautilus/grammartec/context.rs
+++ b/libafl/src/common/nautilus/grammartec/context.rs
@@ -5,6 +5,7 @@ use libafl_bolts::{
     nonzero,
     rands::{Rand, RomuDuoJrRand},
 };
+#[cfg(feature = "nautilus_py")]
 use pyo3::prelude::PyObject;
 
 use super::{
@@ -83,6 +84,7 @@ impl Context {
         rid
     }
 
+    #[cfg(feature = "nautilus_py")]
     pub fn add_script(&mut self, nt: &str, nts: &[String], script: PyObject) -> RuleId {
         let rid = self.rules.len().into();
         let rule = Rule::from_script(self, nt, nts, script);
diff --git a/libafl/src/common/nautilus/grammartec/mod.rs b/libafl/src/common/nautilus/grammartec/mod.rs
index 7d541b7880..53d74acde6 100644
--- a/libafl/src/common/nautilus/grammartec/mod.rs
+++ b/libafl/src/common/nautilus/grammartec/mod.rs
@@ -2,7 +2,7 @@ pub mod chunkstore;
 pub mod context;
 pub mod mutator;
 pub mod newtypes;
-#[cfg(feature = "nautilus")]
+#[cfg(feature = "nautilus_py")]
 pub mod python_grammar_loader;
 pub mod recursion_info;
 pub mod rule;
diff --git a/libafl/src/common/nautilus/grammartec/rule.rs b/libafl/src/common/nautilus/grammartec/rule.rs
index 62c4300957..22f8e17c10 100644
--- a/libafl/src/common/nautilus/grammartec/rule.rs
+++ b/libafl/src/common/nautilus/grammartec/rule.rs
@@ -2,6 +2,7 @@ use alloc::{string::String, vec::Vec};
 use std::sync::OnceLock;
 
 use libafl_bolts::rands::Rand;
+#[cfg(feature = "nautilus_py")]
 use pyo3::prelude::{PyObject, Python};
 use regex_syntax::hir::Hir;
 use serde::{Deserialize, Serialize};
@@ -91,6 +92,7 @@ impl RuleIdOrCustom {
 #[derive(Clone, Debug)]
 pub enum Rule {
     Plain(PlainRule),
+    #[cfg(feature = "nautilus_py")]
     Script(ScriptRule),
     RegExp(RegExpRule),
 }
@@ -108,6 +110,7 @@ impl RegExpRule {
     }
 }
 
+#[cfg(feature = "nautilus_py")]
 #[derive(Debug)]
 pub struct ScriptRule {
     pub nonterm: NTermId,
@@ -115,6 +118,7 @@ pub struct ScriptRule {
     pub script: PyObject,
 }
 
+#[cfg(feature = "nautilus_py")]
 impl ScriptRule {
     #[must_use]
     pub fn debug_show(&self, ctx: &Context) -> String {
@@ -148,6 +152,7 @@ impl PlainRule {
     }
 }
 
+#[cfg(feature = "nautilus_py")]
 impl Clone for ScriptRule {
     fn clone(&self) -> Self {
         Python::with_gil(|py| ScriptRule {
@@ -159,6 +164,7 @@ impl Clone for ScriptRule {
 }
 
 impl Rule {
+    #[cfg(feature = "nautilus_py")]
     pub fn from_script(
         ctx: &mut Context,
         nonterm: &str,
@@ -189,6 +195,7 @@ impl Rule {
     pub fn debug_show(&self, ctx: &Context) -> String {
         match self {
             Self::Plain(r) => r.debug_show(ctx),
+            #[cfg(feature = "nautilus_py")]
             Self::Script(r) => r.debug_show(ctx),
             Self::RegExp(r) => r.debug_show(ctx),
         }
@@ -281,6 +288,7 @@ impl Rule {
     #[must_use]
     pub fn nonterms(&self) -> &[NTermId] {
         match self {
+            #[cfg(feature = "nautilus_py")]
             Rule::Script(r) => &r.nonterms,
             Rule::Plain(r) => &r.nonterms,
             Rule::RegExp(_) => &[],
@@ -295,6 +303,7 @@ impl Rule {
     #[must_use]
     pub fn nonterm(&self) -> NTermId {
         match self {
+            #[cfg(feature = "nautilus_py")]
             Rule::Script(r) => r.nonterm,
             Rule::Plain(r) => r.nonterm,
             Rule::RegExp(r) => r.nonterm,
@@ -340,7 +349,9 @@ impl Rule {
             //get a rule that can be used with the remaining length
             let rid = ctx.get_random_rule_for_nt(rand, *nt, cur_child_max_len);
             let rule_or_custom = match ctx.get_rule(rid) {
-                Rule::Plain(_) | Rule::Script(_) => RuleIdOrCustom::Rule(rid),
+                Rule::Plain(_) => RuleIdOrCustom::Rule(rid),
+                #[cfg(feature = "nautilus_py")]
+                Rule::Script(_) => RuleIdOrCustom::Rule(rid),
                 Rule::RegExp(RegExpRule { hir, .. }) => {
                     RuleIdOrCustom::Custom(rid, regex_mutator::generate(rand, hir))
                 }
diff --git a/libafl/src/common/nautilus/grammartec/tree.rs b/libafl/src/common/nautilus/grammartec/tree.rs
index e679399531..6ce0c2b200 100644
--- a/libafl/src/common/nautilus/grammartec/tree.rs
+++ b/libafl/src/common/nautilus/grammartec/tree.rs
@@ -4,6 +4,7 @@ use std::io::{Cursor, Write, stdout};
 
 use hashbrown::HashSet;
 use libafl_bolts::rands::Rand;
+#[cfg(feature = "nautilus_py")]
 use pyo3::{
     PyTypeInfo,
     prelude::{PyObject, PyResult, Python},
@@ -11,18 +12,22 @@ use pyo3::{
 };
 use serde::{Deserialize, Serialize};
 
+#[cfg(feature = "nautilus_py")]
+use super::rule::ScriptRule;
 use super::{
     super::regex_mutator,
     context::Context,
     newtypes::{NTermId, NodeId, RuleId},
     recursion_info::RecursionInfo,
-    rule::{PlainRule, RegExpRule, Rule, RuleChild, RuleIdOrCustom, ScriptRule},
+    rule::{PlainRule, RegExpRule, Rule, RuleChild, RuleIdOrCustom},
 };
 
 enum UnparseStep<'dat> {
     Term(&'dat [u8]),
     Nonterm(NTermId),
+    #[cfg(feature = "nautilus_py")]
     Script(usize, PyObject),
+    #[cfg(feature = "nautilus_py")]
     PushBuffer(),
 }
 
@@ -55,7 +60,9 @@ impl<'data, 'tree: 'data, 'ctx: 'data, W: Write, T: TreeLike> Unparser<'data, 't
         match self.stack.pop() {
             Some(UnparseStep::Term(data)) => self.write(data),
             Some(UnparseStep::Nonterm(nt)) => self.nonterm(nt),
+            #[cfg(feature = "nautilus_py")]
             Some(UnparseStep::Script(num, expr)) => self.unwrap_script(num, &expr),
+            #[cfg(feature = "nautilus_py")]
             Some(UnparseStep::PushBuffer()) => self.push_buffer(),
             None => return false,
         }
@@ -73,6 +80,8 @@ impl<'data, 'tree: 'data, 'ctx: 'data, W: Write, T: TreeLike> Unparser<'data, 't
     fn nonterm(&mut self, nt: NTermId) {
         self.next_rule(nt);
     }
+
+    #[cfg(feature = "nautilus_py")]
     fn unwrap_script(&mut self, num: usize, expr: &PyObject) {
         Python::with_gil(|py| {
             self.script(py, num, expr)
@@ -80,6 +89,8 @@ impl<'data, 'tree: 'data, 'ctx: 'data, W: Write, T: TreeLike> Unparser<'data, 't
                 .unwrap();
         });
     }
+
+    #[cfg(feature = "nautilus_py")]
     fn script(&mut self, py: Python, num: usize, expr: &PyObject) -> PyResult<()> {
         let bufs = self.buffers.split_off(self.buffers.len() - num);
         let bufs = bufs.into_iter().map(Cursor::into_inner).collect::<Vec<_>>();
@@ -100,6 +111,7 @@ impl<'data, 'tree: 'data, 'ctx: 'data, W: Write, T: TreeLike> Unparser<'data, 't
         Ok(())
     }
 
+    #[cfg(feature = "nautilus_py")]
     fn push_buffer(&mut self) {
         self.buffers.push(Cursor::new(vec![]));
     }
@@ -111,6 +123,7 @@ impl<'data, 'tree: 'data, 'ctx: 'data, W: Write, T: TreeLike> Unparser<'data, 't
         self.i += 1;
         match rule {
             Rule::Plain(r) => self.next_plain(r),
+            #[cfg(feature = "nautilus_py")]
             Rule::Script(r) => self.next_script(r),
             Rule::RegExp(_) => self.next_regexp(self.tree.get_custom_rule_data(nid)),
         }
@@ -126,6 +139,7 @@ impl<'data, 'tree: 'data, 'ctx: 'data, W: Write, T: TreeLike> Unparser<'data, 't
         }
     }
 
+    #[cfg(feature = "nautilus_py")]
     fn next_script(&mut self, r: &ScriptRule) {
         Python::with_gil(|py| {
             self.stack.push(UnparseStep::Script(
@@ -345,15 +359,18 @@ impl Tree {
         max_len: usize,
         ctx: &Context,
     ) {
+        let mut plain_or_script_rule = || {
+            self.truncate();
+            self.rules.push(RuleIdOrCustom::Rule(ruleid));
+            self.sizes.push(0);
+            self.paren.push(NodeId::from(0));
+            ctx.get_rule(ruleid).generate(rand, self, ctx, max_len);
+            self.sizes[0] = self.rules.len();
+        };
         match ctx.get_rule(ruleid) {
-            Rule::Plain(..) | Rule::Script(..) => {
-                self.truncate();
-                self.rules.push(RuleIdOrCustom::Rule(ruleid));
-                self.sizes.push(0);
-                self.paren.push(NodeId::from(0));
-                ctx.get_rule(ruleid).generate(rand, self, ctx, max_len);
-                self.sizes[0] = self.rules.len();
-            }
+            Rule::Plain(..) => plain_or_script_rule(),
+            #[cfg(feature = "nautilus_py")]
+            Rule::Script(..) => plain_or_script_rule(),
             Rule::RegExp(RegExpRule { hir, .. }) => {
                 let rid = RuleIdOrCustom::Custom(ruleid, regex_mutator::generate(rand, hir));
                 self.truncate();
diff --git a/libafl/src/generators/nautilus.rs b/libafl/src/generators/nautilus.rs
index abb635f170..b0c76b74c4 100644
--- a/libafl/src/generators/nautilus.rs
+++ b/libafl/src/generators/nautilus.rs
@@ -9,9 +9,11 @@ use std::{fs, io::BufReader, path::Path};
 use libafl_bolts::rands::Rand;
 
 pub use crate::common::nautilus::grammartec::newtypes::NTermId;
+#[cfg(feature = "nautilus_py")]
+use crate::nautilus::grammartec::python_grammar_loader;
 use crate::{
     Error, common::nautilus::grammartec::context::Context, generators::Generator,
-    inputs::nautilus::NautilusInput, nautilus::grammartec::python_grammar_loader, state::HasRand,
+    inputs::nautilus::NautilusInput, state::HasRand,
 };
 
 /// The nautilus context for a generator
@@ -87,12 +89,21 @@ impl NautilusContext {
     pub fn from_file<P: AsRef<Path>>(tree_depth: usize, grammar_file: P) -> Result<Self, Error> {
         let grammar_file = grammar_file.as_ref();
         if grammar_file.extension().unwrap_or_default() == "py" {
-            log::debug!("Creating NautilusContext from python grammar");
-            let mut ctx = python_grammar_loader::load_python_grammar(
-                fs::read_to_string(grammar_file)?.as_str(),
-            );
-            ctx.initialize(tree_depth);
-            return Ok(Self { ctx });
+            #[cfg(feature = "nautilus_py")]
+            {
+                log::debug!("Creating NautilusContext from python grammar");
+                let mut ctx = python_grammar_loader::load_python_grammar(
+                    fs::read_to_string(grammar_file)?.as_str(),
+                );
+                ctx.initialize(tree_depth);
+                return Ok(Self { ctx });
+            }
+            #[cfg(not(feature = "nautilus_py"))]
+            {
+                return Err(Error::illegal_argument(format!(
+                    "Feature `nautilus_py` is required to load grammar from {grammar_file:?}"
+                )));
+            }
         }
         log::debug!("Creating NautilusContext from json grammar");
         let file = fs::File::open(grammar_file)?;