From 4ae6f34ab4597f77d27debf2bb99d54ba4ba270a Mon Sep 17 00:00:00 2001 From: Henry Chu Date: Mon, 5 May 2025 15:11:20 +0800 Subject: [PATCH] Split nautilus Python dependencies into separate feature (#3191) * Split nautilus Python dependencies into separate feature * Fix Nautilus imports order and tidy formatting * Refactor tree generation to reduce code duplication * Make plain_or_script_rule mutable --- libafl/Cargo.toml | 11 ++---- .../src/common/nautilus/grammartec/context.rs | 2 ++ libafl/src/common/nautilus/grammartec/mod.rs | 2 +- libafl/src/common/nautilus/grammartec/rule.rs | 13 ++++++- libafl/src/common/nautilus/grammartec/tree.rs | 35 ++++++++++++++----- libafl/src/generators/nautilus.rs | 25 +++++++++---- 6 files changed, 62 insertions(+), 26 deletions(-) diff --git a/libafl/Cargo.toml b/libafl/Cargo.toml index fc9172798c..471f6cebf0 100644 --- a/libafl/Cargo.toml +++ b/libafl/Cargo.toml @@ -187,14 +187,9 @@ llmp_small_maps = [ ] # reduces initial map size for llmp ## Grammar mutator. Requires nightly. 
-nautilus = [ - "std", - "serde_json/std", - "dep:pyo3", - "rand_trait", - "regex-syntax", - "regex", -] +nautilus = ["std", "serde_json/std", "rand_trait", "regex-syntax", "regex"] + +nautilus_py = ["nautilus", "dep:pyo3"] ## Use the best SIMD implementation by our benchmark simd = ["libafl_bolts/simd"] diff --git a/libafl/src/common/nautilus/grammartec/context.rs b/libafl/src/common/nautilus/grammartec/context.rs index cd4068d014..1912ddb18e 100644 --- a/libafl/src/common/nautilus/grammartec/context.rs +++ b/libafl/src/common/nautilus/grammartec/context.rs @@ -5,6 +5,7 @@ use libafl_bolts::{ nonzero, rands::{Rand, RomuDuoJrRand}, }; +#[cfg(feature = "nautilus_py")] use pyo3::prelude::PyObject; use super::{ @@ -83,6 +84,7 @@ impl Context { rid } + #[cfg(feature = "nautilus_py")] pub fn add_script(&mut self, nt: &str, nts: &[String], script: PyObject) -> RuleId { let rid = self.rules.len().into(); let rule = Rule::from_script(self, nt, nts, script); diff --git a/libafl/src/common/nautilus/grammartec/mod.rs b/libafl/src/common/nautilus/grammartec/mod.rs index 7d541b7880..53d74acde6 100644 --- a/libafl/src/common/nautilus/grammartec/mod.rs +++ b/libafl/src/common/nautilus/grammartec/mod.rs @@ -2,7 +2,7 @@ pub mod chunkstore; pub mod context; pub mod mutator; pub mod newtypes; -#[cfg(feature = "nautilus")] +#[cfg(feature = "nautilus_py")] pub mod python_grammar_loader; pub mod recursion_info; pub mod rule; diff --git a/libafl/src/common/nautilus/grammartec/rule.rs b/libafl/src/common/nautilus/grammartec/rule.rs index 62c4300957..22f8e17c10 100644 --- a/libafl/src/common/nautilus/grammartec/rule.rs +++ b/libafl/src/common/nautilus/grammartec/rule.rs @@ -2,6 +2,7 @@ use alloc::{string::String, vec::Vec}; use std::sync::OnceLock; use libafl_bolts::rands::Rand; +#[cfg(feature = "nautilus_py")] use pyo3::prelude::{PyObject, Python}; use regex_syntax::hir::Hir; use serde::{Deserialize, Serialize}; @@ -91,6 +92,7 @@ impl RuleIdOrCustom { #[derive(Clone, Debug)] pub enum Rule 
{ Plain(PlainRule), + #[cfg(feature = "nautilus_py")] Script(ScriptRule), RegExp(RegExpRule), } @@ -108,6 +110,7 @@ impl RegExpRule { } } +#[cfg(feature = "nautilus_py")] #[derive(Debug)] pub struct ScriptRule { pub nonterm: NTermId, @@ -115,6 +118,7 @@ pub struct ScriptRule { pub script: PyObject, } +#[cfg(feature = "nautilus_py")] impl ScriptRule { #[must_use] pub fn debug_show(&self, ctx: &Context) -> String { @@ -148,6 +152,7 @@ impl PlainRule { } } +#[cfg(feature = "nautilus_py")] impl Clone for ScriptRule { fn clone(&self) -> Self { Python::with_gil(|py| ScriptRule { @@ -159,6 +164,7 @@ impl Clone for ScriptRule { } impl Rule { + #[cfg(feature = "nautilus_py")] pub fn from_script( ctx: &mut Context, nonterm: &str, @@ -189,6 +195,7 @@ impl Rule { pub fn debug_show(&self, ctx: &Context) -> String { match self { Self::Plain(r) => r.debug_show(ctx), + #[cfg(feature = "nautilus_py")] Self::Script(r) => r.debug_show(ctx), Self::RegExp(r) => r.debug_show(ctx), } @@ -281,6 +288,7 @@ impl Rule { #[must_use] pub fn nonterms(&self) -> &[NTermId] { match self { + #[cfg(feature = "nautilus_py")] Rule::Script(r) => &r.nonterms, Rule::Plain(r) => &r.nonterms, Rule::RegExp(_) => &[], @@ -295,6 +303,7 @@ impl Rule { #[must_use] pub fn nonterm(&self) -> NTermId { match self { + #[cfg(feature = "nautilus_py")] Rule::Script(r) => r.nonterm, Rule::Plain(r) => r.nonterm, Rule::RegExp(r) => r.nonterm, @@ -340,7 +349,9 @@ impl Rule { //get a rule that can be used with the remaining length let rid = ctx.get_random_rule_for_nt(rand, *nt, cur_child_max_len); let rule_or_custom = match ctx.get_rule(rid) { - Rule::Plain(_) | Rule::Script(_) => RuleIdOrCustom::Rule(rid), + Rule::Plain(_) => RuleIdOrCustom::Rule(rid), + #[cfg(feature = "nautilus_py")] + Rule::Script(_) => RuleIdOrCustom::Rule(rid), Rule::RegExp(RegExpRule { hir, .. 
}) => { RuleIdOrCustom::Custom(rid, regex_mutator::generate(rand, hir)) } diff --git a/libafl/src/common/nautilus/grammartec/tree.rs b/libafl/src/common/nautilus/grammartec/tree.rs index e679399531..6ce0c2b200 100644 --- a/libafl/src/common/nautilus/grammartec/tree.rs +++ b/libafl/src/common/nautilus/grammartec/tree.rs @@ -4,6 +4,7 @@ use std::io::{Cursor, Write, stdout}; use hashbrown::HashSet; use libafl_bolts::rands::Rand; +#[cfg(feature = "nautilus_py")] use pyo3::{ PyTypeInfo, prelude::{PyObject, PyResult, Python}, @@ -11,18 +12,22 @@ use pyo3::{ }; use serde::{Deserialize, Serialize}; +#[cfg(feature = "nautilus_py")] +use super::rule::ScriptRule; use super::{ super::regex_mutator, context::Context, newtypes::{NTermId, NodeId, RuleId}, recursion_info::RecursionInfo, - rule::{PlainRule, RegExpRule, Rule, RuleChild, RuleIdOrCustom, ScriptRule}, + rule::{PlainRule, RegExpRule, Rule, RuleChild, RuleIdOrCustom}, }; enum UnparseStep<'dat> { Term(&'dat [u8]), Nonterm(NTermId), + #[cfg(feature = "nautilus_py")] Script(usize, PyObject), + #[cfg(feature = "nautilus_py")] PushBuffer(), } @@ -55,7 +60,9 @@ impl<'data, 'tree: 'data, 'ctx: 'data, W: Write, T: TreeLike> Unparser<'data, 't match self.stack.pop() { Some(UnparseStep::Term(data)) => self.write(data), Some(UnparseStep::Nonterm(nt)) => self.nonterm(nt), + #[cfg(feature = "nautilus_py")] Some(UnparseStep::Script(num, expr)) => self.unwrap_script(num, &expr), + #[cfg(feature = "nautilus_py")] Some(UnparseStep::PushBuffer()) => self.push_buffer(), None => return false, } @@ -73,6 +80,8 @@ impl<'data, 'tree: 'data, 'ctx: 'data, W: Write, T: TreeLike> Unparser<'data, 't fn nonterm(&mut self, nt: NTermId) { self.next_rule(nt); } + + #[cfg(feature = "nautilus_py")] fn unwrap_script(&mut self, num: usize, expr: &PyObject) { Python::with_gil(|py| { self.script(py, num, expr) @@ -80,6 +89,8 @@ impl<'data, 'tree: 'data, 'ctx: 'data, W: Write, T: TreeLike> Unparser<'data, 't .unwrap(); }); } + + #[cfg(feature = 
"nautilus_py")] fn script(&mut self, py: Python, num: usize, expr: &PyObject) -> PyResult<()> { let bufs = self.buffers.split_off(self.buffers.len() - num); let bufs = bufs.into_iter().map(Cursor::into_inner).collect::<Vec<_>>(); @@ -100,6 +111,7 @@ impl<'data, 'tree: 'data, 'ctx: 'data, W: Write, T: TreeLike> Unparser<'data, 't Ok(()) } + #[cfg(feature = "nautilus_py")] fn push_buffer(&mut self) { self.buffers.push(Cursor::new(vec![])); } @@ -111,6 +123,7 @@ impl<'data, 'tree: 'data, 'ctx: 'data, W: Write, T: TreeLike> Unparser<'data, 't self.i += 1; match rule { Rule::Plain(r) => self.next_plain(r), + #[cfg(feature = "nautilus_py")] Rule::Script(r) => self.next_script(r), Rule::RegExp(_) => self.next_regexp(self.tree.get_custom_rule_data(nid)), } @@ -126,6 +139,7 @@ impl<'data, 'tree: 'data, 'ctx: 'data, W: Write, T: TreeLike> Unparser<'data, 't } } + #[cfg(feature = "nautilus_py")] fn next_script(&mut self, r: &ScriptRule) { Python::with_gil(|py| { self.stack.push(UnparseStep::Script( @@ -345,15 +359,18 @@ impl Tree { max_len: usize, ctx: &Context, ) { + let mut plain_or_script_rule = || { + self.truncate(); + self.rules.push(RuleIdOrCustom::Rule(ruleid)); + self.sizes.push(0); + self.paren.push(NodeId::from(0)); + ctx.get_rule(ruleid).generate(rand, self, ctx, max_len); + self.sizes[0] = self.rules.len(); + }; match ctx.get_rule(ruleid) { - Rule::Plain(..) | Rule::Script(..) => { - self.truncate(); - self.rules.push(RuleIdOrCustom::Rule(ruleid)); - self.sizes.push(0); - self.paren.push(NodeId::from(0)); - ctx.get_rule(ruleid).generate(rand, self, ctx, max_len); - self.sizes[0] = self.rules.len(); - } + Rule::Plain(..) => plain_or_script_rule(), + #[cfg(feature = "nautilus_py")] + Rule::Script(..) => plain_or_script_rule(), Rule::RegExp(RegExpRule { hir, .. 
}) => { let rid = RuleIdOrCustom::Custom(ruleid, regex_mutator::generate(rand, hir)); self.truncate(); diff --git a/libafl/src/generators/nautilus.rs b/libafl/src/generators/nautilus.rs index abb635f170..b0c76b74c4 100644 --- a/libafl/src/generators/nautilus.rs +++ b/libafl/src/generators/nautilus.rs @@ -9,9 +9,11 @@ use std::{fs, io::BufReader, path::Path}; use libafl_bolts::rands::Rand; pub use crate::common::nautilus::grammartec::newtypes::NTermId; +#[cfg(feature = "nautilus_py")] +use crate::nautilus::grammartec::python_grammar_loader; use crate::{ Error, common::nautilus::grammartec::context::Context, generators::Generator, - inputs::nautilus::NautilusInput, nautilus::grammartec::python_grammar_loader, state::HasRand, + inputs::nautilus::NautilusInput, state::HasRand, }; /// The nautilus context for a generator @@ -87,12 +89,21 @@ impl NautilusContext { pub fn from_file<P: AsRef<Path>>(tree_depth: usize, grammar_file: P) -> Result<Self, Error> { let grammar_file = grammar_file.as_ref(); if grammar_file.extension().unwrap_or_default() == "py" { - log::debug!("Creating NautilusContext from python grammar"); - let mut ctx = python_grammar_loader::load_python_grammar( - fs::read_to_string(grammar_file)?.as_str(), - ); - ctx.initialize(tree_depth); - return Ok(Self { ctx }); + #[cfg(feature = "nautilus_py")] + { + log::debug!("Creating NautilusContext from python grammar"); + let mut ctx = python_grammar_loader::load_python_grammar( + fs::read_to_string(grammar_file)?.as_str(), + ); + ctx.initialize(tree_depth); + return Ok(Self { ctx }); + } + #[cfg(not(feature = "nautilus_py"))] + { + return Err(Error::illegal_argument(format!( + "Feature `nautilus_py` is required to load grammar from {grammar_file:?}" + ))); + } } log::debug!("Creating NautilusContext from json grammar"); let file = fs::File::open(grammar_file)?;