Split nautilus Python dependencies into a separate feature (#3191)

* Split nautilus Python dependencies into a separate feature

* Fix Nautilus import order and tidy formatting

* Refactor tree generation to reduce code duplication

* Make plain_or_script_rule mutable
This commit is contained in:
Henry Chu 2025-05-05 15:11:20 +08:00 committed by GitHub
parent 3ec09711eb
commit 4ae6f34ab4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 62 additions and 26 deletions

View File

@ -187,14 +187,9 @@ llmp_small_maps = [
] # reduces initial map size for llmp ] # reduces initial map size for llmp
## Grammar mutator. Requires nightly. ## Grammar mutator. Requires nightly.
nautilus = [ nautilus = ["std", "serde_json/std", "rand_trait", "regex-syntax", "regex"]
"std",
"serde_json/std", nautilus_py = ["nautilus", "dep:pyo3"]
"dep:pyo3",
"rand_trait",
"regex-syntax",
"regex",
]
## Use the best SIMD implementation by our benchmark ## Use the best SIMD implementation by our benchmark
simd = ["libafl_bolts/simd"] simd = ["libafl_bolts/simd"]

View File

@ -5,6 +5,7 @@ use libafl_bolts::{
nonzero, nonzero,
rands::{Rand, RomuDuoJrRand}, rands::{Rand, RomuDuoJrRand},
}; };
#[cfg(feature = "nautilus_py")]
use pyo3::prelude::PyObject; use pyo3::prelude::PyObject;
use super::{ use super::{
@ -83,6 +84,7 @@ impl Context {
rid rid
} }
#[cfg(feature = "nautilus_py")]
pub fn add_script(&mut self, nt: &str, nts: &[String], script: PyObject) -> RuleId { pub fn add_script(&mut self, nt: &str, nts: &[String], script: PyObject) -> RuleId {
let rid = self.rules.len().into(); let rid = self.rules.len().into();
let rule = Rule::from_script(self, nt, nts, script); let rule = Rule::from_script(self, nt, nts, script);

View File

@ -2,7 +2,7 @@ pub mod chunkstore;
pub mod context; pub mod context;
pub mod mutator; pub mod mutator;
pub mod newtypes; pub mod newtypes;
#[cfg(feature = "nautilus")] #[cfg(feature = "nautilus_py")]
pub mod python_grammar_loader; pub mod python_grammar_loader;
pub mod recursion_info; pub mod recursion_info;
pub mod rule; pub mod rule;

View File

@ -2,6 +2,7 @@ use alloc::{string::String, vec::Vec};
use std::sync::OnceLock; use std::sync::OnceLock;
use libafl_bolts::rands::Rand; use libafl_bolts::rands::Rand;
#[cfg(feature = "nautilus_py")]
use pyo3::prelude::{PyObject, Python}; use pyo3::prelude::{PyObject, Python};
use regex_syntax::hir::Hir; use regex_syntax::hir::Hir;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
@ -91,6 +92,7 @@ impl RuleIdOrCustom {
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub enum Rule { pub enum Rule {
Plain(PlainRule), Plain(PlainRule),
#[cfg(feature = "nautilus_py")]
Script(ScriptRule), Script(ScriptRule),
RegExp(RegExpRule), RegExp(RegExpRule),
} }
@ -108,6 +110,7 @@ impl RegExpRule {
} }
} }
#[cfg(feature = "nautilus_py")]
#[derive(Debug)] #[derive(Debug)]
pub struct ScriptRule { pub struct ScriptRule {
pub nonterm: NTermId, pub nonterm: NTermId,
@ -115,6 +118,7 @@ pub struct ScriptRule {
pub script: PyObject, pub script: PyObject,
} }
#[cfg(feature = "nautilus_py")]
impl ScriptRule { impl ScriptRule {
#[must_use] #[must_use]
pub fn debug_show(&self, ctx: &Context) -> String { pub fn debug_show(&self, ctx: &Context) -> String {
@ -148,6 +152,7 @@ impl PlainRule {
} }
} }
#[cfg(feature = "nautilus_py")]
impl Clone for ScriptRule { impl Clone for ScriptRule {
fn clone(&self) -> Self { fn clone(&self) -> Self {
Python::with_gil(|py| ScriptRule { Python::with_gil(|py| ScriptRule {
@ -159,6 +164,7 @@ impl Clone for ScriptRule {
} }
impl Rule { impl Rule {
#[cfg(feature = "nautilus_py")]
pub fn from_script( pub fn from_script(
ctx: &mut Context, ctx: &mut Context,
nonterm: &str, nonterm: &str,
@ -189,6 +195,7 @@ impl Rule {
pub fn debug_show(&self, ctx: &Context) -> String { pub fn debug_show(&self, ctx: &Context) -> String {
match self { match self {
Self::Plain(r) => r.debug_show(ctx), Self::Plain(r) => r.debug_show(ctx),
#[cfg(feature = "nautilus_py")]
Self::Script(r) => r.debug_show(ctx), Self::Script(r) => r.debug_show(ctx),
Self::RegExp(r) => r.debug_show(ctx), Self::RegExp(r) => r.debug_show(ctx),
} }
@ -281,6 +288,7 @@ impl Rule {
#[must_use] #[must_use]
pub fn nonterms(&self) -> &[NTermId] { pub fn nonterms(&self) -> &[NTermId] {
match self { match self {
#[cfg(feature = "nautilus_py")]
Rule::Script(r) => &r.nonterms, Rule::Script(r) => &r.nonterms,
Rule::Plain(r) => &r.nonterms, Rule::Plain(r) => &r.nonterms,
Rule::RegExp(_) => &[], Rule::RegExp(_) => &[],
@ -295,6 +303,7 @@ impl Rule {
#[must_use] #[must_use]
pub fn nonterm(&self) -> NTermId { pub fn nonterm(&self) -> NTermId {
match self { match self {
#[cfg(feature = "nautilus_py")]
Rule::Script(r) => r.nonterm, Rule::Script(r) => r.nonterm,
Rule::Plain(r) => r.nonterm, Rule::Plain(r) => r.nonterm,
Rule::RegExp(r) => r.nonterm, Rule::RegExp(r) => r.nonterm,
@ -340,7 +349,9 @@ impl Rule {
//get a rule that can be used with the remaining length //get a rule that can be used with the remaining length
let rid = ctx.get_random_rule_for_nt(rand, *nt, cur_child_max_len); let rid = ctx.get_random_rule_for_nt(rand, *nt, cur_child_max_len);
let rule_or_custom = match ctx.get_rule(rid) { let rule_or_custom = match ctx.get_rule(rid) {
Rule::Plain(_) | Rule::Script(_) => RuleIdOrCustom::Rule(rid), Rule::Plain(_) => RuleIdOrCustom::Rule(rid),
#[cfg(feature = "nautilus_py")]
Rule::Script(_) => RuleIdOrCustom::Rule(rid),
Rule::RegExp(RegExpRule { hir, .. }) => { Rule::RegExp(RegExpRule { hir, .. }) => {
RuleIdOrCustom::Custom(rid, regex_mutator::generate(rand, hir)) RuleIdOrCustom::Custom(rid, regex_mutator::generate(rand, hir))
} }

View File

@ -4,6 +4,7 @@ use std::io::{Cursor, Write, stdout};
use hashbrown::HashSet; use hashbrown::HashSet;
use libafl_bolts::rands::Rand; use libafl_bolts::rands::Rand;
#[cfg(feature = "nautilus_py")]
use pyo3::{ use pyo3::{
PyTypeInfo, PyTypeInfo,
prelude::{PyObject, PyResult, Python}, prelude::{PyObject, PyResult, Python},
@ -11,18 +12,22 @@ use pyo3::{
}; };
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
#[cfg(feature = "nautilus_py")]
use super::rule::ScriptRule;
use super::{ use super::{
super::regex_mutator, super::regex_mutator,
context::Context, context::Context,
newtypes::{NTermId, NodeId, RuleId}, newtypes::{NTermId, NodeId, RuleId},
recursion_info::RecursionInfo, recursion_info::RecursionInfo,
rule::{PlainRule, RegExpRule, Rule, RuleChild, RuleIdOrCustom, ScriptRule}, rule::{PlainRule, RegExpRule, Rule, RuleChild, RuleIdOrCustom},
}; };
enum UnparseStep<'dat> { enum UnparseStep<'dat> {
Term(&'dat [u8]), Term(&'dat [u8]),
Nonterm(NTermId), Nonterm(NTermId),
#[cfg(feature = "nautilus_py")]
Script(usize, PyObject), Script(usize, PyObject),
#[cfg(feature = "nautilus_py")]
PushBuffer(), PushBuffer(),
} }
@ -55,7 +60,9 @@ impl<'data, 'tree: 'data, 'ctx: 'data, W: Write, T: TreeLike> Unparser<'data, 't
match self.stack.pop() { match self.stack.pop() {
Some(UnparseStep::Term(data)) => self.write(data), Some(UnparseStep::Term(data)) => self.write(data),
Some(UnparseStep::Nonterm(nt)) => self.nonterm(nt), Some(UnparseStep::Nonterm(nt)) => self.nonterm(nt),
#[cfg(feature = "nautilus_py")]
Some(UnparseStep::Script(num, expr)) => self.unwrap_script(num, &expr), Some(UnparseStep::Script(num, expr)) => self.unwrap_script(num, &expr),
#[cfg(feature = "nautilus_py")]
Some(UnparseStep::PushBuffer()) => self.push_buffer(), Some(UnparseStep::PushBuffer()) => self.push_buffer(),
None => return false, None => return false,
} }
@ -73,6 +80,8 @@ impl<'data, 'tree: 'data, 'ctx: 'data, W: Write, T: TreeLike> Unparser<'data, 't
fn nonterm(&mut self, nt: NTermId) { fn nonterm(&mut self, nt: NTermId) {
self.next_rule(nt); self.next_rule(nt);
} }
#[cfg(feature = "nautilus_py")]
fn unwrap_script(&mut self, num: usize, expr: &PyObject) { fn unwrap_script(&mut self, num: usize, expr: &PyObject) {
Python::with_gil(|py| { Python::with_gil(|py| {
self.script(py, num, expr) self.script(py, num, expr)
@ -80,6 +89,8 @@ impl<'data, 'tree: 'data, 'ctx: 'data, W: Write, T: TreeLike> Unparser<'data, 't
.unwrap(); .unwrap();
}); });
} }
#[cfg(feature = "nautilus_py")]
fn script(&mut self, py: Python, num: usize, expr: &PyObject) -> PyResult<()> { fn script(&mut self, py: Python, num: usize, expr: &PyObject) -> PyResult<()> {
let bufs = self.buffers.split_off(self.buffers.len() - num); let bufs = self.buffers.split_off(self.buffers.len() - num);
let bufs = bufs.into_iter().map(Cursor::into_inner).collect::<Vec<_>>(); let bufs = bufs.into_iter().map(Cursor::into_inner).collect::<Vec<_>>();
@ -100,6 +111,7 @@ impl<'data, 'tree: 'data, 'ctx: 'data, W: Write, T: TreeLike> Unparser<'data, 't
Ok(()) Ok(())
} }
#[cfg(feature = "nautilus_py")]
fn push_buffer(&mut self) { fn push_buffer(&mut self) {
self.buffers.push(Cursor::new(vec![])); self.buffers.push(Cursor::new(vec![]));
} }
@ -111,6 +123,7 @@ impl<'data, 'tree: 'data, 'ctx: 'data, W: Write, T: TreeLike> Unparser<'data, 't
self.i += 1; self.i += 1;
match rule { match rule {
Rule::Plain(r) => self.next_plain(r), Rule::Plain(r) => self.next_plain(r),
#[cfg(feature = "nautilus_py")]
Rule::Script(r) => self.next_script(r), Rule::Script(r) => self.next_script(r),
Rule::RegExp(_) => self.next_regexp(self.tree.get_custom_rule_data(nid)), Rule::RegExp(_) => self.next_regexp(self.tree.get_custom_rule_data(nid)),
} }
@ -126,6 +139,7 @@ impl<'data, 'tree: 'data, 'ctx: 'data, W: Write, T: TreeLike> Unparser<'data, 't
} }
} }
#[cfg(feature = "nautilus_py")]
fn next_script(&mut self, r: &ScriptRule) { fn next_script(&mut self, r: &ScriptRule) {
Python::with_gil(|py| { Python::with_gil(|py| {
self.stack.push(UnparseStep::Script( self.stack.push(UnparseStep::Script(
@ -345,15 +359,18 @@ impl Tree {
max_len: usize, max_len: usize,
ctx: &Context, ctx: &Context,
) { ) {
let mut plain_or_script_rule = || {
self.truncate();
self.rules.push(RuleIdOrCustom::Rule(ruleid));
self.sizes.push(0);
self.paren.push(NodeId::from(0));
ctx.get_rule(ruleid).generate(rand, self, ctx, max_len);
self.sizes[0] = self.rules.len();
};
match ctx.get_rule(ruleid) { match ctx.get_rule(ruleid) {
Rule::Plain(..) | Rule::Script(..) => { Rule::Plain(..) => plain_or_script_rule(),
self.truncate(); #[cfg(feature = "nautilus_py")]
self.rules.push(RuleIdOrCustom::Rule(ruleid)); Rule::Script(..) => plain_or_script_rule(),
self.sizes.push(0);
self.paren.push(NodeId::from(0));
ctx.get_rule(ruleid).generate(rand, self, ctx, max_len);
self.sizes[0] = self.rules.len();
}
Rule::RegExp(RegExpRule { hir, .. }) => { Rule::RegExp(RegExpRule { hir, .. }) => {
let rid = RuleIdOrCustom::Custom(ruleid, regex_mutator::generate(rand, hir)); let rid = RuleIdOrCustom::Custom(ruleid, regex_mutator::generate(rand, hir));
self.truncate(); self.truncate();

View File

@ -9,9 +9,11 @@ use std::{fs, io::BufReader, path::Path};
use libafl_bolts::rands::Rand; use libafl_bolts::rands::Rand;
pub use crate::common::nautilus::grammartec::newtypes::NTermId; pub use crate::common::nautilus::grammartec::newtypes::NTermId;
#[cfg(feature = "nautilus_py")]
use crate::nautilus::grammartec::python_grammar_loader;
use crate::{ use crate::{
Error, common::nautilus::grammartec::context::Context, generators::Generator, Error, common::nautilus::grammartec::context::Context, generators::Generator,
inputs::nautilus::NautilusInput, nautilus::grammartec::python_grammar_loader, state::HasRand, inputs::nautilus::NautilusInput, state::HasRand,
}; };
/// The nautilus context for a generator /// The nautilus context for a generator
@ -87,12 +89,21 @@ impl NautilusContext {
pub fn from_file<P: AsRef<Path>>(tree_depth: usize, grammar_file: P) -> Result<Self, Error> { pub fn from_file<P: AsRef<Path>>(tree_depth: usize, grammar_file: P) -> Result<Self, Error> {
let grammar_file = grammar_file.as_ref(); let grammar_file = grammar_file.as_ref();
if grammar_file.extension().unwrap_or_default() == "py" { if grammar_file.extension().unwrap_or_default() == "py" {
log::debug!("Creating NautilusContext from python grammar"); #[cfg(feature = "nautilus_py")]
let mut ctx = python_grammar_loader::load_python_grammar( {
fs::read_to_string(grammar_file)?.as_str(), log::debug!("Creating NautilusContext from python grammar");
); let mut ctx = python_grammar_loader::load_python_grammar(
ctx.initialize(tree_depth); fs::read_to_string(grammar_file)?.as_str(),
return Ok(Self { ctx }); );
ctx.initialize(tree_depth);
return Ok(Self { ctx });
}
#[cfg(not(feature = "nautilus_py"))]
{
return Err(Error::illegal_argument(format!(
"Feature `nautilus_py` is required to load grammar from {grammar_file:?}"
)));
}
} }
log::debug!("Creating NautilusContext from json grammar"); log::debug!("Creating NautilusContext from json grammar");
let file = fs::File::open(grammar_file)?; let file = fs::File::open(grammar_file)?;