Add Python Grammar Loader for Nautilus (#2635)

* add python grammar loader for Nautilus

* fmt

* fmt toml

* add python to macos CI deps

* install python

* fmt

* ci

* clippy

* fix workflow

* fmt

* fix baby nautilus

* fix nautilus sync

* fmt

* fmt

* clippy

* typo

* fix miri

* remove pyo3 from workspace to packages which need it and make it optional

* go back to AsRef<Path> for nautilus grammar loading

* replace hardcoded python flags for macos build

* typo

* taplo fmt

* revert formatting of libafl_qemu_arch

* ci

* typo

* remove expects in NautilusContext::from_file and make them Results

* remove not(miri) clause in test

* try and fix python build for ios and android

* again

* android

* tmate

* fix android build

* document load_python_grammar

* log if python or json when loading nautilus grammar

* make nautilus optional

* add nautilus as feature to forkserver_simple_nautilus
This commit is contained in:
Aarnav 2024-10-29 11:32:59 +01:00 committed by GitHub
parent 58fad2befd
commit 0f744a3abb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 114 additions and 35 deletions

View File

@ -53,10 +53,13 @@ jobs:
run: ./scripts/check_for_blobs.sh
- name: Build libafl debug
run: cargo build -p libafl
- name: Test the book
- name: Test the book (Linux)
# TODO: fix books test fail with updated windows-rs
if: runner.os != 'Windows'
if: runner.os == 'Linux'
run: cd docs && mdbook test -L ../target/debug/deps
- name: Test the book (MacOS)
if: runner.os == 'MacOS'
run: cd docs && mdbook test -L ../target/debug/deps $(python3-config --ldflags | cut -d ' ' -f1)
- name: Run tests
run: cargo test
- name: Test libafl no_std
@ -468,7 +471,7 @@ jobs:
- name: Add nightly clippy
run: rustup toolchain install nightly --component clippy --allow-downgrade && rustup default nightly
- name: Install deps
run: brew install z3 gtk+3
run: brew install z3 gtk+3 python
- name: Install cxxbridge
run: cargo install cxxbridge-cmd
- uses: actions/checkout@v4
@ -491,7 +494,7 @@ jobs:
- uses: actions/checkout@v4
- uses: Swatinem/rust-cache@v2
- name: Build iOS
run: cargo build --target aarch64-apple-ios && cd libafl_frida && cargo build --target aarch64-apple-ios && cd ..
run: PYO3_CROSS_PYTHON_VERSION=$(python3 -c "print('{}.{}'.format(__import__('sys').version_info.major, __import__('sys').version_info.minor))") cargo build --target aarch64-apple-ios && cd libafl_frida && cargo build --target aarch64-apple-ios && cd ..
android:
runs-on: ubuntu-24.04
@ -509,7 +512,7 @@ jobs:
- uses: actions/checkout@v4
- uses: Swatinem/rust-cache@v2
- name: Build Android
run: cd libafl && cargo ndk -t arm64-v8a build --release
run: cd libafl && PYO3_CROSS_PYTHON_VERSION=$(python3 -c "print('{}.{}'.format(__import__('sys').version_info.major, __import__('sys').version_info.minor))") cargo ndk -t arm64-v8a build --release
#run: cargo build --target aarch64-linux-android
# TODO: Figure out how to properly build stuff with clang

View File

@ -69,9 +69,6 @@ paste = "1.0.15"
postcard = { version = "1.0.10", features = [
"alloc",
], default-features = false } # no_std compatible serde serialization format
pyo3 = "0.22.3"
pyo3-build-config = "0.22.3"
pyo3-log = "0.11.0"
rangemap = "1.5.1"
regex = "1.10.6"
rustversion = "1.0.17"

View File

@ -4,7 +4,7 @@
#include <string.h>
extern "C" __declspec(dllexport) size_t
LLVMFuzzerTestOneInput(const char *data, unsigned int len) {
LLVMFuzzerTestOneInput(const char *data, unsigned int len) {
if (data[0] == 'b') {
if (data[1] == 'a') {
if (data[2] == 'd') {

View File

@ -35,7 +35,7 @@ fn signals_set(idx: usize) {
#[allow(clippy::similar_names)]
pub fn main() {
let context = NautilusContext::from_file(15, "grammar.json");
let context = NautilusContext::from_file(15, "grammar.json").unwrap();
let mut bytes = vec![];
// The closure that we want to fuzz

View File

@ -18,7 +18,7 @@ opt-level = 3
[dependencies]
clap = { version = "4.5.18", features = ["derive"] }
env_logger = "0.11.5"
libafl = { path = "../../../libafl", features = ["std", "derive"] }
libafl = { path = "../../../libafl", features = ["std", "derive", "nautilus"] }
libafl_bolts = { path = "../../../libafl_bolts" }
log = { version = "0.4.22", features = ["release_max_level_info"] }
nix = { version = "0.29.0", features = ["signal"] }

View File

@ -108,7 +108,7 @@ pub fn main() {
// Create an observation channel to keep track of the execution time
let time_observer = TimeObserver::new("time");
let context = NautilusContext::from_file(15, opt.grammar);
let context = NautilusContext::from_file(15, opt.grammar).unwrap();
// Feedback to rate the interestingness of an input
// This one is composed by two Feedbacks in OR

View File

@ -118,7 +118,7 @@ pub extern "C" fn libafl_main() {
// The Monitor trait define how the fuzzer stats are reported to the user
let monitor = SimpleMonitor::new(|s| println!("{s}"));
let context = NautilusContext::from_file(15, "grammar.json");
let context = NautilusContext::from_file(15, "grammar.json").unwrap();
let mut event_converter = opt.bytes_broker_port.map(|port| {
LlmpEventConverter::builder()

View File

@ -27,7 +27,6 @@ rustc-args = ["--cfg", "docsrs"]
[features]
default = [
"nautilus",
"std",
"derive",
"llmp_compression",
@ -180,7 +179,7 @@ llmp_small_maps = [
nautilus = [
"std",
"serde_json/std",
"pyo3",
"dep:pyo3",
"rand_trait",
"regex-syntax",
"regex",
@ -261,7 +260,7 @@ arrayvec = { version = "0.7.6", optional = true, default-features = false } # us
const_format = "0.2.33" # used for providing helpful compiler output
const_panic = "0.2.9" # similarly, for formatting const panic output
pyo3 = { workspace = true, optional = true } # For nautilus
pyo3 = { version = "0.22.3", features = ["gil-refs"], optional = true }
regex-syntax = { version = "0.8.4", optional = true } # For nautilus
# optional-dev deps (change when target.'cfg(accessible(::std))'.test-dependencies will be stable)

View File

@ -2,6 +2,8 @@ pub mod chunkstore;
pub mod context;
pub mod mutator;
pub mod newtypes;
#[cfg(feature = "nautilus")]
pub mod python_grammar_loader;
pub mod recursion_info;
pub mod rule;
pub mod tree;

View File

@ -0,0 +1,64 @@
use std::{string::String, vec::Vec};
use pyo3::{prelude::*, pyclass, types::IntoPyDict};
use crate::{nautilus::grammartec::context::Context, Error};
// Python-visible wrapper around a nautilus grammar `Context`.
// An instance is handed to the grammar script, which populates it through
// the `#[pymethods]` defined for this type.
#[pyclass]
struct PyContext {
    // The grammar context being built up by the Python script.
    ctx: Context,
}

impl PyContext {
    /// Returns an independent clone of the wrapped grammar [`Context`].
    fn get_context(&self) -> Context {
        let Self { ctx } = self;
        ctx.clone()
    }
}
#[pymethods]
impl PyContext {
    // Python-side constructor: wraps a freshly created, empty grammar context.
    #[new]
    fn new() -> Self {
        Self {
            ctx: Context::new(),
        }
    }

    // Adds a rule for the non-terminal `nt` to the wrapped context.
    //
    // `format` has to be a Python `str` or `bytes`; a `str` is forwarded as
    // its UTF-8 bytes. Any other type is rejected with a `ValueError`.
    //
    // The GIL token is never read in the body, so it carries a leading
    // underscore to avoid an unused-variable warning. pyo3 supplies
    // `Python` arguments itself, so Python callers are unaffected.
    fn rule(&mut self, _py: Python, nt: &str, format: &Bound<PyAny>) -> PyResult<()> {
        // Resolve the argument to a byte slice once, then register it in a
        // single place instead of duplicating the `add_rule` call per type.
        let production: &[u8] = if let Ok(text) = format.extract::<&str>() {
            text.as_bytes()
        } else if let Ok(raw) = format.extract::<&[u8]>() {
            raw
        } else {
            return Err(pyo3::exceptions::PyValueError::new_err(
                "format argument should be string or bytes",
            ));
        };
        self.ctx.add_rule(nt, production);
        Ok(())
    }

    // Forwards a script rule for the non-terminal `nt` to the wrapped
    // context, passing along the list `nts` and the Python object `script`.
    //
    // `nts` is only borrowed below, but it has to be an owned `Vec` for pyo3
    // to extract it from the Python argument - presumably why the by-value
    // lint is silenced here.
    #[allow(clippy::needless_pass_by_value)]
    fn script(&mut self, nt: &str, nts: Vec<String>, script: PyObject) {
        self.ctx.add_script(nt, &nts, script);
    }

    // Forwards a regex rule for the non-terminal `nt` to the wrapped context.
    fn regex(&mut self, nt: &str, regex: &str) {
        self.ctx.add_regex(nt, regex);
    }
}
/// Runs `grammar` as Python source code and returns the grammar it defines.
///
/// A fresh `PyContext` is exposed to the script as the local variable `ctx`.
/// Once the script has finished, a clone of the context it populated is
/// returned.
///
/// # Errors
///
/// Returns the Python error if the `PyContext` object cannot be allocated or
/// if executing the script raises.
fn loader(py: Python, grammar: &str) -> PyResult<Context> {
    let holder = Bound::new(py, PyContext::new())?;
    let script_locals = [("ctx", &holder)].into_py_dict_bound(py);
    py.run_bound(grammar, None, Some(&script_locals))?;
    let populated = holder.borrow().get_context();
    Ok(populated)
}
/// Create a `NautilusContext` from a python grammar file
#[must_use]
pub fn load_python_grammar(grammar: &str) -> Context {
Python::with_gil(|py| {
loader(py, grammar)
.map_err(|e| e.print_and_set_sys_last_vars(py))
.expect("failed to parse python grammar")
})
}

View File

@ -11,7 +11,8 @@ use libafl_bolts::rands::Rand;
pub use crate::common::nautilus::grammartec::newtypes::NTermId;
use crate::{
common::nautilus::grammartec::context::Context, generators::Generator,
inputs::nautilus::NautilusInput, state::HasRand, Error,
inputs::nautilus::NautilusInput, nautilus::grammartec::python_grammar_loader, state::HasRand,
Error,
};
/// The nautilus context for a generator
@ -84,13 +85,19 @@ impl NautilusContext {
}
/// Create a new [`NautilusContext`] from a file
#[must_use]
pub fn from_file<P: AsRef<Path>>(tree_depth: usize, grammar_file: P) -> Self {
let file = fs::File::open(grammar_file).expect("Cannot open grammar file");
pub fn from_file<P: AsRef<Path>>(tree_depth: usize, grammar_file: P) -> Result<Self, Error> {
if grammar_file.as_ref().extension().unwrap_or_default() == "py" {
log::debug!("Creating NautilusContext from python grammar");
let ctx = python_grammar_loader::load_python_grammar(
fs::read_to_string(grammar_file)?.as_str(),
);
return Ok(Self { ctx });
}
log::debug!("Creating NautilusContext from json grammar");
let file = fs::File::open(grammar_file)?;
let reader = BufReader::new(file);
let rules: Vec<Vec<String>> =
serde_json::from_reader(reader).expect("Cannot parse grammar file");
Self::new(tree_depth, &rules)
let rules: Vec<Vec<String>> = serde_json::from_reader(reader)?;
Ok(Self::new(tree_depth, &rules))
}
}

View File

@ -261,6 +261,7 @@ where
<Z as HasScheduler>::Scheduler: HasQueueCycles,
<<E as UsesState>::State as HasCorpus>::Corpus: Corpus<Input = E::Input>,
{
#[allow(clippy::too_many_lines)]
fn perform(
&mut self,
fuzzer: &mut Z,

View File

@ -1,3 +1,4 @@
#![allow(clippy::too_long_first_doc_paragraph)]
//! Stage that re-runs captured Timeouts with double the timeout to verify
//! Note: To capture the timeouts, use in conjunction with `CaptureTimeoutFeedback`
//! Note: Will NOT work with in process executors due to the potential for restarts/crashes when
@ -8,10 +9,12 @@ use std::{cell::RefCell, collections::VecDeque, fmt::Debug, marker::PhantomData,
use libafl_bolts::Error;
use serde::{de::DeserializeOwned, Deserialize, Serialize};
#[cfg(not(miri))]
use crate::inputs::BytesInput;
use crate::{
corpus::Corpus,
executors::{Executor, HasObservers, HasTimeout},
inputs::{BytesInput, UsesInput},
inputs::UsesInput,
observers::ObserversTuple,
stages::Stage,
state::{HasCorpus, State, UsesState},
@ -104,8 +107,9 @@ where
state: &mut Self::State,
manager: &mut EM,
) -> Result<(), Error> {
let mut timeouts =
state.metadata_or_insert_with(TimeoutsToVerify::<<S::Corpus as Corpus>::Input>::new).clone();
let mut timeouts = state
.metadata_or_insert_with(TimeoutsToVerify::<<S::Corpus as Corpus>::Input>::new)
.clone();
if timeouts.count() == 0 {
return Ok(());
}

View File

@ -160,7 +160,7 @@ clap = { workspace = true, features = [
"wrap_help",
], optional = true } # CLI parsing, for libafl_bolts::cli / the `cli` feature
log = { workspace = true }
pyo3 = { workspace = true, optional = true, features = ["serde", "macros"] }
pyo3 = { version = "0.22.3", optional = true, features = ["serde", "macros"] }
# optional-dev deps (change when target.'cfg(accessible(::std))'.test-dependencies will be stable)
serial_test = { workspace = true, optional = true, default-features = false, features = [

View File

@ -125,7 +125,9 @@ paste = { workspace = true }
enum-map = "2.7.3"
serde_yaml = { workspace = true, optional = true } # For parsing the injections yaml file
toml = { workspace = true, optional = true } # For parsing the injections toml file
pyo3 = { workspace = true, optional = true, features = ["multiple-pymethods"] }
pyo3 = { version = "0.22.3", optional = true, features = [
"multiple-pymethods",
] }
bytes-utils = "0.1.4"
typed-builder = { workspace = true }
memmap2 = "0.9.5"
@ -135,7 +137,7 @@ document-features = { workspace = true, optional = true }
[build-dependencies]
libafl_qemu_build = { path = "./libafl_qemu_build", version = "0.13.2" }
pyo3-build-config = { workspace = true, optional = true }
pyo3-build-config = { version = "0.22.3", optional = true }
rustversion = { workspace = true }
bindgen = { workspace = true }
cc = { workspace = true }

View File

@ -61,11 +61,11 @@ num_enum = { workspace = true, default-features = true }
libc = { workspace = true }
strum = { workspace = true }
strum_macros = { workspace = true }
pyo3 = { workspace = true, optional = true }
pyo3 = { version = "0.22.3", optional = true }
[build-dependencies]
libafl_qemu_build = { path = "../libafl_qemu_build", version = "0.13.2" }
pyo3-build-config = { workspace = true, optional = true }
pyo3-build-config = { version = "0.22.3", optional = true }
rustversion = { workspace = true }
[lints]

View File

@ -53,7 +53,7 @@ ppc = ["libafl_qemu/ppc"]
hexagon = ["libafl_qemu/hexagon"]
[build-dependencies]
pyo3-build-config = { workspace = true, optional = true }
pyo3-build-config = { version = "0.22.3", optional = true }
[dependencies]
libafl = { path = "../libafl", version = "0.13.2" }
@ -64,7 +64,7 @@ libafl_targets = { path = "../libafl_targets", version = "0.13.2" }
document-features = { workspace = true, optional = true }
typed-builder = { workspace = true } # Implement the builder pattern at compiletime
pyo3 = { workspace = true, optional = true }
pyo3 = { version = "0.22.3", optional = true }
log = { workspace = true }
[target.'cfg(target_os = "linux")'.dependencies]