Monitor to export fuzzer metrics to Prometheus server (#875)
* add custom monitor prometheus as a baseline to build functionality * working server, set up function to update metrics in the registry * for a test * metrics for corpus count, objective count, executions, execution rate are intermittently updated and exposed on /metrics * add runtime metric, clean up some comments * IP:PORT as argument instead of hardcoded * add client # as label attached to fuzzer metrics for filtering by client. add clients_count as a tracked metric * added support for custom metrics added to client_stats via feedbacks, such as edges count. cleaned up code * cargo fmt * clean up prometheus.rs * ran autofix and fmt scripts, and put optional dependencies behind prometheus_monitor feature Co-authored-by: Dominik Maier <domenukk@gmail.com>
This commit is contained in:
parent
8e6eaf7002
commit
18f288e2d3
@ -22,6 +22,7 @@ concolic_mutation = ["z3"] # include a simple concolic mutator based on z3
|
|||||||
python = ["pyo3", "concat-idents"]
|
python = ["pyo3", "concat-idents"]
|
||||||
prelude = [] # Expose libafl::prelude for access without additional using directives
|
prelude = [] # Expose libafl::prelude for access without additional using directives
|
||||||
tui_monitor = ["tui", "crossterm"] # enable TuiMonitor with crossterm
|
tui_monitor = ["tui", "crossterm"] # enable TuiMonitor with crossterm
|
||||||
|
prometheus_monitor = ["std", "async-std", "prometheus-client", "tide", "futures"]
|
||||||
cli = ["clap"] # expose bolts::cli
|
cli = ["clap"] # expose bolts::cli
|
||||||
qemu_cli = ["cli"]
|
qemu_cli = ["cli"]
|
||||||
frida_cli = ["cli"]
|
frida_cli = ["cli"]
|
||||||
@ -55,7 +56,7 @@ tuple_list = { version = "0.1.3" }
|
|||||||
hashbrown = { version = "0.12", features = ["serde", "ahash-compile-time-rng"], default-features=false } # A faster hashmap, nostd compatible
|
hashbrown = { version = "0.12", features = ["serde", "ahash-compile-time-rng"], default-features=false } # A faster hashmap, nostd compatible
|
||||||
num-traits = { version = "0.2", default-features = false }
|
num-traits = { version = "0.2", default-features = false }
|
||||||
xxhash-rust = { version = "0.8.5", features = ["xxh3"] } # xxh3 hashing for rust
|
xxhash-rust = { version = "0.8.5", features = ["xxh3"] } # xxh3 hashing for rust
|
||||||
serde = { version = "1.0", default-features = false, features = ["alloc"] } # serialization lib
|
serde = { version = "1.0", default-features = false, features = ["alloc", "derive"] } # serialization lib
|
||||||
erased-serde = { version = "0.3.21", default-features = false, features = ["alloc"] } # erased serde
|
erased-serde = { version = "0.3.21", default-features = false, features = ["alloc"] } # erased serde
|
||||||
postcard = { version = "1.0", features = ["alloc"] } # no_std compatible serde serialization fromat
|
postcard = { version = "1.0", features = ["alloc"] } # no_std compatible serde serialization fromat
|
||||||
bincode = {version = "1.3", optional = true }
|
bincode = {version = "1.3", optional = true }
|
||||||
@ -81,6 +82,11 @@ tui = { version = "0.19", default-features = false, features = ['crossterm'], op
|
|||||||
crossterm = { version = "0.25", optional = true }
|
crossterm = { version = "0.25", optional = true }
|
||||||
clap = {version = "4.0", features = ["derive", "wrap_help"], optional = true}
|
clap = {version = "4.0", features = ["derive", "wrap_help"], optional = true}
|
||||||
|
|
||||||
|
prometheus-client = { version= "0.18.0", optional = true}
|
||||||
|
tide = { version = "0.16.0", optional = true }
|
||||||
|
async-std = { version = "1.8.0", features = ["attributes"], optional = true }
|
||||||
|
futures = { version = "0.3.24", optional = true }
|
||||||
|
|
||||||
wait-timeout = { version = "0.2", optional = true } # used by CommandExecutor to wait for child process
|
wait-timeout = { version = "0.2", optional = true } # used by CommandExecutor to wait for child process
|
||||||
|
|
||||||
z3 = { version = "0.11", features = ["static-link-z3"], optional = true } # for concolic mutation
|
z3 = { version = "0.11", features = ["static-link-z3"], optional = true } # for concolic mutation
|
||||||
|
@ -7,6 +7,12 @@ pub use multi::MultiMonitor;
|
|||||||
#[allow(missing_docs)]
|
#[allow(missing_docs)]
|
||||||
pub mod tui;
|
pub mod tui;
|
||||||
|
|
||||||
|
#[cfg(all(feature = "prometheus_monitor", feature = "std"))]
|
||||||
|
#[allow(missing_docs)]
|
||||||
|
pub mod prometheus;
|
||||||
|
#[cfg(all(feature = "prometheus_monitor", feature = "std"))]
|
||||||
|
pub use prometheus::PrometheusMonitor;
|
||||||
|
|
||||||
#[cfg(feature = "std")]
|
#[cfg(feature = "std")]
|
||||||
pub mod disk;
|
pub mod disk;
|
||||||
use alloc::{fmt::Debug, string::String, vec::Vec};
|
use alloc::{fmt::Debug, string::String, vec::Vec};
|
||||||
|
355
libafl/src/monitors/prometheus.rs
Normal file
355
libafl/src/monitors/prometheus.rs
Normal file
@ -0,0 +1,355 @@
|
|||||||
|
// ===== overview for prommon =====
|
||||||
|
// The client (i.e., the fuzzer) sets up an HTTP endpoint (/metrics).
|
||||||
|
// The endpoint contains metrics such as execution rate.
|
||||||
|
|
||||||
|
// A prometheus server (can use a precompiled binary or docker) then scrapes \
|
||||||
|
// the endpoint at regular intervals (configurable via prometheus.yml file).
|
||||||
|
// ====================
|
||||||
|
//
|
||||||
|
// == how to use it ===
|
||||||
|
// This monitor should plug into any fuzzer similar to other monitors.
|
||||||
|
// In your fuzzer, include:
|
||||||
|
// use libafl::monitors::PrometheusMonitor;
|
||||||
|
// as well as:
|
||||||
|
// let listener = "127.0.0.1:8080".to_string(); // point prometheus to scrape here in your prometheus.yml
|
||||||
|
// let mon = PrometheusMonitor::new(listener, |s| println!("{}", s));
|
||||||
|
// and then like with any other monitor, pass it into the event manager like so:
|
||||||
|
// let mut mgr = SimpleEventManager::new(mon);
|
||||||
|
// When using docker, you may need to point prometheus.yml to the docker0 interface or host.docker.internal
|
||||||
|
// ====================
|
||||||
|
|
||||||
|
use alloc::{fmt::Debug, string::String, vec::Vec};
|
||||||
|
use core::{fmt, time::Duration};
|
||||||
|
use std::{
|
||||||
|
boxed::Box,
|
||||||
|
sync::{atomic::AtomicU64, Arc},
|
||||||
|
thread,
|
||||||
|
};
|
||||||
|
|
||||||
|
// using thread in order to start the HTTP server in a separate thread
|
||||||
|
use futures::executor::block_on;
|
||||||
|
// using the official rust client library for Prometheus: https://github.com/prometheus/client_rust
|
||||||
|
use prometheus_client::{
|
||||||
|
encoding::text::{encode, Encode, SendSyncEncodeMetric},
|
||||||
|
metrics::{family::Family, gauge::Gauge},
|
||||||
|
registry::Registry,
|
||||||
|
};
|
||||||
|
// using tide for the HTTP server library (fast, async, simple)
|
||||||
|
use tide::Request;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
bolts::{current_time, format_duration_hms},
|
||||||
|
monitors::{ClientStats, Monitor, UserStats},
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Tracking monitor during fuzzing.
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct PrometheusMonitor<F>
|
||||||
|
where
|
||||||
|
F: FnMut(String),
|
||||||
|
{
|
||||||
|
print_fn: F,
|
||||||
|
start_time: Duration,
|
||||||
|
client_stats: Vec<ClientStats>,
|
||||||
|
corpus_count: Family<Labels, Gauge>,
|
||||||
|
objective_count: Family<Labels, Gauge>,
|
||||||
|
executions: Family<Labels, Gauge>,
|
||||||
|
exec_rate: Family<Labels, Gauge>,
|
||||||
|
runtime: Family<Labels, Gauge>,
|
||||||
|
clients_count: Family<Labels, Gauge>,
|
||||||
|
custom_stat: Family<Labels, Gauge<f64, AtomicU64>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<F> Debug for PrometheusMonitor<F>
|
||||||
|
where
|
||||||
|
F: FnMut(String),
|
||||||
|
{
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
f.debug_struct("PrometheusMonitor")
|
||||||
|
.field("start_time", &self.start_time)
|
||||||
|
.field("client_stats", &self.client_stats)
|
||||||
|
.finish()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<F> Monitor for PrometheusMonitor<F>
|
||||||
|
where
|
||||||
|
F: FnMut(String),
|
||||||
|
{
|
||||||
|
/// the client monitor, mutable
|
||||||
|
fn client_stats_mut(&mut self) -> &mut Vec<ClientStats> {
|
||||||
|
&mut self.client_stats
|
||||||
|
}
|
||||||
|
|
||||||
|
/// the client monitor
|
||||||
|
fn client_stats(&self) -> &[ClientStats] {
|
||||||
|
&self.client_stats
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Time this fuzzing run stated
|
||||||
|
fn start_time(&mut self) -> Duration {
|
||||||
|
self.start_time
|
||||||
|
}
|
||||||
|
|
||||||
|
fn display(&mut self, event_msg: String, sender_id: u32) {
|
||||||
|
// Update the prometheus metrics
|
||||||
|
// Label each metric with the sender / client_id
|
||||||
|
let corpus_size = self.corpus_size();
|
||||||
|
self.corpus_count
|
||||||
|
.get_or_create(&Labels {
|
||||||
|
client: sender_id,
|
||||||
|
stat: String::new(),
|
||||||
|
})
|
||||||
|
.set(corpus_size);
|
||||||
|
let objective_size = self.objective_size();
|
||||||
|
self.objective_count
|
||||||
|
.get_or_create(&Labels {
|
||||||
|
client: sender_id,
|
||||||
|
stat: String::new(),
|
||||||
|
})
|
||||||
|
.set(objective_size);
|
||||||
|
let total_execs = self.total_execs();
|
||||||
|
self.executions
|
||||||
|
.get_or_create(&Labels {
|
||||||
|
client: sender_id,
|
||||||
|
stat: String::new(),
|
||||||
|
})
|
||||||
|
.set(total_execs);
|
||||||
|
let execs_per_sec = self.execs_per_sec();
|
||||||
|
self.exec_rate
|
||||||
|
.get_or_create(&Labels {
|
||||||
|
client: sender_id,
|
||||||
|
stat: String::new(),
|
||||||
|
})
|
||||||
|
.set(execs_per_sec);
|
||||||
|
let run_time = (current_time() - self.start_time).as_secs();
|
||||||
|
self.runtime
|
||||||
|
.get_or_create(&Labels {
|
||||||
|
client: sender_id,
|
||||||
|
stat: String::new(),
|
||||||
|
})
|
||||||
|
.set(run_time); // run time in seconds, which can be converted to a time format by Grafana or similar
|
||||||
|
let total_clients = self.client_stats().len().try_into().unwrap(); // convert usize to u64 (unlikely that # of clients will be > 2^64 -1...)
|
||||||
|
self.clients_count
|
||||||
|
.get_or_create(&Labels {
|
||||||
|
client: sender_id,
|
||||||
|
stat: String::new(),
|
||||||
|
})
|
||||||
|
.set(total_clients);
|
||||||
|
|
||||||
|
// display stats in a SimpleMonitor format
|
||||||
|
let fmt = format!(
|
||||||
|
"[Prometheus] [{} #{}] run time: {}, clients: {}, corpus: {}, objectives: {}, executions: {}, exec/sec: {}",
|
||||||
|
event_msg,
|
||||||
|
sender_id,
|
||||||
|
format_duration_hms(&(current_time() - self.start_time)),
|
||||||
|
self.client_stats().len(),
|
||||||
|
self.corpus_size(),
|
||||||
|
self.objective_size(),
|
||||||
|
self.total_execs(),
|
||||||
|
self.execs_per_sec()
|
||||||
|
);
|
||||||
|
(self.print_fn)(fmt);
|
||||||
|
|
||||||
|
let cur_client = self.client_stats_mut_for(sender_id);
|
||||||
|
let cur_client_clone = cur_client.clone();
|
||||||
|
|
||||||
|
for (key, val) in cur_client_clone.user_monitor {
|
||||||
|
// Update metrics added to the user_stats hashmap by feedback event-fires
|
||||||
|
// You can filter for each custom stat in promQL via labels of both the stat name and client id
|
||||||
|
println!("{key}: {val}");
|
||||||
|
#[allow(clippy::cast_precision_loss)]
|
||||||
|
let value: f64 = match val {
|
||||||
|
UserStats::Number(n) => n as f64,
|
||||||
|
UserStats::Float(f) => f,
|
||||||
|
UserStats::String(_s) => 0.0,
|
||||||
|
UserStats::Ratio(a, b) => (a as f64 / b as f64) * 100.0,
|
||||||
|
};
|
||||||
|
self.custom_stat
|
||||||
|
.get_or_create(&Labels {
|
||||||
|
client: sender_id,
|
||||||
|
stat: key.clone(),
|
||||||
|
})
|
||||||
|
.set(value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<F> PrometheusMonitor<F>
|
||||||
|
where
|
||||||
|
F: FnMut(String),
|
||||||
|
{
|
||||||
|
pub fn new(listener: String, print_fn: F) -> Self {
|
||||||
|
// Gauge's implementation of clone uses Arc
|
||||||
|
let corpus_count = Family::<Labels, Gauge>::default();
|
||||||
|
let corpus_count_clone = corpus_count.clone();
|
||||||
|
let objective_count = Family::<Labels, Gauge>::default();
|
||||||
|
let objective_count_clone = objective_count.clone();
|
||||||
|
let executions = Family::<Labels, Gauge>::default();
|
||||||
|
let executions_clone = executions.clone();
|
||||||
|
let exec_rate = Family::<Labels, Gauge>::default();
|
||||||
|
let exec_rate_clone = exec_rate.clone();
|
||||||
|
let runtime = Family::<Labels, Gauge>::default();
|
||||||
|
let runtime_clone = runtime.clone();
|
||||||
|
let clients_count = Family::<Labels, Gauge>::default();
|
||||||
|
let clients_count_clone = clients_count.clone();
|
||||||
|
let custom_stat = Family::<Labels, Gauge<f64, AtomicU64>>::default();
|
||||||
|
let custom_stat_clone = custom_stat.clone();
|
||||||
|
|
||||||
|
// Need to run the metrics server in a different thread to avoid blocking
|
||||||
|
thread::spawn(move || {
|
||||||
|
block_on(serve_metrics(
|
||||||
|
listener,
|
||||||
|
corpus_count_clone,
|
||||||
|
objective_count_clone,
|
||||||
|
executions_clone,
|
||||||
|
exec_rate_clone,
|
||||||
|
runtime_clone,
|
||||||
|
clients_count_clone,
|
||||||
|
custom_stat_clone,
|
||||||
|
))
|
||||||
|
.map_err(|err| println!("{err:?}"))
|
||||||
|
.ok();
|
||||||
|
});
|
||||||
|
Self {
|
||||||
|
print_fn,
|
||||||
|
start_time: current_time(),
|
||||||
|
client_stats: vec![],
|
||||||
|
corpus_count,
|
||||||
|
objective_count,
|
||||||
|
executions,
|
||||||
|
exec_rate,
|
||||||
|
runtime,
|
||||||
|
clients_count,
|
||||||
|
custom_stat,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/// Creates the monitor with a given `start_time`.
|
||||||
|
pub fn with_time(listener: String, print_fn: F, start_time: Duration) -> Self {
|
||||||
|
let corpus_count = Family::<Labels, Gauge>::default();
|
||||||
|
let corpus_count_clone = corpus_count.clone();
|
||||||
|
let objective_count = Family::<Labels, Gauge>::default();
|
||||||
|
let objective_count_clone = objective_count.clone();
|
||||||
|
let executions = Family::<Labels, Gauge>::default();
|
||||||
|
let executions_clone = executions.clone();
|
||||||
|
let exec_rate = Family::<Labels, Gauge>::default();
|
||||||
|
let exec_rate_clone = exec_rate.clone();
|
||||||
|
let runtime = Family::<Labels, Gauge>::default();
|
||||||
|
let runtime_clone = runtime.clone();
|
||||||
|
let clients_count = Family::<Labels, Gauge>::default();
|
||||||
|
let clients_count_clone = clients_count.clone();
|
||||||
|
let custom_stat = Family::<Labels, Gauge<f64, AtomicU64>>::default();
|
||||||
|
let custom_stat_clone = custom_stat.clone();
|
||||||
|
|
||||||
|
thread::spawn(move || {
|
||||||
|
block_on(serve_metrics(
|
||||||
|
listener,
|
||||||
|
corpus_count_clone,
|
||||||
|
objective_count_clone,
|
||||||
|
executions_clone,
|
||||||
|
exec_rate_clone,
|
||||||
|
runtime_clone,
|
||||||
|
clients_count_clone,
|
||||||
|
custom_stat_clone,
|
||||||
|
))
|
||||||
|
.map_err(|err| println!("{err:?}"))
|
||||||
|
.ok();
|
||||||
|
});
|
||||||
|
Self {
|
||||||
|
print_fn,
|
||||||
|
start_time,
|
||||||
|
client_stats: vec![],
|
||||||
|
corpus_count,
|
||||||
|
objective_count,
|
||||||
|
executions,
|
||||||
|
exec_rate,
|
||||||
|
runtime,
|
||||||
|
clients_count,
|
||||||
|
custom_stat,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// set up an HTTP endpoint /metrics
|
||||||
|
#[allow(clippy::too_many_arguments)]
|
||||||
|
pub async fn serve_metrics(
|
||||||
|
listener: String,
|
||||||
|
corpus: Family<Labels, Gauge>,
|
||||||
|
objectives: Family<Labels, Gauge>,
|
||||||
|
executions: Family<Labels, Gauge>,
|
||||||
|
exec_rate: Family<Labels, Gauge>,
|
||||||
|
runtime: Family<Labels, Gauge>,
|
||||||
|
clients_count: Family<Labels, Gauge>,
|
||||||
|
custom_stat: Family<Labels, Gauge<f64, AtomicU64>>,
|
||||||
|
) -> Result<(), std::io::Error> {
|
||||||
|
tide::log::start();
|
||||||
|
|
||||||
|
let mut registry = <Registry>::default();
|
||||||
|
|
||||||
|
registry.register(
|
||||||
|
"corpus_count",
|
||||||
|
"Number of test cases in the corpus",
|
||||||
|
Box::new(corpus),
|
||||||
|
);
|
||||||
|
registry.register(
|
||||||
|
"objective_count",
|
||||||
|
"Number of times the objective has been achieved (e.g., crashes)",
|
||||||
|
Box::new(objectives),
|
||||||
|
);
|
||||||
|
registry.register(
|
||||||
|
"executions_total",
|
||||||
|
"Number of executions the fuzzer has done",
|
||||||
|
Box::new(executions),
|
||||||
|
);
|
||||||
|
registry.register(
|
||||||
|
"execution_rate",
|
||||||
|
"Rate of executions per second",
|
||||||
|
Box::new(exec_rate),
|
||||||
|
);
|
||||||
|
registry.register(
|
||||||
|
"runtime",
|
||||||
|
"How long the fuzzer has been running for (seconds)",
|
||||||
|
Box::new(runtime),
|
||||||
|
);
|
||||||
|
registry.register(
|
||||||
|
"clients_count",
|
||||||
|
"How many clients have been spawned for the fuzzing job",
|
||||||
|
Box::new(clients_count),
|
||||||
|
);
|
||||||
|
registry.register(
|
||||||
|
"custom_stat",
|
||||||
|
"A metric to contain custom stats returned by feedbacks, filterable by label",
|
||||||
|
Box::new(custom_stat),
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut app = tide::with_state(State {
|
||||||
|
registry: Arc::new(registry),
|
||||||
|
});
|
||||||
|
|
||||||
|
app.at("/")
|
||||||
|
.get(|_| async { Ok("LibAFL Prometheus Monitor") });
|
||||||
|
app.at("/metrics").get(|req: Request<State>| async move {
|
||||||
|
let mut encoded = Vec::new();
|
||||||
|
encode(&mut encoded, &req.state().registry).unwrap();
|
||||||
|
let response = tide::Response::builder(200)
|
||||||
|
.body(encoded)
|
||||||
|
.content_type("application/openmetrics-text; version=1.0.0; charset=utf-8")
|
||||||
|
.build();
|
||||||
|
Ok(response)
|
||||||
|
});
|
||||||
|
app.listen(listener).await?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Hash, PartialEq, Eq, Encode, Debug)]
|
||||||
|
pub struct Labels {
|
||||||
|
client: u32, // sender_id: u32, to differentiate between clients when multiple are spawned.
|
||||||
|
stat: String, // for custom_stat filtering.
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
struct State {
|
||||||
|
#[allow(dead_code)]
|
||||||
|
registry: Arc<Registry<Box<dyn SendSyncEncodeMetric>>>,
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user