From e32b3eae9398a381a9449475adbba23213e5034b Mon Sep 17 00:00:00 2001 From: Valentin Huber Date: Sat, 9 Nov 2024 19:13:51 +0100 Subject: [PATCH] Introducing Launcher::overcommit, improving CI formatting (#2670) * introducing Launcher::overcommit * removing unnecessary cfg restrictions and clippy allows * improving warning for wrong clang-format version * installing black in the format CI * Enforcing python formatting in CI * extending formatting using black on all python files * printing diff on black failure * preferring python's black over system black * moving to LLVM 19 for formatting --- .github/workflows/build_and_test.yml | 2 + bindings/pylibafl/test.py | 6 +- fuzzers/binary_only/python_qemu/fuzzer.py | 18 +- .../dynamic_analysis/concatenator.py | 20 +- libafl/src/events/launcher.rs | 179 ++++++------- scripts/fmt_all.sh | 21 +- utils/cfg_builder/build.py | 13 +- utils/gramatron/construct_automata.py | 248 ++++++++++-------- utils/gramatron/gnf_converter.py | 99 +++---- utils/libafl_fmt/Cargo.toml | 1 + utils/libafl_fmt/src/main.rs | 37 ++- 11 files changed, 345 insertions(+), 299 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 27433d1f36..7bd69e987a 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -198,6 +198,8 @@ jobs: run: rustup component add --toolchain nightly-x86_64-unknown-linux-gnu rustfmt - uses: Swatinem/rust-cache@v2 with: { shared-key: "ubuntu" } + - name: Installing black + run: python3 -m pip install black - name: Format Check run: ./scripts/fmt_all.sh check diff --git a/bindings/pylibafl/test.py b/bindings/pylibafl/test.py index 41d90c9e3e..1ad3ef8c1e 100644 --- a/bindings/pylibafl/test.py +++ b/bindings/pylibafl/test.py @@ -3,5 +3,7 @@ import ctypes import platform print("Starting to fuzz from python!") -fuzzer = sugar.InMemoryBytesCoverageSugar(input_dirs=["./in"], output_dir="out", broker_port=1337, cores=[0,1]) -fuzzer.run(lambda b: print("foo")) \ No newline at end of file +fuzzer = sugar.InMemoryBytesCoverageSugar( + input_dirs=["./in"], output_dir="out", broker_port=1337, cores=[0, 1] +) +fuzzer.run(lambda b: print("foo")) diff --git a/fuzzers/binary_only/python_qemu/fuzzer.py b/fuzzers/binary_only/python_qemu/fuzzer.py index 71fc023580..295159cad5 100644 --- a/fuzzers/binary_only/python_qemu/fuzzer.py +++ b/fuzzers/binary_only/python_qemu/fuzzer.py @@ -4,31 +4,32 @@ from pylibafl import sugar, qemu import lief MAX_SIZE = 0x100 -BINARY_PATH = './a.out' +BINARY_PATH = "./a.out" -emu = qemu.Qemu(['qemu-x86_64', BINARY_PATH], []) +emu = qemu.Qemu(["qemu-x86_64", BINARY_PATH], []) elf = lief.parse(BINARY_PATH) test_one_input = elf.get_function_address("LLVMFuzzerTestOneInput") if elf.is_pie: test_one_input += emu.load_addr() -print('LLVMFuzzerTestOneInput @ 0x%x' % test_one_input) +print("LLVMFuzzerTestOneInput @ 0x%x" % test_one_input) emu.set_breakpoint(test_one_input) emu.run() sp = emu.read_reg(qemu.regs.Rsp) -print('SP = 0x%x' % sp) +print("SP = 0x%x" % sp) -retaddr = int.from_bytes(emu.read_mem(sp, 8), 'little') -print('RET = 0x%x' % retaddr) +retaddr = int.from_bytes(emu.read_mem(sp, 8), "little") +print("RET = 0x%x" % retaddr) inp = emu.map_private(0, MAX_SIZE, qemu.mmap.ReadWrite) -assert(inp > 0) +assert inp > 0 emu.remove_breakpoint(test_one_input) emu.set_breakpoint(retaddr) + def harness(b): if len(b) > MAX_SIZE: b = b[:MAX_SIZE] @@ -39,5 +40,6 @@ def harness(b): emu.write_reg(qemu.regs.Rip, test_one_input) emu.run() -fuzz = 
sugar.QemuBytesCoverageSugar(['./in'], './out', 3456, [0,1,2,3]) + +fuzz = sugar.QemuBytesCoverageSugar(["./in"], "./out", 3456, [0, 1, 2, 3]) fuzz.run(emu, harness) diff --git a/fuzzers/inprocess/dynamic_analysis/concatenator.py b/fuzzers/inprocess/dynamic_analysis/concatenator.py index 72f09f56c6..f24d06ad1e 100755 --- a/fuzzers/inprocess/dynamic_analysis/concatenator.py +++ b/fuzzers/inprocess/dynamic_analysis/concatenator.py @@ -4,33 +4,35 @@ import os import json import sys + def concatenate_json_files(input_dir): json_files = [] for root, dirs, files in os.walk(input_dir): for file in files: - if file.endswith('.json'): + if file.endswith(".json"): json_files.append(os.path.join(root, file)) - + data = dict() for json_file in json_files: - with open(json_file, 'r') as file: + with open(json_file, "r") as file: if os.stat(json_file).st_size == 0: # skip empty file else json.load() fails continue json_data = json.load(file) print(type(json_data), file) data = data | json_data - - output_file = os.path.join(os.getcwd(), 'concatenated.json') - with open(output_file, 'w') as file: + + output_file = os.path.join(os.getcwd(), "concatenated.json") + with open(output_file, "w") as file: json.dump([data], file) - + print(f"JSON files concatenated successfully! Output file: {output_file}") -if __name__ == '__main__': + +if __name__ == "__main__": if len(sys.argv) != 2: print("Usage: python script.py ") sys.exit(1) - + input_directory = sys.argv[1] concatenate_json_files(input_directory) diff --git a/libafl/src/events/launcher.rs b/libafl/src/events/launcher.rs index fcd75cafd2..9dfaced771 100644 --- a/libafl/src/events/launcher.rs +++ b/libafl/src/events/launcher.rs @@ -108,24 +108,27 @@ pub struct Launcher<'a, CF, MT, SP> { broker_port: u16, /// The list of cores to run on cores: &'a Cores, + /// The number of clients to spawn on each core + #[builder(default = 1)] + overcommit: usize, /// A file name to write all client output to - #[cfg(all(unix, feature = "std"))] + #[cfg(unix)] #[builder(default = None)] stdout_file: Option<&'a str>, /// The time in milliseconds to delay between child launches #[builder(default = 10)] launch_delay: u64, /// The actual, opened, `stdout_file` - so that we keep it open until the end - #[cfg(all(unix, feature = "std", feature = "fork"))] + #[cfg(all(unix, feature = "fork"))] #[builder(setter(skip), default = None)] opened_stdout_file: Option, /// A file name to write all client stderr output to. If not specified, output is sent to /// `stdout_file`. 
- #[cfg(all(unix, feature = "std"))] + #[cfg(unix)] #[builder(default = None)] stderr_file: Option<&'a str>, /// The actual, opened, `stdout_file` - so that we keep it open until the end - #[cfg(all(unix, feature = "std", feature = "fork"))] + #[cfg(all(unix, feature = "fork"))] #[builder(setter(skip), default = None)] opened_stderr_file: Option, /// The `ip:port` address of another broker to connect our new broker to for multi-machine @@ -172,17 +175,10 @@ where SP: ShMemProvider, { /// Launch the broker and the clients and fuzz - #[cfg(all(unix, feature = "std", feature = "fork"))] - pub fn launch(&mut self) -> Result<(), Error> - where - S: State + HasExecutions, - CF: FnOnce(Option, LlmpRestartingEventManager<(), S, SP>, CoreId) -> Result<(), Error>, - { - Self::launch_with_hooks(self, tuple_list!()) - } - - /// Launch the broker and the clients and fuzz - #[cfg(all(feature = "std", any(windows, not(feature = "fork"))))] + #[cfg(all( + feature = "std", + any(windows, not(feature = "fork"), all(unix, feature = "fork")) + ))] #[allow(unused_mut, clippy::match_wild_err_arm)] pub fn launch(&mut self) -> Result<(), Error> where @@ -200,9 +196,8 @@ where SP: ShMemProvider, { /// Launch the broker and the clients and fuzz with a user-supplied hook - #[cfg(all(unix, feature = "std", feature = "fork"))] - #[allow(clippy::similar_names)] - #[allow(clippy::too_many_lines)] + #[cfg(all(unix, feature = "fork"))] + #[allow(clippy::similar_names, clippy::too_many_lines)] pub fn launch_with_hooks(&mut self, hooks: EMH) -> Result<(), Error> where S: State + HasExecutions, @@ -221,8 +216,7 @@ where )); } - let core_ids = get_core_ids().unwrap(); - let num_cores = core_ids.len(); + let core_ids = get_core_ids()?; let mut handles = vec![]; log::info!("spawning on cores: {:?}", self.cores); @@ -234,66 +228,63 @@ where .stderr_file .map(|filename| File::create(filename).unwrap()); - #[cfg(feature = "std")] let debug_output = std::env::var(LIBAFL_DEBUG_OUTPUT).is_ok(); // Spawn clients let mut index = 0_u64; - for (id, bind_to) in core_ids.iter().enumerate().take(num_cores) { + for (id, bind_to) in core_ids.iter().enumerate() { if self.cores.ids.iter().any(|&x| x == id.into()) { - index += 1; - self.shmem_provider.pre_fork()?; - // # Safety - // Fork is safe in general, apart from potential side effects to the OS and other threads - match unsafe { fork() }? { - ForkResult::Parent(child) => { - self.shmem_provider.post_fork(false)?; - handles.push(child.pid); - #[cfg(feature = "std")] - log::info!("child spawned and bound to core {id}"); - } - ForkResult::Child => { - // # Safety - // A call to `getpid` is safe. - log::info!("{:?} PostFork", unsafe { libc::getpid() }); - self.shmem_provider.post_fork(true)?; + for _ in 0..self.overcommit { + index += 1; + self.shmem_provider.pre_fork()?; + // # Safety + // Fork is safe in general, apart from potential side effects to the OS and other threads + match unsafe { fork() }? { + ForkResult::Parent(child) => { + self.shmem_provider.post_fork(false)?; + handles.push(child.pid); + log::info!("child spawned and bound to core {id}"); + } + ForkResult::Child => { + // # Safety + // A call to `getpid` is safe. 
+ log::info!("{:?} PostFork", unsafe { libc::getpid() }); + self.shmem_provider.post_fork(true)?; - #[cfg(feature = "std")] - std::thread::sleep(Duration::from_millis(index * self.launch_delay)); + std::thread::sleep(Duration::from_millis(index * self.launch_delay)); - #[cfg(feature = "std")] - if !debug_output { - if let Some(file) = &self.opened_stdout_file { - dup2(file.as_raw_fd(), libc::STDOUT_FILENO)?; - if let Some(stderr) = &self.opened_stderr_file { - dup2(stderr.as_raw_fd(), libc::STDERR_FILENO)?; - } else { - dup2(file.as_raw_fd(), libc::STDERR_FILENO)?; + if !debug_output { + if let Some(file) = &self.opened_stdout_file { + dup2(file.as_raw_fd(), libc::STDOUT_FILENO)?; + if let Some(stderr) = &self.opened_stderr_file { + dup2(stderr.as_raw_fd(), libc::STDERR_FILENO)?; + } else { + dup2(file.as_raw_fd(), libc::STDERR_FILENO)?; + } } } + + // Fuzzer client. keeps retrying the connection to broker till the broker starts + let builder = RestartingMgr::::builder() + .shmem_provider(self.shmem_provider.clone()) + .broker_port(self.broker_port) + .kind(ManagerKind::Client { + cpu_core: Some(*bind_to), + }) + .configuration(self.configuration) + .serialize_state(self.serialize_state) + .hooks(hooks); + let builder = builder.time_ref(self.time_ref.clone()); + let (state, mgr) = builder.build().launch()?; + + return (self.run_client.take().unwrap())(state, mgr, *bind_to); } - - // Fuzzer client. keeps retrying the connection to broker till the broker starts - let builder = RestartingMgr::::builder() - .shmem_provider(self.shmem_provider.clone()) - .broker_port(self.broker_port) - .kind(ManagerKind::Client { - cpu_core: Some(*bind_to), - }) - .configuration(self.configuration) - .serialize_state(self.serialize_state) - .hooks(hooks); - let builder = builder.time_ref(self.time_ref.clone()); - let (state, mgr) = builder.build().launch()?; - - return (self.run_client.take().unwrap())(state, mgr, *bind_to); - } - }; + }; + } } } if self.spawn_broker { - #[cfg(feature = "std")] log::info!("I am broker!!."); // TODO we don't want always a broker here, think about using different laucher process to spawn different configurations @@ -337,7 +328,7 @@ where } /// Launch the broker and the clients and fuzz - #[cfg(all(feature = "std", any(windows, not(feature = "fork"))))] + #[cfg(any(windows, not(feature = "fork")))] #[allow(unused_mut, clippy::match_wild_err_arm, clippy::too_many_lines)] pub fn launch_with_hooks(&mut self, hooks: EMH) -> Result<(), Error> where @@ -381,7 +372,7 @@ where log::info!("spawning on cores: {:?}", self.cores); let debug_output = std::env::var("LIBAFL_DEBUG_OUTPUT").is_ok(); - #[cfg(all(feature = "std", unix))] + #[cfg(unix)] { // Set own stdout and stderr as set by the user if !debug_output { @@ -404,32 +395,34 @@ where //spawn clients for (id, _) in core_ids.iter().enumerate().take(num_cores) { if self.cores.ids.iter().any(|&x| x == id.into()) { - // Forward own stdio to child processes, if requested by user - let (mut stdout, mut stderr) = (Stdio::null(), Stdio::null()); - #[cfg(all(feature = "std", unix))] - { - if self.stdout_file.is_some() || self.stderr_file.is_some() { - stdout = Stdio::inherit(); - stderr = Stdio::inherit(); - }; + for _ in 0..self.overcommit { + // Forward own stdio to child processes, if requested by user + let (mut stdout, mut stderr) = (Stdio::null(), Stdio::null()); + #[cfg(unix)] + { + if self.stdout_file.is_some() || self.stderr_file.is_some() { + stdout = Stdio::inherit(); + stderr = Stdio::inherit(); + }; + } + + 
std::thread::sleep(Duration::from_millis( + id as u64 * self.launch_delay, + )); + + std::env::set_var(_AFL_LAUNCHER_CLIENT, id.to_string()); + let mut child = startable_self()?; + let child = (if debug_output { + &mut child + } else { + child.stdout(stdout); + child.stderr(stderr) + }) + .spawn()?; + handles.push(child); } - - #[cfg(feature = "std")] - std::thread::sleep(Duration::from_millis(id as u64 * self.launch_delay)); - - std::env::set_var(_AFL_LAUNCHER_CLIENT, id.to_string()); - let mut child = startable_self()?; - let child = (if debug_output { - &mut child - } else { - child.stdout(stdout); - child.stderr(stderr) - }) - .spawn()?; - handles.push(child); } } - handles } Err(_) => panic!("Env variables are broken, received non-unicode!"), @@ -444,7 +437,6 @@ where } if self.spawn_broker { - #[cfg(feature = "std")] log::info!("I am broker!!."); let builder = RestartingMgr::::builder() @@ -620,8 +612,7 @@ where /// Launch a Centralized-based fuzzer. /// - `main_inner_mgr_builder` will be called to build the inner manager of the main node. /// - `secondary_inner_mgr_builder` will be called to build the inner manager of the secondary nodes. - #[allow(clippy::similar_names)] - #[allow(clippy::too_many_lines)] + #[allow(clippy::similar_names, clippy::too_many_lines)] pub fn launch_generic( &mut self, main_inner_mgr_builder: EMB, diff --git a/scripts/fmt_all.sh b/scripts/fmt_all.sh index e3a9a56b4b..963f15c4ce 100755 --- a/scripts/fmt_all.sh +++ b/scripts/fmt_all.sh @@ -11,16 +11,21 @@ else cargo run --manifest-path "$LIBAFL_DIR/utils/libafl_fmt/Cargo.toml" --release -- --verbose || exit 1 fi -if command -v black > /dev/null; then - echo "[*] Formatting python files" - if ! black "$SCRIPT_DIR" - then - echo "Python format failed." - exit 1 - fi +if python3 -m black --version > /dev/null; then + BLACK_COMMAND="python3 -m black" +elif command -v black > /dev/null; then + BLACK_COMMAND="black" +fi +if [ -n "$BLACK_COMMAND" ]; then + echo "[*] Formatting python files" + if [ "$1" = "check" ]; then + $BLACK_COMMAND --check --diff "$LIBAFL_DIR" || exit 1 + else + $BLACK_COMMAND "$LIBAFL_DIR" || exit 1 + fi else - echo "Warning: python black not found. Formatting skipped for python." + echo -e "\n\033[1;33mWarning\033[0m: python black not found. 
Formatting skipped for python.\n" fi if [ "$1" != "check" ]; then diff --git a/utils/cfg_builder/build.py b/utils/cfg_builder/build.py index 070f15e30b..0d60fb6a55 100644 --- a/utils/cfg_builder/build.py +++ b/utils/cfg_builder/build.py @@ -7,7 +7,7 @@ import sys cfg = dict() -if 'CFG_OUTPUT_PATH' not in os.environ: +if "CFG_OUTPUT_PATH" not in os.environ: sys.exit("CFG_OUTPUT_PATH not set") input_path = os.environ["CFG_OUTPUT_PATH"] @@ -31,7 +31,7 @@ for mname, module in cfg.items(): fnname2SG = dict() # First, add all the intra-procedural edges - for (fname, v) in module['edges'].items(): + for fname, v in module["edges"].items(): if fname not in fname2id: GG.add_node(f_ids, label=fname) @@ -41,8 +41,7 @@ for mname, module in cfg.items(): sz = len(v) for idx in range(node_ids, node_ids + sz): G.add_node(idx) - G.nodes[idx]['label'] = mname + ' ' + \ - fname + ' ' + str(idx - node_ids) + G.nodes[idx]["label"] = mname + " " + fname + " " + str(idx - node_ids) node_id_list = list(range(node_ids, node_ids + sz)) node_ids += sz SG = G.subgraph(node_id_list) @@ -52,14 +51,14 @@ for mname, module in cfg.items(): G.add_edge(node_id_list[src], node_id_list[item]) # Next, build inter-procedural edges - for (fname, calls) in module['calls'].items(): - for (idx, target_fns) in calls.items(): + for fname, calls in module["calls"].items(): + for idx, target_fns in calls.items(): # G.nodes isn't sorted src = sorted(fnname2SG[fname].nodes())[0] + int(idx) for target_fn in target_fns: if target_fn in fnname2SG: - offset = module['entries'][target_fn] + offset = module["entries"][target_fn] dst = sorted(fnname2SG[target_fn].nodes)[0] + offset diff --git a/utils/gramatron/construct_automata.py b/utils/gramatron/construct_automata.py index 0cb87c3c75..126b3f84e8 100644 --- a/utils/gramatron/construct_automata.py +++ b/utils/gramatron/construct_automata.py @@ -8,36 +8,37 @@ import sys import json import re from collections import defaultdict + # import pygraphviz as pgv gram_data = None state_count = 1 pda = [] worklist = [] -state_stacks = {} +state_stacks = {} # === If user provides upper bound on the stack size during FSA creation === # Specifies the upper bound to which the stack is allowed to grow # If for any generated state, the stack size is >= stack_limit then this # state is not expanded further. 
-stack_limit = None +stack_limit = None # Holds the set of unexpanded rules owing to the user-passed stack constraint limit unexpanded_rules = set() + def main(grammar, limit): global worklist, gram_data, stack_limit - current = '0' + current = "0" stack_limit = limit if stack_limit: - print ('[X] Operating in bounded stack mode') + print("[X] Operating in bounded stack mode") - with open(grammar, 'r') as fd: + with open(grammar, "r") as fd: gram_data = json.load(fd) start_symbol = gram_data["Start"][0] worklist.append([current, [start_symbol]]) # print (grammar) - filename = (grammar.split('/')[-1]).split('.')[0] - + filename = (grammar.split("/")[-1]).split(".")[0] while worklist: # Take an element from the worklist @@ -45,69 +46,78 @@ def main(grammar, limit): # print ('Worklist:', worklist) element = worklist.pop(0) prep_transitions(element) - - pda_file = filename + '_transition.json' - graph_file = filename + '.png' + + pda_file = filename + "_transition.json" + graph_file = filename + ".png" # print ('XXXXXXXXXXXXXXXX') # print ('PDA file:%s Png graph file:%s' % (pda_file, graph_file)) # XXX Commented out because visualization of current version of PHP causes segfault # Create the graph and dump the transitions to a file # create_graph(filename) transformed = postprocess() - with open(filename + '_automata.json', 'w+') as fd: + with open(filename + "_automata.json", "w+") as fd: json.dump(transformed, fd) - with open(filename + '_transition.json', 'w+') as fd: + with open(filename + "_transition.json", "w+") as fd: json.dump(pda, fd) if not unexpanded_rules: - print ('[X] No unexpanded rules, absolute FSA formed') + print("[X] No unexpanded rules, absolute FSA formed") exit(0) else: - print ('[X] Certain rules were not expanded due to stack size limit. Inexact approximation has been created and the disallowed rules have been put in {}_disallowed.json'.format(filename)) - print ('[X] Number of unexpanded rules:', len(unexpanded_rules)) - with open(filename + '_disallowed.json', 'w+') as fd: + print( + "[X] Certain rules were not expanded due to stack size limit. 
Inexact approximation has been created and the disallowed rules have been put in {}_disallowed.json".format( + filename + ) + ) + print("[X] Number of unexpanded rules:", len(unexpanded_rules)) + with open(filename + "_disallowed.json", "w+") as fd: json.dump(list(unexpanded_rules), fd) + def create_graph(filename): - ''' + """ Creates a DOT representation of the PDA - ''' + """ global pda - G = pgv.AGraph(strict = False, directed = True) + G = pgv.AGraph(strict=False, directed=True) for transition in pda: - print ('Transition:', transition) - G.add_edge(transition['source'], transition['dest'], - label = 'Term:{}'.format(transition['terminal'])) - G.layout(prog = 'dot') - print ('Do it up 2') - G.draw(filename + '.png') + print("Transition:", transition) + G.add_edge( + transition["source"], + transition["dest"], + label="Term:{}".format(transition["terminal"]), + ) + G.layout(prog="dot") + print("Do it up 2") + G.draw(filename + ".png") + def prep_transitions(element): - ''' + """ Generates transitions - ''' + """ global gram_data, state_count, pda, worklist, state_stacks, stack_limit, unexpanded_rules state = element[0] try: - nonterminal = element[1][0] + nonterminal = element[1][0] except IndexError: # Final state was encountered, pop from worklist without doing anything return rules = gram_data[nonterminal] count = 1 for rule in rules: - isRecursive = False + isRecursive = False # print ('Current state:', state) terminal, ss, termIsRegex = tokenize(rule) transition = get_template() - transition['trigger'] = '_'.join([state, str(count)]) - transition['source'] = state - transition['dest'] = str(state_count) - transition['ss'] = ss - transition['terminal'] = terminal - transition['rule'] = "{} -> {}".format(nonterminal, rule ) + transition["trigger"] = "_".join([state, str(count)]) + transition["source"] = state + transition["dest"] = str(state_count) + transition["ss"] = ss + transition["terminal"] = terminal + transition["rule"] = "{} -> {}".format(nonterminal, rule) if termIsRegex: - transition['termIsRegex'] = True - + transition["termIsRegex"] = True + # Creating a state stack for the new state try: state_stack = state_stacks[state][:] @@ -118,7 +128,7 @@ def prep_transitions(element): if ss: for symbol in ss[::-1]: state_stack.insert(0, symbol) - transition['stack'] = state_stack + transition["stack"] = state_stack # Check if a recursive transition state being created, if so make a backward # edge and don't add anything to the worklist @@ -128,38 +138,39 @@ def prep_transitions(element): # print ('Stack:', sorted(stack)) # print ('State stack:', sorted(state_stack)) if sorted(stack) == sorted(state_stack): - transition['dest'] = state_element + transition["dest"] = state_element # print ('Recursive:', transition) pda.append(transition) count += 1 isRecursive = True - break + break # If a recursive transition exercised don't add the same transition as a new # edge, continue onto the next transitions if isRecursive: continue - + # If the generated state has a stack size > stack_limit then that state is abandoned # and not added to the FSA or the worklist for further expansion if stack_limit: - if (len(transition['stack']) > stack_limit): - unexpanded_rules.add(transition['rule']) + if len(transition["stack"]) > stack_limit: + unexpanded_rules.add(transition["rule"]) continue # Create transitions for the non-recursive relations and add to the worklist # print ('Normal:', transition) # print ('State2:', state) pda.append(transition) - worklist.append([transition['dest'], 
transition['stack']]) - state_stacks[transition['dest']] = state_stack + worklist.append([transition["dest"], transition["stack"]]) + state_stacks[transition["dest"]] = state_stack state_count += 1 count += 1 + def tokenize(rule): - ''' + """ Gets the terminal and the corresponding stack symbols from a rule in GNF form - ''' - pattern = re.compile("([r])*\'([\s\S]+)\'([\s\S]*)") + """ + pattern = re.compile("([r])*'([\s\S]+)'([\s\S]*)") terminal = None ss = None termIsRegex = False @@ -176,34 +187,35 @@ def tokenize(rule): return terminal, ss, termIsRegex + def get_template(): transition_template = { - 'trigger':None, - 'source': None, - 'dest': None, - 'termIsRegex': False, - 'terminal' : None, - 'stack': [] - } + "trigger": None, + "source": None, + "dest": None, + "termIsRegex": False, + "terminal": None, + "stack": [], + } return transition_template + def postprocess1(): - ''' + """ Creates a representation to be passed on to the C-module - ''' + """ global pda final_struct = {} # Supporting data structures for if stack limit is imposed culled_pda = [] culled_final = [] - num_transitions = 0 # Keep track of number of transitions - + num_transitions = 0 # Keep track of number of transitions states, final, initial = _get_states() memoized = [[]] * len(states) - print (initial) - assert len(initial) == 1, 'More than one init state found' + print(initial) + assert len(initial) == 1, "More than one init state found" # Cull transitions to states which were not expanded owing to the stack limit if stack_limit: @@ -211,63 +223,67 @@ def postprocess1(): blocklist = [] for final_state in final: for transition in pda: - if (transition["dest"] == final_state) and (len(transition["stack"]) > 0): + if (transition["dest"] == final_state) and ( + len(transition["stack"]) > 0 + ): blocklist.append(transition["dest"]) continue else: culled_pda.append(transition) - + culled_final = [state for state in final if state not in blocklist] - assert len(culled_final) == 1, 'More than one final state found' + assert len(culled_final) == 1, "More than one final state found" for transition in culled_pda: state = transition["source"] if transition["dest"] in blocklist: - continue + continue num_transitions += 1 - memoized[int(state)].append((transition["trigger"], - int(transition["dest"]), transition["terminal"])) + memoized[int(state)].append( + (transition["trigger"], int(transition["dest"]), transition["terminal"]) + ) final_struct["init_state"] = int(initial) final_struct["final_state"] = int(culled_final[0]) # The reason we do this is because when states are culled, the indexing is # still relative to the actual number of states hence we keep numstates recorded # as the original number of states - print ('[X] Actual Number of states:', len(memoized)) - print ('[X] Number of transitions:', num_transitions) - print ('[X] Original Number of states:', len(states)) + print("[X] Actual Number of states:", len(memoized)) + print("[X] Number of transitions:", num_transitions) + print("[X] Original Number of states:", len(states)) final_struct["pda"] = memoized return final_struct - + # Running FSA construction in exact approximation mode and postprocessing it like so for transition in pda: - state = transition["source"] - memoized[int(state)].append((transition["trigger"], - int(transition["dest"]), transition["terminal"])) + state = transition["source"] + memoized[int(state)].append( + (transition["trigger"], int(transition["dest"]), transition["terminal"]) + ) final_struct["init_state"] = int(initial) 
final_struct["final_state"] = int(final[0]) - print ('[X] Actual Number of states:', len(memoized)) + print("[X] Actual Number of states:", len(memoized)) final_struct["pda"] = memoized return final_struct + def postprocess(): - ''' + """ Creates a representation to be passed on to the C-module - ''' + """ global pda final_struct = {} memoized = defaultdict(list) # Supporting data structures for if stack limit is imposed culled_pda = [] culled_final = [] - num_transitions = 0 # Keep track of number of transitions - + num_transitions = 0 # Keep track of number of transitions states, final, initial = _get_states() - print (initial) - assert len(initial) == 1, 'More than one init state found' + print(initial) + assert len(initial) == 1, "More than one init state found" # Cull transitions to states which were not expanded owing to the stack limit if stack_limit: @@ -275,49 +291,51 @@ def postprocess(): blocklist = [] for final_state in final: for transition in pda: - if (transition["dest"] == final_state) and (len(transition["stack"]) > 0): + if (transition["dest"] == final_state) and ( + len(transition["stack"]) > 0 + ): blocklist.append(transition["dest"]) continue else: culled_pda.append(transition) - + culled_final = [state for state in final if state not in blocklist] - assert len(culled_final) == 1, 'More than one final state found' + assert len(culled_final) == 1, "More than one final state found" for transition in culled_pda: state = transition["source"] if transition["dest"] in blocklist: - continue + continue num_transitions += 1 - memoized[int(state)].append([transition["trigger"], int(transition["dest"]), - transition["terminal"]]) - - - + memoized[int(state)].append( + [transition["trigger"], int(transition["dest"]), transition["terminal"]] + ) + final_struct["init_state"] = int(initial) final_struct["final_state"] = int(culled_final[0]) # The reason we do this is because when states are culled, the indexing is # still relative to the actual number of states hence we keep numstates recorded # as the original number of states - print ('[X] Actual Number of states:', len(memoized.keys())) - print ('[X] Number of transitions:', num_transitions) - print ('[X] Original Number of states:', len(states)) - #final_struct["numstates"] = len(states) - memoized_list = [[]]*len(states) + print("[X] Actual Number of states:", len(memoized.keys())) + print("[X] Number of transitions:", num_transitions) + print("[X] Original Number of states:", len(states)) + # final_struct["numstates"] = len(states) + memoized_list = [[]] * len(states) else: # Running FSA construction in exact approximation mode and postprocessing it like so for transition in pda: - state = transition["source"] - memoized[int(state)].append([transition["trigger"], int(transition["dest"]), - transition["terminal"]]) + state = transition["source"] + memoized[int(state)].append( + [transition["trigger"], int(transition["dest"]), transition["terminal"]] + ) final_struct["init_state"] = int(initial) final_struct["final_state"] = int(final[0]) - print ('[X] Actual Number of states:', len(memoized.keys())) - #final_struct["numstates"] = len(memoized.keys()) - memoized_list = [[]]*len(memoized.keys()) - + print("[X] Actual Number of states:", len(memoized.keys())) + # final_struct["numstates"] = len(memoized.keys()) + memoized_list = [[]] * len(memoized.keys()) + for k in memoized.keys(): memoized_list[k] = memoized[k] final_struct["pda"] = memoized_list @@ -333,19 +351,23 @@ def _get_states(): dest.add(transition["dest"]) source_copy = 
source.copy() source_copy.update(dest) - return list(source_copy), list(dest.difference(source)), str(''.join(list(source.difference(dest)))) + return ( + list(source_copy), + list(dest.difference(source)), + str("".join(list(source.difference(dest)))), + ) -if __name__ == '__main__': + +if __name__ == "__main__": import argparse - parser = argparse.ArgumentParser(description = 'Script to convert GNF grammar to PDA') + + parser = argparse.ArgumentParser(description="Script to convert GNF grammar to PDA") + parser.add_argument("--gf", type=str, help="Location of GNF grammar") parser.add_argument( - '--gf', - type = str, - help = 'Location of GNF grammar') - parser.add_argument( - '--limit', - type = int, - default = None, - help = 'Specify the upper bound for the stack size') + "--limit", + type=int, + default=None, + help="Specify the upper bound for the stack size", + ) args = parser.parse_args() main(args.gf, args.limit) diff --git a/utils/gramatron/gnf_converter.py b/utils/gramatron/gnf_converter.py index 0bc70d1555..37434a7197 100755 --- a/utils/gramatron/gnf_converter.py +++ b/utils/gramatron/gnf_converter.py @@ -16,17 +16,18 @@ DEBUG = False NONTERMINALSET = [] COUNT = 1 + def convert_to_gnf(grammar, start): if DEBUG: - with open('debug_preprocess.json', 'w+') as fd: + with open("debug_preprocess.json", "w+") as fd: json.dump(grammar, fd) - grammar = remove_unit(grammar) # eliminates unit productions + grammar = remove_unit(grammar) # eliminates unit productions if DEBUG: - with open('debug_unit.json', 'w+') as fd: + with open("debug_unit.json", "w+") as fd: json.dump(grammar, fd) - grammar = remove_mixed(grammar) # eliminate terminals existing with non-terminals + grammar = remove_mixed(grammar) # eliminate terminals existing with non-terminals if DEBUG: - with open('debug_mixed.json', 'w+') as fd: + with open("debug_mixed.json", "w+") as fd: json.dump(grammar, fd) grammar = gnf(grammar) @@ -35,12 +36,13 @@ def convert_to_gnf(grammar, start): # with open('debug_gnf_reachable.json', 'w+') as fd: # json.dump(reachable_grammar, fd) if DEBUG: - with open('debug_gnf.json', 'w+') as fd: + with open("debug_gnf.json", "w+") as fd: json.dump(grammar, fd) grammar["Start"] = [start] return grammar + def remove_left_recursion(grammar): # Remove the left recursion in the grammar rules. 
# This algorithm is adopted from @@ -69,10 +71,10 @@ def remove_left_recursion(grammar): r.append(new_rule) left_recursion = [r[1:] + [new_rule] for r in left_recursion] left_recursion.append(["' '"]) - new_grammar[lhs] = [' '.join(rule) for rule in others] - new_grammar[new_rule] = [' '.join(rule) for rule in left_recursion] + new_grammar[lhs] = [" ".join(rule) for rule in others] + new_grammar[new_rule] = [" ".join(rule) for rule in left_recursion] else: - new_grammar[lhs] = [' '.join(rule) for rule in others] + new_grammar[lhs] = [" ".join(rule) for rule in others] no_left_recursion = True for lhs, rules in old_grammar.items(): for rule in rules: @@ -88,10 +90,11 @@ def remove_left_recursion(grammar): new_grammar = defaultdict(list) return new_grammar + def get_reachable(grammar, start): - ''' + """ Returns a grammar without dead rules - ''' + """ reachable_nt = set() worklist = list() processed = set() @@ -113,9 +116,10 @@ def get_reachable(grammar, start): def gettokens(rule): - pattern = re.compile("([^\s\"\']+)|\"([^\"]*)\"|\'([^\']*)\'") + pattern = re.compile("([^\s\"']+)|\"([^\"]*)\"|'([^']*)'") return [matched.group(0) for matched in pattern.finditer(rule)] + def gnf(grammar): old_grammar = copy.deepcopy(grammar) new_grammar = defaultdict(list) @@ -129,7 +133,7 @@ def gnf(grammar): new_grammar[lhs].append(rule) continue startoken = tokens[0] - assert(startoken != lhs) + assert startoken != lhs endrule = tokens[1:] if not isTerminal(startoken): newrules = [] @@ -139,7 +143,7 @@ def gnf(grammar): temprule.insert(0, extension) newrules.append(temprule) for newnew in newrules: - new_grammar[lhs].append(' '.join(newnew)) + new_grammar[lhs].append(" ".join(newnew)) else: new_grammar[lhs].append(rule) isgnf = True @@ -163,7 +167,7 @@ def process_antlr4_grammar(data): productions = [] production = [] for line in data: - if line != '\n': + if line != "\n": production.append(line) else: productions.append(production) @@ -172,16 +176,17 @@ def process_antlr4_grammar(data): for production in productions: rules = [] init = production[0] - nonterminal = init.split(':')[0] - rules.append(strip_chars(init.split(':')[1]).strip('| ')) + nonterminal = init.split(":")[0] + rules.append(strip_chars(init.split(":")[1]).strip("| ")) for production_rule in production[1:]: - rules.append(strip_chars(production_rule.split('|')[0])) + rules.append(strip_chars(production_rule.split("|")[0])) final_rule_set[nonterminal] = rules # for line in data: # if line != '\n': # production.append(line) return final_rule_set + def remove_unit(grammar): nounitproductions = False old_grammar = copy.deepcopy(grammar) @@ -213,19 +218,21 @@ def remove_unit(grammar): new_grammar = defaultdict(list) return new_grammar + def isTerminal(rule): # pattern = re.compile("([r]*\'[\s\S]+\')") - pattern = re.compile("\'(.*?)\'") + pattern = re.compile("'(.*?)'") match = pattern.match(rule) if match: return True else: return False + def remove_mixed(grammar): - ''' + """ Remove rules where there are terminals mixed in with non-terminals - ''' + """ new_grammar = defaultdict(list) for lhs, rules in grammar.items(): for rhs in rules: @@ -248,17 +255,20 @@ def remove_mixed(grammar): regen_rule.append(new_nonterm) else: regen_rule.append(token) - new_grammar[lhs].append(' '.join(regen_rule)) + new_grammar[lhs].append(" ".join(regen_rule)) return new_grammar + def strip_chars(rule): - return rule.strip('\n\t ') + return rule.strip("\n\t ") + def get_nonterminal(): global COUNT COUNT += 1 return f"GeneratedTermVar{COUNT}" + def 
terminal_exist(token, grammar): for nonterminal, rules in grammar.items(): if token in rules and len(token) == 1: @@ -269,42 +279,37 @@ def terminal_exist(token, grammar): def main(grammar_file, out, start): grammar = None # If grammar file is a preprocessed NT file, then skip preprocessing - if '.json' in grammar_file: - with open(grammar_file, 'r') as fd: + if ".json" in grammar_file: + with open(grammar_file, "r") as fd: grammar = json.load(fd) - elif '.g4' in grammar_file: - with open(grammar_file, 'r') as fd: + elif ".g4" in grammar_file: + with open(grammar_file, "r") as fd: data = fd.readlines() grammar = process_antlr4_grammar(data) else: - raise('Unknwown file format passed. Accepts (.g4/.json)') + raise ("Unknwown file format passed. Accepts (.g4/.json)") grammar = convert_to_gnf(grammar, start) - with open(out, 'w+') as fd: + with open(out, "w+") as fd: json.dump(grammar, fd) -if __name__ == '__main__': + +if __name__ == "__main__": import argparse - parser = argparse.ArgumentParser(description = 'Script to convert grammar to GNF form') + + parser = argparse.ArgumentParser( + description="Script to convert grammar to GNF form" + ) parser.add_argument( - '--gf', - type = str, - required = True, - help = 'Location of grammar file') + "--gf", type=str, required=True, help="Location of grammar file" + ) parser.add_argument( - '--out', - type = str, - required = True, - help = 'Location of output file') + "--out", type=str, required=True, help="Location of output file" + ) + parser.add_argument("--start", type=str, required=True, help="Start token") parser.add_argument( - '--start', - type = str, - required = True, - help = 'Start token') - parser.add_argument( - '--debug', - action='store_true', - help = 'Write intermediate states to debug files') + "--debug", action="store_true", help="Write intermediate states to debug files" + ) args = parser.parse_args() DEBUG = args.debug diff --git a/utils/libafl_fmt/Cargo.toml b/utils/libafl_fmt/Cargo.toml index 3b774ca07d..9f6ead251e 100644 --- a/utils/libafl_fmt/Cargo.toml +++ b/utils/libafl_fmt/Cargo.toml @@ -20,3 +20,4 @@ tokio = { version = "1.38", features = [ clap = { version = "4.5", features = ["derive"] } exitcode = "1.1" which = "6.0" +colored = "2.1.0" diff --git a/utils/libafl_fmt/src/main.rs b/utils/libafl_fmt/src/main.rs index 1b2913f06b..8617fc9708 100644 --- a/utils/libafl_fmt/src/main.rs +++ b/utils/libafl_fmt/src/main.rs @@ -78,12 +78,13 @@ use std::{ }; use clap::Parser; +use colored::Colorize; use regex::RegexSet; use tokio::{process::Command, task::JoinSet}; use walkdir::{DirEntry, WalkDir}; use which::which; -const REF_LLVM_VERSION: u32 = 18; +const REF_LLVM_VERSION: u32 = 19; fn is_workspace_toml(path: &Path) -> bool { for line in read_to_string(path).unwrap().lines() { @@ -249,20 +250,29 @@ async fn main() -> io::Result<()> { tokio_joinset.spawn(run_cargo_fmt(project, cli.check, cli.verbose)); } - let ref_clang_format = format!("clang-format-{REF_LLVM_VERSION}"); + let reference_clang_format = format!("clang-format-{REF_LLVM_VERSION}"); + let unspecified_clang_format = "clang-format"; + + let (clang, warning) = if which(&reference_clang_format).is_ok() { + (Some(reference_clang_format.as_str()), None) + } else if which(unspecified_clang_format).is_ok() { + let version = Command::new(unspecified_clang_format) + .arg("--version") + .output() + .await? + .stdout; - let (clang, warning) = if which(ref_clang_format.clone()).is_ok() { - // can't use 18 for ci. 
- (Some(ref_clang_format), None) - } else if which("clang-format").is_ok() { ( - Some("clang-format".to_string()), - Some("using clang-format, could provide a different result from clang-format-17"), + Some(unspecified_clang_format), + Some(format!( + "using {}, could provide a different result from clang-format-17", + from_utf8(&version).unwrap().replace('\n', "") + )), ) } else { ( None, - Some("clang-format not found. Skipping C formatting..."), + Some("clang-format not found. Skipping C formatting...".to_string()), ) }; // println!("Using {:#?} to format...", clang); @@ -277,7 +287,12 @@ async fn main() -> io::Result<()> { .collect(); for c_file in c_files_to_fmt { - tokio_joinset.spawn(run_clang_fmt(c_file, clang.clone(), cli.check, cli.verbose)); + tokio_joinset.spawn(run_clang_fmt( + c_file, + clang.to_string(), + cli.check, + cli.verbose, + )); } } @@ -292,7 +307,7 @@ async fn main() -> io::Result<()> { } if let Some(warning) = warning { - println!("Warning: {warning}"); + println!("\n{}: {}\n", "Warning".yellow().bold(), warning); } if cli.check {
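
Below, for reference, is a minimal sketch of how the new `overcommit` builder option is meant to be used from a fuzzer binary. It follows the usual `Launcher` setup found in the LibAFL examples; the per-client fuzzer construction inside `run_client` is elided, and the core list ("0-3"), broker port (1337), and `EventConfig` name are placeholder values rather than anything defined by this patch, so treat it as a sketch, not a complete compiling program:

use libafl::{
    events::{EventConfig, Launcher},
    monitors::MultiMonitor,
    Error,
};
use libafl_bolts::{
    core_affinity::Cores,
    shmem::{ShMemProvider, StdShMemProvider},
};

pub fn main() {
    // Shared memory provider used by the broker and the spawned clients.
    let shmem_provider = StdShMemProvider::new().expect("Failed to init shared memory");

    // Aggregates and prints the statistics reported by all clients.
    let monitor = MultiMonitor::new(|s| println!("{s}"));

    // Placeholder core list: bind clients to cores 0-3.
    let cores = Cores::from_cmdline("0-3").expect("Failed to parse core list");

    // Per-client entry point: build the fuzzer, executor, and stages in here.
    let mut run_client = |_state: Option<_>, _mgr, _core_id| {
        // ... fuzzer setup and fuzz loop elided ...
        Ok(())
    };

    match Launcher::builder()
        .shmem_provider(shmem_provider)
        .configuration(EventConfig::from_name("default"))
        .monitor(monitor)
        .run_client(&mut run_client)
        .cores(&cores)
        // New in this patch: spawn two clients on every selected core
        // instead of the default of one.
        .overcommit(2)
        .broker_port(1337)
        .build()
        .launch()
    {
        Ok(()) | Err(Error::ShuttingDown) => println!("Fuzzing stopped."),
        Err(err) => panic!("Failed to run launcher: {err:?}"),
    }
}

With the default `overcommit` of 1 the behavior is unchanged from the previous `Launcher`. The option applies to both launch paths touched by this patch, since the fork-based (Unix) and process-spawning (Windows / non-fork) client loops each iterate `0..self.overcommit` per selected core.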