Introducing Launcher::overcommit, improving CI formatting (#2670)

* introducing Launcher::overcommit

* removing unnecessary cfg restrictions and clippy allows

* improving warning for wrong clang-format version

* installing black in the format CI

* Enforcing python formatting in CI

* extending formatting using black on all python files

* printing diff on black failure

* preferring python's black over system black

* moving to LLVM 19 for formatting
Valentin Huber, 2024-11-09 19:13:51 +01:00, committed by GitHub
parent 8617fa6603
commit e32b3eae93
11 changed files with 345 additions and 299 deletions


@ -198,6 +198,8 @@ jobs:
run: rustup component add --toolchain nightly-x86_64-unknown-linux-gnu rustfmt
- uses: Swatinem/rust-cache@v2
with: { shared-key: "ubuntu" }
- name: Installing black
run: python3 -m pip install black
- name: Format Check
run: ./scripts/fmt_all.sh check


@ -3,5 +3,7 @@ import ctypes
import platform
print("Starting to fuzz from python!")
fuzzer = sugar.InMemoryBytesCoverageSugar(input_dirs=["./in"], output_dir="out", broker_port=1337, cores=[0,1])
fuzzer = sugar.InMemoryBytesCoverageSugar(
input_dirs=["./in"], output_dir="out", broker_port=1337, cores=[0, 1]
)
fuzzer.run(lambda b: print("foo"))


@ -4,31 +4,32 @@ from pylibafl import sugar, qemu
import lief
MAX_SIZE = 0x100
BINARY_PATH = './a.out'
BINARY_PATH = "./a.out"
emu = qemu.Qemu(['qemu-x86_64', BINARY_PATH], [])
emu = qemu.Qemu(["qemu-x86_64", BINARY_PATH], [])
elf = lief.parse(BINARY_PATH)
test_one_input = elf.get_function_address("LLVMFuzzerTestOneInput")
if elf.is_pie:
test_one_input += emu.load_addr()
print('LLVMFuzzerTestOneInput @ 0x%x' % test_one_input)
print("LLVMFuzzerTestOneInput @ 0x%x" % test_one_input)
emu.set_breakpoint(test_one_input)
emu.run()
sp = emu.read_reg(qemu.regs.Rsp)
print('SP = 0x%x' % sp)
print("SP = 0x%x" % sp)
retaddr = int.from_bytes(emu.read_mem(sp, 8), 'little')
print('RET = 0x%x' % retaddr)
retaddr = int.from_bytes(emu.read_mem(sp, 8), "little")
print("RET = 0x%x" % retaddr)
inp = emu.map_private(0, MAX_SIZE, qemu.mmap.ReadWrite)
assert(inp > 0)
assert inp > 0
emu.remove_breakpoint(test_one_input)
emu.set_breakpoint(retaddr)
def harness(b):
if len(b) > MAX_SIZE:
b = b[:MAX_SIZE]
@ -39,5 +40,6 @@ def harness(b):
emu.write_reg(qemu.regs.Rip, test_one_input)
emu.run()
fuzz = sugar.QemuBytesCoverageSugar(['./in'], './out', 3456, [0,1,2,3])
fuzz = sugar.QemuBytesCoverageSugar(["./in"], "./out", 3456, [0, 1, 2, 3])
fuzz.run(emu, harness)


@ -4,16 +4,17 @@ import os
import json
import sys
def concatenate_json_files(input_dir):
json_files = []
for root, dirs, files in os.walk(input_dir):
for file in files:
if file.endswith('.json'):
if file.endswith(".json"):
json_files.append(os.path.join(root, file))
data = dict()
for json_file in json_files:
with open(json_file, 'r') as file:
with open(json_file, "r") as file:
if os.stat(json_file).st_size == 0:
# skip empty file else json.load() fails
continue
@ -21,13 +22,14 @@ def concatenate_json_files(input_dir):
print(type(json_data), file)
data = data | json_data
output_file = os.path.join(os.getcwd(), 'concatenated.json')
with open(output_file, 'w') as file:
output_file = os.path.join(os.getcwd(), "concatenated.json")
with open(output_file, "w") as file:
json.dump([data], file)
print(f"JSON files concatenated successfully! Output file: {output_file}")
if __name__ == '__main__':
if __name__ == "__main__":
if len(sys.argv) != 2:
print("Usage: python script.py <directory_path>")
sys.exit(1)


@ -108,24 +108,27 @@ pub struct Launcher<'a, CF, MT, SP> {
broker_port: u16,
/// The list of cores to run on
cores: &'a Cores,
/// The number of clients to spawn on each core
#[builder(default = 1)]
overcommit: usize,
/// A file name to write all client output to
#[cfg(all(unix, feature = "std"))]
#[cfg(unix)]
#[builder(default = None)]
stdout_file: Option<&'a str>,
/// The time in milliseconds to delay between child launches
#[builder(default = 10)]
launch_delay: u64,
/// The actual, opened, `stdout_file` - so that we keep it open until the end
#[cfg(all(unix, feature = "std", feature = "fork"))]
#[cfg(all(unix, feature = "fork"))]
#[builder(setter(skip), default = None)]
opened_stdout_file: Option<File>,
/// A file name to write all client stderr output to. If not specified, output is sent to
/// `stdout_file`.
#[cfg(all(unix, feature = "std"))]
#[cfg(unix)]
#[builder(default = None)]
stderr_file: Option<&'a str>,
/// The actual, opened, `stdout_file` - so that we keep it open until the end
#[cfg(all(unix, feature = "std", feature = "fork"))]
#[cfg(all(unix, feature = "fork"))]
#[builder(setter(skip), default = None)]
opened_stderr_file: Option<File>,
/// The `ip:port` address of another broker to connect our new broker to for multi-machine
@ -172,17 +175,10 @@ where
SP: ShMemProvider,
{
/// Launch the broker and the clients and fuzz
#[cfg(all(unix, feature = "std", feature = "fork"))]
pub fn launch<S>(&mut self) -> Result<(), Error>
where
S: State + HasExecutions,
CF: FnOnce(Option<S>, LlmpRestartingEventManager<(), S, SP>, CoreId) -> Result<(), Error>,
{
Self::launch_with_hooks(self, tuple_list!())
}
/// Launch the broker and the clients and fuzz
#[cfg(all(feature = "std", any(windows, not(feature = "fork"))))]
#[cfg(all(
feature = "std",
any(windows, not(feature = "fork"), all(unix, feature = "fork"))
))]
#[allow(unused_mut, clippy::match_wild_err_arm)]
pub fn launch<S>(&mut self) -> Result<(), Error>
where
@ -200,9 +196,8 @@ where
SP: ShMemProvider,
{
/// Launch the broker and the clients and fuzz with a user-supplied hook
#[cfg(all(unix, feature = "std", feature = "fork"))]
#[allow(clippy::similar_names)]
#[allow(clippy::too_many_lines)]
#[cfg(all(unix, feature = "fork"))]
#[allow(clippy::similar_names, clippy::too_many_lines)]
pub fn launch_with_hooks<EMH, S>(&mut self, hooks: EMH) -> Result<(), Error>
where
S: State + HasExecutions,
@ -221,8 +216,7 @@ where
));
}
let core_ids = get_core_ids().unwrap();
let num_cores = core_ids.len();
let core_ids = get_core_ids()?;
let mut handles = vec![];
log::info!("spawning on cores: {:?}", self.cores);
@ -234,13 +228,13 @@ where
.stderr_file
.map(|filename| File::create(filename).unwrap());
#[cfg(feature = "std")]
let debug_output = std::env::var(LIBAFL_DEBUG_OUTPUT).is_ok();
// Spawn clients
let mut index = 0_u64;
for (id, bind_to) in core_ids.iter().enumerate().take(num_cores) {
for (id, bind_to) in core_ids.iter().enumerate() {
if self.cores.ids.iter().any(|&x| x == id.into()) {
for _ in 0..self.overcommit {
index += 1;
self.shmem_provider.pre_fork()?;
// # Safety
@ -249,7 +243,6 @@ where
ForkResult::Parent(child) => {
self.shmem_provider.post_fork(false)?;
handles.push(child.pid);
#[cfg(feature = "std")]
log::info!("child spawned and bound to core {id}");
}
ForkResult::Child => {
@ -258,10 +251,8 @@ where
log::info!("{:?} PostFork", unsafe { libc::getpid() });
self.shmem_provider.post_fork(true)?;
#[cfg(feature = "std")]
std::thread::sleep(Duration::from_millis(index * self.launch_delay));
#[cfg(feature = "std")]
if !debug_output {
if let Some(file) = &self.opened_stdout_file {
dup2(file.as_raw_fd(), libc::STDOUT_FILENO)?;
@ -291,9 +282,9 @@ where
};
}
}
}
if self.spawn_broker {
#[cfg(feature = "std")]
log::info!("I am broker!!.");
// TODO we don't want always a broker here, think about using different laucher process to spawn different configurations
@ -337,7 +328,7 @@ where
}
/// Launch the broker and the clients and fuzz
#[cfg(all(feature = "std", any(windows, not(feature = "fork"))))]
#[cfg(any(windows, not(feature = "fork")))]
#[allow(unused_mut, clippy::match_wild_err_arm, clippy::too_many_lines)]
pub fn launch_with_hooks<EMH, S>(&mut self, hooks: EMH) -> Result<(), Error>
where
@ -381,7 +372,7 @@ where
log::info!("spawning on cores: {:?}", self.cores);
let debug_output = std::env::var("LIBAFL_DEBUG_OUTPUT").is_ok();
#[cfg(all(feature = "std", unix))]
#[cfg(unix)]
{
// Set own stdout and stderr as set by the user
if !debug_output {
@ -404,9 +395,10 @@ where
//spawn clients
for (id, _) in core_ids.iter().enumerate().take(num_cores) {
if self.cores.ids.iter().any(|&x| x == id.into()) {
for _ in 0..self.overcommit {
// Forward own stdio to child processes, if requested by user
let (mut stdout, mut stderr) = (Stdio::null(), Stdio::null());
#[cfg(all(feature = "std", unix))]
#[cfg(unix)]
{
if self.stdout_file.is_some() || self.stderr_file.is_some() {
stdout = Stdio::inherit();
@ -414,8 +406,9 @@ where
};
}
#[cfg(feature = "std")]
std::thread::sleep(Duration::from_millis(id as u64 * self.launch_delay));
std::thread::sleep(Duration::from_millis(
id as u64 * self.launch_delay,
));
std::env::set_var(_AFL_LAUNCHER_CLIENT, id.to_string());
let mut child = startable_self()?;
@ -429,7 +422,7 @@ where
handles.push(child);
}
}
}
handles
}
Err(_) => panic!("Env variables are broken, received non-unicode!"),
@ -444,7 +437,6 @@ where
}
if self.spawn_broker {
#[cfg(feature = "std")]
log::info!("I am broker!!.");
let builder = RestartingMgr::<EMH, MT, S, SP>::builder()
@ -620,8 +612,7 @@ where
/// Launch a Centralized-based fuzzer.
/// - `main_inner_mgr_builder` will be called to build the inner manager of the main node.
/// - `secondary_inner_mgr_builder` will be called to build the inner manager of the secondary nodes.
#[allow(clippy::similar_names)]
#[allow(clippy::too_many_lines)]
#[allow(clippy::similar_names, clippy::too_many_lines)]
pub fn launch_generic<EM, EMB, S>(
&mut self,
main_inner_mgr_builder: EMB,
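
Taken together, the new `overcommit` builder field and the `for _ in 0..self.overcommit` loop above mean the launcher now forks that many clients on every selected core instead of exactly one. Below is a minimal, hypothetical usage sketch assuming the usual typed-builder Launcher API; the monitor and the `run_client` closure are placeholders for a fuzzer's normal setup (in a real fuzzer the closure parameters carry concrete types), and only `.overcommit(2)` is new.

// Hypothetical sketch (not part of the diff): spawn two clients per core on
// cores 0-3, i.e. eight clients in total. Everything except `.overcommit(2)`
// follows the pre-existing Launcher builder; the `?` operator assumes this
// runs inside a function returning `Result<(), Error>`.
use libafl::events::{EventConfig, Launcher};
use libafl::monitors::MultiMonitor;
use libafl_bolts::{core_affinity::Cores, shmem::{ShMemProvider, StdShMemProvider}};

let monitor = MultiMonitor::new(|s| println!("{s}"));
let mut run_client = |_state, _mgr, _core_id| {
    // ... build and run the actual fuzzer for this client here ...
    Ok(())
};
Launcher::builder()
    .shmem_provider(StdShMemProvider::new()?)
    .configuration(EventConfig::from_name("default"))
    .monitor(monitor)
    .run_client(&mut run_client)
    .cores(&Cores::from_cmdline("0-3")?)
    .overcommit(2) // new: 2 clients per core; the builder default is 1
    .broker_port(1337)
    .build()
    .launch()?;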


@ -11,16 +11,21 @@ else
cargo run --manifest-path "$LIBAFL_DIR/utils/libafl_fmt/Cargo.toml" --release -- --verbose || exit 1
fi
if command -v black > /dev/null; then
echo "[*] Formatting python files"
if ! black "$SCRIPT_DIR"
then
echo "Python format failed."
exit 1
if python3 -m black --version > /dev/null; then
BLACK_COMMAND="python3 -m black"
elif command -v black > /dev/null; then
BLACK_COMMAND="black"
fi
if [ -n "$BLACK_COMMAND" ]; then
echo "[*] Formatting python files"
if [ "$1" = "check" ]; then
$BLACK_COMMAND --check --diff "$LIBAFL_DIR" || exit 1
else
echo "Warning: python black not found. Formatting skipped for python."
$BLACK_COMMAND "$LIBAFL_DIR" || exit 1
fi
else
echo -e "\n\033[1;33mWarning\033[0m: python black not found. Formatting skipped for python.\n"
fi
if [ "$1" != "check" ]; then


@ -7,7 +7,7 @@ import sys
cfg = dict()
if 'CFG_OUTPUT_PATH' not in os.environ:
if "CFG_OUTPUT_PATH" not in os.environ:
sys.exit("CFG_OUTPUT_PATH not set")
input_path = os.environ["CFG_OUTPUT_PATH"]
@ -31,7 +31,7 @@ for mname, module in cfg.items():
fnname2SG = dict()
# First, add all the intra-procedural edges
for (fname, v) in module['edges'].items():
for fname, v in module["edges"].items():
if fname not in fname2id:
GG.add_node(f_ids, label=fname)
@ -41,8 +41,7 @@ for mname, module in cfg.items():
sz = len(v)
for idx in range(node_ids, node_ids + sz):
G.add_node(idx)
G.nodes[idx]['label'] = mname + ' ' + \
fname + ' ' + str(idx - node_ids)
G.nodes[idx]["label"] = mname + " " + fname + " " + str(idx - node_ids)
node_id_list = list(range(node_ids, node_ids + sz))
node_ids += sz
SG = G.subgraph(node_id_list)
@ -52,14 +51,14 @@ for mname, module in cfg.items():
G.add_edge(node_id_list[src], node_id_list[item])
# Next, build inter-procedural edges
for (fname, calls) in module['calls'].items():
for (idx, target_fns) in calls.items():
for fname, calls in module["calls"].items():
for idx, target_fns in calls.items():
# G.nodes isn't sorted
src = sorted(fnname2SG[fname].nodes())[0] + int(idx)
for target_fn in target_fns:
if target_fn in fnname2SG:
offset = module['entries'][target_fn]
offset = module["entries"][target_fn]
dst = sorted(fnname2SG[target_fn].nodes)[0] + offset


@ -8,6 +8,7 @@ import sys
import json
import re
from collections import defaultdict
# import pygraphviz as pgv
gram_data = None
@ -24,20 +25,20 @@ stack_limit = None
# Holds the set of unexpanded rules owing to the user-passed stack constraint limit
unexpanded_rules = set()
def main(grammar, limit):
global worklist, gram_data, stack_limit
current = '0'
current = "0"
stack_limit = limit
if stack_limit:
print ('[X] Operating in bounded stack mode')
print("[X] Operating in bounded stack mode")
with open(grammar, 'r') as fd:
with open(grammar, "r") as fd:
gram_data = json.load(fd)
start_symbol = gram_data["Start"][0]
worklist.append([current, [start_symbol]])
# print (grammar)
filename = (grammar.split('/')[-1]).split('.')[0]
filename = (grammar.split("/")[-1]).split(".")[0]
while worklist:
# Take an element from the worklist
@ -46,45 +47,54 @@ def main(grammar, limit):
element = worklist.pop(0)
prep_transitions(element)
pda_file = filename + '_transition.json'
graph_file = filename + '.png'
pda_file = filename + "_transition.json"
graph_file = filename + ".png"
# print ('XXXXXXXXXXXXXXXX')
# print ('PDA file:%s Png graph file:%s' % (pda_file, graph_file))
# XXX Commented out because visualization of current version of PHP causes segfault
# Create the graph and dump the transitions to a file
# create_graph(filename)
transformed = postprocess()
with open(filename + '_automata.json', 'w+') as fd:
with open(filename + "_automata.json", "w+") as fd:
json.dump(transformed, fd)
with open(filename + '_transition.json', 'w+') as fd:
with open(filename + "_transition.json", "w+") as fd:
json.dump(pda, fd)
if not unexpanded_rules:
print ('[X] No unexpanded rules, absolute FSA formed')
print("[X] No unexpanded rules, absolute FSA formed")
exit(0)
else:
print ('[X] Certain rules were not expanded due to stack size limit. Inexact approximation has been created and the disallowed rules have been put in {}_disallowed.json'.format(filename))
print ('[X] Number of unexpanded rules:', len(unexpanded_rules))
with open(filename + '_disallowed.json', 'w+') as fd:
print(
"[X] Certain rules were not expanded due to stack size limit. Inexact approximation has been created and the disallowed rules have been put in {}_disallowed.json".format(
filename
)
)
print("[X] Number of unexpanded rules:", len(unexpanded_rules))
with open(filename + "_disallowed.json", "w+") as fd:
json.dump(list(unexpanded_rules), fd)
def create_graph(filename):
'''
"""
Creates a DOT representation of the PDA
'''
"""
global pda
G = pgv.AGraph(strict=False, directed=True)
for transition in pda:
print ('Transition:', transition)
G.add_edge(transition['source'], transition['dest'],
label = 'Term:{}'.format(transition['terminal']))
G.layout(prog = 'dot')
print ('Do it up 2')
G.draw(filename + '.png')
print("Transition:", transition)
G.add_edge(
transition["source"],
transition["dest"],
label="Term:{}".format(transition["terminal"]),
)
G.layout(prog="dot")
print("Do it up 2")
G.draw(filename + ".png")
def prep_transitions(element):
'''
"""
Generates transitions
'''
"""
global gram_data, state_count, pda, worklist, state_stacks, stack_limit, unexpanded_rules
state = element[0]
try:
@ -99,14 +109,14 @@ def prep_transitions(element):
# print ('Current state:', state)
terminal, ss, termIsRegex = tokenize(rule)
transition = get_template()
transition['trigger'] = '_'.join([state, str(count)])
transition['source'] = state
transition['dest'] = str(state_count)
transition['ss'] = ss
transition['terminal'] = terminal
transition['rule'] = "{} -> {}".format(nonterminal, rule )
transition["trigger"] = "_".join([state, str(count)])
transition["source"] = state
transition["dest"] = str(state_count)
transition["ss"] = ss
transition["terminal"] = terminal
transition["rule"] = "{} -> {}".format(nonterminal, rule)
if termIsRegex:
transition['termIsRegex'] = True
transition["termIsRegex"] = True
# Creating a state stack for the new state
try:
@ -118,7 +128,7 @@ def prep_transitions(element):
if ss:
for symbol in ss[::-1]:
state_stack.insert(0, symbol)
transition['stack'] = state_stack
transition["stack"] = state_stack
# Check if a recursive transition state being created, if so make a backward
# edge and don't add anything to the worklist
@ -128,7 +138,7 @@ def prep_transitions(element):
# print ('Stack:', sorted(stack))
# print ('State stack:', sorted(state_stack))
if sorted(stack) == sorted(state_stack):
transition['dest'] = state_element
transition["dest"] = state_element
# print ('Recursive:', transition)
pda.append(transition)
count += 1
@ -142,24 +152,25 @@ def prep_transitions(element):
# If the generated state has a stack size > stack_limit then that state is abandoned
# and not added to the FSA or the worklist for further expansion
if stack_limit:
if (len(transition['stack']) > stack_limit):
unexpanded_rules.add(transition['rule'])
if len(transition["stack"]) > stack_limit:
unexpanded_rules.add(transition["rule"])
continue
# Create transitions for the non-recursive relations and add to the worklist
# print ('Normal:', transition)
# print ('State2:', state)
pda.append(transition)
worklist.append([transition['dest'], transition['stack']])
state_stacks[transition['dest']] = state_stack
worklist.append([transition["dest"], transition["stack"]])
state_stacks[transition["dest"]] = state_stack
state_count += 1
count += 1
def tokenize(rule):
'''
"""
Gets the terminal and the corresponding stack symbols from a rule in GNF form
'''
pattern = re.compile("([r])*\'([\s\S]+)\'([\s\S]*)")
"""
pattern = re.compile("([r])*'([\s\S]+)'([\s\S]*)")
terminal = None
ss = None
termIsRegex = False
@ -176,21 +187,23 @@ def tokenize(rule):
return terminal, ss, termIsRegex
def get_template():
transition_template = {
'trigger':None,
'source': None,
'dest': None,
'termIsRegex': False,
'terminal' : None,
'stack': []
"trigger": None,
"source": None,
"dest": None,
"termIsRegex": False,
"terminal": None,
"stack": [],
}
return transition_template
def postprocess1():
'''
"""
Creates a representation to be passed on to the C-module
'''
"""
global pda
final_struct = {}
# Supporting data structures for if stack limit is imposed
@ -198,12 +211,11 @@ def postprocess1():
culled_final = []
num_transitions = 0 # Keep track of number of transitions
states, final, initial = _get_states()
memoized = [[]] * len(states)
print(initial)
assert len(initial) == 1, 'More than one init state found'
assert len(initial) == 1, "More than one init state found"
# Cull transitions to states which were not expanded owing to the stack limit
if stack_limit:
@ -211,7 +223,9 @@ def postprocess1():
blocklist = []
for final_state in final:
for transition in pda:
if (transition["dest"] == final_state) and (len(transition["stack"]) > 0):
if (transition["dest"] == final_state) and (
len(transition["stack"]) > 0
):
blocklist.append(transition["dest"])
continue
else:
@ -219,42 +233,45 @@ def postprocess1():
culled_final = [state for state in final if state not in blocklist]
assert len(culled_final) == 1, 'More than one final state found'
assert len(culled_final) == 1, "More than one final state found"
for transition in culled_pda:
state = transition["source"]
if transition["dest"] in blocklist:
continue
num_transitions += 1
memoized[int(state)].append((transition["trigger"],
int(transition["dest"]), transition["terminal"]))
memoized[int(state)].append(
(transition["trigger"], int(transition["dest"]), transition["terminal"])
)
final_struct["init_state"] = int(initial)
final_struct["final_state"] = int(culled_final[0])
# The reason we do this is because when states are culled, the indexing is
# still relative to the actual number of states hence we keep numstates recorded
# as the original number of states
print ('[X] Actual Number of states:', len(memoized))
print ('[X] Number of transitions:', num_transitions)
print ('[X] Original Number of states:', len(states))
print("[X] Actual Number of states:", len(memoized))
print("[X] Number of transitions:", num_transitions)
print("[X] Original Number of states:", len(states))
final_struct["pda"] = memoized
return final_struct
# Running FSA construction in exact approximation mode and postprocessing it like so
for transition in pda:
state = transition["source"]
memoized[int(state)].append((transition["trigger"],
int(transition["dest"]), transition["terminal"]))
memoized[int(state)].append(
(transition["trigger"], int(transition["dest"]), transition["terminal"])
)
final_struct["init_state"] = int(initial)
final_struct["final_state"] = int(final[0])
print ('[X] Actual Number of states:', len(memoized))
print("[X] Actual Number of states:", len(memoized))
final_struct["pda"] = memoized
return final_struct
def postprocess():
'''
"""
Creates a representation to be passed on to the C-module
'''
"""
global pda
final_struct = {}
memoized = defaultdict(list)
@ -263,11 +280,10 @@ def postprocess():
culled_final = []
num_transitions = 0 # Keep track of number of transitions
states, final, initial = _get_states()
print(initial)
assert len(initial) == 1, 'More than one init state found'
assert len(initial) == 1, "More than one init state found"
# Cull transitions to states which were not expanded owing to the stack limit
if stack_limit:
@ -275,7 +291,9 @@ def postprocess():
blocklist = []
for final_state in final:
for transition in pda:
if (transition["dest"] == final_state) and (len(transition["stack"]) > 0):
if (transition["dest"] == final_state) and (
len(transition["stack"]) > 0
):
blocklist.append(transition["dest"])
continue
else:
@ -283,38 +301,38 @@ def postprocess():
culled_final = [state for state in final if state not in blocklist]
assert len(culled_final) == 1, 'More than one final state found'
assert len(culled_final) == 1, "More than one final state found"
for transition in culled_pda:
state = transition["source"]
if transition["dest"] in blocklist:
continue
num_transitions += 1
memoized[int(state)].append([transition["trigger"], int(transition["dest"]),
transition["terminal"]])
memoized[int(state)].append(
[transition["trigger"], int(transition["dest"]), transition["terminal"]]
)
final_struct["init_state"] = int(initial)
final_struct["final_state"] = int(culled_final[0])
# The reason we do this is because when states are culled, the indexing is
# still relative to the actual number of states hence we keep numstates recorded
# as the original number of states
print ('[X] Actual Number of states:', len(memoized.keys()))
print ('[X] Number of transitions:', num_transitions)
print ('[X] Original Number of states:', len(states))
print("[X] Actual Number of states:", len(memoized.keys()))
print("[X] Number of transitions:", num_transitions)
print("[X] Original Number of states:", len(states))
# final_struct["numstates"] = len(states)
memoized_list = [[]] * len(states)
else:
# Running FSA construction in exact approximation mode and postprocessing it like so
for transition in pda:
state = transition["source"]
memoized[int(state)].append([transition["trigger"], int(transition["dest"]),
transition["terminal"]])
memoized[int(state)].append(
[transition["trigger"], int(transition["dest"]), transition["terminal"]]
)
final_struct["init_state"] = int(initial)
final_struct["final_state"] = int(final[0])
print ('[X] Actual Number of states:', len(memoized.keys()))
print("[X] Actual Number of states:", len(memoized.keys()))
# final_struct["numstates"] = len(memoized.keys())
memoized_list = [[]] * len(memoized.keys())
@ -333,19 +351,23 @@ def _get_states():
dest.add(transition["dest"])
source_copy = source.copy()
source_copy.update(dest)
return list(source_copy), list(dest.difference(source)), str(''.join(list(source.difference(dest))))
return (
list(source_copy),
list(dest.difference(source)),
str("".join(list(source.difference(dest)))),
)
if __name__ == '__main__':
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(description = 'Script to convert GNF grammar to PDA')
parser = argparse.ArgumentParser(description="Script to convert GNF grammar to PDA")
parser.add_argument("--gf", type=str, help="Location of GNF grammar")
parser.add_argument(
'--gf',
type = str,
help = 'Location of GNF grammar')
parser.add_argument(
'--limit',
"--limit",
type=int,
default=None,
help = 'Specify the upper bound for the stack size')
help="Specify the upper bound for the stack size",
)
args = parser.parse_args()
main(args.gf, args.limit)


@ -16,17 +16,18 @@ DEBUG = False
NONTERMINALSET = []
COUNT = 1
def convert_to_gnf(grammar, start):
if DEBUG:
with open('debug_preprocess.json', 'w+') as fd:
with open("debug_preprocess.json", "w+") as fd:
json.dump(grammar, fd)
grammar = remove_unit(grammar) # eliminates unit productions
if DEBUG:
with open('debug_unit.json', 'w+') as fd:
with open("debug_unit.json", "w+") as fd:
json.dump(grammar, fd)
grammar = remove_mixed(grammar) # eliminate terminals existing with non-terminals
if DEBUG:
with open('debug_mixed.json', 'w+') as fd:
with open("debug_mixed.json", "w+") as fd:
json.dump(grammar, fd)
grammar = gnf(grammar)
@ -35,12 +36,13 @@ def convert_to_gnf(grammar, start):
# with open('debug_gnf_reachable.json', 'w+') as fd:
# json.dump(reachable_grammar, fd)
if DEBUG:
with open('debug_gnf.json', 'w+') as fd:
with open("debug_gnf.json", "w+") as fd:
json.dump(grammar, fd)
grammar["Start"] = [start]
return grammar
def remove_left_recursion(grammar):
# Remove the left recursion in the grammar rules.
# This algorithm is adopted from
@ -69,10 +71,10 @@ def remove_left_recursion(grammar):
r.append(new_rule)
left_recursion = [r[1:] + [new_rule] for r in left_recursion]
left_recursion.append(["' '"])
new_grammar[lhs] = [' '.join(rule) for rule in others]
new_grammar[new_rule] = [' '.join(rule) for rule in left_recursion]
new_grammar[lhs] = [" ".join(rule) for rule in others]
new_grammar[new_rule] = [" ".join(rule) for rule in left_recursion]
else:
new_grammar[lhs] = [' '.join(rule) for rule in others]
new_grammar[lhs] = [" ".join(rule) for rule in others]
no_left_recursion = True
for lhs, rules in old_grammar.items():
for rule in rules:
@ -88,10 +90,11 @@ def remove_left_recursion(grammar):
new_grammar = defaultdict(list)
return new_grammar
def get_reachable(grammar, start):
'''
"""
Returns a grammar without dead rules
'''
"""
reachable_nt = set()
worklist = list()
processed = set()
@ -113,9 +116,10 @@ def get_reachable(grammar, start):
def gettokens(rule):
pattern = re.compile("([^\s\"\']+)|\"([^\"]*)\"|\'([^\']*)\'")
pattern = re.compile("([^\s\"']+)|\"([^\"]*)\"|'([^']*)'")
return [matched.group(0) for matched in pattern.finditer(rule)]
def gnf(grammar):
old_grammar = copy.deepcopy(grammar)
new_grammar = defaultdict(list)
@ -129,7 +133,7 @@ def gnf(grammar):
new_grammar[lhs].append(rule)
continue
startoken = tokens[0]
assert(startoken != lhs)
assert startoken != lhs
endrule = tokens[1:]
if not isTerminal(startoken):
newrules = []
@ -139,7 +143,7 @@ def gnf(grammar):
temprule.insert(0, extension)
newrules.append(temprule)
for newnew in newrules:
new_grammar[lhs].append(' '.join(newnew))
new_grammar[lhs].append(" ".join(newnew))
else:
new_grammar[lhs].append(rule)
isgnf = True
@ -163,7 +167,7 @@ def process_antlr4_grammar(data):
productions = []
production = []
for line in data:
if line != '\n':
if line != "\n":
production.append(line)
else:
productions.append(production)
@ -172,16 +176,17 @@ def process_antlr4_grammar(data):
for production in productions:
rules = []
init = production[0]
nonterminal = init.split(':')[0]
rules.append(strip_chars(init.split(':')[1]).strip('| '))
nonterminal = init.split(":")[0]
rules.append(strip_chars(init.split(":")[1]).strip("| "))
for production_rule in production[1:]:
rules.append(strip_chars(production_rule.split('|')[0]))
rules.append(strip_chars(production_rule.split("|")[0]))
final_rule_set[nonterminal] = rules
# for line in data:
# if line != '\n':
# production.append(line)
return final_rule_set
def remove_unit(grammar):
nounitproductions = False
old_grammar = copy.deepcopy(grammar)
@ -213,19 +218,21 @@ def remove_unit(grammar):
new_grammar = defaultdict(list)
return new_grammar
def isTerminal(rule):
# pattern = re.compile("([r]*\'[\s\S]+\')")
pattern = re.compile("\'(.*?)\'")
pattern = re.compile("'(.*?)'")
match = pattern.match(rule)
if match:
return True
else:
return False
def remove_mixed(grammar):
'''
"""
Remove rules where there are terminals mixed in with non-terminals
'''
"""
new_grammar = defaultdict(list)
for lhs, rules in grammar.items():
for rhs in rules:
@ -248,17 +255,20 @@ def remove_mixed(grammar):
regen_rule.append(new_nonterm)
else:
regen_rule.append(token)
new_grammar[lhs].append(' '.join(regen_rule))
new_grammar[lhs].append(" ".join(regen_rule))
return new_grammar
def strip_chars(rule):
return rule.strip('\n\t ')
return rule.strip("\n\t ")
def get_nonterminal():
global COUNT
COUNT += 1
return f"GeneratedTermVar{COUNT}"
def terminal_exist(token, grammar):
for nonterminal, rules in grammar.items():
if token in rules and len(token) == 1:
@ -269,42 +279,37 @@ def terminal_exist(token, grammar):
def main(grammar_file, out, start):
grammar = None
# If grammar file is a preprocessed NT file, then skip preprocessing
if '.json' in grammar_file:
with open(grammar_file, 'r') as fd:
if ".json" in grammar_file:
with open(grammar_file, "r") as fd:
grammar = json.load(fd)
elif '.g4' in grammar_file:
with open(grammar_file, 'r') as fd:
elif ".g4" in grammar_file:
with open(grammar_file, "r") as fd:
data = fd.readlines()
grammar = process_antlr4_grammar(data)
else:
raise('Unknwown file format passed. Accepts (.g4/.json)')
raise ("Unknwown file format passed. Accepts (.g4/.json)")
grammar = convert_to_gnf(grammar, start)
with open(out, 'w+') as fd:
with open(out, "w+") as fd:
json.dump(grammar, fd)
if __name__ == '__main__':
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(description = 'Script to convert grammar to GNF form')
parser = argparse.ArgumentParser(
description="Script to convert grammar to GNF form"
)
parser.add_argument(
'--gf',
type = str,
required = True,
help = 'Location of grammar file')
"--gf", type=str, required=True, help="Location of grammar file"
)
parser.add_argument(
'--out',
type = str,
required = True,
help = 'Location of output file')
"--out", type=str, required=True, help="Location of output file"
)
parser.add_argument("--start", type=str, required=True, help="Start token")
parser.add_argument(
'--start',
type = str,
required = True,
help = 'Start token')
parser.add_argument(
'--debug',
action='store_true',
help = 'Write intermediate states to debug files')
"--debug", action="store_true", help="Write intermediate states to debug files"
)
args = parser.parse_args()
DEBUG = args.debug


@ -20,3 +20,4 @@ tokio = { version = "1.38", features = [
clap = { version = "4.5", features = ["derive"] }
exitcode = "1.1"
which = "6.0"
colored = "2.1.0"


@ -78,12 +78,13 @@ use std::{
};
use clap::Parser;
use colored::Colorize;
use regex::RegexSet;
use tokio::{process::Command, task::JoinSet};
use walkdir::{DirEntry, WalkDir};
use which::which;
const REF_LLVM_VERSION: u32 = 18;
const REF_LLVM_VERSION: u32 = 19;
fn is_workspace_toml(path: &Path) -> bool {
for line in read_to_string(path).unwrap().lines() {
@ -249,20 +250,29 @@ async fn main() -> io::Result<()> {
tokio_joinset.spawn(run_cargo_fmt(project, cli.check, cli.verbose));
}
let ref_clang_format = format!("clang-format-{REF_LLVM_VERSION}");
let reference_clang_format = format!("clang-format-{REF_LLVM_VERSION}");
let unspecified_clang_format = "clang-format";
let (clang, warning) = if which(&reference_clang_format).is_ok() {
(Some(reference_clang_format.as_str()), None)
} else if which(unspecified_clang_format).is_ok() {
let version = Command::new(unspecified_clang_format)
.arg("--version")
.output()
.await?
.stdout;
let (clang, warning) = if which(ref_clang_format.clone()).is_ok() {
// can't use 18 for ci.
(Some(ref_clang_format), None)
} else if which("clang-format").is_ok() {
(
Some("clang-format".to_string()),
Some("using clang-format, could provide a different result from clang-format-17"),
Some(unspecified_clang_format),
Some(format!(
"using {}, could provide a different result from clang-format-17",
from_utf8(&version).unwrap().replace('\n', "")
)),
)
} else {
(
None,
Some("clang-format not found. Skipping C formatting..."),
Some("clang-format not found. Skipping C formatting...".to_string()),
)
};
// println!("Using {:#?} to format...", clang);
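
In short, the updated tool first looks for a pinned clang-format-19, then falls back to whatever `clang-format` is on the PATH while quoting that binary's reported version in the warning, and otherwise skips C formatting, printing the warning in bold yellow via the new `colored` dependency. A rough, hedged sketch of that selection flow under those assumptions follows; the function name and return value are illustrative only and not the script's actual structure.

// Illustrative sketch of the clang-format selection shown in the hunk above,
// using the `which`, `tokio`, and `colored` crates already added by this commit.
use colored::Colorize;
use tokio::process::Command;
use which::which;

const REF_LLVM_VERSION: u32 = 19;

async fn choose_clang_format() -> std::io::Result<Option<String>> {
    let reference = format!("clang-format-{REF_LLVM_VERSION}");
    let (clang, warning) = if which(&reference).is_ok() {
        (Some(reference.clone()), None)
    } else if which("clang-format").is_ok() {
        // Ask the unpinned binary for its version so the warning names it explicitly.
        let version = Command::new("clang-format")
            .arg("--version")
            .output()
            .await?
            .stdout;
        (
            Some("clang-format".to_string()),
            Some(format!(
                "using {}, could provide a different result from {reference}",
                String::from_utf8_lossy(&version).replace('\n', "")
            )),
        )
    } else {
        (
            None,
            Some("clang-format not found. Skipping C formatting...".to_string()),
        )
    };
    if let Some(warning) = warning {
        // Bold yellow "Warning" prefix, as in the updated output above.
        println!("\n{}: {}\n", "Warning".yellow().bold(), warning);
    }
    Ok(clang)
}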
@ -277,7 +287,12 @@ async fn main() -> io::Result<()> {
.collect();
for c_file in c_files_to_fmt {
tokio_joinset.spawn(run_clang_fmt(c_file, clang.clone(), cli.check, cli.verbose));
tokio_joinset.spawn(run_clang_fmt(
c_file,
clang.to_string(),
cli.check,
cli.verbose,
));
}
}
@ -292,7 +307,7 @@ async fn main() -> io::Result<()> {
}
if let Some(warning) = warning {
println!("Warning: {warning}");
println!("\n{}: {}\n", "Warning".yellow().bold(), warning);
}
if cli.check {