From ecaa013263f233065431adff35727f49a0417ac3 Mon Sep 17 00:00:00 2001
From: "Dongjia \"toka\" Zhang" <tokazerkje@outlook.com>
Date: Mon, 12 May 2025 18:01:21 +0200
Subject: [PATCH] Replace parallellize_cargo_check.py with Rust code (#3217)

* ci_splitter

* clpo

* FIX

* aa
---
 .github/workflows/build_and_test.yml |  2 +-
 Cargo.toml                           |  1 +
 scripts/clippy.sh                    |  5 +-
 scripts/parallellize_cargo_check.py  | 57 --------------------
 utils/ci_splitter/Cargo.toml         | 22 ++++++++
 utils/ci_splitter/src/main.rs        | 81 ++++++++++++++++++++++++++++
 6 files changed, 106 insertions(+), 62 deletions(-)
 delete mode 100755 scripts/parallellize_cargo_check.py
 create mode 100644 utils/ci_splitter/Cargo.toml
 create mode 100644 utils/ci_splitter/src/main.rs

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 3f18c113be..59e4bb5d33 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -168,7 +168,7 @@ jobs:
       - name: Check each feature
         # Skipping `python` as it has to be built with the `maturin` tool
         # `sancov_pcguard_edges` is tested seperatelyc
-        run: python3 ./scripts/parallellize_cargo_check.py ${{ matrix.instance_idx }}
+        run: LLVM_VERSION=18 CI_INSTANCES=18 cargo run --manifest-path ./utils/ci_splitter/Cargo.toml -- ${{ matrix.instance_idx }}
 
   ubuntu-concolic:
     runs-on: ubuntu-24.04
diff --git a/Cargo.toml b/Cargo.toml
index 474d9160ed..26062f8e42 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -27,6 +27,7 @@ members = [
   "utils/libafl_benches",
   "utils/libafl_jumper",
   "utils/ci_runner",
+  "utils/ci_splitter",
 ]
 
 default-members = [
diff --git a/scripts/clippy.sh b/scripts/clippy.sh
index 4f8de7bd37..12442b918f 100755
--- a/scripts/clippy.sh
+++ b/scripts/clippy.sh
@@ -69,10 +69,7 @@ for project in "${PROJECTS[@]}"; do
       echo "Warning: Directory $project does not exist. Skipping."
    fi
 done
-
+# Finally, run clippy on the whole workspace
 eval "$CLIPPY_CMD --workspace -- $RUSTC_FLAGS"
 
 echo "Clippy run completed for all specified projects."
-
-# Last run it on all
-eval "$CLIPPY_CMD --workspace -- $RUSTC_FLAGS"
diff --git a/scripts/parallellize_cargo_check.py b/scripts/parallellize_cargo_check.py
deleted file mode 100755
index 5241e616b1..0000000000
--- a/scripts/parallellize_cargo_check.py
+++ /dev/null
@@ -1,57 +0,0 @@
-#!/usr/bin/python3
-import subprocess
-import os
-import sys
-import math
-
-LLVM_VERSION = "18"
-
-# Current CI Runner
-ci_instances = 18
-
-if len(sys.argv) != 2:
-    exit(1)
-
-instance_idx = int(sys.argv[1])
-
-# Set llvm config if it's not already set
-if "LLVM_CONFIG" not in os.environ:
-    os.environ["LLVM_CONFIG"] = f"llvm-config-{LLVM_VERSION}"
-
-command = (
-    "DOCS_RS=1 cargo hack check --workspace --each-feature --clean-per-run "
-    "--exclude-features=prelude,python,sancov_pcguard_edges,arm,aarch64,i386,be,systemmode,whole_archive "
-    "--no-dev-deps --exclude libafl_libfuzzer --exclude libafl_qemu --exclude libafl_qemu_sys --print-command-list;"
-    "DOCS_RS=1 cargo hack check -p libafl_qemu -p libafl_qemu_sys --each-feature --clean-per-run "
-    "--exclude-features=prelude,python,sancov_pcguard_edges,arm,aarch64,i386,be,systemmode,whole_archive,slirp,intel_pt,intel_pt_export_raw "
-    "--no-dev-deps --features usermode --print-command-list"
-)
-
-# Run the command and capture the output
-output = subprocess.check_output(command, shell=True, text=True)
-output = output.strip().split("\n")[0:]
-all_task_cnt = len(output) // 2  # by 2 cuz one task has two lines
-task_per_core = math.ceil(all_task_cnt // ci_instances)
-print(task_per_core, "tasks assigned to this instance")
-
-for task in output[
-    instance_idx * 2 * task_per_core : (instance_idx + 1) * 2 * task_per_core
-]:
-    print("Running ", task)
-    print(os.environ)
-
-    if (
-        "utils/libafl_jumper/Cargo.toml" in task
-        and "--no-default-features" in task
-        and "--features" not in task
-    ):
-        # ignore libafl_jumper no std
-        continue
-
-    if "libafl_frida" in task:
-        # DOCS_RS is needed for libafl_frida to build without auto-download feature
-        cargo_check = subprocess.check_output(
-            task, shell=True, text=True, env=dict(os.environ, DOCS_RS="1")
-        )
-    else:
-        cargo_check = subprocess.check_output(task, shell=True, text=True)
diff --git a/utils/ci_splitter/Cargo.toml b/utils/ci_splitter/Cargo.toml
new file mode 100644
index 0000000000..586593f5b3
--- /dev/null
+++ b/utils/ci_splitter/Cargo.toml
@@ -0,0 +1,22 @@
+[package]
+name = "ci_splitter"
+edition = "2024"
+authors = ["Dongjia Zhang <tokazerkje@outlook.com>"]
+version.workspace = true
+license.workspace = true
+description = "libafl CI tool that splits the per-feature cargo checks across CI instances"
+repository = "https://github.com/AFLplusplus/LibAFL/"
+keywords = ["fuzzing", "testing", "security"]
+categories = [
+  "development-tools::testing",
+  "emulators",
+  "embedded",
+  "os",
+  "no-std",
+]
+readme = "../README.md"
+
+[dependencies]
+
+[lints]
+workspace = true
diff --git a/utils/ci_splitter/src/main.rs b/utils/ci_splitter/src/main.rs
new file mode 100644
index 0000000000..17e6728e21
--- /dev/null
+++ b/utils/ci_splitter/src/main.rs
@@ -0,0 +1,121 @@
+//! Splits the workspace-wide `cargo hack check --each-feature` run across CI instances.
+//!
+//! Usage: `LLVM_VERSION=<ver> CI_INSTANCES=<count> ci_splitter <instance_idx>`
+//!
+//! The command list printed by `cargo hack --print-command-list` is cut into
+//! `CI_INSTANCES` contiguous slices; only the slice selected by the zero-based
+//! `instance_idx` is executed.
+
+use core::error::Error;
+use std::{env, process::Command};
+
+/// Reads the environment variable `name` and parses it as an unsigned integer.
+///
+/// # Errors
+///
+/// Fails when the variable is unset, is not valid Unicode, or does not parse as `usize`.
+fn env_usize(name: &str) -> Result<usize, Box<dyn Error>> {
+    let val = env::var(name).map_err(|_| format!("{name} environment variable not set"))?;
+    let parsed = val
+        .parse::<usize>()
+        .map_err(|e| format!("{name} must be a positive integer, got '{val}': {e}"))?;
+    Ok(parsed)
+}
+
+/// Runs this instance's share of the feature checks.
+///
+/// # Errors
+///
+/// Fails on bad arguments or environment, when the command list cannot be obtained,
+/// or as soon as one of the assigned commands exits unsuccessfully.
+fn main() -> Result<(), Box<dyn Error>> {
+    let args: Vec<String> = env::args().collect();
+    let [_, idx_arg] = args.as_slice() else {
+        return Err("usage: ci_splitter <instance_idx>".into());
+    };
+    let instance_idx: usize = idx_arg
+        .parse()
+        .map_err(|e| format!("Failed to parse instance index '{idx_arg}': {e}"))?;
+
+    let ci_instances = env_usize("CI_INSTANCES")?;
+    if ci_instances == 0 {
+        return Err("CI_INSTANCES must be at least 1".into());
+    }
+    if instance_idx >= ci_instances {
+        return Err(format!(
+            "instance index {instance_idx} is out of range for {ci_instances} CI instances"
+        )
+        .into());
+    }
+    let llvm_version = env_usize("LLVM_VERSION")?;
+
+    // Honor a caller-provided LLVM_CONFIG, otherwise derive it from LLVM_VERSION. The value is
+    // handed to every child process explicitly, so the process-wide environment is never mutated.
+    let llvm_config = env::var_os("LLVM_CONFIG")
+        .unwrap_or_else(|| format!("llvm-config-{llvm_version}").into());
+
+    // `&&` (rather than `;`) makes a failure of the first listing visible in the exit status.
+    let the_command = concat!(
+        "DOCS_RS=1 cargo hack check --workspace --each-feature --clean-per-run \
+        --exclude-features=prelude,python,sancov_pcguard_edges,arm,aarch64,i386,be,systemmode,whole_archive \
+        --no-dev-deps --exclude libafl_libfuzzer --exclude libafl_qemu --exclude libafl_qemu_sys --print-command-list && ",
+        "DOCS_RS=1 cargo hack check -p libafl_qemu -p libafl_qemu_sys --each-feature --clean-per-run \
+        --exclude-features=prelude,python,sancov_pcguard_edges,arm,aarch64,i386,be,systemmode,whole_archive,slirp,intel_pt,intel_pt_export_raw \
+        --no-dev-deps --features usermode --print-command-list"
+    );
+
+    let output = Command::new("sh")
+        .arg("-c")
+        .arg(the_command)
+        .env("LLVM_CONFIG", &llvm_config)
+        .output()?;
+    if !output.status.success() {
+        return Err(format!(
+            "Failed to list the cargo-hack commands ({}): {}",
+            output.status,
+            String::from_utf8_lossy(&output.stderr).trim()
+        )
+        .into());
+    }
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    let lines: Vec<&str> = stdout.trim().lines().collect();
+
+    // One task == two lines. Round up so that the remainder of the division is still assigned
+    // to an instance instead of being silently dropped.
+    let all_task_cnt = lines.len().div_ceil(2);
+    let task_per_core = all_task_cnt.div_ceil(ci_instances);
+    // `chunks` panics on a chunk size of 0, which would happen for an empty command list.
+    let assigned = lines
+        .chunks((2 * task_per_core).max(1))
+        .nth(instance_idx)
+        .unwrap_or(&[]);
+    println!(
+        "{}/{all_task_cnt} tasks assigned to this instance",
+        assigned.len().div_ceil(2)
+    );
+
+    for &task in assigned {
+        // skip the libafl_jumper no-std case
+        if task.contains("utils/libafl_jumper/Cargo.toml")
+            && task.contains("--no-default-features")
+            && !task.contains("--features")
+        {
+            println!("Skipping {task}");
+            continue;
+        }
+
+        println!("Running {task}");
+        // run each task, with DOCS_RS override for libafl_frida
+        let mut cmd = Command::new("bash");
+        cmd.arg("-c").arg(task).env("LLVM_CONFIG", &llvm_config);
+        if task.contains("libafl_frida") {
+            cmd.env("DOCS_RS", "1");
+        }
+        let status = cmd.status()?;
+        if !status.success() {
+            return Err(format!("Command failed (exit code {:?}): {}", status.code(), task).into());
+        }
+    }
+
+    Ok(())
+}