Update tig-benchmarker: plumb per-algorithm hyperparameters through precommits, jobs, and slave batches, and verify solutions with tig-verifier before submission.

This commit is contained in:
FiveMovesAhead 2025-10-06 14:30:34 +01:00
parent aa0246b1cd
commit 2028d1ea31
11 changed files with 103 additions and 39 deletions

View File

@ -1,5 +1,5 @@
# Version of all benchmarker containers
VERSION=0.0.2
VERSION=0.0.3
# Set to 1 to enable verbose logging
VERBOSE=1

View File

@ -65,6 +65,7 @@ class Precommit(FromDict):
details: PrecommitDetails
settings: BenchmarkSettings
state: PrecommitState
hyperparameters: Optional[dict]
@dataclass
class BenchmarkDetails(FromDict):

View File

@ -98,7 +98,8 @@ class JobManager:
INSERT INTO job
(
benchmark_id,
settings,
settings,
hyperparameters,
num_nonces,
num_batches,
rand_hash,
@ -112,12 +113,13 @@ class JobManager:
average_solution_ratio,
start_time
)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, (EXTRACT(EPOCH FROM NOW()) * 1000)::BIGINT)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, (EXTRACT(EPOCH FROM NOW()) * 1000)::BIGINT)
ON CONFLICT (benchmark_id) DO NOTHING;
""",
(
benchmark_id,
json.dumps(asdict(x.settings)),
json.dumps(x.hyperparameters),
x.details.num_nonces,
num_batches,
x.details.rand_hash,

View File

@ -94,7 +94,8 @@ class PrecommitManager:
block_id=self.last_block_id,
difficulty=difficulty_samples[a_id]
),
num_nonces=selection["num_nonces"]
num_nonces=selection["num_nonces"],
hyperparameters=selection["hyperparameters"]
)
logger.info(f"Created precommit (algorithm_id: {a_id}, difficulty: {req.settings.difficulty}, num_nonces: {req.num_nonces})")
logger.info(f"Created precommit (algorithm_id: {a_id}, difficulty: {req.settings.difficulty}, num_nonces: {req.num_nonces}, hyperparameters: {req.hyperparameters})")
return req

View File

@ -40,6 +40,7 @@ class SlaveManager:
'start_nonce', A.batch_idx * B.batch_size,
'num_nonces', LEAST(B.batch_size, B.num_nonces - A.batch_idx * B.batch_size),
'settings', B.settings,
'hyperparameters', B.hyperparameters,
'sampled_nonces', A.sampled_nonces,
'runtime_config', B.runtime_config,
'download_url', B.download_url,
@ -72,6 +73,7 @@ class SlaveManager:
'start_nonce', A.batch_idx * B.batch_size,
'num_nonces', LEAST(B.batch_size, B.num_nonces - A.batch_idx * B.batch_size),
'settings', B.settings,
'hyperparameters', B.hyperparameters,
'sampled_nonces', NULL,
'runtime_config', B.runtime_config,
'download_url', B.download_url,

View File

@ -15,6 +15,7 @@ logger = logging.getLogger(os.path.splitext(os.path.basename(__file__))[0])
class SubmitPrecommitRequest(FromDict):
settings: BenchmarkSettings
num_nonces: int
hyperparameters: Optional[dict]
@dataclass
class SubmitBenchmarkRequest(FromDict):

View File

@ -5,6 +5,7 @@ CREATE TABLE IF NOT EXISTS config (
CREATE TABLE IF NOT EXISTS job (
benchmark_id TEXT PRIMARY KEY,
settings JSONB NOT NULL,
hyperparameters JSONB,
num_nonces INTEGER NOT NULL,
rand_hash TEXT NOT NULL,
runtime_config JSONB NOT NULL,
@ -119,7 +120,8 @@ SELECT '
"difficulty_range": [0, 0.5],
"selected_difficulties": [],
"weight": 1,
"batch_size": 8
"batch_size": 8,
"hyperparameters": null
},
{
"algorithm_id": "c002_a001",
@ -127,7 +129,8 @@ SELECT '
"difficulty_range": [0, 0.5],
"selected_difficulties": [],
"weight": 1,
"batch_size": 8
"batch_size": 8,
"hyperparameters": null
},
{
"algorithm_id": "c003_a001",
@ -135,7 +138,8 @@ SELECT '
"difficulty_range": [0, 0.5],
"selected_difficulties": [],
"weight": 1,
"batch_size": 8
"batch_size": 8,
"hyperparameters": null
},
{
"algorithm_id": "c004_a001",
@ -143,7 +147,8 @@ SELECT '
"difficulty_range": [0, 0.5],
"selected_difficulties": [],
"weight": 1,
"batch_size": 8
"batch_size": 8,
"hyperparameters": null
},
{
"algorithm_id": "c005_a001",
@ -151,7 +156,8 @@ SELECT '
"difficulty_range": [0, 0.5],
"selected_difficulties": [],
"weight": 1,
"batch_size": 8
"batch_size": 8,
"hyperparameters": null
}
],
"time_before_batch_retry": 60000,

View File

@ -58,22 +58,28 @@ def download_library(algorithms_dir, batch):
def run_tig_runtime(nonce, batch, so_path, ptx_path, results_dir):
output_file = f"{results_dir}/{batch['id']}/{nonce}.json"
output_dir = f"{results_dir}/{batch['id']}"
output_file = f"{output_dir}/{nonce}.json"
settings = json.dumps(batch["settings"], separators=(',',':'))
start = now()
cmd = [
"docker", "exec", batch["challenge"], "tig-runtime",
json.dumps(batch["settings"], separators=(',',':')),
settings,
batch["rand_hash"],
str(nonce),
so_path,
"--fuel", str(batch["runtime_config"]["max_fuel"]),
"--output", output_file,
"--output", output_dir,
]
if batch["hyperparameters"] is not None:
cmd += [
"--hyperparameters", json.dumps(batch["hyperparameters"], separators=(',',':')),
]
if ptx_path is not None:
cmd += [
"--ptx", ptx_path,
]
logger.debug(f"computing batch: {' '.join(cmd[:4] + [f"'{cmd[4]}'"] + cmd[5:])}")
logger.debug(f"computing nonce: {' '.join(cmd[:4] + [f"'{cmd[4]}'"] + cmd[5:])}")
process = subprocess.Popen(
cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
)
@ -82,29 +88,52 @@ def run_tig_runtime(nonce, batch, so_path, ptx_path, results_dir):
if ret is not None:
exit_codes = {
0: "success",
82: "cuda out of memory",
83: "host out of memory",
# 82: "cuda out of memory",
# 83: "host out of memory",
84: "runtime error",
85: "no solution",
86: "invalid solution",
# 85: "no solution",
# 86: "invalid solution",
87: "out of fuel",
}
if (ret != 0 and ret in exit_codes) and not os.path.exists(output_file):
if ret not in exit_codes:
logger.error(f"batch {batch['id']}, nonce {nonce} failed with exit code {ret}: {process.stderr.read().decode()}")
else:
logger.debug(f"batch {batch['id']}, nonce {nonce} finished with exit code {ret}: {exit_codes[ret]}")
if not os.path.exists(output_file):
with open(output_file, "w") as f:
json.dump(dict(
nonce=nonce,
runtime_signature=0,
fuel_consumed=(ret == 87) and (batch["runtime_config"]["max_fuel"] + 1),
solution={},
solution="",
cpu_arch=CPU_ARCH
), f)
if ret != 0:
if ret not in exit_codes:
logger.error(f"batch {batch['id']}, nonce {nonce} failed with exit code {ret}: {process.stderr.read().decode()}")
else:
start = now()
cmd = [
"docker", "exec", batch["challenge"], "tig-verifier",
settings,
batch["rand_hash"],
str(nonce),
output_file,
]
if ptx_path is not None:
cmd += [
"--ptx", ptx_path,
]
logger.debug(f"verifying nonce: {' '.join(cmd[:4] + [f"'{cmd[4]}'"] + cmd[5:])}")
ret = subprocess.run(cmd, capture_output=True, text=True)
if ret.returncode == 0:
logger.debug(f"batch {batch['id']}, nonce {nonce} valid solution")
else:
logger.error(f"batch {batch['id']}, nonce {nonce} failed with exit code {ret}: {exit_codes[ret]}")
logger.debug(f"batch {batch['id']}, nonce {nonce} invalid solution (exit code: {ret.returncode}, stderr: {ret.stderr.strip()})")
with open(output_file, "r") as f:
d = json.load(f)
d["solution"] = ""
with open(output_file, "w") as f:
json.dump(d, f)
break
elif batch["id"] not in PROCESSING_BATCH_IDS:

View File

@ -18,10 +18,11 @@ use std::sync::Arc;
pub fn entry_point(
challenge: &Challenge,
save_solution: &dyn Fn(&Solution) -> Result<()>,
hyperparameters: &Option<Map<String, Value>>,
hyperparameters: Option<String>,
) -> Result<()>
{
catch_unwind(AssertUnwindSafe(|| {
let hyperparameters = hyperparameters.map(|x| serde_json::from_str::<Map<String, Value>>(&x).unwrap());
{ALGORITHM}::solve_challenge(challenge, save_solution, hyperparameters)
})).unwrap_or_else(|_| {
Err(anyhow!("Panic occurred calling solve_challenge"))
@ -34,13 +35,14 @@ pub fn entry_point(
pub fn entry_point(
challenge: &Challenge,
save_solution: &dyn Fn(&Solution) -> Result<()>,
hyperparameters: &Option<Map<String, Value>>,
hyperparameters: Option<String>,
module: Arc<CudaModule>,
stream: Arc<CudaStream>,
prop: &cudaDeviceProp,
) -> Result<()>
{
catch_unwind(AssertUnwindSafe(|| {
let hyperparameters = hyperparameters.map(|x| serde_json::from_str::<Map<String, Value>>(x));
{ALGORITHM}::solve_challenge(challenge, save_solution, hyperparameters, module, stream, prop)
})).unwrap_or_else(|_| {
Err(anyhow!("Panic occurred calling solve_challenge"))

View File

@ -112,7 +112,7 @@ pub fn compute_solution(
library.get::<fn(
&$c::Challenge,
&dyn Fn(&$c::Solution) -> Result<()>,
&Option<Map<String, Value>>,
Option<String>,
) -> Result<()>>(b"entry_point")?
};
@ -140,7 +140,7 @@ pub fn compute_solution(
fs::write(&output_file, jsonify(&output_data))?;
Ok(())
};
let result = solve_challenge_fn(&challenge, &save_solution_fn, &hyperparameters);
let result = solve_challenge_fn(&challenge, &save_solution_fn, hyperparameters);
if !output_file.exists() {
save_solution_fn(&$c::Solution::new())?;
}
@ -157,7 +157,7 @@ pub fn compute_solution(
library.get::<fn(
&$c::Challenge,
save_solution: &dyn Fn(&$c::Solution) -> anyhow::Result<()>,
&Option<Map<String, Value>>,
Option<String>,
Arc<CudaModule>,
Arc<CudaStream>,
&cudaDeviceProp,
@ -258,7 +258,7 @@ pub fn compute_solution(
let result = solve_challenge_fn(
&challenge,
&save_solution_fn,
&hyperparameters,
hyperparameters,
module.clone(),
stream.clone(),
&prop,
@ -327,7 +327,7 @@ fn load_settings(settings: &str) -> BenchmarkSettings {
})
}
fn load_hyperparameters(hyperparameters: &str) -> Map<String, Value> {
fn load_hyperparameters(hyperparameters: &str) -> String {
let hyperparameters = if hyperparameters.ends_with(".json") {
fs::read_to_string(hyperparameters).unwrap_or_else(|_| {
eprintln!("Failed to read hyperparameters file: {}", hyperparameters);
@ -337,10 +337,12 @@ fn load_hyperparameters(hyperparameters: &str) -> Map<String, Value> {
hyperparameters.to_string()
};
dejsonify::<Map<String, Value>>(&hyperparameters).unwrap_or_else(|_| {
// validate it is valid JSON
let _ = dejsonify::<Map<String, Value>>(&hyperparameters).unwrap_or_else(|_| {
eprintln!("Failed to parse hyperparameters as JSON");
std::process::exit(1);
})
});
hyperparameters
}
pub fn load_module(path: &PathBuf) -> Result<Library> {

View File

@ -1,5 +1,6 @@
use anyhow::{anyhow, Result};
use clap::{arg, Command};
use serde_json::{Map, Value};
use std::{fs, io::Read, panic, path::PathBuf};
use tig_challenges::*;
use tig_structs::core::BenchmarkSettings;
@ -22,7 +23,7 @@ fn cli() -> Command {
)
.arg(arg!(<NONCE> "Nonce value").value_parser(clap::value_parser!(u64)))
.arg(
arg!(<SOLUTION> "Solution base64 string, path to b64 file, or '-' for stdin")
arg!(<SOLUTION> "Solution base64 string, path to json file with solution field, or '-' for stdin")
.value_parser(clap::value_parser!(String)),
)
.arg(arg!(--ptx [PTX] "Path to a CUDA ptx file").value_parser(clap::value_parser!(PathBuf)))
@ -204,11 +205,28 @@ fn load_solution(solution: &str) -> String {
std::process::exit(1);
});
buffer
} else if solution.ends_with(".b64") {
fs::read_to_string(&solution).unwrap_or_else(|_| {
} else if solution.ends_with(".json") {
let d = fs::read_to_string(&solution).unwrap_or_else(|_| {
eprintln!("Failed to read solution file: {}", solution);
std::process::exit(1);
})
});
let d = serde_json::from_str::<Map<String, Value>>(&d).unwrap_or_else(|_| {
eprintln!("Failed to parse solution file: {}", solution);
std::process::exit(1);
});
match d.get("solution") {
None => {
eprintln!("json file does not contain 'solution' field: {}", solution);
std::process::exit(1);
}
Some(v) => match v.as_str() {
None => {
eprintln!("invalid 'solution' field in json file. Expecting string");
std::process::exit(1);
}
Some(s) => s.to_string(),
},
}
} else {
solution.to_string()
}