From 2028d1ea31ccdaaa9600419e7d1a1e120882fc83 Mon Sep 17 00:00:00 2001 From: FiveMovesAhead Date: Mon, 6 Oct 2025 14:30:34 +0100 Subject: [PATCH] Update tig-benchmarker. --- tig-benchmarker/.env | 2 +- tig-benchmarker/common/structs.py | 1 + tig-benchmarker/master/master/job_manager.py | 6 +- .../master/master/precommit_manager.py | 5 +- .../master/master/slave_manager.py | 2 + .../master/master/submissions_manager.py | 1 + tig-benchmarker/postgres/init.sql | 16 +++-- tig-benchmarker/slave/main.py | 61 ++++++++++++++----- tig-binary/src/entry_point_template.rs | 6 +- tig-runtime/src/main.rs | 16 ++--- tig-verifier/src/main.rs | 26 ++++++-- 11 files changed, 103 insertions(+), 39 deletions(-) diff --git a/tig-benchmarker/.env b/tig-benchmarker/.env index 4f4e8e1..ecdcbcc 100644 --- a/tig-benchmarker/.env +++ b/tig-benchmarker/.env @@ -1,5 +1,5 @@ # Version of all benchmarker containers -VERSION=0.0.2 +VERSION=0.0.3 # Set to 1 to enable verbose logging VERBOSE=1 diff --git a/tig-benchmarker/common/structs.py b/tig-benchmarker/common/structs.py index e4d4e94..33ed10d 100644 --- a/tig-benchmarker/common/structs.py +++ b/tig-benchmarker/common/structs.py @@ -65,6 +65,7 @@ class Precommit(FromDict): details: PrecommitDetails settings: BenchmarkSettings state: PrecommitState + hyperparameters: Optional[dict] @dataclass class BenchmarkDetails(FromDict): diff --git a/tig-benchmarker/master/master/job_manager.py b/tig-benchmarker/master/master/job_manager.py index 3eb79e1..1a37745 100644 --- a/tig-benchmarker/master/master/job_manager.py +++ b/tig-benchmarker/master/master/job_manager.py @@ -98,7 +98,8 @@ class JobManager: INSERT INTO job ( benchmark_id, - settings, + settings, + hyperparameters, num_nonces, num_batches, rand_hash, @@ -112,12 +113,13 @@ class JobManager: average_solution_ratio, start_time ) - VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, (EXTRACT(EPOCH FROM NOW()) * 1000)::BIGINT) + VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, (EXTRACT(EPOCH FROM NOW()) * 1000)::BIGINT) ON CONFLICT (benchmark_id) DO NOTHING; """, ( benchmark_id, json.dumps(asdict(x.settings)), + json.dumps(x.hyperparameters), x.details.num_nonces, num_batches, x.details.rand_hash, diff --git a/tig-benchmarker/master/master/precommit_manager.py b/tig-benchmarker/master/master/precommit_manager.py index 863a698..56eca17 100644 --- a/tig-benchmarker/master/master/precommit_manager.py +++ b/tig-benchmarker/master/master/precommit_manager.py @@ -94,7 +94,8 @@ class PrecommitManager: block_id=self.last_block_id, difficulty=difficulty_samples[a_id] ), - num_nonces=selection["num_nonces"] + num_nonces=selection["num_nonces"], + hyperparameters=selection["hyperparameters"] ) - logger.info(f"Created precommit (algorithm_id: {a_id}, difficulty: {req.settings.difficulty}, num_nonces: {req.num_nonces})") + logger.info(f"Created precommit (algorithm_id: {a_id}, difficulty: {req.settings.difficulty}, num_nonces: {req.num_nonces}, hyperparameters: {req.hyperparameters})") return req \ No newline at end of file diff --git a/tig-benchmarker/master/master/slave_manager.py b/tig-benchmarker/master/master/slave_manager.py index 6bcb84e..e2d6961 100644 --- a/tig-benchmarker/master/master/slave_manager.py +++ b/tig-benchmarker/master/master/slave_manager.py @@ -40,6 +40,7 @@ class SlaveManager: 'start_nonce', A.batch_idx * B.batch_size, 'num_nonces', LEAST(B.batch_size, B.num_nonces - A.batch_idx * B.batch_size), 'settings', B.settings, + 'hyperparameters', B.hyperparameters, 'sampled_nonces', A.sampled_nonces, 'runtime_config', B.runtime_config, 'download_url', B.download_url, @@ -72,6 +73,7 @@ class SlaveManager: 'start_nonce', A.batch_idx * B.batch_size, 'num_nonces', LEAST(B.batch_size, B.num_nonces - A.batch_idx * B.batch_size), 'settings', B.settings, + 'hyperparameters', B.hyperparameters, 'sampled_nonces', NULL, 'runtime_config', B.runtime_config, 'download_url', B.download_url, diff --git a/tig-benchmarker/master/master/submissions_manager.py b/tig-benchmarker/master/master/submissions_manager.py index d74abb3..c210107 100644 --- a/tig-benchmarker/master/master/submissions_manager.py +++ b/tig-benchmarker/master/master/submissions_manager.py @@ -15,6 +15,7 @@ logger = logging.getLogger(os.path.splitext(os.path.basename(__file__))[0]) class SubmitPrecommitRequest(FromDict): settings: BenchmarkSettings num_nonces: int + hyperparameters: Optional[dict] @dataclass class SubmitBenchmarkRequest(FromDict): diff --git a/tig-benchmarker/postgres/init.sql b/tig-benchmarker/postgres/init.sql index 1698b1b..a4fd973 100644 --- a/tig-benchmarker/postgres/init.sql +++ b/tig-benchmarker/postgres/init.sql @@ -5,6 +5,7 @@ CREATE TABLE IF NOT EXISTS config ( CREATE TABLE IF NOT EXISTS job ( benchmark_id TEXT PRIMARY KEY, settings JSONB NOT NULL, + hyperparameters JSONB, num_nonces INTEGER NOT NULL, rand_hash TEXT NOT NULL, runtime_config JSONB NOT NULL, @@ -119,7 +120,8 @@ SELECT ' "difficulty_range": [0, 0.5], "selected_difficulties": [], "weight": 1, - "batch_size": 8 + "batch_size": 8, + "hyperparameters": null }, { "algorithm_id": "c002_a001", @@ -127,7 +129,8 @@ SELECT ' "difficulty_range": [0, 0.5], "selected_difficulties": [], "weight": 1, - "batch_size": 8 + "batch_size": 8, + "hyperparameters": null }, { "algorithm_id": "c003_a001", @@ -135,7 +138,8 @@ SELECT ' "difficulty_range": [0, 0.5], "selected_difficulties": [], "weight": 1, - "batch_size": 8 + "batch_size": 8, + "hyperparameters": null }, { "algorithm_id": "c004_a001", @@ -143,7 +147,8 @@ SELECT ' "difficulty_range": [0, 0.5], "selected_difficulties": [], "weight": 1, - "batch_size": 8 + "batch_size": 8, + "hyperparameters": null }, { "algorithm_id": "c005_a001", @@ -151,7 +156,8 @@ SELECT ' "difficulty_range": [0, 0.5], "selected_difficulties": [], "weight": 1, - "batch_size": 8 + "batch_size": 8, + "hyperparameters": null } ], "time_before_batch_retry": 60000, diff --git a/tig-benchmarker/slave/main.py b/tig-benchmarker/slave/main.py index 313bfd8..3b724b2 100644 --- a/tig-benchmarker/slave/main.py +++ b/tig-benchmarker/slave/main.py @@ -58,22 +58,28 @@ def download_library(algorithms_dir, batch): def run_tig_runtime(nonce, batch, so_path, ptx_path, results_dir): - output_file = f"{results_dir}/{batch['id']}/{nonce}.json" + output_dir = f"{results_dir}/{batch['id']}" + output_file = f"{output_dir}/{nonce}.json" + settings = json.dumps(batch["settings"], separators=(',',':')) start = now() cmd = [ "docker", "exec", batch["challenge"], "tig-runtime", - json.dumps(batch["settings"], separators=(',',':')), + settings, batch["rand_hash"], str(nonce), so_path, "--fuel", str(batch["runtime_config"]["max_fuel"]), - "--output", output_file, + "--output", output_dir, ] + if batch["hyperparameters"] is not None: + cmd += [ + "--hyperparameters", json.dumps(batch["hyperparameters"], separators=(',',':')), + ] if ptx_path is not None: cmd += [ "--ptx", ptx_path, ] - logger.debug(f"computing batch: {' '.join(cmd[:4] + [f"'{cmd[4]}'"] + cmd[5:])}") + logger.debug(f"computing nonce: {' '.join(cmd[:4] + [f"'{cmd[4]}'"] + cmd[5:])}") process = subprocess.Popen( cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE ) @@ -82,29 +88,52 @@ def run_tig_runtime(nonce, batch, so_path, ptx_path, results_dir): if ret is not None: exit_codes = { 0: "success", - 82: "cuda out of memory", - 83: "host out of memory", + # 82: "cuda out of memory", + # 83: "host out of memory", 84: "runtime error", - 85: "no solution", - 86: "invalid solution", + # 85: "no solution", + # 86: "invalid solution", 87: "out of fuel", } - if (ret != 0 and ret in exit_codes) and not os.path.exists(output_file): + + if ret not in exit_codes: + logger.error(f"batch {batch['id']}, nonce {nonce} failed with exit code {ret}: {process.stderr.read().decode()}") + else: + logger.debug(f"batch {batch['id']}, nonce {nonce} finished with exit code {ret}: {exit_codes[ret]}") + + if not os.path.exists(output_file): with open(output_file, "w") as f: json.dump(dict( nonce=nonce, runtime_signature=0, fuel_consumed=(ret == 87) and (batch["runtime_config"]["max_fuel"] + 1), - solution={}, + solution="", cpu_arch=CPU_ARCH ), f) - - if ret != 0: - if ret not in exit_codes: - logger.error(f"batch {batch['id']}, nonce {nonce} failed with exit code {ret}: {process.stderr.read().decode()}") + else: + start = now() + cmd = [ + "docker", "exec", batch["challenge"], "tig-verifier", + settings, + batch["rand_hash"], + str(nonce), + output_file, + ] + if ptx_path is not None: + cmd += [ + "--ptx", ptx_path, + ] + logger.debug(f"verifying nonce: {' '.join(cmd[:4] + [f"'{cmd[4]}'"] + cmd[5:])}") + ret = subprocess.run(cmd, capture_output=True, text=True) + if ret.returncode == 0: + logger.debug(f"batch {batch['id']}, nonce {nonce} valid solution") else: - logger.error(f"batch {batch['id']}, nonce {nonce} failed with exit code {ret}: {exit_codes[ret]}") - + logger.debug(f"batch {batch['id']}, nonce {nonce} invalid solution (exit code: {ret.returncode}, stderr: {ret.stderr.strip()})") + with open(output_file, "r") as f: + d = json.load(f) + d["solution"] = "" + with open(output_file, "w") as f: + json.dump(d, f) break elif batch["id"] not in PROCESSING_BATCH_IDS: diff --git a/tig-binary/src/entry_point_template.rs b/tig-binary/src/entry_point_template.rs index 7ecb9c8..4a1f240 100644 --- a/tig-binary/src/entry_point_template.rs +++ b/tig-binary/src/entry_point_template.rs @@ -18,10 +18,11 @@ use std::sync::Arc; pub fn entry_point( challenge: &Challenge, save_solution: &dyn Fn(&Solution) -> Result<()>, - hyperparameters: &Option>, + hyperparameters: Option, ) -> Result<()> { catch_unwind(AssertUnwindSafe(|| { + let hyperparameters = hyperparameters.map(|x| serde_json::from_str::>(&x).unwrap()); {ALGORITHM}::solve_challenge(challenge, save_solution, hyperparameters) })).unwrap_or_else(|_| { Err(anyhow!("Panic occurred calling solve_challenge")) @@ -34,13 +35,14 @@ pub fn entry_point( pub fn entry_point( challenge: &Challenge, save_solution: &dyn Fn(&Solution) -> Result<()>, - hyperparameters: &Option>, + hyperparameters: Option, module: Arc, stream: Arc, prop: &cudaDeviceProp, ) -> Result<()> { catch_unwind(AssertUnwindSafe(|| { + let hyperparameters = hyperparameters.map(|x| serde_json::from_str::>(x)); {ALGORITHM}::solve_challenge(challenge, save_solution, hyperparameters, module, stream, prop) })).unwrap_or_else(|_| { Err(anyhow!("Panic occurred calling solve_challenge")) diff --git a/tig-runtime/src/main.rs b/tig-runtime/src/main.rs index 1356845..dabffdd 100644 --- a/tig-runtime/src/main.rs +++ b/tig-runtime/src/main.rs @@ -112,7 +112,7 @@ pub fn compute_solution( library.get:: Result<()>, - &Option>, + Option, ) -> Result<()>>(b"entry_point")? }; @@ -140,7 +140,7 @@ pub fn compute_solution( fs::write(&output_file, jsonify(&output_data))?; Ok(()) }; - let result = solve_challenge_fn(&challenge, &save_solution_fn, &hyperparameters); + let result = solve_challenge_fn(&challenge, &save_solution_fn, hyperparameters); if !output_file.exists() { save_solution_fn(&$c::Solution::new())?; } @@ -157,7 +157,7 @@ pub fn compute_solution( library.get:: anyhow::Result<()>, - &Option>, + Option, Arc, Arc, &cudaDeviceProp, @@ -258,7 +258,7 @@ pub fn compute_solution( let result = solve_challenge_fn( &challenge, &save_solution_fn, - &hyperparameters, + hyperparameters, module.clone(), stream.clone(), &prop, @@ -327,7 +327,7 @@ fn load_settings(settings: &str) -> BenchmarkSettings { }) } -fn load_hyperparameters(hyperparameters: &str) -> Map { +fn load_hyperparameters(hyperparameters: &str) -> String { let hyperparameters = if hyperparameters.ends_with(".json") { fs::read_to_string(hyperparameters).unwrap_or_else(|_| { eprintln!("Failed to read hyperparameters file: {}", hyperparameters); @@ -337,10 +337,12 @@ fn load_hyperparameters(hyperparameters: &str) -> Map { hyperparameters.to_string() }; - dejsonify::>(&hyperparameters).unwrap_or_else(|_| { + // validate it is valid JSON + let _ = dejsonify::>(&hyperparameters).unwrap_or_else(|_| { eprintln!("Failed to parse hyperparameters as JSON"); std::process::exit(1); - }) + }); + hyperparameters } pub fn load_module(path: &PathBuf) -> Result { diff --git a/tig-verifier/src/main.rs b/tig-verifier/src/main.rs index f238183..d823f48 100644 --- a/tig-verifier/src/main.rs +++ b/tig-verifier/src/main.rs @@ -1,5 +1,6 @@ use anyhow::{anyhow, Result}; use clap::{arg, Command}; +use serde_json::{Map, Value}; use std::{fs, io::Read, panic, path::PathBuf}; use tig_challenges::*; use tig_structs::core::BenchmarkSettings; @@ -22,7 +23,7 @@ fn cli() -> Command { ) .arg(arg!( "Nonce value").value_parser(clap::value_parser!(u64))) .arg( - arg!( "Solution base64 string, path to b64 file, or '-' for stdin") + arg!( "Solution base64 string, path to json file with solution field, or '-' for stdin") .value_parser(clap::value_parser!(String)), ) .arg(arg!(--ptx [PTX] "Path to a CUDA ptx file").value_parser(clap::value_parser!(PathBuf))) @@ -204,11 +205,28 @@ fn load_solution(solution: &str) -> String { std::process::exit(1); }); buffer - } else if solution.ends_with(".b64") { - fs::read_to_string(&solution).unwrap_or_else(|_| { + } else if solution.ends_with(".json") { + let d = fs::read_to_string(&solution).unwrap_or_else(|_| { eprintln!("Failed to read solution file: {}", solution); std::process::exit(1); - }) + }); + let d = serde_json::from_str::>(&d).unwrap_or_else(|_| { + eprintln!("Failed to parse solution file: {}", solution); + std::process::exit(1); + }); + match d.get("solution") { + None => { + eprintln!("json file does not contain 'solution' field: {}", solution); + std::process::exit(1); + } + Some(v) => match v.as_str() { + None => { + eprintln!("invalid 'solution' field in json file. Expecting string"); + std::process::exit(1); + } + Some(s) => s.to_string(), + }, + } } else { solution.to_string() }