Update tig-benchmarker: plumb per-algorithm hyperparameters through precommits, jobs, and slave batches, and verify solutions with tig-verifier before submission.

This commit is contained in:
FiveMovesAhead 2025-10-06 14:30:34 +01:00
parent aa0246b1cd
commit 2028d1ea31
11 changed files with 103 additions and 39 deletions

View File

@ -1,5 +1,5 @@
# Version of all benchmarker containers
VERSION=0.0.2
VERSION=0.0.3
# Set to 1 to enable verbose logging
VERBOSE=1

View File

@ -65,6 +65,7 @@ class Precommit(FromDict):
details: PrecommitDetails
settings: BenchmarkSettings
state: PrecommitState
hyperparameters: Optional[dict]
@dataclass
class BenchmarkDetails(FromDict):

View File

@ -98,7 +98,8 @@ class JobManager:
INSERT INTO job
(
benchmark_id,
settings,
settings,
hyperparameters,
num_nonces,
num_batches,
rand_hash,
@ -112,12 +113,13 @@ class JobManager:
average_solution_ratio,
start_time
)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, (EXTRACT(EPOCH FROM NOW()) * 1000)::BIGINT)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, (EXTRACT(EPOCH FROM NOW()) * 1000)::BIGINT)
ON CONFLICT (benchmark_id) DO NOTHING;
""",
(
benchmark_id,
json.dumps(asdict(x.settings)),
json.dumps(x.hyperparameters),
x.details.num_nonces,
num_batches,
x.details.rand_hash,

View File

@ -94,7 +94,8 @@ class PrecommitManager:
block_id=self.last_block_id,
difficulty=difficulty_samples[a_id]
),
num_nonces=selection["num_nonces"]
num_nonces=selection["num_nonces"],
hyperparameters=selection["hyperparameters"]
)
logger.info(f"Created precommit (algorithm_id: {a_id}, difficulty: {req.settings.difficulty}, num_nonces: {req.num_nonces})")
logger.info(f"Created precommit (algorithm_id: {a_id}, difficulty: {req.settings.difficulty}, num_nonces: {req.num_nonces}, hyperparameters: {req.hyperparameters})")
return req

View File

@ -40,6 +40,7 @@ class SlaveManager:
'start_nonce', A.batch_idx * B.batch_size,
'num_nonces', LEAST(B.batch_size, B.num_nonces - A.batch_idx * B.batch_size),
'settings', B.settings,
'hyperparameters', B.hyperparameters,
'sampled_nonces', A.sampled_nonces,
'runtime_config', B.runtime_config,
'download_url', B.download_url,
@ -72,6 +73,7 @@ class SlaveManager:
'start_nonce', A.batch_idx * B.batch_size,
'num_nonces', LEAST(B.batch_size, B.num_nonces - A.batch_idx * B.batch_size),
'settings', B.settings,
'hyperparameters', B.hyperparameters,
'sampled_nonces', NULL,
'runtime_config', B.runtime_config,
'download_url', B.download_url,

View File

@ -15,6 +15,7 @@ logger = logging.getLogger(os.path.splitext(os.path.basename(__file__))[0])
class SubmitPrecommitRequest(FromDict):
settings: BenchmarkSettings
num_nonces: int
hyperparameters: Optional[dict]
@dataclass
class SubmitBenchmarkRequest(FromDict):

View File

@ -5,6 +5,7 @@ CREATE TABLE IF NOT EXISTS config (
CREATE TABLE IF NOT EXISTS job (
benchmark_id TEXT PRIMARY KEY,
settings JSONB NOT NULL,
hyperparameters JSONB,
num_nonces INTEGER NOT NULL,
rand_hash TEXT NOT NULL,
runtime_config JSONB NOT NULL,
@ -119,7 +120,8 @@ SELECT '
"difficulty_range": [0, 0.5],
"selected_difficulties": [],
"weight": 1,
"batch_size": 8
"batch_size": 8,
"hyperparameters": null
},
{
"algorithm_id": "c002_a001",
@ -127,7 +129,8 @@ SELECT '
"difficulty_range": [0, 0.5],
"selected_difficulties": [],
"weight": 1,
"batch_size": 8
"batch_size": 8,
"hyperparameters": null
},
{
"algorithm_id": "c003_a001",
@ -135,7 +138,8 @@ SELECT '
"difficulty_range": [0, 0.5],
"selected_difficulties": [],
"weight": 1,
"batch_size": 8
"batch_size": 8,
"hyperparameters": null
},
{
"algorithm_id": "c004_a001",
@ -143,7 +147,8 @@ SELECT '
"difficulty_range": [0, 0.5],
"selected_difficulties": [],
"weight": 1,
"batch_size": 8
"batch_size": 8,
"hyperparameters": null
},
{
"algorithm_id": "c005_a001",
@ -151,7 +156,8 @@ SELECT '
"difficulty_range": [0, 0.5],
"selected_difficulties": [],
"weight": 1,
"batch_size": 8
"batch_size": 8,
"hyperparameters": null
}
],
"time_before_batch_retry": 60000,

View File

@ -58,22 +58,28 @@ def download_library(algorithms_dir, batch):
def run_tig_runtime(nonce, batch, so_path, ptx_path, results_dir):
output_file = f"{results_dir}/{batch['id']}/{nonce}.json"
output_dir = f"{results_dir}/{batch['id']}"
output_file = f"{output_dir}/{nonce}.json"
settings = json.dumps(batch["settings"], separators=(',',':'))
start = now()
cmd = [
"docker", "exec", batch["challenge"], "tig-runtime",
json.dumps(batch["settings"], separators=(',',':')),
settings,
batch["rand_hash"],
str(nonce),
so_path,
"--fuel", str(batch["runtime_config"]["max_fuel"]),
"--output", output_file,
"--output", output_dir,
]
if batch["hyperparameters"] is not None:
cmd += [
"--hyperparameters", json.dumps(batch["hyperparameters"], separators=(',',':')),
]
if ptx_path is not None:
cmd += [
"--ptx", ptx_path,
]
logger.debug(f"computing batch: {' '.join(cmd[:4] + [f"'{cmd[4]}'"] + cmd[5:])}")
logger.debug(f"computing nonce: {' '.join(cmd[:4] + [f"'{cmd[4]}'"] + cmd[5:])}")
process = subprocess.Popen(
cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
)
@ -82,29 +88,52 @@ def run_tig_runtime(nonce, batch, so_path, ptx_path, results_dir):
if ret is not None:
exit_codes = {
0: "success",
82: "cuda out of memory",
83: "host out of memory",
# 82: "cuda out of memory",
# 83: "host out of memory",
84: "runtime error",
85: "no solution",
86: "invalid solution",
# 85: "no solution",
# 86: "invalid solution",
87: "out of fuel",
}
if (ret != 0 and ret in exit_codes) and not os.path.exists(output_file):
if ret not in exit_codes:
logger.error(f"batch {batch['id']}, nonce {nonce} failed with exit code {ret}: {process.stderr.read().decode()}")
else:
logger.debug(f"batch {batch['id']}, nonce {nonce} finished with exit code {ret}: {exit_codes[ret]}")
if not os.path.exists(output_file):
with open(output_file, "w") as f:
json.dump(dict(
nonce=nonce,
runtime_signature=0,
fuel_consumed=(ret == 87) and (batch["runtime_config"]["max_fuel"] + 1),
solution={},
solution="",
cpu_arch=CPU_ARCH
), f)
if ret != 0:
if ret not in exit_codes:
logger.error(f"batch {batch['id']}, nonce {nonce} failed with exit code {ret}: {process.stderr.read().decode()}")
else:
start = now()
cmd = [
"docker", "exec", batch["challenge"], "tig-verifier",
settings,
batch["rand_hash"],
str(nonce),
output_file,
]
if ptx_path is not None:
cmd += [
"--ptx", ptx_path,
]
logger.debug(f"verifying nonce: {' '.join(cmd[:4] + [f"'{cmd[4]}'"] + cmd[5:])}")
ret = subprocess.run(cmd, capture_output=True, text=True)
if ret.returncode == 0:
logger.debug(f"batch {batch['id']}, nonce {nonce} valid solution")
else:
logger.error(f"batch {batch['id']}, nonce {nonce} failed with exit code {ret}: {exit_codes[ret]}")
logger.debug(f"batch {batch['id']}, nonce {nonce} invalid solution (exit code: {ret.returncode}, stderr: {ret.stderr.strip()})")
with open(output_file, "r") as f:
d = json.load(f)
d["solution"] = ""
with open(output_file, "w") as f:
json.dump(d, f)
break
elif batch["id"] not in PROCESSING_BATCH_IDS:

View File

@ -18,10 +18,11 @@ use std::sync::Arc;
pub fn entry_point(
challenge: &Challenge,
save_solution: &dyn Fn(&Solution) -> Result<()>,
hyperparameters: &Option<Map<String, Value>>,
hyperparameters: Option<String>,
) -> Result<()>
{
catch_unwind(AssertUnwindSafe(|| {
let hyperparameters = hyperparameters.map(|x| serde_json::from_str::<Map<String, Value>>(&x).unwrap());
{ALGORITHM}::solve_challenge(challenge, save_solution, hyperparameters)
})).unwrap_or_else(|_| {
Err(anyhow!("Panic occurred calling solve_challenge"))
@ -34,13 +35,14 @@ pub fn entry_point(
pub fn entry_point(
challenge: &Challenge,
save_solution: &dyn Fn(&Solution) -> Result<()>,
hyperparameters: &Option<Map<String, Value>>,
hyperparameters: Option<String>,
module: Arc<CudaModule>,
stream: Arc<CudaStream>,
prop: &cudaDeviceProp,
) -> Result<()>
{
catch_unwind(AssertUnwindSafe(|| {
let hyperparameters = hyperparameters.map(|x| serde_json::from_str::<Map<String, Value>>(x));
{ALGORITHM}::solve_challenge(challenge, save_solution, hyperparameters, module, stream, prop)
})).unwrap_or_else(|_| {
Err(anyhow!("Panic occurred calling solve_challenge"))

View File

@ -112,7 +112,7 @@ pub fn compute_solution(
library.get::<fn(
&$c::Challenge,
&dyn Fn(&$c::Solution) -> Result<()>,
&Option<Map<String, Value>>,
Option<String>,
) -> Result<()>>(b"entry_point")?
};
@ -140,7 +140,7 @@ pub fn compute_solution(
fs::write(&output_file, jsonify(&output_data))?;
Ok(())
};
let result = solve_challenge_fn(&challenge, &save_solution_fn, &hyperparameters);
let result = solve_challenge_fn(&challenge, &save_solution_fn, hyperparameters);
if !output_file.exists() {
save_solution_fn(&$c::Solution::new())?;
}
@ -157,7 +157,7 @@ pub fn compute_solution(
library.get::<fn(
&$c::Challenge,
save_solution: &dyn Fn(&$c::Solution) -> anyhow::Result<()>,
&Option<Map<String, Value>>,
Option<String>,
Arc<CudaModule>,
Arc<CudaStream>,
&cudaDeviceProp,
@ -258,7 +258,7 @@ pub fn compute_solution(
let result = solve_challenge_fn(
&challenge,
&save_solution_fn,
&hyperparameters,
hyperparameters,
module.clone(),
stream.clone(),
&prop,
@ -327,7 +327,7 @@ fn load_settings(settings: &str) -> BenchmarkSettings {
})
}
fn load_hyperparameters(hyperparameters: &str) -> Map<String, Value> {
fn load_hyperparameters(hyperparameters: &str) -> String {
let hyperparameters = if hyperparameters.ends_with(".json") {
fs::read_to_string(hyperparameters).unwrap_or_else(|_| {
eprintln!("Failed to read hyperparameters file: {}", hyperparameters);
@ -337,10 +337,12 @@ fn load_hyperparameters(hyperparameters: &str) -> Map<String, Value> {
hyperparameters.to_string()
};
dejsonify::<Map<String, Value>>(&hyperparameters).unwrap_or_else(|_| {
// validate it is valid JSON
let _ = dejsonify::<Map<String, Value>>(&hyperparameters).unwrap_or_else(|_| {
eprintln!("Failed to parse hyperparameters as JSON");
std::process::exit(1);
})
});
hyperparameters
}
pub fn load_module(path: &PathBuf) -> Result<Library> {

View File

@ -1,5 +1,6 @@
use anyhow::{anyhow, Result};
use clap::{arg, Command};
use serde_json::{Map, Value};
use std::{fs, io::Read, panic, path::PathBuf};
use tig_challenges::*;
use tig_structs::core::BenchmarkSettings;
@ -22,7 +23,7 @@ fn cli() -> Command {
)
.arg(arg!(<NONCE> "Nonce value").value_parser(clap::value_parser!(u64)))
.arg(
arg!(<SOLUTION> "Solution base64 string, path to b64 file, or '-' for stdin")
arg!(<SOLUTION> "Solution base64 string, path to json file with solution field, or '-' for stdin")
.value_parser(clap::value_parser!(String)),
)
.arg(arg!(--ptx [PTX] "Path to a CUDA ptx file").value_parser(clap::value_parser!(PathBuf)))
@ -204,11 +205,28 @@ fn load_solution(solution: &str) -> String {
std::process::exit(1);
});
buffer
} else if solution.ends_with(".b64") {
fs::read_to_string(&solution).unwrap_or_else(|_| {
} else if solution.ends_with(".json") {
let d = fs::read_to_string(&solution).unwrap_or_else(|_| {
eprintln!("Failed to read solution file: {}", solution);
std::process::exit(1);
})
});
let d = serde_json::from_str::<Map<String, Value>>(&d).unwrap_or_else(|_| {
eprintln!("Failed to parse solution file: {}", solution);
std::process::exit(1);
});
match d.get("solution") {
None => {
eprintln!("json file does not contain 'solution' field: {}", solution);
std::process::exit(1);
}
Some(v) => match v.as_str() {
None => {
eprintln!("invalid 'solution' field in json file. Expecting string");
std::process::exit(1);
}
Some(s) => s.to_string(),
},
}
} else {
solution.to_string()
}