mirror of https://github.com/tig-pool-nk/tig-monorepo.git
synced 2026-02-21 15:57:22 +08:00

Update tig-benchmarker.

parent aa0246b1cd
commit 2028d1ea31

@@ -1,5 +1,5 @@
 # Version of all benchmarker containers
-VERSION=0.0.2
+VERSION=0.0.3
 # Set to 1 to enable verbose logging
 VERBOSE=1

@@ -65,6 +65,7 @@ class Precommit(FromDict):
     details: PrecommitDetails
     settings: BenchmarkSettings
     state: PrecommitState
+    hyperparameters: Optional[dict]

 @dataclass
 class BenchmarkDetails(FromDict):

@@ -99,6 +99,7 @@ class JobManager:
                 (
                     benchmark_id,
                     settings,
+                    hyperparameters,
                     num_nonces,
                     num_batches,
                     rand_hash,

@@ -112,12 +113,13 @@ class JobManager:
                     average_solution_ratio,
                     start_time
                 )
-                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, (EXTRACT(EPOCH FROM NOW()) * 1000)::BIGINT)
+                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, (EXTRACT(EPOCH FROM NOW()) * 1000)::BIGINT)
                 ON CONFLICT (benchmark_id) DO NOTHING;
                 """,
                 (
                     benchmark_id,
                     json.dumps(asdict(x.settings)),
+                    json.dumps(x.hyperparameters),
                     x.details.num_nonces,
                     num_batches,
                     x.details.rand_hash,

@@ -94,7 +94,8 @@ class PrecommitManager:
                 block_id=self.last_block_id,
                 difficulty=difficulty_samples[a_id]
             ),
-            num_nonces=selection["num_nonces"]
+            num_nonces=selection["num_nonces"],
+            hyperparameters=selection["hyperparameters"]
         )
-        logger.info(f"Created precommit (algorithm_id: {a_id}, difficulty: {req.settings.difficulty}, num_nonces: {req.num_nonces})")
+        logger.info(f"Created precommit (algorithm_id: {a_id}, difficulty: {req.settings.difficulty}, num_nonces: {req.num_nonces}, hyperparameters: {req.hyperparameters})")
         return req

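For context, a minimal sketch of the shape this call now expects from `selection` — the two keyword arguments mirror the diff, while the `PrecommitRequest` stand-in and all values are hypothetical:

```python
# Hypothetical stand-in for the precommit request type; only the two fields
# touched by this diff are modeled.
from dataclasses import dataclass
from typing import Optional

@dataclass
class PrecommitRequest:
    num_nonces: int
    hyperparameters: Optional[dict] = None  # None when an algorithm takes none

selection = {"num_nonces": 40, "hyperparameters": {"population_size": 100}}
req = PrecommitRequest(
    num_nonces=selection["num_nonces"],
    hyperparameters=selection["hyperparameters"],
)
print(req)
```
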
@@ -40,6 +40,7 @@ class SlaveManager:
                     'start_nonce', A.batch_idx * B.batch_size,
                     'num_nonces', LEAST(B.batch_size, B.num_nonces - A.batch_idx * B.batch_size),
                     'settings', B.settings,
+                    'hyperparameters', B.hyperparameters,
                     'sampled_nonces', A.sampled_nonces,
                     'runtime_config', B.runtime_config,
                     'download_url', B.download_url,

@@ -72,6 +73,7 @@ class SlaveManager:
                     'start_nonce', A.batch_idx * B.batch_size,
                     'num_nonces', LEAST(B.batch_size, B.num_nonces - A.batch_idx * B.batch_size),
                     'settings', B.settings,
+                    'hyperparameters', B.hyperparameters,
                     'sampled_nonces', NULL,
                     'runtime_config', B.runtime_config,
                     'download_url', B.download_url,

@@ -15,6 +15,7 @@ logger = logging.getLogger(os.path.splitext(os.path.basename(__file__))[0])
 class SubmitPrecommitRequest(FromDict):
     settings: BenchmarkSettings
     num_nonces: int
+    hyperparameters: Optional[dict]

 @dataclass
 class SubmitBenchmarkRequest(FromDict):

@@ -5,6 +5,7 @@ CREATE TABLE IF NOT EXISTS config (
 CREATE TABLE IF NOT EXISTS job (
     benchmark_id TEXT PRIMARY KEY,
     settings JSONB NOT NULL,
+    hyperparameters JSONB,
     num_nonces INTEGER NOT NULL,
     rand_hash TEXT NOT NULL,
     runtime_config JSONB NOT NULL,

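A rough illustration of how the new nullable `hyperparameters` JSONB column round-trips, assuming (hypothetically) that `job` had only the columns visible in this hunk and a local Postgres is reachable:

```python
# Sketch only: assumes a reachable Postgres and, hypothetically, that `job`
# has just the columns shown in the hunk above.
import json
import psycopg2

conn = psycopg2.connect("dbname=benchmarker")  # placeholder DSN
with conn, conn.cursor() as cur:
    cur.execute(
        """
        INSERT INTO job (benchmark_id, settings, hyperparameters, num_nonces, rand_hash, runtime_config)
        VALUES (%s, %s, %s, %s, %s, %s)
        ON CONFLICT (benchmark_id) DO NOTHING;
        """,
        (
            "bench_123",
            json.dumps({"algorithm_id": "c001_a001"}),
            json.dumps({"population_size": 100}),  # pass None instead to store SQL NULL
            40,
            "somehash",
            json.dumps({"max_fuel": 10_000_000}),
        ),
    )
```
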
@@ -119,7 +120,8 @@ SELECT '
             "difficulty_range": [0, 0.5],
             "selected_difficulties": [],
             "weight": 1,
-            "batch_size": 8
+            "batch_size": 8,
+            "hyperparameters": null
         },
         {
             "algorithm_id": "c002_a001",

@@ -127,7 +129,8 @@ SELECT '
             "difficulty_range": [0, 0.5],
             "selected_difficulties": [],
             "weight": 1,
-            "batch_size": 8
+            "batch_size": 8,
+            "hyperparameters": null
         },
         {
             "algorithm_id": "c003_a001",

@@ -135,7 +138,8 @@ SELECT '
             "difficulty_range": [0, 0.5],
             "selected_difficulties": [],
             "weight": 1,
-            "batch_size": 8
+            "batch_size": 8,
+            "hyperparameters": null
         },
         {
             "algorithm_id": "c004_a001",

@@ -143,7 +147,8 @@ SELECT '
             "difficulty_range": [0, 0.5],
             "selected_difficulties": [],
             "weight": 1,
-            "batch_size": 8
+            "batch_size": 8,
+            "hyperparameters": null
         },
         {
             "algorithm_id": "c005_a001",

@@ -151,7 +156,8 @@ SELECT '
             "difficulty_range": [0, 0.5],
             "selected_difficulties": [],
             "weight": 1,
-            "batch_size": 8
+            "batch_size": 8,
+            "hyperparameters": null
         }
     ],
     "time_before_batch_retry": 60000,

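For reference, one per-algorithm selection entry as it could look after this change with hyperparameters actually set; the entry keys come from the diff, but the hyperparameter names inside are invented:

```python
# One per-algorithm selection entry after this change; the inner
# hyperparameter keys are made up for illustration.
entry = {
    "algorithm_id": "c001_a001",
    "difficulty_range": [0, 0.5],
    "selected_difficulties": [],
    "weight": 1,
    "batch_size": 8,
    "hyperparameters": {"num_iterations": 500},  # or None (null) to disable
}
```
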
@@ -58,22 +58,28 @@ def download_library(algorithms_dir, batch):


 def run_tig_runtime(nonce, batch, so_path, ptx_path, results_dir):
-    output_file = f"{results_dir}/{batch['id']}/{nonce}.json"
+    output_dir = f"{results_dir}/{batch['id']}"
+    output_file = f"{output_dir}/{nonce}.json"
+    settings = json.dumps(batch["settings"], separators=(',',':'))
     start = now()
     cmd = [
         "docker", "exec", batch["challenge"], "tig-runtime",
-        json.dumps(batch["settings"], separators=(',',':')),
+        settings,
         batch["rand_hash"],
         str(nonce),
         so_path,
         "--fuel", str(batch["runtime_config"]["max_fuel"]),
-        "--output", output_file,
+        "--output", output_dir,
     ]
+    if batch["hyperparameters"] is not None:
+        cmd += [
+            "--hyperparameters", json.dumps(batch["hyperparameters"], separators=(',',':')),
+        ]
     if ptx_path is not None:
         cmd += [
             "--ptx", ptx_path,
         ]
-    logger.debug(f"computing batch: {' '.join(cmd[:4] + [f"'{cmd[4]}'"] + cmd[5:])}")
+    logger.debug(f"computing nonce: {' '.join(cmd[:4] + [f"'{cmd[4]}'"] + cmd[5:])}")
    process = subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )

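Roughly, the command list the updated function assembles when a batch carries hyperparameters — a sketch with placeholder paths and values, not output from the real benchmarker:

```python
# Rough reconstruction of the command the updated function builds when a
# batch carries hyperparameters; every value here is a placeholder.
import json

batch = {
    "challenge": "c001",
    "settings": {"algorithm_id": "c001_a001"},
    "rand_hash": "abc123",
    "runtime_config": {"max_fuel": 10_000_000},
    "hyperparameters": {"population_size": 100},
}
nonce, so_path, output_dir = 42, "/algorithms/c001_a001.so", "/results/batch_0"

cmd = [
    "docker", "exec", batch["challenge"], "tig-runtime",
    json.dumps(batch["settings"], separators=(",", ":")),
    batch["rand_hash"],
    str(nonce),
    so_path,
    "--fuel", str(batch["runtime_config"]["max_fuel"]),
    "--output", output_dir,  # now a directory rather than a file, per the diff
]
if batch["hyperparameters"] is not None:
    cmd += ["--hyperparameters", json.dumps(batch["hyperparameters"], separators=(",", ":"))]
print(" ".join(cmd))
```
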
@@ -82,29 +88,52 @@ def run_tig_runtime(nonce, batch, so_path, ptx_path, results_dir):
         if ret is not None:
             exit_codes = {
                 0: "success",
-                82: "cuda out of memory",
-                83: "host out of memory",
+                # 82: "cuda out of memory",
+                # 83: "host out of memory",
                 84: "runtime error",
-                85: "no solution",
-                86: "invalid solution",
+                # 85: "no solution",
+                # 86: "invalid solution",
                 87: "out of fuel",
             }
-            if (ret != 0 and ret in exit_codes) and not os.path.exists(output_file):
+
+            if ret not in exit_codes:
+                logger.error(f"batch {batch['id']}, nonce {nonce} failed with exit code {ret}: {process.stderr.read().decode()}")
+            else:
+                logger.debug(f"batch {batch['id']}, nonce {nonce} finished with exit code {ret}: {exit_codes[ret]}")
+
+            if not os.path.exists(output_file):
                 with open(output_file, "w") as f:
                     json.dump(dict(
                         nonce=nonce,
                         runtime_signature=0,
                         fuel_consumed=(ret == 87) and (batch["runtime_config"]["max_fuel"] + 1),
-                        solution={},
+                        solution="",
                         cpu_arch=CPU_ARCH
                     ), f)
-
-            if ret != 0:
-                if ret not in exit_codes:
-                    logger.error(f"batch {batch['id']}, nonce {nonce} failed with exit code {ret}: {process.stderr.read().decode()}")
-                else:
-                    logger.error(f"batch {batch['id']}, nonce {nonce} failed with exit code {ret}: {exit_codes[ret]}")
-
+            else:
+                start = now()
+                cmd = [
+                    "docker", "exec", batch["challenge"], "tig-verifier",
+                    settings,
+                    batch["rand_hash"],
+                    str(nonce),
+                    output_file,
+                ]
+                if ptx_path is not None:
+                    cmd += [
+                        "--ptx", ptx_path,
+                    ]
+                logger.debug(f"verifying nonce: {' '.join(cmd[:4] + [f"'{cmd[4]}'"] + cmd[5:])}")
+                ret = subprocess.run(cmd, capture_output=True, text=True)
+                if ret.returncode == 0:
+                    logger.debug(f"batch {batch['id']}, nonce {nonce} valid solution")
+                else:
+                    logger.debug(f"batch {batch['id']}, nonce {nonce} invalid solution (exit code: {ret.returncode}, stderr: {ret.stderr.strip()})")
+                    with open(output_file, "r") as f:
+                        d = json.load(f)
+                    d["solution"] = ""
+                    with open(output_file, "w") as f:
+                        json.dump(d, f)
             break

         elif batch["id"] not in PROCESSING_BATCH_IDS:

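The fallback record written above, spelled out for the out-of-fuel case (`ret == 87`); `CPU_ARCH` and `max_fuel` are stand-in values:

```python
# What the fallback record amounts to for an out-of-fuel run; values mirror
# the diff, CPU_ARCH stands in for the real constant.
import json

CPU_ARCH = "x86_64"  # placeholder
max_fuel = 10_000_000
ret, nonce = 87, 42
record = dict(
    nonce=nonce,
    runtime_signature=0,
    # False for other exit codes; max_fuel + 1 marks fuel exhaustion
    fuel_consumed=(ret == 87) and (max_fuel + 1),
    solution="",  # the empty string now denotes "no solution"
    cpu_arch=CPU_ARCH,
)
print(json.dumps(record))
```
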
@@ -18,10 +18,11 @@ use std::sync::Arc;
 pub fn entry_point(
     challenge: &Challenge,
     save_solution: &dyn Fn(&Solution) -> Result<()>,
-    hyperparameters: &Option<Map<String, Value>>,
+    hyperparameters: Option<String>,
 ) -> Result<()>
 {
     catch_unwind(AssertUnwindSafe(|| {
+        let hyperparameters = hyperparameters.map(|x| serde_json::from_str::<Map<String, Value>>(&x).unwrap());
         {ALGORITHM}::solve_challenge(challenge, save_solution, hyperparameters)
     })).unwrap_or_else(|_| {
         Err(anyhow!("Panic occurred calling solve_challenge"))

@@ -34,13 +35,14 @@ pub fn entry_point(
 pub fn entry_point(
     challenge: &Challenge,
     save_solution: &dyn Fn(&Solution) -> Result<()>,
-    hyperparameters: &Option<Map<String, Value>>,
+    hyperparameters: Option<String>,
     module: Arc<CudaModule>,
     stream: Arc<CudaStream>,
     prop: &cudaDeviceProp,
 ) -> Result<()>
 {
     catch_unwind(AssertUnwindSafe(|| {
+        let hyperparameters = hyperparameters.map(|x| serde_json::from_str::<Map<String, Value>>(&x).unwrap());
         {ALGORITHM}::solve_challenge(challenge, save_solution, hyperparameters, module, stream, prop)
     })).unwrap_or_else(|_| {
         Err(anyhow!("Panic occurred calling solve_challenge"))

@@ -112,7 +112,7 @@ pub fn compute_solution(
         library.get::<fn(
             &$c::Challenge,
             &dyn Fn(&$c::Solution) -> Result<()>,
-            &Option<Map<String, Value>>,
+            Option<String>,
         ) -> Result<()>>(b"entry_point")?
     };

@@ -140,7 +140,7 @@ pub fn compute_solution(
             fs::write(&output_file, jsonify(&output_data))?;
             Ok(())
         };
-        let result = solve_challenge_fn(&challenge, &save_solution_fn, &hyperparameters);
+        let result = solve_challenge_fn(&challenge, &save_solution_fn, hyperparameters);
         if !output_file.exists() {
             save_solution_fn(&$c::Solution::new())?;
         }

@@ -157,7 +157,7 @@ pub fn compute_solution(
         library.get::<fn(
             &$c::Challenge,
             save_solution: &dyn Fn(&$c::Solution) -> anyhow::Result<()>,
-            &Option<Map<String, Value>>,
+            Option<String>,
             Arc<CudaModule>,
             Arc<CudaStream>,
             &cudaDeviceProp,

@@ -258,7 +258,7 @@ pub fn compute_solution(
         let result = solve_challenge_fn(
             &challenge,
             &save_solution_fn,
-            &hyperparameters,
+            hyperparameters,
             module.clone(),
             stream.clone(),
             &prop,

@@ -327,7 +327,7 @@ fn load_settings(settings: &str) -> BenchmarkSettings {
     })
 }

-fn load_hyperparameters(hyperparameters: &str) -> Map<String, Value> {
+fn load_hyperparameters(hyperparameters: &str) -> String {
     let hyperparameters = if hyperparameters.ends_with(".json") {
         fs::read_to_string(hyperparameters).unwrap_or_else(|_| {
             eprintln!("Failed to read hyperparameters file: {}", hyperparameters);

@@ -337,10 +337,12 @@ fn load_hyperparameters(hyperparameters: &str) -> Map<String, Value> {
         hyperparameters.to_string()
     };

-    dejsonify::<Map<String, Value>>(&hyperparameters).unwrap_or_else(|_| {
+    // validate it is valid JSON
+    let _ = dejsonify::<Map<String, Value>>(&hyperparameters).unwrap_or_else(|_| {
         eprintln!("Failed to parse hyperparameters as JSON");
         std::process::exit(1);
-    })
+    });
+    hyperparameters
 }

 pub fn load_module(path: &PathBuf) -> Result<Library> {

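The same validate-then-pass-through pattern, sketched in Python for clarity: parse the string to confirm it is a JSON object, but hand the raw string onward:

```python
# Python analogue of the validate-then-pass-through pattern above: the string
# is checked to be a JSON object but returned unparsed.
import json
import sys

def load_hyperparameters(arg: str) -> str:
    raw = open(arg).read() if arg.endswith(".json") else arg
    try:
        parsed = json.loads(raw)
        assert isinstance(parsed, dict)
    except (ValueError, AssertionError):
        sys.exit("Failed to parse hyperparameters as JSON")
    return raw  # callers receive the raw JSON string, as in the Rust change

print(load_hyperparameters('{"population_size": 100}'))
```
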
@@ -1,5 +1,6 @@
 use anyhow::{anyhow, Result};
 use clap::{arg, Command};
+use serde_json::{Map, Value};
 use std::{fs, io::Read, panic, path::PathBuf};
 use tig_challenges::*;
 use tig_structs::core::BenchmarkSettings;

@@ -22,7 +23,7 @@ fn cli() -> Command {
         )
         .arg(arg!(<NONCE> "Nonce value").value_parser(clap::value_parser!(u64)))
         .arg(
-            arg!(<SOLUTION> "Solution base64 string, path to b64 file, or '-' for stdin")
+            arg!(<SOLUTION> "Solution base64 string, path to json file with solution field, or '-' for stdin")
                 .value_parser(clap::value_parser!(String)),
         )
         .arg(arg!(--ptx [PTX] "Path to a CUDA ptx file").value_parser(clap::value_parser!(PathBuf)))

@@ -204,11 +205,28 @@ fn load_solution(solution: &str) -> String {
             std::process::exit(1);
         });
         buffer
-    } else if solution.ends_with(".b64") {
-        fs::read_to_string(&solution).unwrap_or_else(|_| {
+    } else if solution.ends_with(".json") {
+        let d = fs::read_to_string(&solution).unwrap_or_else(|_| {
             eprintln!("Failed to read solution file: {}", solution);
             std::process::exit(1);
-        })
+        });
+        let d = serde_json::from_str::<Map<String, Value>>(&d).unwrap_or_else(|_| {
+            eprintln!("Failed to parse solution file: {}", solution);
+            std::process::exit(1);
+        });
+        match d.get("solution") {
+            None => {
+                eprintln!("json file does not contain 'solution' field: {}", solution);
+                std::process::exit(1);
+            }
+            Some(v) => match v.as_str() {
+                None => {
+                    eprintln!("invalid 'solution' field in json file. Expecting string");
+                    std::process::exit(1);
+                }
+                Some(s) => s.to_string(),
+            },
+        }
     } else {
         solution.to_string()
     }

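A Python sketch of the new `.json` branch above: read a runtime result file and extract its `solution` string, exiting on a missing or non-string field:

```python
# Mirror of the new `.json` branch in load_solution, sketched in Python.
import json
import sys

def load_solution(path: str) -> str:
    with open(path) as f:
        d = json.load(f)
    v = d.get("solution")
    if not isinstance(v, str):
        sys.exit(f"json file does not contain a string 'solution' field: {path}")
    return v
```
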