mirror of
https://github.com/tig-foundation/tig-monorepo.git
synced 2026-02-21 10:27:49 +08:00
227 lines
9.1 KiB
Python
227 lines
9.1 KiB
Python
#!/usr/bin/env python3
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import platform
|
|
import re
|
|
import shutil
|
|
import subprocess
|
|
import sys
|
|
import tempfile
|
|
import time
|
|
from concurrent.futures import ThreadPoolExecutor
|
|
|
|
# Map the machine name reported by the platform onto the architecture label
# used in the library path (see so_path in the script body). Any machine name
# outside this table is unsupported.
CPU_ARCH = {
    "x86_64": "amd64",
    "amd64": "amd64",
    "arm64": "arm64",
    "aarch64": "arm64",
}.get(platform.machine().lower())

if CPU_ARCH is None:
    # Report the raw (lower-cased) machine name so the user can see what was detected.
    print(f"Unsupported CPU architecture: {platform.machine().lower()}")
    sys.exit(1)
|
|
|
|
def _parse_index_list(raw, var_name):
    """Parse a comma-separated list of integer indices taken from an env variable.

    Args:
        raw: The raw environment variable value, e.g. "0,1,3".
        var_name: Name of the variable, used only in the error message.

    Returns:
        The indices as a list of ints. Blank entries (an empty value, stray
        or trailing commas) are skipped, so "" yields [].

    Exits the process with status 1, after printing a message, when an entry
    is not an integer, instead of surfacing a bare ValueError traceback.
    """
    indices = []
    for token in raw.split(","):
        token = token.strip()
        if not token:
            continue
        try:
            indices.append(int(token))
        except ValueError:
            print(f"Invalid entry {token!r} in {var_name}: expected comma-separated integers")
            sys.exit(1)
    return indices


# GPU support is assumed when nvidia-smi can be found on PATH. shutil.which
# performs the same lookup as the external `which` command without spawning a
# process, and works on systems where `which` itself is not installed.
HAS_GPU = shutil.which("nvidia-smi") is not None

# CPU cores: default to the current affinity mask; an explicit
# CPU_VISIBLE_CORES list is also applied to this process.
if (VISIBLE_CPUS := os.environ.get("CPU_VISIBLE_CORES")) is None:
    VISIBLE_CPUS = list(os.sched_getaffinity(0))
else:
    VISIBLE_CPUS = _parse_index_list(VISIBLE_CPUS, "CPU_VISIBLE_CORES")
    if not VISIBLE_CPUS:
        print("CPU_VISIBLE_CORES is set but does not list any cores")
        sys.exit(1)
    os.sched_setaffinity(0, VISIBLE_CPUS)

# GPUs: default to every device listed by `nvidia-smi -L`; an explicit
# CUDA_VISIBLE_DEVICES list takes precedence (an empty value means no GPUs).
if not HAS_GPU:
    VISIBLE_GPUS = []
elif (VISIBLE_GPUS := os.environ.get("CUDA_VISIBLE_DEVICES")) is None:
    VISIBLE_GPUS = [
        int(match.group(1))
        for line in subprocess.check_output(["nvidia-smi", "-L"]).decode("utf-8").splitlines()
        if (match := re.match(r'^GPU (\d+):', line)) is not None
    ]
else:
    VISIBLE_GPUS = _parse_index_list(VISIBLE_GPUS, "CUDA_VISIBLE_DEVICES")

if not VISIBLE_GPUS:
    # Without a usable device there is no GPU support, whatever tools are
    # installed. Keeping the flag in sync stops later code from computing
    # `nonce % len(VISIBLE_GPUS)` with an empty list.
    HAS_GPU = False
|
|
|
|
# The challenge name is read from the environment; the script refuses to start
# without it.
if (CHALLENGE := os.environ.get("CHALLENGE")) is None:
    print("CHALLENGE environment variable must be set!")
    sys.exit(1)
|
|
|
|
def now():
    """Return the current wall-clock time in whole milliseconds since the epoch."""
    seconds = time.time()
    return int(seconds * 1000)
|
|
|
|
|
|
_QUALITY_PREFIX = "quality: "


class _InvalidSolution(RuntimeError):
    """Raised by a worker to abort the run after an invalid solution."""


def _format_cmd(cmd):
    """Render *cmd* for the debug log, single-quoting the JSON settings argument.

    Built without nesting same-quote f-strings so the script also parses on
    Python versions older than 3.12.
    """
    return " ".join([cmd[0], f"'{cmd[1]}'", *cmd[2:]])


def _parse_quality(line):
    """Return the integer following "quality: " in *line*, or None if absent/malformed."""
    if line is None:
        return None
    line = line.strip()
    if not line.startswith(_QUALITY_PREFIX):
        return None
    try:
        return int(line[len(_QUALITY_PREFIX):])
    except ValueError:
        return None


def _run_streaming(cmd, log_prefix, verbose):
    """Run *cmd* to completion, streaming its stdout.

    Args:
        cmd: Argument list passed to subprocess.Popen.
        log_prefix: Text printed before each echoed stdout line.
        verbose: When true, every stdout line is echoed as it arrives.

    Returns:
        (returncode, last_stdout_line_or_None, stderr_text)

    stderr is spooled to a temporary file rather than a pipe: reading stdout
    to EOF while an unread stderr pipe fills up would block the child forever.
    """
    last_line = None
    with tempfile.TemporaryFile(mode="w+", errors="replace") as err_file:
        with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=err_file, text=True) as proc:
            for line in proc.stdout:
                last_line = line
                if verbose:
                    print(f"{log_prefix} {line.strip()}")
        # Leaving the Popen context closed stdout and waited for the child.
        err_file.seek(0)
        stderr_text = err_file.read()
    return proc.returncode, last_line, stderr_text


def _tally(entries):
    """Summarise result tuples as (num_processing, num_finished, num_invalid, total_quality).

    Each entry is (start, elapsed, ok, quality); ok is None while the nonce is
    still being processed, True for a verified solution and False otherwise.
    """
    processing = finished = invalid = total_quality = 0
    for _, _, ok, quality in entries:
        if ok is None:
            processing += 1
        elif ok:
            finished += 1
            total_quality += quality
        else:
            invalid += 1
    return processing, finished, invalid, total_quality


def main():
    """Run tig-runtime and tig-verifier over a range of nonces and report progress.

    Parses the command line, locates the compiled algorithm library (and the
    optional PTX file), then processes nonces on a thread pool while a
    reporter thread prints a running summary. The process exits with status 1
    on configuration errors (bad hyperparameters, missing library or
    executables, unknown challenge, PTX without a visible GPU).
    """
    # Imported here because the file's top-level import block does not provide it.
    import threading

    tig_runtime_path = shutil.which("tig-runtime")
    tig_verifier_path = shutil.which("tig-verifier")
    parser = argparse.ArgumentParser(description="TIG Algorithm Tester")
    parser.add_argument("algorithm", type=str, help="Algorithm name")
    parser.add_argument("size", type=int, help="Size of the problem instance")
    parser.add_argument("hyperparameters", type=str, help="JSON string of hyperparameters for the algorithm (can set to null)")
    parser.add_argument("--tig-runtime-path", type=str, default=tig_runtime_path, help=f"Path to tig-runtime executable (default: {tig_runtime_path})")
    parser.add_argument("--tig-verifier-path", type=str, default=tig_verifier_path, help=f"Path to tig-verifier executable (default: {tig_verifier_path})")
    parser.add_argument("--lib-dir", type=str, default="./tig-algorithms/lib", help="Path to the algorithms library folder (default: ./tig-algorithms/lib)")
    parser.add_argument("--seed", type=str, default="rand_hash", help="String to use as seed instance generation (default: 'rand_hash')")
    parser.add_argument("--start", type=int, default=0, help="Starting nonce (default: 0)")
    parser.add_argument("--nonces", type=int, default=100, help="Number of nonces to process (default: 100)")
    parser.add_argument("--fuel", type=int, default=int(100e9), help="Max fuel (default: 100 billion)")
    parser.add_argument("--workers", type=int, default=1, help="Number of worker threads (default: 1)")
    parser.add_argument("--ignore-invalid", action='store_true', help="Continue testing even if invalid solutions are encountered")
    parser.add_argument("--verbose", action='store_true', help="Print debug logs")

    args = parser.parse_args()

    if args.workers < 1:
        # The pool reserves one thread for the reporter; with no worker
        # threads the nonces would never be processed.
        print("--workers must be at least 1")
        sys.exit(1)

    for flag, path in (
        ("--tig-runtime-path", args.tig_runtime_path),
        ("--tig-verifier-path", args.tig_verifier_path),
    ):
        if path is None:
            print(f"Executable not found on PATH; set {flag} <path_to_executable>")
            sys.exit(1)

    if args.hyperparameters.lower() == "null":
        args.hyperparameters = None
    else:
        # Validation only: the raw string is what gets forwarded to tig-runtime.
        try:
            json.loads(args.hyperparameters)
        except ValueError as e:
            print(f"Failed to parse hyperparameters as JSON: {e}")
            sys.exit(1)

    so_path = f"{args.lib_dir}/{CHALLENGE}/{CPU_ARCH}/{args.algorithm}.so"
    ptx_path = f"{args.lib_dir}/{CHALLENGE}/ptx/{args.algorithm}.ptx"

    if not os.path.exists(so_path):
        print(
            f"Library not found at {so_path}:\n"
            "* To download: use download_algorithm\n"
            "* To build: use build_algorithm\n"
            "* To set the lib folder: set --lib-dir <path_to_folder>\n"
        )
        sys.exit(1)

    if not os.path.exists(ptx_path):
        ptx_path = None
    elif not HAS_GPU:
        print(f"PTX file found at {ptx_path}, but no GPU support detected (failed to run nvidia-smi)")
        sys.exit(1)
    elif not VISIBLE_GPUS:
        # Nonces are spread over GPUs with `nonce % len(VISIBLE_GPUS)`.
        print(f"PTX file found at {ptx_path}, but no visible GPUs were detected")
        sys.exit(1)

    challenge_ids = {
        "satisfiability": "c001",
        "vehicle_routing": "c002",
        "knapsack": "c003",
        "vector_search": "c004",
        "hypergraph": "c005",
        "neuralnet_optimizer": "c006",
    }
    challenge_id = challenge_ids.get(CHALLENGE)
    if challenge_id is None:
        print(f"Unknown CHALLENGE '{CHALLENGE}'. Supported challenges: {', '.join(challenge_ids)}")
        sys.exit(1)

    settings = {"algorithm_id": "", "challenge_id": challenge_id, "size": args.size, "block_id": "", "player_id": ""}
    settings_json = json.dumps(settings, separators=(',', ':'))

    # nonce -> (start_ms, runtime_elapsed_ms, ok, quality); written by the
    # workers and read by the reporter, hence the lock.
    results = {}
    results_lock = threading.Lock()
    # Set when the run should wind down: an invalid solution without
    # --ignore-invalid, or the main thread finishing for any reason.
    stop = threading.Event()

    def print_results():
        """Print a progress summary every 0.5s until the run is over."""
        start = now()
        while True:
            stopping = stop.wait(0.5)
            with results_lock:
                snapshot = list(results.values())
            num_processing, num_finished, num_invalid, total_quality = _tally(snapshot)

            elapsed = (now() - start) / 1000
            avg_quality = (total_quality // num_finished) if num_finished > 0 else 0
            out = f"#processing: {num_processing}, #finished: {num_finished}, #invalid: {num_invalid}, elapsed: {elapsed:.2f}s, avg_quality: {avg_quality:,} "
            if args.verbose:
                print(out)
            else:
                # No newline is written, so flush explicitly to refresh the line.
                print(f"\r{out}", end="", flush=True)

            if num_invalid > 0 and not args.ignore_invalid:
                print()
                print("Invalid solution encountered. Stopping tests")
                print("To continue testing despite invalid solutions, use the --ignore-invalid flag.")
                print("Note: Invalid solutions will cause actual benchmarks to fail")
                break
            # Invalid nonces count as done too; otherwise --ignore-invalid
            # would keep this loop alive forever.
            if num_finished + num_invalid >= args.nonces:
                break
            if stopping:
                break
        if not args.verbose:
            print("\n")

    def run_tig_runtime(nonce):
        """Solve one nonce with tig-runtime, check it with tig-verifier, record the outcome.

        Raises:
            _InvalidSolution: the solution did not verify and --ignore-invalid
                was not given.
        """
        if stop.is_set():
            # The run is already being aborted; do not start more work.
            return
        tag = f"[nonce {nonce}]"
        gpu_args = []
        if ptx_path is not None:
            gpu_args = ["--ptx", ptx_path, "--gpu", str(nonce % len(VISIBLE_GPUS))]

        with tempfile.TemporaryDirectory() as temp_dir:
            cmd = [
                args.tig_runtime_path,
                settings_json,
                args.seed,
                str(nonce),
                so_path,
                "--fuel", str(args.fuel),
                "--output", temp_dir,
            ] + gpu_args
            if args.hyperparameters:
                cmd += ["--hyperparameters", args.hyperparameters]
            if args.verbose:
                print(f"{tag}: {_format_cmd(cmd)}")

            start = now()
            with results_lock:
                results[nonce] = (start, None, None, None)
            runtime_code, _, runtime_stderr = _run_streaming(cmd, tag, args.verbose)
            elapsed = now() - start

            output_file = f"{temp_dir}/{nonce}.json"
            cmd2 = [
                args.tig_verifier_path,
                settings_json,
                args.seed,
                str(nonce),
                output_file,
            ] + gpu_args
            if args.verbose:
                cmd2 += ["--verbose"]
                print(f"{tag} {_format_cmd(cmd2)}")
            verifier_code, verifier_last_line, verifier_stderr = _run_streaming(cmd2, tag, args.verbose)
            elapsed2 = now() - start - elapsed

            if args.verbose:
                out = f"{tag} finished\n\ttig-runtime\n\t\telapsed: {elapsed}ms\n\t\texit code: {runtime_code}\n\t\tstderr: "
                if runtime_code != 0:
                    # Exit code 87 is reported as running out of fuel.
                    out += "out of fuel" if runtime_code == 87 else runtime_stderr
                out += f"\n\ttig-verifier\n\t\telapsed: {elapsed2}ms\n\t\texit code: {verifier_code}\n\t\tstderr: "
                if verifier_code != 0:
                    out += verifier_stderr
                print(out)

            # The verifier's last stdout line carries the quality. Only the
            # verifier's own output is consulted, never a leftover line from
            # tig-runtime.
            quality = _parse_quality(verifier_last_line) if verifier_code == 0 else None
            ok = quality is not None
            if verifier_code == 0 and not ok:
                print(f"{tag} tig-verifier exited 0 but its last output line was not 'quality: <int>'")

            with results_lock:
                results[nonce] = (start, elapsed, ok, quality)
            if not ok and not args.ignore_invalid:
                stop.set()
                raise _InvalidSolution(f"nonce {nonce} produced an invalid solution")

    nonces = list(range(args.start, args.start + args.nonces))
    if args.verbose:
        print(f"Processing {len(nonces)} nonces with {args.workers} workers...")

    # One extra thread is reserved for the progress reporter.
    with ThreadPoolExecutor(max_workers=args.workers + 1) as pool:
        reporter = pool.submit(print_results)
        try:
            list(pool.map(run_tig_runtime, nonces))
        except _InvalidSolution:
            # Already reported to the user by print_results.
            pass
        finally:
            # Always release the reporter, even after an unexpected worker
            # error, so the process cannot hang on it.
            stop.set()
            reporter.result()


if __name__ == "__main__":
    main()