tig-monorepo/scripts/test_algorithm

#!/usr/bin/env python3

import argparse
import json
import os
import platform
import re
import shutil
import subprocess
import sys
import tempfile
import time
from concurrent.futures import ThreadPoolExecutor

# Map the spellings reported by platform.machine() onto the two architecture
# names this script works with; anything else is rejected at import time.
_ARCH_ALIASES = {
    "x86_64": "amd64",
    "amd64": "amd64",
    "arm64": "arm64",
    "aarch64": "arm64",
}
_machine = platform.machine().lower()
CPU_ARCH = _ARCH_ALIASES.get(_machine)
if CPU_ARCH is None:
    print(f"Unsupported CPU architecture: {_machine}")
    sys.exit(1)

# GPU support is inferred from the presence of the nvidia-smi tool on PATH.
# shutil.which is used instead of spawning the external `which` command, which
# is not installed everywhere and would raise FileNotFoundError when missing.
HAS_GPU = shutil.which("nvidia-smi") is not None


def _parse_id_list(env_name, raw):
    """Parse a comma-separated list of integer ids read from an environment variable.

    Args:
        env_name: Name of the variable; used only in the error message.
        raw: The variable's string value.

    Returns:
        The ids as a list of ints. Blank entries (an empty value, a trailing
        comma) are skipped.

    Exits the process with status 1 when an entry is not an integer.
    """
    try:
        return [int(token) for token in raw.split(",") if token.strip()]
    except ValueError:
        print(f"{env_name} must be a comma-separated list of integers, got: {raw!r}")
        sys.exit(1)


# CPU cores this process may use: CPU_VISIBLE_CORES when set, otherwise the
# affinity mask the process was started with.
if (VISIBLE_CPUS := os.environ.get("CPU_VISIBLE_CORES", None)) is None:
    VISIBLE_CPUS = list(os.sched_getaffinity(0))
else:
    VISIBLE_CPUS = _parse_id_list("CPU_VISIBLE_CORES", VISIBLE_CPUS)
    if not VISIBLE_CPUS:
        print("CPU_VISIBLE_CORES is set but does not list any cores")
        sys.exit(1)
    # Restrict this process to the requested cores.
    os.sched_setaffinity(0, VISIBLE_CPUS)

# GPU ids: none without nvidia-smi, CUDA_VISIBLE_DEVICES when set, otherwise
# every device index listed by `nvidia-smi -L`.
if not HAS_GPU:
    VISIBLE_GPUS = []
elif (VISIBLE_GPUS := os.environ.get("CUDA_VISIBLE_DEVICES", None)) is None:
    VISIBLE_GPUS = [
        int(match.group(1))
        for line in subprocess.check_output(["nvidia-smi", "-L"]).decode("utf-8").splitlines()
        if (match := re.match(r'^GPU (\d+):', line)) is not None
    ]
else:
    VISIBLE_GPUS = _parse_id_list("CUDA_VISIBLE_DEVICES", VISIBLE_GPUS)

# The challenge under test is taken from the CHALLENGE environment variable;
# the script refuses to start without it.
try:
    CHALLENGE = os.environ["CHALLENGE"]
except KeyError:
    print("CHALLENGE environment variable must be set!")
    sys.exit(1)

def now():
    """Return the current wall-clock time in whole milliseconds since the epoch."""
    seconds = time.time()
    return int(seconds * 1000)


if __name__ == "__main__":
    # Default executables are looked up on PATH; either lookup may yield None.
    tig_runtime_path = shutil.which("tig-runtime")
    tig_verifier_path = shutil.which("tig-verifier")
    parser = argparse.ArgumentParser(description="TIG Algorithm Tester")
    parser.add_argument("algorithm", type=str, help="Algorithm name")
    parser.add_argument("size", type=int, help="Size of the problem instance")
    parser.add_argument("hyperparameters", type=str, help="JSON string of hyperparameters for the algorithm (can set to null)")
    parser.add_argument("--tig-runtime-path", type=str, default=tig_runtime_path, help=f"Path to tig-runtime executable (default: {tig_runtime_path})")
    parser.add_argument("--tig-verifier-path", type=str, default=tig_verifier_path, help=f"Path to tig-verifier executable (default: {tig_verifier_path})")
    parser.add_argument("--lib-dir", type=str, default="./tig-algorithms/lib", help="Path to the algorithms library folder (default: ./tig-algorithms/lib)")
    parser.add_argument("--seed", type=str, default="rand_hash", help="String to use as seed instance generation (default: 'rand_hash')")
    parser.add_argument("--start", type=int, default=0, help="Starting nonce (default: 0)")
    parser.add_argument("--nonces", type=int, default=100, help="Number of nonces to process (default: 100)")
    parser.add_argument("--fuel", type=int, default=int(100e9), help="Max fuel (default: 100 billion)")
    parser.add_argument("--workers", type=int, default=1, help="Number of worker threads (default: 1)")
    parser.add_argument("--ignore-invalid", action='store_true', help="Continue testing even if invalid solutions are encountered")
    parser.add_argument("--verbose", action='store_true', help="Print debug logs")

    args = parser.parse_args()

    # The progress reporter occupies one pool thread, so fewer than one worker
    # would leave no thread to process nonces and the run would never finish.
    if args.workers < 1:
        parser.error("--workers must be at least 1")
    # A negative count can never be reached by the progress reporter.
    if args.nonces < 0:
        parser.error("--nonces must not be negative")

    # The executables are launched later from worker threads; fail early with a
    # readable message instead of an exception raised inside the pool.
    for option, executable in (
        ("--tig-runtime-path", args.tig_runtime_path),
        ("--tig-verifier-path", args.tig_verifier_path),
    ):
        if executable is None or shutil.which(executable) is None:
            print(f"Executable for {option} not found or not executable: {executable}")
            sys.exit(1)

    # "null" disables hyperparameters; anything else must at least be valid
    # JSON. The original string (not the parsed value) is what gets forwarded.
    if args.hyperparameters.lower() == "null":
        args.hyperparameters = None
    else:
        try:
            json.loads(args.hyperparameters)
        except Exception as e:
            print(f"Failed to parse hyperparameters as JSON: {e}")
            sys.exit(1)

    so_path = f"{args.lib_dir}/{CHALLENGE}/{CPU_ARCH}/{args.algorithm}.so"
    ptx_path = f"{args.lib_dir}/{CHALLENGE}/ptx/{args.algorithm}.ptx"

    if not os.path.exists(so_path):
        print(
f"""Library not found at {so_path}:
    * To download: use download_algorithm
    * To build: use build_algorithm
    * To set the lib folder: set --lib-dir <path_to_folder>
""")
        sys.exit(1)

    # The PTX file is optional; when present it requires a usable GPU.
    if not os.path.exists(ptx_path):
        ptx_path = None
    elif not HAS_GPU:
        print(f"PTX file found at {ptx_path}, but no GPU support detected (failed to run nvidia-smi)")
        sys.exit(1)
    elif not VISIBLE_GPUS:
        # Each nonce picks its device with `nonce % len(VISIBLE_GPUS)`; an
        # empty list would raise ZeroDivisionError inside the workers.
        print(f"PTX file found at {ptx_path}, but no visible GPUs were detected")
        sys.exit(1)

    challenge_ids = {
        "satisfiability": "c001",
        "vehicle_routing": "c002",
        "knapsack": "c003",
        "vector_search": "c004",
        "hypergraph": "c005",
        "neuralnet_optimizer": "c006",
    }
    if CHALLENGE not in challenge_ids:
        print(f"Unknown CHALLENGE '{CHALLENGE}'. Expected one of: {', '.join(challenge_ids)}")
        sys.exit(1)
    challenge_id = challenge_ids[CHALLENGE]

    settings = {"algorithm_id": "", "challenge_id": challenge_id, "size": args.size, "block_id": "", "player_id": ""}
    # One thread beyond the requested workers is reserved for the progress reporter.
    pool = ThreadPoolExecutor(max_workers=args.workers + 1)

    # nonce -> (start_ms, elapsed_ms, ok, quality); the last three are None
    # while the nonce is still being processed.
    results = {}
    def print_results():
        """Print a progress summary roughly every half second until the run ends.

        Each entry of ``results`` is a ``(start, elapsed, ok, quality)`` tuple:
        ``ok`` is None while the nonce is being processed, True for an accepted
        solution and False for an invalid one.

        The loop stops when every requested nonce has an outcome (finished or
        invalid), or at the first invalid solution unless --ignore-invalid is
        set. In non-verbose mode the summary is redrawn in place on one line.
        """
        start = now()
        while True:
            time.sleep(0.5)
            num_processing, num_finished, num_invalid = 0, 0, 0
            total_quality = 0
            # Iterate over a snapshot: worker threads insert new nonces
            # concurrently, and iterating the live dict could fail with
            # "dictionary changed size during iteration".
            for (_, _, ok, quality) in list(results.values()):
                if ok is None:
                    num_processing += 1
                elif ok:
                    num_finished += 1
                    total_quality += quality
                else:
                    num_invalid += 1

            elapsed = (now() - start) / 1000
            avg_quality = (total_quality // num_finished) if num_finished > 0 else 0
            out = f"#processing: {num_processing}, #finished: {num_finished}, #invalid: {num_invalid}, elapsed: {elapsed:.2f}s, avg_quality: {avg_quality:,}       "
            if args.verbose:
                print(out)
            else:
                # No newline is written, so flush explicitly or the line may
                # sit in the output buffer and never be shown.
                print(f"\r{out}", end="", flush=True)

            if num_invalid > 0 and not args.ignore_invalid:
                print()
                print("Invalid solution encountered. Stopping tests")
                print("To continue testing despite invalid solutions, use the --ignore-invalid flag.")
                print("Note: Invalid solutions will cause actual benchmarks to fail")
                break
            # Invalid nonces count as done too; otherwise a run with
            # --ignore-invalid and any invalid nonce would never terminate.
            if num_finished + num_invalid >= args.nonces:
                break
        if not args.verbose:
            print("\n")

    def run_tig_runtime(nonce):
        """Solve one nonce with tig-runtime, check it with tig-verifier and record the outcome.

        ``results[nonce]`` is set to ``(start, None, None, None)`` when work on
        the nonce begins and to ``(start, elapsed, ok, quality)`` at the end,
        where ``elapsed`` is the tig-runtime duration in milliseconds, ``ok``
        tells whether the verifier accepted the solution and ``quality`` is the
        integer the verifier printed (None when not ok).

        Args:
            nonce: Nonce forwarded to both executables. When a PTX file is in
                use it also selects the GPU as ``nonce % len(VISIBLE_GPUS)``.

        Raises:
            RuntimeError: The solution was not accepted and --ignore-invalid
                is not set.
        """

        def describe(cmd):
            # Render a command for the log with the settings JSON in single
            # quotes. Built outside the f-strings below because reusing the
            # same quote character inside an f-string needs Python 3.12+.
            quoted_settings = "'" + cmd[1] + "'"
            return " ".join(cmd[:1] + [quoted_settings] + cmd[2:])

        def run_and_stream(cmd):
            """Run ``cmd``, echoing its stdout in verbose mode.

            stderr goes to a temporary file instead of a pipe: only stdout is
            read while the child runs, and an undrained stderr pipe could fill
            up and block the child forever.

            Returns:
                ``(returncode, last_stdout_line, stderr_text)``; the line is
                stripped, or None when the child printed nothing.
            """
            last_line = None
            with tempfile.TemporaryFile(mode="w+") as err_file:
                with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=err_file, text=True) as proc:
                    for raw in proc.stdout:
                        last_line = raw.strip()
                        if args.verbose:
                            print(f"[nonce {nonce}] {last_line}")
                    proc.wait()
                err_file.seek(0)
                return proc.returncode, last_line, err_file.read()

        with tempfile.TemporaryDirectory() as temp_dir:
            settings_json = json.dumps(settings, separators=(',', ':'))
            gpu_args = []
            if ptx_path is not None:
                gpu_args = [
                    "--ptx", ptx_path,
                    "--gpu", str(nonce % len(VISIBLE_GPUS)),
                ]

            cmd = [
                args.tig_runtime_path,
                settings_json,
                args.seed,
                str(nonce),
                so_path,
                "--fuel", str(args.fuel),
                "--output", temp_dir,
            ] + gpu_args
            if args.hyperparameters:
                cmd += [
                    "--hyperparameters", args.hyperparameters,
                ]
            if args.verbose:
                print(f"[nonce {nonce}]: {describe(cmd)}")
            start = now()
            results[nonce] = (start, None, None, None)
            runtime_code, _, runtime_stderr = run_and_stream(cmd)
            elapsed = now() - start

            # The verifier is pointed at <nonce>.json inside the runtime's output directory.
            output_file = f"{temp_dir}/{nonce}.json"
            cmd2 = [
                args.tig_verifier_path,
                settings_json,
                args.seed,
                str(nonce),
                output_file,
            ] + gpu_args
            if args.verbose:
                cmd2 += ["--verbose"]
                print(f"[nonce {nonce}] {describe(cmd2)}")
            verifier_code, verifier_last, verifier_stderr = run_and_stream(cmd2)
            elapsed2 = now() - start - elapsed

            if args.verbose:
                out = f"[nonce {nonce}] finished\n\ttig-runtime\n\t\telapsed: {elapsed}ms\n\t\texit code: {runtime_code}\n\t\tstderr: "
                if runtime_code != 0:
                    # Exit code 87 is reported as running out of fuel.
                    out += "out of fuel" if runtime_code == 87 else runtime_stderr
                out += f"\n\ttig-verifier\n\t\telapsed: {elapsed2}ms\n\t\texit code: {verifier_code}\n\t\tstderr: "
                if verifier_code != 0:
                    out += verifier_stderr
                print(out)

            ok = verifier_code == 0
            quality = None
            if ok:
                # The quality is read from the verifier's own last stdout line,
                # expected in the form "quality: <int>".
                prefix = "quality: "
                if verifier_last is not None and verifier_last.startswith(prefix):
                    try:
                        quality = int(verifier_last[len(prefix):])
                    except ValueError:
                        quality = None
                if quality is None:
                    # Without a readable quality the result cannot be counted
                    # as finished, so it is recorded as invalid.
                    ok = False
                    print(f"[nonce {nonce}] tig-verifier exited with 0 but its last output line is not 'quality: <int>': {verifier_last!r}")
            results[nonce] = (start, elapsed, ok, quality)
            if not ok and not args.ignore_invalid:
                raise RuntimeError(f"nonce {nonce} did not produce a valid solution")

    nonces = list(range(args.start, args.start + args.nonces))
    if args.verbose:
        print(f"Processing {len(nonces)} nonces with {args.workers} workers...")
    # The progress reporter runs on the pool alongside the nonce workers.
    pool.submit(print_results)
    try:
        # Consuming the iterator re-raises the first exception raised by a worker.
        for _ in pool.map(run_tig_runtime, nonces):
            pass
    except RuntimeError:
        # run_tig_runtime raises RuntimeError for an invalid solution when
        # --ignore-invalid is not set; the reporter prints the explanation.
        # Let the pool threads finish, then report the failure through the
        # exit status instead of exiting with 0.
        pool.shutdown(wait=True)
        sys.exit(1)