Update gpu challenges to output quality.

This commit is contained in:
FiveMovesAhead 2025-11-26 11:27:19 +00:00
parent 5aba4aa7eb
commit 1d4417d004
13 changed files with 210 additions and 275 deletions

View File

@ -28,8 +28,9 @@ pub fn solve_challenge(
) -> Result<()> {
// boilerplate for training loop
// recommend not modifying this function unless you have a good reason
let (solution, train_losses, val_losses) = training_loop(
training_loop(
challenge,
save_solution,
module,
stream,
prop,
@ -37,9 +38,8 @@ pub fn solve_challenge(
optimizer_query_at_params,
optimizer_step,
)?;
save_solution(&solution)?;
Ok()
Ok(())
}
#[derive(Clone)]

View File

@ -44,7 +44,7 @@ case "$CHALLENGE" in
neuralnet_optimizer)
echo "Building ALGORITHM '$ALGORITHM' for CHALLENGE 'neuralnet_optimizer'"
build_so $ALGORITHM
build_ptx $ALGORITHM --extra-cu-files tig-challenges/src/neuralnet/kernels.cu
build_ptx $ALGORITHM
;;
*)
echo "Error: Invalid CHALLENGE value. Must be one of: satisfiability, knapsack, vehicle_routing, vector_search, hypergraph, neuralnet_optimizer"

View File

@ -250,16 +250,18 @@ def main():
f"Framework code does not exist @ '{framework_cu}'. This script must be run from the root of tig-monorepo"
)
challenge_cu = f"tig-challenges/src/{CHALLENGE}.cu"
if not os.path.exists(challenge_cu):
challenge_cus_pattern = f"tig-challenges/src/{CHALLENGE}/**/*.cu"
challenge_cus = glob(challenge_cus_pattern, recursive=True)
if not challenge_cus:
raise FileNotFoundError(
f"Challenge code does not exist @ '{challenge_cu}'. Is the challenge name correct?"
f"Challenge code does not exist @ '{challenge_cus_pattern}'. Is the challenge name correct?"
)
algorithm_cus = glob(f"tig-algorithms/src/{CHALLENGE}/{args.algorithm}/*.cu")
algorithm_cus_pattern = f"tig-algorithms/src/{CHALLENGE}/{args.algorithm}/*.cu"
algorithm_cus = glob(algorithm_cus_pattern)
if not algorithm_cus:
raise FileNotFoundError(
f"Algorithm code does not exist @ '{algorithm_cus}'. Is the algorithm name correct?"
f"Algorithm code does not exist @ '{algorithm_cus_pattern}'. Is the algorithm name correct?"
)
# Combine .cu source files into a temporary file
@ -269,12 +271,8 @@ def main():
with open(framework_cu, 'r') as f:
code = f.read() + "\n"
with open(challenge_cu, 'r') as f:
code += f.read() + "\n"
for extra_cu in args.extra_cu_files:
if not os.path.exists(extra_cu):
raise FileNotFoundError(f"Extra .cu file does not exist: {extra_cu}")
with open(extra_cu, 'r') as f:
for cu_path in challenge_cus:
with open(cu_path, 'r') as f:
code += f.read() + "\n"
kernel_regex = r'(?:extern\s+"C"\s+__global__|__device__)\s+\w+\s+(?P<func>\w+)\s*\('
kernels_to_ignore = [match.group('func') for match in re.finditer(kernel_regex, code)]

View File

@ -1,30 +1,13 @@
use crate::QUALITY_PRECISION;
use anyhow::{anyhow, Result};
use cudarc::driver::*;
use cudarc::runtime::sys::cudaDeviceProp;
use rand::{rngs::StdRng, Rng, SeedableRng};
use serde::{Deserialize, Serialize};
use std::sync::Arc;
/// Difficulty parameters for a hypergraph challenge instance.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Difficulty {
/// Number of hyperedges requested for the generated instance.
pub num_hyperedges: u32,
// `better_than_baseline` is declared twice on purpose: it is public unless the
// `hide_verification` feature is enabled, in which case it is private.
/// Required improvement over the baseline; it is divided by 1000.0 where it is read.
#[cfg(not(feature = "hide_verification"))]
pub better_than_baseline: u32,
/// Private variant of `better_than_baseline`, compiled in under `hide_verification`.
#[cfg(feature = "hide_verification")]
better_than_baseline: u32,
}
/// Decodes a [`Difficulty`] from its positional form
/// `[num_hyperedges, better_than_baseline]`.
///
/// Each element is converted with an `as u32` cast, so negative inputs wrap
/// rather than fail. Elements beyond the first two are ignored.
///
/// # Panics
/// Panics if `arr` holds fewer than two elements.
impl From<Vec<i32>> for Difficulty {
    fn from(arr: Vec<i32>) -> Self {
        let num_hyperedges = arr[0] as u32;
        let better_than_baseline = arr[1] as u32;
        Self {
            num_hyperedges,
            better_than_baseline,
        }
    }
}
impl Into<Vec<i32>> for Difficulty {
fn into(self) -> Vec<i32> {
vec![self.num_hyperedges as i32, self.better_than_baseline as i32]
impl_kv_string_serde! {
Track {
num_hyperedges: u32,
}
}
@ -44,7 +27,7 @@ impl Solution {
pub struct Challenge {
pub seed: [u8; 32],
pub difficulty: Difficulty,
pub num_hyperedges: u32,
pub num_nodes: u32,
pub num_parts: u32,
pub max_part_size: u32,
@ -60,9 +43,9 @@ pub struct Challenge {
pub d_node_offsets: CudaSlice<i32>,
pub d_node_hyperedges: CudaSlice<i32>,
#[cfg(not(feature = "hide_verification"))]
pub baseline_connectivity_metric: u32,
pub greedy_baseline_connectivity_metric: u32,
#[cfg(feature = "hide_verification")]
baseline_connectivity_metric: u32,
greedy_baseline_connectivity_metric: u32,
}
pub const MAX_THREADS_PER_BLOCK: u32 = 1024;
@ -70,14 +53,14 @@ pub const MAX_THREADS_PER_BLOCK: u32 = 1024;
impl Challenge {
pub fn generate_instance(
seed: &[u8; 32],
difficulty: &Difficulty,
track: &Track,
module: Arc<CudaModule>,
stream: Arc<CudaStream>,
_prop: &cudaDeviceProp,
) -> Result<Self> {
let mut rng = StdRng::from_seed(seed.clone());
let num_hyperedges = difficulty.num_hyperedges;
let target_num_nodes = difficulty.num_hyperedges; // actual number may be around 8% less
let num_hyperedges = track.num_hyperedges;
let target_num_nodes = track.num_hyperedges; // actual number may be around 8% less
let depth = 6;
let num_parts = 1 << depth; // 2^6 = 64 partitions
let level_weights: Vec<f32> = vec![
@ -371,7 +354,7 @@ impl Challenge {
Ok(Self {
seed: *seed,
difficulty: difficulty.clone(),
num_hyperedges: track.num_hyperedges,
num_nodes: target_num_nodes - num_prune,
num_parts,
max_part_size,
@ -382,11 +365,11 @@ impl Challenge {
d_node_degrees: d_shuffled_node_degrees,
d_node_offsets: d_shuffled_node_offsets,
d_node_hyperedges: d_shuffled_node_hyperedges,
baseline_connectivity_metric: connectivity_metric,
greedy_baseline_connectivity_metric: connectivity_metric,
})
}
pub fn calc_connectivity_metric(
pub fn evaluate_connectivity_metric(
&self,
solution: &Solution,
module: Arc<CudaModule>,
@ -407,7 +390,7 @@ impl Challenge {
let count_nodes_in_part_kernel = module.load_function("count_nodes_in_part")?;
let block_size = MAX_THREADS_PER_BLOCK;
let grid_size = (self.difficulty.num_hyperedges + block_size - 1) / block_size;
let grid_size = (self.num_hyperedges + block_size - 1) / block_size;
let cfg = LaunchConfig {
grid_dim: (grid_size, 1, 1),
@ -466,7 +449,7 @@ impl Challenge {
unsafe {
stream
.launch_builder(&calc_connectivity_metric_kernel)
.arg(&self.difficulty.num_hyperedges)
.arg(&self.num_hyperedges)
.arg(&self.d_hyperedge_offsets)
.arg(&self.d_hyperedge_nodes)
.arg(&d_partition)
@ -480,29 +463,33 @@ impl Challenge {
}
conditional_pub!(
fn verify_solution(
fn compute_greedy_baseline(&self) -> Result<Solution> {
Err(anyhow!("Not implemented yet"))
}
);
conditional_pub!(
fn compute_sota_baseline(&self) -> Result<Solution> {
Err(anyhow!("Not implemented yet"))
}
);
conditional_pub!(
fn evaluate_solution(
&self,
solution: &Solution,
module: Arc<CudaModule>,
stream: Arc<CudaStream>,
_prop: &cudaDeviceProp,
) -> Result<()> {
) -> Result<i32> {
let connectivity_metric =
self.calc_connectivity_metric(solution, module, stream, _prop)?;
let btb = self.difficulty.better_than_baseline as f64 / 1000.0;
let connectivity_metric_threshold =
(self.baseline_connectivity_metric as f64 * (1.0 - btb)).ceil() as u32;
if connectivity_metric > connectivity_metric_threshold {
Err(anyhow!(
"connectivity_metric {} is greater than threshold {} (baseline: {}, better_than_baseline: {}%)",
connectivity_metric,
connectivity_metric_threshold,
self.baseline_connectivity_metric,
btb * 100.0
))
} else {
Ok(())
}
self.evaluate_connectivity_metric(solution, module, stream, _prop)?;
let baseline_connectivity_metric = self.greedy_baseline_connectivity_metric;
let quality = (baseline_connectivity_metric as f64 - connectivity_metric as f64)
/ baseline_connectivity_metric as f64;
let quality = quality.clamp(-10.0, 10.0) * QUALITY_PRECISION as f64;
let quality = quality.round() as i32;
Ok(quality)
}
);
}

View File

@ -9,6 +9,7 @@ use std::collections::HashSet;
impl_kv_string_serde! {
Track {
num_items: usize,
density: u32,
}
}
@ -38,7 +39,7 @@ impl Challenge {
pub fn generate_instance(seed: &[u8; 32], track: &Track) -> Result<Self> {
let mut rng = SmallRng::from_seed(seed.clone());
// Set constant density for value generation
let density = 0.25;
let density = track.density as f64 / 100.0;
// Generate weights w_i in the range [1, 50]
let weights: Vec<u32> = (0..track.num_items)

View File

@ -199,8 +199,6 @@ pub mod hypergraph;
#[cfg(feature = "c005")]
pub use hypergraph as c005;
#[cfg(feature = "c006")]
pub(crate) mod neuralnet;
#[cfg(feature = "c006")]
pub mod neuralnet_optimizer;
#[cfg(feature = "c006")]
pub use neuralnet_optimizer as c006;

View File

@ -1,49 +1,22 @@
use anyhow::{anyhow, Result};
use base64::{engine::general_purpose::STANDARD as BASE64, Engine as _};
use crate::QUALITY_PRECISION;
use anyhow::Result;
use cudarc::{
cublas::CudaBlas,
cudnn::Cudnn,
driver::{CudaModule, CudaSlice, CudaStream, CudaView, LaunchConfig, PushKernelArg},
runtime::sys::cudaDeviceProp,
};
use flate2::{read::GzDecoder, write::GzEncoder, Compression};
use rand::{prelude::*, rngs::StdRng};
use serde::{
de::{self, Visitor},
Deserialize, Deserializer, Serialize, Serializer,
};
use serde_json::{from_value, Map, Value};
use std::{
any::Any,
fmt,
io::{Read, Write},
sync::Arc,
};
use std::{any::Any, sync::Arc};
use crate::neuralnet::MLP;
mod nn;
use nn::MLP;
const THREADS_PER_BLOCK: u32 = 1024;
/// Difficulty parameters for a neural-network optimizer challenge instance.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Difficulty {
/// Number of hidden layers in the network whose layer dimensions are built from this value.
pub num_hidden_layers: usize,
// `accuracy_factor` is declared twice on purpose: it is public unless the
// `hide_verification` feature is enabled, in which case it is private.
/// Accuracy requirement; it is divided by 1000.0 where it is read.
#[cfg(not(feature = "hide_verification"))]
pub accuracy_factor: u32,
/// Private variant of `accuracy_factor`, compiled in under `hide_verification`.
#[cfg(feature = "hide_verification")]
accuracy_factor: u32,
}
/// Decodes a [`Difficulty`] from its positional form
/// `[num_hidden_layers, accuracy_factor]`.
///
/// The elements are converted with `as usize` and `as u32` casts respectively,
/// so negative inputs wrap rather than fail. Elements beyond the first two are
/// ignored.
///
/// # Panics
/// Panics if `arr` holds fewer than two elements.
impl From<Vec<i32>> for Difficulty {
    fn from(arr: Vec<i32>) -> Self {
        let num_hidden_layers = arr[0] as usize;
        let accuracy_factor = arr[1] as u32;
        Self {
            num_hidden_layers,
            accuracy_factor,
        }
    }
}
impl Into<Vec<i32>> for Difficulty {
fn into(self) -> Vec<i32> {
vec![self.num_hidden_layers as i32, self.accuracy_factor as i32]
impl_kv_string_serde! {
Track {
num_hidden_layers: usize,
}
}
@ -87,48 +60,48 @@ pub struct Dataset {
}
impl Dataset {
pub fn train_inputs(&self) -> CudaView<f32> {
pub fn train_inputs(&self) -> CudaView<'_, f32> {
self.inputs.slice(0..self.train_size * self.input_dims)
}
pub fn train_targets_noisy(&self) -> CudaView<f32> {
pub fn train_targets_noisy(&self) -> CudaView<'_, f32> {
self.targets_noisy
.slice(0..self.train_size * self.output_dims)
}
pub fn train_targets_true_f(&self) -> CudaView<f32> {
pub fn train_targets_true_f(&self) -> CudaView<'_, f32> {
self.targets_true_f
.slice(0..self.train_size * self.output_dims)
}
pub fn validation_inputs(&self) -> CudaView<f32> {
pub fn validation_inputs(&self) -> CudaView<'_, f32> {
self.inputs.slice(
self.train_size * self.input_dims
..(self.train_size + self.validation_size) * self.input_dims,
)
}
pub fn validation_targets_noisy(&self) -> CudaView<f32> {
pub fn validation_targets_noisy(&self) -> CudaView<'_, f32> {
self.targets_noisy.slice(
self.train_size * self.output_dims
..(self.train_size + self.validation_size) * self.output_dims,
)
}
pub fn validation_targets_true_f(&self) -> CudaView<f32> {
pub fn validation_targets_true_f(&self) -> CudaView<'_, f32> {
self.targets_true_f.slice(
self.train_size * self.output_dims
..(self.train_size + self.validation_size) * self.output_dims,
)
}
pub fn test_inputs(&self) -> CudaView<f32> {
pub fn test_inputs(&self) -> CudaView<'_, f32> {
self.inputs.slice(
(self.train_size + self.validation_size) * self.input_dims
..(self.train_size + self.validation_size + self.test_size) * self.input_dims,
)
}
pub fn test_targets_noisy(&self) -> CudaView<f32> {
pub fn test_targets_noisy(&self) -> CudaView<'_, f32> {
self.targets_noisy.slice(
(self.train_size + self.validation_size) * self.output_dims
..(self.train_size + self.validation_size + self.test_size) * self.output_dims,
)
}
pub fn test_targets_true_f(&self) -> CudaView<f32> {
pub fn test_targets_true_f(&self) -> CudaView<'_, f32> {
self.targets_true_f.slice(
(self.train_size + self.validation_size) * self.output_dims
..(self.train_size + self.validation_size + self.test_size) * self.output_dims,
@ -138,7 +111,7 @@ impl Dataset {
pub struct Challenge {
pub seed: [u8; 32],
pub difficulty: Difficulty,
pub num_hidden_layers: usize,
pub hidden_layers_dims: usize,
pub batch_size: usize,
pub max_epochs: usize,
@ -151,7 +124,7 @@ pub struct Challenge {
impl Challenge {
pub fn generate_instance(
seed: &[u8; 32],
difficulty: &Difficulty,
track: &Track,
module: Arc<CudaModule>,
stream: Arc<CudaStream>,
_prop: &cudaDeviceProp,
@ -235,7 +208,7 @@ impl Challenge {
Ok(Self {
seed: *seed,
difficulty: difficulty.clone(),
num_hidden_layers: track.num_hidden_layers.clone(),
hidden_layers_dims: 256,
batch_size: 128,
max_epochs: 1000,
@ -256,13 +229,13 @@ impl Challenge {
}
conditional_pub!(
fn verify_solution(
fn evaluate_solution(
&self,
solution: &Solution,
module: Arc<CudaModule>,
stream: Arc<CudaStream>,
_prop: &cudaDeviceProp,
) -> Result<()> {
) -> Result<i32> {
let cublas = CudaBlas::new(stream.clone())?;
let cudnn = Cudnn::new(stream.clone())?;
@ -288,8 +261,6 @@ impl Challenge {
let avg_model_loss_on_test = stream.memcpy_dtov(&loss)?[0];
// Calculate baseline error epsilon_star_squared
let alpha = 4.0 - self.difficulty.accuracy_factor as f32 / 1000.0;
let y_h = stream.memcpy_dtov(&self.dataset.test_targets_noisy())?;
let f_h = stream.memcpy_dtov(&self.dataset.test_targets_true_f())?;
stream.synchronize()?;
@ -301,22 +272,18 @@ impl Challenge {
.sum();
let epsilon_star_squared =
(alpha / self.dataset.test_size as f32) * sum_sq_diff_true_vs_noisy;
(4.0 / self.dataset.test_size as f32) * sum_sq_diff_true_vs_noisy;
if avg_model_loss_on_test <= epsilon_star_squared {
Ok(())
} else {
Err(anyhow!(
"Model test loss ({:.4e}) exceeds target baseline epsilon_star_squared ({:.4e})",
avg_model_loss_on_test,
epsilon_star_squared
))
}
let quality = (epsilon_star_squared as f64 - avg_model_loss_on_test as f64)
/ epsilon_star_squared as f64;
let quality = quality.clamp(-10.0, 10.0) * QUALITY_PRECISION as f64;
let quality = quality.round() as i32;
Ok(quality)
}
);
pub fn layer_dims(&self) -> Vec<usize> {
let mut layer_dims = vec![self.hidden_layers_dims; self.difficulty.num_hidden_layers];
let mut layer_dims = vec![self.hidden_layers_dims; self.num_hidden_layers];
layer_dims.insert(0, self.dataset.input_dims);
layer_dims.push(self.dataset.output_dims);
layer_dims
@ -370,13 +337,14 @@ pub type OptimizerStepFn = fn(
pub fn training_loop(
challenge: &Challenge,
save_solution: &dyn Fn(&Solution) -> Result<()>,
module: Arc<CudaModule>,
stream: Arc<CudaStream>,
prop: &cudaDeviceProp,
optimizer_init_state: OptimizerInitStateFn,
optimizer_query_at_params: OptimizerQueryAtParamsFn,
optimizer_step: OptimizerStepFn,
) -> Result<(Solution, Vec<f32>, Vec<f32>)> {
) -> Result<()> {
let Challenge {
batch_size,
max_epochs,
@ -415,7 +383,6 @@ pub fn training_loop(
let mut lowest_loss = f32::INFINITY;
let mut _best_epoch = 0;
let mut epochs_no_improvement = 0;
let mut best_model_solution: Option<Solution> = None;
let mut prev_train_loss = None;
let mut prev_validation_loss = None;
let mut train_losses = Vec::with_capacity(max_epochs);
@ -601,7 +568,7 @@ pub fn training_loop(
if avg_val_loss < lowest_loss - min_loss_delta {
lowest_loss = avg_val_loss;
_best_epoch = epoch;
best_model_solution = Some(to_solution(&model, epoch + 1, stream.clone())?);
save_solution(&to_solution(&model, epoch + 1, stream.clone())?)?;
epochs_no_improvement = 0;
} else {
epochs_no_improvement += 1;
@ -613,8 +580,7 @@ pub fn training_loop(
stream.synchronize()?;
let solution = best_model_solution.ok_or_else(|| anyhow!("No valid solution found during training. Validation loss may have been NaN or never improved."))?;
Ok((solution, train_losses, validation_losses))
Ok(())
}
pub fn load_solution(mlp: &mut MLP, solution: &Solution, stream: Arc<CudaStream>) -> Result<()> {

View File

@ -11,7 +11,7 @@ use serde::{Deserialize, Serialize};
impl_kv_string_serde! {
Track {
num_variables: usize,
clauses_to_variables_percent: u32
clauses_to_variables_ratio: u32
}
}
@ -39,8 +39,8 @@ pub struct Challenge {
impl Challenge {
pub fn generate_instance(seed: &[u8; 32], track: &Track) -> Result<Self> {
let mut rng = SmallRng::from_seed(StdRng::from_seed(seed.clone()).gen());
let num_clauses = (track.num_variables as f64 * track.clauses_to_variables_percent as f64
/ 100.0)
let num_clauses = (track.num_variables as f64 * track.clauses_to_variables_ratio as f64
/ 1000.0)
.floor() as usize;
let var_distr = Uniform::new(1, track.num_variables as i32 + 1);

View File

@ -131,7 +131,7 @@ extern "C" __global__ void generate_vectors(
}
}
extern "C" __global__ void calc_total_distance(
extern "C" __global__ void evaluate_total_distance(
const uint32_t vector_dims,
const uint32_t database_size,
const uint32_t num_queries,

View File

@ -1,33 +1,15 @@
use crate::QUALITY_PRECISION;
use anyhow::{anyhow, Result};
use cudarc::{
driver::{safe::LaunchConfig, CudaModule, CudaSlice, CudaStream, PushKernelArg},
runtime::sys::cudaDeviceProp,
};
use rand::{rngs::StdRng, Rng, SeedableRng};
use serde::{Deserialize, Serialize};
use serde_json::{from_value, Map, Value};
use std::sync::Arc;
/// Difficulty parameters for a vector-search challenge instance.
#[derive(Serialize, Deserialize, Debug, Copy, Clone)]
pub struct Difficulty {
/// Number of query vectors in the instance.
pub num_queries: u32,
// `better_than_baseline` is declared twice on purpose: it is public unless the
// `hide_verification` feature is enabled, in which case it is private.
/// Required improvement over the baseline; it is divided by 1000.0 where it is read.
#[cfg(not(feature = "hide_verification"))]
pub better_than_baseline: u32,
/// Private variant of `better_than_baseline`, compiled in under `hide_verification`.
#[cfg(feature = "hide_verification")]
better_than_baseline: u32,
}
/// Decodes a [`Difficulty`] from its positional form
/// `[num_queries, better_than_baseline]`.
///
/// Each element is converted with an `as u32` cast, so negative inputs wrap
/// rather than fail. Elements beyond the first two are ignored.
///
/// # Panics
/// Panics if `arr` holds fewer than two elements.
impl From<Vec<i32>> for Difficulty {
    fn from(arr: Vec<i32>) -> Self {
        let num_queries = arr[0] as u32;
        let better_than_baseline = arr[1] as u32;
        Self {
            num_queries,
            better_than_baseline,
        }
    }
}
impl Into<Vec<i32>> for Difficulty {
fn into(self) -> Vec<i32> {
vec![self.num_queries as i32, self.better_than_baseline as i32]
impl_kv_string_serde! {
Track {
num_queries: u32,
}
}
@ -47,12 +29,11 @@ impl Solution {
pub struct Challenge {
pub seed: [u8; 32],
pub difficulty: Difficulty,
pub num_queries: u32,
pub vector_dims: u32,
pub database_size: u32,
pub d_database_vectors: CudaSlice<f32>,
pub d_query_vectors: CudaSlice<f32>,
pub max_distance: f32,
}
pub const MAX_THREADS_PER_BLOCK: u32 = 1024;
@ -60,16 +41,14 @@ pub const MAX_THREADS_PER_BLOCK: u32 = 1024;
impl Challenge {
pub fn generate_instance(
seed: &[u8; 32],
difficulty: &Difficulty,
track: &Track,
module: Arc<CudaModule>,
stream: Arc<CudaStream>,
_prop: &cudaDeviceProp,
) -> Result<Self> {
let mut rng = StdRng::from_seed(seed.clone());
let better_than_baseline = difficulty.better_than_baseline;
let max_distance = 11.0 - (better_than_baseline as f32) / 1000.0;
let vector_dims = 250;
let database_size = 100 * difficulty.num_queries;
let database_size = 100 * track.num_queries;
let avg_cluster_size: f32 = 700.0;
let num_clusters: u32 = ((1.0 + rng.gen::<f32>() * 0.05)
+ database_size as f32 / avg_cluster_size)
@ -126,14 +105,14 @@ impl Challenge {
let mut d_database_vectors =
stream.alloc_zeros::<f32>((database_size * vector_dims) as usize)?;
let mut d_query_vectors =
stream.alloc_zeros::<f32>((difficulty.num_queries * vector_dims) as usize)?;
stream.alloc_zeros::<f32>((track.num_queries * vector_dims) as usize)?;
unsafe {
stream
.launch_builder(&generate_vectors_kernel)
.arg(&d_seed)
.arg(&database_size)
.arg(&difficulty.num_queries)
.arg(&track.num_queries)
.arg(&vector_dims)
.arg(&num_clusters)
.arg(&d_cluster_cum_prob)
@ -151,117 +130,87 @@ impl Challenge {
return Ok(Self {
seed: seed.clone(),
difficulty: difficulty.clone(),
num_queries: track.num_queries.clone(),
vector_dims,
database_size,
d_database_vectors,
d_query_vectors,
max_distance,
});
}
pub fn calc_average_distance(
pub fn evaluate_average_distance(
&self,
solution: &Solution,
module: Arc<CudaModule>,
stream: Arc<CudaStream>,
prop: &cudaDeviceProp,
_prop: &cudaDeviceProp,
) -> Result<f32> {
calc_average_distance(
self.difficulty.num_queries,
self.vector_dims,
self.database_size,
&self.d_query_vectors,
&self.d_database_vectors,
&solution.indexes,
module.clone(),
stream.clone(),
prop,
)
if solution.indexes.len() != self.num_queries as usize {
return Err(anyhow!(
"Invalid number of indexes. Expected: {}, Actual: {}",
self.num_queries,
solution.indexes.len()
));
}
let evaluate_total_distance_kernel = module.load_function("evaluate_total_distance")?;
let d_solution_indexes = stream.memcpy_stod(&solution.indexes)?;
let mut d_total_distance = stream.alloc_zeros::<f32>(1)?;
let mut errorflag = stream.alloc_zeros::<u32>(1)?;
let cfg = LaunchConfig {
grid_dim: (1, 1, 1),
block_dim: (1, 1, 1),
shared_mem_bytes: 0,
};
unsafe {
stream
.launch_builder(&evaluate_total_distance_kernel)
.arg(&self.vector_dims)
.arg(&self.database_size)
.arg(&self.num_queries)
.arg(&self.d_query_vectors)
.arg(&self.d_database_vectors)
.arg(&d_solution_indexes)
.arg(&mut d_total_distance)
.arg(&mut errorflag)
.launch(cfg)?;
}
stream.synchronize()?;
let total_distance = stream.memcpy_dtov(&d_total_distance)?[0];
let error_flag = stream.memcpy_dtov(&errorflag)?[0];
match error_flag {
0 => {}
1 => {
return Err(anyhow!("Invalid index in solution"));
}
_ => {
return Err(anyhow!("Unknown error code: {}", error_flag));
}
}
let avg_dist = total_distance / self.num_queries as f32;
Ok(avg_dist)
}
conditional_pub!(
fn verify_solution(
fn evaluate_solution(
&self,
solution: &Solution,
module: Arc<CudaModule>,
stream: Arc<CudaStream>,
prop: &cudaDeviceProp,
) -> Result<()> {
let avg_dist = self.calc_average_distance(solution, module, stream, prop)?;
if avg_dist > self.max_distance {
return Err(anyhow!(
"Average query vector distance is '{}'. Max dist: '{}'",
avg_dist,
self.max_distance
));
} else {
Ok(())
}
) -> Result<i32> {
let avg_dist = self.evaluate_average_distance(solution, module, stream, prop)?;
let quality = (11.0 - avg_dist as f64) / 11.0;
let quality = quality.clamp(-10.0, 10.0) * QUALITY_PRECISION as f64;
let quality = quality.round() as i32;
Ok(quality)
}
);
}
/// Returns the total distance reported by the `calc_total_distance` kernel,
/// divided by `num_queries`.
///
/// `indexes` must contain exactly `num_queries` entries. They are copied to the
/// device and handed to the kernel together with the query and database
/// buffers. The kernel is launched with a 1x1x1 grid and a 1x1x1 block.
///
/// # Errors
/// * `indexes.len()` differs from `num_queries`.
/// * Loading the kernel, a device allocation/copy, the launch or the stream
///   synchronisation fails.
/// * The kernel sets its error flag: `1` is reported as an invalid index, any
///   other non-zero value as an unknown error code.
pub fn calc_average_distance(
    num_queries: u32,
    vector_dims: u32,
    database_size: u32,
    d_query_vectors: &CudaSlice<f32>,
    d_database_vectors: &CudaSlice<f32>,
    indexes: &Vec<usize>,
    module: Arc<CudaModule>,
    stream: Arc<CudaStream>,
    _prop: &cudaDeviceProp,
) -> Result<f32> {
    // Reject malformed solutions before touching the device.
    let supplied = indexes.len();
    if supplied != num_queries as usize {
        return Err(anyhow!(
            "Invalid number of indexes. Expected: {}, Actual: {}",
            num_queries,
            supplied
        ));
    }

    let kernel = module.load_function("calc_total_distance")?;

    // Device-side inputs and single-element output slots.
    let d_indexes = stream.memcpy_stod(indexes)?;
    let mut d_sum = stream.alloc_zeros::<f32>(1)?;
    let mut d_flag = stream.alloc_zeros::<u32>(1)?;

    let launch_cfg = LaunchConfig {
        grid_dim: (1, 1, 1),
        block_dim: (1, 1, 1),
        shared_mem_bytes: 0,
    };

    // SAFETY: the arguments are pushed in the order the kernel is expected to
    // declare them — NOTE(review): confirm against the `.cu` signature.
    unsafe {
        stream
            .launch_builder(&kernel)
            .arg(&vector_dims)
            .arg(&database_size)
            .arg(&num_queries)
            .arg(d_query_vectors)
            .arg(d_database_vectors)
            .arg(&d_indexes)
            .arg(&mut d_sum)
            .arg(&mut d_flag)
            .launch(launch_cfg)?;
    }
    stream.synchronize()?;

    // Read back the accumulated distance first, then the kernel's status flag.
    let total_distance = stream.memcpy_dtov(&d_sum)?[0];
    let error_flag = stream.memcpy_dtov(&d_flag)?[0];

    if error_flag == 1 {
        return Err(anyhow!("Invalid index in solution"));
    }
    if error_flag != 0 {
        return Err(anyhow!("Unknown error code: {}", error_flag));
    }

    Ok(total_distance / num_queries as f32)
}

View File

@ -37,6 +37,10 @@ pub struct Challenge {
pub service_time: i32,
pub ready_times: Vec<i32>,
pub due_times: Vec<i32>,
#[cfg(not(feature = "hide_verification"))]
pub greedy_baseline_total_distance: u32,
#[cfg(feature = "hide_verification")]
greedy_baseline_total_distance: u32,
}
impl Challenge {
@ -146,13 +150,18 @@ impl Challenge {
node_positions,
distance_matrix,
max_capacity,
fleet_size: 0,
fleet_size: u32::MAX as usize,
service_time,
ready_times,
due_times,
greedy_baseline_total_distance: 0,
};
c.fleet_size = c.compute_greedy_baseline()?.routes.len() + 2;
let greedy_baseline_solution = c.compute_greedy_baseline()?;
c.greedy_baseline_total_distance =
c.evaluate_total_distance(&greedy_baseline_solution)? as u32;
c.fleet_size = greedy_baseline_solution.routes.len() + 2;
Ok(c)
}
@ -236,10 +245,8 @@ impl Challenge {
conditional_pub!(
fn evaluate_solution(&self, solution: &Solution) -> Result<i32> {
let total_distance = self.evaluate_total_distance(solution)?;
let greedy_solution = self.compute_greedy_baseline()?;
let greedy_total_distance = self.evaluate_total_distance(&greedy_solution)?;
// TODO: implement SOTA baseline
let sota_total_distance = greedy_total_distance;
let sota_total_distance = self.greedy_baseline_total_distance;
// if total_distance > greedy_total_distance {
// return Err(anyhow!(
// "Total distance {} is greater than greedy baseline distance {}",

View File

@ -107,15 +107,6 @@ pub fn compute_solution(
macro_rules! dispatch_challenge {
($c:ident, cpu) => {{
// library function may exit 87 if it runs out of fuel
let solve_challenge_fn = unsafe {
library.get::<fn(
&$c::Challenge,
&dyn Fn(&$c::Solution) -> Result<()>,
Option<String>,
) -> Result<()>>(b"entry_point")?
};
let track_id = if settings.track_id.starts_with('"') && settings.track_id.ends_with('"')
{
settings.track_id.clone()
@ -129,6 +120,16 @@ pub fn compute_solution(
stringify!($c)
)
})?;
// library function may exit 87 if it runs out of fuel
let solve_challenge_fn = unsafe {
library.get::<fn(
&$c::Challenge,
&dyn Fn(&$c::Solution) -> Result<()>,
Option<String>,
) -> Result<()>>(b"entry_point")?
};
let challenge = $c::Challenge::generate_instance(&seed, &track)?;
let save_solution_fn = |solution: &$c::Solution| -> Result<()> {
@ -161,6 +162,20 @@ pub fn compute_solution(
}};
($c:ident, gpu) => {{
let track_id = if settings.track_id.starts_with('"') && settings.track_id.ends_with('"')
{
settings.track_id.clone()
} else {
format!(r#""{}""#, settings.track_id)
};
let track = serde_json::from_str(&track_id).map_err(|_| {
anyhow::anyhow!(
"Failed to parse track_id '{}' as {}::Track",
settings.track_id,
stringify!($c)
)
})?;
if ptx_path.is_none() {
panic!("PTX file is required for GPU challenges.");
}
@ -197,7 +212,7 @@ pub fn compute_solution(
let challenge = $c::Challenge::generate_instance(
&seed,
settings.size,
&track,
module.clone(),
stream.clone(),
&prop,

View File

@ -103,6 +103,20 @@ pub fn verify_solution(
}};
($c:ident, gpu) => {{
let track_id = if settings.track_id.starts_with('"') && settings.track_id.ends_with('"')
{
settings.track_id.clone()
} else {
format!(r#""{}""#, settings.track_id)
};
let track = serde_json::from_str(&track_id).map_err(|_| {
anyhow::anyhow!(
"Failed to parse track_id '{}' as {}::Track",
settings.track_id,
stringify!($c)
)
})?;
if ptx_path.is_none() {
panic!("PTX file is required for GPU challenges.");
}
@ -121,7 +135,7 @@ pub fn verify_solution(
let challenge = $c::Challenge::generate_instance(
&seed,
settings.size,
&track,
module.clone(),
stream.clone(),
&prop,