diff --git a/tig-algorithms/src/neuralnet_optimizer/template.rs b/tig-algorithms/src/neuralnet_optimizer/template.rs
index 6f4f7b0d..7b240618 100644
--- a/tig-algorithms/src/neuralnet_optimizer/template.rs
+++ b/tig-algorithms/src/neuralnet_optimizer/template.rs
@@ -28,8 +28,9 @@ pub fn solve_challenge(
 ) -> Result<()> {
     // boilerplate for training loop
     // recommend not modifying this function unless you have a good reason
-    let (solution, train_losses, val_losses) = training_loop(
+    training_loop(
         challenge,
+        save_solution,
         module,
         stream,
         prop,
@@ -37,9 +38,8 @@
         optimizer_query_at_params,
         optimizer_step,
     )?;
-    save_solution(&solution)?;
-    Ok(())
+    Ok(())
 }
 
 #[derive(Clone)]
diff --git a/tig-binary/scripts/build_algorithm b/tig-binary/scripts/build_algorithm
index 720d9853..5960ca8f 100644
--- a/tig-binary/scripts/build_algorithm
+++ b/tig-binary/scripts/build_algorithm
@@ -44,7 +44,7 @@ case "$CHALLENGE" in
     neuralnet_optimizer)
         echo "Building ALGORITHM '$ALGORITHM' for CHALLENGE 'neuralnet_optimizer'"
         build_so $ALGORITHM
-        build_ptx $ALGORITHM --extra-cu-files tig-challenges/src/neuralnet/kernels.cu
+        build_ptx $ALGORITHM
         ;;
     *)
         echo "Error: Invalid CHALLENGE value. Must be one of: satisfiability, knapsack, vehicle_routing, vector_search, hypergraph, neuralnet_optimizer"
diff --git a/tig-binary/scripts/build_ptx b/tig-binary/scripts/build_ptx
index 85852b75..7c2682b6 100644
--- a/tig-binary/scripts/build_ptx
+++ b/tig-binary/scripts/build_ptx
@@ -250,16 +250,18 @@ def main():
             f"Framework code does not exist @ '{framework_cu}'. This script must be run from the root of tig-monorepo"
         )
 
-    challenge_cu = f"tig-challenges/src/{CHALLENGE}.cu"
-    if not os.path.exists(challenge_cu):
+    challenge_cus_pattern = f"tig-challenges/src/{CHALLENGE}/**/*.cu"
+    challenge_cus = glob(challenge_cus_pattern, recursive=True)
+    if not challenge_cus:
         raise FileNotFoundError(
-            f"Challenge code does not exist @ '{challenge_cu}'. Is the challenge name correct?"
+            f"Challenge code does not exist @ '{challenge_cus_pattern}'. Is the challenge name correct?"
         )
 
-    algorithm_cus = glob(f"tig-algorithms/src/{CHALLENGE}/{args.algorithm}/*.cu")
+    algorithm_cus_pattern = f"tig-algorithms/src/{CHALLENGE}/{args.algorithm}/*.cu"
+    algorithm_cus = glob(algorithm_cus_pattern)
     if not algorithm_cus:
         raise FileNotFoundError(
-            f"Algorithm code does not exist @ '{algorithm_cus}'. Is the algorithm name correct?"
+            f"Algorithm code does not exist @ '{algorithm_cus_pattern}'. Is the algorithm name correct?"
         )
 
     # Combine .cu source files into a temporary file
@@ -269,12 +271,8 @@ def main():
     with open(framework_cu, 'r') as f:
         code = f.read() + "\n"
-    with open(challenge_cu, 'r') as f:
-        code += f.read() + "\n"
-    for extra_cu in args.extra_cu_files:
-        if not os.path.exists(extra_cu):
-            raise FileNotFoundError(f"Extra .cu file does not exist: {extra_cu}")
-        with open(extra_cu, 'r') as f:
+    for cu_path in challenge_cus:
+        with open(cu_path, 'r') as f:
             code += f.read() + "\n"
     kernel_regex = r'(?:extern\s+"C"\s+__global__|__device__)\s+\w+\s+(?P<func>\w+)\s*\('
     kernels_to_ignore = [match.group('func') for match in re.finditer(kernel_regex, code)]
diff --git a/tig-challenges/src/hypergraph/mod.rs b/tig-challenges/src/hypergraph/mod.rs
index 1ce12f12..aafe7522 100644
--- a/tig-challenges/src/hypergraph/mod.rs
+++ b/tig-challenges/src/hypergraph/mod.rs
@@ -1,30 +1,13 @@
+use crate::QUALITY_PRECISION;
 use anyhow::{anyhow, Result};
 use cudarc::driver::*;
 use cudarc::runtime::sys::cudaDeviceProp;
 use rand::{rngs::StdRng, Rng, SeedableRng};
-use serde::{Deserialize, Serialize};
 use std::sync::Arc;
 
-#[derive(Serialize, Deserialize, Debug, Clone)]
-pub struct Difficulty {
-    pub num_hyperedges: u32,
-    #[cfg(not(feature = "hide_verification"))]
-    pub better_than_baseline: u32,
-    #[cfg(feature = "hide_verification")]
-    better_than_baseline: u32,
-}
-
-impl From<Vec<i32>> for Difficulty {
-    fn from(arr: Vec<i32>) -> Self {
-        Self {
-            num_hyperedges: arr[0] as u32,
-            better_than_baseline: arr[1] as u32,
-        }
-    }
-}
-impl Into<Vec<i32>> for Difficulty {
-    fn into(self) -> Vec<i32> {
-        vec![self.num_hyperedges as i32, self.better_than_baseline as i32]
+impl_kv_string_serde! {
+    Track {
+        num_hyperedges: u32,
     }
 }
@@ -44,7 +27,7 @@ impl Solution {
 pub struct Challenge {
     pub seed: [u8; 32],
-    pub difficulty: Difficulty,
+    pub num_hyperedges: u32,
     pub num_nodes: u32,
     pub num_parts: u32,
     pub max_part_size: u32,
@@ -60,9 +43,9 @@ pub struct Challenge {
     pub d_node_offsets: CudaSlice,
     pub d_node_hyperedges: CudaSlice,
     #[cfg(not(feature = "hide_verification"))]
-    pub baseline_connectivity_metric: u32,
+    pub greedy_baseline_connectivity_metric: u32,
     #[cfg(feature = "hide_verification")]
-    baseline_connectivity_metric: u32,
+    greedy_baseline_connectivity_metric: u32,
 }
 
 pub const MAX_THREADS_PER_BLOCK: u32 = 1024;
@@ -70,14 +53,14 @@ impl Challenge {
     pub fn generate_instance(
         seed: &[u8; 32],
-        difficulty: &Difficulty,
+        track: &Track,
         module: Arc<CudaModule>,
         stream: Arc<CudaStream>,
         _prop: &cudaDeviceProp,
     ) -> Result {
         let mut rng = StdRng::from_seed(seed.clone());
-        let num_hyperedges = difficulty.num_hyperedges;
-        let target_num_nodes = difficulty.num_hyperedges; // actual number may be around 8% less
+        let num_hyperedges = track.num_hyperedges;
+        let target_num_nodes = track.num_hyperedges; // actual number may be around 8% less
         let depth = 6;
         let num_parts = 1 << depth; // 2^6 = 64 partitions
         let level_weights: Vec = vec![
@@ -371,7 +354,7 @@ impl Challenge {
         Ok(Self {
             seed: *seed,
-            difficulty: difficulty.clone(),
+            num_hyperedges: track.num_hyperedges,
             num_nodes: target_num_nodes - num_prune,
             num_parts,
             max_part_size,
@@ -382,11 +365,11 @@ impl Challenge {
             d_node_degrees: d_shuffled_node_degrees,
             d_node_offsets: d_shuffled_node_offsets,
             d_node_hyperedges: d_shuffled_node_hyperedges,
-            baseline_connectivity_metric: connectivity_metric,
+            greedy_baseline_connectivity_metric: connectivity_metric,
         })
     }
 
-    pub fn calc_connectivity_metric(
+    pub fn evaluate_connectivity_metric(
        &self,
        solution: &Solution,
        module: Arc<CudaModule>,
        stream: Arc<CudaStream>,
@@ -407,7 +390,7 @@ impl Challenge {
         let count_nodes_in_part_kernel = module.load_function("count_nodes_in_part")?;
         let block_size = MAX_THREADS_PER_BLOCK;
-        let grid_size = (self.difficulty.num_hyperedges + block_size - 1) / block_size;
+        let grid_size = (self.num_hyperedges + block_size - 1) / block_size;
 
         let cfg = LaunchConfig {
             grid_dim: (grid_size, 1, 1),
@@ -466,7 +449,7 @@ impl Challenge {
         unsafe {
             stream
                 .launch_builder(&calc_connectivity_metric_kernel)
-                .arg(&self.difficulty.num_hyperedges)
+                .arg(&self.num_hyperedges)
                 .arg(&self.d_hyperedge_offsets)
                 .arg(&self.d_hyperedge_nodes)
                 .arg(&d_partition)
@@ -480,29 +463,33 @@ impl Challenge {
     }
 
     conditional_pub!(
-        fn verify_solution(
+        fn compute_greedy_baseline(&self) -> Result {
+            Err(anyhow!("Not implemented yet"))
+        }
+    );
+
+    conditional_pub!(
+        fn compute_sota_baseline(&self) -> Result {
+            Err(anyhow!("Not implemented yet"))
+        }
+    );
+
+    conditional_pub!(
+        fn evaluate_solution(
            &self,
            solution: &Solution,
            module: Arc<CudaModule>,
            stream: Arc<CudaStream>,
            _prop: &cudaDeviceProp,
-        ) -> Result<()> {
+        ) -> Result<i32> {
            let connectivity_metric =
-                self.calc_connectivity_metric(solution, module, stream, _prop)?;
-            let btb = self.difficulty.better_than_baseline as f64 / 1000.0;
-            let connectivity_metric_threshold =
-                (self.baseline_connectivity_metric as f64 * (1.0 - btb)).ceil() as u32;
-            if connectivity_metric > connectivity_metric_threshold {
-                Err(anyhow!(
-                    "connectivity_metric {} is greater than threshold {} (baseline: {}, better_than_baseline: {}%)",
-                    connectivity_metric,
-                    connectivity_metric_threshold,
-                    self.baseline_connectivity_metric,
-                    btb * 100.0
-                ))
-            } else {
-                Ok(())
-            }
+                self.evaluate_connectivity_metric(solution, module, stream, _prop)?;
+            let baseline_connectivity_metric = self.greedy_baseline_connectivity_metric;
+            let quality = (baseline_connectivity_metric as f64 - connectivity_metric as f64)
+                / baseline_connectivity_metric as f64;
+            let quality = quality.clamp(-10.0, 10.0) * QUALITY_PRECISION as f64;
+            let quality = quality.round() as i32;
+            Ok(quality)
         }
     );
 }
diff --git a/tig-challenges/src/knapsack/mod.rs b/tig-challenges/src/knapsack/mod.rs
index 5e037468..63c6acbe 100644
--- a/tig-challenges/src/knapsack/mod.rs
+++ b/tig-challenges/src/knapsack/mod.rs
@@ -9,6 +9,7 @@ use std::collections::HashSet;
 impl_kv_string_serde! {
     Track {
         num_items: usize,
+        density: u32,
     }
 }
 
@@ -38,7 +39,7 @@ impl Challenge {
     pub fn generate_instance(seed: &[u8; 32], track: &Track) -> Result {
         let mut rng = SmallRng::from_seed(seed.clone());
         // Set constant density for value generation
-        let density = 0.25;
+        let density = track.density as f64 / 100.0;
 
         // Generate weights w_i in the range [1, 50]
         let weights: Vec = (0..track.num_items)
diff --git a/tig-challenges/src/lib.rs b/tig-challenges/src/lib.rs
index 3ee5354f..a4c636c5 100644
--- a/tig-challenges/src/lib.rs
+++ b/tig-challenges/src/lib.rs
@@ -199,8 +199,6 @@ pub mod hypergraph;
 #[cfg(feature = "c005")]
 pub use hypergraph as c005;
 #[cfg(feature = "c006")]
-pub(crate) mod neuralnet;
-#[cfg(feature = "c006")]
 pub mod neuralnet_optimizer;
 #[cfg(feature = "c006")]
 pub use neuralnet_optimizer as c006;
diff --git a/tig-challenges/src/neuralnet_optimizer/mod.rs b/tig-challenges/src/neuralnet_optimizer/mod.rs
index 4f30a90f..b01fa34b 100644
--- a/tig-challenges/src/neuralnet_optimizer/mod.rs
+++ b/tig-challenges/src/neuralnet_optimizer/mod.rs
@@ -1,49 +1,22 @@
-use anyhow::{anyhow, Result};
-use base64::{engine::general_purpose::STANDARD as BASE64, Engine as _};
+use crate::QUALITY_PRECISION;
+use anyhow::Result;
 use cudarc::{
     cublas::CudaBlas,
     cudnn::Cudnn,
     driver::{CudaModule, CudaSlice, CudaStream, CudaView, LaunchConfig, PushKernelArg},
     runtime::sys::cudaDeviceProp,
 };
-use flate2::{read::GzDecoder, write::GzEncoder, Compression};
 use rand::{prelude::*, rngs::StdRng};
-use serde::{
-    de::{self, Visitor},
-    Deserialize, Deserializer, Serialize, Serializer,
-};
-use serde_json::{from_value, Map, Value};
-use std::{
-    any::Any,
-    fmt,
-    io::{Read, Write},
-    sync::Arc,
-};
+use std::{any::Any, sync::Arc};
 
-use crate::neuralnet::MLP;
+mod nn;
+use nn::MLP;
 
 const THREADS_PER_BLOCK: u32 = 1024;
 
-#[derive(Serialize, Deserialize, Debug, Clone)]
-pub struct Difficulty {
-    pub num_hidden_layers: usize,
-    #[cfg(not(feature = "hide_verification"))]
-    pub accuracy_factor: u32,
-    #[cfg(feature = "hide_verification")]
-    accuracy_factor: u32,
-}
-
-impl From<Vec<i32>> for Difficulty {
-    fn from(arr: Vec<i32>) -> Self {
-        Self {
-            num_hidden_layers: arr[0] as usize,
-            accuracy_factor: arr[1] as u32,
-        }
-    }
-}
-impl Into<Vec<i32>> for Difficulty {
-    fn into(self) -> Vec<i32> {
-        vec![self.num_hidden_layers as i32, self.accuracy_factor as i32]
+impl_kv_string_serde! {
+    Track {
+        num_hidden_layers: usize,
     }
 }
@@ -87,48 +60,48 @@ pub struct Dataset {
 }
 
 impl Dataset {
-    pub fn train_inputs(&self) -> CudaView<f32> {
+    pub fn train_inputs(&self) -> CudaView<'_, f32> {
         self.inputs.slice(0..self.train_size * self.input_dims)
     }
-    pub fn train_targets_noisy(&self) -> CudaView<f32> {
+    pub fn train_targets_noisy(&self) -> CudaView<'_, f32> {
         self.targets_noisy
             .slice(0..self.train_size * self.output_dims)
     }
-    pub fn train_targets_true_f(&self) -> CudaView<f32> {
+    pub fn train_targets_true_f(&self) -> CudaView<'_, f32> {
         self.targets_true_f
             .slice(0..self.train_size * self.output_dims)
     }
-    pub fn validation_inputs(&self) -> CudaView<f32> {
+    pub fn validation_inputs(&self) -> CudaView<'_, f32> {
         self.inputs.slice(
             self.train_size * self.input_dims
                 ..(self.train_size + self.validation_size) * self.input_dims,
         )
     }
-    pub fn validation_targets_noisy(&self) -> CudaView<f32> {
+    pub fn validation_targets_noisy(&self) -> CudaView<'_, f32> {
         self.targets_noisy.slice(
             self.train_size * self.output_dims
                 ..(self.train_size + self.validation_size) * self.output_dims,
         )
     }
-    pub fn validation_targets_true_f(&self) -> CudaView<f32> {
+    pub fn validation_targets_true_f(&self) -> CudaView<'_, f32> {
         self.targets_true_f.slice(
             self.train_size * self.output_dims
                 ..(self.train_size + self.validation_size) * self.output_dims,
         )
     }
-    pub fn test_inputs(&self) -> CudaView<f32> {
+    pub fn test_inputs(&self) -> CudaView<'_, f32> {
         self.inputs.slice(
             (self.train_size + self.validation_size) * self.input_dims
                 ..(self.train_size + self.validation_size + self.test_size) * self.input_dims,
         )
     }
-    pub fn test_targets_noisy(&self) -> CudaView<f32> {
+    pub fn test_targets_noisy(&self) -> CudaView<'_, f32> {
         self.targets_noisy.slice(
             (self.train_size + self.validation_size) * self.output_dims
                 ..(self.train_size + self.validation_size + self.test_size) * self.output_dims,
         )
     }
-    pub fn test_targets_true_f(&self) -> CudaView<f32> {
+    pub fn test_targets_true_f(&self) -> CudaView<'_, f32> {
         self.targets_true_f.slice(
             (self.train_size + self.validation_size) * self.output_dims
                 ..(self.train_size + self.validation_size + self.test_size) * self.output_dims,
@@ -138,7 +111,7 @@ impl Dataset {
 pub struct Challenge {
     pub seed: [u8; 32],
-    pub difficulty: Difficulty,
+    pub num_hidden_layers: usize,
     pub hidden_layers_dims: usize,
     pub batch_size: usize,
     pub max_epochs: usize,
@@ -151,7 +124,7 @@ pub struct Challenge {
 impl Challenge {
     pub fn generate_instance(
         seed: &[u8; 32],
-        difficulty: &Difficulty,
+        track: &Track,
         module: Arc<CudaModule>,
         stream: Arc<CudaStream>,
         _prop: &cudaDeviceProp,
@@ -235,7 +208,7 @@ impl Challenge {
         Ok(Self {
             seed: *seed,
-            difficulty: difficulty.clone(),
+            num_hidden_layers: track.num_hidden_layers.clone(),
             hidden_layers_dims: 256,
             batch_size: 128,
             max_epochs: 1000,
@@ -256,13 +229,13 @@ impl Challenge {
     }
 
     conditional_pub!(
-        fn verify_solution(
+        fn evaluate_solution(
            &self,
            solution: &Solution,
            module: Arc<CudaModule>,
            stream: Arc<CudaStream>,
            _prop: &cudaDeviceProp,
-        ) -> Result<()> {
+        ) -> Result<i32> {
            let cublas = CudaBlas::new(stream.clone())?;
            let cudnn = Cudnn::new(stream.clone())?;
@@ -288,8 +261,6 @@ impl Challenge {
            let avg_model_loss_on_test = stream.memcpy_dtov(&loss)?[0];
 
            // Calculate baseline error epsilon_star_squared
-            let alpha = 4.0 - self.difficulty.accuracy_factor as f32 / 1000.0;
-
            let y_h = stream.memcpy_dtov(&self.dataset.test_targets_noisy())?;
            let f_h = stream.memcpy_dtov(&self.dataset.test_targets_true_f())?;
            stream.synchronize()?;
@@ -301,22 +272,18 @@ impl Challenge {
                .sum();
 
            let epsilon_star_squared =
-                (alpha / self.dataset.test_size as f32) * sum_sq_diff_true_vs_noisy;
+                (4.0 / self.dataset.test_size as f32) * sum_sq_diff_true_vs_noisy;
 
-            if avg_model_loss_on_test <= epsilon_star_squared {
-                Ok(())
-            } else {
-                Err(anyhow!(
-                    "Model test loss ({:.4e}) exceeds target baseline epsilon_star_squared ({:.4e})",
-                    avg_model_loss_on_test,
-                    epsilon_star_squared
-                ))
-            }
+            let quality = (epsilon_star_squared as f64 - avg_model_loss_on_test as f64)
+                / epsilon_star_squared as f64;
+            let quality = quality.clamp(-10.0, 10.0) * QUALITY_PRECISION as f64;
+            let quality = quality.round() as i32;
+            Ok(quality)
         }
     );
 
     pub fn layer_dims(&self) -> Vec<usize> {
-        let mut layer_dims = vec![self.hidden_layers_dims; self.difficulty.num_hidden_layers];
+        let mut layer_dims = vec![self.hidden_layers_dims; self.num_hidden_layers];
         layer_dims.insert(0, self.dataset.input_dims);
         layer_dims.push(self.dataset.output_dims);
         layer_dims
@@ -370,13 +337,14 @@ pub type OptimizerStepFn = fn(
 
 pub fn training_loop(
     challenge: &Challenge,
+    save_solution: &dyn Fn(&Solution) -> Result<()>,
     module: Arc<CudaModule>,
     stream: Arc<CudaStream>,
     prop: &cudaDeviceProp,
     optimizer_init_state: OptimizerInitStateFn,
     optimizer_query_at_params: OptimizerQueryAtParamsFn,
     optimizer_step: OptimizerStepFn,
-) -> Result<(Solution, Vec<f32>, Vec<f32>)> {
+) -> Result<()> {
     let Challenge {
         batch_size,
         max_epochs,
@@ -415,7 +383,6 @@ pub fn training_loop(
     let mut lowest_loss = f32::INFINITY;
     let mut _best_epoch = 0;
     let mut epochs_no_improvement = 0;
-    let mut best_model_solution: Option<Solution> = None;
     let mut prev_train_loss = None;
     let mut prev_validation_loss = None;
     let mut train_losses = Vec::with_capacity(max_epochs);
@@ -601,7 +568,7 @@ pub fn training_loop(
         if avg_val_loss < lowest_loss - min_loss_delta {
             lowest_loss = avg_val_loss;
             _best_epoch = epoch;
-            best_model_solution = Some(to_solution(&model, epoch + 1, stream.clone())?);
+            save_solution(&to_solution(&model, epoch + 1, stream.clone())?)?;
             epochs_no_improvement = 0;
         } else {
             epochs_no_improvement += 1;
@@ -613,8 +580,7 @@ pub fn training_loop(
 
     stream.synchronize()?;
 
-    let solution = best_model_solution.ok_or_else(|| anyhow!("No valid solution found during training. Validation loss may have been NaN or never improved."))?;
-    Ok((solution, train_losses, validation_losses))
+    Ok(())
 }
 
 pub fn load_solution(mlp: &mut MLP, solution: &Solution, stream: Arc<CudaStream>) -> Result<()> {
diff --git a/tig-challenges/src/satisfiability/mod.rs b/tig-challenges/src/satisfiability/mod.rs
index b828b8de..404516c9 100644
--- a/tig-challenges/src/satisfiability/mod.rs
+++ b/tig-challenges/src/satisfiability/mod.rs
@@ -11,7 +11,7 @@ use serde::{Deserialize, Serialize};
 impl_kv_string_serde! {
     Track {
         num_variables: usize,
-        clauses_to_variables_percent: u32
+        clauses_to_variables_ratio: u32
     }
 }
 
@@ -39,8 +39,8 @@ pub struct Challenge {
 impl Challenge {
     pub fn generate_instance(seed: &[u8; 32], track: &Track) -> Result {
         let mut rng = SmallRng::from_seed(StdRng::from_seed(seed.clone()).gen());
-        let num_clauses = (track.num_variables as f64 * track.clauses_to_variables_percent as f64
-            / 100.0)
+        let num_clauses = (track.num_variables as f64 * track.clauses_to_variables_ratio as f64
+            / 1000.0)
             .floor() as usize;
         let var_distr = Uniform::new(1, track.num_variables as i32 + 1);
diff --git a/tig-challenges/src/vector_search/kernels.cu b/tig-challenges/src/vector_search/kernels.cu
index a812d67b..94d8e0ff 100644
--- a/tig-challenges/src/vector_search/kernels.cu
+++ b/tig-challenges/src/vector_search/kernels.cu
@@ -131,7 +131,7 @@ extern "C" __global__ void generate_vectors(
     }
 }
 
-extern "C" __global__ void calc_total_distance(
+extern "C" __global__ void evaluate_total_distance(
     const uint32_t vector_dims,
     const uint32_t database_size,
     const uint32_t num_queries,
diff --git a/tig-challenges/src/vector_search/mod.rs b/tig-challenges/src/vector_search/mod.rs
index 9392d9ea..c48a03e2 100644
--- a/tig-challenges/src/vector_search/mod.rs
+++ b/tig-challenges/src/vector_search/mod.rs
@@ -1,33 +1,15 @@
+use crate::QUALITY_PRECISION;
 use anyhow::{anyhow, Result};
 use cudarc::{
     driver::{safe::LaunchConfig, CudaModule, CudaSlice, CudaStream, PushKernelArg},
     runtime::sys::cudaDeviceProp,
 };
 use rand::{rngs::StdRng, Rng, SeedableRng};
-use serde::{Deserialize, Serialize};
-use serde_json::{from_value, Map, Value};
 use std::sync::Arc;
 
-#[derive(Serialize, Deserialize, Debug, Copy, Clone)]
-pub struct Difficulty {
-    pub num_queries: u32,
-    #[cfg(not(feature = "hide_verification"))]
-    pub better_than_baseline: u32,
-    #[cfg(feature = "hide_verification")]
-    better_than_baseline: u32,
-}
-
-impl From<Vec<i32>> for Difficulty {
-    fn from(arr: Vec<i32>) -> Self {
-        Self {
-            num_queries: arr[0] as u32,
-            better_than_baseline: arr[1] as u32,
-        }
-    }
-}
-impl Into<Vec<i32>> for Difficulty {
-    fn into(self) -> Vec<i32> {
-        vec![self.num_queries as i32, self.better_than_baseline as i32]
+impl_kv_string_serde! {
+    Track {
+        num_queries: u32,
     }
 }
@@ -47,12 +29,11 @@ impl Solution {
 pub struct Challenge {
     pub seed: [u8; 32],
-    pub difficulty: Difficulty,
+    pub num_queries: u32,
     pub vector_dims: u32,
     pub database_size: u32,
     pub d_database_vectors: CudaSlice<f32>,
     pub d_query_vectors: CudaSlice<f32>,
-    pub max_distance: f32,
 }
 
 pub const MAX_THREADS_PER_BLOCK: u32 = 1024;
@@ -60,16 +41,14 @@ pub const MAX_THREADS_PER_BLOCK: u32 = 1024;
 impl Challenge {
     pub fn generate_instance(
         seed: &[u8; 32],
-        difficulty: &Difficulty,
+        track: &Track,
         module: Arc<CudaModule>,
         stream: Arc<CudaStream>,
         _prop: &cudaDeviceProp,
     ) -> Result {
         let mut rng = StdRng::from_seed(seed.clone());
-        let better_than_baseline = difficulty.better_than_baseline;
-        let max_distance = 11.0 - (better_than_baseline as f32) / 1000.0;
         let vector_dims = 250;
-        let database_size = 100 * difficulty.num_queries;
+        let database_size = 100 * track.num_queries;
         let avg_cluster_size: f32 = 700.0;
         let num_clusters: u32 = ((1.0 + rng.gen::<f32>() * 0.05)
             + database_size as f32 / avg_cluster_size)
@@ -126,14 +105,14 @@ impl Challenge {
         let mut d_database_vectors =
             stream.alloc_zeros::<f32>((database_size * vector_dims) as usize)?;
         let mut d_query_vectors =
-            stream.alloc_zeros::<f32>((difficulty.num_queries * vector_dims) as usize)?;
+            stream.alloc_zeros::<f32>((track.num_queries * vector_dims) as usize)?;
 
         unsafe {
             stream
                 .launch_builder(&generate_vectors_kernel)
                 .arg(&d_seed)
                 .arg(&database_size)
-                .arg(&difficulty.num_queries)
+                .arg(&track.num_queries)
                 .arg(&vector_dims)
                 .arg(&num_clusters)
                 .arg(&d_cluster_cum_prob)
@@ -151,117 +130,87 @@ impl Challenge {
         return Ok(Self {
             seed: seed.clone(),
-            difficulty: difficulty.clone(),
+            num_queries: track.num_queries.clone(),
             vector_dims,
             database_size,
             d_database_vectors,
             d_query_vectors,
-            max_distance,
         });
     }
 
-    pub fn calc_average_distance(
+    pub fn evaluate_average_distance(
        &self,
        solution: &Solution,
        module: Arc<CudaModule>,
        stream: Arc<CudaStream>,
-        prop: &cudaDeviceProp,
+        _prop: &cudaDeviceProp,
     ) -> Result<f32> {
-        calc_average_distance(
-            self.difficulty.num_queries,
-            self.vector_dims,
-            self.database_size,
-            &self.d_query_vectors,
-            &self.d_database_vectors,
-            &solution.indexes,
-            module.clone(),
-            stream.clone(),
-            prop,
-        )
+        if solution.indexes.len() != self.num_queries as usize {
+            return Err(anyhow!(
+                "Invalid number of indexes. Expected: {}, Actual: {}",
+                self.num_queries,
+                solution.indexes.len()
+            ));
+        }
+
+        let evaluate_total_distance_kernel = module.load_function("evaluate_total_distance")?;
+
+        let d_solution_indexes = stream.memcpy_stod(&solution.indexes)?;
+        let mut d_total_distance = stream.alloc_zeros::<f32>(1)?;
+        let mut errorflag = stream.alloc_zeros::(1)?;
+
+        let cfg = LaunchConfig {
+            grid_dim: (1, 1, 1),
+            block_dim: (1, 1, 1),
+            shared_mem_bytes: 0,
+        };
+
+        unsafe {
+            stream
+                .launch_builder(&evaluate_total_distance_kernel)
+                .arg(&self.vector_dims)
+                .arg(&self.database_size)
+                .arg(&self.num_queries)
+                .arg(&self.d_query_vectors)
+                .arg(&self.d_database_vectors)
+                .arg(&d_solution_indexes)
+                .arg(&mut d_total_distance)
+                .arg(&mut errorflag)
+                .launch(cfg)?;
+        }
+
+        stream.synchronize()?;
+
+        let total_distance = stream.memcpy_dtov(&d_total_distance)?[0];
+        let error_flag = stream.memcpy_dtov(&errorflag)?[0];
+
+        match error_flag {
+            0 => {}
+            1 => {
+                return Err(anyhow!("Invalid index in solution"));
+            }
+            _ => {
+                return Err(anyhow!("Unknown error code: {}", error_flag));
+            }
+        }
+
+        let avg_dist = total_distance / self.num_queries as f32;
+        Ok(avg_dist)
     }
 
     conditional_pub!(
-        fn verify_solution(
+        fn evaluate_solution(
            &self,
            solution: &Solution,
            module: Arc<CudaModule>,
            stream: Arc<CudaStream>,
            prop: &cudaDeviceProp,
-        ) -> Result<()> {
-            let avg_dist = self.calc_average_distance(solution, module, stream, prop)?;
-            if avg_dist > self.max_distance {
-                return Err(anyhow!(
-                    "Average query vector distance is '{}'. Max dist: '{}'",
-                    avg_dist,
-                    self.max_distance
-                ));
-            } else {
-                Ok(())
-            }
+        ) -> Result<i32> {
+            let avg_dist = self.evaluate_average_distance(solution, module, stream, prop)?;
+            let quality = (11.0 - avg_dist as f64) / 11.0;
+            let quality = quality.clamp(-10.0, 10.0) * QUALITY_PRECISION as f64;
+            let quality = quality.round() as i32;
+            Ok(quality)
         }
     );
 }
-
-pub fn calc_average_distance(
-    num_queries: u32,
-    vector_dims: u32,
-    database_size: u32,
-    d_query_vectors: &CudaSlice<f32>,
-    d_database_vectors: &CudaSlice<f32>,
-    indexes: &Vec,
-    module: Arc<CudaModule>,
-    stream: Arc<CudaStream>,
-    _prop: &cudaDeviceProp,
-) -> Result<f32> {
-    if indexes.len() != num_queries as usize {
-        return Err(anyhow!(
-            "Invalid number of indexes. Expected: {}, Actual: {}",
-            num_queries,
-            indexes.len()
-        ));
-    }
-
-    let calc_total_distance_kernel = module.load_function("calc_total_distance")?;
-
-    let d_solution_indexes = stream.memcpy_stod(indexes)?;
-    let mut d_total_distance = stream.alloc_zeros::<f32>(1)?;
-    let mut errorflag = stream.alloc_zeros::(1)?;
-
-    let cfg = LaunchConfig {
-        grid_dim: (1, 1, 1),
-        block_dim: (1, 1, 1),
-        shared_mem_bytes: 0,
-    };
-
-    unsafe {
-        stream
-            .launch_builder(&calc_total_distance_kernel)
-            .arg(&vector_dims)
-            .arg(&database_size)
-            .arg(&num_queries)
-            .arg(d_query_vectors)
-            .arg(d_database_vectors)
-            .arg(&d_solution_indexes)
-            .arg(&mut d_total_distance)
-            .arg(&mut errorflag)
-            .launch(cfg)?;
-    }
-
-    stream.synchronize()?;
-
-    let total_distance = stream.memcpy_dtov(&d_total_distance)?[0];
-    let error_flag = stream.memcpy_dtov(&errorflag)?[0];
-
-    match error_flag {
-        0 => {}
-        1 => {
-            return Err(anyhow!("Invalid index in solution"));
-        }
-        _ => {
-            return Err(anyhow!("Unknown error code: {}", error_flag));
-        }
-    }
-
-    let avg_dist = total_distance / num_queries as f32;
-    Ok(avg_dist)
-}
diff --git a/tig-challenges/src/vehicle_routing/mod.rs b/tig-challenges/src/vehicle_routing/mod.rs
index c3453f8d..28523f25 100644
--- a/tig-challenges/src/vehicle_routing/mod.rs
+++ b/tig-challenges/src/vehicle_routing/mod.rs
@@ -37,6 +37,10 @@ pub struct Challenge {
     pub service_time: i32,
     pub ready_times: Vec,
     pub due_times: Vec,
+    #[cfg(not(feature = "hide_verification"))]
+    pub greedy_baseline_total_distance: u32,
+    #[cfg(feature = "hide_verification")]
+    greedy_baseline_total_distance: u32,
 }
 
 impl Challenge {
@@ -146,13 +150,18 @@ impl Challenge {
             node_positions,
             distance_matrix,
             max_capacity,
-            fleet_size: 0,
+            fleet_size: u32::MAX as usize,
             service_time,
             ready_times,
             due_times,
+            greedy_baseline_total_distance: 0,
         };
 
-        c.fleet_size = c.compute_greedy_baseline()?.routes.len() + 2;
+        let greedy_baseline_solution = c.compute_greedy_baseline()?;
+        c.greedy_baseline_total_distance =
+            c.evaluate_total_distance(&greedy_baseline_solution)? as u32;
+        c.fleet_size = greedy_baseline_solution.routes.len() + 2;
+
         Ok(c)
     }
 
@@ -236,10 +245,8 @@ impl Challenge {
     conditional_pub!(
         fn evaluate_solution(&self, solution: &Solution) -> Result {
             let total_distance = self.evaluate_total_distance(solution)?;
-            let greedy_solution = self.compute_greedy_baseline()?;
-            let greedy_total_distance = self.evaluate_total_distance(&greedy_solution)?;
             // TODO: implement SOTA baseline
-            let sota_total_distance = greedy_total_distance;
+            let sota_total_distance = self.greedy_baseline_total_distance;
             // if total_distance > greedy_total_distance {
             //     return Err(anyhow!(
             //         "Total distance {} is greater than greedy baseline distance {}",
diff --git a/tig-runtime/src/main.rs b/tig-runtime/src/main.rs
index dcc74381..32b83d05 100644
--- a/tig-runtime/src/main.rs
+++ b/tig-runtime/src/main.rs
@@ -107,15 +107,6 @@ pub fn compute_solution(
 macro_rules! dispatch_challenge {
     ($c:ident, cpu) => {{
-        // library function may exit 87 if it runs out of fuel
-        let solve_challenge_fn = unsafe {
-            library.get::<fn(
-                &$c::Challenge,
-                &dyn Fn(&$c::Solution) -> Result<()>,
-                Option,
-            ) -> Result<()>>(b"entry_point")?
-        };
-
         let track_id = if settings.track_id.starts_with('"') && settings.track_id.ends_with('"')
         {
             settings.track_id.clone()
         } else {
             format!(r#""{}""#, settings.track_id)
         };
@@ -129,6 +120,16 @@ pub fn compute_solution(
                 stringify!($c)
             )
         })?;
+
+        // library function may exit 87 if it runs out of fuel
+        let solve_challenge_fn = unsafe {
+            library.get::<fn(
+                &$c::Challenge,
+                &dyn Fn(&$c::Solution) -> Result<()>,
+                Option,
+            ) -> Result<()>>(b"entry_point")?
+        };
+
         let challenge = $c::Challenge::generate_instance(&seed, &track)?;
 
         let save_solution_fn = |solution: &$c::Solution| -> Result<()> {
@@ -161,6 +162,20 @@ pub fn compute_solution(
     }};
 
     ($c:ident, gpu) => {{
+        let track_id = if settings.track_id.starts_with('"') && settings.track_id.ends_with('"')
+        {
+            settings.track_id.clone()
+        } else {
+            format!(r#""{}""#, settings.track_id)
+        };
+        let track = serde_json::from_str(&track_id).map_err(|_| {
+            anyhow::anyhow!(
+                "Failed to parse track_id '{}' as {}::Track",
+                settings.track_id,
+                stringify!($c)
+            )
+        })?;
+
         if ptx_path.is_none() {
             panic!("PTX file is required for GPU challenges.");
         }
@@ -197,7 +212,7 @@ pub fn compute_solution(
         let challenge = $c::Challenge::generate_instance(
             &seed,
-            settings.size,
+            &track,
             module.clone(),
             stream.clone(),
             &prop,
diff --git a/tig-verifier/src/main.rs b/tig-verifier/src/main.rs
index b81c9db6..6a0ddb08 100644
--- a/tig-verifier/src/main.rs
+++ b/tig-verifier/src/main.rs
@@ -103,6 +103,20 @@ pub fn verify_solution(
     }};
 
     ($c:ident, gpu) => {{
+        let track_id = if settings.track_id.starts_with('"') && settings.track_id.ends_with('"')
+        {
+            settings.track_id.clone()
+        } else {
+            format!(r#""{}""#, settings.track_id)
+        };
+        let track = serde_json::from_str(&track_id).map_err(|_| {
+            anyhow::anyhow!(
+                "Failed to parse track_id '{}' as {}::Track",
+                settings.track_id,
+                stringify!($c)
+            )
+        })?;
+
         if ptx_path.is_none() {
             panic!("PTX file is required for GPU challenges.");
         }
@@ -121,7 +135,7 @@ pub fn verify_solution(
         let challenge = $c::Challenge::generate_instance(
             &seed,
-            settings.size,
+            &track,
             module.clone(),
             stream.clone(),
             &prop,