Mirror of https://github.com/tig-foundation/tig-monorepo.git (synced 2026-02-21 10:27:49 +08:00)
Update gpu challenges to output quality.
parent 5aba4aa7eb
commit 1d4417d004
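The recurring change in this commit is that each GPU challenge's verify_solution (a pass/fail check against a difficulty threshold) becomes evaluate_solution, which returns a signed fixed-point quality score relative to a baseline. A minimal sketch of that scoring convention follows; it is not part of the commit, the helper name quality_score is made up for illustration, and the QUALITY_PRECISION value is a placeholder (the real constant lives in tig-challenges and is not shown in this diff).

// Illustrative sketch only. QUALITY_PRECISION is a placeholder value here; the
// actual constant is defined in tig-challenges and is not visible in this diff.
const QUALITY_PRECISION: i32 = 1_000_000;

// Relative improvement of `metric` over `baseline` (lower is better), clamped and
// scaled to a fixed-point i32, mirroring the hypergraph and neuralnet_optimizer
// evaluate_solution implementations in the diff below.
fn quality_score(baseline: f64, metric: f64) -> i32 {
    let quality = (baseline - metric) / baseline;
    let quality = quality.clamp(-10.0, 10.0) * QUALITY_PRECISION as f64;
    quality.round() as i32
}

fn main() {
    // A solution that beats the baseline metric by 12.5%.
    assert_eq!(quality_score(800.0, 700.0), 125_000);
    // A solution worse than the baseline yields a negative score.
    assert!(quality_score(800.0, 900.0) < 0);
}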
@@ -28,8 +28,9 @@ pub fn solve_challenge(
) -> Result<()> {
// boilerplate for training loop
// recommend not modifying this function unless you have a good reason
let (solution, train_losses, val_losses) = training_loop(
training_loop(
challenge,
save_solution,
module,
stream,
prop,
@@ -37,9 +38,8 @@ pub fn solve_challenge(
optimizer_query_at_params,
optimizer_step,
)?;
save_solution(&solution)?;

Ok()
Ok(())
}

#[derive(Clone)]

@@ -44,7 +44,7 @@ case "$CHALLENGE" in
neuralnet_optimizer)
echo "Building ALGORITHM '$ALGORITHM' for CHALLENGE 'neuralnet_optimizer'"
build_so $ALGORITHM
build_ptx $ALGORITHM --extra-cu-files tig-challenges/src/neuralnet/kernels.cu
build_ptx $ALGORITHM
;;
*)
echo "Error: Invalid CHALLENGE value. Must be one of: satisfiability, knapsack, vehicle_routing, vector_search, hypergraph, neuralnet_optimizer"

@@ -250,16 +250,18 @@ def main():
f"Framework code does not exist @ '{framework_cu}'. This script must be run from the root of tig-monorepo"
)

challenge_cu = f"tig-challenges/src/{CHALLENGE}.cu"
if not os.path.exists(challenge_cu):
challenge_cus_pattern = f"tig-challenges/src/{CHALLENGE}/**/*.cu"
challenge_cus = glob(challenge_cus_pattern, recursive=True)
if not challenge_cus:
raise FileNotFoundError(
f"Challenge code does not exist @ '{challenge_cu}'. Is the challenge name correct?"
f"Challenge code does not exist @ '{challenge_cus_pattern}'. Is the challenge name correct?"
)

algorithm_cus = glob(f"tig-algorithms/src/{CHALLENGE}/{args.algorithm}/*.cu")
algorithm_cus_pattern = f"tig-algorithms/src/{CHALLENGE}/{args.algorithm}/*.cu"
algorithm_cus = glob(algorithm_cus_pattern)
if not algorithm_cus:
raise FileNotFoundError(
f"Algorithm code does not exist @ '{algorithm_cus}'. Is the algorithm name correct?"
f"Algorithm code does not exist @ '{algorithm_cus_pattern}'. Is the algorithm name correct?"
)

# Combine .cu source files into a temporary file
@@ -269,12 +271,8 @@ def main():

with open(framework_cu, 'r') as f:
code = f.read() + "\n"
with open(challenge_cu, 'r') as f:
code += f.read() + "\n"
for extra_cu in args.extra_cu_files:
if not os.path.exists(extra_cu):
raise FileNotFoundError(f"Extra .cu file does not exist: {extra_cu}")
with open(extra_cu, 'r') as f:
for cu_path in challenge_cus:
with open(cu_path, 'r') as f:
code += f.read() + "\n"
kernel_regex = r'(?:extern\s+"C"\s+__global__|__device__)\s+\w+\s+(?P<func>\w+)\s*\('
kernels_to_ignore = [match.group('func') for match in re.finditer(kernel_regex, code)]

@@ -1,30 +1,13 @@
use crate::QUALITY_PRECISION;
use anyhow::{anyhow, Result};
use cudarc::driver::*;
use cudarc::runtime::sys::cudaDeviceProp;
use rand::{rngs::StdRng, Rng, SeedableRng};
use serde::{Deserialize, Serialize};
use std::sync::Arc;

#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Difficulty {
pub num_hyperedges: u32,
#[cfg(not(feature = "hide_verification"))]
pub better_than_baseline: u32,
#[cfg(feature = "hide_verification")]
better_than_baseline: u32,
}

impl From<Vec<i32>> for Difficulty {
fn from(arr: Vec<i32>) -> Self {
Self {
num_hyperedges: arr[0] as u32,
better_than_baseline: arr[1] as u32,
}
}
}
impl Into<Vec<i32>> for Difficulty {
fn into(self) -> Vec<i32> {
vec![self.num_hyperedges as i32, self.better_than_baseline as i32]
impl_kv_string_serde! {
Track {
num_hyperedges: u32,
}
}

@@ -44,7 +27,7 @@ impl Solution {

pub struct Challenge {
pub seed: [u8; 32],
pub difficulty: Difficulty,
pub num_hyperedges: u32,
pub num_nodes: u32,
pub num_parts: u32,
pub max_part_size: u32,
@@ -60,9 +43,9 @@ pub struct Challenge {
pub d_node_offsets: CudaSlice<i32>,
pub d_node_hyperedges: CudaSlice<i32>,
#[cfg(not(feature = "hide_verification"))]
pub baseline_connectivity_metric: u32,
pub greedy_baseline_connectivity_metric: u32,
#[cfg(feature = "hide_verification")]
baseline_connectivity_metric: u32,
greedy_baseline_connectivity_metric: u32,
}

pub const MAX_THREADS_PER_BLOCK: u32 = 1024;
@@ -70,14 +53,14 @@ pub const MAX_THREADS_PER_BLOCK: u32 = 1024;
impl Challenge {
pub fn generate_instance(
seed: &[u8; 32],
difficulty: &Difficulty,
track: &Track,
module: Arc<CudaModule>,
stream: Arc<CudaStream>,
_prop: &cudaDeviceProp,
) -> Result<Self> {
let mut rng = StdRng::from_seed(seed.clone());
let num_hyperedges = difficulty.num_hyperedges;
let target_num_nodes = difficulty.num_hyperedges; // actual number may be around 8% less
let num_hyperedges = track.num_hyperedges;
let target_num_nodes = track.num_hyperedges; // actual number may be around 8% less
let depth = 6;
let num_parts = 1 << depth; // 2^6 = 64 partitions
let level_weights: Vec<f32> = vec![
@@ -371,7 +354,7 @@ impl Challenge {

Ok(Self {
seed: *seed,
difficulty: difficulty.clone(),
num_hyperedges: track.num_hyperedges,
num_nodes: target_num_nodes - num_prune,
num_parts,
max_part_size,
@@ -382,11 +365,11 @@ impl Challenge {
d_node_degrees: d_shuffled_node_degrees,
d_node_offsets: d_shuffled_node_offsets,
d_node_hyperedges: d_shuffled_node_hyperedges,
baseline_connectivity_metric: connectivity_metric,
greedy_baseline_connectivity_metric: connectivity_metric,
})
}

pub fn calc_connectivity_metric(
pub fn evaluate_connectivity_metric(
&self,
solution: &Solution,
module: Arc<CudaModule>,
@@ -407,7 +390,7 @@ impl Challenge {
let count_nodes_in_part_kernel = module.load_function("count_nodes_in_part")?;

let block_size = MAX_THREADS_PER_BLOCK;
let grid_size = (self.difficulty.num_hyperedges + block_size - 1) / block_size;
let grid_size = (self.num_hyperedges + block_size - 1) / block_size;

let cfg = LaunchConfig {
grid_dim: (grid_size, 1, 1),
@@ -466,7 +449,7 @@ impl Challenge {
unsafe {
stream
.launch_builder(&calc_connectivity_metric_kernel)
.arg(&self.difficulty.num_hyperedges)
.arg(&self.num_hyperedges)
.arg(&self.d_hyperedge_offsets)
.arg(&self.d_hyperedge_nodes)
.arg(&d_partition)
@@ -480,29 +463,33 @@ impl Challenge {
}

conditional_pub!(
fn verify_solution(
fn compute_greedy_baseline(&self) -> Result<Solution> {
Err(anyhow!("Not implemented yet"))
}
);

conditional_pub!(
fn compute_sota_baseline(&self) -> Result<Solution> {
Err(anyhow!("Not implemented yet"))
}
);

conditional_pub!(
fn evaluate_solution(
&self,
solution: &Solution,
module: Arc<CudaModule>,
stream: Arc<CudaStream>,
_prop: &cudaDeviceProp,
) -> Result<()> {
) -> Result<i32> {
let connectivity_metric =
self.calc_connectivity_metric(solution, module, stream, _prop)?;
let btb = self.difficulty.better_than_baseline as f64 / 1000.0;
let connectivity_metric_threshold =
(self.baseline_connectivity_metric as f64 * (1.0 - btb)).ceil() as u32;
if connectivity_metric > connectivity_metric_threshold {
Err(anyhow!(
"connectivity_metric {} is greater than threshold {} (baseline: {}, better_than_baseline: {}%)",
connectivity_metric,
connectivity_metric_threshold,
self.baseline_connectivity_metric,
btb * 100.0
))
} else {
Ok(())
}
self.evaluate_connectivity_metric(solution, module, stream, _prop)?;
let baseline_connectivity_metric = self.greedy_baseline_connectivity_metric;
let quality = (baseline_connectivity_metric as f64 - connectivity_metric as f64)
/ baseline_connectivity_metric as f64;
let quality = quality.clamp(-10.0, 10.0) * QUALITY_PRECISION as f64;
let quality = quality.round() as i32;
Ok(quality)
}
);
}

@@ -9,6 +9,7 @@ use std::collections::HashSet;
impl_kv_string_serde! {
Track {
num_items: usize,
density: u32,
}
}

@@ -38,7 +39,7 @@ impl Challenge {
pub fn generate_instance(seed: &[u8; 32], track: &Track) -> Result<Self> {
let mut rng = SmallRng::from_seed(seed.clone());
// Set constant density for value generation
let density = 0.25;
let density = track.density as f64 / 100.0;

// Generate weights w_i in the range [1, 50]
let weights: Vec<u32> = (0..track.num_items)

@@ -199,8 +199,6 @@ pub mod hypergraph;
#[cfg(feature = "c005")]
pub use hypergraph as c005;
#[cfg(feature = "c006")]
pub(crate) mod neuralnet;
#[cfg(feature = "c006")]
pub mod neuralnet_optimizer;
#[cfg(feature = "c006")]
pub use neuralnet_optimizer as c006;

@@ -1,49 +1,22 @@
use anyhow::{anyhow, Result};
use base64::{engine::general_purpose::STANDARD as BASE64, Engine as _};
use crate::QUALITY_PRECISION;
use anyhow::Result;
use cudarc::{
cublas::CudaBlas,
cudnn::Cudnn,
driver::{CudaModule, CudaSlice, CudaStream, CudaView, LaunchConfig, PushKernelArg},
runtime::sys::cudaDeviceProp,
};
use flate2::{read::GzDecoder, write::GzEncoder, Compression};
use rand::{prelude::*, rngs::StdRng};
use serde::{
de::{self, Visitor},
Deserialize, Deserializer, Serialize, Serializer,
};
use serde_json::{from_value, Map, Value};
use std::{
any::Any,
fmt,
io::{Read, Write},
sync::Arc,
};
use std::{any::Any, sync::Arc};

use crate::neuralnet::MLP;
mod nn;
use nn::MLP;

const THREADS_PER_BLOCK: u32 = 1024;

#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Difficulty {
pub num_hidden_layers: usize,
#[cfg(not(feature = "hide_verification"))]
pub accuracy_factor: u32,
#[cfg(feature = "hide_verification")]
accuracy_factor: u32,
}

impl From<Vec<i32>> for Difficulty {
fn from(arr: Vec<i32>) -> Self {
Self {
num_hidden_layers: arr[0] as usize,
accuracy_factor: arr[1] as u32,
}
}
}
impl Into<Vec<i32>> for Difficulty {
fn into(self) -> Vec<i32> {
vec![self.num_hidden_layers as i32, self.accuracy_factor as i32]
impl_kv_string_serde! {
Track {
num_hidden_layers: usize,
}
}

@@ -87,48 +60,48 @@ pub struct Dataset {
}

impl Dataset {
pub fn train_inputs(&self) -> CudaView<f32> {
pub fn train_inputs(&self) -> CudaView<'_, f32> {
self.inputs.slice(0..self.train_size * self.input_dims)
}
pub fn train_targets_noisy(&self) -> CudaView<f32> {
pub fn train_targets_noisy(&self) -> CudaView<'_, f32> {
self.targets_noisy
.slice(0..self.train_size * self.output_dims)
}
pub fn train_targets_true_f(&self) -> CudaView<f32> {
pub fn train_targets_true_f(&self) -> CudaView<'_, f32> {
self.targets_true_f
.slice(0..self.train_size * self.output_dims)
}
pub fn validation_inputs(&self) -> CudaView<f32> {
pub fn validation_inputs(&self) -> CudaView<'_, f32> {
self.inputs.slice(
self.train_size * self.input_dims
..(self.train_size + self.validation_size) * self.input_dims,
)
}
pub fn validation_targets_noisy(&self) -> CudaView<f32> {
pub fn validation_targets_noisy(&self) -> CudaView<'_, f32> {
self.targets_noisy.slice(
self.train_size * self.output_dims
..(self.train_size + self.validation_size) * self.output_dims,
)
}
pub fn validation_targets_true_f(&self) -> CudaView<f32> {
pub fn validation_targets_true_f(&self) -> CudaView<'_, f32> {
self.targets_true_f.slice(
self.train_size * self.output_dims
..(self.train_size + self.validation_size) * self.output_dims,
)
}
pub fn test_inputs(&self) -> CudaView<f32> {
pub fn test_inputs(&self) -> CudaView<'_, f32> {
self.inputs.slice(
(self.train_size + self.validation_size) * self.input_dims
..(self.train_size + self.validation_size + self.test_size) * self.input_dims,
)
}
pub fn test_targets_noisy(&self) -> CudaView<f32> {
pub fn test_targets_noisy(&self) -> CudaView<'_, f32> {
self.targets_noisy.slice(
(self.train_size + self.validation_size) * self.output_dims
..(self.train_size + self.validation_size + self.test_size) * self.output_dims,
)
}
pub fn test_targets_true_f(&self) -> CudaView<f32> {
pub fn test_targets_true_f(&self) -> CudaView<'_, f32> {
self.targets_true_f.slice(
(self.train_size + self.validation_size) * self.output_dims
..(self.train_size + self.validation_size + self.test_size) * self.output_dims,
@@ -138,7 +111,7 @@ impl Dataset {

pub struct Challenge {
pub seed: [u8; 32],
pub difficulty: Difficulty,
pub num_hidden_layers: usize,
pub hidden_layers_dims: usize,
pub batch_size: usize,
pub max_epochs: usize,
@@ -151,7 +124,7 @@ pub struct Challenge {
impl Challenge {
pub fn generate_instance(
seed: &[u8; 32],
difficulty: &Difficulty,
track: &Track,
module: Arc<CudaModule>,
stream: Arc<CudaStream>,
_prop: &cudaDeviceProp,
@@ -235,7 +208,7 @@ impl Challenge {

Ok(Self {
seed: *seed,
difficulty: difficulty.clone(),
num_hidden_layers: track.num_hidden_layers.clone(),
hidden_layers_dims: 256,
batch_size: 128,
max_epochs: 1000,
@@ -256,13 +229,13 @@ impl Challenge {
}

conditional_pub!(
fn verify_solution(
fn evaluate_solution(
&self,
solution: &Solution,
module: Arc<CudaModule>,
stream: Arc<CudaStream>,
_prop: &cudaDeviceProp,
) -> Result<()> {
) -> Result<i32> {
let cublas = CudaBlas::new(stream.clone())?;
let cudnn = Cudnn::new(stream.clone())?;

@@ -288,8 +261,6 @@ impl Challenge {
let avg_model_loss_on_test = stream.memcpy_dtov(&loss)?[0];

// Calculate baseline error epsilon_star_squared
let alpha = 4.0 - self.difficulty.accuracy_factor as f32 / 1000.0;

let y_h = stream.memcpy_dtov(&self.dataset.test_targets_noisy())?;
let f_h = stream.memcpy_dtov(&self.dataset.test_targets_true_f())?;
stream.synchronize()?;
@@ -301,22 +272,18 @@ impl Challenge {
.sum();

let epsilon_star_squared =
(alpha / self.dataset.test_size as f32) * sum_sq_diff_true_vs_noisy;
(4.0 / self.dataset.test_size as f32) * sum_sq_diff_true_vs_noisy;

if avg_model_loss_on_test <= epsilon_star_squared {
Ok(())
} else {
Err(anyhow!(
"Model test loss ({:.4e}) exceeds target baseline epsilon_star_squared ({:.4e})",
avg_model_loss_on_test,
epsilon_star_squared
))
}
let quality = (epsilon_star_squared as f64 - avg_model_loss_on_test as f64)
/ epsilon_star_squared as f64;
let quality = quality.clamp(-10.0, 10.0) * QUALITY_PRECISION as f64;
let quality = quality.round() as i32;
Ok(quality)
}
);

pub fn layer_dims(&self) -> Vec<usize> {
let mut layer_dims = vec![self.hidden_layers_dims; self.difficulty.num_hidden_layers];
let mut layer_dims = vec![self.hidden_layers_dims; self.num_hidden_layers];
layer_dims.insert(0, self.dataset.input_dims);
layer_dims.push(self.dataset.output_dims);
layer_dims
@@ -370,13 +337,14 @@ pub type OptimizerStepFn = fn(

pub fn training_loop(
challenge: &Challenge,
save_solution: &dyn Fn(&Solution) -> Result<()>,
module: Arc<CudaModule>,
stream: Arc<CudaStream>,
prop: &cudaDeviceProp,
optimizer_init_state: OptimizerInitStateFn,
optimizer_query_at_params: OptimizerQueryAtParamsFn,
optimizer_step: OptimizerStepFn,
) -> Result<(Solution, Vec<f32>, Vec<f32>)> {
) -> Result<()> {
let Challenge {
batch_size,
max_epochs,
@@ -415,7 +383,6 @@ pub fn training_loop(
let mut lowest_loss = f32::INFINITY;
let mut _best_epoch = 0;
let mut epochs_no_improvement = 0;
let mut best_model_solution: Option<Solution> = None;
let mut prev_train_loss = None;
let mut prev_validation_loss = None;
let mut train_losses = Vec::with_capacity(max_epochs);
@@ -601,7 +568,7 @@ pub fn training_loop(
if avg_val_loss < lowest_loss - min_loss_delta {
lowest_loss = avg_val_loss;
_best_epoch = epoch;
best_model_solution = Some(to_solution(&model, epoch + 1, stream.clone())?);
save_solution(&to_solution(&model, epoch + 1, stream.clone())?)?;
epochs_no_improvement = 0;
} else {
epochs_no_improvement += 1;
@@ -613,8 +580,7 @@ pub fn training_loop(

stream.synchronize()?;

let solution = best_model_solution.ok_or_else(|| anyhow!("No valid solution found during training. Validation loss may have been NaN or never improved."))?;
Ok((solution, train_losses, validation_losses))
Ok(())
}

pub fn load_solution(mlp: &mut MLP, solution: &Solution, stream: Arc<CudaStream>) -> Result<()> {

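For context on the training-loop hunks above: training_loop now takes a save_solution callback and returns Result<()>, so the best model is persisted whenever validation loss improves rather than being returned at the end. A stripped-down sketch of that control flow, with a stand-in Solution type and hard-coded losses purely for illustration (anyhow is the error type used throughout the crate):

use anyhow::Result;

// Stand-in for the challenge's Solution type; illustrative only.
struct Solution { epoch: usize }

// Sketch of the new contract: report improving solutions through the callback
// and return () instead of handing back the best solution at the end.
fn training_loop(save_solution: &dyn Fn(&Solution) -> Result<()>) -> Result<()> {
    let mut lowest_loss = f32::INFINITY;
    for (epoch, &val_loss) in [0.9_f32, 0.7, 0.8, 0.5].iter().enumerate() {
        if val_loss < lowest_loss {
            lowest_loss = val_loss;
            save_solution(&Solution { epoch: epoch + 1 })?;
        }
    }
    Ok(())
}

fn main() -> Result<()> {
    training_loop(&|s| {
        println!("saved solution from epoch {}", s.epoch);
        Ok(())
    })
}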
@@ -11,7 +11,7 @@ use serde::{Deserialize, Serialize};
impl_kv_string_serde! {
Track {
num_variables: usize,
clauses_to_variables_percent: u32
clauses_to_variables_ratio: u32
}
}

@@ -39,8 +39,8 @@ pub struct Challenge {
impl Challenge {
pub fn generate_instance(seed: &[u8; 32], track: &Track) -> Result<Self> {
let mut rng = SmallRng::from_seed(StdRng::from_seed(seed.clone()).gen());
let num_clauses = (track.num_variables as f64 * track.clauses_to_variables_percent as f64
/ 100.0)
let num_clauses = (track.num_variables as f64 * track.clauses_to_variables_ratio as f64
/ 1000.0)
.floor() as usize;

let var_distr = Uniform::new(1, track.num_variables as i32 + 1);

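A note on the satisfiability hunk above: the Track field changes from clauses_to_variables_percent (divided by 100) to clauses_to_variables_ratio (divided by 1000), so the value is now interpreted in thousandths. A tiny illustration with made-up numbers; the helper name num_clauses exists only for this example.

// Illustrative only: the new /1000.0 scaling for clauses_to_variables_ratio.
fn num_clauses(num_variables: usize, clauses_to_variables_ratio: u32) -> usize {
    (num_variables as f64 * clauses_to_variables_ratio as f64 / 1000.0).floor() as usize
}

fn main() {
    // A ratio of 4267 now means ~4.267 clauses per variable.
    assert_eq!(num_clauses(1000, 4267), 4267);
    assert_eq!(num_clauses(500, 4267), 2133); // floor(500 * 4.267)
}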
@@ -131,7 +131,7 @@ extern "C" __global__ void generate_vectors(
}
}

extern "C" __global__ void calc_total_distance(
extern "C" __global__ void evaluate_total_distance(
const uint32_t vector_dims,
const uint32_t database_size,
const uint32_t num_queries,

@@ -1,33 +1,15 @@
use crate::QUALITY_PRECISION;
use anyhow::{anyhow, Result};
use cudarc::{
driver::{safe::LaunchConfig, CudaModule, CudaSlice, CudaStream, PushKernelArg},
runtime::sys::cudaDeviceProp,
};
use rand::{rngs::StdRng, Rng, SeedableRng};
use serde::{Deserialize, Serialize};
use serde_json::{from_value, Map, Value};
use std::sync::Arc;

#[derive(Serialize, Deserialize, Debug, Copy, Clone)]
pub struct Difficulty {
pub num_queries: u32,
#[cfg(not(feature = "hide_verification"))]
pub better_than_baseline: u32,
#[cfg(feature = "hide_verification")]
better_than_baseline: u32,
}

impl From<Vec<i32>> for Difficulty {
fn from(arr: Vec<i32>) -> Self {
Self {
num_queries: arr[0] as u32,
better_than_baseline: arr[1] as u32,
}
}
}
impl Into<Vec<i32>> for Difficulty {
fn into(self) -> Vec<i32> {
vec![self.num_queries as i32, self.better_than_baseline as i32]
impl_kv_string_serde! {
Track {
num_queries: u32,
}
}

@@ -47,12 +29,11 @@ impl Solution {

pub struct Challenge {
pub seed: [u8; 32],
pub difficulty: Difficulty,
pub num_queries: u32,
pub vector_dims: u32,
pub database_size: u32,
pub d_database_vectors: CudaSlice<f32>,
pub d_query_vectors: CudaSlice<f32>,
pub max_distance: f32,
}

pub const MAX_THREADS_PER_BLOCK: u32 = 1024;
@@ -60,16 +41,14 @@ pub const MAX_THREADS_PER_BLOCK: u32 = 1024;
impl Challenge {
pub fn generate_instance(
seed: &[u8; 32],
difficulty: &Difficulty,
track: &Track,
module: Arc<CudaModule>,
stream: Arc<CudaStream>,
_prop: &cudaDeviceProp,
) -> Result<Self> {
let mut rng = StdRng::from_seed(seed.clone());
let better_than_baseline = difficulty.better_than_baseline;
let max_distance = 11.0 - (better_than_baseline as f32) / 1000.0;
let vector_dims = 250;
let database_size = 100 * difficulty.num_queries;
let database_size = 100 * track.num_queries;
let avg_cluster_size: f32 = 700.0;
let num_clusters: u32 = ((1.0 + rng.gen::<f32>() * 0.05)
+ database_size as f32 / avg_cluster_size)
@@ -126,14 +105,14 @@ impl Challenge {
let mut d_database_vectors =
stream.alloc_zeros::<f32>((database_size * vector_dims) as usize)?;
let mut d_query_vectors =
stream.alloc_zeros::<f32>((difficulty.num_queries * vector_dims) as usize)?;
stream.alloc_zeros::<f32>((track.num_queries * vector_dims) as usize)?;

unsafe {
stream
.launch_builder(&generate_vectors_kernel)
.arg(&d_seed)
.arg(&database_size)
.arg(&difficulty.num_queries)
.arg(&track.num_queries)
.arg(&vector_dims)
.arg(&num_clusters)
.arg(&d_cluster_cum_prob)
@@ -151,117 +130,87 @@ impl Challenge {

return Ok(Self {
seed: seed.clone(),
difficulty: difficulty.clone(),
num_queries: track.num_queries.clone(),
vector_dims,
database_size,
d_database_vectors,
d_query_vectors,
max_distance,
});
}

pub fn calc_average_distance(
pub fn evaluate_average_distance(
&self,
solution: &Solution,
module: Arc<CudaModule>,
stream: Arc<CudaStream>,
prop: &cudaDeviceProp,
_prop: &cudaDeviceProp,
) -> Result<f32> {
calc_average_distance(
self.difficulty.num_queries,
self.vector_dims,
self.database_size,
&self.d_query_vectors,
&self.d_database_vectors,
&solution.indexes,
module.clone(),
stream.clone(),
prop,
)
if solution.indexes.len() != self.num_queries as usize {
return Err(anyhow!(
"Invalid number of indexes. Expected: {}, Actual: {}",
self.num_queries,
solution.indexes.len()
));
}

let evaluate_total_distance_kernel = module.load_function("evaluate_total_distance")?;

let d_solution_indexes = stream.memcpy_stod(&solution.indexes)?;
let mut d_total_distance = stream.alloc_zeros::<f32>(1)?;
let mut errorflag = stream.alloc_zeros::<u32>(1)?;

let cfg = LaunchConfig {
grid_dim: (1, 1, 1),
block_dim: (1, 1, 1),
shared_mem_bytes: 0,
};

unsafe {
stream
.launch_builder(&evaluate_total_distance_kernel)
.arg(&self.vector_dims)
.arg(&self.database_size)
.arg(&self.num_queries)
.arg(&self.d_query_vectors)
.arg(&self.d_database_vectors)
.arg(&d_solution_indexes)
.arg(&mut d_total_distance)
.arg(&mut errorflag)
.launch(cfg)?;
}

stream.synchronize()?;

let total_distance = stream.memcpy_dtov(&d_total_distance)?[0];
let error_flag = stream.memcpy_dtov(&errorflag)?[0];

match error_flag {
0 => {}
1 => {
return Err(anyhow!("Invalid index in solution"));
}
_ => {
return Err(anyhow!("Unknown error code: {}", error_flag));
}
}

let avg_dist = total_distance / self.num_queries as f32;
Ok(avg_dist)
}

conditional_pub!(
fn verify_solution(
fn evaluate_solution(
&self,
solution: &Solution,
module: Arc<CudaModule>,
stream: Arc<CudaStream>,
prop: &cudaDeviceProp,
) -> Result<()> {
let avg_dist = self.calc_average_distance(solution, module, stream, prop)?;
if avg_dist > self.max_distance {
return Err(anyhow!(
"Average query vector distance is '{}'. Max dist: '{}'",
avg_dist,
self.max_distance
));
} else {
Ok(())
}
) -> Result<i32> {
let avg_dist = self.evaluate_average_distance(solution, module, stream, prop)?;
let quality = (11.0 - avg_dist as f64) / 11.0;
let quality = quality.clamp(-10.0, 10.0) * QUALITY_PRECISION as f64;
let quality = quality.round() as i32;
Ok(quality)
}
);
}

pub fn calc_average_distance(
num_queries: u32,
vector_dims: u32,
database_size: u32,
d_query_vectors: &CudaSlice<f32>,
d_database_vectors: &CudaSlice<f32>,
indexes: &Vec<usize>,
module: Arc<CudaModule>,
stream: Arc<CudaStream>,
_prop: &cudaDeviceProp,
) -> Result<f32> {
if indexes.len() != num_queries as usize {
return Err(anyhow!(
"Invalid number of indexes. Expected: {}, Actual: {}",
num_queries,
indexes.len()
));
}

let calc_total_distance_kernel = module.load_function("calc_total_distance")?;

let d_solution_indexes = stream.memcpy_stod(indexes)?;
let mut d_total_distance = stream.alloc_zeros::<f32>(1)?;
let mut errorflag = stream.alloc_zeros::<u32>(1)?;

let cfg = LaunchConfig {
grid_dim: (1, 1, 1),
block_dim: (1, 1, 1),
shared_mem_bytes: 0,
};

unsafe {
stream
.launch_builder(&calc_total_distance_kernel)
.arg(&vector_dims)
.arg(&database_size)
.arg(&num_queries)
.arg(d_query_vectors)
.arg(d_database_vectors)
.arg(&d_solution_indexes)
.arg(&mut d_total_distance)
.arg(&mut errorflag)
.launch(cfg)?;
}

stream.synchronize()?;

let total_distance = stream.memcpy_dtov(&d_total_distance)?[0];
let error_flag = stream.memcpy_dtov(&errorflag)?[0];

match error_flag {
0 => {}
1 => {
return Err(anyhow!("Invalid index in solution"));
}
_ => {
return Err(anyhow!("Unknown error code: {}", error_flag));
}
}

let avg_dist = total_distance / num_queries as f32;
Ok(avg_dist)
}

@@ -37,6 +37,10 @@ pub struct Challenge {
pub service_time: i32,
pub ready_times: Vec<i32>,
pub due_times: Vec<i32>,
#[cfg(not(feature = "hide_verification"))]
pub greedy_baseline_total_distance: u32,
#[cfg(feature = "hide_verification")]
greedy_baseline_total_distance: u32,
}

impl Challenge {
@@ -146,13 +150,18 @@ impl Challenge {
node_positions,
distance_matrix,
max_capacity,
fleet_size: 0,
fleet_size: u32::MAX as usize,
service_time,
ready_times,
due_times,
greedy_baseline_total_distance: 0,
};

c.fleet_size = c.compute_greedy_baseline()?.routes.len() + 2;
let greedy_baseline_solution = c.compute_greedy_baseline()?;
c.greedy_baseline_total_distance =
c.evaluate_total_distance(&greedy_baseline_solution)? as u32;
c.fleet_size = greedy_baseline_solution.routes.len() + 2;

Ok(c)
}

@@ -236,10 +245,8 @@ impl Challenge {
conditional_pub!(
fn evaluate_solution(&self, solution: &Solution) -> Result<i32> {
let total_distance = self.evaluate_total_distance(solution)?;
let greedy_solution = self.compute_greedy_baseline()?;
let greedy_total_distance = self.evaluate_total_distance(&greedy_solution)?;
// TODO: implement SOTA baseline
let sota_total_distance = greedy_total_distance;
let sota_total_distance = self.greedy_baseline_total_distance;
// if total_distance > greedy_total_distance {
// return Err(anyhow!(
// "Total distance {} is greater than greedy baseline distance {}",

@@ -107,15 +107,6 @@ pub fn compute_solution(

macro_rules! dispatch_challenge {
($c:ident, cpu) => {{
// library function may exit 87 if it runs out of fuel
let solve_challenge_fn = unsafe {
library.get::<fn(
&$c::Challenge,
&dyn Fn(&$c::Solution) -> Result<()>,
Option<String>,
) -> Result<()>>(b"entry_point")?
};

let track_id = if settings.track_id.starts_with('"') && settings.track_id.ends_with('"')
{
settings.track_id.clone()
@@ -129,6 +120,16 @@ pub fn compute_solution(
stringify!($c)
)
})?;

// library function may exit 87 if it runs out of fuel
let solve_challenge_fn = unsafe {
library.get::<fn(
&$c::Challenge,
&dyn Fn(&$c::Solution) -> Result<()>,
Option<String>,
) -> Result<()>>(b"entry_point")?
};

let challenge = $c::Challenge::generate_instance(&seed, &track)?;

let save_solution_fn = |solution: &$c::Solution| -> Result<()> {
@@ -161,6 +162,20 @@ pub fn compute_solution(
}};

($c:ident, gpu) => {{
let track_id = if settings.track_id.starts_with('"') && settings.track_id.ends_with('"')
{
settings.track_id.clone()
} else {
format!(r#""{}""#, settings.track_id)
};
let track = serde_json::from_str(&track_id).map_err(|_| {
anyhow::anyhow!(
"Failed to parse track_id '{}' as {}::Track",
settings.track_id,
stringify!($c)
)
})?;

if ptx_path.is_none() {
panic!("PTX file is required for GPU challenges.");
}
@@ -197,7 +212,7 @@ pub fn compute_solution(

let challenge = $c::Challenge::generate_instance(
&seed,
settings.size,
&track,
module.clone(),
stream.clone(),
&prop,

@@ -103,6 +103,20 @@ pub fn verify_solution(
}};

($c:ident, gpu) => {{
let track_id = if settings.track_id.starts_with('"') && settings.track_id.ends_with('"')
{
settings.track_id.clone()
} else {
format!(r#""{}""#, settings.track_id)
};
let track = serde_json::from_str(&track_id).map_err(|_| {
anyhow::anyhow!(
"Failed to parse track_id '{}' as {}::Track",
settings.track_id,
stringify!($c)
)
})?;

if ptx_path.is_none() {
panic!("PTX file is required for GPU challenges.");
}
@@ -121,7 +135,7 @@ pub fn verify_solution(

let challenge = $c::Challenge::generate_instance(
&seed,
settings.size,
&track,
module.clone(),
stream.clone(),
&prop,