From e835cfdb731e13b5e82b026feff27b24296bd4c1 Mon Sep 17 00:00:00 2001
From: FiveMovesAhead
Date: Thu, 16 Oct 2025 11:13:54 +0100
Subject: [PATCH] Submitted vector_search/optimax_gpu

---
 tig-algorithms/src/vector_search/mod.rs       |   3 +-
 .../src/vector_search/optimax_gpu/README.md   |  23 +
 .../src/vector_search/optimax_gpu/kernels.cu  |  19 +
 .../src/vector_search/optimax_gpu/mod.rs      | 511 ++++++++++++++++++
 4 files changed, 555 insertions(+), 1 deletion(-)
 create mode 100644 tig-algorithms/src/vector_search/optimax_gpu/README.md
 create mode 100644 tig-algorithms/src/vector_search/optimax_gpu/kernels.cu
 create mode 100644 tig-algorithms/src/vector_search/optimax_gpu/mod.rs

diff --git a/tig-algorithms/src/vector_search/mod.rs b/tig-algorithms/src/vector_search/mod.rs
index ae3472a..1f1c315 100644
--- a/tig-algorithms/src/vector_search/mod.rs
+++ b/tig-algorithms/src/vector_search/mod.rs
@@ -48,7 +48,8 @@
 
 // c004_a025
 
-// c004_a026
+pub mod optimax_gpu;
+pub use optimax_gpu as c004_a026;
 
 // c004_a027
 
diff --git a/tig-algorithms/src/vector_search/optimax_gpu/README.md b/tig-algorithms/src/vector_search/optimax_gpu/README.md
new file mode 100644
index 0000000..80e8642
--- /dev/null
+++ b/tig-algorithms/src/vector_search/optimax_gpu/README.md
@@ -0,0 +1,23 @@
+# TIG Code Submission
+
+ ## Submission Details
+
+ * **Challenge Name:** vector_search
+ * **Submission Name:** optimax_gpu
+ * **Copyright:** 2024 bw-dev36
+ * **Identity of Submitter:** bw-dev36
+ * **Identity of Creator of Algorithmic Method:** null
+ * **Unique Algorithm Identifier (UAI):** null
+
+ ## License
+
+ The files in this folder are under the following licenses:
+ * TIG Benchmarker Outbound License
+ * TIG Commercial License
+ * TIG Inbound Game License
+ * TIG Innovator Outbound Game License
+ * TIG Open Data License
+ * TIG THV Game License
+
+ Copies of the licenses can be obtained at:
+ https://github.com/tig-foundation/tig-monorepo/tree/main/docs/licenses
\ No newline at end of file
diff --git a/tig-algorithms/src/vector_search/optimax_gpu/kernels.cu b/tig-algorithms/src/vector_search/optimax_gpu/kernels.cu
new file mode 100644
index 0000000..7f0e30f
--- /dev/null
+++ b/tig-algorithms/src/vector_search/optimax_gpu/kernels.cu
@@ -0,0 +1,19 @@
+/*!
+Copyright 2024 bw-dev36
+
+Licensed under the TIG Inbound Game License v1.0 or (at your option) any later
+version (the "License"); you may not use this file except in compliance with the
+License. You may obtain a copy of the License at
+
+https://github.com/tig-foundation/tig-monorepo/tree/main/docs/licenses
+
+Unless required by applicable law or agreed to in writing, software distributed
+under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, either express or implied. See the License for the specific
+language governing permissions and limitations under the License.
+*/
+
+extern "C" __global__ void do_nothing()
+{
+    // This kernel does nothing
+}
diff --git a/tig-algorithms/src/vector_search/optimax_gpu/mod.rs b/tig-algorithms/src/vector_search/optimax_gpu/mod.rs
new file mode 100644
index 0000000..7b0b116
--- /dev/null
+++ b/tig-algorithms/src/vector_search/optimax_gpu/mod.rs
@@ -0,0 +1,511 @@
+use anyhow::{anyhow, Result};
+use cudarc::{
+    driver::{safe::LaunchConfig, CudaModule, CudaStream, PushKernelArg},
+    runtime::sys::cudaDeviceProp,
+};
+use std::sync::Arc;
+use serde_json::{Map, Value};
+use tig_challenges::vector_search::{Challenge, Solution};
+
+/// Entry point of the `optimax_gpu` submission.
+///
+/// The algorithm has been retired: no argument is read and the call always
+/// fails with "This algorithm is no longer compatible.".
+pub fn solve_challenge(
+    challenge: &Challenge,
+    save_solution: &dyn Fn(&Solution) -> anyhow::Result<()>,
+    hyperparameters: &Option<Map<String, Value>>,
+    module: Arc<CudaModule>,
+    stream: Arc<CudaStream>,
+    prop: &cudaDeviceProp,
+) -> anyhow::Result<()> {
+    Err(anyhow!("This algorithm is no longer compatible."))
+}
+
+// Old code that is no longer compatible. `cfg(any())` is always false, so the
+// module is never compiled and no undeclared cfg name (`none`) is needed.
+#[cfg(any())]
+mod dead_code {
+    use anyhow::Ok;
+    use tig_challenges::vector_search::*;
+    use std::cmp::Ordering;
+    use std::collections::BinaryHeap;
+
+    /// Node of a k-d tree whose points are borrowed from the vector database.
+    struct KDNode<'a> {
+        point: &'a [f32],
+        left: Option<Box<KDNode<'a>>>,
+        right: Option<Box<KDNode<'a>>>,
+        /// Position of `point` in the database.
+        index: usize,
+    }
+
+    impl<'a> KDNode<'a> {
+        /// Creates a childless node for `point`, remembering its database `index`.
+        fn new(point: &'a [f32], index: usize) -> Self {
+            KDNode {
+                point,
+                left: None,
+                right: None,
+                index,
+            }
+        }
+    }
+
+    /// Reorders `arr` so that position `k` holds the element a full sort by
+    /// `compare` would place there, with nothing greater before it.
+    fn quickselect_by<F>(arr: &mut [(&[f32], usize)], k: usize, compare: &F)
+    where
+        F: Fn(&(&[f32], usize), &(&[f32], usize)) -> Ordering,
+    {
+        if arr.len() <= 1 {
+            return;
+        }
+
+        let pivot_index = partition(arr, compare);
+        if k < pivot_index {
+            quickselect_by(&mut arr[..pivot_index], k, compare);
+        } else if k > pivot_index {
+            quickselect_by(&mut arr[pivot_index + 1..], k - pivot_index - 1, compare);
+        }
+    }
+
+    /// Partitions `arr` around its middle element and returns the pivot's final
+    /// position; everything before it compares `Less` than the pivot. `arr` must
+    /// not be empty.
+    fn partition<F>(arr: &mut [(&[f32], usize)], compare: &F) -> usize
+    where
+        F: Fn(&(&[f32], usize), &(&[f32], usize)) -> Ordering,
+    {
+        let last = arr.len() - 1;
+        // Park the pivot at the end while the rest is partitioned.
+        arr.swap(arr.len() >> 1, last);
+
+        let mut store_index = 0;
+        for i in 0..last {
+            if compare(&arr[i], &arr[last]) == Ordering::Less {
+                arr.swap(i, store_index);
+                store_index += 1;
+            }
+        }
+        arr.swap(store_index, last);
+        store_index
+    }
+
+    /// Builds a k-d tree over `points`, reordering the slice in the process.
+    ///
+    /// The split axis cycles through the dimensions of the first point (the old
+    /// code hard-coded 250), which keeps it in step with the `target.len()` cycle
+    /// used by `nearest_neighbor_search`. Returns `None` when `points` is empty
+    /// or the points have no components.
+    fn build_kd_tree<'a>(points: &mut [(&'a [f32], usize)]) -> Option<Box<KDNode<'a>>> {
+        let num_dimensions = points.first()?.0.len();
+        if num_dimensions == 0 {
+            return None;
+        }
+        build_kd_subtree(points, 0, num_dimensions)
+    }
+
+    /// Recursive worker for `build_kd_tree`: the median along the current axis
+    /// becomes the node and each half of the slice becomes one subtree.
+    ///
+    /// Children are built before they are attached, so the raw `*mut KDNode`
+    /// parent pointers and the `unsafe` block of the old explicit-stack loop are
+    /// gone. Each call halves the slice, so the recursion stays shallow.
+    fn build_kd_subtree<'a>(
+        points: &mut [(&'a [f32], usize)],
+        depth: usize,
+        num_dimensions: usize,
+    ) -> Option<Box<KDNode<'a>>> {
+        let axis = depth % num_dimensions;
+        let median = points.len() / 2;
+        // `total_cmp` orders NaN instead of panicking like `partial_cmp().unwrap()`.
+        quickselect_by(points, median, &|a, b| a.0[axis].total_cmp(&b.0[axis]));
+
+        let (lower, rest) = points.split_at_mut(median);
+        // `rest` is empty only for an empty slice, which ends the recursion.
+        let (&mut (point, index), upper) = rest.split_first_mut()?;
+        let mut node = Box::new(KDNode::new(point, index));
+        node.left = build_kd_subtree(lower, depth + 1, num_dimensions);
+        node.right = build_kd_subtree(upper, depth + 1, num_dimensions);
+        Some(node)
+    }
+
+    /// Squared Euclidean distance between `a` and `b`, summed over the
+    /// components they share.
+    #[inline(always)]
+    fn squared_euclidean_distance(a: &[f32], b: &[f32]) -> f32 {
+        a.iter().zip(b).fold(0.0, |sum, (x, y)| {
+            let diff = x - y;
+            sum + diff * diff
+        })
+    }
+
+    /// Squared Euclidean distance that gives up early: `f32::MAX` is returned
+    /// as soon as the running sum exceeds `current_min`. Four components are
+    /// accumulated per step, then the remainder one at a time.
+    #[inline(always)]
+    fn early_stopping_distance(a: &[f32], b: &[f32], current_min: f32) -> f32 {
+        let mut sum = 0.0;
+        let mut i = 0;
+        while i + 3 < a.len() {
+            let diff0 = a[i] - b[i];
+            let diff1 = a[i + 1] - b[i + 1];
+            let diff2 = a[i + 2] - b[i + 2];
+            let diff3 = a[i + 3] - b[i + 3];
+
+            sum += diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3;
+
+            if sum > current_min {
+                return f32::MAX;
+            }
+
+            i += 4;
+        }
+
+        while i < a.len() {
+            let diff = a[i] - b[i];
+            sum += diff * diff;
+
+            if sum > current_min {
+                return f32::MAX;
+            }
+
+            i += 1;
+        }
+
+        sum
+    }
+
+    /// Depth-first nearest-neighbour search from `root`.
+    ///
+    /// `best` carries the smallest squared distance seen so far together with
+    /// the database index that produced it, and is updated in place.
+    fn nearest_neighbor_search<'a>(
+        root: &Option<Box<KDNode<'a>>>,
+        target: &[f32],
+        best: &mut (f32, Option<usize>),
+    ) {
+        let num_dimensions = target.len();
+        if num_dimensions == 0 {
+            // Nothing to compare against, and `depth % 0` below would panic.
+            return;
+        }
+        let mut stack = Vec::with_capacity(64);
+
+        if let Some(node) = root {
+            stack.push((node.as_ref(), 0));
+        }
+
+        while let Some((node, depth)) = stack.pop() {
+            let axis = depth % num_dimensions;
+            let dist = early_stopping_distance(node.point, target, best.0);
+
+            if dist < best.0 {
+                best.0 = dist;
+                best.1 = Some(node.index);
+            }
+
+            let diff = target[axis] - node.point[axis];
+            let sqr_diff = diff * diff;
+
+            // The far side can only hold a closer point if the splitting plane
+            // itself is closer than the current best.
+            if sqr_diff < best.0 {
+                if let Some(farther_node) = if diff < 0.0 { &node.right } else { &node.left } {
+                    stack.push((farther_node.as_ref(), depth + 1));
+                }
+            }
+
+            // Pushed last so the near side is popped and explored first.
+            if let Some(nearer_node) = if diff < 0.0 { &node.left } else { &node.right } {
+                stack.push((nearer_node.as_ref(), depth + 1));
+            }
+        }
+    }
+
+    /// Component-wise mean of `vectors`.
+    ///
+    /// The dimensionality is taken from the first vector (it used to be
+    /// hard-coded to 250). An empty input yields an empty mean instead of a
+    /// vector of NaNs produced by dividing by zero.
+    fn calculate_mean_vector(vectors: &[&[f32]]) -> Vec<f32> {
+        let Some(first) = vectors.first() else {
+            return Vec::new();
+        };
+        let mut mean_vector = vec![0.0; first.len()];
+
+        for vector in vectors {
+            for (total, &value) in mean_vector.iter_mut().zip(vector.iter()) {
+                *total += value;
+            }
+        }
+
+        let num_vectors = vectors.len() as f32;
+        for total in &mut mean_vector {
+            *total /= num_vectors;
+        }
+
+        mean_vector
+    }
+
+    /// `f32` wrapper ordered by `f32::total_cmp`.
+    ///
+    /// Every comparison goes through `cmp`, so `Eq` and `Ord` agree even for
+    /// NaN; the old `==` / `partial_cmp().unwrap_or(Equal)` pair did not.
+    #[derive(Debug)]
+    struct FloatOrd(f32);
+
+    impl PartialEq for FloatOrd {
+        fn eq(&self, other: &Self) -> bool {
+            self.cmp(other) == Ordering::Equal
+        }
+    }
+
+    impl Eq for FloatOrd {}
+
+    impl PartialOrd for FloatOrd {
+        fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+            Some(self.cmp(other))
+        }
+    }
+
+    impl Ord for FloatOrd {
+        fn cmp(&self, other: &Self) -> Ordering {
+            self.0.total_cmp(&other.0)
+        }
+    }
+
+    /// Returns the positions of the `k` smallest values of `distances`, in
+    /// the heap's internal order. Shared by the CPU and CUDA pre-filters.
+    fn k_smallest(distances: impl Iterator<Item = f32>, k: usize) -> Vec<usize> {
+        // Max-heap: the worst distance kept so far sits on top, ready to be evicted.
+        let mut heap: BinaryHeap<(FloatOrd, usize)> = BinaryHeap::with_capacity(k);
+
+        for (index, dist) in distances.enumerate() {
+            let candidate = FloatOrd(dist);
+            if heap.len() < k {
+                heap.push((candidate, index));
+            } else if heap.peek().map_or(false, |(top, _)| candidate < *top) {
+                heap.pop();
+                heap.push((candidate, index));
+            }
+        }
+
+        heap.into_iter().map(|(_, index)| index).collect()
+    }
+
+    /// Keeps the `k` database vectors closest to the mean of the query vectors,
+    /// each paired with its database index.
+    fn filter_relevant_vectors<'a>(
+        database: &'a [Vec<f32>],
+        query_vectors: &[Vec<f32>],
+        k: usize,
+    ) -> Vec<(&'a [f32], usize)> {
+        let query_refs: Vec<&[f32]> = query_vectors.iter().map(|v| &v[..]).collect();
+        let mean_query_vector = calculate_mean_vector(&query_refs);
+        let distances = database
+            .iter()
+            .map(|vector| squared_euclidean_distance(&mean_query_vector, vector));
+
+        k_smallest(distances, k)
+            .into_iter()
+            .map(|index| (&database[index][..], index))
+            .collect()
+    }
+
+    /// Size of the pre-filtered subset, tuned per query count and difficulty.
+    /// Arms that used to return the same value on both sides of a threshold
+    /// are merged; the selected sizes are unchanged.
+    fn subset_size_for(challenge: &Challenge) -> usize {
+        let better_than_baseline = challenge.difficulty.better_than_baseline;
+        match challenge.query_vectors.len() {
+            10..=19 => 4200,
+            20..=28 if better_than_baseline <= 465 => 3000,
+            20..=28 => 6000, // need more fuel
+            29..=50 if better_than_baseline <= 480 => 2000,
+            29..=45 => 6000,
+            46..=50 => 5000, // need more fuel
+            51..=70 => 3000,
+            71..=100 if better_than_baseline <= 480 => 1500,
+            71..=100 => 2500, // need more fuel
+            _ => 1000, // need more fuel
+        }
+    }
+
+    /// Looks up the nearest subset vector for every query, in query order.
+    ///
+    /// Returns `None` if any query finds no neighbour (for example when the
+    /// subset is empty). The old loops silently skipped such queries, which
+    /// produced a `Solution` with fewer indexes than queries.
+    fn nearest_indexes(
+        query_vectors: &[Vec<f32>],
+        mut subset: Vec<(&[f32], usize)>,
+    ) -> Option<Vec<usize>> {
+        let kd_tree = build_kd_tree(&mut subset);
+
+        query_vectors
+            .iter()
+            .map(|query| {
+                let mut best = (f32::MAX, None);
+                nearest_neighbor_search(&kd_tree, query, &mut best);
+                best.1
+            })
+            .collect()
+    }
+
+    /// CPU solver: pre-filters the database around the mean query, then answers
+    /// each query from a k-d tree over that subset. `Ok(None)` means at least
+    /// one query could not be answered.
+    pub fn solve_challenge(challenge: &Challenge) -> anyhow::Result<Option<Solution>> {
+        let subset = filter_relevant_vectors(
+            &challenge.vector_database,
+            &challenge.query_vectors,
+            subset_size_for(challenge),
+        );
+
+        Ok(nearest_indexes(&challenge.query_vectors, subset).map(|indexes| Solution { indexes }))
+    }
+
+    #[cfg(feature = "cuda")]
+    mod gpu_optimisation {
+        use super::*;
+        use cudarc::driver::*;
+        use std::{collections::HashMap, sync::Arc};
+        use tig_challenges::CudaKernel;
+
+        /// CUDA source of `filter_vectors`: one thread per database vector writes
+        /// that vector's squared distance to the query mean into `distances`.
+        pub const KERNEL: Option<CudaKernel> = Some(CudaKernel {
+            src: r#"
+
+            extern "C" __global__ void filter_vectors(float* query_mean, float* vectors, float* distances, int num_vectors, int num_dimensions) {
+                int idx = blockIdx.x * blockDim.x + threadIdx.x;
+                if (idx < num_vectors) {
+                    float dist = 0.0;
+                    for (int d = 0; d < num_dimensions; ++d) {
+                        float diff = query_mean[d] - vectors[idx * num_dimensions + d];
+                        dist += diff * diff;
+                    }
+                    distances[idx] = dist;
+                }
+            }
+
+            "#,
+
+            funcs: &["filter_vectors"],
+        });
+
+        /// GPU-assisted solver: the pre-filter runs on the device, the k-d tree
+        /// build and the searches stay on the host. `Ok(None)` means at least one
+        /// query could not be answered.
+        pub fn cuda_solve_challenge(
+            challenge: &Challenge,
+            dev: &Arc<CudaDevice>,
+            funcs: HashMap<&'static str, CudaFunction>,
+        ) -> anyhow::Result<Option<Solution>> {
+            let subset = cuda_filter_relevant_vectors(
+                &challenge.vector_database,
+                &challenge.query_vectors,
+                subset_size_for(challenge),
+                dev,
+                funcs,
+            )?;
+
+            Ok(nearest_indexes(&challenge.query_vectors, subset)
+                .map(|indexes| Solution { indexes }))
+        }
+
+        /// Device version of `filter_relevant_vectors`.
+        ///
+        /// # Errors
+        ///
+        /// Fails when the `filter_vectors` kernel is missing, a database vector
+        /// does not have the query dimensionality, a size does not fit the
+        /// kernel's `int` parameters, or a CUDA call fails.
+        fn cuda_filter_relevant_vectors<'a>(
+            database: &'a [Vec<f32>],
+            query_vectors: &[Vec<f32>],
+            k: usize,
+            dev: &Arc<CudaDevice>,
+            mut funcs: HashMap<&'static str, CudaFunction>,
+        ) -> anyhow::Result<Vec<(&'a [f32], usize)>> {
+            let query_refs: Vec<&[f32]> = query_vectors.iter().map(|v| &v[..]).collect();
+            let mean_query_vector = calculate_mean_vector(&query_refs);
+            if database.is_empty() || mean_query_vector.is_empty() {
+                // Nothing to rank; also avoids empty device copies and a launch with no blocks.
+                return Ok(Vec::new());
+            }
+            if database.iter().any(|v| v.len() != mean_query_vector.len()) {
+                return Err(anyhow::anyhow!(
+                    "database vectors must have the same dimensionality as the queries"
+                ));
+            }
+
+            let flattened_database: Vec<f32> = database.iter().flatten().copied().collect();
+            // The kernel indexes `vectors` with `int` arithmetic, so the total element
+            // count has to fit in an `i32` as well.
+            i32::try_from(flattened_database.len())?;
+            let num_vectors = i32::try_from(database.len())?;
+            let num_dimensions = i32::try_from(mean_query_vector.len())?;
+            let kernel = funcs
+                .remove("filter_vectors")
+                .ok_or_else(|| anyhow::anyhow!("CUDA kernel `filter_vectors` is not loaded"))?;
+
+            let database_dev = dev.htod_sync_copy(&flattened_database)?;
+            let mean_query_dev = dev.htod_sync_copy(&mean_query_vector)?;
+            let mut distances_dev = dev.alloc_zeros::<f32>(database.len())?;
+            let cfg = LaunchConfig {
+                block_dim: (256, 1, 1),
+                grid_dim: ((num_vectors as u32 + 255) / 256, 1, 1),
+                shared_mem_bytes: 0,
+            };
+            // SAFETY: every database vector was checked to hold `num_dimensions`
+            // components, so the kernel's `vectors[idx * num_dimensions + d]`
+            // reads stay inside `database_dev`, and `distances_dev` has one slot
+            // for each `idx < num_vectors`.
+            unsafe {
+                kernel.launch(
+                    cfg,
+                    (
+                        &mean_query_dev,
+                        &database_dev,
+                        &mut distances_dev,
+                        num_vectors,
+                        num_dimensions,
+                    ),
+                )
+            }?;
+            let mut distances_host = vec![0.0f32; database.len()];
+            dev.dtoh_sync_copy_into(&distances_dev, &mut distances_host)?;
+
+            Ok(k_smallest(distances_host.into_iter(), k)
+                .into_iter()
+                .map(|index| (&database[index][..], index))
+                .collect())
+        }
+
+        /// Placeholder for a device-side tree build; always returns `None`.
+        fn cuda_build_kd_tree<'a>(
+            subset: &mut [(&'a [f32], usize)],
+            dev: &Arc<CudaDevice>,
+            funcs: &mut HashMap<&'static str, CudaFunction>,
+        ) -> Option<Box<KDNode<'a>>> {
+            None
+        }
+
+        /// Placeholder for a device-side search; does nothing and returns `Ok(())`.
+        fn cuda_nearest_neighbor_search(
+            kd_tree: &Option<Box<KDNode<'_>>>,
+            query: &[f32],
+            best: &mut (f32, Option<usize>),
+            dev: &Arc<CudaDevice>,
+            funcs: &mut HashMap<&'static str, CudaFunction>,
+        ) -> anyhow::Result<()> {
+            Ok(())
+        }
+    }
+
+    #[cfg(feature = "cuda")]
+    pub use gpu_optimisation::{cuda_solve_challenge, KERNEL};
+}
\ No newline at end of file