From 0c8aaf1cd2e6b4f77003fc4a3f536ffb329ae23b Mon Sep 17 00:00:00 2001 From: FiveMovesAhead Date: Tue, 2 Dec 2025 16:54:11 +0000 Subject: [PATCH] Submitted vector_search/fast_search_cuda --- .../vector_search/fast_search_cuda/README.md | 23 ++ .../vector_search/fast_search_cuda/kernels.cu | 19 ++ .../src/vector_search/fast_search_cuda/mod.rs | 223 ++++++++++++++++++ tig-algorithms/src/vector_search/mod.rs | 3 +- 4 files changed, 267 insertions(+), 1 deletion(-) create mode 100644 tig-algorithms/src/vector_search/fast_search_cuda/README.md create mode 100644 tig-algorithms/src/vector_search/fast_search_cuda/kernels.cu create mode 100644 tig-algorithms/src/vector_search/fast_search_cuda/mod.rs diff --git a/tig-algorithms/src/vector_search/fast_search_cuda/README.md b/tig-algorithms/src/vector_search/fast_search_cuda/README.md new file mode 100644 index 00000000..38eea73b --- /dev/null +++ b/tig-algorithms/src/vector_search/fast_search_cuda/README.md @@ -0,0 +1,23 @@ +# TIG Code Submission + +## Submission Details + +* **Challenge Name:** vector_search +* **Algorithm Name:** fast_search_cuda +* **Copyright:** 2024 Cortex & Haz +* **Identity of Submitter:** Cortex & Haz +* **Identity of Creator of Algorithmic Method:** null +* **Unique Algorithm Identifier (UAI):** null + +## License + +The files in this folder are under the following licenses: +* TIG Benchmarker Outbound License +* TIG Commercial License +* TIG Inbound Game License +* TIG Innovator Outbound Game License +* TIG Open Data License +* TIG THV Game License + +Copies of the licenses can be obtained at: +https://github.com/tig-foundation/tig-monorepo/tree/main/docs/licenses \ No newline at end of file diff --git a/tig-algorithms/src/vector_search/fast_search_cuda/kernels.cu b/tig-algorithms/src/vector_search/fast_search_cuda/kernels.cu new file mode 100644 index 00000000..7dff314a --- /dev/null +++ b/tig-algorithms/src/vector_search/fast_search_cuda/kernels.cu @@ -0,0 +1,19 @@ +/*! +Copyright 2024 Cortex & Haz + +Licensed under the TIG Inbound Game License v1.0 or (at your option) any later +version (the "License"); you may not use this file except in compliance with the +License. You may obtain a copy of the License at + +https://github.com/tig-foundation/tig-monorepo/tree/main/docs/licenses + +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the specific +language governing permissions and limitations under the License. + */ + +extern "C" __global__ void do_nothing() +{ + // This kernel does nothing +} diff --git a/tig-algorithms/src/vector_search/fast_search_cuda/mod.rs b/tig-algorithms/src/vector_search/fast_search_cuda/mod.rs new file mode 100644 index 00000000..43020c09 --- /dev/null +++ b/tig-algorithms/src/vector_search/fast_search_cuda/mod.rs @@ -0,0 +1,223 @@ +use anyhow::{anyhow, Result}; +use cudarc::{ + driver::{safe::LaunchConfig, CudaModule, CudaStream, PushKernelArg}, + runtime::sys::cudaDeviceProp, +}; +use serde_json::{Map, Value}; +use std::sync::Arc; +use tig_challenges::vector_search::{Challenge, Solution}; + +pub fn solve_challenge( + challenge: &Challenge, + save_solution: &dyn Fn(&Solution) -> anyhow::Result<()>, + hyperparameters: &Option>, + module: Arc, + stream: Arc, + prop: &cudaDeviceProp, +) -> anyhow::Result<()> { + Err(anyhow!("This algorithm is no longer compatible.")) +} + +// Old code that is no longer compatible +#[cfg(none)] +mod dead_code { + use anyhow::Result; + use tig_challenges::vector_search::{Challenge, Solution}; + + fn squared_distance(v1: &[f32], v2: &[f32]) -> f32 { + v1.iter() + .zip(v2.iter()) + .map(|(a, b)| (a - b) * (a - b)) + .sum() + } + + pub fn solve_challenge(challenge: &Challenge) -> Result> { + let max_distance_sq = challenge.max_distance * challenge.max_distance; + + let indexes: Vec = challenge + .query_vectors + .iter() + .filter_map(|query| { + challenge + .vector_database + .iter() + .enumerate() + .find_map(|(i, vector)| { + if squared_distance(query, vector) <= max_distance_sq { + Some(i) + } else { + None + } + }) + }) + .collect(); + + if indexes.len() == challenge.query_vectors.len() { + Ok(Some(Solution { indexes })) + } else { + Ok(None) + } + } + + #[cfg(feature = "cuda")] + mod gpu_optimisation { + use super::*; + use cudarc::driver::*; + use std::{collections::HashMap, ptr, sync::Arc, time::Instant}; + use tig_challenges::CudaKernel; + + const BLOCK_SIZE: u32 = 256; + + pub const KERNEL: Option = Some(CudaKernel { + src: r#" + #define INT_MAX (2147483647U) + #define FLT_MAX (3.402823466e+38F) + + __device__ float squared_distance(const float * v1, const float * v2, int dim) { + float sum = 0.0f; + for (int i = 0; i < dim; i++) { + float diff = v1[i] - v2[i]; + sum += diff * diff; + } + return sum; + } + + extern "C" __global__ void fast_search( + const float * query_vectors, + const float * vector_database, + int query_count, + int vector_count, + int vector_dim, + float max_distance_sq, + int shared_mem_size, + int * result_indexes + ) { + extern __shared__ int sh_mem[]; + int queryIdx = blockIdx.x; + int thIdx = threadIdx.x; + int * sh_indexes = sh_mem; + assert(shared_mem_size >= sizeof(sh_indexes[0]) * blockDim.x); + + if (queryIdx >= query_count) { + return; + } + + const float * query = &query_vectors[queryIdx * vector_dim]; + float minDistance = FLT_MAX; + int minIndex = -1; + for (int i = 0; i < vector_count; i += blockDim.x) { + int vectorIdx = i + thIdx; + const float * vector = &vector_database[vectorIdx * vector_dim]; + float distance = squared_distance(query, vector, vector_dim); + if ((distance < minDistance) && (distance <= max_distance_sq)) { + minDistance = distance; + minIndex = vectorIdx; + break; + } + } + + sh_indexes[thIdx] = minIndex; + __syncthreads(); + + if (thIdx == 0) { + minIndex = INT_MAX; + for (int i = 0; i < blockDim.x; i++) { + if ((sh_indexes[i] > -1) && (sh_indexes[i] < minIndex)) { + minIndex = sh_indexes[i]; + } + } + result_indexes[queryIdx] = minIndex; + } + } + "#, + funcs: &["fast_search"], + }); + + fn flatten_vector_database(database: &[Vec]) -> Vec { + let total_elements = database.len() * database[0].len(); + let mut database_flat: Vec = Vec::with_capacity(total_elements); + let v_len = database[0].len(); + let mut offset = 0; + unsafe { + database_flat.set_len(total_elements); + for v in database.iter() { + let src = v.as_ptr(); + let dest = database_flat.as_mut_ptr().add(offset); + ptr::copy_nonoverlapping(src, dest, v_len); + offset += v_len; + } + } + + database_flat + } + + pub fn cuda_solve_challenge( + challenge: &Challenge, + dev: &Arc, + mut funcs: HashMap<&'static str, CudaFunction>, + ) -> anyhow::Result> { + let vector_count = challenge.vector_database.len(); + let query_count = challenge.query_vectors.len(); + let vector_dim = challenge.query_vectors[0].len(); + let max_distance_sq = challenge.max_distance * challenge.max_distance; + + /* Prepare databases */ + let vector_database_flat = flatten_vector_database(&challenge.vector_database); + let query_vectors_flat = flatten_vector_database(&challenge.query_vectors); + + /* Copy data to GPU */ + let d_vector_database = dev.htod_copy(vector_database_flat)?; + let d_query_vectors = dev.htod_copy(query_vectors_flat)?; + let d_result_indexes = dev.alloc_zeros::(query_count)?; + dev.synchronize()?; + + /* Fire Kernel */ + unsafe { + const SHARED_MEM_SIZE: u32 = BLOCK_SIZE * std::mem::size_of::() as u32 * 2; + funcs.remove("fast_search").unwrap().launch( + LaunchConfig { + grid_dim: (query_count as u32, 1, 1), + block_dim: (BLOCK_SIZE, 1, 1), + shared_mem_bytes: SHARED_MEM_SIZE, + }, + ( + &d_query_vectors, + &d_vector_database, + query_count as i32, + vector_count as i32, + vector_dim as i32, + max_distance_sq, + SHARED_MEM_SIZE as i32, + &d_result_indexes, + ), + )?; + } + + /* Get data from kernel */ + let result_indexes = dev.dtoh_sync_copy(&d_result_indexes)?; + let indexes: Vec = result_indexes + .into_iter() + .filter_map(|i| { + if i != std::i32::MAX { + Some(i as usize) + } else { + None + } + }) + .collect(); + + if indexes.len() == query_count { + Ok(Some(Solution { indexes })) + } else { + Ok(None) + } + } + } + + #[cfg(feature = "cuda")] + pub use gpu_optimisation::{cuda_solve_challenge, KERNEL}; +} + +pub fn help() { + println!("No help information available."); +} diff --git a/tig-algorithms/src/vector_search/mod.rs b/tig-algorithms/src/vector_search/mod.rs index ae3472ac..58ab432c 100644 --- a/tig-algorithms/src/vector_search/mod.rs +++ b/tig-algorithms/src/vector_search/mod.rs @@ -56,7 +56,8 @@ // c004_a029 -// c004_a030 +pub mod fast_search_cuda; +pub use fast_search_cuda as c004_a030; // c004_a031