diff --git a/tig-algorithms/src/vector_search/is_adp_optimal/README.md b/tig-algorithms/src/vector_search/is_adp_optimal/README.md
new file mode 100644
index 0000000..81793ad
--- /dev/null
+++ b/tig-algorithms/src/vector_search/is_adp_optimal/README.md
@@ -0,0 +1,23 @@
+# TIG Code Submission
+
+ ## Submission Details
+
+ * **Challenge Name:** vector_search
+ * **Submission Name:** is_adp_optimal
+ * **Copyright:** 2025 OptimusMaximus
+ * **Identity of Submitter:** OptimusMaximus
+ * **Identity of Creator of Algorithmic Method:** null
+ * **Unique Algorithm Identifier (UAI):** null
+
+ ## License
+
+ The files in this folder are under the following licenses:
+ * TIG Benchmarker Outbound License
+ * TIG Commercial License
+ * TIG Inbound Game License
+ * TIG Innovator Outbound Game License
+ * TIG Open Data License
+ * TIG THV Game License
+
+ Copies of the licenses can be obtained at:
+ https://github.com/tig-foundation/tig-monorepo/tree/main/docs/licenses
\ No newline at end of file
diff --git a/tig-algorithms/src/vector_search/is_adp_optimal/kernels.cu b/tig-algorithms/src/vector_search/is_adp_optimal/kernels.cu
new file mode 100644
index 0000000..9254627
--- /dev/null
+++ b/tig-algorithms/src/vector_search/is_adp_optimal/kernels.cu
@@ -0,0 +1,270 @@
+/*!
+Copyright 2025 OptimusMaximus
+
+Identity of Submitter OptimusMaximus
+
+UAI null
+
+Licensed under the TIG Inbound Game License v2.0 or (at your option) any later
+version (the "License"); you may not use this file except in compliance with the
+License. You may obtain a copy of the License at
+
+https://github.com/tig-foundation/tig-monorepo/tree/main/docs/licenses
+
+Unless required by applicable law or agreed to in writing, software distributed
+under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, either express or implied. See the License for the specific
+language governing permissions and limitations under the License.
+*/
+
+// NOTE(review): the two header names below were missing from the reviewed
+// text and have been reconstructed; confirm them against the submission.
+#include <cuda_runtime.h>
+#include <float.h>
+
+#define MAX_FLOAT 3.402823466e+38F
+
+// Squared Euclidean distance between `a` and `b` over `dims` components.
+// The main loop consumes four components per iteration and a scalar loop
+// handles the remaining 0-3, so `dims` need not be a multiple of 4.
+__device__ float euclidean_distance(const float* a, const float* b, int dims) {
+    float sum = 0.0f;
+    int i;
+    for (i = 0; i < dims - 3; i += 4) {
+        float diff0 = a[i] - b[i];
+        float diff1 = a[i+1] - b[i+1];
+        float diff2 = a[i+2] - b[i+2];
+        float diff3 = a[i+3] - b[i+3];
+        sum = fmaf(diff0, diff0, sum);
+        sum = fmaf(diff1, diff1, sum);
+        sum = fmaf(diff2, diff2, sum);
+        sum = fmaf(diff3, diff3, sum);
+    }
+    for (; i < dims; i++) {
+        float diff = a[i] - b[i];
+        sum = fmaf(diff, diff, sum);
+    }
+    return sum;
+}
+
+// Squared Euclidean distance used on the large-query path.
+// The previous body stepped four components at a time with no tail loop, so
+// it read up to three floats past the end of both vectors whenever `dims`
+// was not a multiple of 4. Delegating keeps the accumulation order for
+// multiples of 4 and stays in bounds otherwise.
+__device__ float euclidean_distance_high(const float* a, const float* b, int dims) {
+    return euclidean_distance(a, b, dims);
+}
+
+// Index of the database vector that seeds cluster `cluster_idx`.
+__device__ int cluster_seed_index(int cluster_idx, int database_size) {
+    return (int)((cluster_idx * 982451653LL + 1566083941LL) % (long long)database_size);
+}
+
+// Scans every database vector assigned to `target_cluster` and updates
+// (*min_dist, *best_idx) whenever a strictly closer vector is found.
+__device__ void scan_cluster(
+    const float* query,
+    const float* database_vectors,
+    const int* cluster_assignments,
+    int database_size,
+    int vector_dims,
+    int target_cluster,
+    float* min_dist,
+    int* best_idx
+) {
+    for (int vec_idx = 0; vec_idx < database_size; vec_idx++) {
+        if (cluster_assignments[vec_idx] != target_cluster) continue;
+        const float* db_vector = database_vectors + (size_t)vec_idx * vector_dims;
+        float dist = euclidean_distance(query, db_vector, vector_dims);
+        if (dist < *min_dist) {
+            *min_dist = dist;
+            *best_idx = vec_idx;
+        }
+    }
+}
+
+// Fills best_dist / best_clusters (length `top_k`, pre-filled by the caller
+// with MAX_FLOAT / -1) with the `top_k` cluster centers nearest to `query`
+// in ascending distance order. On ties the cluster seen first stays ahead.
+__device__ void rank_nearest_clusters(
+    const float* query,
+    const float* cluster_centers,
+    int vector_dims,
+    int num_clusters,
+    int top_k,
+    float* best_dist,
+    int* best_clusters
+) {
+    for (int cluster = 0; cluster < num_clusters; cluster++) {
+        const float* center = cluster_centers + (size_t)cluster * vector_dims;
+        float dist = euclidean_distance(query, center, vector_dims);
+
+        // First slot holding a strictly larger distance, if any.
+        int pos = 0;
+        while (pos < top_k && !(dist < best_dist[pos])) pos++;
+        if (pos == top_k) continue;
+
+        for (int j = top_k - 1; j > pos; j--) {
+            best_dist[j] = best_dist[j - 1];
+            best_clusters[j] = best_clusters[j - 1];
+        }
+        best_dist[pos] = dist;
+        best_clusters[pos] = cluster;
+    }
+}
+
+// Seeds one cluster per block and assigns every database vector to its
+// nearest seed.
+//
+// Launch: gridDim.x == num_clusters (block b owns cluster b), any blockDim.x,
+// no dynamic shared memory.
+//
+// Outputs:
+//   cluster_centers[c * vector_dims + d]  copy of the seed vector of cluster c
+//   cluster_assignments[v]                nearest cluster of database vector v
+//   cluster_sizes[c]                      number of vectors assigned to c
+//
+// Distances are measured against the seed vectors read directly from
+// database_vectors. The earlier version read them back from cluster_centers,
+// which other blocks may not have written yet (there is no grid-wide
+// barrier), so assignments could depend on block scheduling. `num_queries`
+// is unused and kept only so existing launches stay valid.
+extern "C" __global__ void deterministic_clustering(
+    const float* database_vectors,
+    float* cluster_centers,
+    int* cluster_assignments,
+    int* cluster_sizes,
+    int database_size,
+    int vector_dims,
+    int num_clusters,
+    int num_queries
+) {
+    int cluster_idx = blockIdx.x;
+    int tid = threadIdx.x;
+
+    if (cluster_idx >= num_clusters) return;
+    (void)num_queries;
+
+    const float* seed_vector =
+        database_vectors + (size_t)cluster_seed_index(cluster_idx, database_size) * vector_dims;
+    for (int d = tid; d < vector_dims; d += blockDim.x) {
+        cluster_centers[(size_t)cluster_idx * vector_dims + d] = seed_vector[d];
+    }
+
+    if (tid == 0) {
+        cluster_sizes[cluster_idx] = 0;
+    }
+    // The counter must be reset before any thread of this block increments it.
+    __syncthreads();
+
+    for (int vec_idx = tid; vec_idx < database_size; vec_idx += blockDim.x) {
+        const float* vector = database_vectors + (size_t)vec_idx * vector_dims;
+
+        float min_dist = MAX_FLOAT;
+        int best_cluster = 0;
+
+        for (int c = 0; c < num_clusters; c++) {
+            const float* c_center =
+                database_vectors + (size_t)cluster_seed_index(c, database_size) * vector_dims;
+            float dist = euclidean_distance(vector, c_center, vector_dims);
+            if (dist < min_dist) {
+                min_dist = dist;
+                best_cluster = c;
+            }
+        }
+
+        // Every block stores the same assignment; only the owner counts it.
+        cluster_assignments[vec_idx] = best_cluster;
+        if (best_cluster == cluster_idx) {
+            atomicAdd(&cluster_sizes[cluster_idx], 1);
+        }
+    }
+}
+
+// For each query, picks a near database vector by scanning the clusters
+// whose centers are closest to the query.
+//
+// Launch: one thread per query (query = blockIdx.x * blockDim.x +
+// threadIdx.x); surplus threads exit at once. No dynamic shared memory.
+//
+// num_queries <= 4000: the three nearest clusters are scanned first, then
+//   every other non-empty cluster.
+// num_queries  > 4000: the nearest cluster is scanned; the second nearest and
+//   then the remaining clusters are tried only while nothing has been found.
+//
+// results[q] receives the chosen database index, or -1 if none was found.
+extern "C" __global__ void cluster_search(
+    const float* query_vectors,
+    const float* database_vectors,
+    const float* cluster_centers,
+    const int* cluster_assignments,
+    const int* cluster_sizes,
+    int* results,
+    int num_queries,
+    int database_size,
+    int vector_dims,
+    int num_clusters
+) {
+    int query_idx = blockIdx.x * blockDim.x + threadIdx.x;
+    if (query_idx >= num_queries) return;
+
+    const float* query = query_vectors + (size_t)query_idx * vector_dims;
+
+    const bool exhaustive = (num_queries <= 4000);
+    const int top_k = exhaustive ? 3 : 2;
+
+    float best_dist[3] = {MAX_FLOAT, MAX_FLOAT, MAX_FLOAT};
+    int best_clusters[3] = {-1, -1, -1};
+    rank_nearest_clusters(query, cluster_centers, vector_dims, num_clusters,
+                          top_k, best_dist, best_clusters);
+
+    float min_dist = MAX_FLOAT;
+    int best_idx = -1;
+
+    for (int rank = 0; rank < top_k; rank++) {
+        // On the large-query path later ranks are only a fallback.
+        if (!exhaustive && rank > 0 && min_dist != MAX_FLOAT) break;
+
+        int target_cluster = best_clusters[rank];
+        if (target_cluster != -1 && cluster_sizes[target_cluster] > 0) {
+            scan_cluster(query, database_vectors, cluster_assignments,
+                         database_size, vector_dims, target_cluster,
+                         &min_dist, &best_idx);
+        }
+    }
+
+    if (exhaustive) {
+        for (int cluster = 0; cluster < num_clusters; cluster++) {
+            if (cluster == best_clusters[0] || cluster == best_clusters[1] ||
+                cluster == best_clusters[2]) continue;
+            if (cluster_sizes[cluster] == 0) continue;
+
+            scan_cluster(query, database_vectors, cluster_assignments,
+                         database_size, vector_dims, cluster,
+                         &min_dist, &best_idx);
+        }
+    } else if (min_dist == MAX_FLOAT && num_clusters > 0) {
+        // Radius is twice this query's squared distance to the center of
+        // cluster 0. The earlier version took these distances from shared
+        // memory that only thread 0 wrote, i.e. from another query, and read
+        // them without a barrier.
+        // NOTE(review): the nearest-cluster distance may have been intended.
+        float search_radius =
+            euclidean_distance(query, cluster_centers, vector_dims) * 2.0f;
+
+        for (int cluster = 0; cluster < num_clusters; cluster++) {
+            if (cluster == best_clusters[0] || cluster == best_clusters[1]) continue;
+
+            const float* center = cluster_centers + (size_t)cluster * vector_dims;
+            if (euclidean_distance(query, center, vector_dims) >= search_radius) continue;
+            if (cluster_sizes[cluster] == 0) continue;
+
+            scan_cluster(query, database_vectors, cluster_assignments,
+                         database_size, vector_dims, cluster,
+                         &min_dist, &best_idx);
+        }
+    }
+
+    results[query_idx] = best_idx;
+}
diff --git a/tig-algorithms/src/vector_search/is_adp_optimal/mod.rs b/tig-algorithms/src/vector_search/is_adp_optimal/mod.rs
new file mode 100644
index 0000000..9c4bcef
--- /dev/null
+++ b/tig-algorithms/src/vector_search/is_adp_optimal/mod.rs
@@ -0,0 +1,115 @@
+use cudarc::{
+    driver::{safe::LaunchConfig, CudaModule, CudaStream, PushKernelArg},
+    runtime::sys::cudaDeviceProp,
+};
+use std::sync::Arc;
+use serde_json::{Map, Value};
+use tig_challenges::vector_search::*;
+
+/// Solves a vector-search challenge on the GPU.
+///
+/// The database is partitioned around `num_clusters` seed vectors by the
+/// `deterministic_clustering` kernel, then `cluster_search` picks one database
+/// index per query. The indices are handed to `save_solution`.
+///
+/// # Errors
+/// Returns an error if a kernel cannot be loaded or launched, if a device
+/// transfer fails, if the search produced no index for some query, or if
+/// `save_solution` fails.
+pub fn solve_challenge(
+    challenge: &Challenge,
+    save_solution: &dyn Fn(&Solution) -> anyhow::Result<()>,
+    hyperparameters: &Option<Map<String, Value>>,
+    module: Arc<CudaModule>,
+    stream: Arc<CudaStream>,
+    _prop: &cudaDeviceProp,
+) -> anyhow::Result<()> {
+    let vector_dims = challenge.vector_dims as i32;
+    let database_size = challenge.database_size as i32;
+    let num_queries = challenge.difficulty.num_queries as i32;
+
+    let block_size: u32 = 128;
+    // More clusters for larger query batches.
+    let num_clusters: i32 = if num_queries <= 6000 {
+        2
+    } else if num_queries < 9000 {
+        4
+    } else if num_queries < 10000 {
+        6
+    } else if num_queries < 11000 {
+        10
+    } else if num_queries < 12000 {
+        12
+    } else {
+        14
+    };
+
+    let deterministic_clustering = module.load_function("deterministic_clustering")?;
+    let cluster_search = module.load_function("cluster_search")?;
+
+    let mut d_cluster_centers = stream.alloc_zeros::<f32>((num_clusters * vector_dims) as usize)?;
+    let mut d_cluster_assignments = stream.alloc_zeros::<i32>(database_size as usize)?;
+    let mut d_cluster_sizes = stream.alloc_zeros::<i32>(num_clusters as usize)?;
+
+    // One block per cluster; the kernel uses no dynamic shared memory.
+    let cluster_config = LaunchConfig {
+        grid_dim: (num_clusters as u32, 1, 1),
+        block_dim: (block_size, 1, 1),
+        shared_mem_bytes: 0,
+    };
+
+    unsafe {
+        stream.launch_builder(&deterministic_clustering)
+            .arg(&challenge.d_database_vectors)
+            .arg(&mut d_cluster_centers)
+            .arg(&mut d_cluster_assignments)
+            .arg(&mut d_cluster_sizes)
+            .arg(&database_size)
+            .arg(&vector_dims)
+            .arg(&num_clusters)
+            .arg(&num_queries)
+            .launch(cluster_config)?;
+    }
+    stream.synchronize()?;
+
+    let mut d_results = stream.alloc_zeros::<i32>(num_queries as usize)?;
+
+    // One thread per query, rounded up to whole blocks. The earlier large-batch
+    // configuration launched one 128-thread block per query, i.e. 128 times
+    // more threads than queries.
+    let search_config = LaunchConfig {
+        grid_dim: (((num_queries as u32 + block_size - 1) / block_size).max(1), 1, 1),
+        block_dim: (block_size, 1, 1),
+        shared_mem_bytes: 0,
+    };
+
+    unsafe {
+        stream.launch_builder(&cluster_search)
+            .arg(&challenge.d_query_vectors)
+            .arg(&challenge.d_database_vectors)
+            .arg(&d_cluster_centers)
+            .arg(&d_cluster_assignments)
+            .arg(&d_cluster_sizes)
+            .arg(&mut d_results)
+            .arg(&num_queries)
+            .arg(&database_size)
+            .arg(&vector_dims)
+            .arg(&num_clusters)
+            .launch(search_config)?;
+    }
+    stream.synchronize()?;
+
+    let indices = stream.memcpy_dtov(&d_results)?;
+    let mut indexes = Vec::with_capacity(indices.len());
+    for (query, &idx) in indices.iter().enumerate() {
+        // The kernel writes -1 when it found no candidate; casting that to
+        // usize would yield a bogus index.
+        if idx < 0 {
+            return Err(anyhow::anyhow!("no database vector found for query {}", query));
+        }
+        indexes.push(idx as usize);
+    }
+
+    save_solution(&Solution { indexes })?;
+    Ok(())
+}
diff --git a/tig-algorithms/src/vector_search/mod.rs b/tig-algorithms/src/vector_search/mod.rs
index ae3472a..59dfa3a 100644
--- a/tig-algorithms/src/vector_search/mod.rs
+++ b/tig-algorithms/src/vector_search/mod.rs
@@ -132,7 +132,8 @@
 
 // c004_a067
 
-// c004_a068
+pub mod is_adp_optimal;
+pub use is_adp_optimal as c004_a068;
 
 // c004_a069
 