Submitted vector_search/is_adp_optimal

This commit is contained in:
FiveMovesAhead 2025-10-16 11:22:50 +01:00
parent a2fbd8c435
commit e728a43aac
4 changed files with 450 additions and 1 deletions

View File

@ -0,0 +1,23 @@
# TIG Code Submission
## Submission Details
* **Challenge Name:** vector_search
* **Submission Name:** is_adp_optimal
* **Copyright:** 2025 OptimusMaximus
* **Identity of Submitter:** OptimusMaximus
* **Identity of Creator of Algorithmic Method:** null
* **Unique Algorithm Identifier (UAI):** null
## License
The files in this folder are under the following licenses:
* TIG Benchmarker Outbound License
* TIG Commercial License
* TIG Inbound Game License
* TIG Innovator Outbound Game License
* TIG Open Data License
* TIG THV Game License
Copies of the licenses can be obtained at:
https://github.com/tig-foundation/tig-monorepo/tree/main/docs/licenses

View File

@ -0,0 +1,323 @@
/*!
Copyright 2025 OptimusMaximus
Identity of Submitter OptimusMaximus
UAI null
Licensed under the TIG Inbound Game License v2.0 or (at your option) any later
version (the "License"); you may not use this file except in compliance with the
License. You may obtain a copy of the License at
https://github.com/tig-foundation/tig-monorepo/tree/main/docs/licenses
Unless required by applicable law or agreed to in writing, software distributed
under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
CONDITIONS OF ANY KIND, either express or implied. See the License for the specific
language governing permissions and limitations under the License.
*/
#include <cuda_runtime.h>
#include <float.h>
// Largest finite single-precision value. The kernels in this file use it as the
// "nothing found yet" starting point for running minimum distances.
#define MAX_FLOAT 3.402823466e+38F
// Squared Euclidean distance between two float vectors of `dims` components.
//
// No square root is taken, so the value is only meaningful for comparing
// distances against each other. Works for any `dims`; a non-positive `dims`
// yields 0.
//
//   a, b  - pointers to the two vectors (each must hold at least `dims` floats)
//   dims  - number of components to compare
__device__ float euclidean_distance(const float* a, const float* b, int dims) {
    float acc = 0.0f;
    int pos = 0;

    // Bulk part: four components per trip, accumulated strictly in index order
    // with fused multiply-adds.
    const int quad_limit = dims - 3;
    while (pos < quad_limit) {
        #pragma unroll
        for (int lane = 0; lane < 4; ++lane) {
            const float delta = a[pos + lane] - b[pos + lane];
            acc = fmaf(delta, delta, acc);
        }
        pos += 4;
    }

    // Remainder: the last 0..3 components that did not fill a group of four.
    while (pos < dims) {
        const float delta = a[pos] - b[pos];
        acc = fmaf(delta, delta, acc);
        ++pos;
    }

    return acc;
}
// Squared Euclidean distance between two float vectors of `dims` components.
// This is the variant the kernels select when num_queries > 4000.
//
// The bulk loop consumes four components per iteration. A scalar tail loop
// handles the remaining 0..3 components, so the function never reads past
// `dims` elements even when `dims` is not a multiple of 4. For `dims` that are
// a multiple of 4 the accumulation order is unchanged.
//
//   a, b  - pointers to the two vectors (each must hold at least `dims` floats)
//   dims  - number of components to compare; non-positive values yield 0
__device__ float euclidean_distance_high(const float* a, const float* b, int dims) {
    float sum = 0.0f;
    int i = 0;
    // `i + 3 < dims` guarantees a[i..i+3] and b[i..i+3] are all in range.
    for (; i + 3 < dims; i += 4) {
        float diff0 = a[i] - b[i];
        float diff1 = a[i + 1] - b[i + 1];
        float diff2 = a[i + 2] - b[i + 2];
        float diff3 = a[i + 3] - b[i + 3];
        sum = fmaf(diff0, diff0, sum);
        sum = fmaf(diff1, diff1, sum);
        sum = fmaf(diff2, diff2, sum);
        sum = fmaf(diff3, diff3, sum);
    }
    // Tail: components left over when dims % 4 != 0.
    for (; i < dims; i++) {
        float diff = a[i] - b[i];
        sum = fmaf(diff, diff, sum);
    }
    return sum;
}
// Index of the database row used as the fixed seed (center) of `cluster`.
// Requires database_size > 0.
static __device__ __forceinline__ int clustering_seed_index(int cluster, int database_size) {
    return (int)((cluster * 982451653LL + 1566083941LL) % (long long)database_size);
}

// Picks one database vector per cluster as its center and assigns every
// database vector to the nearest center.
//
// Launch layout: one block per cluster (blockIdx.x is the cluster index); the
// threads of a block stride over the database vectors. Blocks with
// blockIdx.x >= num_clusters exit immediately. This kernel does not use
// dynamic shared memory; any amount passed at launch is ignored.
//
//   database_vectors     - database_size rows of vector_dims floats (read only)
//   cluster_centers      - out: num_clusters rows of vector_dims floats; block c
//                          writes row c with a copy of its seed vector
//   cluster_assignments  - out: nearest-cluster index for every database vector
//                          (every block writes the same value for a given vector)
//   cluster_sizes        - out: block c stores how many vectors were assigned to c
//   num_queries          - only selects the distance routine
//                          (<= 4000: euclidean_distance, else euclidean_distance_high)
extern "C" __global__ void deterministic_clustering(
    const float* database_vectors,
    float* cluster_centers,
    int* cluster_assignments,
    int* cluster_sizes,
    int database_size,
    int vector_dims,
    int num_clusters,
    int num_queries
) {
    const int cluster_idx = blockIdx.x;
    const int tid = threadIdx.x;
    if (cluster_idx >= num_clusters) return;

    // An empty database has no seed to pick (the seed formula would take a
    // remainder by zero). Report an empty cluster and stop. The condition is
    // uniform across the block, so returning before the barrier is safe.
    if (database_size <= 0) {
        if (tid == 0) {
            cluster_sizes[cluster_idx] = 0;
        }
        return;
    }

    // 64-bit offsets so row * vector_dims cannot wrap around in 32-bit arithmetic.
    const size_t dims = (size_t)vector_dims;

    // Publish this block's own center for the search kernel.
    const float* seed_vector =
        database_vectors + (size_t)clustering_seed_index(cluster_idx, database_size) * dims;
    for (int d = tid; d < vector_dims; d += blockDim.x) {
        cluster_centers[(size_t)cluster_idx * dims + d] = seed_vector[d];
    }
    if (tid == 0) {
        cluster_sizes[cluster_idx] = 0;
    }
    // The size counter must be reset before any thread of this block adds to it.
    __syncthreads();

    for (int vec_idx = tid; vec_idx < database_size; vec_idx += blockDim.x) {
        const float* vector = database_vectors + (size_t)vec_idx * dims;
        float min_dist = MAX_FLOAT;
        int best_cluster = 0;
        for (int c = 0; c < num_clusters; c++) {
            // Read the candidate center from its seed row in the database, not
            // from cluster_centers: the other rows of cluster_centers are
            // written by other blocks and nothing orders those writes before
            // this read, so they may not be there yet.
            const float* c_center =
                database_vectors + (size_t)clustering_seed_index(c, database_size) * dims;
            float dist = (num_queries <= 4000) ?
                euclidean_distance(vector, c_center, vector_dims) :
                euclidean_distance_high(vector, c_center, vector_dims);
            // Strict '<' keeps the lowest cluster index on ties.
            if (dist < min_dist) {
                min_dist = dist;
                best_cluster = c;
            }
        }
        cluster_assignments[vec_idx] = best_cluster;
        // Each block counts only the members of its own cluster.
        if (best_cluster == cluster_idx) {
            atomicAdd(&cluster_sizes[cluster_idx], 1);
        }
    }
}
// Walks every database vector assigned to `cluster` and tightens the running
// nearest neighbour (min_dist, best_idx) of `query`.
// `use_high_variant` selects euclidean_distance_high instead of euclidean_distance.
static __device__ __forceinline__ void scan_cluster_for_nearest(
    const float* query,
    const float* database_vectors,
    const int* cluster_assignments,
    int database_size,
    int vector_dims,
    int cluster,
    bool use_high_variant,
    float& min_dist,
    int& best_idx
) {
    for (int vec_idx = 0; vec_idx < database_size; vec_idx++) {
        if (cluster_assignments[vec_idx] != cluster) continue;
        // 64-bit offset so vec_idx * vector_dims cannot wrap around.
        const float* db_vector = database_vectors + (size_t)vec_idx * (size_t)vector_dims;
        const float dist = use_high_variant
            ? euclidean_distance_high(query, db_vector, vector_dims)
            : euclidean_distance(query, db_vector, vector_dims);
        // Strict '<' keeps the first vector found on ties.
        if (dist < min_dist) {
            min_dist = dist;
            best_idx = vec_idx;
        }
    }
}

// For every query, writes to results[query] the index of the nearest database
// vector found by a cluster-guided scan, or -1 if no vector was examined.
//
// Two modes, selected by num_queries:
//   * num_queries <= 4000: one block per query (blockIdx.x is the query index;
//     every thread of the block would do the same work). The three closest
//     clusters are scanned first, then every other non-empty cluster, so all
//     assigned vectors are examined.
//   * num_queries  > 4000: one thread per query (global thread index). Only the
//     closest cluster is scanned; the second closest and then the remaining
//     clusters within a radius are tried only if nothing has been found yet.
//
// This kernel does not use dynamic shared memory; any amount passed at launch
// is ignored. All per-query state lives in per-thread variables.
//
//   query_vectors        - num_queries rows of vector_dims floats
//   database_vectors     - database_size rows of vector_dims floats
//   cluster_centers      - num_clusters rows of vector_dims floats
//   cluster_assignments  - cluster index of every database vector
//   cluster_sizes        - member count of every cluster
//   results              - out: one database index per query
extern "C" __global__ void cluster_search(
    const float* query_vectors,
    const float* database_vectors,
    const float* cluster_centers,
    const int* cluster_assignments,
    const int* cluster_sizes,
    int* results,
    int num_queries,
    int database_size,
    int vector_dims,
    int num_clusters
) {
    const size_t dims = (size_t)vector_dims;

    if (num_queries <= 4000) {
        const int query_idx = blockIdx.x;
        if (query_idx >= num_queries) return;
        const float* query = query_vectors + (size_t)query_idx * dims;

        // Rank the three closest cluster centers (slot 0 is the closest).
        float best_dist[3] = {MAX_FLOAT, MAX_FLOAT, MAX_FLOAT};
        int best_clusters[3] = {-1, -1, -1};
        for (int cluster = 0; cluster < num_clusters; cluster++) {
            const float* center = cluster_centers + (size_t)cluster * dims;
            const float dist = euclidean_distance(query, center, vector_dims);
            if (dist < best_dist[0]) {
                best_dist[2] = best_dist[1];
                best_clusters[2] = best_clusters[1];
                best_dist[1] = best_dist[0];
                best_clusters[1] = best_clusters[0];
                best_dist[0] = dist;
                best_clusters[0] = cluster;
            } else if (dist < best_dist[1]) {
                best_dist[2] = best_dist[1];
                best_clusters[2] = best_clusters[1];
                best_dist[1] = dist;
                best_clusters[1] = cluster;
            } else if (dist < best_dist[2]) {
                best_dist[2] = dist;
                best_clusters[2] = cluster;
            }
        }

        float min_dist = MAX_FLOAT;
        int best_idx = -1;

        // Scan the ranked clusters first, closest first.
        for (int rank = 0; rank < 3; rank++) {
            const int target_cluster = best_clusters[rank];
            if (target_cluster != -1 && cluster_sizes[target_cluster] > 0) {
                scan_cluster_for_nearest(query, database_vectors, cluster_assignments,
                                         database_size, vector_dims, target_cluster,
                                         false, min_dist, best_idx);
            }
        }

        // Then every remaining non-empty cluster, in index order.
        for (int cluster = 0; cluster < num_clusters; cluster++) {
            if (cluster == best_clusters[0] || cluster == best_clusters[1] || cluster == best_clusters[2]) continue;
            if (cluster_sizes[cluster] == 0) continue;
            scan_cluster_for_nearest(query, database_vectors, cluster_assignments,
                                     database_size, vector_dims, cluster,
                                     false, min_dist, best_idx);
        }

        results[query_idx] = best_idx;
    } else {
        const int query_idx = blockIdx.x * blockDim.x + threadIdx.x;
        if (query_idx >= num_queries) return;
        const float* query = query_vectors + (size_t)query_idx * dims;

        // Rank the two closest cluster centers for this thread's query.
        float best_dist[2] = {MAX_FLOAT, MAX_FLOAT};
        int best_clusters[2] = {-1, -1};
        for (int cluster = 0; cluster < num_clusters; cluster++) {
            const float* center = cluster_centers + (size_t)cluster * dims;
            const float dist = euclidean_distance_high(query, center, vector_dims);
            if (dist < best_dist[0]) {
                best_dist[1] = best_dist[0];
                best_clusters[1] = best_clusters[0];
                best_dist[0] = dist;
                best_clusters[0] = cluster;
            } else if (dist < best_dist[1]) {
                best_dist[1] = dist;
                best_clusters[1] = cluster;
            }
        }

        float min_dist = MAX_FLOAT;
        int best_idx = -1;

        if (best_clusters[0] != -1 && cluster_sizes[best_clusters[0]] > 0) {
            scan_cluster_for_nearest(query, database_vectors, cluster_assignments,
                                     database_size, vector_dims, best_clusters[0],
                                     true, min_dist, best_idx);
        }

        // The second-closest cluster is consulted only if nothing was found yet.
        if (min_dist == MAX_FLOAT && best_clusters[1] != -1 && cluster_sizes[best_clusters[1]] > 0) {
            scan_cluster_for_nearest(query, database_vectors, cluster_assignments,
                                     database_size, vector_dims, best_clusters[1],
                                     true, min_dist, best_idx);
        }

        // Last resort: the remaining clusters whose center lies within the
        // search radius. Center distances are recomputed here for this
        // thread's own query, so no state is shared between the queries of a
        // block.
        if (min_dist == MAX_FLOAT && num_clusters > 0) {
            // NOTE(review): the radius is twice the distance to the center of
            // cluster 0 (not of the closest cluster) - confirm this is intended.
            const float search_radius =
                euclidean_distance_high(query, cluster_centers, vector_dims) * 2.0f;
            for (int cluster = 0; cluster < num_clusters; cluster++) {
                if (cluster == best_clusters[0] || cluster == best_clusters[1]) continue;
                if (cluster_sizes[cluster] == 0) continue;
                const float* center = cluster_centers + (size_t)cluster * dims;
                if (euclidean_distance_high(query, center, vector_dims) >= search_radius) continue;
                scan_cluster_for_nearest(query, database_vectors, cluster_assignments,
                                         database_size, vector_dims, cluster,
                                         true, min_dist, best_idx);
            }
        }

        results[query_idx] = best_idx;
    }
}

View File

@ -0,0 +1,102 @@
use cudarc::{
driver::{safe::LaunchConfig, CudaModule, CudaStream, PushKernelArg},
runtime::sys::cudaDeviceProp,
};
use std::sync::Arc;
use serde_json::{Map, Value};
use tig_challenges::vector_search::*;
/// Solves a vector-search challenge on the GPU and hands the result to `save_solution`.
///
/// Runs two kernels from `module` on `stream`:
/// 1. `deterministic_clustering` — one block per cluster; fills the cluster
///    centers, per-vector assignments and cluster sizes.
/// 2. `cluster_search` — writes one database index per query.
///
/// The number of clusters is chosen from the number of queries. The solution is
/// the per-query index list copied back from the device.
///
/// # Errors
/// Returns an error if a kernel cannot be loaded or launched, if a device
/// allocation, synchronisation or copy fails, or if `save_solution` fails.
pub fn solve_challenge(
    challenge: &Challenge,
    save_solution: &dyn Fn(&Solution) -> anyhow::Result<()>,
    hyperparameters: &Option<Map<String, Value>>,
    module: Arc<CudaModule>,
    stream: Arc<CudaStream>,
    _prop: &cudaDeviceProp,
) -> anyhow::Result<()> {
    // This solver has no tunable hyperparameters; the argument is accepted for
    // interface compatibility only.
    let _ = hyperparameters;

    let vector_dims = challenge.vector_dims as i32;
    let database_size = challenge.database_size as i32;
    let num_queries = challenge.difficulty.num_queries as i32;
    let block_size: u32 = 128;

    // More queries -> more (smaller) clusters.
    let num_clusters: i32 = match num_queries {
        i32::MIN..=6000 => 2,
        6001..=8999 => 4,
        9000..=9999 => 6,
        10000..=10999 => 10,
        11000..=11999 => 12,
        _ => 14,
    };

    let deterministic_clustering = module.load_function("deterministic_clustering")?;
    let cluster_search = module.load_function("cluster_search")?;

    let mut d_cluster_centers = stream.alloc_zeros::<f32>((num_clusters * vector_dims) as usize)?;
    let mut d_cluster_assignments = stream.alloc_zeros::<i32>(database_size as usize)?;
    let mut d_cluster_sizes = stream.alloc_zeros::<i32>(num_clusters as usize)?;

    // One block per cluster, with room for one vector of f32 in dynamic shared memory.
    let cluster_config = LaunchConfig {
        grid_dim: (num_clusters as u32, 1, 1),
        block_dim: (block_size, 1, 1),
        shared_mem_bytes: (vector_dims * 4) as u32,
    };
    unsafe {
        stream
            .launch_builder(&deterministic_clustering)
            .arg(&challenge.d_database_vectors)
            .arg(&mut d_cluster_centers)
            .arg(&mut d_cluster_assignments)
            .arg(&mut d_cluster_sizes)
            .arg(&database_size)
            .arg(&vector_dims)
            .arg(&num_clusters)
            .arg(&num_queries)
            .launch(cluster_config)?;
    }
    stream.synchronize()?;

    let mut d_results = stream.alloc_zeros::<i32>(num_queries as usize)?;

    let search_config = if num_queries <= 4000 {
        // In this mode the kernel takes the query index from blockIdx.x alone:
        // one single-thread block per query.
        LaunchConfig {
            grid_dim: (num_queries as u32, 1, 1),
            block_dim: (1, 1, 1),
            shared_mem_bytes: 0,
        }
    } else {
        // In this mode the kernel maps one *thread* to each query
        // (blockIdx.x * blockDim.x + threadIdx.x), so only
        // ceil(num_queries / block_size) blocks are needed. Launching
        // num_queries blocks would start block_size times more threads than
        // queries, all of which exit immediately.
        let blocks = (num_queries as u32 + block_size - 1) / block_size;
        LaunchConfig {
            grid_dim: (blocks, 1, 1),
            block_dim: (block_size, 1, 1),
            shared_mem_bytes: (num_clusters * 8) as u32,
        }
    };
    unsafe {
        stream
            .launch_builder(&cluster_search)
            .arg(&challenge.d_query_vectors)
            .arg(&challenge.d_database_vectors)
            .arg(&d_cluster_centers)
            .arg(&d_cluster_assignments)
            .arg(&d_cluster_sizes)
            .arg(&mut d_results)
            .arg(&num_queries)
            .arg(&database_size)
            .arg(&vector_dims)
            .arg(&num_clusters)
            .launch(search_config)?;
    }
    stream.synchronize()?;

    let indices = stream.memcpy_dtov(&d_results)?;
    // NOTE(review): the kernel stores -1 for a query it found no neighbour for;
    // `as usize` turns that into usize::MAX rather than reporting it.
    let indexes = indices.iter().map(|&idx| idx as usize).collect();

    // A failure to save means no solution was recorded, so surface it to the caller.
    save_solution(&Solution { indexes })?;
    Ok(())
}

View File

@ -132,7 +132,8 @@
// c004_a067
// c004_a068
pub mod is_adp_optimal;
pub use is_adp_optimal as c004_a068;
// c004_a069