mirror of
https://github.com/tig-pool-nk/tig-monorepo.git
synced 2026-02-21 11:29:31 +08:00
Submitted vector_search/is_adp_optimal
This commit is contained in:
parent
a2fbd8c435
commit
e728a43aac
23
tig-algorithms/src/vector_search/is_adp_optimal/README.md
Normal file
23
tig-algorithms/src/vector_search/is_adp_optimal/README.md
Normal file
@ -0,0 +1,23 @@
|
||||
# TIG Code Submission
|
||||
|
||||
## Submission Details
|
||||
|
||||
* **Challenge Name:** vector_search
|
||||
* **Submission Name:** is_adp_optimal
|
||||
* **Copyright:** 2025 OptimusMaximus
|
||||
* **Identity of Submitter:** OptimusMaximus
|
||||
* **Identity of Creator of Algorithmic Method:** null
|
||||
* **Unique Algorithm Identifier (UAI):** null
|
||||
|
||||
## License
|
||||
|
||||
The files in this folder are under the following licenses:
|
||||
* TIG Benchmarker Outbound License
|
||||
* TIG Commercial License
|
||||
* TIG Inbound Game License
|
||||
* TIG Innovator Outbound Game License
|
||||
* TIG Open Data License
|
||||
* TIG THV Game License
|
||||
|
||||
Copies of the licenses can be obtained at:
|
||||
https://github.com/tig-foundation/tig-monorepo/tree/main/docs/licenses
|
||||
323
tig-algorithms/src/vector_search/is_adp_optimal/kernels.cu
Normal file
323
tig-algorithms/src/vector_search/is_adp_optimal/kernels.cu
Normal file
@ -0,0 +1,323 @@
|
||||
/*!
|
||||
Copyright 2025 OptimusMaximus
|
||||
|
||||
Identity of Submitter OptimusMaximus
|
||||
|
||||
UAI null
|
||||
|
||||
Licensed under the TIG Inbound Game License v2.0 or (at your option) any later
|
||||
version (the "License"); you may not use this file except in compliance with the
|
||||
License. You may obtain a copy of the License at
|
||||
|
||||
https://github.com/tig-foundation/tig-monorepo/tree/main/docs/licenses
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software distributed
|
||||
under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
CONDITIONS OF ANY KIND, either express or implied. See the License for the specific
|
||||
language governing permissions and limitations under the License.
|
||||
*/
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <float.h>
|
||||
|
||||
#define MAX_FLOAT 3.402823466e+38F
|
||||
|
||||
/**
 * Squared Euclidean distance between two float vectors.
 *
 * Components are consumed in groups of four while at least four remain and
 * then one at a time, so every index touched is below `dims`. The squares are
 * folded into a single accumulator with fused multiply-adds in ascending
 * index order.
 *
 * @param a    first vector, at least `dims` readable floats
 * @param b    second vector, at least `dims` readable floats
 * @param dims number of components to compare
 * @return sum over i in [0, dims) of (a[i] - b[i])^2 (no square root taken)
 */
__device__ float euclidean_distance(const float* a, const float* b, int dims) {
    float acc = 0.0f;
    int pos = 0;

    // Bulk part: whole groups of four components.
    const int bulk_end = dims - 3;
    while (pos < bulk_end) {
        const float d0 = a[pos]     - b[pos];
        acc = fmaf(d0, d0, acc);
        const float d1 = a[pos + 1] - b[pos + 1];
        acc = fmaf(d1, d1, acc);
        const float d2 = a[pos + 2] - b[pos + 2];
        acc = fmaf(d2, d2, acc);
        const float d3 = a[pos + 3] - b[pos + 3];
        acc = fmaf(d3, d3, acc);
        pos += 4;
    }

    // Remainder: the last zero to three components.
    while (pos < dims) {
        const float d = a[pos] - b[pos];
        acc = fmaf(d, d, acc);
        ++pos;
    }

    return acc;
}
|
||||
|
||||
/**
 * Squared Euclidean distance between two float vectors (variant used by the
 * kernels in this file when num_queries > 4000).
 *
 * The previous implementation stepped by four without a remainder loop, so
 * for any `dims` that is not a multiple of four it read up to three floats
 * past the end of both vectors and added them to the result. This version
 * processes whole groups of four and then the remaining components
 * individually, so no index >= dims is ever read. For `dims` that is a
 * multiple of four the sequence of fused multiply-adds is unchanged, so the
 * result is bit-identical to the old code.
 *
 * @param a    first vector, at least `dims` readable floats
 * @param b    second vector, at least `dims` readable floats
 * @param dims number of components to compare
 * @return sum over i in [0, dims) of (a[i] - b[i])^2 (no square root taken)
 */
__device__ float euclidean_distance_high(const float* a, const float* b, int dims) {
    float sum = 0.0f;
    int i = 0;

    // Groups of four; the bound guarantees i + 3 < dims inside the body.
    for (; i < dims - 3; i += 4) {
        float diff0 = a[i]     - b[i];
        float diff1 = a[i + 1] - b[i + 1];
        float diff2 = a[i + 2] - b[i + 2];
        float diff3 = a[i + 3] - b[i + 3];
        sum = fmaf(diff0, diff0, sum);
        sum = fmaf(diff1, diff1, sum);
        sum = fmaf(diff2, diff2, sum);
        sum = fmaf(diff3, diff3, sum);
    }

    // Tail: the 0-3 components left when dims is not a multiple of four.
    for (; i < dims; i++) {
        float diff = a[i] - b[i];
        sum = fmaf(diff, diff, sum);
    }

    return sum;
}
|
||||
|
||||
/**
 * Database row used as the center ("seed") of `cluster`.
 * Same arithmetic for every caller, so any block can derive any cluster's
 * seed without reading memory written by another block.
 * Requires database_size > 0.
 */
static __device__ __forceinline__ int is_adp_cluster_seed_index(int cluster, int database_size) {
    return (int)((cluster * 982451653LL + 1566083941LL) % (long long)database_size);
}

/**
 * Picks one database vector per cluster as its center and assigns every
 * database vector to the nearest center.
 *
 * Launch layout: 1D grid with at least `num_clusters` blocks (block b handles
 * cluster b), 1D blocks of any size. No dynamic shared memory is used; bytes
 * supplied at launch are ignored.
 *
 * Outputs:
 *   cluster_centers[c * vector_dims + d]  copy of cluster c's seed vector
 *   cluster_assignments[v]                index of the nearest center to vector v
 *   cluster_sizes[c]                      number of vectors assigned to c
 *
 * Fixes relative to the previous version:
 *   - Centers of *other* clusters were read from `cluster_centers` while the
 *     blocks owning them might not have written them yet (`__syncthreads()`
 *     only orders threads of one block). Centers are plain copies of seed
 *     rows, so they are now read straight from `database_vectors`.
 *   - Every block used to write every `cluster_assignments[v]`; now only the
 *     block whose cluster wins writes it, so each entry has a single writer.
 *   - The shared-memory `center` buffer was filled but never read; removed.
 *   - Element offsets are computed in size_t to avoid int overflow.
 *   - database_size <= 0 no longer reaches the modulo.
 *
 * @param database_vectors    database_size * vector_dims floats, row-major
 * @param cluster_centers     num_clusters * vector_dims floats (output)
 * @param cluster_assignments database_size ints (output)
 * @param cluster_sizes       num_clusters ints (output)
 * @param database_size       number of database vectors
 * @param vector_dims         components per vector
 * @param num_clusters        number of clusters
 * @param num_queries         only selects the distance routine
 *                            (<= 4000: euclidean_distance, else
 *                            euclidean_distance_high)
 */
extern "C" __global__ void deterministic_clustering(
    const float* database_vectors,
    float* cluster_centers,
    int* cluster_assignments,
    int* cluster_sizes,
    int database_size,
    int vector_dims,
    int num_clusters,
    int num_queries
) {
    const int cluster_idx = blockIdx.x;
    const int tid = threadIdx.x;

    if (cluster_idx >= num_clusters) return;

    // Nothing to seed from or assign: report an empty cluster and stop.
    if (database_size <= 0) {
        if (tid == 0) {
            cluster_sizes[cluster_idx] = 0;
        }
        return;
    }

    const size_t dims = (size_t)vector_dims;

    // Publish this block's own center for the search kernel.
    const int own_seed = is_adp_cluster_seed_index(cluster_idx, database_size);
    const float* seed_vector = database_vectors + (size_t)own_seed * dims;
    for (int d = tid; d < vector_dims; d += blockDim.x) {
        cluster_centers[(size_t)cluster_idx * dims + d] = seed_vector[d];
    }

    if (tid == 0) {
        cluster_sizes[cluster_idx] = 0;
    }
    // The counter must be reset before any thread of this block increments it.
    __syncthreads();

    const bool small_batch = (num_queries <= 4000);

    for (int vec_idx = tid; vec_idx < database_size; vec_idx += blockDim.x) {
        const float* vector = database_vectors + (size_t)vec_idx * dims;

        float min_dist = MAX_FLOAT;
        int best_cluster = 0;

        for (int c = 0; c < num_clusters; c++) {
            // Center of c == its seed row; read it from the (read-only) database.
            const int c_seed = is_adp_cluster_seed_index(c, database_size);
            const float* c_center = database_vectors + (size_t)c_seed * dims;
            float dist = small_batch ?
                euclidean_distance(vector, c_center, vector_dims) :
                euclidean_distance_high(vector, c_center, vector_dims);
            if (dist < min_dist) {
                min_dist = dist;
                best_cluster = c;
            }
        }

        // Exactly one cluster wins per vector, so exactly one block records it.
        if (best_cluster == cluster_idx) {
            cluster_assignments[vec_idx] = best_cluster;
            atomicAdd(&cluster_sizes[cluster_idx], 1);
        }
    }
}
|
||||
|
||||
/**
 * Scans the database for vectors assigned to `cluster` and tightens the
 * running best match (`min_dist`, `best_idx`) in place.
 * A negative `cluster` matches every vector (full scan).
 * `small_batch` selects euclidean_distance (true) or euclidean_distance_high.
 */
static __device__ void is_adp_scan_cluster(
    const float* query,
    const float* database_vectors,
    const int* cluster_assignments,
    int database_size,
    int vector_dims,
    int cluster,
    bool small_batch,
    float& min_dist,
    int& best_idx
) {
    for (int vec_idx = 0; vec_idx < database_size; vec_idx++) {
        if (cluster >= 0 && cluster_assignments[vec_idx] != cluster) continue;

        const float* db_vector = database_vectors + (size_t)vec_idx * (size_t)vector_dims;
        const float dist = small_batch ?
            euclidean_distance(query, db_vector, vector_dims) :
            euclidean_distance_high(query, db_vector, vector_dims);
        if (dist < min_dist) {
            min_dist = dist;
            best_idx = vec_idx;
        }
    }
}

/**
 * For each query, finds the index of a near database vector using the
 * clustering produced by `deterministic_clustering`.
 *
 * Launch layout: 1D grid of 1D blocks, one thread per query; any
 * configuration with gridDim.x * blockDim.x >= num_queries works (including
 * one single-thread block per query). No dynamic shared memory is used;
 * bytes supplied at launch are ignored.
 *
 * num_queries <= 4000: clusters are visited nearest-center first (top three),
 *   then all remaining non-empty clusters, i.e. the whole database is covered.
 * num_queries  > 4000: only the nearest cluster is scanned; the second
 *   nearest is scanned only if that produced nothing; if still nothing, the
 *   remaining clusters whose center lies closer than twice this query's
 *   distance to cluster 0's center are scanned.
 * In both modes, if no candidate was found at all, the whole database is
 * scanned so that a valid index is returned whenever database_size > 0.
 *
 * Fixes relative to the previous version:
 *   - The >4000 path kept center distances in shared memory written only by
 *     thread 0 of the block (for *its* query) and read by all threads without
 *     a barrier; distances are now per-thread.
 *   - The <=4000 path filled a fixed `float[8]` indexed by the runtime
 *     `num_clusters` and never read it; removed.
 *   - Seven copies of the scan loop are replaced by one helper.
 *   - Element offsets are computed in size_t to avoid int overflow.
 *
 * @param query_vectors       num_queries * vector_dims floats, row-major
 * @param database_vectors    database_size * vector_dims floats, row-major
 * @param cluster_centers     num_clusters * vector_dims floats
 * @param cluster_assignments cluster index per database vector
 * @param cluster_sizes       vector count per cluster
 * @param results             num_queries ints (output); -1 only if nothing
 *                            could be found (e.g. empty database)
 */
extern "C" __global__ void cluster_search(
    const float* query_vectors,
    const float* database_vectors,
    const float* cluster_centers,
    const int* cluster_assignments,
    const int* cluster_sizes,
    int* results,
    int num_queries,
    int database_size,
    int vector_dims,
    int num_clusters
) {
    // With one single-thread block per query this equals blockIdx.x.
    const int query_idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (query_idx >= num_queries) return;

    const size_t dims = (size_t)vector_dims;
    const bool small_batch = (num_queries <= 4000);
    const float* query = query_vectors + (size_t)query_idx * dims;

    float min_dist = MAX_FLOAT;
    int best_idx = -1;

    if (small_batch) {
        // Three nearest centers, nearest first.
        float ranked_dist[3] = {MAX_FLOAT, MAX_FLOAT, MAX_FLOAT};
        int ranked[3] = {-1, -1, -1};

        for (int cluster = 0; cluster < num_clusters; cluster++) {
            const float* center = cluster_centers + (size_t)cluster * dims;
            const float dist = euclidean_distance(query, center, vector_dims);

            if (dist < ranked_dist[0]) {
                ranked_dist[2] = ranked_dist[1];  ranked[2] = ranked[1];
                ranked_dist[1] = ranked_dist[0];  ranked[1] = ranked[0];
                ranked_dist[0] = dist;            ranked[0] = cluster;
            } else if (dist < ranked_dist[1]) {
                ranked_dist[2] = ranked_dist[1];  ranked[2] = ranked[1];
                ranked_dist[1] = dist;            ranked[1] = cluster;
            } else if (dist < ranked_dist[2]) {
                ranked_dist[2] = dist;            ranked[2] = cluster;
            }
        }

        // Visit the ranked clusters first (keeps the original visiting order,
        // which decides ties between equally distant vectors).
        for (int r = 0; r < 3; r++) {
            const int cluster = ranked[r];
            if (cluster != -1 && cluster_sizes[cluster] > 0) {
                is_adp_scan_cluster(query, database_vectors, cluster_assignments,
                                    database_size, vector_dims, cluster,
                                    true, min_dist, best_idx);
            }
        }

        // Then every other non-empty cluster, in index order.
        for (int cluster = 0; cluster < num_clusters; cluster++) {
            if (cluster == ranked[0] || cluster == ranked[1] || cluster == ranked[2]) continue;
            if (cluster_sizes[cluster] == 0) continue;
            is_adp_scan_cluster(query, database_vectors, cluster_assignments,
                                database_size, vector_dims, cluster,
                                true, min_dist, best_idx);
        }
    } else {
        // Two nearest centers, nearest first.
        float ranked_dist[2] = {MAX_FLOAT, MAX_FLOAT};
        int ranked[2] = {-1, -1};
        // This query's distance to cluster 0's center; basis of the fallback radius.
        float dist_to_cluster0 = MAX_FLOAT;

        for (int cluster = 0; cluster < num_clusters; cluster++) {
            const float* center = cluster_centers + (size_t)cluster * dims;
            const float dist = euclidean_distance_high(query, center, vector_dims);

            if (cluster == 0) {
                dist_to_cluster0 = dist;
            }

            if (dist < ranked_dist[0]) {
                ranked_dist[1] = ranked_dist[0];  ranked[1] = ranked[0];
                ranked_dist[0] = dist;            ranked[0] = cluster;
            } else if (dist < ranked_dist[1]) {
                ranked_dist[1] = dist;            ranked[1] = cluster;
            }
        }

        if (ranked[0] != -1 && cluster_sizes[ranked[0]] > 0) {
            is_adp_scan_cluster(query, database_vectors, cluster_assignments,
                                database_size, vector_dims, ranked[0],
                                false, min_dist, best_idx);
        }

        // Second-nearest cluster only if the nearest one yielded nothing.
        if (min_dist == MAX_FLOAT && ranked[1] != -1 && cluster_sizes[ranked[1]] > 0) {
            is_adp_scan_cluster(query, database_vectors, cluster_assignments,
                                database_size, vector_dims, ranked[1],
                                false, min_dist, best_idx);
        }

        // Still nothing: widen to the other clusters inside the search radius.
        if (min_dist == MAX_FLOAT) {
            const float search_radius = dist_to_cluster0 * 2.0f;

            for (int cluster = 0; cluster < num_clusters; cluster++) {
                if (cluster == ranked[0] || cluster == ranked[1]) continue;

                // Recomputed per thread instead of being shared across the block.
                const float* center = cluster_centers + (size_t)cluster * dims;
                const float center_dist = euclidean_distance_high(query, center, vector_dims);
                if (center_dist >= search_radius) continue;
                if (cluster_sizes[cluster] == 0) continue;

                is_adp_scan_cluster(query, database_vectors, cluster_assignments,
                                    database_size, vector_dims, cluster,
                                    false, min_dist, best_idx);
            }
        }
    }

    // Last resort: never hand back -1 while the database has vectors.
    if (best_idx < 0) {
        is_adp_scan_cluster(query, database_vectors, cluster_assignments,
                            database_size, vector_dims, -1,
                            small_batch, min_dist, best_idx);
    }

    results[query_idx] = best_idx;
}
|
||||
102
tig-algorithms/src/vector_search/is_adp_optimal/mod.rs
Normal file
102
tig-algorithms/src/vector_search/is_adp_optimal/mod.rs
Normal file
@ -0,0 +1,102 @@
|
||||
use cudarc::{
|
||||
driver::{safe::LaunchConfig, CudaModule, CudaStream, PushKernelArg},
|
||||
runtime::sys::cudaDeviceProp,
|
||||
};
|
||||
use std::sync::Arc;
|
||||
use serde_json::{Map, Value};
|
||||
use tig_challenges::vector_search::*;
|
||||
|
||||
pub fn solve_challenge(
|
||||
challenge: &Challenge,
|
||||
save_solution: &dyn Fn(&Solution) -> anyhow::Result<()>,
|
||||
hyperparameters: &Option<Map<String, Value>>,
|
||||
module: Arc<CudaModule>,
|
||||
stream: Arc<CudaStream>,
|
||||
_prop: &cudaDeviceProp,
|
||||
) -> anyhow::Result<()> {
|
||||
let vector_dims = challenge.vector_dims as i32;
|
||||
let database_size = challenge.database_size as i32;
|
||||
let num_queries = challenge.difficulty.num_queries as i32;
|
||||
|
||||
let block_size = 128;
|
||||
let num_clusters = if num_queries <= 6000 {
|
||||
2
|
||||
} else if num_queries < 9000 {
|
||||
4
|
||||
} else if num_queries < 10000 {
|
||||
6
|
||||
} else if num_queries < 11000 {
|
||||
10
|
||||
} else if num_queries < 12000 {
|
||||
12
|
||||
} else if num_queries < 14000 {
|
||||
14
|
||||
} else {
|
||||
14
|
||||
};
|
||||
|
||||
let deterministic_clustering = module.load_function("deterministic_clustering")?;
|
||||
let cluster_search = module.load_function("cluster_search")?;
|
||||
|
||||
let mut d_cluster_centers = stream.alloc_zeros::<f32>((num_clusters * vector_dims) as usize)?;
|
||||
let mut d_cluster_assignments = stream.alloc_zeros::<i32>(database_size as usize)?;
|
||||
let mut d_cluster_sizes = stream.alloc_zeros::<i32>(num_clusters as usize)?;
|
||||
|
||||
let cluster_config = LaunchConfig {
|
||||
grid_dim: (num_clusters as u32, 1, 1),
|
||||
block_dim: (block_size, 1, 1),
|
||||
shared_mem_bytes: (vector_dims * 4) as u32,
|
||||
};
|
||||
|
||||
unsafe {
|
||||
stream.launch_builder(&deterministic_clustering)
|
||||
.arg(&challenge.d_database_vectors)
|
||||
.arg(&mut d_cluster_centers)
|
||||
.arg(&mut d_cluster_assignments)
|
||||
.arg(&mut d_cluster_sizes)
|
||||
.arg(&database_size)
|
||||
.arg(&vector_dims)
|
||||
.arg(&num_clusters)
|
||||
.arg(&num_queries)
|
||||
.launch(cluster_config)?;
|
||||
}
|
||||
stream.synchronize()?;
|
||||
|
||||
let mut d_results = stream.alloc_zeros::<i32>(num_queries as usize)?;
|
||||
|
||||
let search_config = if num_queries <= 4000 {
|
||||
LaunchConfig {
|
||||
grid_dim: (num_queries as u32, 1, 1),
|
||||
block_dim: (1, 1, 1),
|
||||
shared_mem_bytes: 0,
|
||||
}
|
||||
} else {
|
||||
LaunchConfig {
|
||||
grid_dim: (num_queries as u32, 1, 1),
|
||||
block_dim: (block_size, 1, 1),
|
||||
shared_mem_bytes: (num_clusters * 8) as u32,
|
||||
}
|
||||
};
|
||||
|
||||
unsafe {
|
||||
stream.launch_builder(&cluster_search)
|
||||
.arg(&challenge.d_query_vectors)
|
||||
.arg(&challenge.d_database_vectors)
|
||||
.arg(&d_cluster_centers)
|
||||
.arg(&d_cluster_assignments)
|
||||
.arg(&d_cluster_sizes)
|
||||
.arg(&mut d_results)
|
||||
.arg(&num_queries)
|
||||
.arg(&database_size)
|
||||
.arg(&vector_dims)
|
||||
.arg(&num_clusters)
|
||||
.launch(search_config)?;
|
||||
}
|
||||
stream.synchronize()?;
|
||||
|
||||
let indices = stream.memcpy_dtov(&d_results)?;
|
||||
let indexes = indices.iter().map(|&idx| idx as usize).collect();
|
||||
|
||||
let _ = save_solution(&Solution { indexes });
|
||||
return Ok(());
|
||||
}
|
||||
@ -132,7 +132,8 @@
|
||||
|
||||
// c004_a067
|
||||
|
||||
// c004_a068
|
||||
pub mod is_adp_optimal;
|
||||
pub use is_adp_optimal as c004_a068;
|
||||
|
||||
// c004_a069
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user