una-dinosauria/Rayuela.jl

Search code in Julia

una-dinosauria opened this issue · 2 comments

Without multithreading and due to JuliaLang/julia#939, implementing the lookup-table-based search of MCQ in Julia is simply not competitive. We should revisit this once Julia gets decent multithreading and faster partial sort.

This would also remove the C++ code that we have and, save the CUDA code, make the project fully-Julian.

I have written a Julia version here

function linscan_pq_julia(
B::Matrix{Int16}, # m-by-n. The database, encoded
X::Matrix{Cfloat}, # d-by-nq. The queries.
C::Vector{Matrix{Cfloat}}, # The cluster centers
knn:: Int = 10000) # Number of knn results to return
m, n = size( B )
d, nq = size( X )
subdims = splitarray( 1:d, m )
@show knn, nq
dists = zeros( Cfloat, knn, nq )
idx = zeros( Cuint, knn, nq )
Bt = B'
# Compute distance tables between queries and codebooks
tables = Vector{Matrix{Cfloat}}(m)
for i = 1:m
# tables[i] = Distances.pairwise(Distances.SqEuclidean(), X[subdims[i],:], C[i])
tables[i] = Distances.pairwise(Distances.SqEuclidean(), C[i], X[subdims[i],:])
end
# Compute approximate distances and sort
@inbounds for i = 1:nq # Loop over each query
# println(i)
xq_dists = zeros(Cfloat, n)
p = zeros(Cuint, n)
for j = 1:m # Loop over each codebook
t = tables[j][:,i]
for k = 1:n # Loop over each code
xq_dists[k] += t[ Bt[k,j] ]
end
end
# p = sortperm(xq_dists; alg=PartialQuickSort(knn))
sortperm!(p, xq_dists; alg=PartialQuickSort(knn))
# @simd for j=1:n; p[j]=j; end
# sortperm!(p, xq_dists; alg=PartialQuickSort(knn), initialized=true)
# sort(xq_dists; alg=PartialQuickSort(knn))
dists[:,i] = xq_dists[ p[1:knn] ]
idx[:,i] = p[1:knn]
end # @inbounds
return dists, idx
end

Too slow in the meantime.