Unable to reuse the analysis if we only store one triangle for symmetric factorizations
amontoison opened this issue · 0 comments
amontoison commented
using CUDA, CUDA.CUSPARSE
using CUDSS
using LinearAlgebra
using SparseArrays
using Test
println(CUDSS.CUDSS_INSTALLATION)
function cudss_generic()
n = 100
p = 5
@testset "precision = $T" for T in (Float32, Float64, ComplexF32, ComplexF64)
R = real(T)
@testset "Unsymmetric -- Non-Hermitian" begin
A_cpu = sprand(T, n, n, 0.02) + I
b_cpu = rand(T, n)
A_gpu = CuSparseMatrixCSR(A_cpu)
b_gpu = CuVector(b_cpu)
@testset "ldiv!" begin
x_cpu = zeros(T, n)
x_gpu = CuVector(x_cpu)
solver = lu(A_gpu)
ldiv!(x_gpu, solver, b_gpu)
r_gpu = b_gpu - A_gpu * x_gpu
@test norm(r_gpu) ≤ √eps(R)
A_gpu2 = rand(T) * A_gpu
lu!(solver, A_gpu2)
x_gpu .= b_gpu
ldiv!(solver, x_gpu)
r_gpu2 = b_gpu - A_gpu2 * x_gpu
@test norm(r_gpu2) ≤ √eps(R)
end
@testset "\\" begin
solver = lu(A_gpu)
x_gpu = solver \ b_gpu
r_gpu = b_gpu - A_gpu * x_gpu
@test norm(r_gpu) ≤ √eps(R)
A_gpu2 = rand(T) * A_gpu
lu!(solver, A_gpu2)
x_gpu = solver \ b_gpu
r_gpu2 = b_gpu - A_gpu2 * x_gpu
@test norm(r_gpu2) ≤ √eps(R)
end
end
@testset "Symmetric -- Hermitian" begin
@testset "view = $view" for view in ('F', 'L', 'U')
A_cpu = sprand(T, n, n, 0.01) + I
A_cpu = A_cpu + A_cpu'
B_cpu = rand(T, n, p)
(view == 'L') && (A_gpu = CuSparseMatrixCSR(A_cpu |> tril))
(view == 'U') && (A_gpu = CuSparseMatrixCSR(A_cpu |> triu))
(view == 'F') && (A_gpu = CuSparseMatrixCSR(A_cpu))
B_gpu = CuMatrix(B_cpu)
@testset "ldiv!" begin
X_cpu = zeros(T, n, p)
X_gpu = CuMatrix(X_cpu)
solver = ldlt(A_gpu; view)
ldiv!(X_gpu, solver, B_gpu)
R_gpu = B_gpu - A_gpu * X_gpu
@test norm(R_gpu) ≤ √eps(R)
c = rand(R)
A_cpu2 = c * A_cpu
A_gpu2 = c * A_gpu
ldlt!(solver, A_gpu2)
X_gpu .= B_gpu
ldiv!(solver, X_gpu)
R_gpu2 = B_gpu - CuSparseMatrixCSR(A_cpu2) * X_gpu
@test norm(R_gpu2) ≤ √eps(R)
end
@testset "\\" begin
solver = ldlt(A_gpu; view)
X_gpu = solver \ B_gpu
R_gpu = B_gpu - A_gpu * X_gpu
@test norm(R_gpu) ≤ √eps(R)
c = rand(R)
A_cpu2 = c * A_cpu
A_gpu2 = c * A_gpu
ldlt!(solver, A_gpu2)
X_gpu = solver \ B_gpu
R_gpu2 = B_gpu - CuSparseMatrixCSR(A_cpu2) * X_gpu
@test norm(R_gpu2) ≤ √eps(R)
end
end
end
@testset "SPD -- HPD" begin
@testset "view = $view" for view in ('F', 'L', 'U')
A_cpu = sprand(T, n, n, 0.01)
A_cpu = A_cpu * A_cpu' + I
B_cpu = rand(T, n, p)
(view == 'L') && (A_gpu = CuSparseMatrixCSR(A_cpu |> tril))
(view == 'U') && (A_gpu = CuSparseMatrixCSR(A_cpu |> triu))
(view == 'F') && (A_gpu = CuSparseMatrixCSR(A_cpu))
B_gpu = CuMatrix(B_cpu)
@testset "ldiv!" begin
X_cpu = zeros(T, n, p)
X_gpu = CuMatrix(X_cpu)
solver = cholesky(A_gpu; view)
ldiv!(X_gpu, solver, B_gpu)
R_gpu = B_gpu - A_gpu * X_gpu
@test norm(R_gpu) ≤ √eps(R)
c = rand(R)
A_cpu2 = c * A_cpu
A_gpu2 = c * A_gpu
cholesky!(solver, A_gpu2)
X_gpu .= B_gpu
ldiv!(solver, X_gpu)
R_gpu2 = B_gpu - CuSparseMatrixCSR(A_cpu2) * X_gpu
@test norm(R_gpu2) ≤ √eps(R)
end
@testset "\\" begin
solver = cholesky(A_gpu; view)
X_gpu = solver \ B_gpu
R_gpu = B_gpu - A_gpu * X_gpu
@test norm(R_gpu) ≤ √eps(R)
c = rand(R)
A_cpu2 = c * A_cpu
A_gpu2 = c * A_gpu
cholesky!(solver, A_gpu2)
X_gpu = solver \ B_gpu
R_gpu2 = B_gpu - CuSparseMatrixCSR(A_cpu2) * X_gpu
@test norm(R_gpu2) ≤ √eps(R)
end
end
end
end
end
cudss_generic()