GPUCompiler error when running 3D diffusion example on the GPU
muendlein opened this issue
While trying out the provided example, I receive the following error (Julia 1.8):
ERROR: MethodError: no method matching return_types(::GPUArrays.var"#5#6", ::Type{Tuple{CUDA.CuKernelContext, CUDA.CuDeviceArray{Float64, 3, 1}, Float64}}, ::GPUCompiler.GPUInterpreter)
Closest candidates are:
return_types(::Any, ::Any; world, interp) at reflection.jl:1294
return_types(::Any) at reflection.jl:1294
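For context: the closest-candidates list indicates that on Julia 1.8 Base.return_types accepts the interpreter only via keyword arguments (world, interp), while the failing call passes a GPUInterpreter positionally, so no method matches. A minimal sketch of the distinction, assuming a Julia 1.8 REPL (the positional call is shown commented out since it errors):

Base.return_types(sin, Tuple{Float64})                       # ok on Julia 1.8: returns Any[Float64]
# Base.return_types(sin, Tuple{Float64}, some_interpreter)   # positional interpreter: no matching
#                                                            # method, i.e. the MethodError above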
Thanks for reporting @muendlein! Could you give more detail on the package versions you used, the exact code example you ran, etc.? That would make it easier to locate the issue.
The following code was run; the error is triggered by @zeros(nx, ny, nz):
const USE_GPU = true
using ParallelStencil
using ParallelStencil.FiniteDifferences3D
@static if USE_GPU
    @init_parallel_stencil(CUDA, Float64, 3);
else
    @init_parallel_stencil(Threads, Float64, 3);
end

@parallel function diffusion3D_step!(T2, T, Ci, lam, dt, dx, dy, dz)
    @inn(T2) = @inn(T) + dt*(lam*@inn(Ci)*(@d2_xi(T)/dx^2 + @d2_yi(T)/dy^2 + @d2_zi(T)/dz^2));
    return
end

function diffusion3D()
    # Physics
    lam    = 1.0;                           # Thermal conductivity
    cp_min = 1.0;                           # Minimal heat capacity
    lx, ly, lz = 10.0, 10.0, 10.0;          # Length of domain in dimensions x, y and z
    # Numerics
    nx, ny, nz = 256, 256, 256;             # Number of gridpoints in dimensions x, y and z
    nt = 100;                               # Number of time steps
    dx = lx/(nx-1);                         # Space step in x-dimension
    dy = ly/(ny-1);                         # Space step in y-dimension
    dz = lz/(nz-1);                         # Space step in z-dimension
    # Array initializations
    T  = @zeros(nx, ny, nz);
    T2 = @zeros(nx, ny, nz);
    Ci = @zeros(nx, ny, nz);
    # Initial conditions (heat capacity and temperature with two Gaussian anomalies each)
    Ci .= 1.0./( cp_min .+ Data.Array([5*exp(-(((ix-1)*dx-lx/1.5))^2-(((iy-1)*dy-ly/2))^2-(((iz-1)*dz-lz/1.5))^2) +
                                       5*exp(-(((ix-1)*dx-lx/3.0))^2-(((iy-1)*dy-ly/2))^2-(((iz-1)*dz-lz/1.5))^2) for ix=1:size(T,1), iy=1:size(T,2), iz=1:size(T,3)]) )
    T .= Data.Array([100*exp(-(((ix-1)*dx-lx/2)/2)^2-(((iy-1)*dy-ly/2)/2)^2-(((iz-1)*dz-lz/3.0)/2)^2) +
                      50*exp(-(((ix-1)*dx-lx/2)/2)^2-(((iy-1)*dy-ly/2)/2)^2-(((iz-1)*dz-lz/1.5)/2)^2) for ix=1:size(T,1), iy=1:size(T,2), iz=1:size(T,3)])
    T2 .= T;                                # Assign also T2 to get correct boundary conditions.
    # Time loop
    dt = min(dx^2,dy^2,dz^2)*cp_min/lam/8.1; # Time step for the 3D heat diffusion
    for it = 1:nt
        @parallel diffusion3D_step!(T2, T, Ci, lam, dt, dx, dy, dz);
        T, T2 = T2, T;
    end
end

diffusion3D()
Full stack trace:
Stacktrace:
[1] check_method(job::GPUCompiler.CompilerJob)
@ GPUCompiler \.julia\packages\GPUCompiler\I9fZc\src\validation.jl:19
[2] macro expansion
@ \.julia\packages\TimerOutputs\4yHI4\src\TimerOutput.jl:253 [inlined]
[3] macro expansion
@ \.julia\packages\GPUCompiler\I9fZc\src\driver.jl:89 [inlined]
[4] emit_julia(job::GPUCompiler.CompilerJob)
@ GPUCompiler \.julia\packages\GPUCompiler\I9fZc\src\utils.jl:64
[5] cufunction_compile(job::GPUCompiler.CompilerJob)
@ CUDA \.julia\packages\CUDA\5jdFl\src\compiler\execution.jl:324
[6] cached_compilation(cache::Dict{UInt64, Any}, job::GPUCompiler.CompilerJob, compiler::typeof(CUDA.cufunction_compile), linker::typeof(CUDA.cufunction_link))
@ GPUCompiler \.julia\packages\GPUCompiler\I9fZc\src\cache.jl:90
[7] cufunction(f::GPUArrays.var"#5#6", tt::Type{Tuple{CUDA.CuKernelContext, CUDA.CuDeviceArray{Float64, 3, 1}, Float64}}; name::Nothing, kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ CUDA \.julia\packages\CUDA\5jdFl\src\compiler\execution.jl:297
[8] cufunction
@ \.julia\packages\CUDA\5jdFl\src\compiler\execution.jl:290 [inlined]
[9] macro expansion
@ \.julia\packages\CUDA\5jdFl\src\compiler\execution.jl:102 [inlined]
[10] #launch_heuristic#282
@ \.julia\packages\CUDA\5jdFl\src\gpuarrays.jl:17 [inlined]
[11] gpu_call(::GPUArrays.var"#5#6", ::CUDA.CuArray{Float64, 3, CUDA.Mem.DeviceBuffer}, ::Float64; target::CUDA.CuArray{Float64, 3, CUDA.Mem.DeviceBuffer}, elements::Nothing, threads::Nothing,
blocks::Nothing, name::Nothing)
@ GPUArrays \.julia\packages\GPUArrays\fqD8z\src\device\execution.jl:61
[12] gpu_call
@ \.julia\packages\GPUArrays\fqD8z\src\device\execution.jl:34 [inlined]
[13] fill!(A::CUDA.CuArray{Float64, 3, CUDA.Mem.DeviceBuffer}, x::Float64)
@ GPUArrays \.julia\packages\GPUArrays\fqD8z\src\host\construction.jl:14
[14] zeros(::Type, ::Int64, ::Vararg{Int64})
@ CUDA \.julia\packages\CUDA\5jdFl\src\array.jl:551
[15] diffusion3D()
@ Main \Desktop\julia\stencil_test_2.jl:29
[16] top-level scope
@ \Desktop\julia\stencil_test_2.jl:49
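Judging from the stack trace, the failure occurs in fill! inside CUDA.zeros, before any ParallelStencil kernel is compiled, so it should reproduce with CUDA.jl alone. A minimal check, as a sketch assuming the same environment:

using CUDA
A = CUDA.zeros(Float64, 4, 4, 4)  # zeros fills the array via fill!(A, 0.0), which launches
                                  # a GPU kernel and should hit the same GPUCompiler MethodError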
Installed packages (the failing environment):
⌅ [052768ef] CUDA v3.8.5
[4d7a3746] ImplicitGlobalGrid v0.12.0
[da04e1cc] MPI v0.19.2
[94395366] ParallelStencil v0.6.0
[91a5bcdd] Plots v1.31.7
CUDA Version: "11.6.0"
Please let me know if additional information is required.
Using the following package versions works without error:
[052768ef] CUDA v3.12.0
⌃ [4d7a3746] ImplicitGlobalGrid v0.11.0
⌅ [da04e1cc] MPI v0.18.2
[94395366] ParallelStencil v0.6.0
[91a5bcdd] Plots v1.31.7
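For reference, one way to reach the working versions above is a plain package update; a sketch (the exact versions resolved depend on the rest of the environment):

using Pkg
Pkg.update()    # resolve the newest compatible releases (here: CUDA v3.12.0)
Pkg.status()    # verify the resolved versions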
I was going to suggest that you try updating the packages. Glad it worked!