Cannot compile CUDA kernels
mcleantom opened this issue · 1 comment
I'm trying to compile the CUDA kernels to use PhiFlow on the GPU; however, the compilation fails. For reference, I am using WSL2 with Ubuntu 20.04. This is the log file of the compilation:
/home/mclea/PhiFlow/phi/tf/cuda/src/helpers.h: In instantiation of ‘void copyDataToArray(const T*, cudaArray*, cudaSurfaceObject_t, cudaMemcpy3DParms, int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int) [with T = float; cudaSurfaceObject_t = long long unsigned int]’:
/home/mclea/PhiFlow/phi/tf/cuda/src/resample.cu.cc:313:21: required from ‘void tensorflow::ResampleTextureMemory(unsigned int, int, const unsigned int*, unsigned int, unsigned int, unsigned int, unsigned int, const T*, const T*, T*, const Boundary*) [with T = float]’
/home/mclea/PhiFlow/phi/tf/cuda/src/resample.cu.cc:347:155: required from here
/home/mclea/PhiFlow/phi/tf/cuda/src/helpers.h:323:18: warning: ‘cudaError_t cudaMemcpyToArray(cudaArray_t, size_t, size_t, const void*, size_t, cudaMemcpyKind)’ is deprecated [-Wdeprecated-declarations]
323 | cudaMemcpyToArray(cuArray, 0, 0, data + batch * xSize * ySize * zSize * components, xSize * ySize * zSize * components * sizeof(T), cudaMemcpyDeviceToDevice);
| ~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/usr/local/cuda/bin/../targets/x86_64-linux/include/cuda_runtime_api.h:7368:46: note: declared here
7368 | extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaMemcpyToArray(cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, size_t count, enum cudaMemcpyKind kind);
| ^~~~~~~~~~~~~~~~~
/home/mclea/PhiFlow/phi/tf/cuda/src/helpers.h:323:18: warning: ‘cudaError_t cudaMemcpyToArray(cudaArray_t, size_t, size_t, const void*, size_t, cudaMemcpyKind)’ is deprecated [-Wdeprecated-declarations]
323 | cudaMemcpyToArray(cuArray, 0, 0, data + batch * xSize * ySize * zSize * components, xSize * ySize * zSize * components * sizeof(T), cudaMemcpyDeviceToDevice);
| ~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/usr/local/cuda/bin/../targets/x86_64-linux/include/cuda_runtime_api.h:7368:46: note: declared here
7368 | extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaMemcpyToArray(cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, size_t count, enum cudaMemcpyKind kind);
| ^~~~~~~~~~~~~~~~~
/home/mclea/PhiFlow/phi/tf/cuda/src/helpers.h:323:18: warning: ‘cudaError_t cudaMemcpyToArray(cudaArray_t, size_t, size_t, const void*, size_t, cudaMemcpyKind)’ is deprecated [-Wdeprecated-declarations]
323 | cudaMemcpyToArray(cuArray, 0, 0, data + batch * xSize * ySize * zSize * components, xSize * ySize * zSize * components * sizeof(T), cudaMemcpyDeviceToDevice);
| ~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/usr/local/cuda/bin/../targets/x86_64-linux/include/cuda_runtime_api.h:7368:46: note: declared here
7368 | extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaMemcpyToArray(cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, size_t count, enum cudaMemcpyKind kind);
| ^~~~~~~~~~~~~~~~~
/home/mclea/PhiFlow/phi/tf/cuda/src/helpers.h:323:18: warning: ‘cudaError_t cudaMemcpyToArray(cudaArray_t, size_t, size_t, const void*, size_t, cudaMemcpyKind)’ is deprecated [-Wdeprecated-declarations]
323 | cudaMemcpyToArray(cuArray, 0, 0, data + batch * xSize * ySize * zSize * components, xSize * ySize * zSize * components * sizeof(T), cudaMemcpyDeviceToDevice);
| ~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/usr/local/cuda/bin/../targets/x86_64-linux/include/cuda_runtime_api.h:7368:46: note: declared here
7368 | extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaMemcpyToArray(cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, size_t count, enum cudaMemcpyKind kind);
| ^~~~~~~~~~~~~~~~~
In file included from /home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/platform/notification.h:27,
from /home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/lib/core/notification.h:21,
from /home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/framework/cancellation.h:22,
from /home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/framework/op_kernel.h:27,
from /home/mclea/PhiFlow/phi/tf/cuda/src/resample.cc:2:
/home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/platform/default/notification.h:61:65: warning: ‘tensorflow::int64’ is deprecated: Use int64_t instead. [-Wdeprecated-declarations]
61 | int64 timeout_in_us);
| ^
In file included from /home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/platform/types.h:31,
from /home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/framework/numeric_types.h:27,
from /home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/framework/allocator.h:26,
from /home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/framework/op_kernel.h:26,
from /home/mclea/PhiFlow/phi/tf/cuda/src/resample.cc:2:
/home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/platform/default/integral_types.h:29:63: note: declared here
29 | [[deprecated("Use int64_t instead.")]] typedef ::std::int64_t int64;
| ^~~~~
In file included from /home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/platform/notification.h:27,
from /home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/lib/core/notification.h:21,
from /home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/framework/cancellation.h:22,
from /home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/framework/op_kernel.h:27,
from /home/mclea/PhiFlow/phi/tf/cuda/src/resample.cc:2:
/home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/platform/default/notification.h:62:58: warning: ‘tensorflow::int64’ is deprecated: Use int64_t instead. [-Wdeprecated-declarations]
62 | bool WaitForNotificationWithTimeout(int64 timeout_in_us) {
| ^
In file included from /home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/platform/types.h:31,
from /home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/framework/numeric_types.h:27,
from /home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/framework/allocator.h:26,
from /home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/framework/op_kernel.h:26,
from /home/mclea/PhiFlow/phi/tf/cuda/src/resample.cc:2:
/home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/platform/default/integral_types.h:29:63: note: declared here
29 | [[deprecated("Use int64_t instead.")]] typedef ::std::int64_t int64;
| ^~~~~
In file included from /home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/platform/notification.h:27,
from /home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/lib/core/notification.h:21,
from /home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/framework/cancellation.h:22,
from /home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/framework/op_kernel.h:27,
from /home/mclea/PhiFlow/phi/tf/cuda/src/resample.cc:2:
/home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/platform/default/notification.h:81:63: warning: ‘tensorflow::int64’ is deprecated: Use int64_t instead. [-Wdeprecated-declarations]
81 | int64 timeout_in_us) {
| ^
In file included from /home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/platform/types.h:31,
from /home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/framework/numeric_types.h:27,
from /home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/framework/allocator.h:26,
from /home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/framework/op_kernel.h:26,
from /home/mclea/PhiFlow/phi/tf/cuda/src/resample.cc:2:
/home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/platform/default/integral_types.h:29:63: note: declared here
29 | [[deprecated("Use int64_t instead.")]] typedef ::std::int64_t int64;
| ^~~~~
In file included from /home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/framework/tensor.h:24,
from /home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/framework/device_base.h:26,
from /home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/framework/op_kernel.h:29,
from /home/mclea/PhiFlow/phi/tf/cuda/src/resample.cc:2:
/home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/framework/tensor_shape.h:305:22: warning: ‘tensorflow::int64’ is deprecated: Use int64_t instead. [-Wdeprecated-declarations]
305 | gtl::InlinedVector<int64, 4> dim_sizes() const;
| ^~~~~
In file included from /home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/platform/types.h:31,
from /home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/framework/numeric_types.h:27,
from /home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/framework/allocator.h:26,
from /home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/framework/op_kernel.h:26,
from /home/mclea/PhiFlow/phi/tf/cuda/src/resample.cc:2:
/home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/platform/default/integral_types.h:29:63: note: declared here
29 | [[deprecated("Use int64_t instead.")]] typedef ::std::int64_t int64;
| ^~~~~
In file included from /home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/framework/tensor.h:25,
from /home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/framework/device_base.h:26,
from /home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/framework/op_kernel.h:29,
from /home/mclea/PhiFlow/phi/tf/cuda/src/resample.cc:2:
/home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/framework/tensor_types.h: In member function ‘void tensorflow::internal::MaybeWith32BitIndexingImpl<Eigen::GpuDevice>::operator()(Func, Args&& ...) const’:
/home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/framework/tensor_types.h:176:25: error: use of ‘auto’ in lambda parameter declaration only available with ‘-std=c++14’ or ‘-std=gnu++14’
176 | auto all = [](const auto&... bool_vals) {
| ^~~~
/home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/framework/tensor_types.h:176:34: error: expansion pattern ‘const int&’ contains no parameter packs
176 | auto all = [](const auto&... bool_vals) {
| ^~~~~~~~~
/home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/framework/tensor_types.h: In lambda function:
/home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include/tensorflow/core/framework/tensor_types.h:177:22: error: ‘bool_vals’ was not declared in this scope
177 | for (bool b : {bool_vals...}) {
| ^~~~~~~~~
/usr/local/cuda/bin/nvcc /home/mclea/PhiFlow/phi/tf/cuda/src/resample.cu.cc -o /home/mclea/PhiFlow/phi/tf/cuda/build/resample.cu.o -std=c++11 -c -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC --expt-relaxed-constexpr -DNDEBUG -O3 -I/home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include -D_GLIBCXX_USE_CXX11_ABI=0 -DEIGEN_MAX_ALIGN_BYTES=64
gcc /home/mclea/PhiFlow/phi/tf/cuda/src/resample.cc /home/mclea/PhiFlow/phi/tf/cuda/build/resample.cu.o -o /home/mclea/PhiFlow/phi/tf/cuda/build/resample.so -std=c++11 -shared -fPIC -lcudart -O3 -L/usr/local/cuda/lib64/ -I/home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow/include -D_GLIBCXX_USE_CXX11_ABI=0 -DEIGEN_MAX_ALIGN_BYTES=64 -L/home/mclea/miniconda3/envs/phi_flow/lib/python3.8/site-packages/tensorflow -l:libtensorflow_framework.so.2
I can't figure out why this fails. Could it be my TensorFlow version?
Sometimes GPU stuff in WSL can fail because there is no NVIDIA driver.
Hi @mcleantom, first off you can run PhiFlow on the GPU even without compiling the kernels.
I assume you are running `python setup.py tf_cuda` on the `master` branch? What version of TensorFlow do you have?
I tested it on Ubuntu 18.04 with TensorFlow 2.4.1 and it compiles there. With WSL it's always a bit tricky to get stuff to work with the GPU. Do you use the special WSL drivers from NVIDIA?