pygpu.gpuarray.GpuArrayException: ('The following error happened while compiling the node', GpuSoftmaxWithBias(GpuDot22.0, dense_3_b)
Opened this issue · 5 comments
Jorrit2002 commented
I try to run Theano on a GPU. I know OpenCL is not finished yet but is this there a fix for this?
Theano: 0.9.0.dev-c697eeab84e5b8a74908da654b66ec9eca4f1291
OSX: 10.12.4
GPU: AMD Radeon HD - FirePro D700 Compute Engine
pygpu.gpuarray.GpuArrayException: ('The following error happened while compiling the node', GpuSoftmaxWithBias(GpuDot22.0, dense_3_b), '
', "Program build failure ::
<program source>:44:82: error: kernel pointer arguments must have a global, local, or constant address space qualifier
KERNEL void kSoftmaxWithBias (const ga_size M, const ga_size N, const ga_float * x, const ga_size offset_x, const ga_ssize sx0, const ga_ssize sx1, const ga_float * b, const ga_size offset_b, const ga_ssize sb0, ga_float * sm, const ga_size offset_sm, const ga_ssize sm_s0, const ga_ssize sm_s1)
^
<program source>:44:166: error: kernel pointer arguments must have a global, local, or constant address space qualifier
KERNEL void kSoftmaxWithBias (const ga_size M, const ga_size N, const ga_float * x, const ga_size offset_x, const ga_ssize sx0, const ga_ssize sx1, const ga_float * b, const ga_size offset_b, const ga_ssize sb0, ga_float * sm, const ga_size offset_sm, const ga_ssize sm_s0, const ga_ssize sm_s1)
^
<program source>:44:224: error: kernel pointer arguments must have a global, local, or constant address space qualifier
KERNEL void kSoftmaxWithBias (const ga_size M, const ga_size N, const ga_float * x, const ga_size offset_x, const ga_ssize sx0, const ga_ssize sx1, const ga_float * b, const ga_size offset_b, const ga_ssize sb0, ga_float * sm, const ga_size offset_sm, const ga_ssize sm_s0, const ga_ssize sm_s1)
^
<program source>:46:16: error: unknown type name '__shared__'
extern __shared__ ga_float buf[];
^
<program source>:46:27: error: expected identifier or '('
extern __shared__ ga_float buf[];
^
<program source>:34:18: note: expanded from macro 'ga_float'
#define ga_float float
^
<program source>:47:19: error: use of undeclared identifier 'buf'; did you mean 'buf2'?
ga_float * buf2 = buf + N;
^~~
buf2
<program source>:47:12: note: 'buf2' declared here
ga_float * buf2 = buf + N;
^
<program source>:51:21: error: use of undeclared identifier 'blockIdx'; did you mean 'blockIDX'?
for (int blockIDX = blockIdx.x; blockIDX < M; blockIDX += gridDim.x){;
^~~~~~~~
blockIDX
<program source>:51:10: note: 'blockIDX' declared here
for (int blockIDX = blockIdx.x; blockIDX < M; blockIDX += gridDim.x){;
^
<program source>:51:29: error: member reference base type 'int' is not a structure or union
for (int blockIDX = blockIdx.x; blockIDX < M; blockIDX += gridDim.x){;
~~~~~~~~^~
<program source>:51:63: error: use of undeclared identifier 'gridDim'
for (int blockIDX = blockIdx.x; blockIDX < M; blockIDX += gridDim.x){;
^
<program source>:52:15: error: use of undeclared identifier 'threadIdx'
for (int tx = threadIdx.x; tx< N; tx += blockDim.x){;
^
<program source>:52:41: error: use of undeclared identifier 'blockDim'
for (int tx = threadIdx.x; tx< N; tx += blockDim.x){;
^
<program source>:53:1: error: use of undeclared identifier 'buf'; did you mean 'buf2'?
buf[tx] = (x[blockIDX * sx0 + tx * sx1]);
^~~
buf2
<program source>:47:12: note: 'buf2' declared here
ga_float * buf2 = buf + N;
^
<program source>:54:1: error: use of undeclared identifier 'buf'; did you mean 'buf2'?
buf[tx] += (b[tx * sb0]);
^~~
buf2
<program source>:47:12: note: 'buf2' declared here
ga_float * buf2 = buf + N;
^
<program source>:55:12: error: use of undeclared identifier 'buf'; did you mean 'buf2'?
buf2[tx] = buf[tx];
^~~
buf2
<program source>:47:12: note: 'buf2' declared here
ga_float * buf2 = buf + N;
^
<program source>:57:1: warning: implicit declaration of function '__syncthreads' is invalid in C99
__syncthreads();
^
<program source>:63:13: error: use of undeclared identifier 'threadIdx'
if (threadIdx.x < warpSize) {
^
<program source>:63:27: error: use of undeclared identifier 'warpSize'
if (threadIdx.x < warpSize) {
^
<program source>:64:26: error: use of undeclared identifier 'threadIdx'
for (int i = threadIdx.x + warpSize; i < N; i += warpSize)
^
<program source>:64:40: error: use of undeclared identifier 'warpSize'
for (int i = threadIdx.x + warpSize; i < N; i += warpSize)
^
<program source>:64:62: error: use of undeclared identifier 'warpSize'
for (int i = threadIdx.x + warpSize; i < N; i += warpSize)
^
<program source>:66:17: error: use of undeclared identifier 'buf'; did you mean 'buf2'?
buf[threadIdx.x] = max(buf[threadIdx.x], buf[i]);
^~~
buf2
<program source>:47:12: note: 'buf2' declared here
ga_float * buf2 = buf + N;
^
<program source>:66:21: error: use of undeclared identifier 'threadIdx'
buf[threadIdx.x] = max(buf[threadIdx.x], buf[i]);
^
<program source>:66:40: error: use of undeclared identifier 'buf'; did you mean 'buf2'?
buf[threadIdx.x] = max(buf[threadIdx.x], buf[i]);
^~~
buf2
<program source>:47:12: note: 'buf2' declared here
ga_float * buf2 = buf + N;
^
<program source>:66:44: error: use of undeclared identifier 'threadIdx'
buf[threadIdx.x] = max(buf[threadIdx.x], buf[i]);
^
<program source>:71:32: error: use of undeclared identifier 'warpSize'
for (unsigned int _n = warpSize / 2; _n > 0; _n /= 2) {
^
<program source>:72:15: error: use of undeclared identifier 'threadIdx'
if (threadIdx.x < _n && threadIdx.x + _n < N)
^
<program source>:72:35: error: use of undeclared identifier 'threadIdx'
if (threadIdx.x < _n && threadIdx.x + _n < N)
^
<program source>:73:13: error: use of undeclared identifier 'buf'; did you mean 'buf2'?
buf[threadIdx.x] = max(buf[threadIdx.x], buf[threadIdx.x+_n]);
^~~
buf2
<program source>:47:12: note: 'buf2' declared here
ga_float * buf2 = buf + N;
^
<program source>:73:17: error: use of undeclared identifier 'threadIdx'
buf[threadIdx.x] = max(buf[threadIdx.x], buf[threadIdx.x+_n]);
^
<program source>:73:36: error: use of undeclared identifier 'buf'; did you mean 'buf2'?
buf[threadIdx.x] = max(buf[threadIdx.x], buf[threadIdx.x+_n]);
^~~
buf2
<program source>:47:12: note: 'buf2' declared here
ga_float * buf2 = buf + N;
^
<program source>:73:40: error: use of undeclared identifier 'threadIdx'
buf[threadIdx.x] = max(buf[threadIdx.x], buf[threadIdx.x+_n]);
^
<program source>:79:20: error: use of undeclared identifier 'buf'; did you mean 'buf2'?
ga_float row_max = buf[0];
^~~
buf2
<program source>:47:12: note: 'buf2' declared here
ga_float * buf2 = buf + N;
^
<program source>:81:13: error: use of undeclared identifier 'threadIdx'
for(int __i=threadIdx.x; __i<N; __i+=blockDim.x){;
^
<program source>:81:38: error: use of undeclared identifier 'blockDim'
for(int __i=threadIdx.x; __i<N; __i+=blockDim.x){;
^
<program source>:82:1: error: use of undeclared identifier 'buf'; did you mean 'buf2'?
buf[__i] = exp(buf2[__i] - row_max);
^~~
buf2
<program source>:47:12: note: 'buf2' declared here
ga_float * buf2 = buf + N;
^
<program source>:83:13: error: use of undeclared identifier 'buf'; did you mean 'buf2'?
buf2[__i] = buf[__i];
^~~
buf2
<program source>:47:12: note: 'buf2' declared here
ga_float * buf2 = buf + N;
^
<program source>:91:13: error: use of undeclared identifier 'threadIdx'
if (threadIdx.x < warpSize) {
^
<program source>:91:27: error: use of undeclared identifier 'warpSize'
if (threadIdx.x < warpSize) {
^
<program source>:92:26: error: use of undeclared identifier 'threadIdx'
for (int i = threadIdx.x + warpSize; i < N; i += warpSize)
^
<program source>:92:40: error: use of undeclared identifier 'warpSize'
for (int i = threadIdx.x + warpSize; i < N; i += warpSize)
^
<program source>:92:62: error: use of undeclared identifier 'warpSize'
for (int i = threadIdx.x + warpSize; i < N; i += warpSize)
^
<program source>:94:17: error: use of undeclared identifier 'buf'; did you mean 'buf2'?
buf[threadIdx.x] = buf[threadIdx.x] + buf[i];
^~~
buf2
<program source>:47:12: note: 'buf2' declared here
ga_float * buf2 = buf + N;
^
<program source>:94:21: error: use of undeclared identifier 'threadIdx'
buf[threadIdx.x] = buf[threadIdx.x] + buf[i];
^
<program source>:94:36: error: use of undeclared identifier 'buf'; did you mean 'buf2'?
buf[threadIdx.x] = buf[threadIdx.x] + buf[i];
^~~
buf2
<program source>:47:12: note: 'buf2' declared here
ga_float * buf2 = buf + N;
^
<program source>:94:40: error: use of undeclared identifier 'threadIdx'
buf[threadIdx.x] = buf[threadIdx.x] + buf[i];
^
<program source>:94:55: error: use of undeclared identifier 'buf'; did you mean 'buf2'?
buf[threadIdx.x] = buf[threadIdx.x] + buf[i];
^~~
buf2
<program source>:47:12: note: 'buf2' declared here
ga_float * buf2 = buf + N;
^
<program source>:99:32: error: use of undeclared identifier 'warpSize'
for (unsigned int _n = warpSize / 2; _n > 0; _n /= 2) {
^
<program source>:100:15: error: use of undeclared identifier 'threadIdx'
if (threadIdx.x < _n && threadIdx.x + _n < N)
^
<program source>:100:35: error: use of undeclared identifier 'threadIdx'
if (threadIdx.x < _n && threadIdx.x + _n < N)
^
<program source>:101:13: error: use of undeclared identifier 'buf'; did you mean 'buf2'?
buf[threadIdx.x] = buf[threadIdx.x] + buf[threadIdx.x+_n];
^~~
buf2
<program source>:47:12: note: 'buf2' declared here
ga_float * buf2 = buf + N;
^
<program source>:101:17: error: use of undeclared identifier 'threadIdx'
buf[threadIdx.x] = buf[threadIdx.x] + buf[threadIdx.x+_n];
^
<program source>:101:32: error: use of undeclared identifier 'buf'; did you mean 'buf2'?
buf[threadIdx.x] = buf[threadIdx.x] + buf[threadIdx.x+_n];
^~~
buf2
<program source>:47:12: note: 'buf2' declared here
ga_float * buf2 = buf + N;
^
<program source>:101:36: error: use of undeclared identifier 'threadIdx'
buf[threadIdx.x] = buf[threadIdx.x] + buf[threadIdx.x+_n];
^
<program source>:101:51: error: use of undeclared identifier 'buf'; did you mean 'buf2'?
buf[threadIdx.x] = buf[threadIdx.x] + buf[threadIdx.x+_n];
^~~
buf2
<program source>:47:12: note: 'buf2' declared here
ga_float * buf2 = buf + N;
^
<program source>:101:55: error: use of undeclared identifier 'threadIdx'
buf[threadIdx.x] = buf[threadIdx.x] + buf[threadIdx.x+_n];
^
<program source>:107:20: error: use of undeclared identifier 'buf'; did you mean 'buf2'?
ga_float row_sum = buf[0];
^~~
buf2
<program source>:47:12: note: 'buf2' declared here
ga_float * buf2 = buf + N;
^
<program source>:109:13: error: use of undeclared identifier 'threadIdx'
for(int __i=threadIdx.x; __i<N; __i+=blockDim.x){;
^
<program source>:109:38: error: use of undeclared identifier 'blockDim'
for(int __i=threadIdx.x; __i<N; __i+=blockDim.x){;
^
<program source>:110:1: error: use of undeclared identifier 'buf'; did you mean 'buf2'?
buf[__i] = buf2[__i] / row_sum;
^~~
buf2
<program source>:47:12: note: 'buf2' declared here
ga_float * buf2 = buf + N;
^
<program source>:113:15: error: use of undeclared identifier 'threadIdx'
for (int tx = threadIdx.x; tx< N; tx += blockDim.x){;
^
<program source>:113:41: error: use of undeclared identifier 'blockDim'
for (int tx = threadIdx.x; tx< N; tx += blockDim.x){;
^
<program source>:114:38: error: use of undeclared identifier 'buf'; did you mean 'buf2'?
sm[blockIDX * sm_s0 + tx * sm_s1] = (buf[tx]);
^~~
buf2
<program source>:47:12: note: 'buf2' declared here
ga_float * buf2 = buf + N;
^
<program source>:51:42: warning: comparison of integers of different signs: 'int' and 'const ulong' (aka 'const unsigned long')
for (int blockIDX = blockIdx.x; blockIDX < M; blockIDX += gridDim.x){;
~~~~~~~~ ^ ~
<program source>:52:30: warning: comparison of integers of different signs: 'int' and 'const ulong' (aka 'const unsigned long')
for (int tx = threadIdx.x; tx< N; tx += blockDim.x){;
~~^ ~
<program source>:64:52: warning: comparison of integers of different signs: 'int' and 'const ulong' (aka 'const unsigned long')
for (int i = threadIdx.x + warpSize; i < N; i += warpSize)
~ ^ ~
<program source>:81:29: warning: comparison of integers of different signs: 'int' and 'const ulong' (aka 'const unsigned long')
for(int __i=threadIdx.x; __i<N; __i+=blockDim.x){;
~~~^~
<program source>:92:52: warning: comparison of integers of different signs: 'int' and 'const ulong' (aka 'const unsigned long')
for (int i = threadIdx.x + warpSize; i < N; i += warpSize)
~ ^ ~
<program source>:109:29: warning: comparison of integers of different signs: 'int' and 'const ulong' (aka 'const unsigned long')
for(int __i=threadIdx.x; __i<N; __i+=blockDim.x){;
~~~^~
<program source>:113:30: warning: comparison of integers of different signs: 'int' and 'const ulong' (aka 'const unsigned long')
for (int tx = threadIdx.x; tx< N; tx += blockDim.x){;
~~^ ~
0001 #define local_barrier() barrier(CLK_LOCAL_MEM_FENCE)
0002 #define WITHIN_KERNEL /* empty */
0003 #define KERNEL __kernel
0004 #define GLOBAL_MEM __global
0005 #define LOCAL_MEM __local
0006 #define LOCAL_MEM_ARG __local
0007 #define REQD_WG_SIZE(x, y, z) __attribute__((reqd_work_group_size(x, y, z)))
0008 #ifndef NULL
0009 #define NULL ((void*)0)
0010 #endif
0011 #define LID_0 get_local_id(0)
0012 #define LID_1 get_local_id(1)
0013 #define LID_2 get_local_id(2)
0014 #define LDIM_0 get_local_size(0)
0015 #define LDIM_1 get_local_size(1)
0016 #define LDIM_2 get_local_size(2)
0017 #define GID_0 get_group_id(0)
0018 #define GID_1 get_group_id(1)
0019 #define GID_2 get_group_id(2)
0020 #define GDIM_0 get_num_groups(0)
0021 #define GDIM_1 get_num_groups(1)
0022 #define GDIM_2 get_num_groups(2)
0023 #define ga_bool uchar
0024 #define ga_byte char
0025 #define ga_ubyte uchar
0026 #define ga_short short
0027 #define ga_ushort ushort
0028 #define ga_int int
0029 #define ga_uint uint
0030 #define ga_long long
0031 #define ga_ulong ulong
0032 #define ga_float float
0033 #define ga_double double
0034 #define ga_half half
0035 #define ga_size ulong
0036 #define ga_ssize long
0037 #define load_half(p) vload_half(0, p)
0038 #define store_half(p, v) vstore_half_rtn(v, 0, p)
0039 #define GA_DECL_SHARED_PARAM(type, name) , __local type *name
0040 #define GA_DECL_SHARED_BODY(type, name)
0041 #define GA_WARP_SIZE 64
0042 KERNEL void kSoftmaxWithBias (const ga_size M, const ga_size N, const ga_float * x, const ga_size offset_x, const ga_ssize sx0, const ga_ssize sx1, const ga_float * b, const ga_size offset_b, const ga_ssize sb0, ga_float * sm, const ga_size offset_sm, const ga_ssize sm_s0, const ga_ssize sm_s1)
0043 {
0044 extern __shared__ ga_float buf[];
0045 ga_float * buf2 = buf + N;
0046 x = (const ga_float *)(((char *)x)+offset_x);
0047 b = (const ga_float *)(((char *)b)+offset_b);
0048 sm = (ga_float *)(((char *)sm)+offset_sm);
0049 for (int blockIDX = blockIdx.x; blockIDX < M; blockIDX += gridDim.x){;
0050 for (int tx = threadIdx.x; tx< N; tx += blockDim.x){;
0051 buf[tx] = (x[blockIDX * sx0 + tx * sx1]);
0052 buf[tx] += (b[tx * sb0]);
0053 buf2[tx] = buf[tx];
0054 };
0055 __syncthreads();
0056
0057 {
0058 // This function trashes buf[1..warpSize],
0059 // leaving the reduction result in buf[0].
0060
0061 if (threadIdx.x < warpSize) {
0062 for (int i = threadIdx.x + warpSize; i < N; i += warpSize)
0063 {
0064 buf[threadIdx.x] = max(buf[threadIdx.x], buf[i]);
0065 }
0066 }
0067 __syncthreads();
0068 //reduce so that threadIdx.x 0 has the reduction of everything
0069 for (unsigned int _n = warpSize / 2; _n > 0; _n /= 2) {
0070 if (threadIdx.x < _n && threadIdx.x + _n < N)
0071 buf[threadIdx.x] = max(buf[threadIdx.x], buf[threadIdx.x+_n]);
0072 __syncthreads();
0073 }
0074 }
0075 ;
0076 __syncthreads();
0077 ga_float row_max = buf[0];
0078 __syncthreads();
0079 for(int __i=threadIdx.x; __i<N; __i+=blockDim.x){;
0080 buf[__i] = exp(buf2[__i] - row_max);
0081 buf2[__i] = buf[__i];
0082 };
0083 __syncthreads();
0084
0085 {
0086 // This function trashes buf[1..warpSize],
0087 // leaving the reduction result in buf[0].
0088
0089 if (threadIdx.x < warpSize) {
0090 for (int i = threadIdx.x + warpSize; i < N; i += warpSize)
0091 {
0092 buf[threadIdx.x] = buf[threadIdx.x] + buf[i];
0093 }
0094 }
0095 __syncthreads();
0096 //reduce so that threadIdx.x 0 has the reduction of everything
0097 for (unsigned int _n = warpSize / 2; _n > 0; _n /= 2) {
0098 if (threadIdx.x < _n && threadIdx.x + _n < N)
0099 buf[threadIdx.x] = buf[threadIdx.x] + buf[threadIdx.x+_n];
0100 __syncthreads();
0101 }
0102 }
0103 ;
0104 __syncthreads();
0105 ga_float row_sum = buf[0];
0106 __syncthreads();
0107 for(int __i=threadIdx.x; __i<N; __i+=blockDim.x){;
0108 buf[__i] = buf2[__i] / row_sum;
0109 };
0110 __syncthreads();
0111 for (int tx = threadIdx.x; tx< N; tx += blockDim.x){;
0112 sm[blockIDX * sm_s0 + tx * sm_s1] = (buf[tx]);
0113 };
0114 __syncthreads();
0115 };
0116 }
0117
")
nouiz commented
This error isn't readable. Can you give it again while keeping the new lines?
abergeron commented
The softmax kernel is not ready for OpenCL apparently and is missing a couple of GLOBAL_MEM markers around pointers.
If you want to do it, we will accept a PR.
lesves commented
I think I have the same problem with libgpuarray and OpenCL.
nouiz commented
OpenCL isn't supported for all operation. We don't have time to finish this
and need help get this working. If you don't have time to help us finish
this, then I would tell it isn't in a usable state for you now.
…On Thu, Apr 13, 2017 at 12:00 PM abergeron ***@***.***> wrote:
The softmax kernel is not ready for OpenCL apparently and is missing a
couple of GLOBAL_MEM markers around pointers.
If you want to do it, we will accept a PR.
—
You are receiving this because you commented.
Reply to this email directly, view it on GitHub
<#405 (comment)>,
or mute the thread
<https://github.com/notifications/unsubscribe-auth/AALC-ykUxrZlQOelECmyuhkRTmjkRLcVks5rvkZ_gaJpZM4M8mn4>
.
lesves commented
OK, I understand. I am a beginner, so I don't think I can do this.