Compute 7_0 errors
YuvRad opened this issue · 0 comments
I'm getting the following errors when attempting to compile with CUDA 10.1 V10.1.243:
ptxas /tmp/tmpxft_00004b30_00000000-5_distributedmp.compute_70.ptx, line 41537; error : Instruction 'shfl' without '.sync' is not supported on .target sm_70 and higher from PTX ISA version 6.4
I've fixed the errors by using shfl.sync, i.e.:
#if __CUDA_ARCH__ >= 700
asm volatile ("shfl.sync.up.b32 %0|%%c1, %1, 1, %2, 0xffffffff;" : "=r"(x) : "r"(r1), "r"((256-_width)*256));
#else
asm volatile ("shfl.up.b32 %0|%%c1, %1, 1, %2;" : "=r"(x) : "r"(r1), "r"((256-_width)*256));
#endif
and
#if __CUDA_ARCH__ >= 700
asm volatile ("setp.eq.and.u32 %%c2,%1,0xFFFFFFFF,%%c1;\n\t"
"vote.sync.ballot.b32 %0,%%c2, 0xFFFFFFFF;"
: "=r"(p) : "r"(x));
asm volatile ("setp.eq.u32 %%c2,%1,1;\n\t"
"vote.sync.ballot.b32 %0,%%c2, 0xFFFFFFFF;"
: "=r"(g) : "r"(r1));
#else
asm volatile ("setp.eq.and.u32 %%c2,%1,0xFFFFFFFF,%%c1;\n\t"
"vote.ballot.b32 %0,%%c2;" : "=r"(p) : "r"(x));
asm volatile ("setp.eq.u32 %%c2,%1,1;\n\t"
"vote.ballot.b32 %0,%%c2;" : "=r"(g) : "r"(r1));
#endif
I applied these changes wherever they were needed. However, it was a mechanical-Turk kind of exercise, because I have never programmed in CUDA, and certainly not low-level CUDA, so I'm not sure whether the implementation is correct.