The result of CLWinogradConvolutionLayer is incorrect. What should I do?
superHappy-yo opened this issue · 1 comments
superHappy-yo commented
Output of 'strings libarm_compute.so | grep arm_compute_version':
Platform: Rock 5B, OpenCL 2.0
Operating System: ubuntu
Problem description: I want to compare the speed of Winograd and im2col+GEMM convolution in OpenCL, but I found that the Winograd result is incorrect (the right part).
It happens when BATCH_SIZE = 512!
the winograd code:
` CLScheduler::get().default_init();
// Reproduction snippet: configures and runs CLWinogradConvolutionLayer on F32
// tensors, then prints the output tensor.
// IMG_*, KERNEL_*, BATCH_SIZE, STRIDE, PAD, out_h, out_w, img and kernel_list
// are defined outside this snippet.
CLTensor imgTensor;
CLTensor kernelTensor, OTensor;
// Describe the tensors. BATCH_SIZE is the 4th dimension of the weights and the
// 3rd dimension of the output.
// NOTE(review): BATCH_SIZE therefore appears to act as the number of filters /
// output channels rather than an input batch — confirm.
// NOTE(review): shapes are listed height-first (IMG_H, IMG_W, ...); confirm this
// matches the dimension order the library expects for the layout in use.
imgTensor.allocator()->init(TensorInfo(TensorShape(IMG_H, IMG_W, IMG_CHANNEL), 1, DataType::F32));
kernelTensor.allocator()->init(TensorInfo(TensorShape(KERNEL_H, KERNEL_W, KERNEL_CHANNEL, BATCH_SIZE), 1, DataType::F32));
OTensor.allocator()->init(TensorInfo(TensorShape(out_h, out_w, BATCH_SIZE), 1, DataType::F32));
// tstart1/tend1 bracket the configure() call only.
struct timeval tstart1, tend1;
gettimeofday(&tstart1, NULL);
// std::cout << "winogradInfo :" << std::endl;
CLWinogradConvolutionLayer winogradInfo;
// No bias tensor is passed (nullptr); the same STRIDE and PAD are used on every side.
winogradInfo.configure(&imgTensor, &kernelTensor, nullptr, &OTensor, PadStrideInfo(STRIDE, STRIDE, PAD, PAD, PAD, PAD, DimensionRoundingType::FLOOR));
gettimeofday(&tend1, NULL);
// Backing memory is allocated only after configure().
imgTensor.allocator()->allocate();
kernelTensor.allocator()->allocate();
OTensor.allocator()->allocate();
// Fill the input from `img` while the tensor is mapped.
// std::cout << "img tensor:" << std::endl;
imgTensor.map();
arm_compute::utils::fill_tensor_vector(imgTensor, img);
// imgTensor.print(std::cout);
imgTensor.unmap();
// Fill the weights from `kernel_list` while the tensor is mapped.
// std::cout << "kernel tensor:" << std::endl;
kernelTensor.map();
arm_compute::utils::fill_tensor_vector(kernelTensor, kernel_list);
// kernelTensor.print(std::cout);
kernelTensor.unmap();
// Start timing
struct timeval tstart, tend;
gettimeofday(&tstart, NULL);
winogradInfo.run();
// Stop timing
// NOTE(review): there is no explicit CLScheduler sync between run() and this
// timestamp; if run() only enqueues work, the interval may not cover kernel
// execution — confirm.
gettimeofday(&tend, NULL);
// Map the output and print it.
// std::cout << "out tensor:" << std::endl;
OTensor.map();
OTensor.print(std::cout);
OTensor.unmap();`
the im2Col code:
` CLScheduler::get().default_init();
// Reference snippet: same tensor setup as the Winograd case, but using
// CLGEMMConvolutionLayer; prints the output tensor for comparison.
// IMG_*, KERNEL_*, BATCH_SIZE, STRIDE, PAD, out_h, out_w, img and kernel_list
// are defined outside this snippet.
CLTensor imgTensor;
CLTensor kernelTensor, OTensor;
// Describe the tensors. BATCH_SIZE is the 4th dimension of the weights and the
// 3rd dimension of the output.
// NOTE(review): BATCH_SIZE therefore appears to act as the number of filters /
// output channels rather than an input batch — confirm.
imgTensor.allocator()->init(TensorInfo(TensorShape(IMG_H, IMG_W, IMG_CHANNEL), 1, DataType::F32));
kernelTensor.allocator()->init(TensorInfo(TensorShape(KERNEL_H, KERNEL_W, KERNEL_CHANNEL, BATCH_SIZE), 1, DataType::F32));
OTensor.allocator()->init(TensorInfo(TensorShape(out_h, out_w, BATCH_SIZE), 1, DataType::F32));
// tstart1/tend1 bracket the configure() call (plus the label print below).
struct timeval tstart1, tend1;
gettimeofday(&tstart1, NULL);
std::cout << "GEMMInfo :" << std::endl;
CLGEMMConvolutionLayer GEMMInfo;
// No bias tensor is passed (nullptr); the same STRIDE and PAD are used on every side.
GEMMInfo.configure(&imgTensor, &kernelTensor, nullptr, &OTensor, PadStrideInfo(STRIDE, STRIDE, PAD, PAD, PAD, PAD, DimensionRoundingType::FLOOR));
gettimeofday(&tend1, NULL);
// Backing memory is allocated only after configure().
imgTensor.allocator()->allocate();
kernelTensor.allocator()->allocate();
OTensor.allocator()->allocate();
// Fill the input from `img` while the tensor is mapped.
std::cout << "img tensor:" << std::endl;
imgTensor.map();
arm_compute::utils::fill_tensor_vector(imgTensor, img);
// imgTensor.print(std::cout);
imgTensor.unmap();
// Fill the weights from `kernel_list` while the tensor is mapped.
std::cout << "kernel tensor:" << std::endl;
kernelTensor.map();
arm_compute::utils::fill_tensor_vector(kernelTensor, kernel_list);
// kernelTensor.print(std::cout);
kernelTensor.unmap();
// CLScheduler::get().sync();
// Start timing
struct timeval tstart, tend;
gettimeofday(&tstart, NULL);
GEMMInfo.run();
// Stop timing
// NOTE(review): the only sync call in this snippet is commented out; if run()
// only enqueues work, the interval may not cover kernel execution — confirm.
gettimeofday(&tend, NULL);
// Map the output and print it.
std::cout << "out tensor:" << std::endl;
OTensor.map();
OTensor.print(std::cout);
OTensor.unmap();`