Multiple DDR banks and q.enqueueWriteBuffer
RatkoFri opened this issue · 2 comments
Greetings,
I am experimenting with multiple DDR banks using the vector addition problem. I am using two memory banks: one stores one input array and the output array, while the other bank stores the other input array. The code compiles successfully, but when I run hardware emulation I get this error:
[XRT] ERROR: Cannot allocate buffer at unknown memory index [XRT] ERROR: Operation failed due to earlier error 'Cannot allocate buffer at unknown memory index' [XRT] ERROR: Operation failed due to earlier error 'Cannot allocate buffer at unknown memory index'
The host code is as follows:
`
#include <iostream>
#include <fstream>
#include <iterator>
#include <vector>
// XRT includes
#include "xrt/xrt_bo.h"
#include <experimental/xrt_xclbin.h>
#include "xrt/xrt_device.h"
#include "xrt/xrt_kernel.h"
// OpenCL API
#define CL_HPP_TARGET_OPENCL_VERSION 120
#define CL_HPP_MINIMUM_OPENCL_VERSION 120
#define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#include <CL/cl2.hpp>
#include <CL/cl_ext_xilinx.h>
#define DATA_SIZE 1024
#define KERNEL_CL "vadd"
using namespace std;
/**
 * Reads an entire file into memory as raw bytes.
 *
 * @param filename Path of the file to load (here: the .xclbin device binary).
 * @return The file contents, byte for byte; empty if the file is empty.
 * @throws std::ios_base::failure if the file cannot be opened, its size
 *         cannot be determined, or fewer bytes than expected could be read.
 */
std::vector<unsigned char> read_binary_file(const std::string &filename)
{
    std::cout << "INFO: Reading " << filename << std::endl;

    // Open positioned at the end so tellg() directly yields the file size.
    std::ifstream file(filename, std::ios::binary | std::ios::ate);
    if (!file)
        throw std::ios_base::failure("Cannot open file: " + filename);

    const std::streamoff file_size = file.tellg();
    if (file_size < 0)
        throw std::ios_base::failure("Cannot determine size of file: " + filename);
    file.seekg(0, std::ios::beg);

    // One bulk read instead of extracting byte-by-byte through an iterator.
    std::vector<unsigned char> data(static_cast<std::size_t>(file_size));
    if (file_size > 0 &&
        !file.read(reinterpret_cast<char *>(data.data()),
                   static_cast<std::streamsize>(file_size)))
        throw std::ios_base::failure("Failed to read file: " + filename);

    return data;
}
int main(int argc, char** argv) {
cl_int err;
// read arguments
if (argc != 3) {
cout << "Usage: " << argv[0] << " device name kernel" << endl;
return EXIT_FAILURE;
}
string binary_file = argv[2];
cout<<binary_file<<endl;
//***************************************************
// STEP 0: Initialize data
//***************************************************
vector<int> source_a(DATA_SIZE, 1); // ini
vector<int> source_b(DATA_SIZE, 1);
vector<int> source_c(DATA_SIZE, 0);
//***************************************************
// STEP 1: Get the platform
//***************************************************
vector<cl::Platform> platforms;
cl::Platform::get(&platforms);
cl::Platform platform;
for(cl::Platform &p: platforms)
{
const string name = p.getInfo<CL_PLATFORM_NAME>();
cout << "PLATFORM: " << name << endl;
if(name == "Xilinx")
{
platform = p;
break;
}
}
if(platform == cl::Platform())
{
cout << "Xilinx platform not found!" << endl;
exit(EXIT_FAILURE);
}
//***************************************************
// STEP 2: Get the devices and select the desired device
//***************************************************
vector<cl::Device> devices;
platform.getDevices(CL_DEVICE_TYPE_ACCELERATOR, &devices);
cout<<"Number of devices found: " << devices.size() << endl;
cl::Device device;
for(cl::Device &iterDevice: devices){
cout << "DEVICE: " << iterDevice.getInfo<CL_DEVICE_NAME>() << endl;
if(iterDevice.getInfo<CL_DEVICE_NAME>() == argv[1])
device = iterDevice;
}
cout << "SELECTED DEVICE: " << device.getInfo<CL_DEVICE_NAME>() << endl;
//***************************************************
// STEP 3: Create a context
//***************************************************
// we create a context with the selected device using Context class
cl::Context context(device, nullptr, nullptr, nullptr, &err);
cout << "CONTEXT ERROR: " << err << endl;
//***************************************************
// STEP 4: Create a command queue
//***************************************************
// we create a command queue with the selected device and context using CommandQueue class
cl::CommandQueue q(context, device, CL_QUEUE_PROFILING_ENABLE, &err);
cout << "COMMAND QUEUE ERROR: " << err << endl;
//***************************************************
// STEP 5: Create device buffers
//***************************************************
//cl::Buffer buffer_a(context, CL_MEM_COPY_HOST_PTR | CL_MEM_READ_ONLY, source_a.size() * sizeof(int), source_a.data(), &err);
//cl::Buffer buffer_b(context, CL_MEM_COPY_HOST_PTR | CL_MEM_READ_ONLY, source_b.size() * sizeof(int), source_b.data(), &err);
cl::Buffer buffer_a(context, CL_MEM_READ_ONLY, source_a.size() * sizeof(int), NULL, &err);
cl::Buffer buffer_b(context, CL_MEM_READ_ONLY, source_b.size() * sizeof(int), NULL, &err);
cl::Buffer buffer_res(context, CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, source_c.size() * sizeof(int), nullptr, &err);
//***************************************************
// STEP 6: Create a program object for the context
//***************************************************
// read the kernel .xo file
cl::Kernel kernel;
auto program_binary = read_binary_file(binary_file);
cl::Program::Binaries bins{{program_binary.data(), program_binary.size()}};
std::cout << "Trying to program device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
cl::Program program(context, {device}, bins, nullptr, &err);
//***************************************************
// STEP 6: Create the kernel object
//***************************************************
if (err != CL_SUCCESS) {
std::cout << "Failed to program device with xclbin file!\n";
cout << err << endl;
} else {
std::cout << "Device: program successful!\n";
kernel = cl::Kernel(program, KERNEL_CL, &err);
// we break because we found a valid device
}
//***************************************************
// STEP 7: Write host data to device buffers
//***************************************************
q.enqueueWriteBuffer( buffer_a, CL_FALSE, 0, source_a.size() * sizeof(int), source_a.data());
q.enqueueWriteBuffer( buffer_b, CL_FALSE, 0, source_b.size() * sizeof(int), source_b.data());
//***************************************************
// STEP 8: Set the kernel arguments
//***************************************************
kernel.setArg(0, buffer_res);
kernel.setArg(1, buffer_a);
kernel.setArg(2, buffer_b);
kernel.setArg(3, DATA_SIZE);
//***************************************************
// STEP 9: Enqueue the kernel for execution
//***************************************************
q.enqueueTask(kernel);
//***************************************************
// STEP 12: Read the output buffer back to the host
//***************************************************
// Synchronous/blocking read of results
vector<int> result(DATA_SIZE, 0);
q.finish();
q.enqueueReadBuffer(buffer_res, CL_TRUE, 0, result.size() * sizeof(int), result.data());
cout << "Calculating sum of resulting array: " << endl;
int sum = 0;
for (int i = 0; i < DATA_SIZE; i++){
sum += result[i];
}
cout << "SUM: " << sum << endl;
return 0;
}
`
Vector addition kernel:
// Number of elements staged in local arrays per iteration of main_loop.
#define BUFFER_SIZE 256
extern "C" {
/**
 * Element-wise vector addition: c[k] = a[k] + b[k] for k in [0, n_elements).
 *
 * The inputs are processed in chunks of at most BUFFER_SIZE elements: each
 * chunk of a and b is first copied into a local array, then the sums are
 * written to c. Ports a and c share the m_axi bundle aximm1; b uses aximm2.
 *
 * @param c          Output array, at least n_elements long.
 * @param a          First input array, at least n_elements long.
 * @param b          Second input array, at least n_elements long.
 * @param n_elements Number of elements to add; nothing is written if <= 0.
 */
void vadd(int* c,
          const int* a,
          const int* b,
          const int n_elements)
{
#pragma HLS interface m_axi port=a bundle=aximm1
#pragma HLS interface m_axi port=b bundle=aximm2
#pragma HLS interface m_axi port=c bundle=aximm1
    int chunk_a[BUFFER_SIZE];
    int chunk_b[BUFFER_SIZE];

main_loop:
    for (int base = 0; base < n_elements; base += BUFFER_SIZE)
    {
        // The final chunk may be shorter than BUFFER_SIZE.
        const int remaining = n_elements - base;
        const int chunk_len = (remaining < BUFFER_SIZE) ? remaining : BUFFER_SIZE;

    readA:
        for (int k = 0; k < chunk_len; k++)
        {
            chunk_a[k] = a[base + k];
        }

    readB:
        for (int k = 0; k < chunk_len; k++)
        {
            chunk_b[k] = b[base + k];
        }

    vadd_writeC:
        for (int k = 0; k < chunk_len; k++)
        {
            c[base + k] = chunk_a[k] + chunk_b[k];
        }
    }
}
}
Config file:
debug=1
save-temps=1
[connectivity]
sp=vadd_1.a:DDR[1]
sp=vadd_1.b:DDR[2]
sp=vadd_1.c:DDR[1]
[profile]
data=all:all:all
By the way, if I comment out the enqueueWriteBuffer commands and add CL_MEM_COPY_HOST_PTR when creating the buffers, everything works.
Any suggestions?
Thanks,
Ratko
Hi RatkoFri, if this issue is related to a tutorial, please let me know which one so that I can assign it to the author. But if it is not a tutorial-related issue, please try to ask for help from our Vitis Forum or create a support case through your sales representative.
Thanks for the information. I will post it on Vitis Forum.