cornell-zhang/heterocl

Support of Constant Arrays

Opened this issue · 3 comments

Currently, HeteroCL cannot infer constant arrays from the source code. For example, the weights (F) of the convolution layer are read-only and should be declared as constants in the generated C code.

def conv1() -> None:
    """Build and run a 3x3 convolution whose weights F are a placeholder.

    Both the 6x6 input ``A`` and the 3x3 weights ``F`` are declared as
    placeholders and passed to ``hcl.create_schedule`` as inputs, so ``F``
    is supplied by the caller at run time together with ``A`` and the
    output buffer.
    """
    A = hcl.placeholder((6, 6), "A")
    F = hcl.placeholder((3, 3), "F")

    def kernel(A, F):
        # r and c are the two reduction axes (each over [0, 3)) that walk
        # the 3x3 window.
        r = hcl.reduce_axis(0, 3)
        c = hcl.reduce_axis(0, 3)
        # B[y, x] = sum over (r, c) of A[y+r, x+c] * F[r, c]; output is 4x4.
        return hcl.compute((4, 4),
                lambda y, x: hcl.sum(A[y+r, x+c] * F[r, c], axis=[r, c]), "B")

    s = hcl.create_schedule([A, F], kernel)
    # Pipeline the second axis (index 1) of stage B.
    s[kernel.B].pipeline(kernel.B.axis[1])
    
    # Configure the zc706 platform: compile="vivado_hls", mode="csim",
    # project name "conv1".
    target = hcl.platform.zc706
    target.config(compile="vivado_hls",mode="csim",project="conv1")
    f = hcl.build(s, target=target)
    # Random integer inputs drawn from [0, 10); the output buffer starts as
    # zeros.
    hcl_A = hcl.asarray(np.random.randint(0, 10, A.shape))
    hcl_F = hcl.asarray(np.random.randint(0, 10, F.shape))
    hcl_B = hcl.asarray(np.zeros((4, 4)))
    # F is passed explicitly as the second runtime argument.
    f(hcl_A, hcl_F, hcl_B)

def conv2() -> None:
    """Build and run a 3x3 convolution whose weights F are created in-kernel.

    Only the 6x6 input ``A`` is a placeholder. The 3x3 weights ``F`` are
    produced inside the kernel by ``hcl.copy`` from a random NumPy array,
    so the built function takes just ``A`` and the output buffer.
    """
    A = hcl.placeholder((6, 6), "A")

    def kernel(A):
        # r and c are the two reduction axes (each over [0, 3)) that walk
        # the 3x3 window.
        r = hcl.reduce_axis(0, 3)
        c = hcl.reduce_axis(0, 3)
        # Weights come from a NumPy array generated when the kernel is
        # defined, not from a function argument.
        F = hcl.copy(np.random.randint(0,10,(3,3)),"F")
        # B[y, x] = sum over (r, c) of A[y+r, x+c] * F[r, c]; output is 4x4.
        return hcl.compute((4, 4),
                lambda y, x: hcl.sum(A[y+r, x+c] * F[r, c], axis=[r, c]), "B")

    s = hcl.create_schedule([A], kernel)
    # Pipeline the second axis (index 1) of stage B.
    s[kernel.B].pipeline(kernel.B.axis[1])

    # Configure the zc706 platform: compile="vivado_hls", mode="csim",
    # project name "conv2".
    target = hcl.platform.zc706
    target.config(compile="vivado_hls",mode="csim",project="conv2")
    f = hcl.build(s, target=target)
    # Random integer input drawn from [0, 10); the output buffer starts as
    # zeros.
    hcl_A = hcl.asarray(np.random.randint(0, 10, A.shape))
    hcl_B = hcl.asarray(np.zeros((4, 4)))
    # No weight argument here: F lives inside the kernel.
    f(hcl_A, hcl_B)

However, the first method (conv1) passes F as a function argument, while conv2, which uses hcl.copy, initializes the array with one assignment per element.

// conv1: generated code; the weights F are passed in as a function argument.
void test(bit32 A[6][6], bit32 F[3][3], bit32 B[4][4]) {
    // Iterate over the 4x4 output B.
    B_y: for (bit32 y = 0; y < 4; ++y) {
      B_x: for (bit32 x = 0; x < 4; ++x) {
      #pragma HLS pipeline
        bit32 sum;
        sum = 0;
        // Reduce over the 3x3 window.
        B_ra2: for (bit32 ra2 = 0; ra2 < 3; ++ra2) {
          B_ra3: for (bit32 ra3 = 0; ra3 < 3; ++ra3) {
            // Multiply as ap_int<64>, accumulate as ap_int<65>, then cast
            // the result back to bit32.
            sum = ((bit32)(((ap_int<65>)(((ap_int<64>)A[(y + ra2)][(x + ra3)]) * ((ap_int<64>)F[ra2][ra3]))) + ((ap_int<65>)sum)));
          }
        }
        B[y][x] = sum;
      }
    }
  }

// conv2: generated code; the weights F are a local array filled by one
// assignment per element rather than a const initializer.
void test(bit32 A[6][6], bit32 B[4][4]) {
    bit32 F[3][3];
    // Element-by-element initialization of F.
    F[0][0] = 1;
    F[0][1] = 5;
    F[0][2] = 9;
    F[1][0] = 0;
    F[1][1] = 5;
    F[1][2] = 9;
    F[2][0] = 8;
    F[2][1] = 1;
    F[2][2] = 1;
    // Iterate over the 4x4 output B.
    B_y: for (bit32 y = 0; y < 4; ++y) {
      B_x: for (bit32 x = 0; x < 4; ++x) {
      #pragma HLS pipeline
        bit32 sum;
        sum = 0;
        // Reduce over the 3x3 window.
        B_ra0: for (bit32 ra0 = 0; ra0 < 3; ++ra0) {
          B_ra1: for (bit32 ra1 = 0; ra1 < 3; ++ra1) {
            // Multiply as ap_int<64>, accumulate as ap_int<65>, then cast
            // the result back to bit32.
            sum = ((bit32)(((ap_int<65>)(((ap_int<64>)A[(y + ra0)][(x + ra1)]) * ((ap_int<64>)F[ra0][ra1]))) + ((ap_int<65>)sum)));
          }
        }
        B[y][x] = sum;
      }
    }
  }

@seanlatias we also need to test the const struct array.

@chhzh123 can you test it with the HLS compiler and see whether F is synthesized as a constant array or is simply inlined? You can check the resource report for that. If so, please increase the array size and push it to the limit to see when the HLS tool fails. Thanks.

NVM, I'll do that.