cornell-zhang/heterocl

Redundancy in generated vhls code

Opened this issue · 2 comments

There is some redundant code in the VHLS code generated by HeteroCL. The test case is:

def test_redundancy(dtype=hcl.Int()):
    """Build a small GEMM-style kernel and print the VHLS code generated for it.

    This is a reproducer, not an assertion-based test: it only prints the
    generated source so the redundant constructs can be inspected by eye.

    Args:
        dtype: HeteroCL data type passed to ``hcl.init`` and used as the
            accumulator type of the ``hcl.sum`` reduction. Defaults to
            ``hcl.Int()``.
    """
    hcl.init(dtype)
    # Kernel arguments: A is 2x3, B is 3x5, C is 2x5.
    A = hcl.placeholder((2, 3), "A")
    B = hcl.placeholder((3, 5), "B")
    C = hcl.placeholder((2, 5), "C")

    def kernel_gemm(A, B, C):
        # Reduction axis of extent 3, shared by A's columns and B's rows.
        r = hcl.reduce_axis(0, 3, "r")
        # out_AB[x, y] = sum over r of 2 * A[x, r] * B[r, y].
        # NOTE(review): out_AB is declared as (2, 3), but it is read below over
        # C's (2, 5) domain; a 2x3 by 3x5 product would normally be (2, 5).
        # Presumably unintentional -- confirm with the reporter.
        out_AB = hcl.compute((2, 3),
                lambda x, y: hcl.sum(2 * A[x, r] * B[r, y],
                axis = r, dtype = dtype), name = "out_AB")
        # Update C element-wise: C[x, y] = 3 * C[x, y] + out_AB[x, y].
        hcl.update(C, lambda x, y: 3 * C[x, y] + out_AB[x, y], name = "C")

    # Schedule the kernel over the three placeholders and emit Vivado HLS code.
    s = hcl.create_schedule([A, B, C], kernel_gemm)
    code = hcl.build(s, target="vhls")
    print(code)

And the generated code is:

#include <ap_int.h>
#include <ap_fixed.h>
#include <ap_axi_sdata.h>
#include <hls_stream.h>
#include <hls_math.h>
#include <math.h>
#include <stdint.h>
// Code generated by HeteroCL for the test case above, annotated to point out
// the redundant constructs this issue is about.
void default_function(ap_int<32> A[2][3], ap_int<32> B[3][5], ap_int<32> C[2][5]) {
  ap_int<32> _top;   // declared but never referenced in this function
  ap_int<32> out_AB[2][3];
  // Stage "out_AB": out_AB[x][y] = sum over r of (A[x][r] * 2) * B[r][y].
  out_AB_x: for (ap_int<32> x = 0; x < 2; ++x) {
    out_AB_y: for (ap_int<32> y = 0; y < 3; ++y) {
      ap_int<32> sum;
      sum_x1: for (ap_int<32> x1 = 0; x1 < 1; ++x1) {   // redundant: a single-iteration loop used only to initialize a scalar
        sum = 0;
      }
      // Reduction over r; operands are widened before the multiply/add and
      // the result is cast back to 32 bits.
      out_AB_r: for (ap_int<32> r = 0; r < 3; ++r) { 
        sum = ((ap_int<32>)(((ap_int<65>)(((ap_int<64>)(A[x][r] * 2)) * ((ap_int<64>)B[r][y]))) + ((ap_int<65>)sum)));
      }
      out_AB[x][y] = sum;
    }
  }
  ap_int<32> C1;   // dead code: declared but never referenced
  // Stage "C": C[x2][y1] = C[x2][y1] * 3 + out_AB[...], written in place.
  C_x2: for (ap_int<32> x2 = 0; x2 < 2; ++x2) {
    C_y1: for (ap_int<32> y1 = 0; y1 < 5; ++y1) {
      // NOTE(review): y1 runs to 4 while out_AB has only 3 columns, so the
      // access is remapped to [(y1 / 3) + x2][y1 % 3]; for x2 = 1 and y1 >= 3
      // the row index is 2, which is outside out_AB[2][3] -- confirm whether
      // the test case's out_AB shape is intended.
      C[x2][y1] = ((ap_int<32>)(((ap_int<33>)(C[x2][y1] * 3)) + ((ap_int<33>)out_AB[((y1 / 3) + x2)][(y1 % 3)])));
    }
  }
}

Fixed: 1866ee8

Link #272.