cornell-zhang/heterocl

Unnecessary data casting for const_tensor

chhzh123 opened this issue · 1 comment

As the title says, the current implementation of const_tensor adds a separate stage that explicitly casts the data type of the input tensor (since the tensor is represented as an i64 array). This is unnecessary and introduces extra data-processing overhead. We should remove this stage and let memref.global preserve the data as it is.

#set = affine_set<(d0) : (d0 - 2 >= 0)>
module {
  func.func @top(%arg0: memref<4x6x8x8xi1>) -> memref<4x16x6x6xi32> attributes {itypes = "u", otypes = "s"} {
    %0 = memref.get_global @F : memref<16x6x3x3xi64>
    %1 = memref.alloc() {name = "F_cast"} : memref<16x6x3x3xi1>
    // unnecessary data casting
    affine.for %arg1 = 0 to 16 {
      affine.for %arg2 = 0 to 6 {
        affine.for %arg3 = 0 to 3 {
          affine.for %arg4 = 0 to 3 {
            %5 = affine.load %0[%arg1, %arg2, %arg3, %arg4] {from = "F"} : memref<16x6x3x3xi64>
            %6 = arith.trunci %5 {unsigned} : i64 to i1
            affine.store %6, %1[%arg1, %arg2, %arg3, %arg4] {to = "F_cast", unsigned} : memref<16x6x3x3xi1>
          } {loop_name = "i3"}
        } {loop_name = "i2"}
      } {loop_name = "i1"}
    } {loop_name = "i0", op_name = "F_cast"}
    %2 = memref.alloc() {name = "B"} : memref<4x16x6x6xi32>
    %3 = memref.alloc() {name = "B_reuse_2"} : memref<6x3x8xi1>
    %4 = memref.alloc() {name = "B_reuse_3"} : memref<6x3x3xi1>
    affine.for %arg1 = 0 to 4 {
        // bconv
    } {loop_name = "n", op_name = "B"}
    return %2 : memref<4x16x6x6xi32>
  }
  // no need to store an i64 array
  memref.global "private" constant @F : memref<16x6x3x3xi64> = dense<"0x...">

I get the following error if I do not change the NumPy array type:

  File "/work/zhang-capra/users/hc676/heterocl-mlir/heterocl/ast/ir_builder.py", line 299, in build_visitor
    self.build_func_op(op, ip)
  File "/work/zhang-capra/users/hc676/heterocl-mlir/heterocl/ast/ir_builder.py", line 394, in build_func_op
    self.build_visitor(body_op, ip)
  File "/work/zhang-capra/users/hc676/heterocl-mlir/heterocl/ast/ir_builder.py", line 293, in build_visitor
    self.build_constant_tensor_op(op, ip)
  File "/work/zhang-capra/users/hc676/heterocl-mlir/heterocl/ast/ir_builder.py", line 1413, in build_constant_tensor_op
    value_attr = DenseElementsAttr.get(val, shape=val.shape, type=dtype)
ValueError: DenseElementsAttr could not be constructed from the given buffer. This may mean that the Python buffer layout does not match that MLIR expected layout and is a bug.