Unnecessary data casting for const_tensor
chhzh123 opened this issue · 1 comments
chhzh123 commented
As the title mentions, the current implementation of const_tensor
adds a separate stage that explicitly casts the data type of the input tensor (since it is stored as an i64
array), which is unnecessary and introduces extra data-processing overhead. We should remove this stage and let memref.global
preserve the data as it is.
#set = affine_set<(d0) : (d0 - 2 >= 0)>
module {
// Top-level function: takes a 4x6x8x8 i1 memref and returns a 4x16x6x6 i32 memref.
// The constant @F is fetched as i64 and copied element by element into an i1 buffer
// ("F_cast") before the main "B" loop nest runs.
func.func @top(%arg0: memref<4x6x8x8xi1>) -> memref<4x16x6x6xi32> attributes {itypes = "u", otypes = "s"} {
// Reference to the module-level constant @F, typed as i64.
%0 = memref.get_global @F : memref<16x6x3x3xi64>
// Destination buffer for the cast copy: same 16x6x3x3 shape as @F, element type i1.
%1 = memref.alloc() {name = "F_cast"} : memref<16x6x3x3xi1>
// unnecessary data casting
// The four nested loops below visit every element of the 16x6x3x3 constant.
affine.for %arg1 = 0 to 16 {
affine.for %arg2 = 0 to 6 {
affine.for %arg3 = 0 to 3 {
affine.for %arg4 = 0 to 3 {
// Load one i64 element of F, truncate it to i1, and store it at the same index in F_cast.
%5 = affine.load %0[%arg1, %arg2, %arg3, %arg4] {from = "F"} : memref<16x6x3x3xi64>
%6 = arith.trunci %5 {unsigned} : i64 to i1
affine.store %6, %1[%arg1, %arg2, %arg3, %arg4] {to = "F_cast", unsigned} : memref<16x6x3x3xi1>
} {loop_name = "i3"}
} {loop_name = "i2"}
} {loop_name = "i1"}
} {loop_name = "i0", op_name = "F_cast"}
// Result buffer "B"; this is the value returned at the end of the function.
%2 = memref.alloc() {name = "B"} : memref<4x16x6x6xi32>
// NOTE(review): presumably reuse buffers for stage "B", judging by the names; their uses are in the elided loop body.
%3 = memref.alloc() {name = "B_reuse_2"} : memref<6x3x8xi1>
%4 = memref.alloc() {name = "B_reuse_3"} : memref<6x3x3xi1>
// Outermost loop "n" of stage "B" (4 iterations); the body is elided in this listing.
affine.for %arg1 = 0 to 4 {
// bconv
} {loop_name = "n", op_name = "B"}
return %2 : memref<4x16x6x6xi32>
}
// no need to store an i64 array: the only visible use of @F (in @top) truncates every element to i1
memref.global "private" constant @F : memref<16x6x3x3xi64> = dense<"0x...">
chhzh123 commented
I get the following error if I do not change the NumPy array type:
File "/work/zhang-capra/users/hc676/heterocl-mlir/heterocl/ast/ir_builder.py", line 299, in build_visitor
self.build_func_op(op, ip)
File "/work/zhang-capra/users/hc676/heterocl-mlir/heterocl/ast/ir_builder.py", line 394, in build_func_op
self.build_visitor(body_op, ip)
File "/work/zhang-capra/users/hc676/heterocl-mlir/heterocl/ast/ir_builder.py", line 293, in build_visitor
self.build_constant_tensor_op(op, ip)
File "/work/zhang-capra/users/hc676/heterocl-mlir/heterocl/ast/ir_builder.py", line 1413, in build_constant_tensor_op
value_attr = DenseElementsAttr.get(val, shape=val.shape, type=dtype)
ValueError: DenseElementsAttr could not be constructed from the given buffer. This may mean that the Python buffer layout does not match that MLIR expected layout and is a bug.