[Pass] Incorrect operation sinking in loop fusion
Closed this issue · 0 comments
chhzh123 commented
The following test program generates incorrect code.
def test_schedule_intra_stage():
hcl.init()
def popcount(A, B): # each element in A is a 32-bit integer
with hcl.for_(0, A.shape[0], tag="C") as x:
with hcl.for_(0, A.shape[1]) as y:
B[x, y] = 0
with hcl.for_(0, 32) as i:
B[x, y] += A[x, y][i]
A = hcl.placeholder((10, 20))
B = hcl.placeholder(A.shape)
def test_fuse():
s = hcl.create_schedule([A, B], popcount)
C = popcount.C
s[C].fuse(C.axis[0], C.axis[1])
ir = hcl.lower(s)
print(ir)
error: operand #2 does not dominate this use
// Verification failed, printing generic form
#map0 = affine_map<(d0, d1) -> (d0, d1)>
#map1 = affine_map<(d0)[s0] -> (d0 mod s0)>
#map2 = affine_map<(d0)[s0] -> (d0 floordiv s0)>
#map3 = affine_map<() -> (0)>
#map4 = affine_map<() -> (32)>
#map5 = affine_map<() -> (200)>
"builtin.module"() ({
"builtin.func"() ({
^bb0(%arg0: memref<10x20xi32>, %arg1: memref<10x20xi32>):
%0 = "arith.constant"() {value = 10 : index} : () -> index
%1 = "arith.constant"() {value = 20 : index} : () -> index
"affine.for"() ({
^bb0(%arg2: index):
%2 = "arith.constant"() {value = 0 : i32} : () -> i32
"affine.store"(%2, %arg1, %4, %3) {map = #map0, to = "compute_1"} : (i32, memref<10x20xi32>, index, index) -> ()
"affine.for"() ({
^bb0(%arg3: index):
%3 = "affine.apply"(%arg2, %1) {map = #map1} : (index, index) -> index
%4 = "affine.apply"(%arg2, %1) {map = #map2} : (index, index) -> index
%5 = "affine.load"(%arg1, %4, %3) {from = "compute_1", map = #map0} : (memref<10x20xi32>, index, index) -> i32
%6 = "affine.load"(%arg0, %4, %3) {from = "compute_0", map = #map0} : (memref<10x20xi32>, index, index) -> i32
%7 = "hcl.get_bit"(%6, %arg3) : (i32, index) -> i1
%8 = "arith.extui"(%7) : (i1) -> i32
%9 = "arith.addi"(%5, %8) : (i32, i32) -> i32
"affine.store"(%9, %arg1, %4, %3) {map = #map0, to = "compute_1"} : (i32, memref<10x20xi32>, index, index) -> ()
"affine.yield"() : () -> ()
}) {loop_name = "loop_2", lower_bound = #map3, step = 1 : i32, upper_bound = #map4} : () -> ()
"affine.yield"() : () -> ()
}) {loop_name = "loop_0_loop_1_fused", lower_bound = #map3, stage_name = "C", step = 1 : i32, upper_bound = #map5} : () -> ()
"std.return"() : () -> ()
}) {bit, extra_itypes = "ss", extra_otypes = "", sym_name = "top", type = (memref<10x20xi32>, memref<10x20xi32>) -> ()} : () -> ()
}) : () -> ()
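The first affine.apply
operation should not have been sunk into the innermost loop. The affine.store in the fused outer loop uses %3 and %4, but both values are now defined only inside the inner loop, which is why verification fails with the dominance error.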