Fail to Reorder Reduction Loops
sqPoseidon opened this issue · 2 comments
sqPoseidon commented
In the `packed_conv2d_nchw` function, there are four reduction loops: `in_channel`, `kernel_h`, `kernel_w`, and `bitwidth`. When I try to move the output-channel loop inside the reduction loops, I get the error message:
heterocl-mlir/hcl-dialect/llvm-project/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp:1496:
unsigned int mlir::permuteLoops(llvm::MutableArrayRef<mlir::AffineForOp>, llvm::ArrayRef<unsigned int>):
Assertion `false && "invalid permutation map"' failed.
Here's the example:
import heterocl as hcl
import heterocl.op.bnn as bnn
import numpy as np
def test_bconv_popcnt():
    """Reproduce the `permuteLoops` assertion failure when the output-channel
    loop (ff) of packed_conv2d_nchw is moved inside the reduction loops.

    Builds a binarized (1-bit) conv2d via channel packing, applies a
    ``reorder`` that interleaves a spatial loop with the reduction loops,
    compiles, runs, and checks the result against a NumPy reference.
    """
    packing_factor = 8
    out_channel = 64
    strides = (1, 1)
    padding = (1, 1)
    in_channel = 8
    # Pack at most `packing_factor` channel bits into one word.
    bitwidth = min(in_channel, packing_factor)
    in_dtype = hcl.Float()
    out_dtype = hcl.Float()
    in_shape = (1, in_channel, 3, 3)  # n, c, h, w
    weight_shape = (out_channel, in_channel, 3, 3)  # o, i, h, w
    out_shape = (1, out_channel, 3, 3)

    def conv(data, weight):
        # Binarize activations and weights: 1 where positive, else 0.
        data = hcl.compute(
            data.shape,
            lambda *args: hcl.select(data[args] > 0, 1, 0),
            name="data",
            dtype=hcl.UInt(1),
        )
        weight = hcl.compute(
            weight.shape,
            lambda *args: hcl.select(weight[args] > 0, 1, 0),
            name="weight",
            dtype=hcl.UInt(1),
        )
        # Pack along the channel dimension (axis=1), `bitwidth` bits per word.
        packed_data = hcl.pack(
            data,
            axis=1,
            factor=bitwidth,
            name="conv_packed",
            dtype=hcl.UInt(bitwidth),
        )
        packed_weight = hcl.pack(
            weight,
            axis=1,
            factor=bitwidth,
            name="conv_packed",
            dtype=hcl.UInt(bitwidth),
        )
        return bnn.packed_conv2d_nchw(
            packed_data,
            packed_weight,
            strides=strides,
            padding=padding,
            name="conv_conv2d",
            out_dtype=out_dtype,
        )

    data = hcl.placeholder(in_shape, "data", dtype=in_dtype)
    weight = hcl.placeholder(weight_shape, "weight", dtype=in_dtype)
    s = hcl.create_schedule([data, weight], conv)
    B = getattr(conv, "conv_conv2d")
    # Axes: nn, ff, yy, xx, conv_conv2d_rc, conv_conv2d_rx, conv_conv2d_ry
    print("B.axis: ", B.axis)
    # s[B].reorder(B.axis[0], B.axis[2], B.axis[1])
    # Move ff (axis[1]) between the rc and rx reduction loops; this triggers
    # the `invalid permutation map` assertion in mlir::permuteLoops.
    # Requested order: nn, yy, xx, conv_conv2d_rc, ff, conv_conv2d_rx, conv_conv2d_ry
    s[B].reorder(
        B.axis[0], B.axis[2], B.axis[3], B.axis[4], B.axis[1], B.axis[5], B.axis[6]
    )
    f = hcl.build(s)
    print(f.host_src)

    a_np = np.random.randint(0, 10, in_shape)
    b_np = np.random.randint(0, 10, weight_shape)
    hcl_a = hcl.asarray(a_np, dtype=in_dtype)
    hcl_b = hcl.asarray(b_np, dtype=in_dtype)
    hcl_c = hcl.asarray(np.zeros(out_shape), dtype=hcl.Float())
    f(hcl_a, hcl_b, hcl_c)

    # --- NumPy reference convolution ---
    n, c, h, w = in_shape
    o, _, kh, kw = weight_shape  # in_channel already known; avoid rebinding names
    # Binarize to +/-1 for the XNOR-style reference computation.
    a_np = np.where(a_np > 0, 1, -1)
    b_np = np.where(b_np > 0, 1, -1)
    # Zero-pad the spatial dims to match padding=(1, 1).
    a_np = np.pad(a_np, ((0, 0), (0, 0), (1, 1), (1, 1)), 'constant')
    baseline_output = np.zeros((n, o, h, w))
    for nn in range(n):
        for oo in range(o):
            for hh in range(h):
                for ww in range(w):
                    for cc in range(c):
                        for p in range(kh):
                            for q in range(kw):
                                baseline_output[nn][oo][hh][ww] += (
                                    a_np[nn][cc][hh + p][ww + q] * b_np[oo][cc][p][q]
                                )
    assert np.allclose(hcl_c.asnumpy(), baseline_output)


test_bconv_popcnt()
zzzDavid commented
This seems like a limitation of `mlir::permuteLoops`. I will look into this and provide more detail.
chhzh123 commented
It's actually our limitation. Currently we place the reduction variable outside all the reduction loops, which makes the inner loop nest imperfect, so we cannot directly permute those reduction loops with the spatial loops.