cornell-zhang/heterocl

Fail to Reorder Reduction Loops

sqPoseidon opened this issue · 2 comments

In the packed_conv2d_nchw function there are four reduction loops: in_channel, kernel_h, kernel_w, and bitwidth. When I try to move the output-channel loop into the reduction loops, I get the following error:

heterocl-mlir/hcl-dialect/llvm-project/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp:1496: 
unsigned int mlir::permuteLoops(llvm::MutableArrayRef<mlir::AffineForOp>, llvm::ArrayRef<unsigned int>): 
Assertion `false && "invalid permutation map"' failed.

Here's the example:

import heterocl as hcl
import heterocl.op.bnn as bnn
import numpy as np


def test_bconv_popcnt():
    packing_factor = 8
    out_channel = 64
    strides = (1, 1)
    padding = (1, 1)
    in_channel = 8
    bitwidth = min(in_channel, packing_factor)
    in_dtype = hcl.Float()
    out_dtype = hcl.Float()
    in_shape = (1, in_channel, 3, 3) # n, c, h, w
    weight_shape = (out_channel, in_channel, 3, 3) # o, i, h, w
    out_shape = (1, out_channel, 3, 3)

    def conv(data, weight):
        data = hcl.compute(
            data.shape,
            lambda *args: hcl.select(data[args] > 0, 1, 0),
            name="data",
            dtype=hcl.UInt(1),
        )
        weight = hcl.compute(
            weight.shape,
            lambda *args: hcl.select(weight[args] > 0, 1, 0),
            name="weight",
            dtype=hcl.UInt(1),
        )
        # pack along channel dimension
        packed_data = hcl.pack(
            data,
            axis=1,
            factor=bitwidth,
            name="conv_packed",
            dtype=hcl.UInt(bitwidth),
        )
        packed_weight = hcl.pack(
            weight,
            axis=1,
            factor=bitwidth,
            name="conv_packed",
            dtype=hcl.UInt(bitwidth),
        )
        return bnn.packed_conv2d_nchw(
            packed_data,
            packed_weight,
            strides=strides,
            padding=padding,
            name="conv_conv2d",
            out_dtype=out_dtype,
        )

    data = hcl.placeholder(in_shape, "data", dtype=in_dtype)
    weight = hcl.placeholder(weight_shape, "weight", dtype=in_dtype)
    s = hcl.create_schedule([data, weight], conv)

    B = getattr(conv, "conv_conv2d")
    print("B.axis: ", B.axis) # nn, ff, yy, xx, conv_conv2d_rc, conv_conv2d_rx, conv_conv2d_ry
    # s[B].reorder(B.axis[0], B.axis[2], B.axis[1])
    s[B].reorder(B.axis[0], B.axis[2], B.axis[3], B.axis[4], B.axis[1], B.axis[5], B.axis[6]) # nn, yy, xx, conv_conv2d_rc, ff, conv_conv2d_rx, conv_conv2d_ry

    f = hcl.build(s)
    print(f.host_src)

    a_np = np.random.randint(0, 10, in_shape)
    b_np = np.random.randint(0, 10, weight_shape)

    hcl_a = hcl.asarray(a_np, dtype=in_dtype)
    hcl_b = hcl.asarray(b_np, dtype=in_dtype)
    hcl_c = hcl.asarray(np.zeros(out_shape), dtype=hcl.Float())

    f(hcl_a, hcl_b, hcl_c)

    n, c, h, w = in_shape
    o, i, kh, kw = weight_shape
    # binarize a_np, b_np
    a_np = np.where(a_np > 0, 1, -1)
    b_np = np.where(b_np > 0, 1, -1)
    # pad a_np
    a_np = np.pad(a_np, ((0, 0), (0, 0), (1, 1), (1, 1)), 'constant')
    # calculate convolution
    baseline_output = np.zeros((n, o, h, w))
    for i in range(n):
        for j in range(o):
            for k in range(h):
                for l in range(w):
                    for m in range(c):
                        for p in range(kh):
                            for q in range(kw):
                                baseline_output[i][j][k][l] += a_np[i][m][k + p][l + q] * b_np[j][m][p][q]

    assert np.allclose(hcl_c.asnumpy(), baseline_output)

test_bconv_popcnt()

This seems like a limitation of mlir::permuteLoops. I will look into this and provide more details.

It's actually our limitation. Currently we initialize the reduction variable outside all of the reduction loops, which makes the inner loop nest imperfect, so we cannot directly permute the reduction loops with the spatial loops.
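
To make the limitation concrete, here is a hand-written Python sketch of the assumed loop structure (the loop names follow B.axis above, but the bounds and the accumulation body are placeholders, not the real lowered IR). The accumulator is initialized between the innermost spatial loop and the outermost reduction loop, so the loops below ff do not form a perfectly nested band, and mlir::permuteLoops only permutes loops within a perfectly nested band.

import numpy as np

# Illustrative constants matching the test above.
out_channel, in_channel, bitwidth = 64, 8, 8
out = np.zeros((1, out_channel, 3, 3))

for nn in range(1):                                        # batch
    for ff in range(out_channel):                          # output channel (the loop being reordered)
        for yy in range(3):                                # output height
            for xx in range(3):                            # output width
                acc = 0                                    # reduction variable initialized here,
                                                           # outside the reduction loops
                for rc in range(in_channel // bitwidth):   # packed input channels
                    for ry in range(3):                    # kernel height
                        for rx in range(3):                # kernel width
                            for rb in range(bitwidth):     # bit position
                                acc += 0                   # placeholder for the XNOR/popcount update
                out[nn, ff, yy, xx] = acc

Because the acc initialization sits between xx and rc, sinking ff below rc (or any other reduction loop) would presumably also require hoisting or replicating that statement, which the current lowering does not do; that is why the reduction loops cannot yet be permuted with the spatial loops.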