cornell-zhang/hcl-dialect

[LLVM] CPU backend cannot support customized memory layout

Closed this issue · 7 comments

After I added the `s.partition` calls to the following test case, the LLVM backend can no longer build the code.

def test_conv2D():
    """Reproduce the build failure seen when reuse buffers are partitioned.

    Builds a 3x3 convolution over a 10x10 input, creates two chained reuse
    buffers along the output axes, partitions both of them, and then calls
    ``hcl.build``. No numerical result is checked; the function only builds
    the schedule and prints the resulting device module.
    """
    hcl.init()
    A = hcl.placeholder((10, 10))
    F = hcl.placeholder((3, 3))
    def conv(A, F):
        # Reduction axes spanning the 3x3 filter window.
        r = hcl.reduce_axis(0, 3)
        c = hcl.reduce_axis(0, 3)
        # 8x8 output: each element sums the element-wise product of the
        # filter with the 3x3 input window whose top-left corner is (y, x).
        B = hcl.compute((8, 8), lambda y, x: hcl.sum(A[y+r, x+c]*F[r,c], axis=[r, c]), name="B")
        return B
    s = hcl.create_schedule([A, F], conv)
    # The compute stage is looked up through the name given via name="B".
    B = conv.B
    # Chain two reuse buffers: LB reuses A along B's first axis, and WB reuses
    # LB along B's second axis.
    # NOTE(review): LB/WB presumably stand for line buffer / window buffer — confirm.
    LB = s.reuse_at(A, s[B], B.axis[0])
    WB = s.reuse_at(LB, s[B], B.axis[1])
    # These two partition calls are what trigger the reported failure.
    s.partition(LB, dim=1)
    s.partition(WB)
    # Build step that was reported to fail with the LLVM backend.
    f = hcl.build(s)
    print(s.device_module)

It seems to be a memory layout issue: after partitioning, the layout attribute of the memref is changed. Maybe we can change the layout back when lowering to LLVM in order to generate correct code.

loc("-":7:10): error: failed to legalize operation 'memref.alloc'
loc("-":5:3): error: cannot be converted to LLVM IR: missing `LLVMTranslationDialectInterface` registration for dialect for op: builtin.func

I added this test case as tests/mlir/test_schedule_memory.py::test_reuse_and_partition in the HeteroCL repo here.
As of commit 1f0d502, this issue is fixed and the test passes.

I did some experiments and found that the memref lowering provided by the MemRef dialect, `populateMemRefToLLVMConversionPatterns(typeConverter, patterns)`, does not lower memrefs with a custom layout.

I added a pass called RemoveStrideMap to the LLVM backend that removes the layout, which solves this issue. With the above test, we verified that removing the memref layout does not affect the result.

I still got the following error.

python3: /scratch/users/hc676/llvm-project/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp:304: llvm::SmallVector<mlir::Type, 5> mlir::LLVMTypeConverter::getMemRefDescriptorFields(mlir::MemRefType, bool): Assertion `isStrided(type) && "Non-strided layout maps must have been normalized away"' failed.

I don't get the same error. Are you referring to the test_reuse_and_partition test above, or is it another test case? Does rebuilding solve this issue?

image

No, not this test case. Try the following example.

import heterocl as hcl
import numpy as np
import sys

# Problem sizes shared by the placeholders, the compute stage, and the NumPy
# reference loops in test_bconv2D_nhwc.
bs = 4  # leading dimension of the input A and the output B
ic, oc = 6, 16  # ic: last dim of A and F; oc: first dim of F and last dim of B
ih, iw = 8, 8  # second and third dimensions of the input A
kh, kw = 3, 3  # second and third dimensions of the filter F
# Output extents: the number of positions at which a kh x kw window fits
# inside an ih x iw input (the kernel indexes A at h + rh and w + rw).
oh, ow = ih - kh + 1, iw - kw + 1


def test_bconv2D_nhwc():
    """Build, run, and check a partitioned 1-bit convolution.

    The kernel XORs 1-bit inputs with 1-bit filter weights over a
    ``kh x kw x ic`` window, accumulates the XOR results as 32-bit integers,
    and stores ``L - 2 * sum`` where ``L = ic * kh * kw`` is the number of
    accumulated terms. That equals the sum of ``1 - 2 * (a ^ f)`` over the
    window, which is what the NumPy reference loops compute.

    The schedule adds two chained reuse buffers, partitions them together
    with ``A``, ``F`` and ``B``, then splits and pipelines one output axis
    before building.

    Raises:
        AssertionError: if the kernel output differs from the NumPy reference.
    """
    hcl.init(hcl.UInt(1))
    A = hcl.placeholder((bs, ih, iw, ic))
    F = hcl.placeholder((oc, kh, kw, ic))

    def conv(A, F):
        # Reduction axes over input channels and the kernel window.
        rc = hcl.reduce_axis(0, ic)
        rh = hcl.reduce_axis(0, kh)
        rw = hcl.reduce_axis(0, kw)
        # Number of terms accumulated for each output element.
        L = ic * kh * kw
        # `<< 1` doubles the XOR count, so each element is L - 2 * count.
        B = hcl.compute(
            (bs, oh, ow, oc),
            lambda n, h, w, c: L
            - (
                hcl.sum(
                    A[n, h + rh, w + rw, rc] ^ F[c, rh, rw, rc],
                    axis=[rh, rw, rc],
                    dtype=hcl.Int(32),
                )
                << 1
            ),
            name="B",
            dtype=hcl.Int(32),
        )
        return B

    s = hcl.create_schedule([A, F], conv)
    # The compute stage is looked up through the name given via name="B".
    B = conv.B
    # Chain two reuse buffers along B's second and third axes.
    LB = s.reuse_at(A, s[B], B.axis[1])
    WB = s.reuse_at(LB, s[B], B.axis[2])
    # Partition the reuse buffers and the input, filter, and output tensors
    # along the listed dimensions.
    s.partition(LB, dim=2)
    s.partition(LB, dim=3)
    s.partition(WB, dim=2)
    s.partition(WB, dim=3)
    s.partition(WB, dim=4)
    s.partition(F, dim=1)
    s.partition(F, dim=2)
    s.partition(F, dim=3)
    s.partition(F, dim=4)
    s.partition(A, dim=4)
    s.partition(B, dim=4)
    # Split B's third axis by 2 and pipeline the outer loop; w_inner is unused.
    w_outer, w_inner = s[B].split(B.axis[2], factor=2)
    s[B].pipeline(w_outer)
    f = hcl.build(s)

    # Random 0/1 data: np_A matches A's shape, np_B matches the filter F.
    np_A = np.random.randint(0, 2, size=(bs, ih, iw, ic))
    np_B = np.random.randint(0, 2, size=(oc, kh, kw, ic))
    # Reference output, filled in by the loops below.
    np_C = np.zeros((bs, oh, ow, oc), dtype="int")

    for n in range(0, bs):
        for y in range(0, oh):
            for x in range(0, ow):
                for c in range(0, oc):
                    for rc in range(0, ic):
                        for rh in range(0, kh):
                            for rw in range(0, kw):
                                np_C[n][y][x][c] += 1 - 2 * (
                                    np_A[n][y + rh][x + rw][rc]
                                    ^ np_B[c][rh][rw][rc]
                                )

    hcl_A = hcl.asarray(np_A, dtype=hcl.UInt(1))
    hcl_B = hcl.asarray(np_B, dtype=hcl.UInt(1))
    # Start the output buffer at zero rather than at the expected values, so
    # the comparison below can only pass if the kernel actually wrote B.
    hcl_C = hcl.asarray(np.zeros((bs, oh, ow, oc), dtype="int"), dtype=hcl.Int(32))

    f(hcl_A, hcl_B, hcl_C)

    assert np.array_equal(np_C, hcl_C.asnumpy())


# Run the reproduction directly when this file is executed as a script.
if __name__ == "__main__":
    test_bconv2D_nhwc()

Hmm, I don't get an error for this one, and the result is correct:
image

Could you pull the frontend repo and try again? I found that I had some frontend changes that were not pushed to the remote; it is synced now. Let me know if it works.

It works now. Thanks!