cornell-zhang/hcl-dialect

[Pass] Support for complex outline patterns

chhzh123 opened this issue · 2 comments

See the following example. Although B and C are independent and do not share intermediate buffers, they can still be grouped into a single function.

def test_outline():
    """Outline stages B and C into one function and stage D into another.

    Builds a kernel with three compute stages over a 32x32 placeholder,
    pipelines axis 1 of B, partitions B along dim 2, outlines the stages
    in two groups ([B, C] and [D]), and prints the lowered module.
    """
    A = hcl.placeholder((32, 32), "A")

    def kernel(A):
        # B and C each read only A; D is the element-wise sum of B and C.
        # NOTE(review): loop names in the lowered IR appear to follow the
        # lambda argument names (i, j) -- keep them stable.
        B = hcl.compute(A.shape, lambda i, j: A[i, j] + 1, "B")
        C = hcl.compute(A.shape, lambda i, j: A[i, j] + 1, "C")
        D = hcl.compute(A.shape, lambda i, j: B[i, j] + C[i, j], "D")
        return D

    platform = hcl.Platform.xilinx_zc706
    platform.config(
        compiler="vivado_hls",
        mode="debug",
        project="stages-outline.prj",
    )
    schedule = hcl.create_schedule([A], kernel)

    # Customizations applied to B before the stages are outlined.
    schedule[kernel.B].pipeline(kernel.B.axis[1])
    schedule.partition(kernel.B, dim=2)

    # First group holds two independent stages; second group holds D alone.
    grouped_stages = [schedule[kernel.B], schedule[kernel.C]]
    single_stage = [schedule[kernel.D]]
    outlined_b_c, outlined_d = schedule.outline(grouped_stages, single_stage)
    print(hcl.lower(schedule))

The expected MLIR code is shown below.

// Layout map attached to the memref type of buffer B throughout this module.
// NOTE(review): presumably produced by the partition of B along dim 2 in the
// Python example -- confirm against the partition lowering.
#map = affine_map<(d0, d1) -> (0, d1, d0, 0)>
module {
  // Outlined function for stage D: element-wise sum of %arg0 (loads tagged
  // "B") and %arg1 (loads tagged "C"), stored into %arg2 (tagged "D").
  func private @Stage_D(%arg0: memref<32x32xi32, #map>, %arg1: memref<32x32xi32>, %arg2: memref<32x32xi32>) {
    affine.for %arg3 = 0 to 32 {
      affine.for %arg4 = 0 to 32 {
        %0 = affine.load %arg0[%arg3, %arg4] {from = "B"} : memref<32x32xi32, #map>
        %1 = affine.load %arg1[%arg3, %arg4] {from = "C"} : memref<32x32xi32>
        %2 = arith.addi %0, %1 : i32
        affine.store %2, %arg2[%arg3, %arg4] {to = "D"} : memref<32x32xi32>
      } {loop_name = "j"}
    } {loop_name = "i", stage_name = "D"}
    return
  }
  // Single outlined function holding both stage B and stage C as two
  // consecutive loop nests. Both nests read %arg0 (tagged "A"); the first
  // writes %arg1 (tagged "B"), the second writes %arg2 (tagged "C").
  func private @Stage_B_C(%arg0: memref<32x32xi32>, %arg1: memref<32x32xi32, #map>, %arg2: memref<32x32xi32>) {
    // Stage B: adds the constant 1 to each loaded element; the inner "j"
    // loop carries pipeline_ii = 1.
    affine.for %arg3 = 0 to 32 {
      affine.for %arg4 = 0 to 32 {
        %0 = affine.load %arg0[%arg3, %arg4] {from = "A"} : memref<32x32xi32>
        %c1_i32 = arith.constant 1 : i32
        %1 = arith.addi %0, %c1_i32 : i32
        affine.store %1, %arg1[%arg3, %arg4] {to = "B"} : memref<32x32xi32, #map>
      } {loop_name = "j", pipeline_ii = 1 : i32}
    } {loop_name = "i", stage_name = "B"}
    // Stage C: same computation as stage B, without the pipeline attribute,
    // writing to the buffer that uses the default layout.
    affine.for %arg3 = 0 to 32 {
      affine.for %arg4 = 0 to 32 {
        %0 = affine.load %arg0[%arg3, %arg4] {from = "A"} : memref<32x32xi32>
        %c1_i32 = arith.constant 1 : i32
        %1 = arith.addi %0, %c1_i32 : i32
        affine.store %1, %arg2[%arg3, %arg4] {to = "C"} : memref<32x32xi32>
      } {loop_name = "j"}
    } {loop_name = "i", stage_name = "C"}
    return
  }
  // Top-level function: allocates buffers B and C, calls @Stage_B_C to fill
  // them, allocates D, calls @Stage_D to fill it, and returns D.
  func @top(%arg0: memref<32x32xi32>) -> memref<32x32xi32> attributes {itypes = "s", otypes = "s"} {
    %0 = memref.alloc() {name = "B"} : memref<32x32xi32, #map>
    %1 = memref.alloc() {name = "C"} : memref<32x32xi32>
    call @Stage_B_C(%arg0, %0, %1) : (memref<32x32xi32>, memref<32x32xi32, #map>, memref<32x32xi32>) -> ()
    %2 = memref.alloc() {name = "D"} : memref<32x32xi32>
    call @Stage_D(%0, %1, %2) : (memref<32x32xi32, #map>, memref<32x32xi32>, memref<32x32xi32>) -> ()
    return %2 : memref<32x32xi32>
  }
}

What's the difference between func and func private?

Private functions can only be referenced from inside the module that defines them, while public functions can also be referenced from other modules. I don't actually know why only private functions can be created from Python.