Module not executed with option `--lower-composite`
sqPoseidon opened this issue · 0 comments
sqPoseidon commented
In some cases, the kernels are not executed by the main module. Not sure which part affects the inference.
I have two kernels, `bn1.mlir` and `relu.mlir`, which are called from `main.mlir`. I removed the actual computation from both kernels so that each one only prints an array.
bn1.mlir:
// Reduced reproduction of the BatchNorm kernel: the computation has been
// removed, so @bn1 only prints the 4x4 global @gv0 and returns a freshly
// allocated output buffer that is never written.
module {
// 4x4 f32 test pattern; every row is [1.0, 2.0, 3.0, 4.0].
memref.global "private" @gv0 : memref<4x4xf32> = dense<[[1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0]]>
// Declaration only (no body); the symbol must be supplied at link time.
// NOTE(review): presumably provided by libmlir_runner_utils, which compile.sh links - confirm.
func.func private @printMemrefF32(%ptr : memref<*xf32>)
// Constant parameter tables (64 x i8). They are loaded below but never used.
memref.global "private" constant @weight : memref<64xi8> = dense<1>
memref.global "private" constant @bias : memref<64xi8> = dense<0>
// %arg0 is not read anywhere in the body.
func.func @bn1(%arg0: memref<1x64x112x112xf32>) -> memref<1x64x112x112xi8> attributes {itypes = "_", llvm.emit_c_interface, otypes = "s", top} {
// %c0, %0 and %1 are leftovers from the removed computation (no users).
%c0 = arith.constant 0 : index
%0 = memref.get_global @weight : memref<64xi8>
%1 = memref.get_global @bias : memref<64xi8>
// Result buffer; returned as allocated, without any store into it.
%2 = memref.alloc() {name = "BatchNorm2d"} : memref<1x64x112x112xi8>
%temp = memref.get_global @gv0 : memref<4x4xf32>
// Disabled hcl.print variant, kept for reference. Note that %0 in this
// function is memref<64xi8>, so the operand/type below do not match as written.
// hcl.print(%0) : memref<4x4xf32>
// Erase the static shape so the buffer can be passed to the unranked print helper.
%casted = memref.cast %temp : memref<4x4xf32> to memref<*xf32>
// The only observable effect of this kernel: print @gv0.
func.call @printMemrefF32(%casted) : (memref<*xf32>) -> ()
return %2 : memref<1x64x112x112xi8>
}
}
relu.mlir
// Reduced reproduction of the ReLU kernel: the computation has been removed,
// so @relu only prints the 4x4 global @gv0 and returns a freshly allocated
// output buffer that is never written.
module {
// 4x4 f32 test pattern; every row is [1.0, 2.0, 3.0, 4.0].
memref.global "private" @gv0 : memref<4x4xf32> = dense<[[1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0]]>
// Declaration only (no body); the symbol must be supplied at link time.
// NOTE(review): presumably provided by libmlir_runner_utils, which compile.sh links - confirm.
func.func private @printMemrefF32(%ptr : memref<*xf32>)
// %arg0 is not read anywhere in the body.
func.func @relu(%arg0: memref<1x64x112x112xi8>) -> memref<1x64x112x112xi8> attributes {itypes = "s", llvm.emit_c_interface, otypes = "s", top} {
// %c0 is a leftover from the removed computation (no users).
%c0 = arith.constant 0 : index
// Result buffer; returned as allocated, without any store into it.
%0 = memref.alloc() {name = "ReLU"} : memref<1x64x112x112xi8>
%temp = memref.get_global @gv0 : memref<4x4xf32>
// Disabled hcl.print variant, kept for reference. Note that %0 in this
// function is memref<1x64x112x112xi8>, so the operand/type below do not match as written.
// hcl.print(%0) : memref<4x4xf32>
// Erase the static shape so the buffer can be passed to the unranked print helper.
%casted = memref.cast %temp : memref<4x4xf32> to memref<*xf32>
// The only observable effect of this kernel: print @gv0.
func.call @printMemrefF32(%casted) : (memref<*xf32>) -> ()
return %0 : memref<1x64x112x112xi8>
}
}
main.mlir
// Host driver: calls @bn1 and then @relu once. Each kernel is expected to
// print a 4x4 array as its only observable effect.
module {
// External kernels, declared without bodies; their definitions live in
// bn1.mlir and relu.mlir and are resolved when main.o, bn1.o and relu.o are linked.
func.func private @bn1(memref<1x64x112x112xf32>) -> memref<1x64x112x112xi8>
func.func private @relu(memref<1x64x112x112xi8>) -> memref<1x64x112x112xi8>
func.func @main() {
// Input buffer; allocated but never initialized before being passed to @bn1.
%0 = memref.alloc() : memref<1x64x112x112xf32>
// Disabled loop that would repeat both calls 20 times (closing brace is disabled below).
// affine.for %arg = 0 to 20 {
// %1 is consumed only by the @relu call; %2 has no users at all.
// NOTE(review): since no result reaches an observable use inside this module,
// it is worth checking whether --lower-composite treats these calls as dead
// and erases them - unconfirmed; compare the hcl-opt output of main.mlir
// with and without the flag.
%1 = func.call @bn1(%0) : (memref<1x64x112x112xf32>) -> memref<1x64x112x112xi8>
%2 = func.call @relu(%1) : (memref<1x64x112x112xi8>) -> memref<1x64x112x112xi8>
// }
return
}
}
compile.sh
# Reproduction driver: lowers bn1.mlir, relu.mlir and main.mlir to object
# files, links them against the MLIR runner utilities, runs the resulting
# binary and reports its wall-clock time in milliseconds.
set -x

# Same flag set without --lower-composite (per the report, the arrays are printed with this one):
# export HCL_OPT_FLAGS=" --fixed-to-integer --verify-each --verify-diagnostics --allow-unregistered-dialect --preload-dialects-in-context --lower-bitops --lower-to-llvm"
export HCL_OPT_FLAGS=" --fixed-to-integer --verify-each --verify-diagnostics --allow-unregistered-dialect --preload-dialects-in-context --lower-composite --lower-bitops --lower-to-llvm"

# Directory containing libmlir_c_runner_utils / libmlir_runner_utils.
# Defaults to the original hard-coded location; override through the environment.
MLIR_LIB_DIR="${MLIR_LIB_DIR:-/work/shared/users/staff/qs228/MLIR/heterocl-mlir/hcl-dialect/llvm-project/build/lib/}"

# build_obj NAME: lower NAME.mlir through hcl-opt -> LLVM IR -> opt -> llc into NAME.o.
# $HCL_OPT_FLAGS is intentionally unquoted so it splits into separate options.
# Only the exit status of the last pipeline stage (llc) is visible here; earlier
# failures surface as an llc or link error.
build_obj() {
    hcl-opt "$1.mlir" $HCL_OPT_FLAGS | mlir-translate -mlir-to-llvmir | opt -O3 -S | llc -O3 -filetype=obj -o "$1.o"
}

# Stop at the first failed step so that a stale x86-app from an earlier build
# is never executed and mistaken for the current result.
for module in bn1 relu main; do
    build_obj "$module" || { echo "Failed to build $module.o" >&2; exit 1; }
done

clang main.o bn1.o relu.o --target=x86-64 -fPIE \
    -L"$MLIR_LIB_DIR" \
    -lmlir_c_runner_utils \
    -lmlir_runner_utils \
    -o x86-app || { echo "Link failed" >&2; exit 1; }
echo "Compilation Done."

# Timestamps in microseconds since the epoch (GNU date: %s seconds + %6N microseconds).
# The binary's exit status is deliberately not checked: @main returns no value,
# so the status is not meaningful.
start=$(date +%s%6N)
./x86-app # 2>&1 | tee temp.log
end=$(date +%s%6N)

# Microseconds -> milliseconds with three decimal places.
runtime=$(echo "scale=3; ($end - $start) / 1000.0" | bc)
echo "X86 Execution Done."
echo "Simulation Time: $runtime ms."
If these two modules are executed, you'll see the printed arrays. With the option `--lower-composite`, nothing is printed.