Unrecognised hardware primitive

Question

Unrecognised hardware primitive

Closed this issue a year ago · 2 comments

Hi there,

I am trying to lower MLIR code using CIRCT and Calyx with the lowering pipeline below:

$mlir-opt --lower-affine <input> -o <output>

$mlir-opt --scf-for-to-while <input> -o <output>

$circt-opt --lower-scf-to-calyx --canonicalize --verify-diagnostics <input> -o <output> 

$circt-translate -export-calyx --verify-diagnostics <input> -o $2/$3_calyx_naive.mlir

$calyx <input> --log info -p validate -p discover-external -p validate -x discover-external:default=DEPTH -x discover-external:strip-suffix=_ -o <output> 

$calyx -l $Calyx_PATH -b verilog --synthesis --disable-verify --nested <input> -o <output> 

$calyx -l $Calyx_PATH -b xilinx --synthesis --disable-verify --nested <input> -o <output> 

$calyx -l $Calyx_PATH -b xilinx-xml --synthesis --disable-verify --nested <input> -o <output>

The MLIR I am trying to lower is

  // Reproduction input for the reported `std_mux.sel` error.
  // The function consists of four independent affine loop nests. Each nest
  // accumulates a sum of products over two input memrefs, adds a per-index
  // value from a third memref, replaces non-positive results with zero via
  // arith.select, and stores the result.
  // NOTE(review): nests 2-4 all store into %arg3 (like nest 1), while %arg6
  // and %arg9 are read but never written in this function - confirm whether
  // the store targets are intended.
  func.func @main(%arg0: memref<784xi32, 1>, %arg1: memref<147xi32, 1>, %arg2: memref<3xi32, 1>, %arg3: memref<363xi32, 1>, %arg4: memref<375xi32, 1>, %arg5: memref<5xi32, 1>, %arg6: memref<245xi32, 1>, %arg7: memref<875xi32, 1>, %arg8: memref<7xi32, 1>, %arg9: memref<63xi32, 1>, %arg10: memref<630xi32, 1>, %arg11: memref<10xi32, 1>) {
    // Divisors used on the 64-bit products below.
    // NOTE(review): divsi by 32768, remsi by 2, plus divsi by 65536 looks like
    // a fixed-point rescale with rounding - confirm against the frontend.
    %c65536_i64 = arith.constant 65536 : i64
    %c2_i64 = arith.constant 2 : i64
    %c32768_i64 = arith.constant 32768 : i64
    %c0_i32 = arith.constant 0 : i32
    // Single-element scratch buffers; each nest uses one pair to hold the two
    // candidate values of its arith.select.
    %alloca = memref.alloca() : memref<1xi32>
    %alloca_0 = memref.alloca() : memref<1xi32>
    %alloca_1 = memref.alloca() : memref<1xi32>
    %alloca_2 = memref.alloca() : memref<1xi32>
    %alloca_3 = memref.alloca() : memref<1xi32>
    %alloca_4 = memref.alloca() : memref<1xi32>
    %alloca_5 = memref.alloca() : memref<1xi32>
    %alloca_6 = memref.alloca() : memref<1xi32>
    // Nest 1: 3 x 11 x 11 outputs, each reducing over a 7 x 7 window of
    // %arg1 (first operand) and %arg0 (second operand).
    affine.for %arg12 = 0 to 3 {
      affine.for %arg13 = 0 to 11 {
        affine.for %arg14 = 0 to 11 {
          // %0 is the accumulated sum, threaded through iter_args from 0.
          %0 = affine.for %arg15 = 0 to 7 iter_args(%arg16 = %c0_i32) -> (i32) {
            %7 = affine.for %arg17 = 0 to 7 iter_args(%arg18 = %arg16) -> (i32) {
              %8 = affine.load %arg1[%arg15 * 21 + %arg12 + %arg17 * 3] : memref<147xi32, 1>
              %9 = arith.extsi %8 : i32 to i64
              %10 = affine.load %arg0[(%arg15 + %arg13 * 2) * 28 + %arg14 * 2 + %arg17] : memref<784xi32, 1>
              %11 = arith.extsi %10 : i32 to i64
              // Multiply in i64, then narrow back to i32 before accumulating.
              %12 = arith.muli %9, %11 : i64
              %13 = arith.divsi %12, %c32768_i64 : i64
              %14 = arith.remsi %13, %c2_i64 : i64
              %15 = arith.divsi %12, %c65536_i64 : i64
              %16 = arith.addi %14, %15 : i64
              %17 = arith.trunci %16 : i64 to i32
              %18 = arith.addi %arg18, %17 : i32
              affine.yield %18 : i32
            }
            affine.yield %7 : i32
          }
          // Add the per-%arg12 value from %arg2, then select the sum if it is
          // > 0 and 0 otherwise (both candidates round-trip through the
          // scratch buffers). This arith.select is the op that CIRCT lowers to
          // a std_mux.
          %1 = affine.load %arg2[%arg12] : memref<3xi32, 1>
          %2 = arith.addi %1, %0 : i32
          affine.store %2, %alloca[0] : memref<1xi32>
          affine.store %c0_i32, %alloca_0[0] : memref<1xi32>
          %3 = arith.cmpi sgt, %2, %c0_i32 : i32
          %4 = affine.load %alloca[0] : memref<1xi32>
          %5 = affine.load %alloca_0[0] : memref<1xi32>
          %6 = arith.select %3, %4, %5 : i32
          affine.store %6, %arg3[%arg13 * 33 + %arg12 + %arg14 * 3] : memref<363xi32, 1>
        }
      }
    }
    // Nest 2: 5 x 7 x 7 outputs, each reducing over 5 x 5 x 3 elements of
    // %arg4 and %arg3.
    affine.for %arg12 = 0 to 5 {
      affine.for %arg13 = 0 to 7 {
        affine.for %arg14 = 0 to 7 {
          %0 = affine.for %arg15 = 0 to 5 iter_args(%arg16 = %c0_i32) -> (i32) {
            %7 = affine.for %arg17 = 0 to 5 iter_args(%arg18 = %arg16) -> (i32) {
              %8 = affine.for %arg19 = 0 to 3 iter_args(%arg20 = %arg18) -> (i32) {
                %9 = affine.load %arg4[%arg15 * 75 + %arg12 + %arg17 * 15 + %arg19 * 5] : memref<375xi32, 1>
                %10 = arith.extsi %9 : i32 to i64
                %11 = affine.load %arg3[(%arg17 + %arg14) * 3 + (%arg15 + %arg13) * 33 + %arg19] : memref<363xi32, 1>
                %12 = arith.extsi %11 : i32 to i64
                %13 = arith.muli %10, %12 : i64
                %14 = arith.divsi %13, %c32768_i64 : i64
                %15 = arith.remsi %14, %c2_i64 : i64
                %16 = arith.divsi %13, %c65536_i64 : i64
                %17 = arith.addi %15, %16 : i64
                %18 = arith.trunci %17 : i64 to i32
                %19 = arith.addi %arg20, %18 : i32
                affine.yield %19 : i32
              }
              affine.yield %8 : i32
            }
            affine.yield %7 : i32
          }
          %1 = affine.load %arg5[%arg12] : memref<5xi32, 1>
          %2 = arith.addi %1, %0 : i32
          affine.store %2, %alloca_1[0] : memref<1xi32>
          affine.store %c0_i32, %alloca_2[0] : memref<1xi32>
          %3 = arith.cmpi sgt, %2, %c0_i32 : i32
          %4 = affine.load %alloca_1[0] : memref<1xi32>
          %5 = affine.load %alloca_2[0] : memref<1xi32>
          %6 = arith.select %3, %4, %5 : i32
          // NOTE(review): writes %arg3, which this nest also reads above, using
          // the same index expression as nest 1 - confirm the intended target.
          affine.store %6, %arg3[%arg13 * 33 + %arg12 + %arg14 * 3] : memref<363xi32, 1>
        }
      }
    }
    // Nest 3: 7 x 3 x 3 outputs, each reducing over 5 x 5 x 5 elements of
    // %arg7 and %arg6.
    affine.for %arg12 = 0 to 7 {
      affine.for %arg13 = 0 to 3 {
        affine.for %arg14 = 0 to 3 {
          %0 = affine.for %arg15 = 0 to 5 iter_args(%arg16 = %c0_i32) -> (i32) {
            %7 = affine.for %arg17 = 0 to 5 iter_args(%arg18 = %arg16) -> (i32) {
              %8 = affine.for %arg19 = 0 to 5 iter_args(%arg20 = %arg18) -> (i32) {
                %9 = affine.load %arg7[%arg15 * 175 + %arg12 + %arg17 * 35 + %arg19 * 7] : memref<875xi32, 1>
                %10 = arith.extsi %9 : i32 to i64
                %11 = affine.load %arg6[(%arg17 + %arg14) * 5 + (%arg15 + %arg13) * 35 + %arg19] : memref<245xi32, 1>
                %12 = arith.extsi %11 : i32 to i64
                %13 = arith.muli %10, %12 : i64
                %14 = arith.divsi %13, %c32768_i64 : i64
                %15 = arith.remsi %14, %c2_i64 : i64
                %16 = arith.divsi %13, %c65536_i64 : i64
                %17 = arith.addi %15, %16 : i64
                %18 = arith.trunci %17 : i64 to i32
                %19 = arith.addi %arg20, %18 : i32
                affine.yield %19 : i32
              }
              affine.yield %8 : i32
            }
            affine.yield %7 : i32
          }
          %1 = affine.load %arg8[%arg12] : memref<7xi32, 1>
          %2 = arith.addi %1, %0 : i32
          affine.store %2, %alloca_3[0] : memref<1xi32>
          affine.store %c0_i32, %alloca_4[0] : memref<1xi32>
          %3 = arith.cmpi sgt, %2, %c0_i32 : i32
          %4 = affine.load %alloca_3[0] : memref<1xi32>
          %5 = affine.load %alloca_4[0] : memref<1xi32>
          %6 = arith.select %3, %4, %5 : i32
          // NOTE(review): stores into %arg3 again, with nest 1's index
          // expression - confirm the intended target.
          affine.store %6, %arg3[%arg13 * 33 + %arg12 + %arg14 * 3] : memref<363xi32, 1>
        }
      }
    }
    // Nest 4: 10 outputs, each reducing over 3 x 3 x 7 elements of %arg10
    // and %arg9.
    affine.for %arg12 = 0 to 10 {
      %0 = affine.for %arg13 = 0 to 3 iter_args(%arg14 = %c0_i32) -> (i32) {
        %7 = affine.for %arg15 = 0 to 3 iter_args(%arg16 = %arg14) -> (i32) {
          %8 = affine.for %arg17 = 0 to 7 iter_args(%arg18 = %arg16) -> (i32) {
            %9 = affine.load %arg10[%arg13 * 210 + %arg12 + %arg15 * 70 + %arg17 * 10] : memref<630xi32, 1>
            %10 = arith.extsi %9 : i32 to i64
            %11 = affine.load %arg9[%arg15 * 7 + %arg13 * 21 + %arg17] : memref<63xi32, 1>
            %12 = arith.extsi %11 : i32 to i64
            %13 = arith.muli %10, %12 : i64
            %14 = arith.divsi %13, %c32768_i64 : i64
            %15 = arith.remsi %14, %c2_i64 : i64
            %16 = arith.divsi %13, %c65536_i64 : i64
            %17 = arith.addi %15, %16 : i64
            %18 = arith.trunci %17 : i64 to i32
            %19 = arith.addi %arg18, %18 : i32
            affine.yield %19 : i32
          }
          affine.yield %8 : i32
        }
        affine.yield %7 : i32
      }
      %1 = affine.load %arg11[%arg12] : memref<10xi32, 1>
      %2 = arith.addi %1, %0 : i32
      affine.store %2, %alloca_5[0] : memref<1xi32>
      affine.store %c0_i32, %alloca_6[0] : memref<1xi32>
      %3 = arith.cmpi sgt, %2, %c0_i32 : i32
      %4 = affine.load %alloca_5[0] : memref<1xi32>
      %5 = affine.load %alloca_6[0] : memref<1xi32>
      %6 = arith.select %3, %4, %5 : i32
      // NOTE(review): result is stored into %arg3[%arg12]; %arg11 is only
      // read in this function - confirm the intended target.
      affine.store %6, %arg3[%arg12] : memref<363xi32, 1>
    }
    return
  }

Calyx reports an error complaining that std_mux.sel is not acceptable when I run the command

$calyx <input> --log info -p validate -p discover-external -p validate -x discover-external:default=DEPTH -x discover-external:strip-suffix=_ -o <output>

Looking at the hardware primitive files, std_mux does not have a sel port, but it does have a cond port. I am not sure whether the two are equivalent. I know the sel port comes from the ... = arith.select ... operation in the MLIR, and I noticed that CIRCT has an example converting arith.select to std_mux_0.sel. Does this mean the result cannot be lowered in Calyx?

Looking forward to your reply. Many thanks!

Answer 1 · 2023-10-24T15:14:02.000Z

Looks like the primitive definition in CIRCT uses sel while the Calyx primitive uses the cond port. Should be an easy fix in the CIRCT repo.

Answer 2 · 2023-10-24T22:15:25.000Z

@zzy666666zzy should be fixed on the CIRCT side now!