Floating point arithmetic issue related to reduction loop

Question

Floating point arithmetic issue related to reduction loop

Closed this issue 3 years ago · 1 comments

Description

For a program with a reduction loop, the result computed with float32 varies across different runs.

To Repeat the Issue

Below is an example of hcl.sum() that sums up the two elements of a vector.

import ctypes
import numpy as np
import os

import mlir.all_passes_registration

from mlir import ir
from mlir import runtime as rt
from mlir import execution_engine
from mlir import passmanager

from mlir.dialects import builtin

class Compiler:
    """Callable that runs a fixed MLIR pass pipeline over a module.

    The pipeline is stored on ``self.pipeline`` as one comma-separated
    string of pass names; calling the instance parses that string into a
    pass manager and runs it on the given module.
    """

    def __init__(self):
        # Pass names, in the order they appear in the pipeline string.
        pass_names = [
            'lower-affine',
            'convert-scf-to-std',
            'convert-memref-to-llvm',
            'convert-std-to-llvm',
        ]
        self.pipeline = ','.join(pass_names)

    def __call__(self, module: ir.Module):
        """Parse ``self.pipeline`` and run the resulting passes on *module*."""
        pass_manager = passmanager.PassManager.parse(self.pipeline)
        pass_manager.run(module)


def code():
    """Return the MLIR source text of the reproducer module.

    The module defines:

    * ``@top``: for each of the two elements of its ``memref<2xf32>``
      argument, loads the element, adds it to the value held in the
      one-element buffer ``%3`` and stores the sum back; the final value
      is then copied into the returned ``memref<1xf32>``.
    * ``@main``: calls ``@top``, prints the result through
      ``@print_memref_f32`` and returns it.

    NOTE: ``%3`` comes straight from ``memref.alloc`` and is loaded inside
    the inner loop without any store having been made to it first.
    """
    # Plain (non-f) string: the braces below are literal MLIR braces.
    mlir_source = """
module  {
memref.global "private" @gv0 : memref<2xf32> = dense<[1.0, 2.0]>
  func @top(%arg0: memref<2xf32>) -> memref<1xf32> {
    //%0 = hcl.create_stage_handle "sum" : !hcl.StageHandle
    %1 = memref.alloc() : memref<1xf32>
    //%2 = hcl.create_loop_handle "_" : !hcl.LoopHandle
    affine.for %arg1 = 0 to 1 {
      %3 = memref.alloc() : memref<1xf32>
      affine.for %arg2 = 0 to 2 {
        %5 = affine.load %arg0[%arg2] : memref<2xf32>
        %c0_0 = constant 0 : index
        %6 = affine.load %3[%c0_0] : memref<1xf32>
        %7 = addf %5, %6 : f32
        affine.store %7, %3[%c0_0] : memref<1xf32>
      } {loop_name = "x"}
      %c0 = constant 0 : index
      %4 = affine.load %3[%c0] : memref<1xf32>
      affine.store %4, %1[%arg1] : memref<1xf32>
    } {loop_name = "_", stage_name = "sum"}
    return %1 : memref<1xf32>
  }

  func @main(%0 : memref<2xf32>) -> memref<1xf32> attributes { llvm.emit_c_interface } {
    %1 = call @top(%0) : (memref<2xf32>) -> (memref<1xf32>)
    %U = memref.cast %1 : memref<1xf32> to memref<*xf32>
    call @print_memref_f32(%U) : (memref<*xf32>) -> ()
    return %1 : memref<1xf32>
  }
  func private @print_memref_f32(memref<*xf32>) attributes { llvm.emit_c_interface }
}
"""
    return mlir_source

def main(support_lib=None):
    """Compile the MLIR reproducer, invoke its ``main`` and print the output.

    Args:
        support_lib: Optional list of shared-library paths passed to the
            execution engine as ``shared_libs``. When omitted, the
            hard-coded default paths below are used.
    """
    if support_lib is None:
        # Change these support library paths to yours.
        # We need these support libraries for @print_memref_f32.
        support_lib = [
            "/work/shared/users/phd/nz264/llvm-13.0/build/lib/libmlir_c_runner_utils.so",
            "/work/shared/users/phd/nz264/llvm-13.0/build/lib/libmlir_runner_utils.so",
        ]

    with ir.Context(), ir.Location.unknown():
        compiler = Compiler()
        module = ir.Module.parse(code())
        compiler(module)
        engine = execution_engine.ExecutionEngine(
            module, opt_level=0, shared_libs=support_lib
        )

        # Input vector and a placeholder for the single-element result.
        a = np.array([1, 2], np.float32)
        b = np.zeros((1,), np.float32)
        mem_a = ctypes.pointer(ctypes.pointer(rt.get_ranked_memref_descriptor(a)))
        mem_b = ctypes.pointer(ctypes.pointer(rt.get_ranked_memref_descriptor(b)))

        # The result descriptor is passed ahead of the input; the output is
        # read back from it after the call.
        engine.invoke('main', mem_b, mem_a)
        out = rt.ranked_memref_to_numpy(mem_b[0])
        print(out)

# Run the reproducer only when this file is executed as a script.
if __name__ == "__main__":
    main()

Across multiple runs, the result printed out is:

(mlir) $ python issues/debug.py 
Unranked Memref base@ = 0x562199587810 rank = 1 offset = 0 sizes = [1] strides = [1] data = 
[3]
[3.]
(mlir) $ python issues/debug.py 
Unranked Memref base@ = 0x5619f1bb3f20 rank = 1 offset = 0 sizes = [1] strides = [1] data = 
[-1.91384e+30]
[-1.9138384e+30]
(mlir) $ python issues/debug.py 
Unranked Memref base@ = 0x55ff3a092ab0 rank = 1 offset = 0 sizes = [1] strides = [1] data = 
[3.00055]
[3.0005467]

Answer 1 · 2022-01-20T15:48:40.000Z

This issue is caused by the single-element memref above the reduction loop not being initialized to zero. Fixed by d4ce005.