Fp32-->fp16: original fp32 model works well with input data, but converted fp16 model failed with the same input data

Question

Fp32-->fp16: original fp32 model works well with input data, but converted fp16 model failed with the same input data

yetingqiaqia opened this issue 3 years ago · 0 comments

Hi,

I am using onnxmltools to convert an fp32 model to fp16. The original fp32 model was exported from a PyTorch model with opset 12. The fp32 model works well on the input data. However, the fp16 model fails with an error message when running inference on the same input data. Could you please take a look?

This is the code I used to convert fp32 onnx model to fp16 model. It finished successfully.

import onnx
from onnxmltools.utils.float16_converter import *
from onnxmltools.utils import load_model, save_model

# Location of the fp32 source model and of the fp16 model to be written.
fp32_model_path = './graph_opset12.onnx'
fp16_model_path = './graph_opset12_fp16.onnx'

# Convert the model stored at the fp32 path, then persist the result.
# NOTE(review): keep_io_types=True presumably leaves the graph inputs/outputs
# as float32 so callers can keep feeding fp32 data - confirm against the
# onnxmltools float16 converter documentation.
fp16_model = convert_float_to_float16_model_path(fp32_model_path, keep_io_types=True)
save_model(fp16_model, fp16_model_path)

After obtaining the converted fp16 model, I used the code below to run it:

# Import Libraries
import argparse
import torch
import random
from torchvision.transforms import *
from PIL import Image, ImageFile
from io import BytesIO
import base64
import time
from torch.utils.data import IterableDataset
import torchvision as tv

# Global Pillow switch, set once for the whole script.
# NOTE(review): presumably makes Pillow load images whose data is truncated
# instead of raising - confirm against the Pillow ImageFile documentation.
ImageFile.LOAD_TRUNCATED_IMAGES = True
# Wall-clock reference taken when the module is loaded; the __main__ block
# assigns it again right before the evaluation starts.
script_start_time = time.time()

class ImageDataset_Base64(IterableDataset):
    """Iterable dataset over a TSV file of base64-encoded images.

    Every line of the file is split on tabs into exactly three fields:
    ``url``, ``id`` and the base64-encoded image.  All lines are read into
    memory when the dataset is constructed and are handed out in contiguous
    shards, one shard per ``DataLoader`` worker.

    Attributes:
        filename: Path of the TSV file.
        transform: Optional callable applied to every decoded RGB image.
        lines: All lines of the file, line endings included.
        length: Number of lines in the file.
        parts: Mapping ``0 .. NUM_PARTS - 1`` to the matching contiguous slice
            of ``lines``.
    """

    # Number of shards pre-computed into ``self.parts``.
    NUM_PARTS = 8

    def __init__(self, filename, transforms=None):
        """Read ``filename`` into memory and pre-compute the shards.

        Args:
            filename: Path of the tab-separated input file.
            transforms: Optional callable applied to each decoded image.
        """
        super().__init__()
        print("File from which we are training {}".format(filename))
        self.filename = filename
        self.transform = transforms
        # Context manager so the file handle is closed deterministically.
        with open(self.filename) as tsv_file:
            self.lines = tsv_file.readlines()
        self.length = len(self.lines)
        print("Number of data points {}".format(self.length))
        self.parts = {
            index: self._shard(index, self.NUM_PARTS)
            for index in range(self.NUM_PARTS)
        }

    def _shard(self, index, count):
        """Return the ``index``-th of ``count`` contiguous slices of ``lines``."""
        start = index * self.length // count
        stop = (index + 1) * self.length // count
        return self.lines[start:stop]

    def preprocess_img(self, img_b64):
        """Decode a base64 image to RGB and apply the optional transform.

        Decoding is best-effort: any input that cannot be decoded is replaced
        by a 480x480 image created with the default colour (black).

        Args:
            img_b64: Base64-encoded image bytes (``str`` or ``bytes``).

        Returns:
            The RGB image, or the result of ``self.transform`` when one is set.
        """
        try:
            image = Image.open(BytesIO(base64.b64decode(img_b64)))
            X = image.convert('RGB')
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still propagate.
            X = Image.new('RGB', (480, 480))  # default color is black
        if self.transform is not None:
            X = self.transform(X)
        return X

    def preprocess_id(self, id):
        """Convert the id field to ``int``; raises if it is not numeric."""
        # The parameter name shadows the builtin but is kept so existing
        # keyword callers keep working.
        return int(id)

    def preprocess_label(self, label):
        """Map ``label`` to one of the classes 0, 1, 2.

        Returns:
            ``int(label)`` when that value is 0, 1 or 2; otherwise 0 (also for
            labels that cannot be converted to ``int``).
        """
        try:
            y = int(label)
        except (TypeError, ValueError, OverflowError):
            return 0
        return y if y in (0, 1, 2) else 0

    def line_mapper(self, line):
        """Turn one TSV line into an ``(id, image, label)`` triple.

        The line must contain exactly three tab-separated fields; otherwise
        the unpacking raises ``ValueError``.  The file carries no label
        column, so the label is drawn at random from ``{0, 1, 2}``.
        """
        _url, raw_id, imgb64 = line.rstrip().split('\t')
        label = random.choice([0, 1, 2])
        sample_id = self.preprocess_id(raw_id)
        X = self.preprocess_img(imgb64)
        y = self.preprocess_label(label)
        return sample_id, X, y

    def __iter__(self):
        """Return a lazy iterator over this worker's share of the samples.

        * No worker process (``get_worker_info()`` is ``None``): every line.
        * Exactly ``NUM_PARTS`` workers: the pre-computed ``self.parts`` entry.
        * Any other worker count: a contiguous slice computed for that count,
          so no line is dropped and no worker id is out of range.
        """
        worker_info = torch.utils.data.get_worker_info()
        if worker_info is None:
            shard = self.lines
        elif worker_info.num_workers == self.NUM_PARTS:
            shard = self.parts[worker_info.id]
        else:
            shard = self._shard(worker_info.id, worker_info.num_workers)
        # map each element using the line_mapper
        return map(self.line_mapper, shard)

def get_val_loader(args):
    """Build the DataLoader that feeds the evaluation loop.

    Args:
        args: Namespace providing ``image_size``, ``test_file_path`` and
            ``batch_size``.

    Returns:
        A ``torch.utils.data.DataLoader`` over ``ImageDataset_Base64`` that
        yields batches of ``args.batch_size`` samples.
    """
    side = args.image_size
    # Resize to a square, convert to a tensor, then normalise each of the
    # three channels with mean 0.5 and std 0.5.
    pipeline = tv.transforms.Compose([
        tv.transforms.Resize((side, side)),
        tv.transforms.ToTensor(),
        tv.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    dataset = ImageDataset_Base64(args.test_file_path, transforms=pipeline)
    # Eight workers: the dataset splits its lines into eight parts and each
    # worker reads the part that matches its worker id.
    return torch.utils.data.DataLoader(
        dataset,
        args.batch_size,
        num_workers=8,
        pin_memory=True,
    )

def evaluate(args):
    """Run the ONNX model over the validation data and print the time spent.

    Only the ``sess.run`` calls are timed; data loading and tensor conversion
    are excluded.  The predictions themselves are not used.

    Args:
        args: Namespace providing ``onnx_model_path`` and ``device`` plus the
            fields required by ``get_val_loader``.
    """
    import onnxruntime as ort

    def to_numpy(torch_tensor):
        # Detach first when the tensor takes part in autograd, then copy to
        # host memory and expose it as a NumPy array.
        if torch_tensor.requires_grad:
            torch_tensor = torch_tensor.detach()
        return torch_tensor.cpu().numpy()

    val_loader = get_val_loader(args)

    options = ort.SessionOptions()
    options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
    # NOTE(review): 0 presumably lets onnxruntime pick the thread count -
    # confirm against the SessionOptions documentation.
    options.intra_op_num_threads = 0
    sess = ort.InferenceSession(args.onnx_model_path, options)

    # The model is driven through its first input and first output only.
    input_name = sess.get_inputs()[0].name
    label_name = sess.get_outputs()[0].name

    accumulated_inference_time = 0
    with torch.no_grad():
        for _sample_ids, batch, _targets in val_loader:
            batch = batch.to(args.device)

            # below is onnx inference code
            feed = {input_name: to_numpy(batch)}
            tick = time.time()
            pred = sess.run([label_name], feed)[0]
            accumulated_inference_time += time.time() - tick

    print(f"Total Onnx model inference time is {accumulated_inference_time}")
    
if __name__ == "__main__":
    # Restart the wall clock so the total below covers this run only.
    script_start_time = time.time()

    # No options are registered on the parser; the returned namespace is
    # simply used as a container for the hard-coded settings below.
    args, _unknown = argparse.ArgumentParser().parse_known_args()

    # Data and model settings.
    args.image_size = 480
    args.batch_size = 128
    args.test_file_path = "./DivideImages_0.tsv"
    args.onnx_model_path = "./graph_opset12.onnx"

    # Device selection: CUDA when available, CPU otherwise.
    cuda_available = torch.cuda.is_available()
    args.device = torch.device("cuda" if cuda_available else "cpu")
    args.n_gpu = torch.cuda.device_count()
    print(" ARGS.device " + str(args.device) + "  ARGS.n_gpu " + str(args.n_gpu))

    evaluate(args)

    print(f"Total running time: {time.time()-script_start_time}")

However, running the fp16 model fails with the error message below, while the fp32 model runs fine.

File "/home/tiy/.local/lib/python3.6/site-packages/onnxruntime/capi/onnxruntime_inference_collection.py", line 188, in run
return self._sess.run(output_names, input_feed, run_options)
onnxruntime.capi.onnxruntime_pybind11_state.Fail: [ONNXRuntimeError] : 1 : FAIL : Non-zero status code returned while running InstanceNormalization node. Name:'InstanceNormalization_31' Status Message: CUDNN error executing cudnnBatchNormalizationForwardTraining( CudnnHandle(), CUDNN_BATCHNORM_SPATIAL, &one, &zero, data_desc, x_data, data_desc, y_data, stats_desc, unused_scale.get(), unused_bias.get(), 1.0f, mean.get(), variance.get(), CUDNN_BN_MIN_EPSILON, nullptr, nullptr)

The code, model and data can be found here: https://www.dropbox.com/s/27nlnm7avp7wins/Resnet_fp16_test.zip?dl=0

To run the code: python resnet-opset12_fp16-test.py
To test the fp16 model: in the script, replace args.onnx_model_path = "./graph_opset12.onnx" with args.onnx_model_path = "./graph_opset12_fp16.onnx", then run python resnet-opset12_fp16-test.py again.