FP32 --> FP16: the original fp32 model works with the input data, but the converted fp16 model fails on the same input data
yetingqiaqia opened this issue · 0 comments
Hi,
I am using onnxmltools to convert an fp32 model to fp16. The original fp32 model was exported from a PyTorch model with opset 12. The fp32 model runs correctly on the input data. However, the fp16 model fails with an error message when running inference on the same input data. Could you please take a look?
This is the code I used to convert fp32 onnx model to fp16 model. It finished successfully.
import onnx
from onnxmltools.utils.float16_converter import *
from onnxmltools.utils import load_model, save_model
# Locations of the fp32 source model and of the fp16 model to be written.
onnx_model_path = './graph_opset12.onnx'
fp16_model_path = './graph_opset12_fp16.onnx'

# Convert the model stored at `onnx_model_path` to float16.
# NOTE(review): keep_io_types=True presumably leaves the graph inputs/outputs
# as float32 -- confirm against the converter documentation.
new_onnx_model = convert_float_to_float16_model_path(
    onnx_model_path,
    keep_io_types=True,
)

# Persist the converted model.
save_model(new_onnx_model, fp16_model_path)
After I got the converted fp16 model, I used the code below to run it:
# Import Libraries
import argparse
import torch
import random
from torchvision.transforms import *
from PIL import Image, ImageFile
from io import BytesIO
import base64
import time
from torch.utils.data import IterableDataset
import torchvision as tv
# Pillow module-level switch; per the Pillow documentation this lets truncated
# image files be loaded instead of raising an error.
ImageFile.LOAD_TRUNCATED_IMAGES = True
# Wall-clock start of the script; assigned again inside the __main__ guard.
script_start_time = time.time()
class ImageDataset_Base64(IterableDataset):
    """Iterable dataset over a TSV file of base64-encoded images.

    Every line of the file must contain exactly three tab-separated fields:
    ``url``, ``id`` and the base64-encoded image bytes.  Iterating yields
    ``(id, image, label)`` tuples, where the label is drawn at random from
    ``{0, 1, 2}`` (the file carries no label column).

    When iterated inside a ``DataLoader`` worker, each worker receives a
    contiguous, non-overlapping shard of the lines, sized by the actual
    number of workers.  When iterated in the main process (``num_workers=0``)
    every line is yielded.
    """

    # Number of shards precomputed into ``self.parts``.
    _NUM_PARTS = 8

    def __init__(self, filename, transforms=None):
        """Read all lines of *filename* into memory.

        Args:
            filename: Path of the TSV file to read.
            transforms: Optional callable applied to each decoded RGB image.
        """
        print("File from which we are training {}".format(filename))
        self.filename = filename
        self.transform = transforms
        # Context manager guarantees the file handle is closed after reading.
        with open(self.filename) as handle:
            self.lines = handle.readlines()
        self.length = len(self.lines)
        print("Number of data points {}".format(self.length))
        # Kept as a public attribute for backward compatibility; __iter__
        # derives its shard from the real worker count instead.
        self.parts = {
            i: self._shard(i, self._NUM_PARTS) for i in range(self._NUM_PARTS)
        }

    def _shard(self, index, count):
        """Return the lines belonging to shard *index* out of *count* shards."""
        start = index * self.length // count
        stop = (index + 1) * self.length // count
        return self.lines[start:stop]

    def preprocess_img(self, img_b64):
        """Decode a base64 string into an RGB image and apply the transform.

        If the payload cannot be decoded or opened as an image, a 480x480
        RGB placeholder is used instead.

        Args:
            img_b64: Base64-encoded image bytes.

        Returns:
            The RGB image, passed through ``self.transform`` when one is set.
        """
        try:
            im = Image.open(BytesIO(base64.b64decode(img_b64)))
            X = im.convert('RGB')
        except Exception:
            # Best effort: any decoding failure falls back to a placeholder,
            # but KeyboardInterrupt/SystemExit are no longer swallowed.
            X = Image.new('RGB', (480, 480))  # default color is black
        if self.transform is not None:
            X = self.transform(X)
        return X

    def preprocess_id(self, id):
        """Return *id* converted to ``int``."""
        return int(id)

    def preprocess_label(self, label):
        """Return *label* as an int in ``{0, 1, 2}``, or 0 when it is invalid.

        Args:
            label: Value convertible to ``int``.

        Returns:
            The integer label when it is 0, 1 or 2; otherwise 0.
        """
        try:
            y = int(label)
        except (TypeError, ValueError, OverflowError):
            return 0
        return y if y in (0, 1, 2) else 0

    def line_mapper(self, line):
        """Turn one TSV line into an ``(id, image, label)`` tuple.

        Args:
            line: Raw line with ``url``, ``id`` and base64 image separated
                by tab characters.

        Returns:
            Tuple of the integer id, the preprocessed image and the label.

        Raises:
            ValueError: If the line does not contain exactly three fields.
        """
        _url, raw_id, imgb64 = line.rstrip().split('\t')
        # The file has no label column, so a random label is attached.
        label = random.choice([0, 1, 2])
        sample_id = self.preprocess_id(raw_id)
        X = self.preprocess_img(imgb64)
        y = self.preprocess_label(label)
        return sample_id, X, y

    def __iter__(self):
        """Return a lazy iterator over this process's share of the samples."""
        worker_info = torch.utils.data.get_worker_info()
        if worker_info is None:
            # Main-process iteration (num_workers=0): no sharding required.
            shard = self.lines
        else:
            # Split by the real worker count so no line is dropped or
            # duplicated when the loader does not use exactly 8 workers.
            shard = self._shard(worker_info.id, worker_info.num_workers)
        return map(self.line_mapper, shard)
def get_val_loader(args):
    """Build the DataLoader that feeds validation images to the model.

    Args:
        args: Namespace providing ``image_size``, ``test_file_path`` and
            ``batch_size``.

    Returns:
        A ``torch.utils.data.DataLoader`` over ``ImageDataset_Base64`` using
        8 worker processes and pinned memory.
    """
    side = args.image_size
    # Resize to a square, convert to a tensor, then normalise every channel
    # with mean 0.5 and standard deviation 0.5.
    preprocessing = tv.transforms.Compose([
        tv.transforms.Resize((side, side)),
        tv.transforms.ToTensor(),
        tv.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    dataset = ImageDataset_Base64(args.test_file_path, transforms=preprocessing)
    return torch.utils.data.DataLoader(
        dataset,
        args.batch_size,
        num_workers=8,
        pin_memory=True,
    )
def evaluate(args):
    """Run the ONNX model over the validation set and print the inference time.

    Only the time spent inside ``InferenceSession.run`` is accumulated; data
    loading and tensor-to-NumPy conversion are excluded.

    Args:
        args: Namespace providing ``onnx_model_path`` plus the fields read by
            ``get_val_loader``.
    """
    val_loader = get_val_loader(args)

    import onnxruntime as ort

    sess_options = ort.SessionOptions()
    sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
    sess_options.intra_op_num_threads = 0
    sess = ort.InferenceSession(args.onnx_model_path, sess_options)
    input_name = sess.get_inputs()[0].name
    label_name = sess.get_outputs()[0].name

    def to_numpy(torch_tensor):
        # detach() is harmless on tensors that do not require grad, so one
        # code path covers both cases.
        return torch_tensor.detach().cpu().numpy()

    accumulated_inference_time = 0
    with torch.no_grad():
        for _, data, _ in val_loader:
            # The session is fed a host NumPy array, so the batch is converted
            # directly instead of being copied to args.device and back.
            batch = to_numpy(data)
            # perf_counter is monotonic, which suits interval measurement.
            start_stamp = time.perf_counter()
            sess.run([label_name], {input_name: batch})
            accumulated_inference_time += time.perf_counter() - start_stamp
    print(f"Total Onnx model inference time is {accumulated_inference_time}")
if __name__ == "__main__":
    # Record the start time again so the total printed below is measured
    # from the start of this guarded section.
    script_start_time = time.time()

    # The parser declares no options; every setting is assigned by hand.
    args, unknown = argparse.ArgumentParser().parse_known_args()

    # Input data and the model under test.
    args.test_file_path = "./DivideImages_0.tsv"
    args.onnx_model_path = "./graph_opset12.onnx"

    # Batch geometry.
    args.batch_size = 128
    args.image_size = 480

    # Hardware selection.
    args.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    args.n_gpu = torch.cuda.device_count()
    print(f" ARGS.device {args.device} ARGS.n_gpu {args.n_gpu}")

    evaluate(args)
    print(f"Total running time: {time.time()-script_start_time}")
However, running the fp16 model fails with the error message below, while the fp32 model runs fine.
File "/home/tiy/.local/lib/python3.6/site-packages/onnxruntime/capi/onnxruntime_inference_collection.py", line 188, in run
return self._sess.run(output_names, input_feed, run_options)
onnxruntime.capi.onnxruntime_pybind11_state.Fail: [ONNXRuntimeError] : 1 : FAIL : Non-zero status code returned while running InstanceNormalization node. Name:'InstanceNormalization_31' Status Message: CUDNN error executing cudnnBatchNormalizationForwardTraining( CudnnHandle(), CUDNN_BATCHNORM_SPATIAL, &one, &zero, data_desc, x_data, data_desc, y_data, stats_desc, unused_scale.get(), unused_bias.get(), 1.0f, mean.get(), variance.get(), CUDNN_BN_MIN_EPSILON, nullptr, nullptr)
The code, model and data can be found here: https://www.dropbox.com/s/27nlnm7avp7wins/Resnet_fp16_test.zip?dl=0
- To run the code:
python resnet-opset12_fp16-test.py
- To test the fp16 model: in the script, replace args.onnx_model_path = "./graph_opset12.onnx" with args.onnx_model_path = "./graph_opset12_fp16.onnx", then run the command
python resnet-opset12_fp16-test.py