HolyWu/vs-animesr

TensorRT FP16(RGBH) not working

Selur opened this issue · 2 comments

Selur commented

Using FP32 (RGBS):

# Imports
import vapoursynth as vs
# getting Vapoursynth core
core = vs.core
import site
import os
import ctypes
# Adding torch dependencies to PATH
path = site.getsitepackages()[0]+'/torch_dependencies/'
# Windows-only: register the torch_dependencies folder as a DLL search
# directory so the CUDA/torch DLLs resolve before PATH is consulted
ctypes.windll.kernel32.SetDllDirectoryW(path)
path = path.replace('\\', '/')
os.environ["PATH"] = path + os.pathsep + os.environ["PATH"]
# Loading Plugins
core.std.LoadPlugin(path="i:/Hybrid/64bit/vsfilters/Support/fmtconv.dll")
core.std.LoadPlugin(path="i:/Hybrid/64bit/vsfilters/SourceFilter/LSmashSource/vslsmashsource.dll")
# source: 'G:\TestClips&Co\files\test.avi'
# current color space: YUV420P8, bit depth: 8, resolution: 640x352, fps: 25, color matrix: 470bg, yuv luminance scale: limited, scanorder: progressive
# Loading G:\TestClips&Co\files\test.avi using LWLibavSource
clip = core.lsmas.LWLibavSource(source="G:/TestClips&Co/files/test.avi", format="YUV420P8", stream_index=0, cache=0, prefer_hw=0)
# Setting color matrix to 470bg.
clip = core.std.SetFrameProps(clip, _Matrix=5)
# NOTE(review): core.text.FrameProps(...) returns a clip object, which is
# presumably always truthy, so the SetFrameProps branch likely runs
# unconditionally on both lines below — confirm the intended prop check
clip = clip if not core.text.FrameProps(clip,'_Transfer') else core.std.SetFrameProps(clip, _Transfer=5)
clip = clip if not core.text.FrameProps(clip,'_Primaries') else core.std.SetFrameProps(clip, _Primaries=5)
# Setting color range to TV (limited) range.
clip = core.std.SetFrameProp(clip=clip, prop="_ColorRange", intval=1)
# making sure frame rate is set to 25
clip = core.std.AssumeFPS(clip=clip, fpsnum=25, fpsden=1)
# mark the clip as progressive
clip = core.std.SetFrameProp(clip=clip, prop="_FieldBased", intval=0)
from vsanimesr import animesr as AnimeSR
# adjusting color space from YUV420P8 to RGBS for VsAnimeSR
# (RGBS = 32-bit float RGB; this is the working FP32 path)
clip = core.resize.Bicubic(clip=clip, format=vs.RGBS, matrix_in_s="470bg", range_s="limited")
# resizing using AnimeSR
clip = AnimeSR(clip=clip, device_index=0, trt=True, trt_cache_path=r"G:\Temp") # 2560x1408
# resizing 2560x1408 to 640x352
# adjusting resizing
# (fmtc accepts the 32-bit float RGBS output directly here)
clip = core.fmtc.resample(clip=clip, w=640, h=352, kernel="lanczos", interlaced=False, interlacedd=False)
# adjusting output color from: RGBS to YUV420P8 for x264Model
clip = core.resize.Bicubic(clip=clip, format=vs.YUV420P8, matrix_s="470bg", range_s="limited", dither_type="error_diffusion")
# set output frame rate to 25fps
clip = core.std.AssumeFPS(clip=clip, fpsnum=25, fpsden=1)
# Output
clip.set_output()

the script works, but using FP16 (RGBH):

# Imports
import vapoursynth as vs
# getting Vapoursynth core
core = vs.core
import site
import os
import ctypes
# Adding torch dependencies to PATH
path = site.getsitepackages()[0]+'/torch_dependencies/'
# Windows-only: register the torch_dependencies folder as a DLL search
# directory so the CUDA/torch DLLs resolve before PATH is consulted
ctypes.windll.kernel32.SetDllDirectoryW(path)
path = path.replace('\\', '/')
os.environ["PATH"] = path + os.pathsep + os.environ["PATH"]
# Loading Plugins
core.std.LoadPlugin(path="i:/Hybrid/64bit/vsfilters/Support/fmtconv.dll")
core.std.LoadPlugin(path="i:/Hybrid/64bit/vsfilters/SourceFilter/LSmashSource/vslsmashsource.dll")
# source: 'G:\TestClips&Co\files\test.avi'
# current color space: YUV420P8, bit depth: 8, resolution: 640x352, fps: 25, color matrix: 470bg, yuv luminance scale: limited, scanorder: progressive
# Loading G:\TestClips&Co\files\test.avi using LWLibavSource
clip = core.lsmas.LWLibavSource(source="G:/TestClips&Co/files/test.avi", format="YUV420P8", stream_index=0, cache=0, prefer_hw=0)
# Setting color matrix to 470bg.
clip = core.std.SetFrameProps(clip, _Matrix=5)
# NOTE(review): core.text.FrameProps(...) returns a clip object, which is
# presumably always truthy, so the SetFrameProps branch likely runs
# unconditionally on both lines below — confirm the intended prop check
clip = clip if not core.text.FrameProps(clip,'_Transfer') else core.std.SetFrameProps(clip, _Transfer=5)
clip = clip if not core.text.FrameProps(clip,'_Primaries') else core.std.SetFrameProps(clip, _Primaries=5)
# Setting color range to TV (limited) range.
clip = core.std.SetFrameProp(clip=clip, prop="_ColorRange", intval=1)
# making sure frame rate is set to 25
clip = core.std.AssumeFPS(clip=clip, fpsnum=25, fpsden=1)
# mark the clip as progressive
clip = core.std.SetFrameProp(clip=clip, prop="_FieldBased", intval=0)
from vsanimesr import animesr as AnimeSR
# adjusting color space from YUV420P8 to RGBH for VsAnimeSR
# (RGBH = 16-bit half-float RGB; this is the failing FP16 path —
# the only change vs. the working script, besides the conversion below)
clip = core.resize.Bicubic(clip=clip, format=vs.RGBH, matrix_in_s="470bg", range_s="limited")
# resizing using AnimeSR
clip = AnimeSR(clip=clip, device_index=0, trt=True, trt_cache_path=r"G:\Temp") # 2560x1408
# resizing 2560x1408 to 640x352
# adjusting resizing
# convert RGBH back to RGBS first, presumably because fmtc.resample
# does not accept half-float input — TODO confirm
clip = core.resize.Bicubic(clip=clip, format=vs.RGBS, range_s="limited")
clip = core.fmtc.resample(clip=clip, w=640, h=352, kernel="lanczos", interlaced=False, interlacedd=False)
# adjusting output color from: RGBS to YUV420P8 for x264Model
clip = core.resize.Bicubic(clip=clip, format=vs.YUV420P8, matrix_s="470bg", range_s="limited", dither_type="error_diffusion")
# set output frame rate to 25fps
clip = core.std.AssumeFPS(clip=clip, fpsnum=25, fpsden=1)
# Output
clip.set_output()

the script fails: the preview simply closes.

using VSPipe.exe --progress c:\Users\Selur\Desktop\test_2.vpy -c y4m NUL
I get:

Warning: I:\Hybrid\64bit\Vapoursynth\Lib\site-packages\torch\fx\operator_schemas.py:180: UserWarning: We were not able to successfully create type hint from the type (slice(<class 'NoneType'>, <class 'NoneType'>, <class 'NoneType'>), slice(<class 'int'>, <class 'int'>, <class 'NoneType'>))
  warnings.warn(f"We were not able to successfully create type hint from the type {x}")

Warning: I:\Hybrid\64bit\Vapoursynth\Lib\site-packages\torch\fx\operator_schemas.py:180: UserWarning: We were not able to successfully create type hint from the type (slice(<class 'NoneType'>, <class 'NoneType'>, <class 'NoneType'>), slice(<class 'NoneType'>, <class 'int'>, <class 'NoneType'>))
  warnings.warn(f"We were not able to successfully create type hint from the type {x}")

Warning: I:\Hybrid\64bit\Vapoursynth\Lib\site-packages\torch\fx\operator_schemas.py:180: UserWarning: We were not able to successfully create type hint from the type (slice(<class 'NoneType'>, <class 'NoneType'>, <class 'NoneType'>), slice(<class 'int'>, <class 'NoneType'>, <class 'NoneType'>))
  warnings.warn(f"We were not able to successfully create type hint from the type {x}")

Information: == Log pass <function fuse_permute_matmul at 0x0000017DD8DA7C70> before/after graph to C:\Users\Selur\AppData\Local\Temp\tmpp8ef3_2_, before/after are the same = True
Information: == Log pass <function fuse_permute_linear at 0x0000017DD8DA7A30> before/after graph to C:\Users\Selur\AppData\Local\Temp\tmp0a776tp0, before/after are the same = True

Supported node types in the model:
acc_ops.getitem: ((), {'input': torch.float16})
acc_ops.cat: ((), {})
acc_ops.conv2d: ((), {'input': torch.float16, 'weight': torch.float16, 'bias': torch.float16})
acc_ops.leaky_relu: ((), {'input': torch.float16})
acc_ops.add: ((), {'input': torch.float16, 'other': torch.float16})
acc_ops.add: ((), {'input': torch.float16})
acc_ops.relu: ((), {'input': torch.float16})
acc_ops.mul: ((), {'input': torch.float16})
acc_ops.interpolate: ((), {'input': torch.float16})

Unsupported node types in the model:
torch.pixel_unshuffle: ((torch.float16,), {})
torch.pixel_shuffle: ((torch.float16,), {})

Got 2 acc subgraphs and 2 non-acc subgraphs
Information: Now lowering submodule _run_on_acc_1
Information: split_name=_run_on_acc_1, input_specs=[InputTensorSpec(shape=torch.Size([1, 9, 352, 640]), dtype=torch.float16, device=device(type='cuda', index=0), shape_ranges=[], has_batch_dim=True), InputTensorSpec(shape=torch.Size([1, 48, 352, 640]), dtype=torch.float16, device=device(type='cuda', index=0), shape_ranges=[], has_batch_dim=True), InputTensorSpec(shape=torch.Size([1, 64, 352, 640]), dtype=torch.float16, device=device(type='cuda', index=0), shape_ranges=[], has_batch_dim=True)]
Information: Timing cache is used!
[12/17/2022-22:45:45] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage. See `CUDA_MODULE_LOADING` in https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars
Information: TRT INetwork construction elapsed time: 0:00:00.085907

This happens with both models.

Have you upgraded your TensorRT DLLs and Python package from 8.5.1.7 to 8.5.2.2? It fixed an issue which would crash the builder.

Selur commented

Ahh, thanks! I missed that. :) After installing 8.5.2.2 it worked! :)