Testing it with torch1.7.3+cuda11.0, it fails
jsw7961 opened this issue · 3 comments
Describe the problem
I tested torch-batch-svd with torch version 1.7.1 and CUDA 10.0, and it works well.
However, when I test it with torch version 1.7.3 and CUDA 11.0, it fails.
It always shows this message:
RuntimeError: Expected CUSOLVER_STATUS_SUCCESS == status to be true, but got false. (Could this error message be improved? If so, please report an enhancement request to PyTorch.)
(The GTX 30XX series does not support CUDA 10.x, so I have no choice but to use CUDA 11.x.)
How can I solve this problem?
Error message
collected 4 items
test.py FFFF [100%]
============================================================= FAILURES ==============================================================
____________________________________________________________ test_float _____________________________________________________________
def test_float():
torch.manual_seed(0)
a = torch.randn(N, H, W).cuda()
b = a.clone()
a.requires_grad = True
b.requires_grad = True
U, S, V = svd(a)
test.py:16:
ctx = <torch.autograd.function.BatchSVDFunctionBackward object at 0x7f1dfaa1a140>
input = tensor([[[-1.1258, -1.1524, -0.2506],
[-0.4339, 0.8487, 0.6920],
[-0.3160, -2.1152, 0.3223],
...77],
[ 1.4223, 0.2985, 0.0924],
[-1.0208, 0.3279, 0.0111]]], device='cuda:0', requires_grad=True)
some = True, compute_uv = True, out = None
@staticmethod
def forward(ctx,
input: torch.Tensor,
some=True,
compute_uv=True,
out=None):
"""
This function returns `(U, S, V)`
which is the singular value decomposition
of a input real matrix or batches of real matrices `input`
:param ctx:
:param input:
:param out:
:return:
"""
assert input.shape[-1] < 32 and input.shape[-2] < 32, \
'This implementation only supports matrices having dims smaller than 32'
is_double = True if input.dtype == torch.double else False
if input.dtype == torch.half:
input = input.float()
ctx.is_half = True
else:
ctx.is_half = False
if out is None:
b, m, n = input.shape
U = torch.empty(b, m, m, dtype=input.dtype).to(input.device)
S = torch.empty(b, min(m, n), dtype=input.dtype).to(input.device)
V = torch.empty(b, n, n, dtype=input.dtype).to(input.device)
else:
U, S, V = out
_c.batch_svd_forward(input, U, S, V, True, 1e-7, 100, is_double)
E RuntimeError: Expected CUSOLVER_STATUS_SUCCESS == status to be true, but got false. (Could this error message be improved? If so, please report an enhancement request to PyTorch.)
../../../../../../anaconda3/lib/python3.7/site-packages/torch_batch_svd-1.0.0-py3.7-linux-x86_64.egg/torch_batch_svd/batch_svd.py:41: RuntimeError
____________________________________________________________ test_double ____________________________________________________________
def test_double():
torch.manual_seed(0)
a = torch.randn(N, H, W).cuda().double()
b = a.clone()
a.requires_grad = True
b.requires_grad = True
U, S, V = svd(a)
test.py:38:
ctx = <torch.autograd.function.BatchSVDFunctionBackward object at 0x7f1e02d43500>
input = tensor([[[-1.1258, -1.1524, -0.2506],
[-0.4339, 0.8487, 0.6920],
[-0.3160, -2.1152, 0.3223],
...985, 0.0924],
[-1.0208, 0.3279, 0.0111]]], device='cuda:0', dtype=torch.float64,
requires_grad=True)
some = True, compute_uv = True, out = None
@staticmethod
def forward(ctx,
input: torch.Tensor,
some=True,
compute_uv=True,
out=None):
"""
This function returns `(U, S, V)`
which is the singular value decomposition
of a input real matrix or batches of real matrices `input`
:param ctx:
:param input:
:param out:
:return:
"""
assert input.shape[-1] < 32 and input.shape[-2] < 32, \
'This implementation only supports matrices having dims smaller than 32'
is_double = True if input.dtype == torch.double else False
if input.dtype == torch.half:
input = input.float()
ctx.is_half = True
else:
ctx.is_half = False
if out is None:
b, m, n = input.shape
U = torch.empty(b, m, m, dtype=input.dtype).to(input.device)
S = torch.empty(b, min(m, n), dtype=input.dtype).to(input.device)
V = torch.empty(b, n, n, dtype=input.dtype).to(input.device)
else:
U, S, V = out
_c.batch_svd_forward(input, U, S, V, True, 1e-7, 100, is_double)
E RuntimeError: Expected CUSOLVER_STATUS_SUCCESS == status to be true, but got false. (Could this error message be improved? If so, please report an enhancement request to PyTorch.)
../../../../../../anaconda3/lib/python3.7/site-packages/torch_batch_svd-1.0.0-py3.7-linux-x86_64.egg/torch_batch_svd/batch_svd.py:41: RuntimeError
_____________________________________________________________ test_half _____________________________________________________________
def test_half():
torch.manual_seed(0)
a = torch.randn(N, H, W).cuda().half()
b = a.clone()
a.requires_grad = True
b.requires_grad = True
U, S, V = svd(a)
test.py:65:
ctx = <torch.autograd.function.BatchSVDFunctionBackward object at 0x7f1dfaa1a8c0>
input = tensor([[[-1.1260, -1.1523, -0.2505],
[-0.4338, 0.8486, 0.6919],
[-0.3159, -2.1152, 0.3223],
...0254, -1.2598, -0.9976],
[ 1.4219, 0.2986, 0.0923],
[-1.0205, 0.3279, 0.0111]]], device='cuda:0')
some = True, compute_uv = True, out = None
@staticmethod
def forward(ctx,
input: torch.Tensor,
some=True,
compute_uv=True,
out=None):
"""
This function returns `(U, S, V)`
which is the singular value decomposition
of a input real matrix or batches of real matrices `input`
:param ctx:
:param input:
:param out:
:return:
"""
assert input.shape[-1] < 32 and input.shape[-2] < 32, \
'This implementation only supports matrices having dims smaller than 32'
is_double = True if input.dtype == torch.double else False
if input.dtype == torch.half:
input = input.float()
ctx.is_half = True
else:
ctx.is_half = False
if out is None:
b, m, n = input.shape
U = torch.empty(b, m, m, dtype=input.dtype).to(input.device)
S = torch.empty(b, min(m, n), dtype=input.dtype).to(input.device)
V = torch.empty(b, n, n, dtype=input.dtype).to(input.device)
else:
U, S, V = out
_c.batch_svd_forward(input, U, S, V, True, 1e-7, 100, is_double)
E RuntimeError: Expected CUSOLVER_STATUS_SUCCESS == status to be true, but got false. (Could this error message be improved? If so, please report an enhancement request to PyTorch.)
../../../../../../anaconda3/lib/python3.7/site-packages/torch_batch_svd-1.0.0-py3.7-linux-x86_64.egg/torch_batch_svd/batch_svd.py:41: RuntimeError
________________________________________________________ test_multiple_gpus _________________________________________________________
def test_multiple_gpus():
num_gpus = torch.cuda.device_count()
for gpu_idx in range(num_gpus):
device = torch.device('cuda:{}'.format(gpu_idx))
torch.manual_seed(0)
a = torch.randn(N, H, W).to(device)
b = a.clone()
a.requires_grad = True
b.requires_grad = True
U, S, V = svd(a)
test.py:91:
ctx = <torch.autograd.function.BatchSVDFunctionBackward object at 0x7f1e02d43500>
input = tensor([[[-1.1258, -1.1524, -0.2506],
[-0.4339, 0.8487, 0.6920],
[-0.3160, -2.1152, 0.3223],
...77],
[ 1.4223, 0.2985, 0.0924],
[-1.0208, 0.3279, 0.0111]]], device='cuda:0', requires_grad=True)
some = True, compute_uv = True, out = None
@staticmethod
def forward(ctx,
input: torch.Tensor,
some=True,
compute_uv=True,
out=None):
"""
This function returns `(U, S, V)`
which is the singular value decomposition
of a input real matrix or batches of real matrices `input`
:param ctx:
:param input:
:param out:
:return:
"""
assert input.shape[-1] < 32 and input.shape[-2] < 32, \
'This implementation only supports matrices having dims smaller than 32'
is_double = True if input.dtype == torch.double else False
if input.dtype == torch.half:
input = input.float()
Environments
- OS : Ubuntu 16.04
- CUDA version 11.0
- Pytorch version 1.7.3
- Python version 3.7.3
There are several assertions in https://github.com/KinglittleQ/torch-batch-svd/blob/master/torch_batch_svd/csrc/torch_batch_svd.cpp. Could you look into which assert statement failed and print the failing status code?
Traceback (most recent call last):
File "bench_speed.py", line 32, in <module>
bench_speed(10000, 9, 9)
File "bench_speed.py", line 17, in bench_speed
U, S, V = svd(a)
File "/home/iilab-1080ti-a/anaconda3/lib/python3.7/site-packages/torch_batch_svd-1.0.0-py3.7-linux-x86_64.egg/torch_batch_svd/batch_svd.py", line 41, in forward
_c.batch_svd_forward(input, U, S, V, True, 1e-7, 100, is_double)
RuntimeError: Expected CUSOLVER_STATUS_SUCCESS == status to be true, but got false. (Could this error message be improved? If so, please report an enhancement request to PyTorch.)
In https://github.com/KinglittleQ/torch-batch-svd/blob/master/torch_batch_svd/csrc/torch_batch_svd.cpp,
lines 42~47 check whether the 'status' variable indicates success.
Maybe the call on line 42, 'auto status = cusolverDnXgesvdjSetTolerance(params.get(), tol);', is returning a failure status.
@jsw7961 Could you please add some printf
statements to the code and figure out which status code causes this problem?
Such as:
if (CUSOLVER_STATUS_SUCCESS != status) {
printf("line %d: status: %d\n", __LINE__, status);
}