[Bug] Outputs of torch.to abnormally mismatch on GPU when adding extra output var
Azyka opened this issue · 1 comment
Azyka commented
Describe the bug
When an extra torch.to node
is added as an additional output of this model:
class Model0():
    """Single-output variant: returns its first input cast to float32."""

    def forward(self, *args):
        """Return ``args[0]`` converted to ``torch.float32``."""
        # The original ``return (to)`` is a parenthesised expression, not a
        # tuple, so a bare tensor is returned.
        return args[0].to(dtype=torch.float32)
New:
class Model1():
    """Two-output variant: applies the same float32 cast to the input twice."""

    def forward(self, *args):
        """Return a 2-tuple of two separate float32 casts of ``args[0]``."""
        source = args[0]
        # Two distinct ``.to`` calls on the same input; the second one is the
        # "extra output" this report is about.
        to = source.to(dtype=torch.float32)
        to_1 = source.to(dtype=torch.float32)
        return to, to_1
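The output of torch.to
is expected to be the same for the same input. However, when compiled with the hidet backend, the outputs of the two models do not match: the second output of Model1 comes back as all zeros, while PyTorch eager mode returns matching results.
This mismatch is seen only on CUDA.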
To Reproduce
Repro script:
import numpy as np
import pickle
from numpy import testing
import torch
DEVICE='cuda'
class Model0(torch.nn.Module):
    """Baseline module with one output: the first input cast to float32."""

    def __init__(self):
        super().__init__()

    def forward(self, *args):
        """Return ``args[0]`` converted to ``torch.float32``.

        Only the first positional argument is used. The result is a bare
        tensor, not a tuple.
        """
        return args[0].to(dtype=torch.float32)


model_0 = Model0()
# Name under which Model0's only output is stored for later comparison.
output_names_0 = ['v0_0']
class Model1(torch.nn.Module):
    """Module with two outputs, each a separate float32 cast of the first input."""

    def __init__(self):
        super().__init__()

    def forward(self, *args):
        """Return a 2-tuple of two separate float32 casts of ``args[0]``.

        Only the first positional argument is used.
        """
        source = args[0]
        # Deliberately two distinct ``.to`` calls rather than one reused
        # result: the duplicated node is what the report exercises.
        to = source.to(dtype=torch.float32)
        to_1 = source.to(dtype=torch.float32)
        return to, to_1


model_1 = Model1()
# Names for Model1's outputs, in return order; the second one ('v0_0') is the
# name shared with Model0's output.
output_names_1 = ['v5_0', 'v0_0']
def _collect_numpy_outputs(model, input_arrays):
    """Run ``model`` on ``input_arrays`` and return its outputs as NumPy arrays.

    Each input array is wrapped with ``torch.from_numpy`` and moved to
    ``DEVICE`` before the call. A tuple result is handled element by element;
    any other result is treated as a single output.
    """
    device_inputs = [torch.from_numpy(v).to(DEVICE) for v in input_arrays]
    raw = model(*device_inputs)
    if isinstance(raw, tuple):
        tensors = [v.to(DEVICE).detach() for v in raw]
    else:
        tensors = [raw.to(DEVICE).detach()]
    arrays = []
    for v in tensors:
        # Conjugate-view tensors are resolved before conversion to NumPy.
        if v.is_conj():
            arrays.append(v.cpu().resolve_conj().numpy())
        else:
            arrays.append(v.cpu().numpy())
    return arrays


def _compare_outputs(lhs, rhs, ok_msg, fail_msg):
    """Compare the outputs paired by ``output_name_dict`` and print the verdict.

    Prints ``ok_msg`` when every pair passes ``assert_allclose``; otherwise
    prints ``fail_msg`` followed by the assertion details. The report is
    framed by separator lines in both cases.
    """
    print('=========================')
    try:
        for tensor_name_0, tensor_name_1 in output_name_dict.items():
            testing.assert_allclose(
                lhs[tensor_name_0],
                rhs[tensor_name_1],
                rtol=1,
                err_msg=f'at {tensor_name_0}, {tensor_name_1}',
            )
        print(ok_msg)
    except AssertionError as e:
        print(fail_msg)
        print(e)
    print('=========================')


# One random float16 vector is shared by both models so results are comparable.
data = np.random.rand(41).astype(np.float16)
# Maps an output name of Model0 to the matching output name of Model1.
output_name_dict = {'v0_0': 'v0_0'}

# --- Pass 1: both models compiled with the hidet backend -------------------
input_data_0 = [data]
optmodel_0 = torch.compile(model_0, fullgraph=True, backend='hidet', mode=None)
model_out_0 = _collect_numpy_outputs(optmodel_0, input_data_0)
output_0 = dict(zip(output_names_0, model_out_0))

input_data_1 = [data]
optmodel_1 = torch.compile(model_1, fullgraph=True, backend='hidet', mode=None)
model_out_1 = _collect_numpy_outputs(optmodel_1, input_data_1)
output_1 = dict(zip(output_names_1, model_out_1))

_compare_outputs(
    output_0,
    output_1,
    "hidet does not trigger assertion",
    "hidet triggers assertion",
)

# --- Pass 2: the same models run uncompiled (PyTorch eager) as a reference --
model_out_0 = _collect_numpy_outputs(model_0, input_data_0)
output_0 = dict(zip(output_names_0, model_out_0))

model_out_1 = _collect_numpy_outputs(model_1, input_data_1)
output_1 = dict(zip(output_names_1, model_out_1))

_compare_outputs(
    output_0,
    output_1,
    "torch_eager does not trigger assertion",
    "torch_eager triggers assertion",
)
Output:
=========================
hidet triggers assertion
Not equal to tolerance rtol=1, atol=0
at v0_0, v0_0
Mismatched elements: 41 / 41 (100%)
Max absolute difference: 0.98828125
Max relative difference: inf
x: array([3.936768e-02, 6.499023e-01, 1.479492e-01, 3.615723e-01,
7.329102e-01, 4.431152e-01, 4.995117e-01, 9.067383e-01,
9.796143e-02, 7.086182e-02, 6.757812e-01, 2.117920e-01,...
y: array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0.], dtype=float32)
=========================
=========================
torch_eager does not trigger assertion
=========================
Expected behavior
The output of torch.to
is expected to be the same for the same input.
Environment
- OS: Ubuntu 22.04.3 LTS (x86_64)
- GPU: RTX 1660
- NVIDIA GPU Driver: 525.147.05
- Hidet Version: 0.3.0
- PyTorch Version: 2.1.0+cu118
Azyka commented
Fixed in #384. Thanks for your efforts on it! @Aalanli and @yaoyaoding