test or val issue

gmy92 opened this issue 4 years ago · 2 comments

gmy92 commented 4 years ago

I've already trained the model, but when I try to test it, it fails. I have tried many times:

CalledProcessError Traceback (most recent call last)
~/ldif/ldif/inference/predict.py in _grid_eval_cuda(self, sif_vector, resolution, extent)
845 try:
--> 846 cmd_result = sp.check_output(cmd, shell=True)
847 log.info(cmd_result.decode('utf-8').replace('\n', ''))

~/anaconda3/lib/python3.7/subprocess.py in check_output(timeout, *popenargs, **kwargs)
410 return run(*popenargs, stdout=PIPE, timeout=timeout, check=True,
--> 411 **kwargs).stdout
412

~/anaconda3/lib/python3.7/subprocess.py in run(input, capture_output, timeout, check, *popenargs, **kwargs)
511 raise CalledProcessError(retcode, process.args,
--> 512 output=stdout, stderr=stderr)
513 return CompletedProcess(process.args, retcode, stdout, stderr)

CalledProcessError: Command 'CUDA_VISIBLE_DEVICES=1 /home/gemengyuan/ldif/ldif/ldif2mesh/ldif2mesh /tmp/tmp0e9rowkf/ldif.txt /home/gemengyuan/ldif/ldif/ldif2mesh/extracted.occnet /tmp/tmp0e9rowkf/grid.grd -resolution 256' returned non-zero exit status 35.

During handling of the above exception, another exception occurred:

ValueError Traceback (most recent call last)
in
1 embedding = encoder.run_example(e)
----> 2 mesh = decoder.extract_mesh(embedding, resolution=256)
3 gaps_util.mshview(mesh)

~/ldif/ldif/inference/predict.py in extract_mesh(self, sif_vectors, resolution, extent, return_success, world2local)
959 extent,
960 extract_parts=False,
--> 961 world2local=world2local)
962 grid_out_time = time.time()
963 log.verbose(f'Grid eval time: {grid_out_time - extract_start_time}')

~/ldif/ldif/inference/predict.py in _grid_eval(self, sif_vector, resolution, extent, extract_parts, world2local)
886 log.verbose('Evaluating SDF grid for mesh.')
887 if self.use_inference_kernel and not extract_parts:
--> 888 return self._grid_eval_cuda(sif_vector, resolution, extent)
889 if extract_parts or world2local:
890 log.warning('Part extraction and world2local are not supported with the'

~/ldif/ldif/inference/predict.py in _grid_eval_cuda(self, sif_vector, resolution, extent)
867 'possible.')
868 else:
--> 869 raise ValueError(f'Unrecognized error code {e.returncode} occurred'
870 f' during inference kernel evaluation: {e.output}')
871

ValueError: Unrecognized error code 35 occurred during inference kernel evaluation: b'GPUCheckOk Failure: CUDA driver version is insufficient for CUDA runtime version ldif2mesh.cu 985\n'

CalledProcessError Traceback (most recent call last)
~/ldif/ldif/inference/predict.py in _grid_eval_cuda(self, sif_vector, resolution, extent)
845 try:
--> 846 cmd_result = sp.check_output(cmd, shell=True)
847 log.info(cmd_result.decode('utf-8').replace('\n', ''))

~/anaconda3/lib/python3.7/subprocess.py in check_output(timeout, *popenargs, **kwargs)
410 return run(*popenargs, stdout=PIPE, timeout=timeout, check=True,
--> 411 **kwargs).stdout
412

~/anaconda3/lib/python3.7/subprocess.py in run(input, capture_output, timeout, check, *popenargs, **kwargs)
511 raise CalledProcessError(retcode, process.args,
--> 512 output=stdout, stderr=stderr)
513 return CompletedProcess(process.args, retcode, stdout, stderr)

CalledProcessError: Command 'CUDA_VISIBLE_DEVICES=1 /home/gemengyuan/ldif/ldif/ldif2mesh/ldif2mesh /tmp/tmp0e9rowkf/ldif.txt /home/gemengyuan/ldif/ldif/ldif2mesh/extracted.occnet /tmp/tmp0e9rowkf/grid.grd -resolution 256' returned non-zero exit status 35.

During handling of the above exception, another exception occurred:

ValueError Traceback (most recent call last)
in
1 embedding = encoder.run_example(e)
----> 2 mesh = decoder.extract_mesh(embedding, resolution=256)
3 gaps_util.mshview(mesh)

~/ldif/ldif/inference/predict.py in extract_mesh(self, sif_vectors, resolution, extent, return_success, world2local)
959 extent,
960 extract_parts=False,
--> 961 world2local=world2local)
962 grid_out_time = time.time()
963 log.verbose(f'Grid eval time: {grid_out_time - extract_start_time}')

~/ldif/ldif/inference/predict.py in _grid_eval(self, sif_vector, resolution, extent, extract_parts, world2local)
886 log.verbose('Evaluating SDF grid for mesh.')
887 if self.use_inference_kernel and not extract_parts:
--> 888 return self._grid_eval_cuda(sif_vector, resolution, extent)
889 if extract_parts or world2local:
890 log.warning('Part extraction and world2local are not supported with the'

~/ldif/ldif/inference/predict.py in _grid_eval_cuda(self, sif_vector, resolution, extent)
867 'possible.')
868 else:
--> 869 raise ValueError(f'Unrecognized error code {e.returncode} occurred'
870 f' during inference kernel evaluation: {e.output}')
871

ValueError: Unrecognized error code 35 occurred during inference kernel evaluation: b'GPUCheckOk Failure: CUDA driver version is insufficient for CUDA runtime version ldif2mesh.cu 985\n'

CalledProcessError Traceback (most recent call last)
~/ldif/ldif/inference/predict.py in _grid_eval_cuda(self, sif_vector, resolution, extent)
845 try:
--> 846 cmd_result = sp.check_output(cmd, shell=True)
847 log.info(cmd_result.decode('utf-8').replace('\n', ''))

~/anaconda3/lib/python3.7/subprocess.py in check_output(timeout, *popenargs, **kwargs)
410 return run(*popenargs, stdout=PIPE, timeout=timeout, check=True,
--> 411 **kwargs).stdout
412

~/anaconda3/lib/python3.7/subprocess.py in run(input, capture_output, timeout, check, *popenargs, **kwargs)
511 raise CalledProcessError(retcode, process.args,
--> 512 output=stdout, stderr=stderr)
513 return CompletedProcess(process.args, retcode, stdout, stderr)

CalledProcessError: Command 'CUDA_VISIBLE_DEVICES=1 /home/gemengyuan/ldif/ldif/ldif2mesh/ldif2mesh /tmp/tmp0e9rowkf/ldif.txt /home/gemengyuan/ldif/ldif/ldif2mesh/extracted.occnet /tmp/tmp0e9rowkf/grid.grd -resolution 256' returned non-zero exit status 35.

During handling of the above exception, another exception occurred:

ValueError Traceback (most recent call last)
in
1 embedding = encoder.run_example(e)
----> 2 mesh = decoder.extract_mesh(embedding, resolution=256)
3 gaps_util.mshview(mesh)

~/ldif/ldif/inference/predict.py in extract_mesh(self, sif_vectors, resolution, extent, return_success, world2local)
959 extent,
960 extract_parts=False,
--> 961 world2local=world2local)
962 grid_out_time = time.time()
963 log.verbose(f'Grid eval time: {grid_out_time - extract_start_time}')

~/ldif/ldif/inference/predict.py in _grid_eval(self, sif_vector, resolution, extent, extract_parts, world2local)
886 log.verbose('Evaluating SDF grid for mesh.')
887 if self.use_inference_kernel and not extract_parts:
--> 888 return self._grid_eval_cuda(sif_vector, resolution, extent)
889 if extract_parts or world2local:
890 log.warning('Part extraction and world2local are not supported with the'

~/ldif/ldif/inference/predict.py in _grid_eval_cuda(self, sif_vector, resolution, extent)
867 'possible.')
868 else:
--> 869 raise ValueError(f'Unrecognized error code {e.returncode} occurred'
870 f' during inference kernel evaluation: {e.output}')
871

ValueError: Unrecognized error code 35 occurred during inference kernel evaluation: b'GPUCheckOk Failure: CUDA driver version is insufficient for CUDA runtime version ldif2mesh.cu 985\n'
What might cause these errors? SOS!!!

kylegenova commented 4 years ago

It appears there is a version mismatch between your CUDA toolkit and the CUDA version supported by your GPU driver. In particular, your driver may be behind the CUDA toolkit that the inference kernel was compiled with. I think you have three options: 1) Update your NVIDIA GPU drivers so that the CUDA version they support catches up to the toolkit. 2) Recompile the inference kernel with an older version of the CUDA toolkit. 3) Pass the flag --nouse_inference_kernel to the eval.py script. This should work, but it will be much, much slower.

👍1

gmy92 commented 4 years ago

Thanks for your patience! You're right, I solved the problem by updating my NVIDIA GPU driver.