ZeroDivisionError: division by zero
Closed this issue · 0 comments
Hi, I ran both NeuS and NeRF, and I got the same ZeroDivisionError in systems\neus.py and systems\nerf.py.
Here's the cmd output for running nerf:
Global seed set to 42
Using 16bit None Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Trainer(limit_train_batches=1.0)
was configured so 100% of the batches per epoch will be used.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
| Name | Type | Params
0 | model | NeRFModel | 12.6 M
12.6 M Trainable params
0 Non-trainable params
12.6 M Total params
25.220 Total estimated model params size (MB)
Traceback (most recent call last):
File "launch.py", line 128, in
main()
File "launch.py", line 117, in main
trainer.fit(system, datamodule=dm)
File "D:\envs\instant-nsr\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 608, in fit
call._call_and_handle_interrupt(
File "D:\envs\instant-nsr\lib\site-packages\pytorch_lightning\trainer\call.py", line 38, in _call_and_handle_interrupt
return trainer_fn(*args, **kwargs)
File "D:\envs\instant-nsr\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 650, in _fit_impl
self._run(model, ckpt_path=self.ckpt_path)
File "D:\envs\instant-nsr\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1112, in _run
results = self._run_stage()
File "D:\envs\instant-nsr\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1191, in _run_stage
self._run_train()
File "D:\envs\instant-nsr\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1214, in _run_train
self.fit_loop.run()
File "D:\envs\instant-nsr\lib\site-packages\pytorch_lightning\loops\loop.py", line 199, in run
self.advance(*args, **kwargs)
File "D:\envs\instant-nsr\lib\site-packages\pytorch_lightning\loops\fit_loop.py", line 267, in advance
self._outputs = self.epoch_loop.run(self._data_fetcher)
File "D:\envs\instant-nsr\lib\site-packages\pytorch_lightning\loops\loop.py", line 199, in run
self.advance(*args, **kwargs)
File "D:\envs\instant-nsr\lib\site-packages\pytorch_lightning\loops\epoch\training_epoch_loop.py", line 213, in advance
batch_output = self.batch_loop.run(kwargs)
File "D:\envs\instant-nsr\lib\site-packages\pytorch_lightning\loops\loop.py", line 199, in run
self.advance(*args, **kwargs)
File "D:\envs\instant-nsr\lib\site-packages\pytorch_lightning\loops\batch\training_batch_loop.py", line 88, in advance
outputs = self.optimizer_loop.run(optimizers, kwargs)
File "D:\envs\instant-nsr\lib\site-packages\pytorch_lightning\loops\loop.py", line 199, in run
self.advance(*args, **kwargs)
File "D:\envs\instant-nsr\lib\site-packages\pytorch_lightning\loops\optimization\optimizer_loop.py", line 202, in advance
result = self._run_optimization(kwargs, self._optimizers[self.optim_progress.optimizer_position])
File "D:\envs\instant-nsr\lib\site-packages\pytorch_lightning\loops\optimization\optimizer_loop.py", line 249, in _run_optimization
self._optimizer_step(optimizer, opt_idx, kwargs.get("batch_idx", 0), closure)
File "D:\envs\instant-nsr\lib\site-packages\pytorch_lightning\loops\optimization\optimizer_loop.py", line 370, in _optimizer_step
self.trainer._call_lightning_module_hook(
File "D:\envs\instant-nsr\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1356, in _call_lightning_module_hook
output = fn(*args, **kwargs)
File "D:\envs\instant-nsr\lib\site-packages\pytorch_lightning\core\module.py", line 1754, in optimizer_step
optimizer.step(closure=optimizer_closure)
File "D:\envs\instant-nsr\lib\site-packages\pytorch_lightning\core\optimizer.py", line 169, in step
step_output = self._strategy.optimizer_step(self._optimizer, self._optimizer_idx, closure, **kwargs)
File "D:\envs\instant-nsr\lib\site-packages\pytorch_lightning\strategies\strategy.py", line 234, in optimizer_step
return self.precision_plugin.optimizer_step(
File "D:\envs\instant-nsr\lib\site-packages\pytorch_lightning\plugins\precision\native_amp.py", line 75, in optimizer_step
closure_result = closure()
File "D:\envs\instant-nsr\lib\site-packages\pytorch_lightning\loops\optimization\optimizer_loop.py", line 149, in call
self._result = self.closure(*args, **kwargs)
File "D:\envs\instant-nsr\lib\site-packages\pytorch_lightning\loops\optimization\optimizer_loop.py", line 135, in closure
step_output = self._step_fn()
File "D:\envs\instant-nsr\lib\site-packages\pytorch_lightning\loops\optimization\optimizer_loop.py", line 419, in _training_step
training_step_output = self.trainer._call_strategy_hook("training_step", *kwargs.values())
File "D:\envs\instant-nsr\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1494, in _call_strategy_hook
output = fn(*args, **kwargs)
File "D:\envs\instant-nsr\lib\site-packages\pytorch_lightning\strategies\dp.py", line 134, in training_step
return self.model(*args, **kwargs)
File "D:\envs\instant-nsr\lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "D:\envs\instant-nsr\lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "D:\envs\instant-nsr\lib\site-packages\torch\nn\parallel\data_parallel.py", line 183, in forward
return self.module(*inputs[0], **module_kwargs[0])
File "D:\envs\instant-nsr\lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "D:\envs\instant-nsr\lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "D:\envs\instant-nsr\lib\site-packages\pytorch_lightning\overrides\data_parallel.py", line 77, in forward
output = super().forward(*inputs, **kwargs)
File "D:\envs\instant-nsr\lib\site-packages\pytorch_lightning\overrides\base.py", line 98, in forward
output = self._forward_module.training_step(*inputs, **kwargs)
File "F:\instant-nsr-pl-main\systems\nerf.py", line 94, in training_step
train_num_rays = int(self.train_num_rays * (self.train_num_samples / out['num_samples'].sum().item()))
ZeroDivisionError: division by zero
Epoch 0: : 0it [1:46:06, ?it/s]
[W CudaIPCTypes.cpp:15] Producer process has been terminated before all shared CUDA tensors released. See Note [Sharing CUDA tensors]