BrokenPipeError: [Errno 32] Broken pipe
LaughBuddha opened this issue · 9 comments
Hi,
After setting up as per the readme.md, I ran the command python main.py -n1 --auto_gpu_config 0 --split val
and I am getting the error below.
Dumping at ./tmp//models/exp1/
Namespace(alpha=0.99, auto_gpu_config=0, camera_height=1.25, clip_param=0.2, collision_threshold=0.2, cuda=True, du_scale=2, dump_location='./tmp/', entropy_coef=0.001, env_frame_height=256, env_frame_width=256, eps=1e-05, eval=0, exp_loss_coeff=1.0, exp_name='exp1', frame_height=128, frame_width=128, gamma=0.99, global_downscaling=2, global_hidden_size=256, global_lr=2.5e-05, goals_size=2, hfov=90.0, load_global='0', load_local='0', load_slam='0', local_hidden_size=512, local_optimizer='adam,lr=0.0001', local_policy_update_freq=5, log_interval=10, map_pred_threshold=0.5, map_resolution=5, map_size_cm=2400, max_episode_length=1000, max_grad_norm=0.5, no_cuda=False, noise_level=1.0, noisy_actions=1, noisy_odometry=1, num_episodes=1000000, num_global_steps=40, num_local_steps=25, num_mini_batch=0, num_processes=1, num_processes_on_first_gpu=0, num_processes_per_gpu=11, obs_threshold=1, obstacle_boundary=5, pose_loss_coeff=10000.0, ppo_epoch=4, pretrained_resnet=1, print_images=0, proj_loss_coeff=1.0, randomize_env_every=1000, save_interval=1, save_periodic=500000, save_trajectory_data='0', seed=1, short_goal_dist=1, sim_gpu_id=0, slam_batch_size=72, slam_iterations=10, slam_memory_size=500000, slam_optimizer='adam,lr=0.0001', split='val', task_config='tasks/pointnav_gibson.yaml', tau=0.95, total_num_scenes='auto', train_global=1, train_local=1, train_slam=1, use_deterministic_local=0, use_gae=False, use_pose_estimation=2, use_recurrent_global=0, use_recurrent_local=1, value_loss_coef=0.5, vis_type=1, vision_range=64, visualize=0)
Loading data/scene_datasets/gibson/Cantwell.glb
2021-02-04 22:14:49,265 initializing sim Sim-v0
Process ForkServerProcess-1:
Traceback (most recent call last):
File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "/mnt/beegfs/home/sidgoel/Neural-SLAM/env/habitat/habitat_api/habitat/core/vector_env.py", line 148, in _worker_env
env = env_fn(*env_fn_args)
File "/mnt/beegfs/home/sidgoel/Neural-SLAM/env/habitat/__init__.py", line 22, in make_env_fn
config_env=config_env, config_baseline=config_baseline, dataset=dataset
File "/mnt/beegfs/home/sidgoel/Neural-SLAM/env/habitat/exploration_env.py", line 85, in __init__
super().__init__(config_env, dataset)
File "/mnt/beegfs/home/sidgoel/habitat-api/habitat/core/env.py", line 290, in __init__
self._env = Env(config, dataset)
File "/mnt/beegfs/home/sidgoel/habitat-api/habitat/core/env.py", line 93, in __init__
id_sim=self._config.SIMULATOR.TYPE, config=self._config.SIMULATOR
File "/mnt/beegfs/home/sidgoel/habitat-api/habitat/sims/registration.py", line 19, in make_sim
return _sim(**kwargs)
File "/mnt/beegfs/home/sidgoel/habitat-api/habitat/sims/habitat_simulator/habitat_simulator.py", line 155, in __init__
sim_sensors.append(sensor_type(sensor_cfg))
File "/mnt/beegfs/home/sidgoel/habitat-api/habitat/sims/habitat_simulator/habitat_simulator.py", line 52, in __init__
super().__init__(config=config)
File "/mnt/beegfs/home/sidgoel/habitat-api/habitat/core/simulator.py", line 186, in __init__
super().__init__(*args, **kwargs)
File "/mnt/beegfs/home/sidgoel/habitat-api/habitat/core/simulator.py", line 148, in __init__
self.observation_space = self._get_observation_space(*args, **kwargs)
File "/mnt/beegfs/home/sidgoel/habitat-api/habitat/sims/habitat_simulator/habitat_simulator.py", line 59, in _get_observation_space
dtype=np.uint8,
TypeError: __init__() got an unexpected keyword argument 'dtype'
Exception ignored in: <bound method Env.__del__ of <env.habitat.exploration_env.Exploration_Env object at 0x7f29ad2dc860>>
Traceback (most recent call last):
File "/mnt/beegfs/home/sidgoel/ActiveNeuralSLAM/lib/python3.6/site-packages/gym/core.py", line 203, in __del__
self.close()
File "/mnt/beegfs/home/sidgoel/habitat-api/habitat/core/env.py", line 382, in close
self._env.close()
AttributeError: 'Exploration_Env' object has no attribute '_env'
Traceback (most recent call last):
File "main.py", line 769, in <module>
main()
File "main.py", line 119, in main
envs = make_vec_envs(args)
File "/mnt/beegfs/home/sidgoel/Neural-SLAM/env/__init__.py", line 7, in make_vec_envs
envs = construct_envs(args)
File "/mnt/beegfs/home/sidgoel/Neural-SLAM/env/habitat/__init__.py", line 102, in construct_envs
range(args.num_processes))
File "/mnt/beegfs/home/sidgoel/Neural-SLAM/env/habitat/habitat_api/habitat/core/vector_env.py", line 117, in __init__
read_fn() for read_fn in self._connection_read_fns
File "/mnt/beegfs/home/sidgoel/Neural-SLAM/env/habitat/habitat_api/habitat/core/vector_env.py", line 117, in <listcomp>
read_fn() for read_fn in self._connection_read_fns
File "/usr/lib/python3.6/multiprocessing/connection.py", line 250, in recv
buf = self._recv_bytes()
File "/usr/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
buf = self._recv(4)
File "/usr/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
chunk = read(handle, remaining)
ConnectionResetError: [Errno 104] Connection reset by peer
Exception ignored in: <bound method VectorEnv.__del__ of <env.habitat.habitat_api.habitat.core.vector_env.VectorEnv object at 0x7fa714619c88>>
Traceback (most recent call last):
File "/mnt/beegfs/home/sidgoel/Neural-SLAM/env/habitat/habitat_api/habitat/core/vector_env.py", line 487, in __del__
self.close()
File "/mnt/beegfs/home/sidgoel/Neural-SLAM/env/habitat/habitat_api/habitat/core/vector_env.py", line 351, in close
write_fn((CLOSE_COMMAND, None))
File "/usr/lib/python3.6/multiprocessing/connection.py", line 206, in send
self._send_bytes(_ForkingPickler.dumps(obj))
File "/usr/lib/python3.6/multiprocessing/connection.py", line 404, in _send_bytes
self._send(header + buf)
File "/usr/lib/python3.6/multiprocessing/connection.py", line 368, in _send
n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe
I would appreciate help in resolving this.
Also when I try to run the command
python main.py --split val --eval 1 --train_global 0 --train_local 0 --train_slam 0 \
--load_global pretrained_models/model_best.global \
--load_local pretrained_models/model_best.local \
--load_slam pretrained_models/model_best.slam
I get the dump below.
(ActiveNeuralSLAM) sidgoel@node-1080ti-0:~/Neural-SLAM$ python main.py --split val --eval 1 --train_global 0 --train_local 0 --train_slam 0 \
> --load_global pretrained_models/model_best.global \
> --load_local pretrained_models/model_best.local \
> --load_slam pretrained_models/model_best.slam
Auto GPU config:
Number of processes: 0
Number of processes on GPU 0: 0
Number of processes per GPU: 0
Dumping at ./tmp//models/exp1/
Namespace(alpha=0.99, auto_gpu_config=1, camera_height=1.25, clip_param=0.2, collision_threshold=0.2, cuda=True, du_scale=2, dump_location='./tmp/', entropy_coef=0.001, env_frame_height=256, env_frame_width=256, eps=1e-05, eval=1, exp_loss_coeff=1.0, exp_name='exp1', frame_height=128, frame_width=128, gamma=0.99, global_downscaling=2, global_hidden_size=256, global_lr=2.5e-05, goals_size=2, hfov=90.0, load_global='pretrained_models/model_best.global', load_local='pretrained_models/model_best.local', load_slam='pretrained_models/model_best.slam', local_hidden_size=512, local_optimizer='adam,lr=0.0001', local_policy_update_freq=5, log_interval=10, map_pred_threshold=0.5, map_resolution=5, map_size_cm=2400, max_episode_length=1000, max_grad_norm=0.5, no_cuda=False, noise_level=1.0, noisy_actions=1, noisy_odometry=1, num_episodes=1000000, num_global_steps=40, num_local_steps=25, num_mini_batch=0, num_processes=0, num_processes_on_first_gpu=0, num_processes_per_gpu=0, obs_threshold=1, obstacle_boundary=5, pose_loss_coeff=10000.0, ppo_epoch=4, pretrained_resnet=1, print_images=0, proj_loss_coeff=1.0, randomize_env_every=1000, save_interval=1, save_periodic=500000, save_trajectory_data='0', seed=1, short_goal_dist=1, sim_gpu_id=1, slam_batch_size=72, slam_iterations=10, slam_memory_size=500000, slam_optimizer='adam,lr=0.0001', split='val', task_config='tasks/pointnav_gibson.yaml', tau=0.95, total_num_scenes=1, train_global=0, train_local=0, train_slam=0, use_deterministic_local=0, use_gae=False, use_pose_estimation=2, use_recurrent_global=0, use_recurrent_local=1, value_loss_coef=0.5, vis_type=1, vision_range=64, visualize=0)
Traceback (most recent call last):
File "main.py", line 769, in <module>
main()
File "main.py", line 119, in main
envs = make_vec_envs(args)
File "/mnt/beegfs/home/sidgoel/Neural-SLAM/env/__init__.py", line 7, in make_vec_envs
envs = construct_envs(args)
File "/mnt/beegfs/home/sidgoel/Neural-SLAM/env/habitat/__init__.py", line 102, in construct_envs
range(args.num_processes))
File "/mnt/beegfs/home/sidgoel/Neural-SLAM/env/habitat/habitat_api/habitat/core/vector_env.py", line 95, in __init__
), "number of environments to be created should be greater than 0"
AssertionError: number of environments to be created should be greater than 0
Are these two issues related?
For the first error, maybe it is related to this issue:
#1 (comment)
For the second error, you will probably need to manually specify the number of processes and number of processes per GPU, see instructions here:
https://github.com/devendrachaplot/Neural-SLAM/blob/master/docs/INSTRUCTIONS.md#specifying-number-of-threads
The first issue was resolved by installing gym version 0.10.9, which is the version required by habitat.
Now I am getting the following error when executing both of these commands:
python main.py -n1 --auto_gpu_config 0 --split val
and
python main.py --split val_mt --eval 1 \
--auto_gpu_config 0 -n 14 --num_episodes 71 --num_processes_per_gpu 7 \
--load_global pretrained_models/model_best.global --train_global 0 \
--load_local pretrained_models/model_best.local --train_local 0 \
--load_slam pretrained_models/model_best.slam --train_slam 0
Dumping at ./tmp//models/exp1/
Namespace(alpha=0.99, auto_gpu_config=0, camera_height=1.25, clip_param=0.2, collision_threshold=0.2, cuda=True, du_scale=2, dump_location='./tmp/', entropy_coef=0.001, env_frame_height=256, env_frame_width=256, eps=1e-05, eval=0, exp_loss_coeff=1.0, exp_name='exp1', frame_height=128, frame_width=128, gamma=0.99, global_downscaling=2, global_hidden_size=256, global_lr=2.5e-05, goals_size=2, hfov=90.0, load_global='0', load_local='0', load_slam='0', local_hidden_size=512, local_optimizer='adam,lr=0.0001', local_policy_update_freq=5, log_interval=10, map_pred_threshold=0.5, map_resolution=5, map_size_cm=2400, max_episode_length=1000, max_grad_norm=0.5, no_cuda=False, noise_level=1.0, noisy_actions=1, noisy_odometry=1, num_episodes=1000000, num_global_steps=40, num_local_steps=25, num_mini_batch=0, num_processes=1, num_processes_on_first_gpu=0, num_processes_per_gpu=11, obs_threshold=1, obstacle_boundary=5, pose_loss_coeff=10000.0, ppo_epoch=4, pretrained_resnet=1, print_images=0, proj_loss_coeff=1.0, randomize_env_every=1000, save_interval=1, save_periodic=500000, save_trajectory_data='0', seed=1, short_goal_dist=1, sim_gpu_id=0, slam_batch_size=72, slam_iterations=10, slam_memory_size=500000, slam_optimizer='adam,lr=0.0001', split='val', task_config='tasks/pointnav_gibson.yaml', tau=0.95, total_num_scenes='auto', train_global=1, train_local=1, train_slam=1, use_deterministic_local=0, use_gae=False, use_pose_estimation=2, use_recurrent_global=0, use_recurrent_local=1, value_loss_coef=0.5, vis_type=1, vision_range=64, visualize=0)
Loading data/scene_datasets/gibson/Cantwell.glb
2021-02-10 01:22:30,099 initializing sim Sim-v0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I0210 01:22:30.155874 184104 WindowlessContext.cpp:98] [EGL] Detected 5 EGL devices
Traceback (most recent call last):
File "main.py", line 769, in <module>
main()
File "main.py", line 119, in main
envs = make_vec_envs(args)
File "/mnt/beegfs/home/sidgoel/Neural-SLAM/env/__init__.py", line 7, in make_vec_envs
envs = construct_envs(args)
File "/mnt/beegfs/home/sidgoel/Neural-SLAM/env/habitat/__init__.py", line 102, in construct_envs
range(args.num_processes))
File "/mnt/beegfs/home/sidgoel/Neural-SLAM/env/habitat/habitat_api/habitat/core/vector_env.py", line 117, in __init__
read_fn() for read_fn in self._connection_read_fns
File "/mnt/beegfs/home/sidgoel/Neural-SLAM/env/habitat/habitat_api/habitat/core/vector_env.py", line 117, in <listcomp>
read_fn() for read_fn in self._connection_read_fns
File "/usr/lib/python3.6/multiprocessing/connection.py", line 250, in recv
buf = self._recv_bytes()
File "/usr/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
buf = self._recv(4)
File "/usr/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
chunk = read(handle, remaining)
ConnectionResetError: [Errno 104] Connection reset by peer
Exception ignored in: <bound method VectorEnv.__del__ of <env.habitat.habitat_api.habitat.core.vector_env.VectorEnv object at 0x7f4e7d4ee908>>
Traceback (most recent call last):
File "/mnt/beegfs/home/sidgoel/Neural-SLAM/env/habitat/habitat_api/habitat/core/vector_env.py", line 487, in __del__
self.close()
File "/mnt/beegfs/home/sidgoel/Neural-SLAM/env/habitat/habitat_api/habitat/core/vector_env.py", line 351, in close
write_fn((CLOSE_COMMAND, None))
File "/usr/lib/python3.6/multiprocessing/connection.py", line 206, in send
self._send_bytes(_ForkingPickler.dumps(obj))
File "/usr/lib/python3.6/multiprocessing/connection.py", line 404, in _send_bytes
self._send(header + buf)
File "/usr/lib/python3.6/multiprocessing/connection.py", line 368, in _send
n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe
Am I passing the wrong values for -n 14 --num_episodes 71 --num_processes_per_gpu 7?
This seems like an issue with the habitat installation. A quick way to check this is by running examples/benchmark.py in the habitat-api directory (where you installed habitat-api, not the submodule within the Neural-SLAM directory). If it throws an error, it indicates habitat-sim or habitat-api is not installed correctly.
I fixed all the issues with habitat-api and habitat-sim and verified the installations by running the respective python examples/example.py scripts.
I am trying to evaluate the Active Neural SLAM results by using the following script.
python main.py --split val --eval 1 --train_global 0 --train_local 0 --train_slam 0 \
--load_global pretrained_models/model_best.global \
--load_local pretrained_models/model_best.local \
--load_slam pretrained_models/model_best.slam
Getting the below output
python main.py --split val --eval 1 --train_global 0 --train_local 0 --train_slam 0 \
> --load_global pretrained_models/model_best.global \
> --load_local pretrained_models/model_best.local \
> --load_slam pretrained_models/model_best.slam
Auto GPU config:
Number of processes: 0
Number of processes on GPU 0: 0
Number of processes per GPU: 0
Dumping at ./tmp//models/exp1/
Namespace(alpha=0.99, auto_gpu_config=1, camera_height=1.25, clip_param=0.2, collision_threshold=0.2, cuda=True, du_scale=2, dump_location='./tmp/', entropy_coef=0.001, env_frame_height=256, env_frame_width=256, eps=1e-05, eval=1, exp_loss_coeff=1.0, exp_name='exp1', frame_height=128, frame_width=128, gamma=0.99, global_downscaling=2, global_hidden_size=256, global_lr=2.5e-05, goals_size=2, hfov=90.0, load_global='pretrained_models/model_best.global', load_local='pretrained_models/model_best.local', load_slam='pretrained_models/model_best.slam', local_hidden_size=512, local_optimizer='adam,lr=0.0001', local_policy_update_freq=5, log_interval=10, map_pred_threshold=0.5, map_resolution=5, map_size_cm=2400, max_episode_length=1000, max_grad_norm=0.5, no_cuda=False, noise_level=1.0, noisy_actions=1, noisy_odometry=1, num_episodes=1000000, num_global_steps=40, num_local_steps=25, num_mini_batch=0, num_processes=0, num_processes_on_first_gpu=0, num_processes_per_gpu=0, obs_threshold=1, obstacle_boundary=5, pose_loss_coeff=10000.0, ppo_epoch=4, pretrained_resnet=1, print_images=0, proj_loss_coeff=1.0, randomize_env_every=1000, save_interval=1, save_periodic=500000, save_trajectory_data='0', seed=1, short_goal_dist=1, sim_gpu_id=1, slam_batch_size=72, slam_iterations=10, slam_memory_size=500000, slam_optimizer='adam,lr=0.0001', split='val', task_config='tasks/pointnav_gibson.yaml', tau=0.95, total_num_scenes=1, train_global=0, train_local=0, train_slam=0, use_deterministic_local=0, use_gae=False, use_pose_estimation=2, use_recurrent_global=0, use_recurrent_local=1, value_loss_coef=0.5, vis_type=1, vision_range=64, visualize=0)
Traceback (most recent call last):
File "main.py", line 769, in <module>
main()
File "main.py", line 119, in main
envs = make_vec_envs(args)
File "/mnt/beegfs/home/sidgoel/Neural-SLAM/env/__init__.py", line 7, in make_vec_envs
envs = construct_envs(args)
File "/mnt/beegfs/home/sidgoel/Neural-SLAM/env/habitat/__init__.py", line 102, in construct_envs
range(args.num_processes))
File "/mnt/beegfs/home/sidgoel/Neural-SLAM/env/habitat/habitat_api/habitat/core/vector_env.py", line 95, in __init__
), "number of environments to be created should be greater than 0"
AssertionError: number of environments to be created should be greater than 0
Upon specifying the parameters -n 14 --num_episodes 71 --num_processes_per_gpu 7,
I get the following output:
log.txt
I would appreciate help in resolving this.
It seems like you do not have sufficient GPU memory on the system, or torch is not compiled with CUDA. Can you try running the above with the --no_cuda argument?
Closing due to inactivity.