ValueError: The two structures don't have the same nested structure. when running python experiments/ppo_4x4grid.py
lie12huo opened this issue · 7 comments
When I executed the command “python experiments/ppo_4x4grid.py” for training, the following error occurred:
Failure # 1 (occurred at 2023-12-28_10-41-43)
ray::PPO.train() (pid=8400, ip=127.0.0.1, actor_id=ad9e6648b1b2ed22aab2737601000000, repr=PPO)
File "python\ray_raylet.pyx", line 1813, in ray._raylet.execute_task
File "python\ray_raylet.pyx", line 1754, in ray._raylet.execute_task.function_executor
File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray_private\function_manager.py", line 726, in actor_method_executor
return method(__ray_actor, *args, **kwargs)
File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\util\tracing\tracing_helper.py", line 467, in _resume_span
return method(self, *_args, **_kwargs)
File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\tune\trainable\trainable.py", line 342, in train
raise skipped from exception_cause(skipped)
File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\tune\trainable\trainable.py", line 339, in train
result = self.step()
File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\util\tracing\tracing_helper.py", line 467, in _resume_span
return method(self, *_args, **_kwargs)
File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\algorithms\algorithm.py", line 852, in step
results, train_iter_ctx = self._run_one_training_iteration()
File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\util\tracing\tracing_helper.py", line 467, in _resume_span
return method(self, *_args, **_kwargs)
File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\algorithms\algorithm.py", line 3042, in _run_one_training_iteration
results = self.training_step()
File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\util\tracing\tracing_helper.py", line 467, in _resume_span
return method(self, *_args, **_kwargs)
File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\algorithms\ppo\ppo.py", line 407, in training_step
train_batch = synchronous_parallel_sample(
File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\execution\rollout_ops.py", line 83, in synchronous_parallel_sample
sample_batches = worker_set.foreach_worker(
File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\evaluation\worker_set.py", line 705, in foreach_worker
handle_remote_call_result_errors(remote_results, self._ignore_worker_failures)
File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\evaluation\worker_set.py", line 78, in handle_remote_call_result_errors
raise r.get()
ray.exceptions.RayTaskError(ValueError): ray::RolloutWorker.apply() (pid=2132, ip=127.0.0.1, actor_id=3e746f41fe7f2d8a17e49dfe01000000, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x0000029A98298220>)
ValueError: The two structures don't have the same nested structure.
First structure: type=ndarray str=[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Second structure: type=OrderedDict str=OrderedDict([('0', array([0.34979227, 0.8725393 , 0.08013749, 0.4683012 , 0.9829854 ,
0.98014855, 0.58258814, 0.05028085, 0.993543 , 0.79412615,
0.06607185], dtype=float32)), ('1', array([0.88398516, 0.7195979 , 0.94150907, 0.9829224 , 0.92211246,
0.7414135 , 0.9920831 , 0.14566854, 0.81726855, 0.9366454 ,
0.6274262 ], dtype=float32)), ('10', array([0.16446641, 0.6756482 , 0.7539203 , 0.4934962 , 0.8585525 ,
0.6322913 , 0.7542331 , 0.7486755 , 0.49466062, 0.72004724,
0.8117338 ], dtype=float32)), ('11', array([0.5329431 , 0.8486154 , 0.9495838 , 0.8622418 , 0.8732489 ,
0.11242072, 0.0763188 , 0.2220357 , 0.854032 , 0.973182 ,
0.14879882], dtype=float32)), ('12', array([0.732119 , 0.89823484, 0.69070387, 0.77506936, 0.98971057,
0.99043095, 0.3193064 , 0.69851375, 0.40632918, 0.406967 ,
0.62862474], dtype=float32)), ('13', array([0.47952086, 0.02109447, 0.42499098, 0.40413964, 0.264478 ,
0.10132027, 0.2610051 , 0.02141384, 0.231504 , 0.8975433 ,
0.99703795], dtype=float32)), ('14', array([0.67056215, 0.16663882, 0.08163167, 0.73198503, 0.18105489,
0.99467266, 0.02948203, 0.9314566 , 0.04069875, 0.8837653 ,
0.35313195], dtype=float32)), ('15', array([0.4275111 , 0.05799369, 0.20000993, 0.8529059 , 0.7172784 ,
0.7484241 , 0.90706795, 0.9734425 , 0.55966806, 0.81240386,
0.03192328], dtype=float32)), ('2', array([0.6548908 , 0.42107597, 0.06570876, 0.09022505, 0.07516731,
0.7484601 , 0.3176393 , 0.29006734, 0.6668242 , 0.76966023,
0.31101513], dtype=float32)), ('3', array([0.8722979 , 0.6160401 , 0.28451207, 0.3753895 , 0.59421366,
0.89204305, 0.01789684, 0.80874205, 0.4302826 , 0.9208242 ,
0.3285712 ], dtype=float32)), ('4', array([0.9464896 , 0.01961527, 0.834267 , 0.6228876 , 0.9121172 ,
0.01453374, 0.499453 , 0.12847178, 0.5970337 , 0.92107564,
0.1353088 ], dtype=float32)), ('5', array([0.77832663, 0.8915154 , 0.7435042 , 0.8148381 , 0.9744162 ,
0.55549747, 0.8838653 , 0.63371605, 0.23385969, 0.34045848,
0.1165311 ], dtype=float32)), ('6', array([0.45601034, 0.55218536, 0.40453702, 0.6130139 , 0.9555645 ,
0.8283712 , 0.02224702, 0.39311445, 0.791911 , 0.32984698,
0.18268831], dtype=float32)), ('7', array([0.92055535, 0.77292 , 0.27333862, 0.29092216, 0.782299 ,
0.25580984, 0.92083466, 0.0973004 , 0.17253524, 0.8128827 ,
0.504909 ], dtype=float32)), ('8', array([0.51502854, 0.56702006, 0.17254692, 0.7095564 , 0.03431124,
0.76993406, 0.86907685, 0.38690564, 0.3951562 , 0.11255713,
0.8427719 ], dtype=float32)), ('9', array([0.74191433, 0.6941966 , 0.6615604 , 0.7109615 , 0.21396402,
0.07951149, 0.48645335, 0.7014952 , 0.6249435 , 0.13057923,
0.56323195], dtype=float32))])
More specifically: Substructure "type=OrderedDict str=OrderedDict([('0', array([...], dtype=float32)), ..., ('9', array([...], dtype=float32))])" is a sequence, while substructure "type=ndarray str=[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]" is not

During handling of the above exception, another exception occurred:
ray::RolloutWorker.apply() (pid=2132, ip=127.0.0.1, actor_id=3e746f41fe7f2d8a17e49dfe01000000, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x0000029A98298220>)
File "python\ray_raylet.pyx", line 1807, in ray._raylet.execute_task
File "python\ray_raylet.pyx", line 1908, in ray._raylet.execute_task
File "python\ray_raylet.pyx", line 1813, in ray._raylet.execute_task
File "python\ray_raylet.pyx", line 1754, in ray._raylet.execute_task.function_executor
File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray_private\function_manager.py", line 726, in actor_method_executor
return method(__ray_actor, *args, **kwargs)
File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\util\tracing\tracing_helper.py", line 467, in _resume_span
return method(self, *_args, **_kwargs)
File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\utils\actor_manager.py", line 189, in apply
raise e
File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\utils\actor_manager.py", line 178, in apply
return func(self, *args, **kwargs)
File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\execution\rollout_ops.py", line 84, in
lambda w: w.sample(), local_worker=False, healthy_only=True
File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\util\tracing\tracing_helper.py", line 467, in _resume_span
return method(self, *_args, **_kwargs)
File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\evaluation\rollout_worker.py", line 694, in sample
batches = [self.input_reader.next()]
File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\evaluation\sampler.py", line 91, in next
batches = [self.get_data()]
File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\evaluation\sampler.py", line 276, in get_data
item = next(self._env_runner)
File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\evaluation\env_runner_v2.py", line 344, in run
outputs = self.step()
File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\evaluation\env_runner_v2.py", line 370, in step
active_envs, to_eval, outputs = self._process_observations(
File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\evaluation\env_runner_v2.py", line 637, in _process_observations
processed = policy.agent_connectors(acd_list)
File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\connectors\agent\pipeline.py", line 41, in call
ret = c(ret)
File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\connectors\connector.py", line 265, in call
return [self.transform(d) for d in acd_list]
File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\connectors\connector.py", line 265, in
return [self.transform(d) for d in acd_list]
File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\connectors\agent\obs_preproc.py", line 58, in transform
d[SampleBatch.NEXT_OBS] = self.preprocessor.transform(
File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\models\preprocessors.py", line 329, in transform
self.check_shape(observation)
File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\models\preprocessors.py", line 69, in check_shape
observation = convert_element_to_space_type(
File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\utils\spaces\space_utils.py", line 472, in convert_element_to_space_type
return tree.map_structure(map_, element, sampled_element, check_types=False)
File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\tree_init.py", line 433, in map_structure
assert_same_structure(structures[0], other, check_types=check_types)
File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\tree_init.py", line 288, in assert_same_structure
raise type(e)("%s\n"
ValueError: The two structures don't have the same nested structure.
First structure: type=ndarray str=[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Second structure: type=OrderedDict str=OrderedDict([('0', array([...], dtype=float32)), ..., ('9', array([...], dtype=float32))])
More specifically: Substructure "type=OrderedDict str=OrderedDict([...])" is a sequence, while substructure "type=ndarray str=[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]" is not
Entire first structure:
.
Entire second structure:
OrderedDict([('0', .), ('1', .), ('10', .), ('11', .), ('12', .), ('13', .), ('14', .), ('15', .), ('2', .), ('3', .), ('4', .), ('5', .), ('6', .), ('7', .), ('8', .), ('9', .)])
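For context: the dump above says that RLlib's observation preprocessor was handed the whole multi-agent dict (one 11-dim array per traffic signal) where it expected a single flat observation. A minimal sketch of the failing comparison, assuming only numpy and dm-tree (the `tree` package from the traceback); this is an illustration, not part of the original report:

import collections

import numpy as np
import tree  # dm-tree, the library raising the error in the traceback

# One flat 11-dim observation, like the preprocessor's sampled reference element.
flat = np.zeros(11, dtype=np.float32)
# A multi-agent mapping of agent-id -> 11-dim observation, like the one delivered here.
nested = collections.OrderedDict(
    (str(i), np.random.rand(11).astype(np.float32)) for i in range(16)
)
# dm-tree refuses to zip an ndarray leaf against a dict of leaves:
tree.map_structure(lambda a, b: a, flat, nested)
# ValueError: The two structures don't have the same nested structure.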
@lie12huo Have you figured out a way to solve it? I am facing the same issue.
@SecondTheFirst This seems to be a version-compatibility issue that I never managed to solve.
Facing the same issue, running it in a Docker container: https://hub.docker.com/r/lionelpeer/sumo-rl
Facing the same issue. Any ideas?
Hey, I eventually found a fix somewhere in a PR to RLlib's PettingZoo wrappers, and I am attaching the file that made it work for us. Simply copy this file somewhere and import ParallelPettingZooEnv (or PettingZooEnv) from there instead of from ray.rllib.env.wrappers.pettingzoo_env.
It's really just a hot-fix, but I don't remember exactly where I found the PR, and it might have been merged into RLlib's main branch by now, so the first thing I would try is to upgrade RLlib to the newest release.
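For example, if you save the attached file below as pettingzoo_env_fixed.py (the module name is just an illustration), the only change in the training script is the import:

# Before: the stock RLlib wrapper used by the experiment script.
# from ray.rllib.env.wrappers.pettingzoo_env import ParallelPettingZooEnv
# After: the patched wrapper attached below (hypothetical file name).
from pettingzoo_env_fixed import ParallelPettingZooEnv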
from typing import Optional

from ray.rllib.env.multi_agent_env import MultiAgentEnv
from ray.rllib.utils.annotations import PublicAPI
from ray.rllib.utils.gym import convert_old_gym_space_to_gymnasium_space
from ray.rllib.utils.typing import MultiAgentDict


@PublicAPI
class PettingZooEnv(MultiAgentEnv):
    """An interface to the PettingZoo MARL environment library.

    See: https://github.com/Farama-Foundation/PettingZoo

    Inherits from MultiAgentEnv and exposes a given AEC
    (agent-environment-cycle) game from the PettingZoo project via the
    MultiAgentEnv public API.

    Note that the wrapper has some important limitations:

    1. All agents have the same action_spaces and observation_spaces.
       Note: If, within your AEC game, agents do not have homogeneous action /
       observation spaces, apply SuperSuit wrappers
       to apply padding functionality: https://github.com/Farama-Foundation/
       SuperSuit#built-in-multi-agent-only-functions
    2. Environments are positive-sum games (-> agents are expected to cooperate
       to maximize reward). This isn't a hard restriction; it's just that
       standard algorithms aren't expected to work well in highly competitive
       games.

    Examples:
        >>> from pettingzoo.butterfly import prison_v3
        >>> from ray.rllib.env.wrappers.pettingzoo_env import PettingZooEnv
        >>> env = PettingZooEnv(prison_v3.env())
        >>> obs, infos = env.reset()
        >>> print(obs)
        # only returns the observation for the agent which should be stepping
        {
            'prisoner_0': array([[[0, 0, 0],
                [0, 0, 0],
                [0, 0, 0],
                ...,
                [0, 0, 0],
                [0, 0, 0],
                [0, 0, 0]]], dtype=uint8)
        }
        >>> obs, rewards, terminateds, truncateds, infos = env.step({
        ...     "prisoner_0": 1
        ... })
        # only returns the observation, reward, info, etc., for
        # the agent whose turn is next.
        >>> print(obs)
        {
            'prisoner_1': array([[[0, 0, 0],
                [0, 0, 0],
                [0, 0, 0],
                ...,
                [0, 0, 0],
                [0, 0, 0],
                [0, 0, 0]]], dtype=uint8)
        }
        >>> print(rewards)
        {
            'prisoner_1': 0
        }
        >>> print(terminateds)
        {
            'prisoner_1': False, '__all__': False
        }
        >>> print(truncateds)
        {
            'prisoner_1': False, '__all__': False
        }
        >>> print(infos)
        {
            'prisoner_1': {'map_tuple': (1, 0)}
        }
    """

    def __init__(self, env):
        super().__init__()
        self.env = env
        env.reset()

        # Since all agents have the same spaces, do not provide full observation-
        # and action-spaces as Dicts, mapping agent IDs to the individual
        # agents' spaces. Instead, `self.[action|observation]_space` are the single
        # agent spaces.
        self._obs_space_in_preferred_format = False
        self._action_space_in_preferred_format = False

        # Collect the individual agents' spaces (they should all be the same):
        first_obs_space = self.env.observation_space(self.env.agents[0])
        first_action_space = self.env.action_space(self.env.agents[0])

        for agent in self.env.agents:
            if self.env.observation_space(agent) != first_obs_space:
                raise ValueError(
                    "Observation spaces for all agents must be identical. Perhaps "
                    "SuperSuit's pad_observations wrapper can help (usage: "
                    "`supersuit.aec_wrappers.pad_observations(env)`)."
                )
            if self.env.action_space(agent) != first_action_space:
                raise ValueError(
                    "Action spaces for all agents must be identical. Perhaps "
                    "SuperSuit's pad_action_space wrapper can help (usage: "
                    "`supersuit.aec_wrappers.pad_action_space(env)`)."
                )

        # Convert from gym to gymnasium, if necessary.
        self.observation_space = convert_old_gym_space_to_gymnasium_space(
            first_obs_space
        )
        self.action_space = convert_old_gym_space_to_gymnasium_space(first_action_space)

        self._agent_ids = self.env.agents

    def observation_space_sample(self, agent_ids: Optional[list] = None) -> MultiAgentDict:
        if agent_ids is None:
            agent_ids = self._agent_ids
        return {agent_id: self.observation_space.sample() for agent_id in agent_ids}

    def action_space_sample(self, agent_ids: Optional[list] = None) -> MultiAgentDict:
        if agent_ids is None:
            agent_ids = self._agent_ids
        return {agent_id: self.action_space.sample() for agent_id in agent_ids}

    def action_space_contains(self, x: MultiAgentDict) -> bool:
        if not isinstance(x, dict):
            return False
        return all(self.action_space.contains(val) for val in x.values())

    def observation_space_contains(self, x: MultiAgentDict) -> bool:
        if not isinstance(x, dict):
            return False
        return all(self.observation_space.contains(val) for val in x.values())

    def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
        info = self.env.reset(seed=seed, options=options)
        # Only return the observation of the agent that is up next.
        return (
            {self.env.agent_selection: self.env.observe(self.env.agent_selection)},
            info or {},
        )

    def step(self, action):
        # Step the currently selected agent, then skip over any agents that are
        # already done until a live agent is up next (or none are left).
        self.env.step(action[self.env.agent_selection])
        obs_d = {}
        rew_d = {}
        terminated_d = {}
        truncated_d = {}
        info_d = {}
        while self.env.agents:
            obs, rew, terminated, truncated, info = self.env.last()
            agent_id = self.env.agent_selection
            obs_d[agent_id] = obs
            rew_d[agent_id] = rew
            terminated_d[agent_id] = terminated
            truncated_d[agent_id] = truncated
            info_d[agent_id] = info
            if (
                self.env.terminations[self.env.agent_selection]
                or self.env.truncations[self.env.agent_selection]
            ):
                # A done agent must be stepped with a None action to remove it.
                self.env.step(None)
            else:
                break

        all_gone = not self.env.agents
        terminated_d["__all__"] = all_gone and all(terminated_d.values())
        truncated_d["__all__"] = all_gone and all(truncated_d.values())

        return obs_d, rew_d, terminated_d, truncated_d, info_d

    def close(self):
        self.env.close()

    def render(self):
        return self.env.render(self.render_mode)

    @property
    def get_sub_environments(self):
        return self.env.unwrapped


@PublicAPI
class ParallelPettingZooEnv(MultiAgentEnv):
    def __init__(self, env):
        super().__init__()
        self.par_env = env
        self.par_env.reset()

        # Since all agents have the same spaces, do not provide full observation-
        # and action-spaces as Dicts, mapping agent IDs to the individual
        # agents' spaces. Instead, `self.[action|observation]_space` are the single
        # agent spaces.
        self._obs_space_in_preferred_format = False
        self._action_space_in_preferred_format = False

        # Get first observation space, assuming all agents have equal space.
        self.observation_space = self.par_env.observation_space(self.par_env.agents[0])

        # Get first action space, assuming all agents have equal space.
        self.action_space = self.par_env.action_space(self.par_env.agents[0])

        assert all(
            self.par_env.observation_space(agent) == self.observation_space
            for agent in self.par_env.agents
        ), (
            "Observation spaces for all agents must be identical. Perhaps "
            "SuperSuit's pad_observations wrapper can help (usage: "
            "`supersuit.aec_wrappers.pad_observations(env)`)."
        )

        assert all(
            self.par_env.action_space(agent) == self.action_space
            for agent in self.par_env.agents
        ), (
            "Action spaces for all agents must be identical. Perhaps "
            "SuperSuit's pad_action_space wrapper can help (usage: "
            "`supersuit.aec_wrappers.pad_action_space(env)`)."
        )

    def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
        obs, info = self.par_env.reset(seed=seed, options=options)
        return obs, info or {}

    def step(self, action_dict):
        obss, rews, terminateds, truncateds, infos = self.par_env.step(action_dict)
        terminateds["__all__"] = all(terminateds.values())
        truncateds["__all__"] = all(truncateds.values())
        return obss, rews, terminateds, truncateds, infos

    def close(self):
        self.par_env.close()

    def render(self):
        return self.par_env.render(self.render_mode)

    @property
    def get_sub_environments(self):
        return self.par_env.unwrapped
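For completeness, this is roughly how we register the patched wrapper for the 4x4 grid experiment. The module name and the net/route file paths are assumptions based on the sumo-rl repository layout, not copied from ppo_4x4grid.py:

import sumo_rl
from ray.tune.registry import register_env

from pettingzoo_env_fixed import ParallelPettingZooEnv  # the file attached above

register_env(
    "4x4grid",
    # sumo-rl exposes a PettingZoo ParallelEnv; wrap it for RLlib.
    lambda _: ParallelPettingZooEnv(
        sumo_rl.parallel_env(
            net_file="nets/4x4-Lucas/4x4.net.xml",
            route_file="nets/4x4-Lucas/4x4c1c2c1c2.rou.xml",
            use_gui=False,
            num_seconds=80000,
        )
    ),
)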
It works! Thanks a lot!
Also fixed in: 973f169