代码运行问题 Practice05_Cliff_Walk
Opened this issue · 4 comments
tine8899 commented
您好,我在运行以下 notebook 中的代码:
reinforce/reinforce/codes_for_book/c05/02_windy_grid/Practice05_Cliff_Walk.ipynb
q_agent = QAgent(env, capacity = 10000)
sarsa_agent = SarsaAgent(env, capacity = 10000)
sarsa_sta = sarsa_agent.learning(display = False,
max_episode_num = 10000,
epsilon = 0.1,
decaying_epsilon = False)
q_sta = q_agent.learning(display = False,
max_episode_num = 10000,
epsilon = 0.1,
decaying_epsilon = False)
运行后报错如下:
AttributeError Traceback (most recent call last)
<ipython-input-3-c0715cdf678b> in <module>()
7 max_episode_num = 10000,
8 epsilon = 0.1,
----> 9 decaying_epsilon = False)
10 q_sta = q_agent.learning(display = False,
11 max_episode_num = 10000,
~/Documents/01work/gluon/reinforce/reinforce/codes_for_book/c05/02_windy_grid/core.py in learning(self, lambda_, epsilon, decaying_epsilon, gamma, alpha, max_episode_num, display)
255 epsilon = 1.0 / (1 + num_episode)
256 time_in_episode, episode_reward = self.learning_method(lambda_ = lambda_, \
--> 257 gamma = gamma, alpha = alpha, epsilon = epsilon, display = display)
258 total_time += time_in_episode
259 num_episode += 1
~/Documents/01work/gluon/reinforce/reinforce/codes_for_book/c05/02_windy_grid/agents.py in learning_method(self, gamma, alpha, epsilon, display, lambda_)
31 if display:
32 self.env.render()
---> 33 a0 = self.perform_policy(s0, epsilon)
34 # print(self.action_t.name)
35 time_in_episode, total_reward = 0, 0
~/Documents/01work/gluon/reinforce/reinforce/codes_for_book/c05/02_windy_grid/core.py in perform_policy(self, s, Q, epsilon)
208
209 def perform_policy(self, s, Q = None, epsilon = 0.05):
--> 210 action = self.policy(self.A, s, Q, epsilon)
211 return int(action)
212
~/Documents/01work/gluon/reinforce/reinforce/codes_for_book/c05/02_windy_grid/agents.py in policy(self, A, s, Q, epsilon)
24
25 def policy(self, A, s, Q, epsilon):
---> 26 return epsilon_greedy_policy(A, s, Q, epsilon)
27
28 def learning_method(self, gamma = 0.9, alpha = 0.1, epsilon = 1e-5, display = False, lambda_ = None):
~/Documents/01work/gluon/reinforce/reinforce/codes_for_book/c05/02_windy_grid/utils.py in epsilon_greedy_policy(A, s, Q, epsilon)
134 return sample(A)
135 else:
--> 136 return greedy_policy(A, s, Q)
~/Documents/01work/gluon/reinforce/reinforce/codes_for_book/c05/02_windy_grid/utils.py in greedy_policy(A, s, Q, epsilon)
109 max_q, a_max_q = -float('inf'), []
110 for a_opt in A:
--> 111 q = get_dict(Q, s, a_opt)
112 if q > max_q:
113 max_q = q
~/Documents/01work/gluon/reinforce/reinforce/codes_for_book/c05/02_windy_grid/utils.py in get_dict(target_dict, *args)
61 if target_dict is None:
62 return
---> 63 return target_dict.get(str_key(*args),0)
64
65
AttributeError: 'float' object has no attribute 'get'
看上去是 Q 值的问题,但是我没有解决。
xxxkxin commented
你好,请问你的问题解决了吗?我也遇到了同样的问题,是不是 utils.py 的第 63 行那里需要修改呢?
dakerbose commented
我也是同样的问题~~~
hangwuliuqi commented
你们有人解决了吗?我一开始觉得是 target_dict.get(str_key(*args),0) 这里的 target_dict 的问题,后来改了之后它又提示 float 类型不能迭代,我最后也没解决。
Easyboy0405 commented
这是来自QQ邮箱的假期自动回复邮件。
您好,我最近正在休假中,无法亲自回复您的邮件。我将在假期结束后,尽快给您回复。