QNet 网络的状态编码之后是否需要加上激活函数?
Opened this issue · 0 comments
legao-2 commented
在 net.py 中,QNetDuel、QNetTwin 和 QNetTwinDuel 这几个网络都包含一个状态编码部分(例如 QNetDuel 中的 net_state)。状态编码部分的输出并不是网络的最终输出,为什么在状态编码之后不加上激活函数呢(即调用 build_mlp 时传入 if_raw_out=False)?这是否可能对算法性能造成影响?之所以这样问,是因为 SAC 的网络(例如 ActorSAC 中的 net_s)同样有状态编码部分,但 SAC 在编码之后加入了激活函数。相关代码如下:
class QNetDuel(QNetBase):  # Dueling DQN
    """Dueling-DQN network: a state encoder plus two heads on the encoded state.

    The constructor creates three sub-networks with ``build_mlp``:
    ``net_state`` (the state encoder), ``net_adv`` (one output) and
    ``net_val`` (``action_dim`` outputs).
    """

    def __init__(self, dims: [int], state_dim: int, action_dim: int):
        """Build the state encoder and the two output heads.

        Args:
            dims: layer widths of the state encoder that follow the input
                layer; ``dims[-1]`` is the input width of both heads.
            state_dim: width of the encoder's input layer.
            action_dim: number of outputs of the ``net_val`` head.
        """
        super().__init__(state_dim=state_dim, action_dim=action_dim)

        # State encoder. `if_raw_out` is not passed, so build_mlp's default applies.
        # NOTE(review): ActorSAC builds its encoder with `if_raw_out=False`.
        # Both heads take `dims[-1]` inputs, so this encoder presumably feeds
        # them directly -- confirm whether it should also end with an activation.
        self.net_state = build_mlp(dims=[state_dim, *dims])

        # Two heads on the encoded state: one output, and `action_dim` outputs.
        # NOTE(review): in the usual dueling layout the single-output head is the
        # state value and the per-action head is the advantage; the names and
        # comments below look swapped -- confirm against forward().
        self.net_adv = build_mlp(dims=[dims[-1], 1])  # advantage value
        self.net_val = build_mlp(dims=[dims[-1], action_dim])  # Q value

        # Re-initialise the last module of each head with std=0.1.
        layer_init_with_orthogonal(self.net_adv[-1], std=0.1)
        layer_init_with_orthogonal(self.net_val[-1], std=0.1)
class ActorSAC(ActorBase):
    """SAC actor: a state encoder followed by a head with ``action_dim * 2`` outputs.

    The constructor creates two sub-networks with ``build_mlp``: ``net_s``
    (the state encoder) and ``net_a`` (the action head).
    """

    def __init__(self, dims: [int], state_dim: int, action_dim: int):
        """Build the state encoder and the action head.

        Args:
            dims: layer widths of the state encoder that follow the input
                layer; ``dims[-1]`` is the input width of the action head.
            state_dim: width of the encoder's input layer.
            action_dim: action dimension; the head has ``action_dim * 2`` outputs.
        """
        super().__init__(state_dim=state_dim, action_dim=action_dim)

        # State encoder, built with `if_raw_out=False`.
        # NOTE(review): presumably this keeps the activation after the last
        # encoder layer -- confirm against build_mlp.
        self.net_s = build_mlp(dims=[state_dim, *dims], if_raw_out=False)  # network of encoded state

        # Action head: two outputs per action dimension.
        self.net_a = build_mlp(dims=[dims[-1], action_dim * 2])  # the average and log_std of action

        # Re-initialise the last module of the action head with std=0.1.
        layer_init_with_orthogonal(self.net_a[-1], std=0.1)