AI4Finance-Foundation/ElegantRL

QNet网络状态编码后是否需要加上激活函数?

Opened this issue · 0 comments

在net.py中QNetDuel、QNetTwin和QNetTwinDuel网络中都包含一个状态编码部分,但是状态编码部分不是网络的最终输出部分,为什么在状态编码后不加上激活函数呢(if_raw_out=False)?这部分是否可能会对算法性能造成影响呢,因为同样在SAC网络中也有状态编码部分,但是SAC中编码函数后却加入了激活函数。

class QNetDuel(QNetBase):  # Dueling DQN
    def __init__(self, dims: [int], state_dim: int, action_dim: int):
        super().__init__(state_dim=state_dim, action_dim=action_dim)
        self.net_state = build_mlp(dims=[state_dim, *dims])
        self.net_adv = build_mlp(dims=[dims[-1], 1])  # advantage value
        self.net_val = build_mlp(dims=[dims[-1], action_dim])  # Q value

        layer_init_with_orthogonal(self.net_adv[-1], std=0.1)
        layer_init_with_orthogonal(self.net_val[-1], std=0.1)
class ActorSAC(ActorBase):
    def __init__(self, dims: [int], state_dim: int, action_dim: int):
        super().__init__(state_dim=state_dim, action_dim=action_dim)
        self.net_s = build_mlp(dims=[state_dim, *dims], if_raw_out=False)  # network of encoded state
        self.net_a = build_mlp(dims=[dims[-1], action_dim * 2])  # the average and log_std of action

        layer_init_with_orthogonal(self.net_a[-1], std=0.1)