reward_norm

我们并不推荐使用 reward normalization,此处代码仅供参考。



from baselines.common.running_mean_std import RunningMeanStd


# Running-statistics trackers keyed by quantity name; only the reward
# stream is tracked here.  shape=(1,) matches the np.array([reward])
# wrapping done in reward_norm below, and epsilon=1e-9 seeds the
# sample count so the very first update is well-defined.
# NOTE(review): this snippet uses `self` — presumably it belongs inside
# a class __init__; it is not standalone as pasted.  Verify placement.
self.rms = {
    'reward': RunningMeanStd(epsilon=1e-9, shape=(1,)),
}



def reward_norm(self, reward):
    """Normalize a scalar reward using running mean/std statistics.

    Folds the new sample into ``self.rms['reward']``, then returns
    ``(reward - mean) / sqrt(var)`` as a 0-d (scalar) value.

    Args:
        reward: a scalar reward (int or float).

    Returns:
        The normalized reward as a 0-d numpy scalar.
    """
    # Force float64: with the original np.array([reward]) an integer
    # input yields an int array, and the in-place ``-=`` / ``/=`` below
    # against float statistics raises a casting TypeError.
    reward = np.array([reward], dtype=np.float64)
    assert reward.shape == (1,)

    # Update the running statistics with this sample before normalizing.
    self.rms['reward'].update(reward)

    reward -= self.rms['reward'].mean
    reward /= np.sqrt(self.rms['reward'].var)

    # Sanity check: guards against var == 0 producing inf/nan.
    assert np.all(np.isfinite(reward))

    # Unwrap the length-1 array back to a scalar.
    reward = reward[0]
    assert reward.shape == ()

    return reward







深度学习推荐

墨之科技,版权所有 © Copyright 2017-2027

湘ICP备14012786号     邮箱:ai@inksci.com