We do not recommend using reward normalization (subtracting a running mean can flip the sign of individual rewards and thus change the learning signal); the code here is provided for reference only.
import numpy as np

from baselines.common.running_mean_std import RunningMeanStd

# In the agent's __init__, keep a running estimate of the reward
# mean and variance:
self.rms = {
    'reward': RunningMeanStd(epsilon=1e-9, shape=(1,)),
}

def reward_norm(self, reward):
    # Use a float array so the in-place subtraction below does not
    # fail when the environment returns an integer reward.
    reward = np.array([reward], dtype=np.float64)
    assert reward.shape == (1,)
    # Update the running statistics, then standardize the reward.
    self.rms['reward'].update(reward)
    reward -= self.rms['reward'].mean
    reward /= np.sqrt(self.rms['reward'].var)
    assert np.all(np.isfinite(reward))
    # Unwrap back to a scalar.
    reward = reward[0]
    assert reward.shape == ()
    return reward
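
As a minimal usage sketch: assuming the two snippets above live in a hypothetical Agent class, the normalized reward simply replaces the raw environment reward in the rollout loop. The gym environment and the random policy here are illustrative assumptions, not part of the original code.

import gym

env = gym.make('CartPole-v1')
agent = Agent()  # hypothetical class holding self.rms and reward_norm above
obs = env.reset()
for _ in range(1000):
    action = env.action_space.sample()  # random policy, for illustration only
    obs, raw_reward, done, info = env.step(action)
    reward = agent.reward_norm(raw_reward)  # standardize before storing/learning
    if done:
        obs = env.reset()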