We do not recommend using reward normalization; the code here is provided for reference only.
import numpy as np
from baselines.common.running_mean_std import RunningMeanStd

# Inside the agent's __init__: keep a running mean/std tracker for rewards.
self.rms = {
    'reward': RunningMeanStd(epsilon=1e-9, shape=(1,)),
}

def reward_norm(self, reward):
    # Wrap the scalar reward as a float array matching the (1,)-shaped statistics.
    reward = np.array([reward], dtype=np.float64)
    assert reward.shape == (1,)
    # Fold the new sample into the running mean/variance estimate.
    self.rms['reward'].update(reward)
    # Standardize: subtract the running mean, divide by the running std.
    reward -= self.rms['reward'].mean
    reward /= np.sqrt(self.rms['reward'].var)
    assert np.all(np.isfinite(reward))
    # Unwrap back to a scalar.
    reward = reward[0]
    assert reward.shape == ()
    return reward
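If the baselines package is not installed, the self-contained sketch below illustrates the same technique. The RunningMeanStd class here is a minimal stand-in written for illustration (it mirrors the parallel mean/variance merge that baselines uses, but it is not the library's code), and the reward values in the loop are arbitrary examples.

import numpy as np

class RunningMeanStd:
    """Minimal stand-in for baselines' RunningMeanStd: tracks the running
    mean and variance of every value seen so far."""
    def __init__(self, epsilon=1e-9, shape=()):
        self.mean = np.zeros(shape, dtype=np.float64)
        self.var = np.ones(shape, dtype=np.float64)
        self.count = epsilon  # small prior count avoids division by zero

    def update(self, x):
        # Merge the batch moments into the running moments
        # (Chan et al.'s parallel variance formula).
        batch_mean = np.mean(x, axis=0)
        batch_var = np.var(x, axis=0)
        batch_count = x.shape[0]
        delta = batch_mean - self.mean
        tot_count = self.count + batch_count
        new_mean = self.mean + delta * batch_count / tot_count
        m2 = (self.var * self.count + batch_var * batch_count
              + delta ** 2 * self.count * batch_count / tot_count)
        self.mean, self.var, self.count = new_mean, m2 / tot_count, tot_count

# Hypothetical usage: normalize each incoming reward with the statistics
# accumulated so far. The reward values below are arbitrary examples.
rms = RunningMeanStd(epsilon=1e-9, shape=(1,))
for raw in [1.0, -0.5, 2.0, 0.3]:
    r = np.array([raw], dtype=np.float64)
    rms.update(r)
    normalized = (r - rms.mean) / np.sqrt(rms.var)
    print(raw, float(normalized[0]))

Note that with an epsilon this small, the very first normalized reward comes out near zero, since the running variance has collapsed onto a single sample; baselines' own default is epsilon=1e-4, which softens this initial behavior.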