The output of the "Q distribution" neural network:
out = layers.fully_connected(out, num_outputs=num_actions * nb_atoms,
                             activation_fn=None)
# Reshape the flat output into nb_atoms logits per action.
out = tf.reshape(out, shape=[-1, num_actions, nb_atoms])
# Softmax over the atom dimension: each action gets a probability
# distribution over the return atoms.
out = tf.nn.softmax(out, dim=-1, name='softmax')
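For context, here is a minimal self-contained sketch of this head (TensorFlow 1.x with tf.contrib.layers; the feature size 512 and the values num_actions = 4, nb_atoms = 51 are hypothetical examples, not taken from the post). It only illustrates the resulting shapes: one length-nb_atoms probability distribution per action.

import tensorflow as tf
from tensorflow.contrib import layers

num_actions, nb_atoms = 4, 51                       # hypothetical example values
features = tf.placeholder(tf.float32, [None, 512])  # stand-in for the upstream features

logits = layers.fully_connected(features, num_outputs=num_actions * nb_atoms,
                                activation_fn=None)
logits = tf.reshape(logits, shape=[-1, num_actions, nb_atoms])
p_values = tf.nn.softmax(logits, dim=-1, name='softmax')
# p_values has shape [batch, num_actions, nb_atoms]; along the last axis
# each row sums to 1 and is the predicted return distribution for one action.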
Select the action with the largest Q value as the best action:
def p_to_q(p_values, dist_params):
    # Expected return per action: Q(s, a) = sum_i z_i * p_i(s, a),
    # i.e. the dot product of the atom probabilities with the support z.
    z, _ = build_z(**dist_params)
    return tf.tensordot(p_values, z, [[-1], [-1]])

def pick_action(p_values, dist_params):
    # Greedy action selection on the expected Q values.
    q_values = p_to_q(p_values, dist_params)
    deterministic_actions = tf.argmax(q_values, axis=1)
    return deterministic_actions
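build_z is defined elsewhere in the post; below is a minimal sketch of what it might look like, assuming (my assumption, not the author's code) that dist_params carries Vmin, Vmax and nb_atoms, and that it returns the fixed support z together with the atom spacing. With such a support, p_to_q computes the expectation Q(s, a) = Σ_i z_i · p_i(s, a) via tensordot, and pick_action takes the greedy argmax over those expected returns, as in C51.

import tensorflow as tf

def build_z(Vmin=-10.0, Vmax=10.0, nb_atoms=51, **kwargs):
    # Assumed helper: nb_atoms evenly spaced return atoms covering [Vmin, Vmax],
    # z_i = Vmin + i * dz, the fixed support of the categorical value distribution.
    dz = (Vmax - Vmin) / (nb_atoms - 1)
    z = tf.linspace(Vmin, Vmax, nb_atoms)
    return z, dz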