Unverified commit aede5aee, authored by Bo Zhou, committed by GitHub

resolve the compatibility issue (#226)

* fix the compatibility issue with the newest Paddle

* remove logging lines

* resolve the compatibility issue with the newest Paddle

* yapf (re-run the yapf code formatter)

Co-authored-by: robot <zenghongsheng@baidu.com>
Parent b1cabc2d
@@ -106,7 +106,7 @@ class AtariAgent(parl.Agent):
             'reward': reward,
             'next_obs': next_obs.astype('float32'),
             'terminal': terminal,
-            'lr': lr
+            'lr': np.float32(lr)
         }
         cost = self.fluid_executor.run(
             self.learn_program, feed=feed, fetch_list=[self.cost])[0]
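The cast in this hunk (and the same change to self.lr and self.entropy_coeff in the next one) works around a dtype mismatch: a bare Python float is promoted to float64 when converted to an array for the feed, while the corresponding input variable in the program is presumably declared as float32, which newer Paddle versions appear to reject. A minimal NumPy-only sketch of that promotion (variable name and value are illustrative, not taken from the repository):

    import numpy as np

    lr = 3e-4  # a plain Python float, as fed before this commit

    # NumPy promotes a bare Python float to float64 ...
    print(np.asarray(lr).dtype)              # float64
    # ... while wrapping it in np.float32 keeps the expected dtype.
    print(np.asarray(np.float32(lr)).dtype)  # float32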
@@ -121,7 +121,9 @@ class Learner(object):
                 yield [
                     obs_np, actions_np, behaviour_logits_np, rewards_np,
-                    dones_np, self.lr, self.entropy_coeff
+                    dones_np,
+                    np.float32(self.lr),
+                    np.float32(self.entropy_coeff)
                 ]

     def run_learn(self):
@@ -78,6 +78,7 @@ class VTraceLoss(object):
         self.entropy = layers.reduce_sum(policy_entropy)

         # The summed weighted loss
+        entropy_coeff = layers.reshape(entropy_coeff, shape=[1])
         self.total_loss = (self.pi_loss + self.vf_loss * vf_loss_coeff +
                            self.entropy * entropy_coeff)
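The last hunk reshapes entropy_coeff into a one-element tensor before it is multiplied into the loss, presumably so that the scalar now fed from the learner carries an explicit rank-1 shape for the elementwise ops in newer Paddle. A NumPy stand-in for the shapes involved (all values are made up for illustration; layers.reshape(entropy_coeff, shape=[1]) is modeled by np.reshape):

    import numpy as np

    # Illustrative stand-ins for the fluid tensors in VTraceLoss.
    pi_loss = np.float32(1.0)
    vf_loss = np.float32(2.0)
    vf_loss_coeff = np.float32(0.5)
    entropy = np.float32(12.5)                         # reduce_sum(policy_entropy)
    entropy_coeff = np.reshape(np.float32(0.01), [1])  # mirrors layers.reshape(..., shape=[1])

    total_loss = (pi_loss + vf_loss * vf_loss_coeff +
                  entropy * entropy_coeff)
    print(total_loss.shape, total_loss.dtype)  # (1,) float32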