Unverified commit aede5aee, authored by Bo Zhou, committed by GitHub

resolve the compatibility issue (#226)

* fix the compatibility issue with the newest Paddle

* remove logging lines

* resolve the compatibility issue with the newest Paddle

* yapf (re-run the yapf code formatter)

Co-authored-by: robot <zenghongsheng@baidu.com>
Parent b1cabc2d
@@ -106,7 +106,7 @@ class AtariAgent(parl.Agent):
             'reward': reward,
             'next_obs': next_obs.astype('float32'),
             'terminal': terminal,
-            'lr': lr
+            'lr': np.float32(lr)
         }
         cost = self.fluid_executor.run(
             self.learn_program, feed=feed, fetch_list=[self.cost])[0]
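The cast in this hunk (and the same change to self.lr and self.entropy_coeff in the next one) works around a dtype mismatch: a bare Python float is promoted to float64 when converted to an array for the feed, while the corresponding input variable in the program is presumably declared as float32, which newer Paddle versions appear to reject. A minimal NumPy-only sketch of that promotion (variable name and value are illustrative, not taken from the repository):

    import numpy as np

    lr = 3e-4  # a plain Python float, as fed before this commit

    # NumPy promotes a bare Python float to float64 ...
    print(np.asarray(lr).dtype)              # float64
    # ... while wrapping it in np.float32 keeps the expected dtype.
    print(np.asarray(np.float32(lr)).dtype)  # float32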
@@ -121,7 +121,9 @@ class Learner(object):
                 yield [
                     obs_np, actions_np, behaviour_logits_np, rewards_np,
-                    dones_np, self.lr, self.entropy_coeff
+                    dones_np,
+                    np.float32(self.lr),
+                    np.float32(self.entropy_coeff)
                 ]

     def run_learn(self):
@@ -78,6 +78,7 @@ class VTraceLoss(object):
         self.entropy = layers.reduce_sum(policy_entropy)

         # The summed weighted loss
+        entropy_coeff = layers.reshape(entropy_coeff, shape=[1])
         self.total_loss = (self.pi_loss + self.vf_loss * vf_loss_coeff +
                            self.entropy * entropy_coeff)
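The last hunk reshapes entropy_coeff into a one-element tensor before it is multiplied into the loss, presumably so that the scalar now fed from the learner carries an explicit rank-1 shape for the elementwise ops in newer Paddle. A NumPy stand-in for the shapes involved (all values are made up for illustration; layers.reshape(entropy_coeff, shape=[1]) is modeled by np.reshape):

    import numpy as np

    # Illustrative stand-ins for the fluid tensors in VTraceLoss.
    pi_loss = np.float32(1.0)
    vf_loss = np.float32(2.0)
    vf_loss_coeff = np.float32(0.5)
    entropy = np.float32(12.5)                         # reduce_sum(policy_entropy)
    entropy_coeff = np.reshape(np.float32(0.01), [1])  # mirrors layers.reshape(..., shape=[1])

    total_loss = (pi_loss + vf_loss * vf_loss_coeff +
                  entropy * entropy_coeff)
    print(total_loss.shape, total_loss.dtype)  # (1,) float32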