diff --git a/README.cn.md b/README.cn.md index ff206c8491e5ae9f4d9bda99edb8e579b81525c3..99cba49478dffffcb62d3a4f89f14a8bca2d12ab 100644 --- a/README.cn.md +++ b/README.cn.md @@ -72,6 +72,7 @@ pip install parl # 算法示例 - [QuickStart](examples/QuickStart/) - [DQN](examples/DQN/) +- [ES(深度进化算法)](examples/ES/) - [DDPG](examples/DDPG/) - [PPO](examples/PPO/) - [IMPALA](examples/IMPALA/) diff --git a/README.md b/README.md index e2327bf58330f9d33ad064b0d8cd807af17ae871..a17d05793a0b93172a7661fe8c647cd443c6a7bf 100644 --- a/README.md +++ b/README.md @@ -75,6 +75,7 @@ pip install parl # Examples - [QuickStart](examples/QuickStart/) - [DQN](examples/DQN/) +- [ES](examples/ES/) - [DDPG](examples/DDPG/) - [PPO](examples/PPO/) - [IMPALA](examples/IMPALA/) diff --git a/examples/ES/mujoco_agent.py b/examples/ES/mujoco_agent.py index 58260d7495a23c4f08085895eb6a5158813a6c83..f914e4947528a1fc44bfc0ff965aeb9806bc8121 100644 --- a/examples/ES/mujoco_agent.py +++ b/examples/ES/mujoco_agent.py @@ -55,7 +55,7 @@ class MujocoAgent(parl.Agent): noises(np.float32): [batch_size, weights_total_size] """ - g, count = utils.batched_weighted_sum( + g = utils.batched_weighted_sum( # mirrored sampling: evaluate pairs of perturbations \epsilon, −\epsilon noisy_rewards[:, 0] - noisy_rewards[:, 1], noises, diff --git a/examples/ES/utils.py b/examples/ES/utils.py index 29d43e02a7f1772e604b57ae2d086651a5a4d266..265da51a77c95856e9b1f156782631b0e51c6480 100644 --- a/examples/ES/utils.py +++ b/examples/ES/utils.py @@ -19,6 +19,10 @@ def compute_ranks(x): def compute_centered_ranks(x): + """Return ranks that are normalized to [-0.5, 0.5] with the rewards as input. + Args: + x(np.array): an array of rewards. 
+ """ y = compute_ranks(x.ravel()).reshape(x.shape).astype(np.float32) y /= (x.size - 1) y -= 0.5 @@ -26,6 +30,7 @@ def compute_centered_ranks(x): def itergroups(items, group_size): + """An iterator that iterates a list with batch data.""" assert group_size >= 1 group = [] for x in items: @@ -38,16 +43,22 @@ def itergroups(items, group_size): def batched_weighted_sum(weights, vecs, batch_size): + """Compute the gradients for updating the parameters. + Args: + weights(np.array): the normalized rewards computed by the function `compute_centered_ranks`. + vecs(np.array): the noise added to the parameters. + batch_size(int): the batch_size for speeding up the computation. + Return: + total(np.array): aggregated gradient. + """ total = 0 - num_items_summed = 0 for batch_weights, batch_vecs in zip( itergroups(weights, batch_size), itergroups(vecs, batch_size)): assert len(batch_weights) == len(batch_vecs) <= batch_size total += np.dot( np.asarray(batch_weights, dtype=np.float32), np.asarray(batch_vecs, dtype=np.float32)) - num_items_summed += len(batch_weights) - return total, num_items_summed + return total def unflatten(flat_array, array_shapes): diff --git a/parl/algorithms/__init__.py b/parl/algorithms/__init__.py index 20c3d3d467cf50b4fed42a7d1b37671b107f1e4b..8565455c374db1e16e878b3977f3c8f2f7c5557d 100644 --- a/parl/algorithms/__init__.py +++ b/parl/algorithms/__init__.py @@ -13,8 +13,13 @@ # limitations under the License. from parl.utils.utils import _HAS_FLUID, _HAS_TORCH +from parl.utils import logger if _HAS_FLUID: from parl.algorithms.fluid import * elif _HAS_TORCH: from parl.algorithms.torch import * +else: + logger.warning( + "No deep learning framework was found, but it's ok for parallel computation." + )