From ce152a62316eda2a0557d1aa1618938936c1fb3f Mon Sep 17 00:00:00 2001
From: rical730
Date: Tue, 22 Sep 2020 23:33:00 +0800
Subject: [PATCH] update lr interface and support training on single gpu
 (#415)

* update lr interface and support training on single gpu

* yapf

* update warning message

* update warning message
---
 examples/MADDPG/simple_agent.py |  5 +++++
 examples/MADDPG/train.py        | 12 +++++++++---
 parl/algorithms/fluid/maddpg.py | 31 ++++++++++++++++++++++++++-----
 3 files changed, 40 insertions(+), 8 deletions(-)

diff --git a/examples/MADDPG/simple_agent.py b/examples/MADDPG/simple_agent.py
index fe0e47d..f5a5bf9 100644
--- a/examples/MADDPG/simple_agent.py
+++ b/examples/MADDPG/simple_agent.py
@@ -17,6 +17,7 @@ import parl
 from parl import layers
 from paddle import fluid
 from parl.utils import ReplayMemory
+from parl.utils import machine_info, get_gpu_count
 
 
 class MAAgent(parl.Agent):
@@ -47,6 +48,10 @@
             act_dim=self.act_dim_n[agent_index])
 
         self.global_train_step = 0
+        if machine_info.is_gpu_available():
+            assert get_gpu_count() == 1, 'Only single-GPU training is supported.\
+                Please set the environment variable: `export CUDA_VISIBLE_DEVICES=[GPU_ID_TO_USE]`.'
+
         super(MAAgent, self).__init__(algorithm)
 
         # Attention: In the beginning, sync target model totally.
diff --git a/examples/MADDPG/train.py b/examples/MADDPG/train.py
index 8454a73..95e4760 100644
--- a/examples/MADDPG/train.py
+++ b/examples/MADDPG/train.py
@@ -101,7 +101,8 @@
             act_space=env.action_space,
             gamma=args.gamma,
             tau=args.tau,
-            lr=args.lr)
+            critic_lr=args.critic_lr,
+            actor_lr=args.actor_lr)
         agent = MAAgent(
             algorithm,
             agent_index=i,
@@ -195,10 +196,15 @@
         help='statistical interval of save model or count reward')
     # Core training parameters
     parser.add_argument(
-        '--lr',
+        '--critic_lr',
         type=float,
         default=1e-3,
-        help='learning rate for Adam optimizer')
+        help='learning rate of the critic model')
+    parser.add_argument(
+        '--actor_lr',
+        type=float,
+        default=1e-3,
+        help='learning rate of the actor model')
     parser.add_argument(
         '--gamma', type=float, default=0.95, help='discount factor')
     parser.add_argument(
diff --git a/parl/algorithms/fluid/maddpg.py b/parl/algorithms/fluid/maddpg.py
index 36b1470..dec1212 100644
--- a/parl/algorithms/fluid/maddpg.py
+++ b/parl/algorithms/fluid/maddpg.py
@@ -53,7 +53,9 @@ class MADDPG(Algorithm):
                  act_space=None,
                  gamma=None,
                  tau=None,
-                 lr=None):
+                 lr=None,
+                 actor_lr=None,
+                 critic_lr=None):
         """ MADDPG algorithm
 
         Args:
@@ -63,19 +65,38 @@
             model (parl.Model): forward network of policy and value
             agent_index (int): index of agent, in multiagent env
             act_space: action_space, gym space
             gamma (float): discounted factor for reward computation.
             tau (float): decay coefficient when updating the weights of self.target_model with self.model
-            lr (float): learning rate
+            lr (float): learning rate (deprecated); if set, it is assigned to both critic_lr and actor_lr
+            critic_lr (float): learning rate of the critic model
+            actor_lr (float): learning rate of the actor model
         """
         assert isinstance(agent_index, int)
         assert isinstance(act_space, list)
         assert isinstance(gamma, float)
         assert isinstance(tau, float)
-        assert isinstance(lr, float)
+        # backward compatibility for the deprecated `lr` argument
+        if lr is None:
+            assert isinstance(actor_lr, float)
+            assert isinstance(critic_lr, float)
+        else:
+            assert isinstance(lr, float)
+            assert actor_lr is None, 'no need to set `actor_lr` if `lr` is not None'
+            assert critic_lr is None, 'no need to set `critic_lr` if `lr` is not None'
+            critic_lr = lr
+            actor_lr = lr
+            warnings.warn(
+                "The `lr` argument of the `__init__` function in `parl.algorithms.MADDPG` is deprecated \
+                since version 1.4 and will be removed in version 2.0. \
+                Use `actor_lr` and `critic_lr` instead.",
+                DeprecationWarning,
+                stacklevel=2)
         self.agent_index = agent_index
         self.act_space = act_space
         self.gamma = gamma
         self.tau = tau
         self.lr = lr
+        self.actor_lr = actor_lr
+        self.critic_lr = critic_lr
         self.model = model
         self.target_model = deepcopy(model)
@@ -145,7 +166,7 @@
             clip=fluid.clip.GradientClipByNorm(clip_norm=0.5),
             param_list=self.model.get_actor_params())
 
-        optimizer = fluid.optimizer.AdamOptimizer(self.lr)
+        optimizer = fluid.optimizer.AdamOptimizer(self.actor_lr)
         optimizer.minimize(cost, parameter_list=self.model.get_actor_params())
         return cost
 
@@ -157,7 +178,7 @@
             clip=fluid.clip.GradientClipByNorm(clip_norm=0.5),
             param_list=self.model.get_critic_params())
 
-        optimizer = fluid.optimizer.AdamOptimizer(self.lr)
+        optimizer = fluid.optimizer.AdamOptimizer(self.critic_lr)
         optimizer.minimize(cost, parameter_list=self.model.get_critic_params())
         return cost
--
GitLab
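A minimal usage sketch of the interface change follows. It is based only on the calls visible in the diff above; `MAModel`, `env`, the `tau` value, and the import path are illustrative assumptions (stand-ins for the model class, environment, and defaults defined elsewhere in examples/MADDPG), not part of this patch.

    from parl.algorithms import MADDPG  # assumes the usual top-level re-export

    # `MAModel` and `env` are hypothetical stand-ins; only the MADDPG
    # constructor arguments are the point of this sketch.
    model = MAModel(act_dim=5)

    # New interface: independent learning rates for the actor and the critic.
    algorithm = MADDPG(
        model,
        agent_index=0,
        act_space=env.action_space,  # a list of gym spaces, one entry per agent
        gamma=0.95,                  # matches the --gamma default above
        tau=0.01,                    # illustrative; use the --tau default from train.py
        actor_lr=1e-3,
        critic_lr=1e-3)

    # Deprecated interface: a lone `lr` still works, but it now emits a
    # DeprecationWarning and is copied into both actor_lr and critic_lr.
    algorithm = MADDPG(
        model,
        agent_index=0,
        act_space=env.action_space,
        gamma=0.95,
        tau=0.01,
        lr=1e-3)

On the command line, the former `--lr` flag splits into `--critic_lr` and `--actor_lr` (both defaulting to 1e-3), and the new single-GPU check expects `export CUDA_VISIBLE_DEVICES=[GPU_ID_TO_USE]` before launching train.py.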