From 118cc6732101f76db706456b656fa8925fdb24c3 Mon Sep 17 00:00:00 2001 From: niuyazhe Date: Thu, 30 Dec 2021 11:09:41 +0800 Subject: [PATCH] polish(nyz): move actor_head_type to action_space field in qac and update readme new repo link --- README.md | 2 ++ ding/model/template/maqac.py | 34 ++++--------------- ding/model/template/qac.py | 6 ++-- .../config/bipedalwalker_sac_config.py | 2 +- .../config/bipedalwalker_td3_config.py | 2 +- .../d4rl/config/hopper_cql_default_config.py | 2 +- .../hopper_expert_cql_default_config.py | 2 +- .../hopper_medium_cql_default_config.py | 2 +- .../mujoco/config/ant_ddpg_default_config.py | 2 +- dizoo/mujoco/config/ant_sac_default_config.py | 2 +- dizoo/mujoco/config/ant_td3_default_config.py | 2 +- .../config/ant_trex_sac_default_config.py | 2 +- .../config/halfcheetah_ddpg_default_config.py | 2 +- dizoo/mujoco/config/halfcheetah_gcl_config.py | 2 +- .../config/halfcheetah_sac_default_config.py | 2 +- .../config/halfcheetah_td3_default_config.py | 2 +- .../halfcheetah_trex_sac_default_config.py | 2 +- .../config/hopper_cql_default_config.py | 2 +- .../config/hopper_d4pg_default_config.py | 2 +- .../config/hopper_ddpg_default_config.py | 2 +- ...pper_sac_data_generation_default_config.py | 2 +- .../config/hopper_sac_default_config.py | 2 +- .../config/hopper_td3_bc_default_config.py | 2 +- .../hopper_td3_data_generation_config.py | 2 +- .../config/hopper_td3_default_config.py | 2 +- .../config/hopper_trex_sac_default_config.py | 2 +- .../sac_halfcheetah_mbpo_default_config.py | 2 +- .../config/sac_hopper_mbpo_default_config.py | 2 +- .../config/walker2d_ddpg_default_config.py | 2 +- .../config/walker2d_ddpg_gail_config.py | 2 +- .../config/walker2d_sac_default_config.py | 2 +- .../config/walker2d_td3_default_config.py | 2 +- .../walker2d_trex_sac_default_config.py | 2 +- .../config/ant_masac_default_config.py | 2 +- .../config/ant_ddpg_default_config.py | 2 +- .../pybullet/config/ant_sac_default_config.py | 2 +- 
.../pybullet/config/ant_td3_default_config.py | 2 +- .../config/halfcheetah_ddpg_default_config.py | 2 +- .../config/halfcheetah_sac_default_config.py | 2 +- .../config/halfcheetah_td3_default_config.py | 2 +- .../config/hopper_ddpg_default_config.py | 2 +- .../config/hopper_sac_default_config.py | 2 +- .../config/hopper_td3_default_config.py | 2 +- .../config/walker2d_ddpg_default_config.py | 2 +- .../config/walker2d_sac_default_config.py | 2 +- .../config/walker2d_td3_default_config.py | 2 +- 46 files changed, 54 insertions(+), 74 deletions(-) diff --git a/README.md b/README.md index 75e788f..79fee88 100644 --- a/README.md +++ b/README.md @@ -54,11 +54,13 @@ Updated on 2021.12.03 DI-engine-v0.2.2 (beta) - [DI-star](https://github.com/opendilab/DI-star): Decision AI in StarCraftII - [DI-drive](https://github.com/opendilab/DI-drive): Auto-driving platform - [GoBigger](https://github.com/opendilab/GoBigger): Multi-Agent Decision Intelligence Environment + - [DI-smartcross](https://github.com/opendilab/DI-smartcross): Decision AI in Traffic Light Control - General nested data lib - [treevalue](https://github.com/opendilab/treevalue): Tree-nested data structure - [DI-treetensor](https://github.com/opendilab/DI-treetensor): Tree-nested PyTorch tensor Lib - Docs and Tutorials - [DI-engine-docs](https://github.com/opendilab/DI-engine-docs) + - [awesome-model-based-RL](https://github.com/opendilab/awesome-model-based-RL): A curated list of awesome Model-Based RL resources **DI-engine** also has some **system optimization and design** for efficient and robust large-scale RL training: diff --git a/ding/model/template/maqac.py b/ding/model/template/maqac.py index ca4273d..3eb0705 100644 --- a/ding/model/template/maqac.py +++ b/ding/model/template/maqac.py @@ -24,7 +24,6 @@ class MAQAC(nn.Module): agent_obs_shape: Union[int, SequenceType], global_obs_shape: Union[int, SequenceType], action_shape: Union[int, SequenceType], - # actor_head_type: str, twin_critic: bool = False, 
actor_head_hidden_size: int = 64, actor_head_layer_num: int = 1, @@ -39,7 +38,6 @@ class MAQAC(nn.Module): Arguments: - obs_shape (:obj:`Union[int, SequenceType]`): Observation's space. - action_shape (:obj:`Union[int, SequenceType]`): Action's space. - - actor_head_type (:obj:`str`): Whether choose ``regression`` or ``reparameterization``. - twin_critic (:obj:`bool`): Whether include twin critic. - actor_head_hidden_size (:obj:`Optional[int]`): The ``hidden_size`` to pass to actor-nn's ``Head``. - actor_head_layer_num (:obj:`int`): @@ -179,11 +177,6 @@ class MAQAC(nn.Module): - obs (:obj:`torch.Tensor`): :math:`(B, N1)`, where B is batch size and N1 is ``obs_shape`` - action (:obj:`torch.Tensor`): :math:`(B, N2)`, where B is batch size and N2 is ``action_shape`` - q_value (:obj:`torch.FloatTensor`): :math:`(B, )`, where B is batch size. - Examples: - >>> inputs = {'obs': torch.randn(4, N), 'action': torch.randn(4, 1)} - >>> model = QAC(obs_shape=(N, ),action_shape=1,actor_head_type='regression') - >>> model(inputs, mode='compute_critic')['q_value'] # q value - tensor([0.0773, 0.1639, 0.0917, 0.0370], grad_fn=) """ if self.twin_critic: @@ -208,7 +201,7 @@ class ContinuousMAQAC(nn.Module): agent_obs_shape: Union[int, SequenceType], global_obs_shape: Union[int, SequenceType], action_shape: Union[int, SequenceType, EasyDict], - actor_head_type: str, + action_space: str, twin_critic: bool = False, actor_head_hidden_size: int = 64, actor_head_layer_num: int = 1, @@ -222,9 +215,8 @@ class ContinuousMAQAC(nn.Module): Init the QAC Model according to arguments. Arguments: - obs_shape (:obj:`Union[int, SequenceType]`): Observation's space. - - action_shape (:obj:`Union[int, SequenceType, EasyDict]`): Action's space, such as 4, (3, ), - EasyDict({'action_type_shape': 3, 'action_args_shape': 4}). - - actor_head_type (:obj:`str`): Whether choose ``regression`` or ``reparameterization`` or ``hybrid`` . 
+ - action_shape (:obj:`Union[int, SequenceType, EasyDict]`): Action's space, such as 4, (3, ). + - action_space (:obj:`str`): Which actor head to build, either ``regression`` or ``reparameterization``. - twin_critic (:obj:`bool`): Whether include twin critic. - actor_head_hidden_size (:obj:`Optional[int]`): The ``hidden_size`` to pass to actor-nn's ``Head``. - actor_head_layer_num (:obj:`int`): @@ -243,9 +235,9 @@ class ContinuousMAQAC(nn.Module): global_obs_shape: int = squeeze(global_obs_shape) action_shape = squeeze(action_shape) self.action_shape = action_shape - self.actor_head_type = actor_head_type - assert self.actor_head_type in ['regression', 'reparameterization'] - if self.actor_head_type == 'regression': # DDPG, TD3 + self.action_space = action_space + assert self.action_space in ['regression', 'reparameterization'] + if self.action_space == 'regression': # DDPG, TD3 self.actor = nn.Sequential( nn.Linear(obs_shape, actor_head_hidden_size), activation, RegressionHead( @@ -350,12 +342,6 @@ class ContinuousMAQAC(nn.Module): >>> actor_outputs['logit'][1].shape # sigma >>> torch.Size([4, 64]) - Critic Examples: - >>> inputs = {'obs': torch.randn(4,N), 'action': torch.randn(4,1)} - >>> model = QAC(obs_shape=(N, ),action_shape=1,actor_head_type='regression') - >>> model(inputs, mode='compute_critic')['q_value'] # q value - tensor([0.0773, 0.1639, 0.0917, 0.0370], grad_fn=) - """ assert mode in self.mode, "not support forward mode: {}/{}".format(mode, self.mode) return getattr(self, mode)(inputs) @@ -404,7 +390,7 @@ class ContinuousMAQAC(nn.Module): >>> torch.Size([4, 64]) """ inputs = inputs['agent_state'] - if self.actor_head_type == 'regression': + if self.action_space == 'regression': x = self.actor(inputs) return {'action': x['pred']} else: @@ -434,12 +420,6 @@ class ContinuousMAQAC(nn.Module): - obs (:obj:`torch.Tensor`): :math:`(B, N1)`, where B is batch size and N1 is ``obs_shape`` - action (:obj:`torch.Tensor`): :math:`(B, N2)`, where B is batch size and N2 is 
``action_shape`` - q_value (:obj:`torch.FloatTensor`): :math:`(B, )`, where B is batch size. - - Examples: - >>> inputs = {'obs': torch.randn(4, N), 'action': torch.randn(4, 1)} - >>> model = QAC(obs_shape=(N, ),action_shape=1,actor_head_type='regression') - >>> model(inputs, mode='compute_critic')['q_value'] # q value - >>> tensor([0.0773, 0.1639, 0.0917, 0.0370], grad_fn=) """ obs, action = inputs['obs']['global_state'], inputs['action'] diff --git a/ding/model/template/qac.py b/ding/model/template/qac.py index 2074654..b7b3737 100644 --- a/ding/model/template/qac.py +++ b/ding/model/template/qac.py @@ -325,7 +325,6 @@ class DiscreteQAC(nn.Module): global_obs_shape: Union[int, SequenceType], action_shape: Union[int, SequenceType], encoder_hidden_size_list: SequenceType = [64], - #actor_head_type: str, twin_critic: bool = False, actor_head_hidden_size: int = 64, actor_head_layer_num: int = 1, @@ -340,7 +339,6 @@ class DiscreteQAC(nn.Module): Arguments: - obs_shape (:obj:`Union[int, SequenceType]`): Observation's space. - action_shape (:obj:`Union[int, SequenceType]`): Action's space. - - actor_head_type (:obj:`str`): Whether choose ``regression`` or ``reparameterization``. - twin_critic (:obj:`bool`): Whether include twin critic. - actor_head_hidden_size (:obj:`Optional[int]`): The ``hidden_size`` to pass to actor-nn's ``Head``. 
- actor_head_layer_num (:obj:`int`): @@ -468,7 +466,7 @@ class DiscreteQAC(nn.Module): Critic Examples: >>> inputs = {'obs': torch.randn(4,N), 'action': torch.randn(4,1)} - >>> model = QAC(obs_shape=(N, ),action_shape=1,actor_head_type='regression') + >>> model = QAC(obs_shape=(N, ), action_shape=1, action_space='regression') >>> model(inputs, mode='compute_critic')['q_value'] # q value tensor([0.0773, 0.1639, 0.0917, 0.0370], grad_fn=) @@ -537,7 +535,7 @@ class DiscreteQAC(nn.Module): Examples: >>> inputs = {'obs': torch.randn(4, N), 'action': torch.randn(4, 1)} - >>> model = QAC(obs_shape=(N, ),action_shape=1,actor_head_type='regression') + >>> model = QAC(obs_shape=(N, ),action_shape=1, action_space='regression') >>> model(inputs, mode='compute_critic')['q_value'] # q value tensor([0.0773, 0.1639, 0.0917, 0.0370], grad_fn=) diff --git a/dizoo/box2d/bipedalwalker/config/bipedalwalker_sac_config.py b/dizoo/box2d/bipedalwalker/config/bipedalwalker_sac_config.py index da79506..0deaa1e 100644 --- a/dizoo/box2d/bipedalwalker/config/bipedalwalker_sac_config.py +++ b/dizoo/box2d/bipedalwalker/config/bipedalwalker_sac_config.py @@ -21,7 +21,7 @@ bipedalwalker_sac_config = dict( obs_shape=24, action_shape=4, twin_critic=True, - actor_head_type='reparameterization', + action_space='reparameterization', actor_head_hidden_size=128, critic_head_hidden_size=128, ), diff --git a/dizoo/box2d/bipedalwalker/config/bipedalwalker_td3_config.py b/dizoo/box2d/bipedalwalker/config/bipedalwalker_td3_config.py index c676890..a196c3f 100644 --- a/dizoo/box2d/bipedalwalker/config/bipedalwalker_td3_config.py +++ b/dizoo/box2d/bipedalwalker/config/bipedalwalker_td3_config.py @@ -20,7 +20,7 @@ bipedalwalker_td3_config = dict( twin_critic=True, actor_head_hidden_size=400, critic_head_hidden_size=400, - actor_head_type='regression', + action_space='regression', ), learn=dict( update_per_collect=4, diff --git a/dizoo/d4rl/config/hopper_cql_default_config.py 
b/dizoo/d4rl/config/hopper_cql_default_config.py index 8d24f71..e1f623e 100644 --- a/dizoo/d4rl/config/hopper_cql_default_config.py +++ b/dizoo/d4rl/config/hopper_cql_default_config.py @@ -17,7 +17,7 @@ hopper_cql_default_config = dict( obs_shape=11, action_shape=3, twin_critic=True, - actor_head_type='reparameterization', + action_space='reparameterization', actor_head_hidden_size=256, critic_head_hidden_size=256, ), diff --git a/dizoo/d4rl/config/hopper_expert_cql_default_config.py b/dizoo/d4rl/config/hopper_expert_cql_default_config.py index e407fe7..443ed36 100644 --- a/dizoo/d4rl/config/hopper_expert_cql_default_config.py +++ b/dizoo/d4rl/config/hopper_expert_cql_default_config.py @@ -17,7 +17,7 @@ hopper_expert_cql_default_config = dict( obs_shape=11, action_shape=3, twin_critic=True, - actor_head_type='reparameterization', + action_space='reparameterization', actor_head_hidden_size=256, critic_head_hidden_size=256, ), diff --git a/dizoo/d4rl/config/hopper_medium_cql_default_config.py b/dizoo/d4rl/config/hopper_medium_cql_default_config.py index e6e14a6..3c82714 100644 --- a/dizoo/d4rl/config/hopper_medium_cql_default_config.py +++ b/dizoo/d4rl/config/hopper_medium_cql_default_config.py @@ -17,7 +17,7 @@ hopper_medium_cql_default_config = dict( obs_shape=11, action_shape=3, twin_critic=True, - actor_head_type='reparameterization', + action_space='reparameterization', actor_head_hidden_size=256, critic_head_hidden_size=256, ), diff --git a/dizoo/mujoco/config/ant_ddpg_default_config.py b/dizoo/mujoco/config/ant_ddpg_default_config.py index 066663d..458f638 100644 --- a/dizoo/mujoco/config/ant_ddpg_default_config.py +++ b/dizoo/mujoco/config/ant_ddpg_default_config.py @@ -20,7 +20,7 @@ ant_ddpg_default_config = dict( twin_critic=False, actor_head_hidden_size=256, critic_head_hidden_size=256, - actor_head_type='regression', + action_space='regression', ), learn=dict( update_per_collect=1, diff --git a/dizoo/mujoco/config/ant_sac_default_config.py 
b/dizoo/mujoco/config/ant_sac_default_config.py index 81bc183..69fd1e4 100644 --- a/dizoo/mujoco/config/ant_sac_default_config.py +++ b/dizoo/mujoco/config/ant_sac_default_config.py @@ -19,7 +19,7 @@ ant_sac_default_config = dict( obs_shape=111, action_shape=8, twin_critic=True, - actor_head_type='reparameterization', + action_space='reparameterization', actor_head_hidden_size=256, critic_head_hidden_size=256, ), diff --git a/dizoo/mujoco/config/ant_td3_default_config.py b/dizoo/mujoco/config/ant_td3_default_config.py index 2270886..90c481c 100644 --- a/dizoo/mujoco/config/ant_td3_default_config.py +++ b/dizoo/mujoco/config/ant_td3_default_config.py @@ -20,7 +20,7 @@ ant_td3_default_config = dict( twin_critic=True, actor_head_hidden_size=256, critic_head_hidden_size=256, - actor_head_type='regression', + action_space='regression', ), learn=dict( update_per_collect=1, diff --git a/dizoo/mujoco/config/ant_trex_sac_default_config.py b/dizoo/mujoco/config/ant_trex_sac_default_config.py index cd85d9a..4986407 100644 --- a/dizoo/mujoco/config/ant_trex_sac_default_config.py +++ b/dizoo/mujoco/config/ant_trex_sac_default_config.py @@ -36,7 +36,7 @@ ant_trex_sac_default_config = dict( obs_shape=111, action_shape=8, twin_critic=True, - actor_head_type='reparameterization', + action_space='reparameterization', actor_head_hidden_size=256, critic_head_hidden_size=256, ), diff --git a/dizoo/mujoco/config/halfcheetah_ddpg_default_config.py b/dizoo/mujoco/config/halfcheetah_ddpg_default_config.py index 1370ad5..6279fb2 100644 --- a/dizoo/mujoco/config/halfcheetah_ddpg_default_config.py +++ b/dizoo/mujoco/config/halfcheetah_ddpg_default_config.py @@ -20,7 +20,7 @@ halfcheetah_ddpg_default_config = dict( twin_critic=False, actor_head_hidden_size=256, critic_head_hidden_size=256, - actor_head_type='regression', + action_space='regression', ), learn=dict( update_per_collect=1, diff --git a/dizoo/mujoco/config/halfcheetah_gcl_config.py b/dizoo/mujoco/config/halfcheetah_gcl_config.py 
index 6709ded..249a42a 100644 --- a/dizoo/mujoco/config/halfcheetah_gcl_config.py +++ b/dizoo/mujoco/config/halfcheetah_gcl_config.py @@ -28,7 +28,7 @@ halfcheetah_gcl_default_config = dict( obs_shape=17, action_shape=6, twin_critic=True, - actor_head_type='reparameterization', + action_space='reparameterization', actor_head_hidden_size=256, critic_head_hidden_size=256, ), diff --git a/dizoo/mujoco/config/halfcheetah_sac_default_config.py b/dizoo/mujoco/config/halfcheetah_sac_default_config.py index 19253c4..92c9608 100644 --- a/dizoo/mujoco/config/halfcheetah_sac_default_config.py +++ b/dizoo/mujoco/config/halfcheetah_sac_default_config.py @@ -19,7 +19,7 @@ halfcheetah_sac_default_config = dict( obs_shape=17, action_shape=6, twin_critic=True, - actor_head_type='reparameterization', + action_space='reparameterization', actor_head_hidden_size=256, critic_head_hidden_size=256, ), diff --git a/dizoo/mujoco/config/halfcheetah_td3_default_config.py b/dizoo/mujoco/config/halfcheetah_td3_default_config.py index 9bcab25..b6dcbd5 100644 --- a/dizoo/mujoco/config/halfcheetah_td3_default_config.py +++ b/dizoo/mujoco/config/halfcheetah_td3_default_config.py @@ -20,7 +20,7 @@ halfcheetah_td3_default_config = dict( twin_critic=True, actor_head_hidden_size=256, critic_head_hidden_size=256, - actor_head_type='regression', + action_space='regression', ), learn=dict( update_per_collect=1, diff --git a/dizoo/mujoco/config/halfcheetah_trex_sac_default_config.py b/dizoo/mujoco/config/halfcheetah_trex_sac_default_config.py index 13194f7..b3e1251 100644 --- a/dizoo/mujoco/config/halfcheetah_trex_sac_default_config.py +++ b/dizoo/mujoco/config/halfcheetah_trex_sac_default_config.py @@ -36,7 +36,7 @@ halfcheetah_trex_sac_default_config = dict( obs_shape=17, action_shape=6, twin_critic=True, - actor_head_type='reparameterization', + action_space='reparameterization', actor_head_hidden_size=256, critic_head_hidden_size=256, ), diff --git a/dizoo/mujoco/config/hopper_cql_default_config.py 
b/dizoo/mujoco/config/hopper_cql_default_config.py index 97c5e7a..b4eefb2 100644 --- a/dizoo/mujoco/config/hopper_cql_default_config.py +++ b/dizoo/mujoco/config/hopper_cql_default_config.py @@ -17,7 +17,7 @@ hopper_cql_default_config = dict( obs_shape=11, action_shape=3, twin_critic=True, - actor_head_type='reparameterization', + action_space='reparameterization', actor_head_hidden_size=256, critic_head_hidden_size=256, ), diff --git a/dizoo/mujoco/config/hopper_d4pg_default_config.py b/dizoo/mujoco/config/hopper_d4pg_default_config.py index f15a994..f165a4d 100644 --- a/dizoo/mujoco/config/hopper_d4pg_default_config.py +++ b/dizoo/mujoco/config/hopper_d4pg_default_config.py @@ -21,7 +21,7 @@ hopper_d4pg_default_config = dict( action_shape=3, actor_head_hidden_size=256, critic_head_hidden_size=256, - actor_head_type='regression', + action_space='regression', critic_head_type='categorical', v_min=-100, v_max=100, diff --git a/dizoo/mujoco/config/hopper_ddpg_default_config.py b/dizoo/mujoco/config/hopper_ddpg_default_config.py index 36dae21..97d4864 100644 --- a/dizoo/mujoco/config/hopper_ddpg_default_config.py +++ b/dizoo/mujoco/config/hopper_ddpg_default_config.py @@ -20,7 +20,7 @@ hopper_ddpg_default_config = dict( twin_critic=False, actor_head_hidden_size=256, critic_head_hidden_size=256, - actor_head_type='regression', + action_space='regression', ), learn=dict( update_per_collect=1, diff --git a/dizoo/mujoco/config/hopper_sac_data_generation_default_config.py b/dizoo/mujoco/config/hopper_sac_data_generation_default_config.py index 901c518..6a126d8 100644 --- a/dizoo/mujoco/config/hopper_sac_data_generation_default_config.py +++ b/dizoo/mujoco/config/hopper_sac_data_generation_default_config.py @@ -18,7 +18,7 @@ hopper_sac_data_genearation_default_config = dict( obs_shape=11, action_shape=3, twin_critic=True, - actor_head_type='reparameterization', + action_space='reparameterization', actor_head_hidden_size=256, critic_head_hidden_size=256, ), diff --git 
a/dizoo/mujoco/config/hopper_sac_default_config.py b/dizoo/mujoco/config/hopper_sac_default_config.py index ae777b2..83ddebc 100644 --- a/dizoo/mujoco/config/hopper_sac_default_config.py +++ b/dizoo/mujoco/config/hopper_sac_default_config.py @@ -19,7 +19,7 @@ hopper_sac_default_config = dict( obs_shape=11, action_shape=3, twin_critic=True, - actor_head_type='reparameterization', + action_space='reparameterization', actor_head_hidden_size=256, critic_head_hidden_size=256, ), diff --git a/dizoo/mujoco/config/hopper_td3_bc_default_config.py b/dizoo/mujoco/config/hopper_td3_bc_default_config.py index a2e7c70..8745e67 100644 --- a/dizoo/mujoco/config/hopper_td3_bc_default_config.py +++ b/dizoo/mujoco/config/hopper_td3_bc_default_config.py @@ -19,7 +19,7 @@ hopper_td3_bc_default_config = dict( twin_critic=True, actor_head_hidden_size=256, critic_head_hidden_size=256, - actor_head_type='regression', + action_space='regression', ), learn=dict( normalize_states=True, diff --git a/dizoo/mujoco/config/hopper_td3_data_generation_config.py b/dizoo/mujoco/config/hopper_td3_data_generation_config.py index ce22198..9a0f71c 100644 --- a/dizoo/mujoco/config/hopper_td3_data_generation_config.py +++ b/dizoo/mujoco/config/hopper_td3_data_generation_config.py @@ -20,7 +20,7 @@ halfcheetah_td3_default_config = dict( twin_critic=True, actor_head_hidden_size=256, critic_head_hidden_size=256, - actor_head_type='regression', + action_space='regression', ), learn=dict( update_per_collect=1, diff --git a/dizoo/mujoco/config/hopper_td3_default_config.py b/dizoo/mujoco/config/hopper_td3_default_config.py index 514a355..307ed81 100644 --- a/dizoo/mujoco/config/hopper_td3_default_config.py +++ b/dizoo/mujoco/config/hopper_td3_default_config.py @@ -20,7 +20,7 @@ hopper_td3_default_config = dict( twin_critic=True, actor_head_hidden_size=256, critic_head_hidden_size=256, - actor_head_type='regression', + action_space='regression', ), learn=dict( update_per_collect=1, diff --git 
a/dizoo/mujoco/config/hopper_trex_sac_default_config.py b/dizoo/mujoco/config/hopper_trex_sac_default_config.py index c1176b2..904e02e 100644 --- a/dizoo/mujoco/config/hopper_trex_sac_default_config.py +++ b/dizoo/mujoco/config/hopper_trex_sac_default_config.py @@ -36,7 +36,7 @@ hopper_trex_sac_default_config = dict( obs_shape=11, action_shape=3, twin_critic=True, - actor_head_type='reparameterization', + action_space='reparameterization', actor_head_hidden_size=256, critic_head_hidden_size=256, ), diff --git a/dizoo/mujoco/config/sac_halfcheetah_mbpo_default_config.py b/dizoo/mujoco/config/sac_halfcheetah_mbpo_default_config.py index 04bcf35..74d2f20 100644 --- a/dizoo/mujoco/config/sac_halfcheetah_mbpo_default_config.py +++ b/dizoo/mujoco/config/sac_halfcheetah_mbpo_default_config.py @@ -44,7 +44,7 @@ main_config = dict( obs_shape=obs_shape, action_shape=action_shape, twin_critic=True, - actor_head_type='reparameterization', + action_space='reparameterization', actor_head_hidden_size=256, critic_head_hidden_size=256, ), diff --git a/dizoo/mujoco/config/sac_hopper_mbpo_default_config.py b/dizoo/mujoco/config/sac_hopper_mbpo_default_config.py index d7af7b8..03237f1 100644 --- a/dizoo/mujoco/config/sac_hopper_mbpo_default_config.py +++ b/dizoo/mujoco/config/sac_hopper_mbpo_default_config.py @@ -44,7 +44,7 @@ main_config = dict( obs_shape=obs_shape, action_shape=action_shape, twin_critic=True, - actor_head_type='reparameterization', + action_space='reparameterization', actor_head_hidden_size=256, critic_head_hidden_size=256, ), diff --git a/dizoo/mujoco/config/walker2d_ddpg_default_config.py b/dizoo/mujoco/config/walker2d_ddpg_default_config.py index 2db0ff5..e860c36 100644 --- a/dizoo/mujoco/config/walker2d_ddpg_default_config.py +++ b/dizoo/mujoco/config/walker2d_ddpg_default_config.py @@ -21,7 +21,7 @@ walker2d_ddpg_default_config = dict( twin_critic=False, actor_head_hidden_size=256, critic_head_hidden_size=256, - actor_head_type='regression', + 
action_space='regression', ), learn=dict( update_per_collect=1, diff --git a/dizoo/mujoco/config/walker2d_ddpg_gail_config.py b/dizoo/mujoco/config/walker2d_ddpg_gail_config.py index 2c08820..2009c05 100644 --- a/dizoo/mujoco/config/walker2d_ddpg_gail_config.py +++ b/dizoo/mujoco/config/walker2d_ddpg_gail_config.py @@ -34,7 +34,7 @@ walker2d_ddpg_gail_default_config = dict( twin_critic=False, actor_head_hidden_size=256, critic_head_hidden_size=256, - actor_head_type='regression', + action_space='regression', ), learn=dict( update_per_collect=1, diff --git a/dizoo/mujoco/config/walker2d_sac_default_config.py b/dizoo/mujoco/config/walker2d_sac_default_config.py index 3ba1c2a..ca9895d 100644 --- a/dizoo/mujoco/config/walker2d_sac_default_config.py +++ b/dizoo/mujoco/config/walker2d_sac_default_config.py @@ -18,7 +18,7 @@ walker2d_sac_default_config = dict( obs_shape=17, action_shape=6, twin_critic=True, - actor_head_type='reparameterization', + action_space='reparameterization', actor_head_hidden_size=256, critic_head_hidden_size=256, ), diff --git a/dizoo/mujoco/config/walker2d_td3_default_config.py b/dizoo/mujoco/config/walker2d_td3_default_config.py index 438a6c7..82933df 100644 --- a/dizoo/mujoco/config/walker2d_td3_default_config.py +++ b/dizoo/mujoco/config/walker2d_td3_default_config.py @@ -20,7 +20,7 @@ walker2d_td3_default_config = dict( twin_critic=True, actor_head_hidden_size=256, critic_head_hidden_size=256, - actor_head_type='regression', + action_space='regression', ), learn=dict( update_per_collect=1, diff --git a/dizoo/mujoco/config/walker2d_trex_sac_default_config.py b/dizoo/mujoco/config/walker2d_trex_sac_default_config.py index e02fe2b..0a06c30 100644 --- a/dizoo/mujoco/config/walker2d_trex_sac_default_config.py +++ b/dizoo/mujoco/config/walker2d_trex_sac_default_config.py @@ -36,7 +36,7 @@ walker2d_trex_sac_default_config = dict( obs_shape=17, action_shape=6, twin_critic=True, - actor_head_type='reparameterization', + 
action_space='reparameterization', actor_head_hidden_size=256, critic_head_hidden_size=256, ), diff --git a/dizoo/multiagent_mujoco/config/ant_masac_default_config.py b/dizoo/multiagent_mujoco/config/ant_masac_default_config.py index 885c7d8..c10aa8b 100644 --- a/dizoo/multiagent_mujoco/config/ant_masac_default_config.py +++ b/dizoo/multiagent_mujoco/config/ant_masac_default_config.py @@ -22,7 +22,7 @@ ant_sac_default_config = dict( global_obs_shape=111, action_shape=4, twin_critic=True, - actor_head_type='reparameterization', + action_space='reparameterization', actor_head_hidden_size=256, critic_head_hidden_size=256, ), diff --git a/dizoo/pybullet/config/ant_ddpg_default_config.py b/dizoo/pybullet/config/ant_ddpg_default_config.py index d73b41d..c56fb4a 100644 --- a/dizoo/pybullet/config/ant_ddpg_default_config.py +++ b/dizoo/pybullet/config/ant_ddpg_default_config.py @@ -20,7 +20,7 @@ ant_ddpg_default_config = dict( twin_critic=False, actor_head_hidden_size=256, critic_head_hidden_size=256, - actor_head_type='regression', + action_space='regression', ), learn=dict( update_per_collect=1, diff --git a/dizoo/pybullet/config/ant_sac_default_config.py b/dizoo/pybullet/config/ant_sac_default_config.py index 685a278..702ffb9 100644 --- a/dizoo/pybullet/config/ant_sac_default_config.py +++ b/dizoo/pybullet/config/ant_sac_default_config.py @@ -18,7 +18,7 @@ ant_sac_default_config = dict( obs_shape=111, action_shape=8, twin_critic=True, - actor_head_type='reparameterization', + action_space='reparameterization', actor_head_hidden_size=256, critic_head_hidden_size=256, ), diff --git a/dizoo/pybullet/config/ant_td3_default_config.py b/dizoo/pybullet/config/ant_td3_default_config.py index 7801e13..9065ee9 100644 --- a/dizoo/pybullet/config/ant_td3_default_config.py +++ b/dizoo/pybullet/config/ant_td3_default_config.py @@ -20,7 +20,7 @@ ant_td3_default_config = dict( twin_critic=True, actor_head_hidden_size=256, critic_head_hidden_size=256, - actor_head_type='regression', + 
action_space='regression', ), learn=dict( update_per_collect=1, diff --git a/dizoo/pybullet/config/halfcheetah_ddpg_default_config.py b/dizoo/pybullet/config/halfcheetah_ddpg_default_config.py index c3aba1f..e374830 100644 --- a/dizoo/pybullet/config/halfcheetah_ddpg_default_config.py +++ b/dizoo/pybullet/config/halfcheetah_ddpg_default_config.py @@ -20,7 +20,7 @@ halfcheetah_ddpg_default_config = dict( twin_critic=False, actor_head_hidden_size=256, critic_head_hidden_size=256, - actor_head_type='regression', + action_space='regression', ), learn=dict( update_per_collect=1, diff --git a/dizoo/pybullet/config/halfcheetah_sac_default_config.py b/dizoo/pybullet/config/halfcheetah_sac_default_config.py index 5996295..f6395dc 100644 --- a/dizoo/pybullet/config/halfcheetah_sac_default_config.py +++ b/dizoo/pybullet/config/halfcheetah_sac_default_config.py @@ -18,7 +18,7 @@ halfcheetah_sac_default_config = dict( obs_shape=17, action_shape=6, twin_critic=True, - actor_head_type='reparameterization', + action_space='reparameterization', actor_head_hidden_size=256, critic_head_hidden_size=256, ), diff --git a/dizoo/pybullet/config/halfcheetah_td3_default_config.py b/dizoo/pybullet/config/halfcheetah_td3_default_config.py index 61d3e91..2b39266 100644 --- a/dizoo/pybullet/config/halfcheetah_td3_default_config.py +++ b/dizoo/pybullet/config/halfcheetah_td3_default_config.py @@ -20,7 +20,7 @@ halfcheetah_td3_default_config = dict( twin_critic=True, actor_head_hidden_size=256, critic_head_hidden_size=256, - actor_head_type='regression', + action_space='regression', ), learn=dict( update_per_collect=1, diff --git a/dizoo/pybullet/config/hopper_ddpg_default_config.py b/dizoo/pybullet/config/hopper_ddpg_default_config.py index 1cf101b..3b525da 100644 --- a/dizoo/pybullet/config/hopper_ddpg_default_config.py +++ b/dizoo/pybullet/config/hopper_ddpg_default_config.py @@ -20,7 +20,7 @@ hopper_ddpg_default_config = dict( twin_critic=False, actor_head_hidden_size=256, 
critic_head_hidden_size=256, - actor_head_type='regression', + action_space='regression', ), learn=dict( update_per_collect=1, diff --git a/dizoo/pybullet/config/hopper_sac_default_config.py b/dizoo/pybullet/config/hopper_sac_default_config.py index 307a584..cbac184 100644 --- a/dizoo/pybullet/config/hopper_sac_default_config.py +++ b/dizoo/pybullet/config/hopper_sac_default_config.py @@ -18,7 +18,7 @@ hopper_sac_default_config = dict( obs_shape=11, action_shape=3, twin_critic=True, - actor_head_type='reparameterization', + action_space='reparameterization', actor_head_hidden_size=256, critic_head_hidden_size=256, ), diff --git a/dizoo/pybullet/config/hopper_td3_default_config.py b/dizoo/pybullet/config/hopper_td3_default_config.py index 05f6627..98024ae 100644 --- a/dizoo/pybullet/config/hopper_td3_default_config.py +++ b/dizoo/pybullet/config/hopper_td3_default_config.py @@ -20,7 +20,7 @@ hopper_td3_default_config = dict( twin_critic=True, actor_head_hidden_size=256, critic_head_hidden_size=256, - actor_head_type='regression', + action_space='regression', ), learn=dict( update_per_collect=1, diff --git a/dizoo/pybullet/config/walker2d_ddpg_default_config.py b/dizoo/pybullet/config/walker2d_ddpg_default_config.py index 5b0c7d5..39f1527 100644 --- a/dizoo/pybullet/config/walker2d_ddpg_default_config.py +++ b/dizoo/pybullet/config/walker2d_ddpg_default_config.py @@ -20,7 +20,7 @@ walker2d_ddpg_default_config = dict( twin_critic=False, actor_head_hidden_size=256, critic_head_hidden_size=256, - actor_head_type='regression', + action_space='regression', ), learn=dict( update_per_collect=1, diff --git a/dizoo/pybullet/config/walker2d_sac_default_config.py b/dizoo/pybullet/config/walker2d_sac_default_config.py index 5701101..ce4de31 100644 --- a/dizoo/pybullet/config/walker2d_sac_default_config.py +++ b/dizoo/pybullet/config/walker2d_sac_default_config.py @@ -18,7 +18,7 @@ walker2d_sac_default_config = dict( obs_shape=17, action_shape=6, twin_critic=True, - 
actor_head_type='reparameterization', + action_space='reparameterization', actor_head_hidden_size=256, critic_head_hidden_size=256, ), diff --git a/dizoo/pybullet/config/walker2d_td3_default_config.py b/dizoo/pybullet/config/walker2d_td3_default_config.py index 4f8f22b..ee56008 100644 --- a/dizoo/pybullet/config/walker2d_td3_default_config.py +++ b/dizoo/pybullet/config/walker2d_td3_default_config.py @@ -20,7 +20,7 @@ walker2d_td3_default_config = dict( twin_critic=True, actor_head_hidden_size=256, critic_head_hidden_size=256, - actor_head_type='regression', + action_space='regression', ), learn=dict( update_per_collect=1, -- GitLab