From fbde3d5ffe43ca0586b0dc7a87a96adf483b289d Mon Sep 17 00:00:00 2001 From: PaParaZz1 Date: Fri, 17 Dec 2021 07:12:30 +0000 Subject: [PATCH] =?UTF-8?q?Deploying=20to=20gh-pages=20from=20=20@=2016833?= =?UTF-8?q?c62b43a223f79d6dac16f362de7b02057c1=20=F0=9F=9A=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- _modules/ding/policy/sac.html | 9 ++++++--- hands_on/sac.html | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/_modules/ding/policy/sac.html b/_modules/ding/policy/sac.html index 20098ee..e173472 100644 --- a/_modules/ding/policy/sac.html +++ b/_modules/ding/policy/sac.html @@ -185,7 +185,7 @@ class SACDiscretePolicy(Policy): r""" Overview: - Policy class of Discrete SAC algorithm. + Policy class of discrete SAC algorithm. Config: == ==================== ======== ============= ================================= ======================= @@ -573,7 +573,10 @@ """ self._unroll_len = self._cfg.collect.unroll_len self._multi_agent = self._cfg.multi_agent - self._collect_model = model_wrap(self._model, wrapper_name='eps_greedy_sample') + # Empirically, we found that eps_greedy_multinomial_sample works better than multinomial_sample + # and eps_greedy_sample, and we don't divide logit by alpha, + # for the details please refer to ding/model/wrapper/model_wrappers + self._collect_model = model_wrap(self._model, wrapper_name='eps_greedy_multinomial_sample') self._collect_model.reset() def _forward_collect(self, data: dict, eps: float) -> dict: @@ -682,7 +685,7 @@ class SACPolicy(Policy): r""" Overview: - Policy class of SAC algorithm. + Policy class of continuous SAC algorithm. 
https://arxiv.org/pdf/1801.01290.pdf diff --git a/hands_on/sac.html b/hands_on/sac.html index c25862d..26074df 100644 --- a/hands_on/sac.html +++ b/hands_on/sac.html @@ -282,7 +282,7 @@ We implement reparameterization trick through configuring class ding.policy.sac.SACPolicy(cfg: dict, model: Optional[Union[type, torch.nn.modules.module.Module]] = None, enable_field: Optional[List[str]] = None)[source]¶
-
Overview:

Policy class of SAC algorithm.

+
Overview:

Policy class of continuous SAC algorithm.

https://arxiv.org/pdf/1801.01290.pdf

Config:
-- GitLab