提交 fbde3d5f 编写于 作者: P PaParaZz1

Deploying to gh-pages from @ 16833c62 🚀

上级 b2318bf4
......@@ -185,7 +185,7 @@
<span class="k">class</span> <span class="nc">SACDiscretePolicy</span><span class="p">(</span><span class="n">Policy</span><span class="p">):</span>
<span class="sa">r</span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Overview:</span>
<span class="sd"> Policy class of Discrete SAC algorithm.</span>
<span class="sd"> Policy class of discrete SAC algorithm.</span>
<span class="sd"> Config:</span>
<span class="sd"> == ==================== ======== ============= ================================= =======================</span>
......@@ -573,7 +573,10 @@
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_unroll_len</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_cfg</span><span class="o">.</span><span class="n">collect</span><span class="o">.</span><span class="n">unroll_len</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_multi_agent</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_cfg</span><span class="o">.</span><span class="n">multi_agent</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_collect_model</span> <span class="o">=</span> <span class="n">model_wrap</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_model</span><span class="p">,</span> <span class="n">wrapper_name</span><span class="o">=</span><span class="s1">&#39;eps_greedy_sample&#39;</span><span class="p">)</span>
<span class="c1"># Empirically, we found that eps_greedy_multinomial_sample works better than multinomial_sample</span>
<span class="c1"># and eps_greedy_sample, and we don&#39;t divide logit by alpha,</span>
<span class="c1"># for the details please refer to ding/model/wrapper/model_wrappers</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_collect_model</span> <span class="o">=</span> <span class="n">model_wrap</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_model</span><span class="p">,</span> <span class="n">wrapper_name</span><span class="o">=</span><span class="s1">&#39;eps_greedy_multinomial_sample&#39;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_collect_model</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">_forward_collect</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="nb">dict</span><span class="p">,</span> <span class="n">eps</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">dict</span><span class="p">:</span>
......@@ -682,7 +685,7 @@
<span class="k">class</span> <span class="nc">SACPolicy</span><span class="p">(</span><span class="n">Policy</span><span class="p">):</span>
<span class="sa">r</span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Overview:</span>
<span class="sd"> Policy class of SAC algorithm.</span>
<span class="sd"> Policy class of continuous SAC algorithm.</span>
<span class="sd"> https://arxiv.org/pdf/1801.01290.pdf</span>
......
......@@ -282,7 +282,7 @@ We implement reparameterization trick through configuring <code class="docutils
<dt class="sig sig-object py" id="ding.policy.sac.SACPolicy">
<em class="property"><span class="pre">class</span> </em><span class="sig-prename descclassname"><span class="pre">ding.policy.sac.</span></span><span class="sig-name descname"><span class="pre">SACPolicy</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">cfg</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><span class="pre">dict</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">model</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><span class="pre">Union</span><span class="p"><span class="pre">[</span></span><span class="pre">type</span><span class="p"><span class="pre">,</span> </span><span class="pre">torch.nn.modules.module.Module</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></span> <span class="o"><span class="pre">=</span></span> <span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">enable_field</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></span> <span class="o"><span class="pre">=</span></span> <span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/ding/policy/sac.html#SACPolicy"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#ding.policy.sac.SACPolicy" title="Permalink to this definition"></a></dt>
<dd><dl>
<dt>Overview:</dt><dd><p>Policy class of SAC algorithm.</p>
<dt>Overview:</dt><dd><p>Policy class of continuous SAC algorithm.</p>
<p><a class="reference external" href="https://arxiv.org/pdf/1801.01290.pdf">https://arxiv.org/pdf/1801.01290.pdf</a></p>
</dd>
<dt>Config:</dt><dd><table class="docutils align-default">
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册