diff --git a/.readthedocs.yml b/.readthedocs.yml
new file mode 100644
index 0000000000000000000000000000000000000000..007bc029a9a96a70b329efaeeed43a715484f539
--- /dev/null
+++ b/.readthedocs.yml
@@ -0,0 +1,8 @@
+version: 2
+formats: all
+sphinx:
+  configuration: docs/conf.py
+python:
+  version: 3.7
+  install:
+    - requirements: docs/requirements.txt
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..5dede4aa4a23f17efed56f090855c4b111d2a84d
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,19 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line.
+SPHINXOPTS    =
+SPHINXBUILD   = sphinx-build
+SOURCEDIR     = .
+BUILDDIR      = build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/PARL-logo-2.png b/docs/PARL-logo-2.png
new file mode 100644
index 0000000000000000000000000000000000000000..a56972f1c59afeb92f11c6402e289b191bbc007a
Binary files /dev/null and b/docs/PARL-logo-2.png differ
diff --git a/docs/api_docs/index.rst b/docs/api_docs/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..71f92106a95729c600c81900743a6cb1b6b2f709
--- /dev/null
+++ b/docs/api_docs/index.rst
@@ -0,0 +1,12 @@
+.. PARL_docs documentation master file, created by
+   sphinx-quickstart on Mon Apr 22 11:12:25 2019.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+PARL Documentation
+=====================================
+
+.. toctree::
+   :maxdepth: 1
+
+   utils
diff --git a/docs/api_docs/utils.rst b/docs/api_docs/utils.rst
new file mode 100644
index 0000000000000000000000000000000000000000..4142ccab9651c57f145046c0ad31f44c34154278
--- /dev/null
+++ b/docs/api_docs/utils.rst
@@ -0,0 +1,6 @@
+parl.Model
+--------------------
+.. automodule:: parl.framework.model_base
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/basic_structure/agent.rst b/docs/basic_structure/agent.rst
new file mode 100644
index 0000000000000000000000000000000000000000..ee8dd336b1c0b464a1997802206b9451f0afb1e5
--- /dev/null
+++ b/docs/basic_structure/agent.rst
@@ -0,0 +1,29 @@
+Agent (*Generate Data Flow*)
+===============================
+
+Methods
+--------
+1. __init__(self, algorithm, gpu_id=None)
+
+   Call build_program here and run the initialization of default_startup_program.
+
+2. build_program(self)
+
+   Use define_predict and define_learn from the Algorithm to build the training and prediction programs. This will be called
+   by the __init__ method of class Agent.
+
+3. predict(self, obs)
+
+   Predict the action given the current observation of the environment. Note that this function only performs
+   prediction and does not explore; to explore the action space, implement your exploration logic in the `sample` method below.
+   This function is typically used at test time.
+
+4. sample(self, obs)
+
+   Predict the action given the current observation of the environment.
+   Additionally, noise is added to the action here to explore new trajectories.
+   This function is typically used at training time.
+
+5. learn(self, obs, action, reward, next_obs, terminal)
+
+   Pass data to the training program to update the model. This method is the training interface for Agent.
diff --git a/docs/basic_structure/algorithm.rst b/docs/basic_structure/algorithm.rst
new file mode 100644
index 0000000000000000000000000000000000000000..f99e41fe6ed3274322e4be48a29d550f2262430a
--- /dev/null
+++ b/docs/basic_structure/algorithm.rst
@@ -0,0 +1,49 @@
+Algorithm (*Backward Part*)
+=============================
+
+Methods
+---------
+1. define_predict(self, obs)
+
+   Use the policy() method of the Model to predict the probabilities of actions.
+
+2. define_learn(self, obs, action, reward, next_obs, terminal)
+
+   Define the loss function and the optimizer here to update the policy model.
+
+An Example
+-----------
+
+
+
+.. code-block:: python
+   :linenos:
+
+   # From https://github.com/PaddlePaddle/PARL/blob/develop/parl/algorithms/policy_gradient.py
+
+   # assumed imports for this snippet (see the source file above):
+   import paddle.fluid as fluid
+   import parl.layers as layers
+   from parl.framework.algorithm_base import Algorithm
+
+   class PolicyGradient(Algorithm):
+       def __init__(self, model, hyperparas):
+           Algorithm.__init__(self, model, hyperparas)
+           self.model = model
+           self.lr = hyperparas['lr']
+
+       def define_predict(self, obs):
+           """ Use the policy model self.model to predict the action probability.
+           """
+           return self.model.policy(obs)
+
+       def define_learn(self, obs, action, reward):
+           """ Update the policy model self.model with the policy gradient algorithm.
+           """
+           act_prob = self.model.policy(obs)
+           log_prob = layers.cross_entropy(act_prob, action)
+           cost = log_prob * reward
+           cost = layers.reduce_mean(cost)
+           optimizer = fluid.optimizer.Adam(self.lr)
+           optimizer.minimize(cost)
+           return cost
diff --git a/docs/basic_structure/model.rst b/docs/basic_structure/model.rst
new file mode 100644
index 0000000000000000000000000000000000000000..861e809a029e8ceadf0f04851d4257b510d8734e
--- /dev/null
+++ b/docs/basic_structure/model.rst
@@ -0,0 +1,44 @@
+Model (*Forward Part*)
+=======================
+
+Methods
+----------
+1. policy(self, *args)
+
+   Define the structure of the policy network here. The Algorithm will call this method to predict probabilities of actions.
+   It is optional.
+
+2. value(self, *args)
+
+   Return: values: a dict of estimated values for the current observations and states,
+   for example, "q_value" and "v_value".
+
+3. sync_params_to(self, target_net, gpu_id, decay=0.0, share_vars_parallel_executor=None)
+
+   This method copies the parameters from the current network to the target network; the two networks must have the same structure.
+
+An example
+------------
+.. code-block:: python
+   :linenos:
+
+   # assumed imports for this snippet:
+   from copy import deepcopy
+   import parl.layers as layers
+   from parl.framework.model_base import Model
+
+   class MLPModel(Model):
+       def __init__(self):
+           self.fc = layers.fc(size=64)
+
+       def policy(self, obs):
+           out = self.fc(obs)
+           return out
+
+   model = MLPModel()
+   target_model = deepcopy(model)  # automatically creates new unique parameter names for target_model.fc
+
+   # build the program
+   x = layers.data(name='x', shape=[100], dtype="float32")
+   y1 = model.policy(x)
+   y2 = target_model.policy(x)
diff --git a/docs/basic_structure/overview.rst b/docs/basic_structure/overview.rst
new file mode 100644
index 0000000000000000000000000000000000000000..5e7a202f31ce4017e137bdb28cd5cae131ddf678
--- /dev/null
+++ b/docs/basic_structure/overview.rst
@@ -0,0 +1,36 @@
+Overview
+==========
+Three Components
+------------------
+PARL is made up of three components: **Model, Algorithm, Agent**. They are constructed layer by layer to build the main body of the framework.
+
+Model
+---------
+A Model is owned by an Algorithm. The Model is responsible for the entire network model (the **forward part**) of the specific problem.
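+
+For instance, a minimal sketch of a forward part (assuming the ``Model`` base class from ``parl.framework.model_base`` and the ``parl.layers`` wrappers; the class name and layer sizes are only illustrative):
+
+.. code-block:: python
+
+   import parl.layers as layers
+   from parl.framework.model_base import Model
+
+   class PolicyModel(Model):
+       def __init__(self, act_dim):
+           # two fully connected layers ending in action probabilities
+           self.fc1 = layers.fc(size=32, act='relu')
+           self.fc2 = layers.fc(size=act_dim, act='softmax')
+
+       def policy(self, obs):
+           return self.fc2(self.fc1(obs))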
+
+Algorithm
+----------
+Algorithm defines the way to update the parameters in the Model (the **backward part**). We have already implemented some commonly
+used algorithms__, such as DQN/DDPG/PPO/A3C, which you can import and use directly.
+
+.. __: https://github.com/PaddlePaddle/PARL/tree/develop/parl/algorithms
+
+Agent
+--------
+Agent interacts with the environment and **generates the data flow** outside the Algorithm.
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 0000000000000000000000000000000000000000..f4985d6920bb217a3070cf856f6af3174e329175
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,87 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Configuration file for the Sphinx documentation builder.
+#
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# http://www.sphinx-doc.org/en/master/config
+
+# -- Path setup --------------------------------------------------------------
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+# import os
+# import sys
+# sys.path.insert(0, os.path.abspath('.'))
+
+# -- Project information -----------------------------------------------------
+
+import sphinx_rtd_theme
+import os
+import sys
+import parl
+
+# The full version, including alpha/beta/rc tags
+release = parl.__version__
+project = 'PARL'
+copyright = '2019, nlp-ol@baidu.com'
+author = 'nlp-ol@baidu.com'
+
+# -- General configuration ---------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+    'sphinx.ext.viewcode',
+    'sphinx.ext.autodoc',
+    'sphinx.ext.todo',
+    'sphinx.ext.napoleon',
+    'sphinx.ext.mathjax',
+    'sphinx.ext.intersphinx',
+]
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#
+# This is also used if you do content translation via gettext catalogs.
+# Usually you set "language" from the command line for these cases.
+language = 'zh_CN'
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = []
+
+# -- Options for HTML output -------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages. See the documentation for
+# a list of builtin themes.
+#
+html_theme = 'sphinx_rtd_theme'
+html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+html_logo = './PARL-logo-2.png'
+
+master_doc = 'index'
diff --git a/docs/features.rst b/docs/features.rst
new file mode 100644
index 0000000000000000000000000000000000000000..9b5d852232da5f92877412260bad18444039840f
--- /dev/null
+++ b/docs/features.rst
@@ -0,0 +1,18 @@
+Features
+===========
+
+**1. Reproducible**
+
+| We provide algorithms that stably reproduce the results of many influential reinforcement learning algorithms.
+
+**2. Large Scale**
+
+| Ability to support high-performance parallelization of training with thousands of CPUs and multiple GPUs.
+
+**3. Reusable**
+
+| Algorithms provided in the repository can be directly adapted to new tasks by defining a forward network; the training mechanism will be built automatically.
+
+**4. Extensible**
+
+| Build new algorithms quickly by inheriting the abstract classes in the framework.
diff --git a/docs/implementations.rst b/docs/implementations.rst
new file mode 100644
index 0000000000000000000000000000000000000000..df9a664efe7525e3acb064056cdbd5d2a09a8558
--- /dev/null
+++ b/docs/implementations.rst
@@ -0,0 +1,2 @@
+Implemented Algorithms
+========================
diff --git a/docs/index.rst b/docs/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..1ca1990047b149360f017374baef1c3e47749409
--- /dev/null
+++ b/docs/index.rst
@@ -0,0 +1,90 @@
+.. PARL_docs documentation master file, created by
+   sphinx-quickstart on Mon Apr 22 11:12:25 2019.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+PARL
+=====================================
+*PARL is a flexible, distributed and eager-mode-oriented reinforcement learning framework.*
+
+Features
+----------------
++----------------------------------------------+-----------------------------------------------+
+| **Eager Mode**                               | **Distributed Training**                      |
++----------------------------------------------+-----------------------------------------------+
+|.. code-block:: python                        |.. code-block:: python                         |
+|                                              |                                               |
+|    # Target Network in DQN                   |    # Real multi-thread programming            |
+|                                              |    # without the GIL limitation               |
+|                                              |                                               |
+|    target_network = copy.deepcopy(Q_network) |    @parl.remote_class                         |
+|    ...                                       |    class HelloWorld(object):                  |
+|    # reset parameters periodically           |        def sum(self, a, b):                   |
+|    target_network.load(Q_network)            |            return a + b                       |
+|                                              |                                               |
+|                                              |    parl.init()                                |
+|                                              |    obj = HelloWorld()                         |
+|                                              |    # does NOT consume local resources         |
+|                                              |    ans = obj.sum(a, b)                        |
+|                                              |                                               |
++----------------------------------------------+-----------------------------------------------+
+
+| PARL is distributed on PyPI and can be installed with pip:
+
+.. centered:: ``pip install parl``
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Installation
+
+   installation.rst
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Features
+
+   features.rst
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Basic Structure
+
+   ./basic_structure/overview
+   ./basic_structure/model
+   ./basic_structure/algorithm
+   ./basic_structure/agent
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Tutorial
+
+   tutorial.rst
+
+.. toctree::
+   :maxdepth: 1
+   :caption: High-quality Implementations
+
+   implementations.rst
+
+.. toctree::
+   :maxdepth: 1
+   :caption: APIs
+
+   ./api_docs/utils
+   ./api_docs/index
+
+Abstractions
+----------------
+.. image:: ../.github/abstractions.png
+   :align: center
+   :width: 400px
+
+| PARL aims to build an **agent** for training algorithms to perform complex tasks.
+| The main abstractions introduced by PARL that are used to build an agent recursively are the following:
+
+* **Model** is abstracted to construct the forward network, which defines a policy network or critic network given the state as input.
+
+* **Algorithm** describes the mechanism to update the parameters in the *model* and often contains at least one model.
+
+* **Agent**, a data bridge between the *environment* and the *algorithm*, is responsible for data I/O with the outside environment and describes data preprocessing before feeding data into the training process.
diff --git a/docs/installation.rst b/docs/installation.rst
new file mode 100644
index 0000000000000000000000000000000000000000..fd0710c4bb7427a02dafcdb4af8e52c0245483ce
--- /dev/null
+++ b/docs/installation.rst
@@ -0,0 +1,13 @@
+Installation
+=============
+Dependencies
+-------------------
+- Python 2.7 or 3.5+.
+- PaddlePaddle >=1.2.1 (**optional**; not required if you only want to use the parallelization APIs)
+
+Install
+-------------
+PARL is distributed on PyPI and can be installed with pip:
+::
+
+    pip install parl
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..01cee6c47d60b8b08e1e9f8516c593c810bedc17
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1 @@
+parl
diff --git a/docs/tutorial.rst b/docs/tutorial.rst
new file mode 100644
index 0000000000000000000000000000000000000000..a2f31870c31f9deba01b0335a8a688b6b814b581
--- /dev/null
+++ b/docs/tutorial.rst
@@ -0,0 +1,23 @@
+Tutorial
+===========
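+
+A minimal sketch of composing the three PARL abstractions (the model class, layer sizes and hyperparameters below are illustrative placeholders; ``PolicyGradient`` and its ``hyperparas`` argument follow the example in the Algorithm section):
+
+.. code-block:: python
+
+   import parl.layers as layers
+   from parl.algorithms.policy_gradient import PolicyGradient
+   from parl.framework.model_base import Model
+
+   class PolicyModel(Model):
+       def __init__(self, act_dim):
+           self.fc1 = layers.fc(size=32, act='relu')
+           self.fc2 = layers.fc(size=act_dim, act='softmax')
+
+       def policy(self, obs):
+           return self.fc2(self.fc1(obs))
+
+   # Model (forward part) is wrapped by Algorithm (backward part);
+   # a user-defined Agent then wraps the algorithm to generate the data flow.
+   model = PolicyModel(act_dim=2)
+   alg = PolicyGradient(model, hyperparas={'lr': 1e-3})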