diff --git a/.readthedocs.yml b/.readthedocs.yml
new file mode 100644
index 0000000000000000000000000000000000000000..007bc029a9a96a70b329efaeeed43a715484f539
--- /dev/null
+++ b/.readthedocs.yml
@@ -0,0 +1,8 @@
+version: 2
+formats: all
+sphinx:
+  configuration: docs/conf.py
+python:
+  version: 3.7
+  install:
+    - requirements: docs/requirements.txt
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..5dede4aa4a23f17efed56f090855c4b111d2a84d
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,19 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line.
+SPHINXOPTS    =
+SPHINXBUILD   = sphinx-build
+SOURCEDIR     = .
+BUILDDIR      = build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/PARL-logo-2.png b/docs/PARL-logo-2.png
new file mode 100644
index 0000000000000000000000000000000000000000..a56972f1c59afeb92f11c6402e289b191bbc007a
Binary files /dev/null and b/docs/PARL-logo-2.png differ
diff --git a/docs/api_docs/index.rst b/docs/api_docs/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..71f92106a95729c600c81900743a6cb1b6b2f709
--- /dev/null
+++ b/docs/api_docs/index.rst
@@ -0,0 +1,12 @@
+.. PARL_docs documentation master file, created by
+   sphinx-quickstart on Mon Apr 22 11:12:25 2019.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+PARL Documentation
+=====================================
+
+.. toctree::
+   :maxdepth: 1
+
+   utils
diff --git a/docs/api_docs/utils.rst b/docs/api_docs/utils.rst
new file mode 100644
index 0000000000000000000000000000000000000000..4142ccab9651c57f145046c0ad31f44c34154278
--- /dev/null
+++ b/docs/api_docs/utils.rst
@@ -0,0 +1,6 @@
+parl.Model
+--------------------
+.. automodule:: parl.framework.model_base
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/basic_structure/agent.rst b/docs/basic_structure/agent.rst
new file mode 100644
index 0000000000000000000000000000000000000000..ee8dd336b1c0b464a1997802206b9451f0afb1e5
--- /dev/null
+++ b/docs/basic_structure/agent.rst
@@ -0,0 +1,29 @@
+Agent (*Generate Data Flow*)
+===============================
+
+Methods
+--------
+1. __init__(self, algorithm, gpu_id=None)
+
+   Call build_program here and run the initialization of default_startup_program.
+
+2. build_program(self)
+
+   Use define_predict and define_learn from the Algorithm to build the training and prediction programs. This will be called
+   by the __init__ method of class Agent.
+
+3. predict(self, obs)
+
+   Predict the action given the current observation of the environment. Note that this function only performs
+   prediction and does not explore; to explore the action space, implement your exploration logic in the `sample` method below.
+   This function is typically used at test time.
+
+4. sample(self, obs)
+
+   Predict the action given the current observation of the environment.
+   Additionally, noise is added to the action here to explore new trajectories.
+   This function is typically used at training time.
+
+5. learn(self, obs, action, reward, next_obs, terminal)
+
+   Pass data to the training program to update the model. This method is the training interface for Agent.
diff --git a/docs/basic_structure/algorithm.rst b/docs/basic_structure/algorithm.rst
new file mode 100644
index 0000000000000000000000000000000000000000..f99e41fe6ed3274322e4be48a29d550f2262430a
--- /dev/null
+++ b/docs/basic_structure/algorithm.rst
@@ -0,0 +1,49 @@
+Algorithm (*Backward Part*)
+=============================
+
+Methods
+---------
+1. define_predict(self, obs)
+
+   Use the policy() method of the Model to predict the probabilities of actions.
+
+2. define_learn(self, obs, action, reward, next_obs, terminal)
+
+   Define the loss function and the optimizer here to update the policy model.
+
+An Example
+-----------
+
+
+
+.. code-block:: python
+   :linenos:
+
+   # From https://github.com/PaddlePaddle/PARL/blob/develop/parl/algorithms/policy_gradient.py
+
+   # assumed imports for this snippet (see the source file above):
+   import paddle.fluid as fluid
+   import parl.layers as layers
+   from parl.framework.algorithm_base import Algorithm
+
+   class PolicyGradient(Algorithm):
+       def __init__(self, model, hyperparas):
+           Algorithm.__init__(self, model, hyperparas)
+           self.model = model
+           self.lr = hyperparas['lr']
+
+       def define_predict(self, obs):
+           """ Use the policy model self.model to predict the action probability.
+           """
+           return self.model.policy(obs)
+
+       def define_learn(self, obs, action, reward):
+           """ Update the policy model self.model with the policy gradient algorithm.
+           """
+           act_prob = self.model.policy(obs)
+           log_prob = layers.cross_entropy(act_prob, action)
+           cost = log_prob * reward
+           cost = layers.reduce_mean(cost)
+           optimizer = fluid.optimizer.Adam(self.lr)
+           optimizer.minimize(cost)
+           return cost
diff --git a/docs/basic_structure/model.rst b/docs/basic_structure/model.rst
new file mode 100644
index 0000000000000000000000000000000000000000..861e809a029e8ceadf0f04851d4257b510d8734e
--- /dev/null
+++ b/docs/basic_structure/model.rst
@@ -0,0 +1,44 @@
+Model (*Forward Part*)
+=======================
+
+Methods
+----------
+1. policy(self, *args)
+
+   Define the structure of the policy network here. The Algorithm will call this method to predict probabilities of actions.
+   It is optional.
+
+2. value(self, *args)
+
+   Return: values: a dict of estimated values for the current observations and states,
+   for example, "q_value" and "v_value".
+
+3. sync_params_to(self, target_net, gpu_id, decay=0.0, share_vars_parallel_executor=None)
+
+   This method copies the parameters from the current network to the target network; the two networks must have the same structure.
+
+An example
+------------
+.. code-block:: python
+   :linenos:
+
+   # assumed imports for this snippet:
+   from copy import deepcopy
+   import parl.layers as layers
+   from parl.framework.model_base import Model
+
+   class MLPModel(Model):
+       def __init__(self):
+           self.fc = layers.fc(size=64)
+
+       def policy(self, obs):
+           out = self.fc(obs)
+           return out
+
+   model = MLPModel()
+   target_model = deepcopy(model)  # automatically creates new unique parameter names for target_model.fc
+
+   # build the program
+   x = layers.data(name='x', shape=[100], dtype="float32")
+   y1 = model.policy(x)
+   y2 = target_model.policy(x)
diff --git a/docs/basic_structure/overview.rst b/docs/basic_structure/overview.rst
new file mode 100644
index 0000000000000000000000000000000000000000..5e7a202f31ce4017e137bdb28cd5cae131ddf678
--- /dev/null
+++ b/docs/basic_structure/overview.rst
@@ -0,0 +1,36 @@
+Overview
+==========
+Three Components
+------------------
+PARL is made up of three components: **Model, Algorithm, Agent**. They are constructed layer by layer to build the main body of the framework.
+
+Model
+---------
+A Model is owned by an Algorithm. The Model is responsible for the entire network model (the **forward part**) of the specific problem.
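+
+For instance, a minimal sketch of a forward part (assuming the ``Model`` base class from ``parl.framework.model_base`` and the ``parl.layers`` wrappers; the class name and layer sizes are only illustrative):
+
+.. code-block:: python
+
+   import parl.layers as layers
+   from parl.framework.model_base import Model
+
+   class PolicyModel(Model):
+       def __init__(self, act_dim):
+           # two fully connected layers ending in action probabilities
+           self.fc1 = layers.fc(size=32, act='relu')
+           self.fc2 = layers.fc(size=act_dim, act='softmax')
+
+       def policy(self, obs):
+           return self.fc2(self.fc1(obs))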
+
+Algorithm
+----------
+Algorithm defines the way to update the parameters in the Model (the **backward part**). We have already implemented some commonly
+used algorithms__, such as DQN/DDPG/PPO/A3C, which you can import and use directly.
+
+.. __: https://github.com/PaddlePaddle/PARL/tree/develop/parl/algorithms
+
+Agent
+--------
+Agent interacts with the environment and **generates the data flow** outside the Algorithm.
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 0000000000000000000000000000000000000000..f4985d6920bb217a3070cf856f6af3174e329175
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,87 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Configuration file for the Sphinx documentation builder.
+#
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# http://www.sphinx-doc.org/en/master/config
+
+# -- Path setup --------------------------------------------------------------
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+# import os
+# import sys
+# sys.path.insert(0, os.path.abspath('.'))
+
+# -- Project information -----------------------------------------------------
+
+import sphinx_rtd_theme
+import os
+import sys
+import parl
+
+# The full version, including alpha/beta/rc tags
+release = parl.__version__
+project = 'PARL'
+copyright = '2019, nlp-ol@baidu.com'
+author = 'nlp-ol@baidu.com'
+
+# -- General configuration ---------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+    'sphinx.ext.viewcode',
+    'sphinx.ext.autodoc',
+    'sphinx.ext.todo',
+    'sphinx.ext.napoleon',
+    'sphinx.ext.mathjax',
+    'sphinx.ext.intersphinx',
+]
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#
+# This is also used if you do content translation via gettext catalogs.
+# Usually you set "language" from the command line for these cases.
+language = 'zh_CN'
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = []
+
+# -- Options for HTML output -------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages. See the documentation for
+# a list of builtin themes.
+#
+html_theme = 'sphinx_rtd_theme'
+html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+html_logo = './PARL-logo-2.png'
+
+master_doc = 'index'
diff --git a/docs/features.rst b/docs/features.rst
new file mode 100644
index 0000000000000000000000000000000000000000..9b5d852232da5f92877412260bad18444039840f
--- /dev/null
+++ b/docs/features.rst
@@ -0,0 +1,18 @@
+Features
+===========
+
+**1. Reproducible**
+
+| We provide algorithms that stably reproduce the results of many influential reinforcement learning algorithms.
+
+**2. Large Scale**
+
+| Ability to support high-performance parallelization of training with thousands of CPUs and multiple GPUs.
+
+**3. Reusable**
+
+| Algorithms provided in the repository can be directly adapted to new tasks by defining a forward network; the training mechanism will be built automatically.
+
+**4. Extensible**
+
+| Build new algorithms quickly by inheriting the abstract classes in the framework.
diff --git a/docs/implementations.rst b/docs/implementations.rst
new file mode 100644
index 0000000000000000000000000000000000000000..df9a664efe7525e3acb064056cdbd5d2a09a8558
--- /dev/null
+++ b/docs/implementations.rst
@@ -0,0 +1,2 @@
+Implemented Algorithms
+========================
diff --git a/docs/index.rst b/docs/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..1ca1990047b149360f017374baef1c3e47749409
--- /dev/null
+++ b/docs/index.rst
@@ -0,0 +1,90 @@
+.. PARL_docs documentation master file, created by
+   sphinx-quickstart on Mon Apr 22 11:12:25 2019.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+PARL
+=====================================
+*PARL is a flexible, distributed and eager-mode-oriented reinforcement learning framework.*
+
+Features
+----------------
++----------------------------------------------+-----------------------------------------------+
+| **Eager Mode**                               | **Distributed Training**                      |
++----------------------------------------------+-----------------------------------------------+
+|.. code-block:: python                        |.. code-block:: python                         |
+|                                              |                                               |
+|    # Target Network in DQN                   |    # Real multi-thread programming            |
+|                                              |    # without the GIL limitation               |
+|                                              |                                               |
+|    target_network = copy.deepcopy(Q_network) |    @parl.remote_class                         |
+|    ...                                       |    class HelloWorld(object):                  |
+|    # reset parameters periodically           |        def sum(self, a, b):                   |
+|    target_network.load(Q_network)            |            return a + b                       |
+|                                              |                                               |
+|                                              |    parl.init()                                |
+|                                              |    obj = HelloWorld()                         |
+|                                              |    # does NOT consume local resources         |
+|                                              |    ans = obj.sum(a, b)                        |
+|                                              |                                               |
++----------------------------------------------+-----------------------------------------------+
+
+| PARL is distributed on PyPI and can be installed with pip:
+
+.. centered:: ``pip install parl``
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Installation
+
+   installation.rst
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Features
+
+   features.rst
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Basic Structure
+
+   ./basic_structure/overview
+   ./basic_structure/model
+   ./basic_structure/algorithm
+   ./basic_structure/agent
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Tutorial
+
+   tutorial.rst
+
+.. toctree::
+   :maxdepth: 1
+   :caption: High-quality Implementations
+
+   implementations.rst
+
+.. toctree::
+   :maxdepth: 1
+   :caption: APIs
+
+   ./api_docs/utils
+   ./api_docs/index
+
+Abstractions
+----------------
+.. image:: ../.github/abstractions.png
+   :align: center
+   :width: 400px
+
+| PARL aims to build an **agent** for training algorithms to perform complex tasks.
+| The main abstractions introduced by PARL that are used to build an agent recursively are the following:
+
+* **Model** is abstracted to construct the forward network, which defines a policy network or critic network given the state as input.
+
+* **Algorithm** describes the mechanism to update the parameters in the *model* and often contains at least one model.
+
+* **Agent**, a data bridge between the *environment* and the *algorithm*, is responsible for data I/O with the outside environment and describes data preprocessing before feeding data into the training process.
diff --git a/docs/installation.rst b/docs/installation.rst
new file mode 100644
index 0000000000000000000000000000000000000000..fd0710c4bb7427a02dafcdb4af8e52c0245483ce
--- /dev/null
+++ b/docs/installation.rst
@@ -0,0 +1,13 @@
+Installation
+=============
+Dependencies
+-------------------
+- Python 2.7 or 3.5+.
+- PaddlePaddle >=1.2.1 (**optional**; not required if you only want to use the parallelization APIs)
+
+Install
+-------------
+PARL is distributed on PyPI and can be installed with pip:
+::
+
+    pip install parl
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..01cee6c47d60b8b08e1e9f8516c593c810bedc17
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1 @@
+parl
diff --git a/docs/tutorial.rst b/docs/tutorial.rst
new file mode 100644
index 0000000000000000000000000000000000000000..a2f31870c31f9deba01b0335a8a688b6b814b581
--- /dev/null
+++ b/docs/tutorial.rst
@@ -0,0 +1,23 @@
+Tutorial
+===========
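+
+A minimal sketch of composing the three PARL abstractions (the model class, layer sizes and hyperparameters below are illustrative placeholders; ``PolicyGradient`` and its ``hyperparas`` argument follow the example in the Algorithm section):
+
+.. code-block:: python
+
+   import parl.layers as layers
+   from parl.algorithms.policy_gradient import PolicyGradient
+   from parl.framework.model_base import Model
+
+   class PolicyModel(Model):
+       def __init__(self, act_dim):
+           self.fc1 = layers.fc(size=32, act='relu')
+           self.fc2 = layers.fc(size=act_dim, act='softmax')
+
+       def policy(self, obs):
+           return self.fc2(self.fc1(obs))
+
+   # Model (forward part) is wrapped by Algorithm (backward part);
+   # a user-defined Agent then wraps the algorithm to generate the data flow.
+   model = PolicyModel(act_dim=2)
+   alg = PolicyGradient(model, hyperparas={'lr': 1e-3})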