diff --git a/README_cn.md b/README_cn.md
index 5ac0e30305501b0b9f6308a415f4b57a32c89374..3ca13bacc465506f2ede4f2bb4586e7bce1ae5f6 100644
--- a/README_cn.md
+++ b/README_cn.md
@@ -61,14 +61,9 @@ PaddleDetection的目的是为工业界和学术界提供大量易使用的目
 
 ## 开始
 
-在预测阶段，可以通过运行以下指令得到可视化结果并保存在`output`目录下。
-
-```bash
-export PYTHONPATH=`pwd`:$PYTHONPATH
-python tools/infer.py -c configs/mask_rcnn_r50_1x.yml \
-    -o weights=https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r50_1x.tar \
-    --infer_img=demo/000000570688.jpg
-```
+## 快速入门
+
+PaddleDetection提供了快速开始的demo利于用户能够快速上手，示例请参考[QUICK_STARTED_cn.md](docs/QUICK_STARTED_cn.md)
 
 更多训练及评估流程，请参考[GETTING_STARTED_cn.md](docs/GETTING_STARTED_cn.md).
 
diff --git a/configs/yolov3_mobilenet_v1_fruit.yml b/configs/yolov3_mobilenet_v1_fruit.yml
new file mode 100644
index 0000000000000000000000000000000000000000..b7922489a35ce637f55376c9460e626ff81ed3d2
--- /dev/null
+++ b/configs/yolov3_mobilenet_v1_fruit.yml
@@ -0,0 +1,125 @@
+architecture: YOLOv3
+train_feed: YoloTrainFeed
+eval_feed: YoloEvalFeed
+test_feed: YoloTestFeed
+use_gpu: true
+max_iters: 20000
+log_smooth_window: 20
+save_dir: output
+snapshot_iter: 200
+metric: VOC
+map_type: 11point
+pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1.tar
+weights: output/yolov3_mobilenet_v1_fruit/best_model  # read by tools/eval.py and tools/infer.py
+num_classes: 3  # fruit dataset categories: apple, orange, banana
+finetune_exclude_pretrained_params: ['yolo_output']  # matching params are not loaded from pretrain_weights
+
+YOLOv3:
+  backbone: MobileNet
+  yolo_head: YOLOv3Head
+
+MobileNet:
+  norm_type: sync_bn
+  norm_decay: 0.
+  conv_group_scale: 1
+  with_extra_blocks: false
+
+YOLOv3Head:
+  anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
+  anchors: [[10, 13], [16, 30], [33, 23],
+            [30, 61], [62, 45], [59, 119],
+            [116, 90], [156, 198], [373, 326]]
+  norm_decay: 0.
+  ignore_thresh: 0.7
+  label_smooth: true
+  nms:
+    background_label: -1
+    keep_top_k: 100
+    nms_threshold: 0.45
+    nms_top_k: 1000
+    normalized: false
+    score_threshold: 0.01
+
+LearningRate:
+  base_lr: 0.00001
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones:
+    - 15000
+    - 18000
+  - !LinearWarmup
+    start_factor: 0.
+    steps: 100
+
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0005
+    type: L2
+
+YoloTrainFeed:
+  batch_size: 1
+  dataset:
+    dataset_dir: dataset/fruit/fruit-detection  # created by dataset/fruit/download.sh
+    annotation: ./ImageSets/Main/train.txt
+    image_dir: ./JPEGImages
+    use_default_label: false
+  num_workers: 16
+  bufsize: 128
+  use_process: true
+  mixup_epoch: -1
+  sample_transforms:
+  - !DecodeImage
+    to_rgb: true
+    with_mixup: false
+  - !NormalizeBox {}
+  - !ExpandImage
+    max_ratio: 4.0
+    mean: [123.675, 116.28, 103.53]
+    prob: 0.5
+  - !RandomInterpImage
+    max_size: 0
+    target_size: 608
+  - !RandomFlipImage
+    is_mask_flip: false
+    is_normalized: true
+    prob: 0.5
+  - !NormalizeImage
+    is_channel_first: false
+    is_scale: true
+    mean:
+    - 0.485
+    - 0.456
+    - 0.406
+    std:
+    - 0.229
+    - 0.224
+    - 0.225
+  - !Permute
+    channel_first: true
+    to_bgr: false
+  batch_transforms:
+  - !RandomShape
+    sizes: [608]
+  with_background: false
+
+YoloEvalFeed:
+  batch_size: 1
+  image_shape: [3, 608, 608]
+  dataset:
+    dataset_dir: dataset/fruit/fruit-detection
+    annotation: ./ImageSets/Main/val.txt
+    image_dir: ./JPEGImages
+    use_default_label: false
+
+
+YoloTestFeed:
+  batch_size: 1
+  image_shape: [3, 608, 608]
+  dataset:
+    dataset_dir: dataset/fruit/fruit-detection
+    annotation: ./ImageSets/Main/label_list.txt
+    use_default_label: false
diff --git a/dataset/fruit/download.sh b/dataset/fruit/download.sh
new file mode 100644
index 0000000000000000000000000000000000000000..dd5d4c043967bbcb96c5c9aeaa495cbf1e05c2d2
--- /dev/null
+++ b/dataset/fruit/download.sh
@@ -0,0 +1,14 @@
+DIR="$( cd "$(dirname "$0")" ; pwd -P )"
+cd "$DIR" || exit 1  # always work inside the script's own directory
+
+# Download the data.
+echo "Downloading..."
+wget https://dataset.bj.bcebos.com/PaddleDetection_demo/fruit-detection.tar || exit 1
+# Extract the data.
+echo "Extracting..."
+tar xvf fruit-detection.tar || exit 1
+cd fruit-detection || exit 1  # do not extract/delete in the wrong directory
+tar xvf Annotations.tar || exit 1
+tar xvf ImageSets.tar || exit 1
+tar xvf JPEGImages.tar || exit 1
+rm -rf ./*.tar  # inner archives are removed only after every extraction succeeded
diff --git a/demo/orange_71.jpg b/demo/orange_71.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..da7974a1a1371298f1ca5f4ef9c82bd3824d7ac3
Binary files /dev/null and b/demo/orange_71.jpg differ
diff --git a/demo/orange_71_detection.jpg b/demo/orange_71_detection.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..88cbf9c97120f79d5ef5f80ccb789a6e3c29bedf
Binary files /dev/null and b/demo/orange_71_detection.jpg differ
diff --git a/demo/tensorboard_fruit.jpg b/demo/tensorboard_fruit.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..44a955fafffb4ab03d911818e20e6f72499f2f4f
Binary files /dev/null and b/demo/tensorboard_fruit.jpg differ
diff --git a/docs/GETTING_STARTED.md b/docs/GETTING_STARTED.md
index 3607313030809464a0b3cbfd0f9cdee61e67ee9d..843b119d4484c8ab607b868919b3ee2b78aed066 100644
--- a/docs/GETTING_STARTED.md
+++ b/docs/GETTING_STARTED.md
@@ -37,7 +37,8 @@ python tools/train.py -c configs/faster_rcnn_r50_1x.yml -o use_gpu=false
 - `--eval`: Whether to perform evaluation in training, default is `False`
 - `--output_eval`: If perform evaluation in training, this edits evaluation directory, default is current directory.
 - `-d` or `--dataset_dir`: Dataset path, same as `dataset_dir` of configs. Such as: `-d dataset/coco`
-- `-o`: Set configuration options in config file. Such as: `-o max_iters=180000`
+- `-c`: Select the config file. All config files are stored in `configs/`
+- `-o`: Set configuration options in config file. Such as: `-o max_iters=180000`. Options set by `-o` take priority over those in the file selected by `-c`
 - `--use_tb`: Whether to record the data with [tb-paddle](https://github.com/linshuliang/tb-paddle), so as to display in Tensorboard, default is `False`
 - `--tb_log_dir`: tb-paddle logging directory for scalar, default is `tb_log_dir/scalar`
 
@@ -57,7 +58,7 @@ causes time-consuming in training, we suggest decreasing evaluation times or eva
 the best model with highest MAP is saved at each `snapshot_iter`. `best_model` has the same path as `model_final`.
 
 
-- configuration options and assign Dataset path
+- Configure dataset path
 ```bash
 export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
 export PYTHONPATH=$PYTHONPATH:.
@@ -65,6 +66,17 @@ python -u tools/train.py -c configs/faster_rcnn_r50_1x.yml \
                          -d dataset/coco
 ```
 
+- Fine-tune on other tasks
+
+When using a pre-trained model to fine-tune on another task, the pre-trained parameters to exclude from loading can be set by `finetune_exclude_pretrained_params` in the YAML config or by `-o finetune_exclude_pretrained_params` in the command-line arguments.
+
+```bash
+export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
+export PYTHONPATH=$PYTHONPATH:.
+python -u tools/train.py -c configs/faster_rcnn_r50_1x.yml \
+                         -o pretrain_weights=output/faster_rcnn_r50_1x/model_final/ \
+                            finetune_exclude_pretrained_params=['cls_score','bbox_pred']
+```
 
 ##### NOTES
 
@@ -73,6 +85,7 @@ python -u tools/train.py -c configs/faster_rcnn_r50_1x.yml \
 - Dataset will be downloaded automatically and cached in `~/.cache/paddle/dataset` if not be found locally.
 - Pretrained model is downloaded automatically and cached in `~/.cache/paddle/weights`.
 - Model checkpoints are saved in `output` by default (configurable).
+- When fine-tuning, users can set `pretrain_weights` to the models published by PaddlePaddle. Parameters whose names match the fields in `finetune_exclude_pretrained_params` are skipped during loading; the fields support wildcard matching. For detailed information, please refer to [Transfer Learning](TRANSFER_LEARNING.md).
 - To check out hyper parameters used, please refer to the [configs](../configs).
 - RCNN models training on CPU is not supported on PaddlePaddle<=1.5.1 and will be fixed on later version.
 
@@ -80,7 +93,6 @@ python -u tools/train.py -c configs/faster_rcnn_r50_1x.yml \
 
 ## Evaluation
 
-
 ```bash
 # run on GPU with:
 export PYTHONPATH=$PYTHONPATH:.
@@ -97,7 +109,7 @@ python tools/eval.py -c configs/faster_rcnn_r50_1x.yml
 
 #### Examples
 
-- configuration options && assign Dataset path
+- Evaluate with specified weights path and dataset path
 ```bash
 # run on GPU with:
 export PYTHONPATH=$PYTHONPATH:.
@@ -107,7 +119,7 @@ python -u tools/eval.py -c configs/faster_rcnn_r50_1x.yml \
                         -d dataset/coco
 ```
 
-- Evaluation with json
+- Evaluate with json
 ```bash
 # run on GPU with:
 export PYTHONPATH=$PYTHONPATH:.
@@ -172,10 +184,10 @@ python tools/infer.py -c configs/faster_rcnn_r50_1x.yml \
 ```
 
 The visualization files are saved in `output` by default, to specify a different path, simply add a `--output_dir=` flag.  
-`--draw_threshold` is an optional argument. Default is 0.5. 
+`--draw_threshold` is an optional argument. Default is 0.5.
 Different thresholds will produce different results depending on the calculation of [NMS](https://ieeexplore.ieee.org/document/1699659).
 If users want to infer according to customized model path, `-o weights` can be set for specified path.
-`--use_tb` is an optional argument, if `--use_tb` is `True`, the tb-paddle will record data in directory, 
+`--use_tb` is an optional argument, if `--use_tb` is `True`, the tb-paddle will record data in directory,
 so users can see the results in Tensorboard.
 
 - Save inference model
@@ -206,8 +218,15 @@ The calculation rules are as follows，they are equivalent: </br>
 | 4           | 0.005          | 360000    | [240000, 320000] |
 | 8           | 0.01           | 180000    | [120000, 160000] |
 
+
 **Q:**  How to reduce GPU memory usage? </br>
 **A:**  Setting environment variable FLAGS_conv_workspace_size_limit to a smaller
 number can reduce GPU memory footprint without affecting training speed.
 Take Mask-RCNN (R50) as example, by setting `export FLAGS_conv_workspace_size_limit=512`,
 batch size could reach 4 per GPU (Tesla V100 16GB).
+
+
+**Q:**  How to change data preprocessing? </br>
+**A:**  Set `sample_transforms` in configuration. Note that **the whole transforms** need to be added in configuration.
+For example, `DecodeImage`, `NormalizeImage` and `Permute` in RCNN models. For a detailed description, please refer
+to [config_example](config_example).
diff --git a/docs/GETTING_STARTED_cn.md b/docs/GETTING_STARTED_cn.md
index 1eedf7b1e94039417ca1dc199e12d305df30834a..e2817354905eebe2cd1ee2d2972b622e159375ce 100644
--- a/docs/GETTING_STARTED_cn.md
+++ b/docs/GETTING_STARTED_cn.md
@@ -38,7 +38,8 @@ python tools/train.py -c configs/faster_rcnn_r50_1x.yml -o use_gpu=false
 - `--eval`: 是否边训练边测试，默认是 `False`
 - `--output_eval`: 如果边训练边测试, 这个参数可以编辑评测保存json路径, 默认是当前目录。
 - `-d` or `--dataset_dir`: 数据集路径, 同配置文件里的`dataset_dir`. 例如: `-d dataset/coco`
-- `-o`: 设置配置文件里的参数内容。 例如: `-o max_iters=180000`
+- `-c`: 选择配置文件，所有配置文件在`configs/`中
+- `-o`: 设置配置文件里的参数内容。例如: `-o max_iters=180000`。使用`-o`配置相较于`-c`选择的配置文件具有更高的优先级。
 - `--use_tb`: 是否使用[tb-paddle](https://github.com/linshuliang/tb-paddle)记录数据，进而在TensorBoard中显示，默认是False。
 - `--tb_log_dir`: 指定 tb-paddle 记录数据的存储路径，默认是`tb_log_dir/scalar`。
 
@@ -57,7 +58,7 @@ python -u tools/train.py -c configs/faster_rcnn_r50_1x.yml --eval
 当边训练边测试时，在每次snapshot\_iter会评测出最佳mAP模型保存到
 `best_model`文件夹下，`best_model`的路径和`model_final`的路径相同。
 
-- 设置配置文件参数 && 指定数据集路径
+- 指定数据集路径
 
 ```bash
 export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
@@ -66,6 +67,18 @@ python -u tools/train.py -c configs/faster_rcnn_r50_1x.yml \
                          -d dataset/coco
 ```
 
+- Fine-tune其他任务
+
+使用预训练模型fine-tune其他任务时，在YAML配置文件中设置`finetune_exclude_pretrained_params`或在命令行中添加`-o finetune_exclude_pretrained_params`对预训练模型进行选择性加载。
+
+```bash
+export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
+export PYTHONPATH=$PYTHONPATH:.
+python -u tools/train.py -c configs/faster_rcnn_r50_1x.yml \
+                         -o pretrain_weights=output/faster_rcnn_r50_1x/model_final/ \
+                            finetune_exclude_pretrained_params=['cls_score','bbox_pred']
+```
+
 ##### 提示
 
 - `CUDA_VISIBLE_DEVICES` 参数可以指定不同的GPU。例如: `export CUDA_VISIBLE_DEVICES=0,1,2,3`. GPU计算规则可以参考 [FAQ](#faq)
@@ -73,6 +86,7 @@ python -u tools/train.py -c configs/faster_rcnn_r50_1x.yml \
 - 若本地未找到数据集，将自动下载数据集并保存在`~/.cache/paddle/dataset`中。
 - 预训练模型自动下载并保存在`〜/.cache/paddle/weights`中。
 - 模型checkpoints默认保存在`output`中（可配置）。
+- 进行模型fine-tune时，用户可将`pretrain_weights`配置为PaddlePaddle发布的模型，加载模型时finetune_exclude_pretrained_params中的字段匹配的参数不被加载，可以为通配符匹配方式。详细说明请参考[Transfer Learning](TRANSFER_LEARNING_cn.md)
 - 更多参数配置，请参考[配置文件](../configs)。
 - RCNN系列模型CPU训练在PaddlePaddle 1.5.1及以下版本暂不支持，将在下个版本修复。
 
@@ -96,7 +110,7 @@ python tools/eval.py -c configs/faster_rcnn_r50_1x.yml
 
 #### 例子
 
-- 设置配置文件参数 && 指定数据集路径
+- 指定数据集路径
 ```bash
 # GPU评估
 export CUDA_VISIBLE_DEVICES=0
@@ -206,3 +220,8 @@ python tools/infer.py -c configs/faster_rcnn_r50_1x.yml --infer_img=demo/0000005
 **A:**  可通过设置环境变量`FLAGS_conv_workspace_size_limit`为较小的值来减少显存消耗，并且不
 会影响训练速度。以Mask-RCNN（R50）为例，设置`export FLAGS_conv_workspace_size_limit = 512`，
 batch size可以达到每GPU 4 (Tesla V100 16GB)。
+
+
+**Q:**  如何修改数据预处理? </br>
+**A:**  可在配置文件中设置 `sample_transforms`。注意需要在配置文件中加入**完整预处理**，
+例如RCNN模型中`DecodeImage`, `NormalizeImage` 和 `Permute`。更多详细描述请参考[配置案例](config_example)。
diff --git a/docs/QUICK_STARTED.md b/docs/QUICK_STARTED.md
new file mode 100644
index 0000000000000000000000000000000000000000..dbc5fccd1502fe0d28f2c58db1cde70e8322d102
--- /dev/null
+++ b/docs/QUICK_STARTED.md
@@ -0,0 +1,64 @@
+English | [简体中文](QUICK_STARTED_cn.md)
+
+# Quick Start
+
+This tutorial fine-tunes a pretrained detection model on a tiny dataset so that users can obtain a model and learn PaddleDetection quickly. The model can be trained in around 15 minutes on a single P40 GPU with good performance.
+
+## Data Preparation
+
+Dataset refers to [Kaggle](https://www.kaggle.com/mbkinaci/fruit-images-for-object-detection), which contains 240 images in train dataset and 60 images in test dataset. Data categories are apple, orange and banana. Download [here](https://dataset.bj.bcebos.com/PaddleDetection_demo/fruit-detection.tar) and uncompress the dataset after download, script for data preparation is located at [download.sh](../dataset/fruit/download.sh). Command is as follows:
+
+```bash
+cd dataset/fruit
+sh download.sh
+```
+
+Training command is as follows:
+
+```bash
+export PYTHONPATH=$PYTHONPATH:.
+export CUDA_VISIBLE_DEVICES=0
+python -u tools/train.py -c configs/yolov3_mobilenet_v1_fruit.yml \
+                        --use_tb=True \
+                        --tb_log_dir=tb_fruit_dir/scalar \
+                        --eval
+```
+
+Use `yolov3_mobilenet_v1` to fine-tune the model from COCO dataset. Meanwhile, loss and mAP can be observed on tensorboard.  
+
+```bash
+tensorboard --logdir tb_fruit_dir/scalar/ --host <host_IP> --port <port_num>
+```
+
+Result on tensorboard is shown below:
+
+<div align="center">
+  <img src="../demo/tensorboard_fruit.jpg" />
+</div>
+
+Model can be downloaded [here](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1_fruit.tar)
+
+Evaluation:
+
+```bash
+export PYTHONPATH=$PYTHONPATH:.
+export CUDA_VISIBLE_DEVICES=0
+python -u tools/eval.py -c configs/yolov3_mobilenet_v1_fruit.yml
+```
+
+Inference:
+
+```bash
+export PYTHONPATH=$PYTHONPATH:.
+export CUDA_VISIBLE_DEVICES=0
+python -u tools/infer.py -c configs/yolov3_mobilenet_v1_fruit.yml --infer_img=demo/orange_71.jpg
+```
+
+Inference images are shown below:
+
+<p align="center">
+  <img src="../demo/orange_71.jpg" height=400 width=400 hspace='10'/>
+  <img src="../demo/orange_71_detection.jpg" height=400 width=400 hspace='10'/>
+</p>
+
+For detailed information on training and evaluation, please refer to [GETTING_STARTED.md](GETTING_STARTED.md).
diff --git a/docs/QUICK_STARTED_cn.md b/docs/QUICK_STARTED_cn.md
new file mode 100644
index 0000000000000000000000000000000000000000..bc1814e02087362f08987efd2c7738abbfd18e8a
--- /dev/null
+++ b/docs/QUICK_STARTED_cn.md
@@ -0,0 +1,65 @@
+[English](QUICK_STARTED.md) | 简体中文
+
+# 快速开始
+
+为了使得用户能够在很短的时间内快速产出模型，掌握PaddleDetection的使用方式，这篇教程通过一个预训练检测模型对小数据集进行finetune。在P40上单卡大约15min即可产出一个效果不错的模型。
+
+## 数据准备
+
+数据集参考[Kaggle数据集](https://www.kaggle.com/mbkinaci/fruit-images-for-object-detection)，其中训练数据集240张图片，测试数据集60张图片，数据类别为3类：苹果，橘子，香蕉。[下载链接](https://dataset.bj.bcebos.com/PaddleDetection_demo/fruit-detection.tar)。数据下载后分别解压即可, 数据准备脚本位于[download.sh](../dataset/fruit/download.sh)。下载数据方式如下：
+
+```bash
+cd dataset/fruit
+sh download.sh
+```
+
+
+训练命令如下：
+
+```bash
+export PYTHONPATH=$PYTHONPATH:.
+export CUDA_VISIBLE_DEVICES=0
+python -u tools/train.py -c configs/yolov3_mobilenet_v1_fruit.yml \
+                        --use_tb=True \
+                        --tb_log_dir=tb_fruit_dir/scalar \
+                        --eval
+```
+
+训练使用`yolov3_mobilenet_v1`基于COCO数据集训练好的模型进行finetune。训练期间可以通过tensorboard实时观察loss和精度值，启动命令如下：
+
+```bash
+tensorboard --logdir tb_fruit_dir/scalar/ --host <host_IP> --port <port_num>
+```
+
+tensorboard结果显示如下：
+
+<div align="center">
+  <img src="../demo/tensorboard_fruit.jpg" />
+</div>
+
+训练模型[下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1_fruit.tar)
+
+评估命令如下：
+
+```bash
+export PYTHONPATH=$PYTHONPATH:.
+export CUDA_VISIBLE_DEVICES=0
+python -u tools/eval.py -c configs/yolov3_mobilenet_v1_fruit.yml
+```
+
+预测命令如下：
+
+```bash
+export PYTHONPATH=$PYTHONPATH:.
+export CUDA_VISIBLE_DEVICES=0
+python -u tools/infer.py -c configs/yolov3_mobilenet_v1_fruit.yml --infer_img=demo/orange_71.jpg
+```
+
+预测图片如下：
+
+<p align="center">
+  <img src="../demo/orange_71.jpg" height=400 width=400 hspace='10'/>
+  <img src="../demo/orange_71_detection.jpg" height=400 width=400 hspace='10'/>
+</p>
+
+更多训练及评估流程，请参考[GETTING_STARTED_cn.md](GETTING_STARTED_cn.md).
diff --git a/docs/TRANSFER_LEARNING.md b/docs/TRANSFER_LEARNING.md
new file mode 100644
index 0000000000000000000000000000000000000000..a911d2994002cf825493b5846db57930156c2594
--- /dev/null
+++ b/docs/TRANSFER_LEARNING.md
@@ -0,0 +1,34 @@
+# Transfer Learning
+
+Transfer learning aims at learning new knowledge from existing knowledge. For example, take a model pretrained on ImageNet to initialize detection models, or take a model pretrained on the COCO dataset to initialize detection models trained on the PascalVOC dataset.
+
+In transfer learning, if a different dataset with a different number of classes is used, loading the parameters related to the number of classes fails because of dimensional inconsistency. On the other hand, if a more complicated model is needed, users have to modify the open-source model structure and load parameters selectively. Thus, PaddleDetection needs to support designating parameter fields and skipping the parameters that match those fields when loading.
+
+## Transfer Learning in PaddleDetection
+
+In transfer learning, the pretrained model needs to be loaded selectively. Set `finetune_exclude_pretrained_params` in the YAML configuration file or set `-o finetune_exclude_pretrained_params` on the command line.
+
+```bash
+export PYTHONPATH=$PYTHONPATH:.
+export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
+python -u tools/train.py -c configs/faster_rcnn_r50_1x.yml \
+                        -o pretrain_weights=https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_1x.tar \
+                           finetune_exclude_pretrained_params=['cls_score','bbox_pred']
+```
+
+* Note:
+
+1. The path in pretrain\_weights is the open-source model link of faster RCNN from COCO dataset
+2. The parameter fields are set in finetune\_exclude\_pretrained\_params. If the name of parameter matches field (wildcard matching), the parameter will be ignored in loading.
+
+If users want to fine-tune on their own dataset while keeping the model structure unchanged, they only need to ignore the parameters related to the number of classes. PaddleDetection lists the ignored parameter fields corresponding to different model types. The table is shown below: </br>
+
+|      model type    |         ignored parameter fields          |
+| :----------------: | :---------------------------------------: |
+|     Faster RCNN    |          cls\_score, bbox\_pred           |
+|     Cascade RCNN   |          cls\_score, bbox\_pred           |
+|       Mask RCNN    | cls\_score, bbox\_pred, mask\_fcn\_logits |
+|  Cascade-Mask RCNN | cls\_score, bbox\_pred, mask\_fcn\_logits |
+|      RetinaNet     |           retnet\_cls\_pred\_fpn          |
+|        SSD         |                ^conv2d\_                  |
+|       YOLOv3       |              yolo\_output                 |
diff --git a/docs/TRANSFER_LEARNING_cn.md b/docs/TRANSFER_LEARNING_cn.md
new file mode 100644
index 0000000000000000000000000000000000000000..f75ca4a9c7ba5fbf34b49c5d436d1883a884108d
--- /dev/null
+++ b/docs/TRANSFER_LEARNING_cn.md
@@ -0,0 +1,34 @@
+# 迁移学习
+
+迁移学习为利用已有知识，对新知识进行学习。例如利用ImageNet分类预训练模型做初始化来训练检测模型，利用在COCO数据集上的检测模型做初始化来训练基于PascalVOC数据集的检测模型。
+
+在进行迁移学习时，由于会使用不同的数据集，数据类别数与COCO/VOC数据类别不同，导致在加载PaddlePaddle开源模型时，与类别数相关的权重（例如分类模块的fc层）会出现维度不匹配的问题；另外，如果需要结构更加复杂的模型，需要对已有开源模型结构进行调整，对应权重也需要选择性加载。因此，需要检测库能够指定参数字段，在加载模型时不加载匹配的权重。
+
+## PaddleDetection进行迁移学习
+
+在迁移学习中，对预训练模型进行选择性加载，可通过在 YAML 配置文件中设置 finetune_exclude_pretrained_params字段，也可通过在 train.py的启动参数中设置 -o finetune_exclude_pretrained_params。
+
+```bash
+export PYTHONPATH=$PYTHONPATH:.
+export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
+python -u tools/train.py -c configs/faster_rcnn_r50_1x.yml \
+                        -o pretrain_weights=https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_1x.tar \
+                           finetune_exclude_pretrained_params=['cls_score','bbox_pred']
+```
+
+* 说明：
+
+1. pretrain\_weights的路径为COCO数据集上开源的faster RCNN模型链接
+2. finetune\_exclude\_pretrained\_params中设置参数字段，如果参数名能够匹配以上参数字段（通配符匹配方式），则在模型加载时忽略该参数。
+
+如果用户需要利用自己的数据进行finetune，模型结构不变，只需要忽略与类别数相关的参数。PaddleDetection给出了不同模型类型所对应的忽略参数字段。如下表所示：</br>
+
+|      模型类型      |             忽略参数字段                  |
+| :----------------: | :---------------------------------------: |
+|     Faster RCNN    |          cls\_score, bbox\_pred           |
+|     Cascade RCNN   |          cls\_score, bbox\_pred           |
+|       Mask RCNN    | cls\_score, bbox\_pred, mask\_fcn\_logits |
+|  Cascade-Mask RCNN | cls\_score, bbox\_pred, mask\_fcn\_logits |
+|      RetinaNet     |           retnet\_cls\_pred\_fpn          |
+|        SSD         |                ^conv2d\_                  |
+|       YOLOv3       |              yolo\_output                 |
diff --git a/ppdet/data/__init__.py b/ppdet/data/__init__.py
index bba9ab3bf003a874ac132fe6bba17cc7e3f43884..1104c33f6ac34b8ec32681f5c4a7fc4d89274bfb 100644
--- a/ppdet/data/__init__.py
+++ b/ppdet/data/__init__.py
@@ -37,6 +37,10 @@ from __future__ import absolute_import
 
 from .dataset import Dataset
 from .reader import Reader
-from .data_feed import create_reader
-
-__all__ = ['Dataset', 'Reader', 'create_reader']
+import traceback
+if traceback.extract_stack()[0][
+        0] == 'ppdet/data/tools/generate_data_for_training.py':
+    __all__ = ['Dataset', 'Reader']
+else:
+    from .data_feed import create_reader
+    __all__ = ['Dataset', 'Reader', 'create_reader']
diff --git a/ppdet/modeling/architectures/cascade_rcnn.py b/ppdet/modeling/architectures/cascade_rcnn.py
index 3ab9369dcaa9935ad2fa1d67b7b7d8bd5d41904a..26e05925ebc4974c93b81385c2523cd49cb378e6 100644
--- a/ppdet/modeling/architectures/cascade_rcnn.py
+++ b/ppdet/modeling/architectures/cascade_rcnn.py
@@ -89,7 +89,6 @@ class CascadeRCNN(object):
 
         # backbone
         body_feats = self.backbone(im)
-        # body_feat_names = list(body_feats.keys())
 
         # FPN
         if self.fpn is not None:
diff --git a/ppdet/modeling/architectures/faster_rcnn.py b/ppdet/modeling/architectures/faster_rcnn.py
index 1968289fde350b0d99d1bd071568aafd6ac83627..a6ef2f6a0c483e91475a8de1f5748fd0b3fdeb8b 100644
--- a/ppdet/modeling/architectures/faster_rcnn.py
+++ b/ppdet/modeling/architectures/faster_rcnn.py
@@ -94,7 +94,8 @@ class FasterRCNN(object):
             bbox_outside_weights = outs[4]
         else:
             if self.rpn_only:
-                im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3])
+                im_scale = fluid.layers.slice(
+                    im_info, [1], starts=[2], ends=[3])
                 im_scale = fluid.layers.sequence_expand(im_scale, rois)
                 rois = rois / im_scale
                 return {'proposal': rois}
diff --git a/ppdet/modeling/architectures/mask_rcnn.py b/ppdet/modeling/architectures/mask_rcnn.py
index 49a4c0aa9f8a1feb7291098dfece5a6107f0b22c..ef7f4af19576224b331e478472c8fb2dcf3ab38d 100644
--- a/ppdet/modeling/architectures/mask_rcnn.py
+++ b/ppdet/modeling/architectures/mask_rcnn.py
@@ -133,7 +133,8 @@ class MaskRCNN(object):
 
         else:
             if self.rpn_only:
-                im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3])
+                im_scale = fluid.layers.slice(
+                    im_info, [1], starts=[2], ends=[3])
                 im_scale = fluid.layers.sequence_expand(im_scale, rois)
                 rois = rois / im_scale
                 return {'proposal': rois}
diff --git a/ppdet/utils/checkpoint.py b/ppdet/utils/checkpoint.py
index a63dd1daf3044452579182092a283a47bd88832c..947a0803fde16082561914d8a61980f8f174ab3d 100644
--- a/ppdet/utils/checkpoint.py
+++ b/ppdet/utils/checkpoint.py
@@ -22,7 +22,7 @@ import os
 import shutil
 import time
 import numpy as np
-
+import re
 import paddle.fluid as fluid
 
 from .download import get_weights_path
@@ -33,6 +33,7 @@ logger = logging.getLogger(__name__)
 __all__ = [
     'load_checkpoint',
     'load_and_fusebn',
+    'load_params',
     'save',
 ]
 
@@ -46,60 +47,41 @@ def is_url(path):
     return path.startswith('http://') or path.startswith('https://')
 
 
-def _get_weight_path(path):
-    env = os.environ
-    if 'PADDLE_TRAINERS_NUM' in env and 'PADDLE_TRAINER_ID' in env:
-        trainer_id = int(env['PADDLE_TRAINER_ID'])
-        num_trainers = int(env['PADDLE_TRAINERS_NUM'])
-        if num_trainers <= 1:
-            path = get_weights_path(path)
-        else:
-            from ppdet.utils.download import map_path, WEIGHTS_HOME
-            weight_path = map_path(path, WEIGHTS_HOME)
-            lock_path = weight_path + '.lock'
-            if not os.path.exists(weight_path):
-                try:
-                    os.makedirs(os.path.dirname(weight_path))
-                except OSError as e:
-                    if e.errno != errno.EEXIST:
-                        raise
-                with open(lock_path, 'w'):  # touch
-                    os.utime(lock_path, None)
-                if trainer_id == 0:
-                    get_weights_path(path)
-                    os.remove(lock_path)
-                else:
-                    while os.path.exists(lock_path):
-                        time.sleep(1)
-            path = weight_path
-    else:
-        path = get_weights_path(path)
-    return path
-
 
-def load_pretrain(exe, prog, path):
+def load_params(exe, prog, path, ignore_params=[]):
     """
     Load model from the given path.
     Args:
         exe (fluid.Executor): The fluid.Executor object.
         prog (fluid.Program): load weight to which Program object.
         path (string): URL string or loca model path.
+        ignore_params (bool): ignore variable to load when finetuning.
     """
 
     if is_url(path):
-        path = _get_weight_path(path)
+        path = get_weights_path(path)
 
     if not os.path.exists(path):
         raise ValueError("Model pretrain path {} does not "
                          "exists.".format(path))
 
-    logger.info('Loading pretrained model from {}...'.format(path))
+    logger.info('Loading parameters from {}...'.format(path))
 
     def _if_exist(var):
-        b = os.path.exists(os.path.join(path, var.name))
-        if b:
+        do_ignore = False
+        param_exist = os.path.exists(os.path.join(path, var.name))
+        if len(ignore_params) > 0:
+            # Parameter related to num_classes will be ignored in finetuning
+            do_ignore_list = [
+                bool(re.match(name, var.name)) for name in ignore_params
+            ]
+            do_ignore = any(do_ignore_list)
+            if do_ignore and param_exist:
+                logger.info('In load_params, ignore {}'.format(var.name))
+        do_load = param_exist and not do_ignore
+        if do_load:
             logger.debug('load weight {}'.format(var.name))
-        return b
+        return do_load
 
     fluid.io.load_vars(exe, path, prog, predicate=_if_exist)
 
@@ -164,14 +146,16 @@ def load_and_fusebn(exe, prog, path):
         path (string): the path to save model.
     """
     logger.info('Load model and fuse batch norm from {}...'.format(path))
+
     if is_url(path):
-        path = _get_weight_path(path)
+        path = get_weights_path(path)
 
     if not os.path.exists(path):
         raise ValueError("Model path {} does not exists.".format(path))
 
     def _if_exist(var):
         b = os.path.exists(os.path.join(path, var.name))
+
         if b:
             logger.debug('load weight {}'.format(var.name))
         return b
diff --git a/tools/configure.py b/tools/configure.py
index 3bfc8b83ad21cb94149c51fb698ef7ced7e72f86..45b297116a1eaa787e72b8d191245ff70dfef3dd 100644
--- a/tools/configure.py
+++ b/tools/configure.py
@@ -37,7 +37,9 @@ MISC_CONFIG = {
     "map_type": "11point",
     "log_smooth_window": 20,
     "snapshot_iter": 10000,
+    "log_iter": 20,
     "use_gpu": True,
+    "finetune_exclude_pretrained_params": "<value>",
 }
 
 
diff --git a/tools/eval.py b/tools/eval.py
index 5e596f020ec3018690cd24179ae00611c76e1910..c4a588e092f59ad982774be48815887bd724b076 100644
--- a/tools/eval.py
+++ b/tools/eval.py
@@ -102,7 +102,7 @@ def main():
     # load model
     exe.run(startup_prog)
     if 'weights' in cfg:
-        checkpoint.load_pretrain(exe, eval_prog, cfg.weights)
+        checkpoint.load_params(exe, eval_prog, cfg.weights)
 
     assert cfg.metric in ['COCO', 'VOC'], \
             "unknown metric type {}".format(cfg.metric)
diff --git a/tools/infer.py b/tools/infer.py
index 32e6040d2f3db5768d5191babc45e65368517466..d33d1e3ebecaa342643817c1302220f202d93885 100644
--- a/tools/infer.py
+++ b/tools/infer.py
@@ -178,7 +178,7 @@ def main():
 
     exe.run(startup_prog)
     if cfg.weights:
-        checkpoint.load_checkpoint(exe, infer_prog, cfg.weights)
+        checkpoint.load_params(exe, infer_prog, cfg.weights)
 
     if FLAGS.save_inference_model:
         save_infer_model(FLAGS, exe, feed_vars, test_fetches, infer_prog)
diff --git a/tools/train.py b/tools/train.py
index 6dc0f1ede87a8dd174fef4776c87424ab70c257e..f1980307211393521a6580e4af18a5044c12773f 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -75,6 +75,9 @@ def main():
     if 'log_iter' not in cfg:
         cfg.log_iter = 20
 
+    ignore_params = cfg.finetune_exclude_pretrained_params \
+                 if 'finetune_exclude_pretrained_params' in cfg else []
+
     # check if set use_gpu=True in paddlepaddle cpu version
     check_gpu(cfg.use_gpu)
     print_total_cfg(cfg)
@@ -179,13 +182,15 @@ def main():
 
     fuse_bn = getattr(model.backbone, 'norm_type', None) == 'affine_channel'
     start_iter = 0
+
     if FLAGS.resume_checkpoint:
         checkpoint.load_checkpoint(exe, train_prog, FLAGS.resume_checkpoint)
         start_iter = checkpoint.global_step()
-    elif cfg.pretrain_weights and fuse_bn:
+    elif cfg.pretrain_weights and fuse_bn and not ignore_params:
         checkpoint.load_and_fusebn(exe, train_prog, cfg.pretrain_weights)
     elif cfg.pretrain_weights:
-        checkpoint.load_pretrain(exe, train_prog, cfg.pretrain_weights)
+        checkpoint.load_params(
+            exe, train_prog, cfg.pretrain_weights, ignore_params=ignore_params)
 
     train_reader = create_reader(train_feed, (cfg.max_iters - start_iter) *
                                  devices_num, FLAGS.dataset_dir)