diff --git a/doc/imgs/overview.png b/doc/imgs/overview.png
index 83341cb3b96a257117f07e452993911277823f80..1c98dd47ed935781c56890c58a6819543d45de24 100644
Binary files a/doc/imgs/overview.png and b/doc/imgs/overview.png differ
diff --git a/models/rank/fibinet/README.md b/models/rank/fibinet/README.md
index 9e1eef9010a6d07d5738ecfee9ab4c5cf890a0dd..ea8c58d5b2eb6ef1208cf136d6ae516add42524f 100644
--- a/models/rank/fibinet/README.md
+++ b/models/rank/fibinet/README.md
@@ -30,6 +30,12 @@
 
 （2）数值特征（连续特征）进行归一化处理
 
+执行run.sh生成训练集和测试集
+
+```
+sh run.sh
+```
+
 ## 环境
 
 PaddlePaddle 1.7.2
@@ -97,38 +103,36 @@ python -m paddlerec.run -m paddlerec.models.rank.fibinet
 训练：
 
 ```
-I0622 19:25:12.142271   344 parallel_executor.cc:440] The Program will be executed on CPU using ParallelExecutor, 1 cards are used, so 1 programs are executed in parallel.
-I0622 19:25:12.673106   344 build_strategy.cc:365] SeqOnlyAllReduceOps:0, num_trainers:1
-I0622 19:25:17.203287   344 parallel_executor.cc:307] Inplace strategy is enabled, when build_strategy.enable_inplace = True
-I0622 19:25:17.684131   344 parallel_executor.cc:375] Garbage collection strategy is enabled, when FLAGS_eager_delete_tensor_gb = 0
-batch: 10, AUC: [0.52777778], BATCH_AUC: [0.52777778]
-batch: 20, AUC: [0.51836735], BATCH_AUC: [0.45098039]
-batch: 30, AUC: [0.30978261], BATCH_AUC: [0.23214286]
-epoch 0 done, use time: 11.074166536331177
-batch: 10, AUC: [0.44592593], BATCH_AUC: [0.74294671]
-batch: 20, AUC: [0.52282609], BATCH_AUC: [0.83333333]
-batch: 30, AUC: [0.5210356], BATCH_AUC: [0.91071429]
-epoch 1 done, use time: 4.212069749832153
-batch: 10, AUC: [0.60075758], BATCH_AUC: [0.89184953]
-batch: 20, AUC: [0.64758769], BATCH_AUC: [1.]
-batch: 30, AUC: [0.68684476], BATCH_AUC: [1.]
-epoch 2 done, use time: 4.276938438415527
-batch: 10, AUC: [0.75172139], BATCH_AUC: [1.]
-batch: 20, AUC: [0.77915815], BATCH_AUC: [1.]
-batch: 30, AUC: [0.81179181], BATCH_AUC: [1.]
-epoch 3 done, use time: 4.278341770172119
-PaddleRec Finish
+Running SingleStartup.
+W0623 12:03:35.130075   509 device_context.cc:237] Please NOTE: device: 0, CUDA Capability: 70, Driver API Version: 9.2, Runtime API Version: 9.0
+W0623 12:03:35.134771   509 device_context.cc:245] device: 0, cuDNN Version: 7.3.
+Running SingleRunner.
+batch: 100, AUC: [0.6449976], BATCH_AUC: [0.69029814]
+batch: 200, AUC: [0.6769844], BATCH_AUC: [0.70255003]
+batch: 300, AUC: [0.67131597], BATCH_AUC: [0.68954499]
+batch: 400, AUC: [0.68129822], BATCH_AUC: [0.70892718]
+batch: 500, AUC: [0.68242937], BATCH_AUC: [0.69269376]
+batch: 600, AUC: [0.68741928], BATCH_AUC: [0.72034578]
+...
+batch: 1400, AUC: [0.84607023], BATCH_AUC: [0.93358024]
+batch: 1500, AUC: [0.84796116], BATCH_AUC: [0.95302841]
+batch: 1600, AUC: [0.84949111], BATCH_AUC: [0.92868531]
+batch: 1700, AUC: [0.85113661], BATCH_AUC: [0.95452616]
+batch: 1800, AUC: [0.85260467], BATCH_AUC: [0.92847032]
+epoch 3 done, use time: 1618.1106688976288
 ```
 
 预测
 
 ```
 load persistables from increment_model/3
-batch: 20, AUC: [0.86578715], BATCH_AUC: [1.]
-Infer phase2 of 3 done, use time: 13.813123941421509
-load persistables from increment_model/1
-batch: 20, AUC: [0.6480309], BATCH_AUC: [1.]
-Infer phase2 of 1 done, use time: 13.001627922058105
-PaddleRec Finish
+batch: 20, AUC: [0.85304064], BATCH_AUC: [0.94178556]
+batch: 40, AUC: [0.85304544], BATCH_AUC: [0.95207907]
+batch: 60, AUC: [0.85303907], BATCH_AUC: [0.94782551]
+batch: 80, AUC: [0.85298773], BATCH_AUC: [0.93987691]
+...
+batch: 1780, AUC: [0.866046], BATCH_AUC: [0.96424594]
+batch: 1800, AUC: [0.86633785], BATCH_AUC: [0.96900967]
+batch: 1820, AUC: [0.86662365], BATCH_AUC: [0.96759972]
 ```
 
diff --git a/models/rank/fibinet/config.yaml b/models/rank/fibinet/config.yaml
index 06dbc745493e00591bc8302f62911ff26f3f80be..520d709d16cbd24fe619e568d81e93f289328815 100644
--- a/models/rank/fibinet/config.yaml
+++ b/models/rank/fibinet/config.yaml
@@ -18,21 +18,15 @@ workspace: "paddlerec.models.rank.fibinet"
 # list of dataset
 dataset:
 - name: dataloader_train # name of dataset to distinguish different datasets
-  batch_size: 2
+  batch_size: 1000
   type: DataLoader # or QueueDataset 
-  data_path: "{workspace}/data/sample_data/train"
-  sparse_slots: "click 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26"
-  dense_slots: "dense_var:13"
-- name: dataset_train # name of dataset to distinguish different datasets
-  batch_size: 2
-  type: QueueDataset # or DataLoader 
-  data_path: "{workspace}/data/sample_data/train"
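+  data_path: "{workspace}/data/slot_test_data_full"  # NOTE(review): same path as dataset_infer below; confirm training is not meant to read the training split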
   sparse_slots: "click 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26"
   dense_slots: "dense_var:13"
 - name: dataset_infer # name
-  batch_size: 2
+  batch_size: 1000
   type: DataLoader # or QueueDataset
-  data_path: "{workspace}/data/sample_data/train"
+  data_path: "{workspace}/data/slot_test_data_full"
   sparse_slots: "click 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26"
   dense_slots: "dense_var:13"
 
@@ -53,7 +47,7 @@ hyper_parameters:
   dropout_rate: 0.5
 
 # select runner by name
-mode: [single_cpu_train, single_cpu_infer]
+mode: [single_gpu_train, single_gpu_infer]
 # config of each runner.
 # runner is a kind of paddle training class, which wraps the train/infer process.
 runner:
@@ -63,23 +57,44 @@ runner:
   epochs: 4
   # device to run training or infer
   device: cpu
-  save_checkpoint_interval: 2 # save model interval of epochs
+  save_checkpoint_interval: 1 # save model interval of epochs
   save_inference_interval: 4 # save inference
   save_checkpoint_path: "increment_model" # save checkpoint path
   save_inference_path: "inference" # save inference path
   save_inference_feed_varnames: [] # feed vars of save inference
   save_inference_fetch_varnames: [] # fetch vars of save inference
   init_model_path: "" # load model path
-  print_interval: 10
+  print_interval: 100
+  phases: [phase1]
+
+- name: single_gpu_train
+  class: train
+  # num of epochs
+  epochs: 4
+  # device to run training or infer
+  device: gpu
+  save_checkpoint_interval: 1 # save model interval of epochs
+  save_inference_interval: 4 # save inference
+  save_checkpoint_path: "increment_model" # save checkpoint path
+  save_inference_path: "inference" # save inference path
+  save_inference_feed_varnames: [] # feed vars of save inference
+  save_inference_fetch_varnames: [] # fetch vars of save inference
+  init_model_path: "" # load model path
+  print_interval: 100
   phases: [phase1]
 
 - name: single_cpu_infer
   class: infer
-  # num of epochs
-  epochs: 1
   # device to run training or infer
   device: cpu
-  init_model_path: "increment_model" # load model path
+  init_model_path: "increment_model/3" # load model path
+  phases: [phase2]
+
+- name: single_gpu_infer
+  class: infer
+  # device to run training or infer
+  device: gpu
+  init_model_path: "increment_model/3" # load model path
   phases: [phase2]
 
 # runner will run all the phase in each epoch
@@ -87,10 +102,10 @@ phase:
 - name: phase1
   model: "{workspace}/model.py" # user-defined model
   dataset_name: dataloader_train # select dataset by name
-  thread_num: 1
+  thread_num: 8
 
 - name: phase2
   model: "{workspace}/model.py" # user-defined model
   dataset_name: dataset_infer # select dataset by name
-  thread_num: 1
+  thread_num: 8
   
diff --git a/models/rank/readme.md b/models/rank/readme.md
index 3ca96de21b7e134270adc448107c9db59f3632c3..b1939c9d3776eab86b48589698baa516130e9f60 100644
--- a/models/rank/readme.md
+++ b/models/rank/readme.md
@@ -37,7 +37,7 @@
 | xDeepFM | xDeepFM | [xDeepFM: Combining Explicit and Implicit Feature Interactions for Recommender Systems](https://dl.acm.org/doi/pdf/10.1145/3219819.3220023)(2018) |
 | DIN | Deep Interest Network | [Deep Interest Network for Click-Through Rate Prediction](https://dl.acm.org/doi/pdf/10.1145/3219819.3219823)(2018) |
 | FGCNN | Feature Generation by CNN | [Feature Generation by Convolutional Neural Network for Click-Through Rate Prediction](https://arxiv.org/pdf/1904.04447.pdf)(2019) |
-| FIBINET | Combining Feature Importance and Bilinear feature Interaction | [《FiBiNET: Combining Feature Importance and Bilinear feature Interaction for Click-Through Rate Prediction》]( https://arxiv.org/pdf/1905.09433.pdf) |
+| FIBINET | Combining Feature Importance and Bilinear feature Interaction | [《FiBiNET: Combining Feature Importance and Bilinear feature Interaction for Click-Through Rate Prediction》](https://arxiv.org/pdf/1905.09433.pdf)(2019) |
 
 下面是每个模型的简介（注：图片引用自链接中的论文）
 
@@ -85,6 +85,7 @@
 |       DIN        |       32       |       10       |        100       |
 |       Wide&Deep  |       40       |       1       |        40       |
 |       xDeepFM        |       100       |       1       |        10       |
+| FIBINET | 1000 | 8 | 4 |
 ### 数据处理
 参考每个模型目录数据下载&预处理脚本
 
@@ -124,6 +125,7 @@ python -m paddlerec.run -m ./config.yaml # 以DNN为例
 |       Criteo        |       xDeepFM       |       0.48657        |       --          |       --          |       --          |
 |       Census-income Data        |       Wide&Deep       |       0.76195         |       0.90577          |       --          |       --          |
 |       Amazon Product        |       DIN       |       0.47005        |       0.86379         |       --          |       --          |
+| Criteo | FIBINET | -- | 0.86662 | -- | -- |
 
 
 ## 分布式