Unverified commit 77381441, authored by wuzhihua, committed by GitHub

Merge pull request #42 from 123malin/bug_fix

add movie_recommand_demo
......@@ -20,6 +20,8 @@ from __future__ import print_function
import time
import logging
import os
import json
import numpy as np
import paddle.fluid as fluid
from paddlerec.core.trainers.transpiler_trainer import TranspileTrainer
......@@ -263,8 +265,10 @@ class SingleInfer(TranspileTrainer):
envs.get_global_env("runner." + self._runner_name +
".print_interval", 20))
metrics_format.append("{}: {{}}".format("batch"))
metrics_indexes = dict()
for name, var in metrics.items():
metrics_varnames.append(var.name)
metrics_indexes[var.name] = len(metrics_varnames) - 1
metrics_format.append("{}: {{}}".format(name))
metrics_format = ", ".join(metrics_format)
......@@ -272,19 +276,30 @@ class SingleInfer(TranspileTrainer):
reader.start()
batch_id = 0
scope = self._model[model_name][2]
infer_results = []
with fluid.scope_guard(scope):
try:
while True:
                    metrics_rets = self._exe.run(program=program,
                                                 fetch_list=metrics_varnames,
                                                 return_numpy=False)
metrics = [batch_id]
metrics.extend(metrics_rets)
batch_infer_result = {}
for k, v in metrics_indexes.items():
batch_infer_result[k] = np.array(metrics_rets[
v]).tolist()
infer_results.append(batch_infer_result)
if batch_id % fetch_period == 0 and batch_id != 0:
print(metrics_format.format(*metrics))
batch_id += 1
except fluid.core.EOFException:
reader.reset()
with open(model_dict['save_path'], 'w') as fout:
json.dump(infer_results, fout)
def terminal(self, context):
context['is_exit'] = True
......
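With this change, single_infer collects every fetched variable per batch and dumps the whole run to the runner's save_path as a JSON list. A minimal sketch of reading that file back (assuming the rank demo's rank/infer_result path from the commands below; the keys are whatever variable names the model registered):

import json

# Hedged example: load the per-batch results written by SingleInfer above.
with open("rank/infer_result") as f:
    batches = json.load(f)  # one dict per batch: {fetched_var_name: nested lists}

print(len(batches), "batches")
print(sorted(batches[0].keys()))  # the fetched metric/variable names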
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# -*- coding: utf-8 -*-
import sys
import random
import json
user_fea = ["userid", "gender", "age", "occupation"]
movie_fea = ["movieid", "title", "genres"]
rating_fea = ["userid", "movieid", "rating", "time"]
dict_size = 60000000
hash_dict = dict()
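# NOTE: in Python 3, hash() of strings is randomized per process; run this
# script and parse.py with the same fixed PYTHONHASHSEED (e.g.
# PYTHONHASHSEED=0) so the hashed ids stay stable across runs.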
data_path = "ml-1m"
test_user_path = "online_user"
def process(path):
user_dict = parse_data(data_path + "/users.dat", user_fea)
movie_dict = parse_movie_data(data_path + "/movies.dat", movie_fea)
for line in open(path):
line = line.strip()
arr = line.split("::")
userid = arr[0]
movieid = arr[1]
out_str = "time:%s\t%s\t%s\tlabel:%s" % (arr[3], user_dict[userid],
movie_dict[movieid], arr[2])
log_id = hash(out_str) % 1000000000
print "%s\t%s" % (log_id, out_str)
def parse_data(file_name, feas):
    res = {}
    for line in open(file_name):
        line = line.strip()
        arr = line.split("::")
        out_str = ""
        for i in range(0, len(feas)):
            out_str += "%s:%s\t" % (feas[i], arr[i])
        res[arr[0]] = out_str.strip()
    return res
def parse_movie_data(file_name, feas):
    res = {}
    for line in open(file_name):
        line = line.strip()
        arr = line.split("::")
        title_str = ""
        genres_str = ""
        for term in arr[1].split(" "):
            term = term.strip()
            if term != "":
                title_str += "%s " % (term)
        for term in arr[2].split("|"):
            term = term.strip()
            if term != "":
                genres_str += "%s " % (term)
        out_str = "movieid:%s\ttitle:%s\tgenres:%s" % (
            arr[0], title_str.strip(), genres_str.strip())
        res[arr[0]] = out_str.strip()
    return res
def to_hash(in_str):
    # split only on the first ":" so feature values that contain a colon
    # (e.g. some movie titles) are kept intact
    feas, arr = in_str.split(":", 1)
    out_str = "%s:%s" % (feas, (arr + arr[::-1] + arr[::-2] + arr[::-3]))
    hash_id = hash(out_str) % dict_size
    if hash_id in hash_dict and hash_dict[hash_id] != out_str:
        print(hash_id, out_str, hash(out_str))
        print("conflict")
        exit(-1)
    hash_dict[hash_id] = out_str
    return "%s:%s" % (feas, hash_id)
def to_hash_list(in_str):
    arr = in_str.split(":", 1)
    tmp_arr = arr[1].split(" ")
    out_str = ""
    for item in tmp_arr:
        item = item.strip()
        if item != "":
            key = "%s:%s" % (arr[0], item)
            out_str += "%s " % (to_hash(key))
    return out_str.strip()
def get_hash(path):
#0-34831 1-time:974673057 2-userid:2021 3-gender:M 4-age:25 5-occupation:0 6-movieid:1345 7-title:Carrie (1976) 8-genres:Horror 9-label:2
for line in open(path):
arr = line.strip().split("\t")
out_str = "logid:%s %s %s %s %s %s %s %s %s %s" % \
(arr[0], arr[1], to_hash(arr[2]), to_hash(arr[3]), to_hash(arr[4]), to_hash(arr[5]), \
to_hash(arr[6]), to_hash_list(arr[7]), to_hash_list(arr[8]), arr[9])
        print(out_str)
def generate_online_user():
movie_dict = parse_movie_data(data_path + "/movies.dat", movie_fea)
with open(test_user_path + "/movies.dat", 'w') as f:
for line in open(test_user_path + "/users.dat"):
line = line.strip()
arr = line.split("::")
userid = arr[0]
for item in movie_dict:
f.write(userid + "::" + item + "::1")
f.write("\n")
def generate_online_data(path):
user_dict = parse_data(data_path + "/users.dat", user_fea)
movie_dict = parse_movie_data(data_path + "/movies.dat", movie_fea)
for line in open(path):
line = line.strip()
arr = line.split("::")
userid = arr[0]
movieid = arr[1]
label = arr[2]
out_str = "time:%s\t%s\t%s\tlabel:%s" % ("1", user_dict[userid],
movie_dict[movieid], label)
log_id = hash(out_str) % 1000000000
res = "%s\t%s" % (log_id, out_str)
arr = res.strip().split("\t")
out_str = "logid:%s %s %s %s %s %s %s %s %s %s" % \
(arr[0], arr[1], to_hash(arr[2]), to_hash(arr[3]), to_hash(arr[4]), to_hash(arr[5]), \
to_hash(arr[6]), to_hash_list(arr[7]), to_hash_list(arr[8]), arr[9])
print(out_str)
if __name__ == "__main__":
random.seed(1111111)
if sys.argv[1] == "process_raw":
process(sys.argv[2])
elif sys.argv[1] == "hash":
get_hash(sys.argv[2])
elif sys.argv[1] == "data_recall":
generate_online_user()
generate_online_data(test_user_path + "/movies.dat")
elif sys.argv[1] == "data_rank":
generate_online_data(test_user_path + "/movies.dat")
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import random
train = dict()
test = dict()
data_path = "ml-1m"
for line in open(data_path + "/ratings.dat"):
fea = line.rstrip().split("::")
if fea[0] not in train:
train[fea[0]] = [line]
elif fea[0] not in test:
test[fea[0]] = dict()
test[fea[0]]['time'] = int(fea[3])
test[fea[0]]['content'] = line
else:
time = int(fea[3])
if time <= test[fea[0]]['time']:
train[fea[0]].append(line)
else:
train[fea[0]].append(test[fea[0]]['content'])
test[fea[0]]['time'] = time
test[fea[0]]['content'] = line
train_data = []
for key in train:
for line in train[key]:
train_data.append(line)
random.shuffle(train_data)
with open(data_path + "/train.dat", 'w') as f:
for line in train_data:
f.write(line)
with open(data_path + "/test.dat", 'w') as f:
for key in test:
f.write(test[key]['content'])
cd data
wget http://files.grouplens.org/datasets/movielens/ml-1m.zip
unzip ml-1m.zip
python split.py
mkdir train/
mkdir test/
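## note: under Python 3, set PYTHONHASHSEED=0 for every process_ml_1m.py and
## parse.py invocation so hash() is reproducible across processes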
python process_ml_1m.py process_raw ./ml-1m/train.dat | sort -t $'\t' -k 9 -n > log.data.train
python process_ml_1m.py process_raw ./ml-1m/test.dat | sort -t $'\t' -k 9 -n > log.data.test
python process_ml_1m.py hash log.data.train > ./train/data.txt
python process_ml_1m.py hash log.data.test > ./test/data.txt
rm log.data.train
rm log.data.test
cd ../
## first switch `mode` to runner_infer and enable the commented offline-infer phase in recall/config.yaml and rank/config.yaml
cd recall
python -m paddlerec.run -m ./config.yaml
cd ../rank
python -m paddlerec.run -m ./config.yaml
cd ..
echo "recall offline test result:"
python parse.py recall_offline recall/infer_result
echo "rank offline test result:"
python parse.py rank_offline rank/infer_result
cd data
mkdir online_user/test
python process_ml_1m.py data_recall > online_user/test/data.txt
## switch recall/config.yaml to online infer mode (runner_infer + dataset_online_infer)
cd ../recall
python -m paddlerec.run -m ./config.yaml
cd ../
python parse.py recall_online recall/infer_result
cd data
python process_ml_1m.py data_rank > online_user/test/data.txt
## switch rank/config.yaml to online infer mode (runner_infer + dataset_online_infer)
cd ../rank
python -m paddlerec.run -m ./config.yaml
cd ../
python parse.py rank_online rank/infer_result
# -*- coding: utf-8 -*-
import sys
import json
import operator
from functools import reduce

import numpy as np
user_fea = ["userid", "gender", "age", "occupation"]
movie_fea = ["movieid", "title", "genres"]
rating_fea = ["userid", "movieid", "rating", "time"]
dict_size = 60000000
hash_dict = dict()
data_path = "data/ml-1m"
test_user_path = "data/online_user"
topk = 100
def read_raw_data():
user_dict = parse_data(data_path + "/users.dat", user_fea)
movie_dict = parse_data(data_path + "/movies.dat", movie_fea)
ratings_dict = dict()
for line in open(data_path + "/ratings.dat"):
arr = line.strip().split("::")
if arr[0] not in ratings_dict:
ratings_dict[arr[0]] = []
tmp = dict()
tmp["movieid"] = arr[1]
tmp["score"] = arr[2]
tmp["time"] = arr[3]
ratings_dict[arr[0]].append(tmp)
return user_dict, movie_dict, ratings_dict
def parse_data(file_name, feas):
res = {}
for line in open(file_name):
line = line.strip()
arr = line.split("::")
res[arr[0]] = dict()
_ = to_hash(feas[0], arr[0])
for i in range(0, len(feas)):
res[arr[0]][feas[i]] = arr[i]
return res
def to_hash(feas, arr):
out_str = "%s:%s" % (feas, (arr + arr[::-1] + arr[::-2] + arr[::-3]))
hash_id = hash(out_str) % dict_size
if hash_id in hash_dict and hash_dict[hash_id] != out_str:
print(hash_id, out_str, hash(out_str), hash_dict[hash_id])
print("conflict")
exit(-1)
hash_dict[hash_id] = out_str
return hash_id
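# NOTE: the ids computed here must reproduce exactly what
# data/process_ml_1m.py emitted, so both scripts need the same fixed
# PYTHONHASHSEED under Python 3.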
def load_ground_truth(user_dict, movie_dict, ratings_dict):
for line in open(test_user_path + "/users.dat"):
uid = line.strip().split("::")[0]
display_user(user_dict[uid])
ratings_dict[uid] = sorted(
ratings_dict[uid],
key=lambda i: (i["score"], i["time"]),
reverse=True)
ratings_dict[uid] = ratings_dict[uid][:topk]
for i in range(len(ratings_dict[uid])):
item = ratings_dict[uid][i]
mid = item["movieid"]
for key in movie_fea:
item[key] = movie_dict[mid][key]
display_movies(ratings_dict[uid])
def load_infer_results(path, feas, movie_dict):
with open(path) as f:
content = json.load(f)
    total = 0
    correct = 0
    mse = 0.0  # accumulates squared error, reported as a mean below
    res = dict()
    for item in content:
        userid = reduce(operator.add, item[feas["userid"]])
        movieid = reduce(operator.add, item[feas["movieid"]])
        ratings = reduce(operator.add, item[feas["ratings"]])
        predict = [int(r) for r in ratings]
        label = reduce(operator.add, item[feas["label"]])
        mse += sum(np.square(np.array(ratings) - np.array(label)))
        total += len(label)
        correct += sum(np.array(predict) == np.array(label))
for i in range(len(userid)):
hash_uid = userid[i]
hash_mid = movieid[i]
if hash_uid not in hash_dict or hash_mid not in hash_dict:
continue
            tmp = hash_dict[hash_uid].split(':')[1]
            uid = tmp[:len(tmp) // 3]  # recover the original id from the salted string
            tmp = hash_dict[hash_mid].split(':')[1]
            mid = tmp[:len(tmp) // 3]
if uid not in res:
res[uid] = []
item = {"score": ratings[i]}
for info in movie_dict[mid]:
item[info] = movie_dict[mid][info]
res[uid].append(item)
for key in res:
tmp = sorted(res[key], key=lambda i: i["score"], reverse=True)
existed_movie = []
res[key] = []
for i in range(len(tmp)):
if len(res[key]) >= topk:
break
if tmp[i]["movieid"] not in existed_movie:
existed_movie.append(tmp[i]["movieid"])
res[key].append(tmp[i])
print("total: " + str(total) + "; correct: " + str(correct))
print("accuracy: " + str(float(correct) / total))
print("mae: " + str(mae / total))
return res
def display_user(item):
out_str = ""
for key in user_fea:
out_str += "%s:%s " % (key, item[key])
print(out_str)
def display_movies(items):
    for item in items:
        print_str = ""
        for key in movie_fea:
            print_str += "%s:%s " % (key, item[key])
        print_str += "%s:%s" % ("score", item["score"])
        print(print_str)
def parse_infer(mode, path, user_dict, movie_dict):
    stage, infer_mode = mode.split('_')
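    # map logical names to the fetched variable names saved by single_infer;
    # "scale_0.tmp_0" is the auto-generated name of the scale op output in model.py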
feas = {
"userid": "userid",
"movieid": "movieid",
"ratings": "scale_0.tmp_0",
"label": "label"
}
infer_results = load_infer_results(path, feas, movie_dict)
    if infer_mode == "offline":
        return
for uid in infer_results:
display_user(user_dict[uid])
display_movies(infer_results[uid])
with open(test_user_path + "/movies.dat", 'w') as fout:
for uid in infer_results:
for item in infer_results[uid]:
str_ = uid + "::" + str(item["movieid"]) + "::" + str(
int(item["score"])) + "\n"
fout.write(str_)
if __name__ == "__main__":
user_dict, movie_dict, ratings_dict = read_raw_data()
if sys.argv[1] == "ground_truth":
load_ground_truth(user_dict, movie_dict, ratings_dict)
else:
parse_infer(sys.argv[1], sys.argv[2], user_dict, movie_dict)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
workspace: "paddlerec.models.demo.movie_recommand"
# list of dataset
dataset:
- name: dataset_train # name of dataset to distinguish different datasets
batch_size: 128
type: QueueDataset
data_path: "{workspace}/data/train"
sparse_slots: "logid time userid gender age occupation movieid title genres label"
dense_slots: ""
- name: dataset_infer # name
batch_size: 128
type: DataLoader
data_path: "{workspace}/data/test"
sparse_slots: "logid time userid gender age occupation movieid title genres label"
dense_slots: ""
- name: dataset_online_infer # name
batch_size: 10
type: DataLoader
data_path: "{workspace}/data/online_user/test"
sparse_slots: "logid time userid gender age occupation movieid title genres label"
dense_slots: ""
# hyper parameters of user-defined network
hyper_parameters:
# optimizer config
optimizer:
class: Adam
learning_rate: 0.001
strategy: async
# user-defined <key, value> pairs
sparse_feature_number: 60000000
sparse_feature_dim: 9
dense_input_dim: 13
fc_sizes: [512, 256, 128, 32]
# train
mode: runner_train
## online or offline infer
#mode: runner_infer
runner:
- name: runner_train
class: single_train
save_checkpoint_interval: 1 # save model interval of epochs
save_inference_interval: 1 # save inference model interval of epochs
save_checkpoint_path: "increment" # save checkpoint path
save_inference_path: "inference" # save inference path
epochs: 10
device: cpu
- name: runner_infer
epochs: 1
class: single_infer
print_interval: 10000
init_model_path: "increment/9" # load model path
#train
phase:
- name: phase1
model: "{workspace}/model.py" # user-defined model
dataset_name: dataset_train # select dataset by name
thread_num: 12
##offline infer
#phase:
#- name: phase1
# model: "{workspace}/model.py" # user-defined model
# dataset_name: dataset_infer # select dataset by name
# save_path: "./infer_result"
# thread_num: 1
##online infer
#phase:
#- name: phase1
# model: "{workspace}/model.py" # user-defined model
# dataset_name: dataset_online_infer # select dataset by name
# save_path: "./infer_result"
# thread_num: 1
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import paddle.fluid as fluid
from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase
class Model(ModelBase):
def __init__(self, config):
ModelBase.__init__(self, config)
def _init_hyper_parameters(self):
        self.is_distributed = envs.get_trainer() == "CtrTrainer"
self.sparse_feature_number = envs.get_global_env(
"hyper_parameters.sparse_feature_number")
self.sparse_feature_dim = envs.get_global_env(
"hyper_parameters.sparse_feature_dim")
self.learning_rate = envs.get_global_env(
"hyper_parameters.optimizer.learning_rate")
self.hidden_layers = envs.get_global_env("hyper_parameters.fc_sizes")
def net(self, input, is_infer=False):
self.user_sparse_inputs = self._sparse_data_var[2:6]
self.mov_sparse_inputs = self._sparse_data_var[6:9]
self.label_input = self._sparse_data_var[-1]
def fc(input):
fcs = [input]
for size in self.hidden_layers:
output = fluid.layers.fc(
input=fcs[-1],
size=size,
act='relu',
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Normal(
scale=1.0 / math.sqrt(fcs[-1].shape[1]))))
fcs.append(output)
return fcs[-1]
def embedding_layer(input):
emb = fluid.layers.embedding(
input=input,
is_sparse=True,
is_distributed=self.is_distributed,
size=[self.sparse_feature_number, self.sparse_feature_dim],
param_attr=fluid.ParamAttr(
name="emb", initializer=fluid.initializer.Uniform()), )
emb_sum = fluid.layers.sequence_pool(input=emb, pool_type='sum')
return emb_sum
user_sparse_embed_seq = list(
map(embedding_layer, self.user_sparse_inputs))
mov_sparse_embed_seq = list(
map(embedding_layer, self.mov_sparse_inputs))
concated_user = fluid.layers.concat(user_sparse_embed_seq, axis=1)
concated_mov = fluid.layers.concat(mov_sparse_embed_seq, axis=1)
usr_combined_features = fc(concated_user)
mov_combined_features = fc(concated_mov)
fc_input = fluid.layers.concat(
[usr_combined_features, mov_combined_features], axis=1)
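        # score the user/movie pair with one more fc, squash to (0, 1) with
        # sigmoid, then scale by 5 so the score is comparable to the 1-5 labels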
sim = fluid.layers.fc(
input=fc_input,
size=1,
act='sigmoid',
param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
scale=1.0 / math.sqrt(fc_input.shape[1]))))
predict = fluid.layers.scale(sim, scale=5)
self.predict = predict
#auc, batch_auc, _ = fluid.layers.auc(input=self.predict,
# label=self.label_input,
# num_thresholds=10000,
# slide_steps=20)
if is_infer:
self._infer_results["user_feature"] = usr_combined_features
self._infer_results["movie_feature"] = mov_combined_features
self._infer_results["uid"] = self._sparse_data_var[2]
self._infer_results["movieid"] = self._sparse_data_var[6]
self._infer_results["label"] = self._sparse_data_var[-1]
self._infer_results["predict"] = self.predict
return
#self._metrics["AUC"] = auc
#self._metrics["BATCH_AUC"] = batch_auc
#cost = fluid.layers.cross_entropy(
# input=self.predict, label=self.label_input)
cost = fluid.layers.square_error_cost(
self.predict,
fluid.layers.cast(
x=self.label_input, dtype='float32'))
avg_cost = fluid.layers.reduce_mean(cost)
self._cost = avg_cost
self._metrics["LOSS"] = avg_cost
def optimizer(self):
optimizer = fluid.optimizer.Adam(self.learning_rate, lazy_mode=True)
return optimizer
def infer_net(self):
pass
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
workspace: "paddlerec.models.demo.movie_recommand"
# list of dataset
dataset:
- name: dataset_train # name of dataset to distinguish different datasets
batch_size: 128
type: QueueDataset
data_path: "{workspace}/data/train"
sparse_slots: "logid time userid gender age occupation movieid title genres label"
dense_slots: ""
- name: dataset_infer # name
batch_size: 128
type: DataLoader
data_path: "{workspace}/data/test"
sparse_slots: "logid time userid gender age occupation movieid title genres label"
dense_slots: ""
- name: dataset_online_infer # name
batch_size: 128
type: DataLoader
data_path: "{workspace}/data/online_user/test"
sparse_slots: "logid time userid gender age occupation movieid title genres label"
dense_slots: ""
# hyper parameters of user-defined network
hyper_parameters:
# optimizer config
optimizer:
class: Adam
learning_rate: 0.001
strategy: async
# user-defined <key, value> pairs
sparse_feature_number: 60000000
sparse_feature_dim: 9
dense_input_dim: 13
fc_sizes: [512, 256, 128, 32]
# train
mode: runner_train
## online or offline infer
#mode: runner_infer
runner:
- name: runner_train
class: single_train
save_checkpoint_interval: 1 # save model interval of epochs
save_inference_interval: 1 # save inference model interval of epochs
save_checkpoint_path: "increment" # save checkpoint path
save_inference_path: "inference" # save inference path
epochs: 10
device: cpu
- name: runner_infer
epochs: 1
class: single_infer
print_interval: 10000
init_model_path: "increment/9" # load model path
#train
phase:
- name: phase1
model: "{workspace}/model.py" # user-defined model
dataset_name: dataset_train # select dataset by name
thread_num: 12
##offline infer
#phase:
#- name: phase1
# model: "{workspace}/model.py" # user-defined model
# dataset_name: dataset_infer # select dataset by name
# save_path: "./infer_result"
# thread_num: 1
##online infer
#phase:
#- name: phase1
# model: "{workspace}/model.py" # user-defined model
# dataset_name: dataset_online_infer # select dataset by name
# save_path: "./infer_result"
# thread_num: 1
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import paddle.fluid as fluid
from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase
class Model(ModelBase):
def __init__(self, config):
ModelBase.__init__(self, config)
def _init_hyper_parameters(self):
        self.is_distributed = envs.get_trainer() == "CtrTrainer"
self.sparse_feature_number = envs.get_global_env(
"hyper_parameters.sparse_feature_number")
self.sparse_feature_dim = envs.get_global_env(
"hyper_parameters.sparse_feature_dim")
self.learning_rate = envs.get_global_env(
"hyper_parameters.optimizer.learning_rate")
self.hidden_layers = envs.get_global_env("hyper_parameters.fc_sizes")
def net(self, input, is_infer=False):
self.user_sparse_inputs = self._sparse_data_var[2:6]
self.mov_sparse_inputs = self._sparse_data_var[6:9]
self.label_input = self._sparse_data_var[-1]
def fc(input):
fcs = [input]
for size in self.hidden_layers:
output = fluid.layers.fc(
input=fcs[-1],
size=size,
act='relu',
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Normal(
scale=1.0 / math.sqrt(fcs[-1].shape[1]))))
fcs.append(output)
return fcs[-1]
def embedding_layer(input):
emb = fluid.layers.embedding(
input=input,
is_sparse=True,
is_distributed=self.is_distributed,
size=[self.sparse_feature_number, self.sparse_feature_dim],
param_attr=fluid.ParamAttr(
name="emb", initializer=fluid.initializer.Uniform()), )
emb_sum = fluid.layers.sequence_pool(input=emb, pool_type='sum')
return emb_sum
user_sparse_embed_seq = list(
map(embedding_layer, self.user_sparse_inputs))
mov_sparse_embed_seq = list(
map(embedding_layer, self.mov_sparse_inputs))
concated_user = fluid.layers.concat(user_sparse_embed_seq, axis=1)
concated_mov = fluid.layers.concat(mov_sparse_embed_seq, axis=1)
usr_combined_features = fc(concated_user)
mov_combined_features = fc(concated_mov)
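        # two-tower recall: cosine similarity of the user and movie towers
        # (range [-1, 1]), scaled by 5 to be comparable to the 1-5 labels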
sim = fluid.layers.cos_sim(
X=usr_combined_features, Y=mov_combined_features)
predict = fluid.layers.scale(sim, scale=5)
self.predict = predict
if is_infer:
self._infer_results["uid"] = self._sparse_data_var[2]
self._infer_results["movieid"] = self._sparse_data_var[6]
self._infer_results["label"] = self._sparse_data_var[-1]
self._infer_results["predict"] = self.predict
return
cost = fluid.layers.square_error_cost(
self.predict,
fluid.layers.cast(
x=self.label_input, dtype='float32'))
avg_cost = fluid.layers.reduce_mean(cost)
self._cost = avg_cost
self._metrics["LOSS"] = avg_cost
def optimizer(self):
optimizer = fluid.optimizer.Adam(self.learning_rate, lazy_mode=True)
return optimizer
cd recall
python -m paddlerec.run -m ./config.yaml &> log &
cd ../rank
python -m paddlerec.run -m ./config.yaml &> log &
cd ..