Commit 89b366a1 authored by: C chengmo

update doc

*.o
output
.idea/
build/
dist/
fleetrec.egg-info/
paddlerec.egg-info/
*~
*.pyc
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from __future__ import unicode_literals

import subprocess
import sys
import os
import copy

from paddlerec.core.engine.engine import Engine
from paddlerec.core.factory import TrainerFactory
from paddlerec.core.utils import envs


class ClusterEngine(Engine):
    def __init_impl__(self):
        abs_dir = os.path.dirname(os.path.abspath(__file__))
        self.submit_script = os.path.join(abs_dir, "master.sh")

    def start_worker_procs(self):
        trainer = TrainerFactory.create(self.trainer)
        trainer.run()

    def start_master_procs(self):
        default_env = os.environ.copy()
        current_env = copy.copy(default_env)
        current_env.pop("http_proxy", None)
        current_env.pop("https_proxy", None)

        cmd = ("bash {}".format(self.submit_script)).split(" ")
        proc = subprocess.Popen(cmd, env=current_env, cwd=os.getcwd())
        proc.wait()

    def run(self):
        role = envs.get_runtime_environ("engine_role")

        if role == "MASTER":
            self.start_master_procs()
        elif role == "WORKER":
            self.start_worker_procs()
        else:
            raise ValueError("role {} error, must be MASTER or WORKER".format(role))
#!/bin/bash
###################################################
# Usage: submit.sh
# Description: run mpi submit client
###################################################
# ---------------------------------------------------------------------------- #
# variable define #
# ---------------------------------------------------------------------------- #
declare g_curPath=""
declare g_scriptName=""
declare g_workPath=""
declare g_run_stage=""
# ---------------------------------------------------------------------------- #
# const define #
# ---------------------------------------------------------------------------- #
declare -r CALL="x"
################################################################################
#-----------------------------------------------------------------------------------------------------------------
# Function: get_cur_path
# Description: get current path
# Parameter:
# input:
# N/A
# output:
# N/A
# Return: 0 -- success; not 0 -- failure
# Others: N/A
#-----------------------------------------------------------------------------------------------------------------
get_cur_path() {
    g_run_stage="get_cur_path"
    cd "$(dirname "${BASH_SOURCE-$0}")"
    g_curPath="${PWD}"
    g_scriptName="$(basename "${BASH_SOURCE-$0}")"
    cd - >/dev/null
}
#-----------------------------------------------------------------------------------------------------------------
#fun : check function return code
#param : N/A
#return : 0 -- success; not 0 -- failure
#-----------------------------------------------------------------------------------------------------------------
function check_error() {
    if [ ${?} -ne 0 ]; then
        echo "execute ${g_run_stage} raised an exception! please check ..."
        exit 1
    fi
}
source ${engine_submit_scrpit}
main
......@@ -7,6 +7,10 @@ class Engine:
def __init__(self, envs, trainer):
self.envs = envs
self.trainer = trainer
self.__init_impl__()
def __init_impl__(self):
pass
@abc.abstractmethod
def run(self):
......
......@@ -19,8 +19,8 @@ import sys
import os
import copy
from fleetrec.core.engine.engine import Engine
from fleetrec.core.utils import envs
from paddlerec.core.engine.engine import Engine
from paddlerec.core.utils import envs
class LocalClusterEngine(Engine):
......@@ -44,14 +44,13 @@ class LocalClusterEngine(Engine):
if new_port not in ports:
ports.append(new_port)
break
user_endpoints = ",".join(["127.0.0.1:" + str(x) for x in ports])
user_endpoints_ips = [x.split(":")[0]
for x in user_endpoints.split(",")]
user_endpoints_port = [x.split(":")[1]
for x in user_endpoints.split(",")]
factory = "fleetrec.core.factory"
factory = "paddlerec.core.factory"
cmd = [sys.executable, "-u", "-m", factory, self.trainer]
for i in range(server_num):
......
......@@ -19,7 +19,7 @@ import sys
import os
import copy
from fleetrec.core.engine.engine import Engine
from paddlerec.core.engine.engine import Engine
class LocalMPIEngine(Engine):
......@@ -33,7 +33,7 @@ class LocalMPIEngine(Engine):
procs = []
log_fns = []
factory = "fleetrec.core.factory"
factory = "paddlerec.core.factory"
cmd = "mpirun -npernode 2 -timestamp-output -tag-output".split(" ")
cmd.extend([sys.executable, "-u", "-m", factory, self.trainer])
......
......@@ -17,7 +17,7 @@ import sys
import yaml
from fleetrec.core.utils import envs
from paddlerec.core.utils import envs
trainer_abs = os.path.join(os.path.dirname(
os.path.abspath(__file__)), "trainers")
......@@ -71,7 +71,7 @@ class TrainerFactory(object):
with open(config, 'r') as rb:
_config = yaml.load(rb.read(), Loader=yaml.FullLoader)
else:
raise ValueError("fleetrec's config only support yaml")
raise ValueError("paddlerec's config only support yaml")
envs.set_global_envs(_config)
envs.update_workspace()
......
......@@ -15,7 +15,7 @@
import math
import numpy as np
import paddle.fluid as fluid
from fleetrec.core.metric import Metric
from paddlerec.core.metric import Metric
class AUCMetric(Metric):
......
......@@ -2,7 +2,7 @@ import abc
import paddle.fluid as fluid
from fleetrec.core.utils import envs
from paddlerec.core.utils import envs
class Model(object):
......@@ -16,7 +16,10 @@ class Model(object):
self._cost = None
self._metrics = {}
self._data_var = []
self._infer_data_var = []
self._infer_results = {}
self._data_loader = None
self._infer_data_loader = None
self._fetch_interval = 20
self._namespace = "train.model"
self._platform = envs.get_platform()
......@@ -24,6 +27,12 @@ class Model(object):
def get_inputs(self):
return self._data_var
def get_infer_inputs(self):
return self._infer_data_var
def get_infer_results(self):
return self._infer_results
def get_cost_op(self):
"""R
"""
......
......@@ -3,8 +3,8 @@ import copy
import paddle.fluid as fluid
from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
from fleetrec.core.model import Model
from fleetrec.core.utils import table
from paddlerec.core.model import Model
from paddlerec.core.utils import table
def create(config):
......
......@@ -13,7 +13,7 @@
# limitations under the License.
import paddle.fluid as fluid
from fleetrec.core.layer import Layer
from paddlerec.core.layer import Layer
class EmbeddingInputLayer(Layer):
......
......@@ -19,7 +19,7 @@ import os
import paddle.fluid.incubate.data_generator as dg
import yaml
from fleetrec.core.utils import envs
from paddlerec.core.utils import envs
class Reader(dg.MultiSlotDataGenerator):
......
......@@ -20,7 +20,7 @@ import time
import yaml
from paddle import fluid
from fleetrec.core.utils import envs
from paddlerec.core.utils import envs
class Trainer(object):
......
......@@ -23,14 +23,16 @@ import paddle.fluid as fluid
from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet
from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import StrategyFactory
from paddle.fluid.incubate.fleet.base.role_maker import PaddleCloudRoleMaker
from paddle.fluid.incubate.fleet.base.role_maker import MPISymetricRoleMaker
from fleetrec.core.utils import envs
from fleetrec.core.trainers.transpiler_trainer import TranspileTrainer
from paddlerec.core.utils import envs
from paddlerec.core.trainers.transpiler_trainer import TranspileTrainer
class ClusterTrainer(TranspileTrainer):
def processor_register(self):
role = PaddleCloudRoleMaker()
#role = PaddleCloudRoleMaker()
role = MPISymetricRoleMaker()
fleet.init(role)
if fleet.is_server():
......@@ -40,12 +42,13 @@ class ClusterTrainer(TranspileTrainer):
else:
self.regist_context_processor('uninit', self.instance)
self.regist_context_processor('init_pass', self.init)
self.regist_context_processor('startup_pass', self.startup)
if envs.get_platform() == "LINUX" and envs.get_global_env("dataset_class", None, "train.reader") != "DataLoader":
self.regist_context_processor('train_pass', self.dataset_train)
else:
self.regist_context_processor(
'train_pass', self.dataloader_train)
self.regist_context_processor('infer_pass', self.infer)
self.regist_context_processor('terminal_pass', self.terminal)
def build_strategy(self):
......@@ -139,14 +142,15 @@ class ClusterTrainer(TranspileTrainer):
metrics = [epoch, batch_id]
metrics.extend(metrics_rets)
if batch_id % 10 == 0 and batch_id != 0:
if batch_id % self.fetch_period == 0 and batch_id != 0:
print(metrics_format.format(*metrics))
batch_id += 1
except fluid.core.EOFException:
reader.reset()
self.save(epoch, "train", is_fleet=True)
fleet.stop_worker()
context['status'] = 'terminal_pass'
context['status'] = 'infer_pass'
def dataset_train(self, context):
fleet.init_worker()
......@@ -162,10 +166,7 @@ class ClusterTrainer(TranspileTrainer):
print_period=self.fetch_period)
self.save(i, "train", is_fleet=True)
fleet.stop_worker()
context['status'] = 'terminal_pass'
def infer(self, context):
context['status'] = 'terminal_pass'
context['status'] = 'infer_pass'
def terminal(self, context):
for model in self.increment_models:
......
......@@ -18,8 +18,8 @@ import paddle.fluid as fluid
from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
from paddle.fluid.incubate.fleet.base.role_maker import MPISymetricRoleMaker
from fleetrec.core.utils import envs
from fleetrec.core.trainer import Trainer
from paddlerec.core.utils import envs
from paddlerec.core.trainer import Trainer
class CtrPaddleTrainer(Trainer):
......
......@@ -24,12 +24,12 @@ from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
from paddle.fluid.incubate.fleet.base.role_maker import GeneralRoleMaker
from fleetrec.core.utils import fs as fs
from fleetrec.core.utils import util as util
from fleetrec.core.metrics.auc_metrics import AUCMetric
from fleetrec.core.modules.modul import build as model_basic
from fleetrec.core.utils import dataset
from fleetrec.core.trainer import Trainer
from paddlerec.core.utils import fs as fs
from paddlerec.core.utils import util as util
from paddlerec.core.metrics.auc_metrics import AUCMetric
from paddlerec.core.modules.modul import build as model_basic
from paddlerec.core.utils import dataset
from paddlerec.core.trainer import Trainer
def wroker_numric_opt(value, env, opt):
......
......@@ -20,8 +20,8 @@ from __future__ import print_function
import logging
import paddle.fluid as fluid
from fleetrec.core.trainers.transpiler_trainer import TranspileTrainer
from fleetrec.core.utils import envs
from paddlerec.core.trainers.transpiler_trainer import TranspileTrainer
from paddlerec.core.utils import envs
import numpy as np
logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
......@@ -33,7 +33,7 @@ class SingleTrainer(TranspileTrainer):
def processor_register(self):
self.regist_context_processor('uninit', self.instance)
self.regist_context_processor('init_pass', self.init)
self.regist_context_processor('startup_pass', self.startup)
if envs.get_platform() == "LINUX" and envs.get_global_env("dataset_class", None, "train.reader") != "DataLoader":
self.regist_context_processor('train_pass', self.dataset_train)
else:
......@@ -62,7 +62,7 @@ class SingleTrainer(TranspileTrainer):
context['status'] = 'train_pass'
def dataloader_train(self, context):
reader = self._get_dataloader()
reader = self._get_dataloader("TRAIN")
epochs = envs.get_global_env("train.epochs")
program = fluid.compiler.CompiledProgram(
......@@ -93,16 +93,17 @@ class SingleTrainer(TranspileTrainer):
metrics = [epoch, batch_id]
metrics.extend(metrics_rets)
if batch_id % 10 == 0 and batch_id != 0:
if batch_id % self.fetch_period == 0 and batch_id != 0:
print(metrics_format.format(*metrics))
batch_id += 1
except fluid.core.EOFException:
reader.reset()
self.save(epoch, "train", is_fleet=False)
context['status'] = 'infer_pass'
def dataset_train(self, context):
dataset = self._get_dataset()
dataset = self._get_dataset("TRAIN")
epochs = envs.get_global_env("train.epochs")
for i in range(epochs):
......@@ -114,9 +115,6 @@ class SingleTrainer(TranspileTrainer):
self.save(i, "train", is_fleet=False)
context['status'] = 'infer_pass'
def infer(self, context):
context['status'] = 'terminal_pass'
def terminal(self, context):
for model in self.increment_models:
print("epoch :{}, dir: {}".format(model[0], model[1]))
......
......@@ -25,8 +25,8 @@ from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import f
from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import StrategyFactory
from paddle.fluid.incubate.fleet.base.role_maker import PaddleCloudRoleMaker
from fleetrec.core.utils import envs
from fleetrec.core.trainers.cluster_trainer import ClusterTrainer
from paddlerec.core.utils import envs
from paddlerec.core.trainers.cluster_trainer import ClusterTrainer
logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
......
......@@ -21,9 +21,9 @@ from __future__ import print_function
import logging
import paddle.fluid as fluid
from fleetrec.core.trainers.transpiler_trainer import TranspileTrainer
from fleetrec.core.trainers.single_trainer import SingleTrainer
from fleetrec.core.utils import envs
from paddlerec.core.trainers.transpiler_trainer import TranspileTrainer
from paddlerec.core.trainers.single_trainer import SingleTrainer
from paddlerec.core.utils import envs
import numpy as np
logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
......
......@@ -20,10 +20,9 @@ import os
import paddle.fluid as fluid
from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet
from fleetrec.core.trainer import Trainer
from fleetrec.core.utils import envs
from fleetrec.core.utils import dataloader_instance
import fleetrec.core.din_reader as din_reader
from paddlerec.core.trainer import Trainer
from paddlerec.core.utils import envs
from paddlerec.core.utils import dataloader_instance
class TranspileTrainer(Trainer):
......@@ -37,40 +36,56 @@ class TranspileTrainer(Trainer):
def processor_register(self):
print("Need implement by trainer, `self.regist_context_processor('uninit', self.instance)` must be the first")
def _get_dataloader(self):
namespace = "train.reader"
dataloader = self.model._data_loader
def _get_dataloader(self, state="TRAIN"):
if state == "TRAIN":
dataloader = self.model._data_loader
namespace = "train.reader"
class_name = "TrainReader"
else:
dataloader = self.model._infer_data_loader
namespace = "evaluate.reader"
class_name = "EvaluateReader"
batch_size = envs.get_global_env("batch_size", None, namespace)
reader_class = envs.get_global_env("class", None, namespace)
print("batch_size: {}".format(batch_size))
reader = dataloader_instance.dataloader(
reader_class, state, self._config_yaml)
reader = dataloader_instance.dataloader(reader_class, "TRAIN", self._config_yaml)
reader_class = envs.lazy_instance_by_fliename(reader_class, "TrainReader")
reader_class = envs.lazy_instance_by_fliename(reader_class, class_name)
reader_ins = reader_class(self._config_yaml)
if hasattr(reader_ins,'generate_batch_from_trainfiles'):
print("++++++++hieehi+++++++++")
if hasattr(reader_ins, 'generate_batch_from_trainfiles'):
dataloader.set_sample_list_generator(reader)
else:
dataloader.set_sample_generator(reader, batch_size)
return dataloader
def _get_dataset(self):
namespace = "train.reader"
def _get_dataset(self, state="TRAIN"):
if state == "TRAIN":
inputs = self.model.get_inputs()
namespace = "train.reader"
train_data_path = envs.get_global_env(
"train_data_path", None, namespace)
else:
inputs = self.model.get_infer_inputs()
namespace = "evaluate.reader"
train_data_path = envs.get_global_env(
"test_data_path", None, namespace)
inputs = self.model.get_inputs()
threads = int(envs.get_runtime_environ("train.trainer.threads"))
#threads = int(envs.get_runtime_environ("train.trainer.threads"))
threads = 2
batch_size = envs.get_global_env("batch_size", None, namespace)
reader_class = envs.get_global_env("class", None, namespace)
abs_dir = os.path.dirname(os.path.abspath(__file__))
reader = os.path.join(abs_dir, '../utils', 'dataset_instance.py')
pipe_cmd = "python {} {} {} {}".format(reader, reader_class, "TRAIN", self._config_yaml)
train_data_path = envs.get_global_env("train_data_path", None, namespace)
pipe_cmd = "python {} {} {} {}".format(
reader, reader_class, state, self._config_yaml)
if train_data_path.startswith("fleetrec::"):
if train_data_path.startswith("paddlerec::"):
package_base = envs.get_runtime_environ("PACKAGE_BASE")
assert package_base is not None
train_data_path = os.path.join(package_base, train_data_path.split("::")[1])
train_data_path = os.path.join(
package_base, train_data_path.split("::")[1])
dataset = fluid.DatasetFactory().create_dataset()
dataset.set_use_var(inputs)
......@@ -96,35 +111,42 @@ class TranspileTrainer(Trainer):
return epoch_id % epoch_interval == 0
def save_inference_model():
save_interval = envs.get_global_env("save.inference.epoch_interval", -1, namespace)
save_interval = envs.get_global_env(
"save.inference.epoch_interval", -1, namespace)
if not need_save(epoch_id, save_interval, False):
return
print("save inference model is not supported now.")
return
# print("save inference model is not supported now.")
# return
feed_varnames = envs.get_global_env("save.inference.feed_varnames", None, namespace)
fetch_varnames = envs.get_global_env("save.inference.fetch_varnames", None, namespace)
fetch_vars = [fluid.global_scope().vars[varname] for varname in fetch_varnames]
if feed_varnames is None or fetch_varnames is None:
return
fetch_vars = [fluid.default_main_program().global_block().vars[varname] for varname in fetch_varnames]
dirname = envs.get_global_env("save.inference.dirname", None, namespace)
assert dirname is not None
dirname = os.path.join(dirname, str(epoch_id))
if is_fleet:
fleet.save_inference_model(dirname, feed_varnames, fetch_vars)
fleet.save_inference_model(self._exe, dirname, feed_varnames, fetch_vars)
else:
fluid.io.save_inference_model(dirname, feed_varnames, fetch_vars, self._exe)
fluid.io.save_inference_model(
dirname, feed_varnames, fetch_vars, self._exe)
self.inference_models.append((epoch_id, dirname))
def save_persistables():
save_interval = envs.get_global_env("save.increment.epoch_interval", -1, namespace)
save_interval = envs.get_global_env(
"save.increment.epoch_interval", -1, namespace)
if not need_save(epoch_id, save_interval, False):
return
dirname = envs.get_global_env("save.increment.dirname", None, namespace)
dirname = envs.get_global_env(
"save.increment.dirname", None, namespace)
assert dirname is not None
dirname = os.path.join(dirname, str(epoch_id))
......@@ -157,7 +179,53 @@ class TranspileTrainer(Trainer):
context['is_exit'] = True
def infer(self, context):
context['is_exit'] = True
infer_program = fluid.Program()
startup_program = fluid.Program()
with fluid.unique_name.guard():
with fluid.program_guard(infer_program, startup_program):
self.model.infer_net()
if self.model._infer_data_loader is None:
context['status'] = 'terminal_pass'
return
reader = self._get_dataloader("Evaluate")
metrics_varnames = []
metrics_format = []
metrics_format.append("{}: {{}}".format("epoch"))
metrics_format.append("{}: {{}}".format("batch"))
for name, var in self.model.get_infer_results().items():
metrics_varnames.append(var.name)
metrics_format.append("{}: {{}}".format(name))
metrics_format = ", ".join(metrics_format)
self._exe.run(startup_program)
for (epoch, model_dir) in self.increment_models:
print("Begin to infer epoch {}, model_dir: {}".format(epoch, model_dir))
program = infer_program.clone()
fluid.io.load_persistables(self._exe, model_dir, program)
reader.start()
batch_id = 0
try:
while True:
metrics_rets = self._exe.run(
program=program,
fetch_list=metrics_varnames)
metrics = [epoch, batch_id]
metrics.extend(metrics_rets)
if batch_id % 2 == 0 and batch_id != 0:
print(metrics_format.format(*metrics))
batch_id += 1
except fluid.core.EOFException:
reader.reset()
context['status'] = 'terminal_pass'
def terminal(self, context):
print("clean up and exit")
......
......@@ -16,22 +16,22 @@ from __future__ import print_function
import os
import sys
from fleetrec.core.utils.envs import lazy_instance_by_fliename
from fleetrec.core.utils.envs import get_global_env
from fleetrec.core.utils.envs import get_runtime_environ
from paddlerec.core.utils.envs import lazy_instance_by_fliename
from paddlerec.core.utils.envs import get_global_env
from paddlerec.core.utils.envs import get_runtime_environ
def dataloader(readerclass, train, yaml_file):
namespace = "train.reader"
if train == "TRAIN":
reader_name = "TrainReader"
namespace = "train.reader"
data_path = get_global_env("train_data_path", None, namespace)
else:
reader_name = "EvaluateReader"
namespace = "evaluate.reader"
data_path = get_global_env("test_data_path", None, namespace)
if data_path.startswith("fleetrec::"):
if data_path.startswith("paddlerec::"):
package_base = get_runtime_environ("PACKAGE_BASE")
assert package_base is not None
data_path = os.path.join(package_base, data_path.split("::")[1])
......@@ -62,4 +62,4 @@ def dataloader(readerclass, train, yaml_file):
if hasattr(reader, 'generate_batch_from_trainfiles'):
return gen_batch_reader()
return reader.generate_dataloader_batch(files)
return gen_reader
......@@ -18,8 +18,8 @@ import datetime
import paddle.fluid as fluid
from fleetrec.core.utils import fs as fs
from fleetrec.core.utils import util as util
from paddlerec.core.utils import fs as fs
from paddlerec.core.utils import util as util
class Dataset(object):
......
......@@ -14,7 +14,7 @@
from __future__ import print_function
import sys
from fleetrec.core.utils.envs import lazy_instance_by_fliename
from paddlerec.core.utils.envs import lazy_instance_by_fliename
if len(sys.argv) != 4:
raise ValueError("reader only accept 3 argument: 1. reader_class 2.train/evaluate 3.yaml_abs_path")
......
......@@ -16,16 +16,17 @@ import os
import copy
import sys
import socket
from contextlib import closing
global_envs = {}
def flatten_environs(envs):
def flatten_environs(envs, separator="."):
flatten_dict = {}
assert isinstance(envs, dict)
def fatten_env_namespace(namespace_nests, local_envs):
if not isinstance(local_envs, dict):
global_k = ".".join(namespace_nests)
global_k = separator.join(namespace_nests)
flatten_dict[global_k] = str(local_envs)
else:
for k, v in local_envs.items():
......@@ -34,7 +35,7 @@ def flatten_environs(envs):
nests.append(k)
fatten_env_namespace(nests, v)
else:
global_k = ".".join(namespace_nests + [k])
global_k = separator.join(namespace_nests + [k])
flatten_dict[global_k] = str(v)
for k, v in envs.items():
......@@ -93,9 +94,9 @@ def update_workspace():
return
# is fleet inner models
if workspace.startswith("fleetrec."):
if workspace.startswith("paddlerec."):
fleet_package = get_runtime_environ("PACKAGE_BASE")
workspace_dir = workspace.split("fleetrec.")[1].replace(".", "/")
workspace_dir = workspace.split("paddlerec.")[1].replace(".", "/")
path = os.path.join(fleet_package, workspace_dir)
else:
path = workspace
......@@ -127,7 +128,7 @@ def pretty_print_envs(envs, header=None):
if header:
draws += h_format.format(header[0], header[1])
else:
draws += h_format.format("fleetrec Global Envs", "Value")
draws += h_format.format("paddlerec Global Envs", "Value")
draws += line + "\n"
......@@ -175,6 +176,12 @@ def get_platform():
return "WINDOWS"
<<<<<<< HEAD: fleet_rec/core/utils/envs.py
=======
>>>>>>> upstream/develop: core/utils/envs.py
def find_free_port():
def __free_port():
with closing(socket.socket(socket.AF_INET,
......
......@@ -17,7 +17,7 @@ import time
import datetime
from paddle import fluid
from fleetrec.core.utils import fs as fs
from paddlerec.core.utils import fs as fs
def save_program_proto(path, program=None):
......
File mode changed from 100644 to 100755
# Contributing Code to PaddleRec
\ No newline at end of file
# Contributing Code to PaddleRec
> Placeholder
\ No newline at end of file
# PaddleRec Custom Dataset and Reader
\ No newline at end of file
# PaddleRec Custom Dataset and Reader
## Reading data with dataset

To train CTR models at high speed, we use the `dataset` API for high-performance IO. dataset is a data-reading mechanism built for multi-threaded, fully asynchronous execution: each data-reading thread is coupled with a training thread, forming a multi-producer/multi-consumer pattern that greatly accelerates model training.

How do we bring the dataset reading mode into our training? No change to the data format is required; adding the following to the training code is enough to approach the efficiency of binary reading. A fairly complete walkthrough follows:

### Introducing dataset

1. Create a dataset object through the factory class `fluid.DatasetFactory()`.
2. Pass our predefined input format to the dataset via `dataset.set_use_var(inputs)`.
3. Specify how the data is read; `dataset_generator.py` implements the reading rules, which are described later.
4. Specify the batch_size used when reading.
5. Specify the number of reading threads; it should match the number of training threads, since the two are coupled.
6. Specify the list of training files that dataset reads.
```python
def get_dataset(inputs, args):
    dataset = fluid.DatasetFactory().create_dataset()
    dataset.set_use_var(inputs)
    dataset.set_pipe_command("python dataset_generator.py")
    dataset.set_batch_size(args.batch_size)
    dataset.set_thread(int(args.cpu_num))
    file_list = [
        str(args.train_files_path) + "/%s" % x
        for x in os.listdir(args.train_files_path)
    ]
    logger.info("file list: {}".format(file_list))
    return dataset, file_list
```
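For completeness, here is a minimal sketch of how the returned `dataset` is usually consumed; `inputs`, `args`, `exe` and the loss variable `avg_cost` are assumed to come from the surrounding training script and are not part of the snippet above:

```python
# A minimal training-loop sketch (assumes `inputs`, `args` and a network whose
# loss variable is `avg_cost` have been defined elsewhere in the script).
dataset, file_list = get_dataset(inputs, args)
dataset.set_filelist(file_list)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())

for epoch in range(args.epochs):
    exe.train_from_dataset(
        program=fluid.default_main_program(),
        dataset=dataset,
        fetch_list=[avg_cost],   # variables whose values are printed during training
        fetch_info=["loss"],
        print_period=100)
```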
### How to specify the data reading rules

We mentioned above that `dataset_generator.py` implements the concrete data reading rules. So how do we create those rules for dataset?

The full code of `dataset_generator.py` is listed below; the overall flow is:

1. First import the dataset library, located at `paddle.fluid.incubate.data_generator`.
2. Declare the variables used during reading, such as `cont_min_` and `categorical_range_` in the sample code.
3. Create a subclass that inherits from one of dataset's base classes. If the data mixes several types and needs to be converted to numeric values for preprocessing, `MultiSlotDataGenerator` is recommended; if preprocessing is already done and the data is stored as files, it can be read directly as `string` with `MultiSlotStringDataGenerator`, which is even faster. In the sample code we subclass `MultiSlotDataGenerator` to implement a dataset subclass named `CriteoDataset`.
4. Inherit and implement the base class's `generate_sample` function, which reads the data line by line. It should return an iterable reader method (a function containing `yield` is no longer an ordinary function but a generator, an iterable object comparable to an array, linked list, file, string, etc.).
5. Inside this iterable function, such as `def reader()` in the sample code, define the data reading logic, e.g. slicing, converting and preprocessing each line.
6. Finally, the data must be arranged into a specific format so that dataset can read it correctly and feed it into the training network. In short, the output order of the data must strictly correspond, one to one, to the `inputs` created in the network, converted into a dict-like form. In the sample code we use `zip` to build a list of (name, value) tuples and `yield` it. Expanded, the output looks like `[('dense_feature',[value]),('C1',[value]),('C2',[value]),...,('C26',[value]),('label',[value])]`.
```python
import paddle.fluid.incubate.data_generator as dg

cont_min_ = [0, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
cont_max_ = [20, 600, 100, 50, 64000, 500, 100, 50, 500, 10, 10, 10, 50]
cont_diff_ = [20, 603, 100, 50, 64000, 500, 100, 50, 500, 10, 10, 10, 50]
hash_dim_ = 1000001
continuous_range_ = range(1, 14)
categorical_range_ = range(14, 40)


class CriteoDataset(dg.MultiSlotDataGenerator):
    def generate_sample(self, line):
        def reader():
            features = line.rstrip('\n').split('\t')
            dense_feature = []
            sparse_feature = []
            for idx in continuous_range_:
                if features[idx] == "":
                    dense_feature.append(0.0)
                else:
                    dense_feature.append(
                        (float(features[idx]) - cont_min_[idx - 1]) /
                        cont_diff_[idx - 1])
            for idx in categorical_range_:
                sparse_feature.append(
                    [hash(str(idx) + features[idx]) % hash_dim_])
            label = [int(features[0])]
            process_line = dense_feature, sparse_feature, label
            feature_name = ["dense_feature"]
            for idx in categorical_range_:
                feature_name.append("C" + str(idx - 13))
            feature_name.append("label")
            yield zip(feature_name, [dense_feature] + sparse_feature + [label])
        return reader


d = CriteoDataset()
d.run_from_stdin()
```
### Quickly debugging Dataset

We can verify whether the Dataset output matches our expectations on its own, without building the network. Use the command
`cat <data file> | python <dataset reader script>` to debug the dataset code:
```bash
cat train_data/part-0 | python dataset_generator.py
```
The output data format is:
` dense_input:size ; dense_input:value ; sparse_input:size ; sparse_input:value ; ... ; sparse_input:size ; sparse_input:value ; label:size ; label:value `
The expected output looks like this (an excerpt):
```bash
...
13 0.05 0.00663349917081 0.05 0.0 0.02159375 0.008 0.15 0.04 0.362 0.1 0.2 0.0 0.04 1 715353 1 817085 1 851010 1 833725 1 286835 1 948614 1 881652 1 507110 1 27346 1 646986 1 643076 1 200960 1 18464 1 202774 1 532679 1 729573 1 342789 1 562805 1 880474 1 984402 1 666449 1 26235 1 700326 1 452909 1 884722 1 787527 1 0
...
```
> Some caveats when using Dataset
> - How Dataset works: the data is printed to a buffer (stdout) and then read by code on the C++ side. Therefore, do not add print statements unrelated to data reading inside the dataset reading code, otherwise the C++ side will receive corrupted data (see the sketch below).
> - dataset is currently only supported in standard Linux environments such as `Ubuntu` and `CentOS`; running it on `Windows` or `Mac` will produce unexpected errors.
\ No newline at end of file
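As a hedged illustration of the first caveat above: inside `generate_sample`, stdout is the pipe consumed by the C++ reader, so debug output has to go somewhere else, for example stderr. The class below is a hypothetical sketch, not part of PaddleRec:

```python
import sys
import paddle.fluid.incubate.data_generator as dg


class DebuggableDataset(dg.MultiSlotDataGenerator):
    def generate_sample(self, line):
        def reader():
            # stdout is reserved for the serialized sample; debug info goes to stderr.
            sys.stderr.write("processing a line of length %d\n" % len(line))
            label = [int(line.rstrip('\n').split('\t')[0])]
            yield [("label", label)]
        return reader


if __name__ == "__main__":
    DebuggableDataset().run_from_stdin()
```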
# PaddleRec Distributed Training

## Basic introduction to distributed training

> Placeholder

## Converting single-node code to distributed code

> Placeholder

### Preparing the training code

The parameter server architecture has two key components: the Server and the Worker. Do we have to prepare two separate codebases and run them separately to start training? No. The Paddle Fleet API unifies the logic of the two roles: with `fleet.init(role)` the program can tell whether it is currently running as a server or a worker. Using the following programming paradigm, about 10 lines are enough to turn single-node code into distributed code:
```python
role = role_maker.PaddleCloudRoleMaker()
fleet.init(role)

# Define your network, choose your optimizer(SGD/Adam/Adagrad etc.)
strategy = StrategyFactory.create_sync_strategy()
optimizer = fleet.distributed_optimizer(optimizer, strategy)

if fleet.is_server():
    fleet.init_server()
    fleet.run_server()

if fleet.is_worker():
    fleet.init_worker()
    # run training
    fleet.stop_worker()
```
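For context, here is a slightly fuller sketch of the same pattern, showing where the network definition and the `minimize` call usually sit. `build_network()` and `avg_cost` are placeholder names, and the use of `fleet.startup_program` assumes the transpiler-based fleet; treat this as an illustrative sketch rather than the exact PaddleRec training loop:

```python
import paddle.fluid as fluid
from paddle.fluid.incubate.fleet.base import role_maker
from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet
from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import StrategyFactory

role = role_maker.PaddleCloudRoleMaker()
fleet.init(role)

avg_cost = build_network()          # placeholder: build your net and return the loss
optimizer = fluid.optimizer.SGD(0.0001)
strategy = StrategyFactory.create_sync_strategy()
optimizer = fleet.distributed_optimizer(optimizer, strategy)
optimizer.minimize(avg_cost)        # minimize is called on the wrapped optimizer

if fleet.is_server():
    fleet.init_server()
    fleet.run_server()

if fleet.is_worker():
    exe = fluid.Executor(fluid.CPUPlace())
    fleet.init_worker()
    exe.run(fleet.startup_program)  # assumption: transpiler fleet exposes startup_program
    # run training here, e.g. exe.train_from_dataset(...)
    fleet.stop_worker()
```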
### Preparing the runtime environment

- Paddle parameter-server training currently only runs on `Linux`; `Ubuntu` or `CentOS` is recommended.
- The front-end code of Paddle parameter-server mode supports `python 2.7` and `python 3.5+`; the high-performance `Dataset` IO mode requires `python 2.7`.
- When training across multiple machines, make sure each machine can reach the others' `rpc` service via `ip:port`; going through an `http/https` proxy will cause communication to fail.
- Communication overhead between the machines should be kept as low as possible.

Suppose we have two machines and want to start one `server` process and one `worker` process on each, running 2x2 (2 parameter servers, 2 trainers) parameter-server distributed training. Follow the steps below.

### Starting the servers

On machine A, whose IP address is `10.89.176.11` and whose communication port is `36000`, set the following environment variables and run the training entry script:
```bash
export PADDLE_PSERVERS_IP_PORT_LIST="10.89.176.11:36000,10.89.176.12:36000"
export TRAINING_ROLE=PSERVER
export POD_IP=10.89.176.11 # node A:10.89.176.11
export PADDLE_PORT=36000
export PADDLE_TRAINERS_NUM=2
python -u train.py --is_cloud=1
```
You should see output like the following in the log:
> I0318 21:47:01.298220 188592128 grpc_server.cc:470] Server listening on 127.0.0.1:36000 selected port: 36000
Check the system processes:
> 8624 | ttys000 | 0:02.31 | python -u train.py --is_cloud=1
Check the system processes and port usage:
> python3.7 | 8624 | paddle | 8u | IPv6 | 0xe149b87d093872e5 | 0t0 | TCP | localhost:36000 (LISTEN)
We can also see that our `server` process 8624 is indeed listening on port `36000`, waiting for communication from the workers.
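The listings above can be reproduced with standard tools, for example (illustrative commands, not part of the original document):

```bash
# list the running training process
ps aux | grep "train.py"
# check which process is listening on the pserver port
lsof -i :36000
```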
On machine B, whose IP address is `10.89.176.12` and whose communication port is `36000`, set the following environment variables and run the training entry script:
```bash
export PADDLE_PSERVERS_IP_PORT_LIST="10.89.176.11:36000,10.89.176.12:36000"
export TRAINING_ROLE=PSERVER
export POD_IP=10.89.176.12 # node B: 10.89.176.12
export PADDLE_PORT=36000
export PADDLE_TRAINERS_NUM=2
python -u train.py --is_cloud=1
```
You should see similar log output and process status. (When verifying, make sure the IPs and ports are correct.)

### Starting the workers

Next, start the training processes on machines A and B respectively. Set the following environment variables and launch the training processes:

Machine A:
```bash
export PADDLE_PSERVERS_IP_PORT_LIST="10.89.176.11:36000,10.89.176.12:36000"
export TRAINING_ROLE=TRAINER
export PADDLE_TRAINERS_NUM=2
export PADDLE_TRAINER_ID=0 # node A:trainer_id = 0
python -u train.py --is_cloud=1
```
Machine B:
```bash
export PADDLE_PSERVERS_IP_PORT_LIST="10.89.176.11:36000,10.89.176.12:36000"
export TRAINING_ROLE=TRAINER
export PADDLE_TRAINERS_NUM=2
export PADDLE_TRAINER_ID=1 # node B: trainer_id = 1
python -u train.py --is_cloud=1
```
When running this command, if the pservers are not ready yet, the log will show messages like:
> server not ready, wait 3 sec to retry...
>
> not ready endpoints:['10.89.176.11:36000', '10.89.176.12:36000']
The worker processes keep waiting until the servers start listening, or until the wait times out.

Once all pservers are ready, the log will show:
> I0317 11:38:48.099179 16719 communicator.cc:271] Communicator start
>
> I0317 11:38:49.838711 16719 rpc_client.h:107] init rpc client with trainer_id 0
At this point, distributed training has been launched and training will begin.

## Running PaddleRec distributed

> Placeholder

### Simulating distributed training locally

> Placeholder

### Distributed training on an MPI cluster

> Placeholder

### Distributed training on a PaddleCloud cluster

> Placeholder

### Distributed training on a K8S cluster

> Placeholder

# FAQ

> Placeholder
\ No newline at end of file
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
# PaddleRec Single-Node Training
\ No newline at end of file
# PaddleRec Single-Node Training
> Placeholder
\ No newline at end of file
# PaddleRec Model Tuning
\ No newline at end of file
# PaddleRec Model Tuning
> Placeholder
\ No newline at end of file
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='fleet-rec check')
    parser.add_argument("--model", type=str)
    parser.add_argument("--engine", type=str)
    print("coming soon")
File mode changed from 100644 to 100755
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
train:
trainer:
# for cluster training
strategy: "async"
epochs: 10
workspace: "paddlerec.models.rank.tagspace"
reader:
batch_size: 5
class: "{workspace}/reader.py"
train_data_path: "{workspace}/train_data"
model:
models: "{workspace}/model.py"
hyper_parameters:
vocab_text_size: 11447
vocab_tag_size: 4
emb_dim: 10
hid_dim: 1000
win_size: 5
margin: 0.1
neg_size: 3
num_devices: 1
save:
increment:
dirname: "increment"
epoch_interval: 1
save_last: True
inference:
dirname: "inference"
epoch_interval: 100
save_last: True
import paddle.fluid as fluid
import math
from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase
import paddle.fluid as fluid
import paddle.fluid.layers.nn as nn
import paddle.fluid.layers.tensor as tensor
import paddle.fluid.layers.control_flow as cf
class Model(ModelBase):
def __init__(self, config):
ModelBase.__init__(self, config)
self.cost = None
self.metrics = {}
self.vocab_text_size = 11447#envs.get_global_env("vocab_text_size", None, self._namespace)
self.vocab_tag_size = 4#envs.get_global_env("vocab_tag_size", None, self._namespace)
self.emb_dim = 10#envs.get_global_env("emb_dim", None, self._namespace)
self.hid_dim = 1000#envs.get_global_env("hid_dim", None, self._namespace)
self.win_size = 5#envs.get_global_env("win_size", None, self._namespace)
self.margin = 0.1#envs.get_global_env("margin", None, self._namespace)
self.neg_size = 3#envs.get_global_env("neg_size", None, self._namespace)
        print(self.emb_dim)
def train_net(self):
""" network definition """
text = fluid.data(name="text", shape=[None, 1], lod_level=1, dtype='int64')
pos_tag = fluid.data(
name="pos_tag", shape=[None, 1], lod_level=1, dtype='int64')
neg_tag = fluid.data(
name="neg_tag", shape=[None, 1], lod_level=1, dtype='int64')
self._data_var = [text, pos_tag, neg_tag]
text_emb = fluid.embedding(
input=text, size=[self.vocab_text_size, self.emb_dim], param_attr="text_emb")
text_emb = fluid.layers.squeeze(input=text_emb, axes=[1])
pos_tag_emb = fluid.embedding(
input=pos_tag, size=[self.vocab_tag_size, self.emb_dim], param_attr="tag_emb")
pos_tag_emb = fluid.layers.squeeze(input=pos_tag_emb, axes=[1])
neg_tag_emb = fluid.embedding(
input=neg_tag, size=[self.vocab_tag_size, self.emb_dim], param_attr="tag_emb")
neg_tag_emb = fluid.layers.squeeze(input=neg_tag_emb, axes=[1])
conv_1d = fluid.nets.sequence_conv_pool(
input=text_emb,
num_filters=self.hid_dim,
filter_size=self.win_size,
act="tanh",
pool_type="max",
param_attr="cnn")
text_hid = fluid.layers.fc(input=conv_1d,
size=self.emb_dim,
param_attr="text_hid")
cos_pos = nn.cos_sim(pos_tag_emb, text_hid)
mul_text_hid = fluid.layers.sequence_expand_as(x=text_hid, y=neg_tag_emb)
mul_cos_neg = nn.cos_sim(neg_tag_emb, mul_text_hid)
cos_neg_all = fluid.layers.sequence_reshape(
input=mul_cos_neg, new_dim=self.neg_size)
#choose max negtive cosine
cos_neg = nn.reduce_max(cos_neg_all, dim=1, keep_dim=True)
#calculate hinge loss
loss_part1 = nn.elementwise_sub(
tensor.fill_constant_batch_size_like(
input=cos_pos, shape=[-1, 1], value=self.margin, dtype='float32'),
cos_pos)
loss_part2 = nn.elementwise_add(loss_part1, cos_neg)
loss_part3 = nn.elementwise_max(
tensor.fill_constant_batch_size_like(
input=loss_part2, shape=[-1, 1], value=0.0, dtype='float32'),
loss_part2)
avg_cost = nn.mean(loss_part3)
less = tensor.cast(cf.less_than(cos_neg, cos_pos), dtype='float32')
correct = nn.reduce_sum(less)
self.cost = avg_cost
self.metrics["correct"] = correct
self.metrics["cos_pos"] = cos_pos
def get_cost_op(self):
return self.cost
def get_metrics(self):
return self.metrics
def optimizer(self):
learning_rate = 0.01#envs.get_global_env("hyper_parameters.base_lr", None, self._namespace)
sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=learning_rate)
#sgd_optimizer.minimize(avg_cost)
return sgd_optimizer
def infer_net(self, parameter_list):
self.train_net()
import re
import sys
import collections
import os
import six
import time
import numpy as np
import paddle.fluid as fluid
import paddle
import csv
import io
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
class TrainReader(Reader):
def init(self):
pass
def _process_line(self, l):
tag_size = 4
neg_size = 3
l = l.strip().split(",")
pos_index = int(l[0])
pos_tag = []
pos_tag.append(pos_index)
text_raw = l[1].split()
text = [int(w) for w in text_raw]
neg_tag = []
max_iter = 100
now_iter = 0
sum_n = 0
while (sum_n < neg_size):
now_iter += 1
if now_iter > max_iter:
print("error : only one class")
sys.exit(0)
rand_i = np.random.randint(0, tag_size)
if rand_i != pos_index:
neg_index = rand_i
neg_tag.append(neg_index)
sum_n += 1
# if n > 0 and len(text) > n:
# #yield None
# return None, None, None
return text, pos_tag, neg_tag
def generate_sample(self, line):
def data_iter():
text, pos_tag, neg_tag = self._process_line(line)
if text is None:
yield None
return
yield [('text', text), ('pos_tag', pos_tag), ('neg_tag', neg_tag)]
return data_iter
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
train:
trainer:
# for cluster training
strategy: "async"
epochs: 10
workspace: "paddlerec.models.rank.text_classification"
reader:
batch_size: 5
class: "{workspace}/reader.py"
train_data_path: "{workspace}/train_data"
model:
models: "{workspace}/model.py"
save:
increment:
dirname: "increment"
epoch_interval: 1
save_last: True
inference:
dirname: "inference"
epoch_interval: 100
save_last: True
import paddle.fluid as fluid
import math
from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase
import paddle.fluid as fluid
import paddle.fluid.layers.nn as nn
import paddle.fluid.layers.tensor as tensor
import paddle.fluid.layers.control_flow as cf
class Model(ModelBase):
    def __init__(self, config):
        ModelBase.__init__(self, config)
        self.cost = None
        self.metrics = {}
    def train_net(self):
        """ network definition """
        # NOTE: placeholder hyper-parameter values; dict_dim, max_len and class_dim
        # follow the bundled synthetic data (token ids 0-99, sequences of length 10,
        # binary labels), the rest are illustrative and should come from the config.
        dict_dim = 100
        max_len = 10
        cnn_dim = 32
        cnn_filter_size = 3
        emb_dim = 8
        hid_dim = 32
        class_dim = 2
        data = fluid.data(name="input", shape=[None, max_len], dtype='int64')
label = fluid.data(name="label", shape=[None, 1], dtype='int64')
seq_len = fluid.data(name="seq_len", shape=[None], dtype='int64')
# embedding layer
emb = fluid.embedding(input=data, size=[dict_dim, emb_dim])
emb = fluid.layers.sequence_unpad(emb, length=seq_len)
# convolution layer
conv = fluid.nets.sequence_conv_pool(
input=emb,
num_filters=cnn_dim,
filter_size=cnn_filter_size,
act="tanh",
pool_type="max")
# full connect layer
fc_1 = fluid.layers.fc(input=[conv], size=hid_dim)
# softmax layer
prediction = fluid.layers.fc(input=[fc_1], size=class_dim, act="softmax")
#if is_prediction:
# return prediction
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
acc = fluid.layers.accuracy(input=prediction, label=label)
self.cost = avg_cost
self.metrics["acc"] = cos_pos
def get_cost_op(self):
return self.cost
def get_metrics(self):
return self.metrics
def optimizer(self):
learning_rate = 0.01#envs.get_global_env("hyper_parameters.base_lr", None, self._namespace)
sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=learning_rate)
#sgd_optimizer.minimize(avg_cost)
return sgd_optimizer
def infer_net(self, parameter_list):
self.train_net()
import re
import sys
import collections
import os
import six
import time
import numpy as np
import paddle.fluid as fluid
import paddle
import csv
import io
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
class TrainReader(Reader):
def init(self):
pass
def _process_line(self, l):
l = l.strip().split(" ")
data = l[0:10]
seq_len = l[10:11]
label = l[11:]
return data, label, seq_len
def generate_sample(self, line):
def data_iter():
data, label, seq_len = self._process_line(line)
if data is None:
yield None
return
yield [('data', data), ('label', label), ('seq_len', seq_len)]
return data_iter
12 27 13 0 25 52 89 20 39 4 9 1
78 10 61 58 29 79 85 16 46 41 9 1
81 77 44 4 5 57 43 97 42 89 6 0
7 77 86 3 98 89 56 24 7 59 9 1
65 89 99 27 65 98 16 89 42 0 3 0
66 14 48 38 66 5 56 89 98 19 4 1
78 7 10 20 77 16 37 43 59 23 6 1
84 95 28 35 0 82 55 19 13 81 7 0
34 32 98 37 43 51 6 38 20 40 9 0
75 36 13 51 70 24 62 90 32 91 7 1
13 5 49 21 57 21 67 85 74 14 1 0
68 13 86 16 52 50 23 11 65 99 1 1
15 20 75 55 15 90 54 54 15 91 9 0
44 56 15 88 57 3 62 53 89 57 8 1
23 8 40 25 60 33 8 69 44 88 7 1
63 94 5 43 23 70 31 67 21 55 6 0
44 11 64 92 10 37 30 84 19 71 5 1
89 18 71 13 16 58 47 60 77 87 7 1
13 48 56 39 98 53 32 93 13 91 7 0
56 78 67 68 27 11 77 48 45 10 1 1
52 12 14 5 2 8 3 36 33 59 6 0
86 42 91 81 2 9 21 0 44 7 9 1
96 27 82 55 81 30 91 41 91 58 2 1
97 69 76 47 80 62 23 30 87 22 7 1
42 56 25 47 42 18 80 53 15 57 7 0
34 73 75 88 61 79 40 74 87 87 6 1
7 91 9 24 42 60 76 31 10 13 4 0
21 1 46 59 61 54 99 54 89 55 5 1
67 21 1 29 88 5 3 85 39 22 5 1
90 99 7 8 17 77 73 3 32 10 5 0
30 44 26 32 37 74 90 71 42 29 9 1
79 68 3 24 21 37 35 3 76 23 6 1
3 66 7 4 2 88 94 64 47 81 6 1
10 48 16 49 96 93 61 97 84 39 3 1
73 28 67 59 89 92 17 24 52 71 3 1
98 4 35 62 91 2 78 51 72 93 1 1
37 42 96 10 48 49 84 45 59 47 5 1
13 24 7 49 63 78 29 75 45 92 7 1
1 6 95 23 38 34 85 94 33 47 6 1
99 63 65 39 72 73 91 20 16 45 9 0
35 8 81 24 62 0 95 0 52 46 4 1
58 66 88 42 86 94 91 8 18 92 7 0
12 62 56 43 99 31 63 80 11 7 4 1
22 36 1 39 69 20 56 75 17 15 7 0
25 97 62 50 99 98 32 2 98 75 7 1
7 59 98 68 62 19 28 28 60 27 7 0
39 63 43 45 43 11 40 81 4 25 6 0
81 95 27 84 71 45 87 65 40 50 1 0
82 21 69 55 71 92 52 65 90 16 3 0
24 6 5 22 36 34 66 71 3 52 2 0
5 14 66 71 49 10 52 81 32 14 1 0
8 94 52 23 60 27 43 19 89 91 9 0
26 14 36 37 28 94 46 96 11 80 8 1
89 19 77 66 48 75 62 58 90 81 8 1
25 43 95 21 25 81 39 79 9 74 9 0
25 2 64 27 67 36 59 68 99 66 5 1
13 46 41 55 89 93 79 83 32 52 6 0
49 77 57 9 91 49 86 50 32 5 2 0
94 7 53 54 70 69 5 51 59 91 5 1
24 72 94 13 17 12 2 67 0 89 6 1
70 38 19 27 38 87 72 41 98 84 6 1
89 76 82 4 69 64 97 77 88 58 9 0
67 41 99 1 80 38 96 24 67 59 3 1
42 83 50 19 97 99 99 50 46 76 8 1
43 99 63 40 93 15 3 57 11 0 1 0
16 65 31 43 89 37 98 63 29 69 8 1
39 5 65 45 12 82 46 87 82 93 8 0
34 69 82 13 4 20 92 58 46 83 2 1
46 79 87 57 87 23 72 95 37 88 8 0
41 72 81 71 60 15 32 1 9 97 3 0
84 98 15 78 39 82 89 74 46 32 9 0
16 18 92 80 50 44 98 45 15 41 3 1
74 78 81 40 17 65 38 21 27 9 1 0
14 69 68 50 57 11 62 2 89 54 6 0
70 29 79 29 44 56 33 27 25 4 3 1
44 20 87 67 65 41 93 37 99 78 1 1
93 57 87 11 33 40 21 3 47 87 9 1
8 3 24 49 99 48 40 22 99 41 2 0
19 90 9 83 93 22 36 96 44 73 7 1
4 73 2 88 79 90 32 48 45 12 5 0
24 58 34 67 85 62 84 48 14 79 5 1
54 69 19 18 59 78 84 48 61 46 4 0
72 69 95 26 30 74 49 30 95 61 8 0
73 29 46 39 48 30 97 63 89 34 9 1
51 32 44 22 70 69 91 81 74 52 3 0
99 66 89 71 31 42 5 40 21 12 6 0
58 26 59 56 91 49 79 57 57 74 6 1
30 36 59 74 6 30 17 1 99 38 4 0
43 48 77 86 67 25 38 36 3 91 4 1
67 24 51 34 37 8 98 76 84 13 1 1
73 47 88 15 32 99 67 26 28 89 3 1
91 66 11 86 5 12 15 43 79 89 1 1
15 60 43 58 61 0 62 32 98 29 9 0
80 36 78 42 70 52 2 10 42 41 6 1
36 16 46 34 96 39 8 21 86 54 5 1
80 72 13 1 28 49 73 90 81 34 1 0
73 64 86 9 94 49 44 38 47 64 2 0
69 90 69 36 60 45 39 7 41 72 8 0
31 86 54 82 81 77 93 99 68 63 1 1
95 76 97 36 40 12 4 95 59 64 4 1
88 20 64 40 27 11 96 40 41 73 6 0
28 72 70 43 34 54 98 43 29 63 5 0
78 72 4 47 47 38 73 8 65 40 3 1
91 64 51 93 8 78 53 15 42 32 4 0
34 36 45 9 16 0 51 40 90 29 2 1
80 93 65 80 11 19 26 61 29 8 4 0
94 11 60 36 58 98 43 90 64 1 1 0
42 54 89 86 80 72 81 48 19 67 5 0
81 25 30 60 59 20 75 38 75 29 6 0
84 16 48 28 23 20 53 13 32 90 1 0
58 31 77 68 27 88 51 97 70 93 8 1
63 67 85 6 35 22 28 65 8 7 3 0
54 75 93 58 98 9 15 37 61 38 6 1
56 24 50 62 63 47 9 4 58 30 8 1
64 91 32 68 50 90 51 86 52 6 1 1
55 50 46 41 28 1 11 39 75 9 1 0
23 27 98 73 25 7 89 48 7 44 4 1
86 98 68 1 74 46 15 92 59 25 9 1
95 86 72 13 33 60 62 83 96 84 1 0
9 58 37 50 57 16 78 0 21 80 2 0
82 94 74 42 3 60 61 93 34 22 3 1
16 97 97 14 47 50 90 35 9 58 5 0
70 94 82 42 85 88 59 58 6 68 9 0
14 58 24 44 8 29 12 18 26 80 7 0
22 23 7 82 39 28 96 92 23 40 5 1
40 31 72 94 20 81 89 4 42 1 5 0
57 63 71 41 28 2 39 67 90 54 6 0
9 74 4 41 11 31 15 21 44 32 6 1
31 28 66 66 61 78 72 80 82 88 3 1
79 18 1 59 35 62 0 72 78 97 7 0
14 19 30 63 38 37 12 15 54 15 6 1
54 91 37 79 60 35 55 62 94 84 7 1
10 55 78 96 45 55 35 56 54 70 6 1
23 46 15 93 66 11 32 45 74 25 4 0
51 55 9 9 88 59 21 66 87 12 1 1
90 22 38 66 12 9 30 48 55 85 1 1
39 23 82 29 57 76 79 56 3 19 2 0
7 72 76 15 90 23 40 40 33 39 4 1
60 64 34 11 18 18 38 39 53 37 1 1
85 72 51 47 83 90 32 96 78 23 9 1
85 51 96 31 83 70 57 65 15 0 6 0
41 11 56 94 40 6 62 86 68 83 7 0
34 82 44 30 2 2 94 62 41 27 6 1
54 86 50 83 76 65 0 87 80 70 7 0
97 50 65 78 2 90 28 5 12 56 5 1
34 19 68 93 11 9 14 87 22 70 9 0
63 77 27 20 20 37 65 51 29 29 9 1
22 79 98 57 56 97 43 49 4 80 4 1
6 4 35 54 4 36 1 79 85 35 6 0
12 55 68 61 91 43 49 5 93 27 8 0
64 22 69 16 63 20 28 60 13 35 7 1
9 19 60 89 62 29 47 33 6 13 4 0
14 15 39 86 47 75 7 70 57 60 6 1
90 63 12 43 28 46 39 97 83 42 6 0
49 3 3 64 59 46 30 13 61 10 2 0
79 47 29 47 54 38 50 66 18 63 5 1
98 67 1 22 66 32 91 77 63 33 3 0
72 22 10 27 28 44 29 66 71 1 7 0
20 52 19 23 9 38 1 93 83 73 5 0
88 57 22 64 93 66 20 90 78 2 7 1
90 86 41 28 14 25 86 73 7 21 4 0
63 91 0 29 2 78 86 76 9 20 4 1
3 57 91 37 21 85 80 99 18 79 1 1
69 95 36 6 85 47 83 83 61 52 4 0
72 4 34 16 59 78 56 70 27 44 9 1
58 42 6 53 21 7 83 38 86 66 5 0
22 86 22 21 86 22 83 38 62 19 4 0
14 63 20 53 98 76 10 22 35 76 9 1
16 88 13 66 37 33 11 40 61 97 2 1
60 9 98 35 51 11 98 73 67 26 6 1
25 48 87 93 58 58 15 9 23 13 7 1
61 47 47 36 97 22 63 35 9 38 5 1
94 49 41 38 0 81 59 39 13 65 3 0
88 82 71 96 76 16 57 24 72 36 5 1
28 46 8 95 94 86 63 1 42 63 6 0
12 95 29 66 64 77 19 26 73 53 4 0
19 5 52 34 13 62 6 4 25 58 5 0
18 39 39 56 73 29 5 15 13 82 1 1
50 66 99 67 76 25 43 12 24 67 9 0
74 56 61 97 23 63 22 63 6 83 2 1
10 96 13 49 43 20 58 19 99 58 7 1
2 95 31 4 99 91 27 90 85 32 3 0
41 23 20 71 41 75 75 35 16 12 3 1
21 33 87 57 19 27 94 36 80 10 6 0
8 0 25 74 14 61 86 8 42 82 9 0
23 33 91 19 84 99 95 92 29 31 8 0
94 94 5 6 98 23 37 65 14 25 6 1
42 16 39 32 2 20 86 81 90 91 8 0
72 39 20 63 88 52 65 81 77 96 4 0
48 73 65 75 89 36 75 36 11 35 8 0
79 74 3 29 63 20 76 46 8 82 5 0
7 46 38 77 79 92 71 98 30 35 6 0
44 69 93 31 22 68 91 70 32 86 5 0
45 38 77 87 64 44 69 19 28 82 9 0
93 63 92 84 22 44 51 94 4 99 9 0
77 10 49 29 59 55 44 7 95 39 2 0
10 85 99 9 91 29 64 14 50 24 6 1
74 4 21 12 77 36 71 51 50 31 9 1
66 76 28 18 23 49 33 31 6 44 1 1
92 50 90 64 95 58 93 4 78 88 6 1
69 79 76 47 46 26 30 40 33 58 8 1
97 12 87 82 6 18 57 49 49 58 1 1
70 79 55 86 29 88 55 39 17 74 5 1
65 51 45 62 54 17 59 12 29 79 5 0
5 63 82 51 54 97 54 36 57 46 3 0
74 77 52 10 12 9 34 95 2 0 5 0
50 20 22 89 50 70 55 98 80 50 1 0
61 80 7 3 78 36 44 37 90 18 9 0
81 13 55 57 88 81 66 55 18 34 2 1
52 30 54 70 28 56 48 82 67 20 8 1
0 41 15 63 27 90 12 16 56 79 3 0
69 89 54 1 93 10 15 2 25 59 8 0
74 99 17 93 96 82 38 77 98 85 4 0
8 59 17 92 60 21 59 76 55 73 2 1
53 56 79 19 29 94 86 96 62 39 3 1
23 44 25 63 41 94 65 10 8 40 9 1
7 18 80 43 20 70 14 59 72 17 9 0
84 97 79 14 37 64 23 68 8 24 2 0
63 94 98 77 8 62 10 77 63 56 4 0
8 63 74 34 49 22 52 54 44 93 3 0
94 48 92 58 82 48 53 34 96 25 2 0
33 15 3 95 48 93 9 69 44 77 7 1
69 72 80 77 64 24 52 21 36 49 2 0
59 34 54 66 60 19 76 79 16 70 5 1
8 83 9 91 67 79 31 20 31 88 2 0
64 95 46 95 78 63 4 60 66 63 7 1
10 39 78 45 36 4 89 94 68 75 7 0
81 52 70 11 48 15 40 63 29 14 8 1
94 49 30 14 53 12 53 42 77 82 8 1
40 88 46 20 54 84 76 15 2 73 2 1
71 50 79 54 17 58 30 16 17 99 1 1
74 79 74 61 61 36 28 39 89 36 6 0
53 45 45 23 51 32 93 26 10 8 3 0
1 97 6 67 88 20 41 63 49 6 8 0
3 64 41 19 41 80 75 71 69 90 8 0
31 90 38 93 52 0 38 86 41 68 9 1
50 94 53 9 73 59 94 7 24 57 3 0
87 11 4 62 96 7 0 59 46 11 6 1
77 67 56 88 45 62 10 51 86 27 6 1
62 62 59 99 83 84 79 97 56 37 5 0
19 55 0 37 44 44 2 7 54 50 5 1
23 60 11 83 6 48 20 77 54 31 6 0
27 53 52 30 3 70 57 38 47 96 5 0
75 14 5 83 72 46 47 64 14 12 7 0
29 95 36 63 59 49 38 44 13 15 2 1
38 3 70 89 2 94 89 74 33 6 8 1
28 56 49 43 83 34 7 63 36 13 7 0
25 90 23 85 50 65 36 10 64 38 5 0
35 94 48 38 99 71 42 39 61 75 8 1
28 73 34 22 51 8 52 98 74 19 8 1
12 40 65 12 7 96 73 65 12 90 5 0
42 42 48 16 80 14 48 29 29 45 5 0
58 20 4 0 69 99 15 4 16 4 1 1
93 30 90 5 23 63 25 30 99 32 7 1
91 23 20 26 84 78 58 76 58 90 5 1
33 2 36 59 55 9 79 34 92 57 9 0
80 63 84 73 22 40 70 94 59 34 5 0
49 95 50 32 90 22 18 66 46 32 2 0
47 72 3 94 33 78 87 43 11 67 5 0
76 44 86 81 95 48 79 46 11 65 8 1
59 51 97 75 17 5 40 59 32 62 6 0
41 13 58 7 54 84 8 84 27 55 1 0
24 80 44 26 86 99 68 80 81 22 9 0
12 45 16 44 66 76 33 53 3 20 9 0
22 3 79 6 32 38 75 66 15 25 9 1
51 48 26 53 33 26 18 74 9 39 5 1
35 67 89 91 29 81 23 52 19 11 6 0
64 50 43 1 43 49 19 20 84 19 8 0
34 4 9 77 24 61 55 82 42 76 9 0
37 84 94 33 67 60 3 95 78 8 9 0
82 10 54 12 47 23 78 97 6 51 5 0
70 40 38 47 5 38 83 70 37 90 2 0
42 21 62 27 43 47 82 80 88 49 4 0
68 68 67 12 38 13 32 30 93 27 3 1
5 44 98 28 5 81 20 56 10 34 9 1
40 46 11 33 73 62 68 70 66 85 4 0
9 46 11 84 6 31 18 89 66 32 1 1
6 78 44 98 77 29 69 39 62 78 1 0
47 90 18 0 3 8 12 20 51 75 4 1
21 29 74 19 12 29 41 22 63 47 8 1
22 59 64 62 18 89 19 92 87 8 8 0
6 21 24 58 14 53 18 93 62 15 8 0
20 33 88 25 37 52 1 72 74 11 2 0
90 49 28 53 28 80 22 81 0 46 9 0
87 31 51 27 15 31 68 93 5 4 7 1
21 72 60 2 24 79 22 24 77 61 9 0
20 4 6 40 28 14 16 78 58 99 7 1
80 35 98 20 91 35 47 29 3 19 2 1
57 21 24 61 60 39 83 34 53 2 2 0
74 86 78 78 18 44 20 94 85 71 4 1
27 48 44 92 10 18 74 54 25 85 2 0
74 77 28 75 74 91 69 36 95 68 7 0
32 84 17 18 55 79 59 57 21 69 2 1
69 77 40 98 83 40 4 66 39 83 1 1
63 24 32 39 75 92 81 49 2 51 5 1
35 40 84 71 3 16 82 91 44 52 8 0
21 78 66 4 57 27 21 89 4 34 7 1
94 18 57 49 88 26 29 76 56 67 6 0
14 91 71 30 5 36 28 74 16 73 3 1
93 36 43 46 77 44 59 19 56 84 3 0
11 16 2 67 11 96 20 91 20 59 2 1
72 79 26 99 90 71 56 46 35 99 3 0
29 87 20 40 13 14 14 40 61 27 6 0
41 64 28 51 56 52 87 67 37 91 6 1
33 14 5 30 99 54 27 80 54 55 4 1
60 44 73 91 71 53 54 95 59 81 6 0
69 33 11 83 4 53 34 39 43 84 1 0
73 31 19 4 50 20 66 73 94 88 4 0
30 49 41 76 5 21 88 69 76 3 2 0
18 50 27 76 67 38 87 16 52 87 5 1
33 36 80 8 43 82 89 76 37 3 5 0
98 21 61 24 58 13 9 85 56 74 1 1
84 27 50 96 9 56 30 31 85 65 1 1
65 74 40 2 8 40 18 57 30 38 1 1
76 44 64 6 10 32 84 70 74 24 1 1
14 29 59 34 27 8 0 37 27 68 3 0
6 47 5 77 15 41 93 49 59 83 4 1
39 88 43 89 32 98 82 0 5 12 9 0
78 79 30 26 58 6 9 58 37 65 8 1
25 28 66 41 70 87 76 62 29 39 7 1
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
evaluate:
reader:
batch_size: 1
class: "{workspace}/synthetic_evaluate_reader.py"
test_data_path: "{workspace}/data/train"
train:
trainer:
# for cluster training
strategy: "async"
epochs: 4
workspace: "paddlerec.models.match.dssm"
reader:
batch_size: 4
class: "{workspace}/synthetic_reader.py"
train_data_path: "{workspace}/data/train"
model:
models: "{workspace}/model.py"
hyper_parameters:
TRIGRAM_D: 1000
NEG: 4
fc_sizes: [300, 300, 128]
fc_acts: ['tanh', 'tanh', 'tanh']
learning_rate: 0.01
optimizer: sgd
save:
increment:
dirname: "increment"
epoch_interval: 2
save_last: True
inference:
dirname: "inference"
epoch_interval: 4
feed_varnames: ["query", "doc_pos"]
fetch_varnames: ["cos_sim_0.tmp_0"]
save_last: True
This diff has been collapsed.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import paddle.fluid as fluid
from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase
class Model(ModelBase):
def __init__(self, config):
ModelBase.__init__(self, config)
def input(self):
TRIGRAM_D = envs.get_global_env("hyper_parameters.TRIGRAM_D", None, self._namespace)
Neg = envs.get_global_env("hyper_parameters.NEG", None, self._namespace)
self.query = fluid.data(name="query", shape=[-1, TRIGRAM_D], dtype='float32', lod_level=0)
self.doc_pos = fluid.data(name="doc_pos", shape=[-1, TRIGRAM_D], dtype='float32', lod_level=0)
self.doc_negs = [fluid.data(name="doc_neg_" + str(i), shape=[-1, TRIGRAM_D], dtype="float32", lod_level=0) for i in range(Neg)]
self._data_var.append(self.query)
self._data_var.append(self.doc_pos)
for input in self.doc_negs:
self._data_var.append(input)
if self._platform != "LINUX":
self._data_loader = fluid.io.DataLoader.from_generator(
feed_list=self._data_var, capacity=64, use_double_buffer=False, iterable=False)
def net(self, is_infer=False):
hidden_layers = envs.get_global_env("hyper_parameters.fc_sizes", None, self._namespace)
hidden_acts = envs.get_global_env("hyper_parameters.fc_acts", None, self._namespace)
def fc(data, hidden_layers, hidden_acts, names):
fc_inputs = [data]
for i in range(len(hidden_layers)):
xavier=fluid.initializer.Xavier(uniform=True, fan_in=fc_inputs[-1].shape[1], fan_out=hidden_layers[i])
out = fluid.layers.fc(input=fc_inputs[-1],
size=hidden_layers[i],
act=hidden_acts[i],
param_attr=xavier,
bias_attr=xavier,
name=names[i])
fc_inputs.append(out)
return fc_inputs[-1]
query_fc = fc(self.query, hidden_layers, hidden_acts, ['query_l1', 'query_l2', 'query_l3'])
doc_pos_fc = fc(self.doc_pos, hidden_layers, hidden_acts, ['doc_pos_l1', 'doc_pos_l2', 'doc_pos_l3'])
self.R_Q_D_p = fluid.layers.cos_sim(query_fc, doc_pos_fc)
if is_infer:
return
R_Q_D_ns = []
for i, doc_neg in enumerate(self.doc_negs):
doc_neg_fc_i = fc(doc_neg, hidden_layers, hidden_acts, ['doc_neg_l1_' + str(i), 'doc_neg_l2_' + str(i), 'doc_neg_l3_' + str(i)])
R_Q_D_ns.append(fluid.layers.cos_sim(query_fc, doc_neg_fc_i))
concat_Rs = fluid.layers.concat(input=[self.R_Q_D_p] + R_Q_D_ns, axis=-1)
prob = fluid.layers.softmax(concat_Rs, axis=1)
hit_prob = fluid.layers.slice(prob, axes=[0,1], starts=[0,0], ends=[4, 1])
loss = -fluid.layers.reduce_sum(fluid.layers.log(hit_prob))
self.avg_cost = fluid.layers.mean(x=loss)
def infer_results(self):
self._infer_results['query_doc_sim'] = self.R_Q_D_p
def avg_loss(self):
self._cost = self.avg_cost
def metrics(self):
self._metrics["LOSS"] = self.avg_cost
def train_net(self):
self.input()
self.net(is_infer=False)
self.avg_loss()
self.metrics()
def optimizer(self):
learning_rate = envs.get_global_env("hyper_parameters.learning_rate", None, self._namespace)
optimizer = fluid.optimizer.SGD(learning_rate)
return optimizer
def infer_input(self):
TRIGRAM_D = envs.get_global_env("hyper_parameters.TRIGRAM_D", None, self._namespace)
self.query = fluid.data(name="query", shape=[-1, TRIGRAM_D], dtype='float32', lod_level=0)
self.doc_pos = fluid.data(name="doc_pos", shape=[-1, TRIGRAM_D], dtype='float32', lod_level=0)
self._infer_data_var = [self.query, self.doc_pos]
self._infer_data_loader = fluid.io.DataLoader.from_generator(
feed_list=self._infer_data_var, capacity=64, use_double_buffer=False, iterable=False)
def infer_net(self):
self.infer_input()
self.net(is_infer=True)
self.infer_results()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
class EvaluateReader(Reader):
def init(self):
pass
def generate_sample(self, line):
"""
Read the data line by line and process it as a dictionary
"""
def reader():
"""
This function needs to be implemented by the user, based on data format
"""
features = line.rstrip('\n').split('\t')
            # materialize the mapped values as lists so the reader also works
            # under Python 3, where map() returns a lazy iterator
            query = [float(x) for x in features[0].split(',')]
            pos_doc = [float(x) for x in features[1].split(',')]
feature_names = ['query', 'doc_pos']
yield zip(feature_names, [query] + [pos_doc])
return reader
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
class TrainReader(Reader):
def init(self):
pass
def generate_sample(self, line):
"""
Read the data line by line and process it as a dictionary
"""
def reader():
"""
This function needs to be implemented by the user, based on data format
"""
features = line.rstrip('\n').split('\t')
            # materialize the mapped values as lists so the reader also works
            # under Python 3, where map() returns a lazy iterator
            query = [float(x) for x in features[0].split(',')]
            pos_doc = [float(x) for x in features[1].split(',')]
feature_names = ['query', 'doc_pos']
neg_docs = []
for i in range(len(features) - 2):
feature_names.append('doc_neg_' + str(i))
                neg_docs.append([float(x) for x in features[i + 2].split(',')])
yield zip(feature_names, [query] + [pos_doc] + neg_docs)
return reader
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
evaluate:
workspace: "paddlerec.models.recall.multiview-simnet"
reader:
batch_size: 2
class: "{workspace}/evaluate_reader.py"
test_data_path: "{workspace}/data/test"
train:
trainer:
# for cluster training
strategy: "async"
epochs: 2
workspace: "paddlerec.models.recall.multiview-simnet"
reader:
batch_size: 2
class: "{workspace}/reader.py"
train_data_path: "{workspace}/data/train"
dataset_class: "DataLoader"
model:
models: "{workspace}/model.py"
hyper_parameters:
use_DataLoader: True
query_encoder: "bow"
title_encoder: "bow"
query_encode_dim: 128
title_encode_dim: 128
query_slots: 1
title_slots: 1
sparse_feature_dim: 1000001
embedding_dim: 128
hidden_size: 128
learning_rate: 0.0001
optimizer: adam
save:
increment:
dirname: "increment"
epoch_interval: 1
save_last: True
inference:
dirname: "inference"
epoch_interval: 1
save_last: True
224289:0 126379:0 284519:0 549329:0 750666:0 393772:0 586898:0 736887:0 48785:0 906517:0 229162:1 483485:1 739835:1 29957:1 694497:1 997508:1 556876:1 717791:1 232176:1 430356:1
366182:0 82062:0 708883:0 949128:0 798964:0 639103:0 409033:0 79301:0 405607:0 342616:0 61552:1 560547:1 3760:1 754734:1 98496:1 472427:1 979596:1 750283:1 492028:1 801383:1
969571:0 405187:0 756217:0 563640:0 572168:0 881952:0 446260:0 692177:0 994140:0 485393:0 509081:1 297377:1 465399:1 934708:1 430949:1 135651:1 484531:1 385306:1 463957:1 996004:1
436320:0 423131:0 963969:0 78345:0 879550:0 458203:0 684397:0 956202:0 989802:0 526101:0 852446:1 182545:1 625656:1 674856:1 422648:1 74100:1 48372:1 850830:1 336087:1 178251:1
242683:0 118677:0 20731:0 970617:0 355890:0 739613:0 926695:0 963639:0 201043:0 611907:0 115309:1 310984:1 615584:1 638886:1 575934:1 889389:1 974807:1 570987:1 532482:1 911925:1
954007:0 122623:0 168195:0 348901:0 217880:0 84759:0 925763:0 436382:0 573742:0 942921:0 553377:1 835046:1 137907:1 933870:1 766585:1 48483:1 543079:1 889467:1 521705:1 906676:1
798690:0 617323:0 553266:0 232924:0 159461:0 404822:0 52992:0 364854:0 913876:0 547974:0 559472:1 748595:1 71793:1 357331:1 606888:1 477051:1 291481:1 89363:1 503881:1 423029:1
228207:0 785250:0 661149:0 803304:0 478781:0 495202:0 804509:0 273065:0 26123:0 810840:0 801871:1 146772:1 421009:1 752344:1 946358:1 531668:1 5771:1 191294:1 627329:1 434664:1
984628:0 762075:0 505288:0 48519:0 72492:0 26568:0 684085:0 613095:0 781547:0 895829:0 280541:1 903234:1 708065:1 386658:1 331060:1 3693:1 279760:1 459579:1 423552:1 962594:1
674172:0 39271:0 646093:0 757969:0 553251:0 734960:0 967186:0 856940:0 617246:0 376452:0 113050:1 472707:1 975057:1 865095:1 155824:1 389921:1 205520:1 513667:1 163588:1 953463:1
7688:0 589671:0 339543:0 681723:0 339204:0 743067:0 897959:0 897541:0 571340:0 858141:0 68161:1 533957:1 288065:1 755516:1 179906:1 324817:1 116293:1 942079:1 455277:1 787142:1 251765:2 846187:2 586960:2 781883:2 430436:2 240100:2 686201:2 632045:2 585097:2 61976:2
187966:0 194147:0 640819:0 283848:0 514875:0 310781:0 760083:0 281096:0 837090:0 928087:0 958908:1 451359:1 456136:1 577231:1 373371:1 651928:1 877106:1 721988:1 342265:1 114942:1 668915:2 502190:2 139044:2 213045:2 36710:2 119509:2 450285:2 165440:2 199495:2 798870:2
477955:0 598041:0 452166:0 924550:0 152308:0 316225:0 285239:0 7967:0 177143:0 132244:0 391070:1 169561:1 256279:1 563588:1 749753:1 237035:1 550804:1 736257:1 71551:1 61944:1 102132:2 484023:2 82995:2 732704:2 114816:2 413165:2 197504:2 686192:2 253734:2 248157:2
325819:0 140241:0 365103:0 334185:0 357327:0 613836:0 928004:0 595589:0 506569:0 539067:0 638196:1 729129:1 730912:1 701797:1 571150:1 140054:1 680316:1 889784:1 302584:1 676284:1 671069:2 212989:2 318469:2 732930:2 924564:2 147041:2 572412:2 662673:2 418312:2 382855:2
839803:0 888881:0 957998:0 906486:0 44377:0 247842:0 994783:0 813449:0 168271:0 493685:0 269703:1 156692:1 686681:1 273684:1 312387:1 462033:1 669631:1 635437:1 74337:1 217677:1 582194:2 992666:2 860610:2 660766:2 24524:2 169856:2 882211:2 291866:2 44494:2 984736:2
327559:0 627497:0 876526:0 243959:0 532929:0 639919:0 443220:0 952110:0 844723:0 372053:0 196819:1 326005:1 62242:1 774928:1 382727:1 348680:1 946697:1 625998:1 276517:1 251595:1 342204:2 825871:2 407136:2 724114:2 611341:2 517978:2 248341:2 111254:2 836867:2 677297:2
72451:0 749548:0 283413:0 419402:0 67446:0 341795:0 918120:0 892028:0 113151:0 832663:0 758121:1 500602:1 734935:1 577972:1 205421:1 726739:1 276563:1 611928:1 185486:1 603502:1 633117:2 929300:2 332435:2 216848:2 412769:2 708304:2 800045:2 315869:2 444476:2 332565:2
675647:0 212558:0 654982:0 321053:0 111172:0 635432:0 298523:0 612182:0 203835:0 288250:0 990034:1 891786:1 188524:1 480757:1 436783:1 874434:1 530090:1 492441:1 32835:1 886415:1 688876:2 626030:2 612348:2 208265:2 355885:2 603938:2 349931:2 86683:2 361956:2 705130:2
164500:0 332294:0 373155:0 320413:0 801561:0 152827:0 28282:0 435913:0 376758:0 367848:0 285596:1 282674:1 357323:1 257195:1 948061:1 996976:1 300918:1 734644:1 870559:1 924205:1 45095:2 61352:2 242258:2 153354:2 763576:2 133542:2 431079:2 193327:2 655823:2 770159:2
821764:0 184731:0 888413:0 793536:0 30049:0 533675:0 791254:0 92255:0 74185:0 557758:0 795898:1 15689:1 983592:1 248891:1 64421:1 387642:1 315522:1 526054:1 404172:1 704838:1 537016:2 383828:2 438418:2 885895:2 894698:2 228867:2 343213:2 411377:2 149957:2 810795:2
#! /bin/bash
set -e
echo "begin to prepare data"
mkdir -p data/train
mkdir -p data/test
python generate_synthetic_data.py
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import io
import copy
import random
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
class EvaluateReader(Reader):
def init(self):
self.query_slots = envs.get_global_env("hyper_parameters.query_slots", None, "train.model")
self.title_slots = envs.get_global_env("hyper_parameters.title_slots", None, "train.model")
self.all_slots = []
for i in range(self.query_slots):
self.all_slots.append(str(i))
for i in range(self.title_slots):
self.all_slots.append(str(i + self.query_slots))
self._all_slots_dict = dict()
for index, slot in enumerate(self.all_slots):
self._all_slots_dict[slot] = [False, index]
def generate_sample(self, line):
def data_iter():
elements = line.rstrip().split()
padding = 0
output = [(slot, []) for slot in self.all_slots]
for elem in elements:
feasign, slot = elem.split(':')
                # dict.has_key() was removed in Python 3; a membership test works in both
                if slot not in self._all_slots_dict:
                    continue
self._all_slots_dict[slot][0] = True
index = self._all_slots_dict[slot][1]
output[index][1].append(int(feasign))
for slot in self._all_slots_dict:
visit, index = self._all_slots_dict[slot]
if visit:
self._all_slots_dict[slot][0] = False
else:
output[index][1].append(padding)
yield output
return data_iter
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import random
class Dataset:
def __init__(self):
pass
class SyntheticDataset(Dataset):
def __init__(self, sparse_feature_dim, query_slot_num, title_slot_num, dataset_size=10000):
# ids are randomly generated
self.ids_per_slot = 10
self.sparse_feature_dim = sparse_feature_dim
self.query_slot_num = query_slot_num
self.title_slot_num = title_slot_num
self.dataset_size = dataset_size
def _reader_creator(self, is_train):
def generate_ids(num, space):
return [random.randint(0, space - 1) for i in range(num)]
def reader():
for i in range(self.dataset_size):
query_slots = []
pos_title_slots = []
neg_title_slots = []
for i in range(self.query_slot_num):
qslot = generate_ids(self.ids_per_slot,
self.sparse_feature_dim)
qslot = [str(fea) + ':' + str(i) for fea in qslot]
query_slots += qslot
for i in range(self.title_slot_num):
pt_slot = generate_ids(self.ids_per_slot,
self.sparse_feature_dim)
pt_slot = [str(fea) + ':' + str(i + self.query_slot_num) for fea in pt_slot]
pos_title_slots += pt_slot
if is_train:
for i in range(self.title_slot_num):
nt_slot = generate_ids(self.ids_per_slot,
self.sparse_feature_dim)
nt_slot = [str(fea) + ':' + str(i + self.query_slot_num + self.title_slot_num) for fea in nt_slot]
neg_title_slots += nt_slot
yield query_slots + pos_title_slots + neg_title_slots
else:
yield query_slots + pos_title_slots
return reader
def train(self):
return self._reader_creator(True)
def valid(self):
return self._reader_creator(True)
def test(self):
return self._reader_creator(False)
if __name__ == '__main__':
sparse_feature_dim = 1000001
query_slots = 1
title_slots = 1
dataset_size = 10
dataset = SyntheticDataset(sparse_feature_dim, query_slots, title_slots, dataset_size)
train_reader = dataset.train()
test_reader = dataset.test()
with open("data/train/train.txt", 'w') as fout:
for data in train_reader():
fout.write(' '.join(data))
fout.write("\n")
with open("data/test/test.txt", 'w') as fout:
for data in test_reader():
fout.write(' '.join(data))
fout.write("\n")
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import math
import paddle.fluid as fluid
import paddle.fluid.layers as layers
import paddle.fluid.layers.tensor as tensor
import paddle.fluid.layers.control_flow as cf
from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase
class BowEncoder(object):
""" bow-encoder """
def __init__(self):
self.param_name = ""
def forward(self, emb):
return fluid.layers.sequence_pool(input=emb, pool_type='sum')
class CNNEncoder(object):
""" cnn-encoder"""
def __init__(self,
param_name="cnn",
win_size=3,
ksize=128,
act='tanh',
pool_type='max'):
self.param_name = param_name
self.win_size = win_size
self.ksize = ksize
self.act = act
self.pool_type = pool_type
def forward(self, emb):
return fluid.nets.sequence_conv_pool(
input=emb,
num_filters=self.ksize,
filter_size=self.win_size,
act=self.act,
pool_type=self.pool_type,
param_attr=self.param_name + ".param",
bias_attr=self.param_name + ".bias")
class GrnnEncoder(object):
""" grnn-encoder """
def __init__(self, param_name="grnn", hidden_size=128):
self.param_name = param_name
self.hidden_size = hidden_size
def forward(self, emb):
fc0 = fluid.layers.fc(input=emb,
size=self.hidden_size * 3,
param_attr=self.param_name + "_fc.w",
bias_attr=False)
gru_h = fluid.layers.dynamic_gru(
input=fc0,
size=self.hidden_size,
is_reverse=False,
param_attr=self.param_name + ".param",
bias_attr=self.param_name + ".bias")
return fluid.layers.sequence_pool(input=gru_h, pool_type='max')
class SimpleEncoderFactory(object):
def __init__(self):
pass
''' create an encoder through create function '''
def create(self, enc_type, enc_hid_size):
if enc_type == "bow":
bow_encode = BowEncoder()
return bow_encode
elif enc_type == "cnn":
cnn_encode = CNNEncoder(ksize=enc_hid_size)
return cnn_encode
elif enc_type == "gru":
rnn_encode = GrnnEncoder(hidden_size=enc_hid_size)
return rnn_encode
class Model(ModelBase):
def __init__(self, config):
ModelBase.__init__(self, config)
self.init_config()
def init_config(self):
self._fetch_interval = 1
query_encoder = envs.get_global_env("hyper_parameters.query_encoder", None, self._namespace)
title_encoder = envs.get_global_env("hyper_parameters.title_encoder", None, self._namespace)
query_encode_dim = envs.get_global_env("hyper_parameters.query_encode_dim", None, self._namespace)
title_encode_dim = envs.get_global_env("hyper_parameters.title_encode_dim", None, self._namespace)
query_slots = envs.get_global_env("hyper_parameters.query_slots", None, self._namespace)
title_slots = envs.get_global_env("hyper_parameters.title_slots", None, self._namespace)
factory = SimpleEncoderFactory()
self.query_encoders = [
factory.create(query_encoder, query_encode_dim)
for i in range(query_slots)
]
self.title_encoders = [
factory.create(title_encoder, title_encode_dim)
for i in range(title_slots)
]
self.emb_size = envs.get_global_env("hyper_parameters.sparse_feature_dim", None, self._namespace)
self.emb_dim = envs.get_global_env("hyper_parameters.embedding_dim", None, self._namespace)
self.emb_shape = [self.emb_size, self.emb_dim]
self.hidden_size = envs.get_global_env("hyper_parameters.hidden_size", None, self._namespace)
self.margin = 0.1
def input(self, is_train=True):
self.q_slots = [
fluid.data(
name="%d" % i, shape=[None, 1], lod_level=1, dtype='int64')
for i in range(len(self.query_encoders))
]
self.pt_slots = [
fluid.data(
name="%d" % (i + len(self.query_encoders)), shape=[None, 1], lod_level=1, dtype='int64')
for i in range(len(self.title_encoders))
]
if is_train == False:
return self.q_slots + self.pt_slots
self.nt_slots = [
fluid.data(
name="%d" % (i + len(self.query_encoders) + len(self.title_encoders)), shape=[None, 1], lod_level=1, dtype='int64')
for i in range(len(self.title_encoders))
]
return self.q_slots + self.pt_slots + self.nt_slots
def train_input(self):
res = self.input()
self._data_var = res
use_dataloader = envs.get_global_env("hyper_parameters.use_DataLoader", False, self._namespace)
if self._platform != "LINUX" or use_dataloader:
self._data_loader = fluid.io.DataLoader.from_generator(
feed_list=self._data_var, capacity=256, use_double_buffer=False, iterable=False)
def get_acc(self, x, y):
less = tensor.cast(cf.less_than(x, y), dtype='float32')
label_ones = fluid.layers.fill_constant_batch_size_like(
input=x, dtype='float32', shape=[-1, 1], value=1.0)
correct = fluid.layers.reduce_sum(less)
total = fluid.layers.reduce_sum(label_ones)
acc = fluid.layers.elementwise_div(correct, total)
return acc
def net(self):
q_embs = [
fluid.embedding(
input=query, size=self.emb_shape, param_attr="emb")
for query in self.q_slots
]
pt_embs = [
fluid.embedding(
input=title, size=self.emb_shape, param_attr="emb")
for title in self.pt_slots
]
nt_embs = [
fluid.embedding(
input=title, size=self.emb_shape, param_attr="emb")
for title in self.nt_slots
]
# encode each embedding field with encoder
q_encodes = [
self.query_encoders[i].forward(emb) for i, emb in enumerate(q_embs)
]
pt_encodes = [
self.title_encoders[i].forward(emb) for i, emb in enumerate(pt_embs)
]
nt_encodes = [
self.title_encoders[i].forward(emb) for i, emb in enumerate(nt_embs)
]
# concat multi view for query, pos_title, neg_title
q_concat = fluid.layers.concat(q_encodes)
pt_concat = fluid.layers.concat(pt_encodes)
nt_concat = fluid.layers.concat(nt_encodes)
# projection of hidden layer
q_hid = fluid.layers.fc(q_concat,
size=self.hidden_size,
param_attr='q_fc.w',
bias_attr='q_fc.b')
pt_hid = fluid.layers.fc(pt_concat,
size=self.hidden_size,
param_attr='t_fc.w',
bias_attr='t_fc.b')
nt_hid = fluid.layers.fc(nt_concat,
size=self.hidden_size,
param_attr='t_fc.w',
bias_attr='t_fc.b')
# cosine of hidden layers
cos_pos = fluid.layers.cos_sim(q_hid, pt_hid)
cos_neg = fluid.layers.cos_sim(q_hid, nt_hid)
# pairwise hinge_loss
loss_part1 = fluid.layers.elementwise_sub(
tensor.fill_constant_batch_size_like(
input=cos_pos,
shape=[-1, 1],
value=self.margin,
dtype='float32'),
cos_pos)
loss_part2 = fluid.layers.elementwise_add(loss_part1, cos_neg)
loss_part3 = fluid.layers.elementwise_max(
tensor.fill_constant_batch_size_like(
input=loss_part2, shape=[-1, 1], value=0.0, dtype='float32'),
loss_part2)
self.avg_cost = fluid.layers.mean(loss_part3)
self.acc = self.get_acc(cos_neg, cos_pos)
def avg_loss(self):
self._cost = self.avg_cost
def metrics(self):
self._metrics["loss"] = self.avg_cost
self._metrics["acc"] = self.acc
def train_net(self):
self.train_input()
self.net()
self.avg_loss()
self.metrics()
def optimizer(self):
learning_rate = envs.get_global_env("hyper_parameters.learning_rate", None, self._namespace)
optimizer = fluid.optimizer.Adam(learning_rate=learning_rate)
return optimizer
def infer_input(self):
res = self.input(is_train=False)
self._infer_data_var = res
self._infer_data_loader = fluid.io.DataLoader.from_generator(
feed_list=self._infer_data_var, capacity=64, use_double_buffer=False, iterable=False)
def infer_net(self):
self.infer_input()
# lookup embedding for each slot
q_embs = [
fluid.embedding(
input=query, size=self.emb_shape, param_attr="emb")
for query in self.q_slots
]
pt_embs = [
fluid.embedding(
input=title, size=self.emb_shape, param_attr="emb")
for title in self.pt_slots
]
# encode each embedding field with encoder
q_encodes = [
self.query_encoders[i].forward(emb) for i, emb in enumerate(q_embs)
]
pt_encodes = [
self.title_encoders[i].forward(emb) for i, emb in enumerate(pt_embs)
]
# concat multi view for query, pos_title, neg_title
q_concat = fluid.layers.concat(q_encodes)
pt_concat = fluid.layers.concat(pt_encodes)
# projection of hidden layer
q_hid = fluid.layers.fc(q_concat,
size=self.hidden_size,
param_attr='q_fc.w',
bias_attr='q_fc.b')
pt_hid = fluid.layers.fc(pt_concat,
size=self.hidden_size,
param_attr='t_fc.w',
bias_attr='t_fc.b')
# cosine of hidden layers
cos = fluid.layers.cos_sim(q_hid, pt_hid)
self._infer_results['query_pt_sim'] = cos
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import io
import copy
import random
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
class TrainReader(Reader):
def init(self):
self.query_slots = envs.get_global_env("hyper_parameters.query_slots", None, "train.model")
self.title_slots = envs.get_global_env("hyper_parameters.title_slots", None, "train.model")
self.all_slots = []
for i in range(self.query_slots):
self.all_slots.append(str(i))
for i in range(self.title_slots):
self.all_slots.append(str(i + self.query_slots))
for i in range(self.title_slots):
self.all_slots.append(str(i + self.query_slots + self.title_slots))
self._all_slots_dict = dict()
for index, slot in enumerate(self.all_slots):
self._all_slots_dict[slot] = [False, index]
def generate_sample(self, line):
def data_iter():
elements = line.rstrip().split()
padding = 0
output = [(slot, []) for slot in self.all_slots]
for elem in elements:
feasign, slot = elem.split(':')
                # dict.has_key() was removed in Python 3; a membership test works in both
                if slot not in self._all_slots_dict:
                    continue
self._all_slots_dict[slot][0] = True
index = self._all_slots_dict[slot][1]
output[index][1].append(int(feasign))
for slot in self._all_slots_dict:
visit, index = self._all_slots_dict[slot]
if visit:
self._all_slots_dict[slot][0] = False
else:
output[index][1].append(padding)
yield output
return data_iter
# Match Model Library
## Introduction
We provide PaddleRec implementations of the model algorithms commonly used in matching tasks, along with quality metrics for single-machine training & prediction and performance metrics for distributed training & prediction. The implemented models include [DSSM](http://gitlab.baidu.com/tangwei12/paddlerec/tree/develop/models/match/dssm) and [MultiView-Simnet](http://gitlab.baidu.com/tangwei12/paddlerec/tree/develop/models/match/multiview-simnet).
The model library is continuously being extended; stay tuned.
## Contents
* [Overview](#overview)
    * [Match model list](#match-model-list)
* [Tutorial](#tutorial)
    * [Data preparation](#data-preparation)
    * [Training](#training)
    * [Prediction](#prediction)
* [Benchmarks](#benchmarks)
    * [Model quality](#model-quality)
* [Distributed training](#distributed-training)
    * [Model performance](#model-performance)
## Overview
### Match model list
| Model | Description | Paper |
| :------------------: | :--------------------: | :---------: |
| DSSM | Deep Structured Semantic Models | [Learning Deep Structured Semantic Models for Web Search using Clickthrough Data](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/cikm2013_DSSM_fullversion.pdf) (2013) |
| MultiView-Simnet | Multi-view Simnet for Personalized Recommendation | [A Multi-View Deep Learning Approach for Cross Domain User Modeling in Recommendation Systems](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/frp1159-songA.pdf) (2015) |
## Tutorial
### Data preparation
### Training
### Prediction
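As a rough sketch of the intended end-to-end flow for these models: the synthetic-data generator (`generate_synthetic_data.py`) and the `save` section of each `config.yaml` are taken from this repository, while the `python -m paddlerec.run -m <model>` launch command is an assumption based on the package layout and may differ from the launcher your PaddleRec version actually provides.

```bash
# 1) Data preparation: mirror the shipped prepare script -- create the output
#    directories and run the synthetic data generator from the model directory.
mkdir -p data/train data/test
python generate_synthetic_data.py

# 2) Training: each model is driven by its config.yaml (reader, model,
#    hyper_parameters, save). The launch command below is an assumed entry point,
#    not confirmed by this snapshot; adapt it to the launcher your version provides.
python -m paddlerec.run -m paddlerec.models.match.dssm

# 3) Prediction: after training, inference programs are written to the
#    "save: inference: dirname" configured in config.yaml (e.g. ./inference)
#    and can be loaded with fluid.io.load_inference_model for offline scoring.
```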
## Benchmarks
### Model quality
| Dataset | Model | loss | auc |
| :------------------: | :--------------------: | :---------: | :---------: |
| - | DSSM | -- | -- |
| - | MultiView-Simnet | -- | -- |
## Distributed training
### Model performance
| Dataset | Model | Single node | Multi-node (sync) | Multi-node (async) | GPU |
| :------------------: | :--------------------: | :---------: | :---------: | :---------: | :---------: |
| - | DSSM | -- | -- | -- | -- |
| - | MultiView-Simnet | -- | -- | -- | -- |
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
train:
trainer:
# for cluster training
strategy: "async"
epochs: 3
workspace: "paddlerec.models.multitask.esmm"
reader:
batch_size: 2
class: "{workspace}/esmm_reader.py"
train_data_path: "{workspace}/data/train"
model:
models: "{workspace}/model.py"
hyper_parameters:
vocab_size: 10000
embed_size: 128
learning_rate: 0.001
optimizer: adam
save:
increment:
dirname: "increment"
epoch_interval: 2
save_last: True
inference:
dirname: "inference"
epoch_interval: 4
save_last: True
0 16
475 473 155
491 21
96 185 96
29 14 13
5 481 11 21 470
70 5 70 11
167 42 167 217
72 15 73 161 172
82 82
97 297 97
193 182 186 183 184 177 214
152 152
163 298 7
39 73 71
490 23 23 496 488 74 23 74 486 23 23 74
17 17
170 170 483 444 443 234
25 472
5 5 11 70 69
149 149 455
356 68 477 468 17 479 66
159 172 6 71 6 6 158 13 494 169
155 44 438 144 500
156 9 9
146 146
173 10 10 461
7 6 6
269 48 268
50 100
323 174 18
69 69 22 98
38 171
22 29 489 10
0 0
11 5
29 13 14 232 231 451 289 452 229
260 11 156
166 160 166 39
223 134 134 420
66 401 68 132 17 84 287 5
39 304
65 84 132
400 211
145 144
16 28 254 48 50 100 42 154 262 133 17
0 0
28 28
11 476 464
61 61 86 86
38 38
463 478
437 265
22 39 485 171 98
434 51 344
16 16
67 67 67 448
22 12 161
15 377 147 147 374
119 317 0
38 484
403 499
432 442
28 0 16 50 465 42
163 487 7 162
99 99 325 423 83 83
154 133
5 37 492 235 160 279
10 10 457 493 10 460
441 4 4 4 4 4 4 4
153 153
159 164 164
328 37
65 65 404 347 431 459
80 80 44 44
61 446
162 495 7 453
157 21 204 68 37 66 469 145
37 151 230 206 240 205 264 87 409 87 288 270 280 329 157 296 454 474
430 445 433
449 14
9 9 9 9
440 238 226
148 148
266 267 181
48 498
263 255 256
458 158 7
72 168 12 165 71 73 173 49
0 0
7 7 6
14 29 13 6 15 14 15 13
480 439 21
450 21 151
12 12 49 14 13 165 12 169 72 15 15
91 91
22 12 49 168
497 101 30 411 30 482 30 53 30 101 176 415 53 447
462 150 150
471 456 131 435 131 467 436 412 227 218 190 466 429 213 326
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
from collections import defaultdict
import numpy as np
class TrainReader(Reader):
def init(self):
all_field_id = ['101', '109_14', '110_14', '127_14', '150_14', '121', '122', '124', '125', '126', '127', '128', '129',
'205', '206', '207', '210', '216', '508', '509', '702', '853', '301']
self.all_field_id_dict = defaultdict(int)
for i,field_id in enumerate(all_field_id):
self.all_field_id_dict[field_id] = [False,i]
def generate_sample(self, line):
"""
Read the data line by line and process it as a dictionary
"""
def reader():
"""
This function needs to be implemented by the user, based on data format
"""
features = line.strip().split(',')
#ctr = list(map(int, features[1]))
#cvr = list(map(int, features[2]))
ctr = int(features[1])
cvr = int(features[2])
padding = 0
output = [(field_id,[]) for field_id in self.all_field_id_dict]
for elem in features[4:]:
field_id,feat_id = elem.strip().split(':')
if field_id not in self.all_field_id_dict:
continue
self.all_field_id_dict[field_id][0] = True
index = self.all_field_id_dict[field_id][1]
#feat_id = list(map(int, feat_id))
output[index][1].append(int(feat_id))
for field_id in self.all_field_id_dict:
visited,index = self.all_field_id_dict[field_id]
if visited:
self.all_field_id_dict[field_id][0] = False
else:
output[index][1].append(padding)
output.append(('ctr', [ctr]))
output.append(('cvr', [cvr]))
yield output
return reader
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import paddle.fluid as fluid
from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase
import numpy as np
class Model(ModelBase):
def __init__(self, config):
ModelBase.__init__(self, config)
def fc(self,tag, data, out_dim, active='prelu'):
init_stddev = 1.0
scales = 1.0 / np.sqrt(data.shape[1])
p_attr = fluid.param_attr.ParamAttr(name='%s_weight' % tag,
initializer=fluid.initializer.NormalInitializer(loc=0.0, scale=init_stddev * scales))
b_attr = fluid.ParamAttr(name='%s_bias' % tag, initializer=fluid.initializer.Constant(0.1))
out = fluid.layers.fc(input=data,
size=out_dim,
act=active,
param_attr=p_attr,
bias_attr =b_attr,
name=tag)
return out
def input_data(self):
sparse_input_ids = [
fluid.data(name="field_" + str(i), shape=[-1, 1], dtype="int64", lod_level=1) for i in range(0,23)
]
label_ctr = fluid.data(name="ctr", shape=[-1, 1], dtype="int64")
label_cvr = fluid.data(name="cvr", shape=[-1, 1], dtype="int64")
inputs = sparse_input_ids + [label_ctr] + [label_cvr]
self._data_var.extend(inputs)
return inputs
def net(self, inputs):
vocab_size = envs.get_global_env("hyper_parameters.vocab_size", None, self._namespace)
embed_size = envs.get_global_env("hyper_parameters.embed_size", None, self._namespace)
emb = []
for data in inputs[0:-2]:
feat_emb = fluid.embedding(input=data,
size=[vocab_size, embed_size],
param_attr=fluid.ParamAttr(name='dis_emb',
learning_rate=5,
initializer=fluid.initializer.Xavier(fan_in=embed_size,fan_out=embed_size)
),
is_sparse=True)
field_emb = fluid.layers.sequence_pool(input=feat_emb,pool_type='sum')
emb.append(field_emb)
concat_emb = fluid.layers.concat(emb, axis=1)
# ctr
active = 'relu'
ctr_fc1 = self.fc('ctr_fc1', concat_emb, 200, active)
ctr_fc2 = self.fc('ctr_fc2', ctr_fc1, 80, active)
ctr_out = self.fc('ctr_out', ctr_fc2, 2, 'softmax')
# cvr
cvr_fc1 = self.fc('cvr_fc1', concat_emb, 200, active)
cvr_fc2 = self.fc('cvr_fc2', cvr_fc1, 80, active)
cvr_out = self.fc('cvr_out', cvr_fc2, 2,'softmax')
ctr_clk = inputs[-2]
ctcvr_buy = inputs[-1]
ctr_prop_one = fluid.layers.slice(ctr_out, axes=[1], starts=[1], ends=[2])
cvr_prop_one = fluid.layers.slice(cvr_out, axes=[1], starts=[1], ends=[2])
ctcvr_prop_one = fluid.layers.elementwise_mul(ctr_prop_one, cvr_prop_one)
ctcvr_prop = fluid.layers.concat(input=[1-ctcvr_prop_one,ctcvr_prop_one], axis = 1)
loss_ctr = fluid.layers.cross_entropy(input=ctr_out, label=ctr_clk)
loss_ctcvr = fluid.layers.cross_entropy(input=ctcvr_prop, label=ctcvr_buy)
cost = loss_ctr + loss_ctcvr
avg_cost = fluid.layers.mean(cost)
auc_ctr, batch_auc_ctr, auc_states_ctr = fluid.layers.auc(input=ctr_out, label=ctr_clk)
auc_ctcvr, batch_auc_ctcvr, auc_states_ctcvr = fluid.layers.auc(input=ctcvr_prop, label=ctcvr_buy)
self._cost = avg_cost
self._metrics["AUC_ctr"] = auc_ctr
self._metrics["BATCH_AUC_ctr"] = batch_auc_ctr
self._metrics["AUC_ctcvr"] = auc_ctcvr
self._metrics["BATCH_AUC_ctcvr"] = batch_auc_ctcvr
def train_net(self):
input_data = self.input_data()
self.net(input_data)
def infer_net(self):
pass
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
import numpy as np
class TrainReader(Reader):
def init(self):
pass
def generate_sample(self, line):
"""
Read the data line by line and process it as a dictionary
"""
def reader():
"""
This function needs to be implemented by the user, based on data format
"""
l = line.strip().split(',')
l = list(map(float, l))
label_income = []
label_marital = []
data = l[2:]
if int(l[1]) == 0:
label_income = [1, 0]
elif int(l[1]) == 1:
label_income = [0, 1]
if int(l[0]) == 0:
label_marital = [1, 0]
elif int(l[0]) == 1:
label_marital = [0, 1]
#label_income = np.array(label_income)
#label_marital = np.array(label_marital)
feature_name = ["input", "label_income", "label_marital"]
yield zip(feature_name, [data] + [label_income] + [label_marital])
return reader
0,0,73,0,0,0,0,1700.09,0,0,2,0,95,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0
0,0,58,0,0,0,0,1053.55,1,0,2,52,94,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0
1,0,18,0,0,0,0,991.95,0,0,2,0,95,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0
1,0,9,0,0,0,0,1758.14,0,0,0,0,94,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0
1,0,10,0,0,0,0,1069.16,0,0,0,0,94,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0
0,0,48,1200,0,0,0,162.61,1,2,2,52,95,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0
0,0,42,0,5178,0,0,1535.86,6,0,2,52,94,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0
1,0,28,0,0,0,0,898.83,4,0,2,30,95,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0
0,0,47,876,0,0,0,1661.53,5,0,2,52,95,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0
0,0,34,0,0,0,0,1146.79,6,0,2,52,94,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.