提交 7d68c021 编写于 作者: M malin10

fix dssm

上级 eabfd85d
......@@ -11,44 +11,61 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
evaluate:
reader:
batch_size: 1
class: "{workspace}/synthetic_evaluate_reader.py"
test_data_path: "{workspace}/data/train"
train:
trainer:
# for cluster training
strategy: "async"
epochs: 4
workspace: "paddlerec.models.match.dssm"
# 轮数
epochs: 4
# 设备
device: cpu
# 工作目录
workspace: "paddlerec.models.match.dssm"
reader:
batch_size: 4
class: "{workspace}/synthetic_reader.py"
train_data_path: "{workspace}/data/train"
# dataset列表
dataset:
- name: dataset_train # 名字,用来区分不同的dataset
batch_size: 4
type: QueueDataset
data_path: "{workspace}/data/train" # 数据路径
data_converter: "{workspace}/synthetic_reader.py"
#- name: dataset_infer # 名字,用来区分不同的dataset
# batch_size: 1
# type: QueueDataset
# data_path: "{workspace}/data/train" # 数据路径
# data_converter: "{workspace}/synthetic_evaluate_reader.py"
model:
models: "{workspace}/model.py"
hyper_parameters:
TRIGRAM_D: 1000
NEG: 4
fc_sizes: [300, 300, 128]
fc_acts: ['tanh', 'tanh', 'tanh']
learning_rate: 0.01
optimizer: sgd
# 超参数
hyper_parameters:
#优化器
optimizer:
class: sgd
learning_rate: 0.01
strategy: async
# 用户自定义
TRIGRAM_D: 1000
NEG: 4
fc_sizes: [300, 300, 128]
fc_acts: ['tanh', 'tanh', 'tanh']
save:
increment:
dirname: "increment"
epoch_interval: 2
save_last: True
# executor配置
epoch:
name:
trainer_class: single
save_checkpoint_interval: 2 # 保存模型
save_inference_interval: 4 # 保存预测模型
save_checkpoint_path: "increment" # 保存模型路径
save_inference_path: "inference" # 保存预测模型路径
save_inference_feed_varnames: ["query", "doc_pos"] # 预测模型feed vars
save_inference_fetch_varnames: ["cos_sim_0.tmp_0"] # 预测模型 fetch vars
#init_model_path: "xxxx" # 加载模型
inference:
dirname: "inference"
epoch_interval: 4
feed_varnames: ["query", "doc_pos"]
fetch_varnames: ["cos_sim_0.tmp_0"]
save_last: True
# 执行器,每轮要跑的所有模型
executor:
- name: train
model: "{workspace}/model.py" # 模型路径
dataset_name: dataset_train # must match the `name` of an entry in the dataset list
thread_num: 1 # 线程数
is_infer: False # 是否是infer
# - name: infer
# model: "{workspace}/model.py" # 模型路径
# dataset_name: dataset_infer # 名字,用来区分不同的阶段
# thread_num: 1 # 线程数
# is_infer: True # 是否是infer
......@@ -22,45 +22,35 @@ class Model(ModelBase):
# Constructor: forwards the config object unchanged to ModelBase.__init__
# and performs no additional setup of its own.
def __init__(self, config):
ModelBase.__init__(self, config)
def input(self):
TRIGRAM_D = envs.get_global_env("hyper_parameters.TRIGRAM_D", None,
self._namespace)
Neg = envs.get_global_env("hyper_parameters.NEG", None,
self._namespace)
self.query = fluid.data(
name="query", shape=[-1, TRIGRAM_D], dtype='float32', lod_level=0)
self.doc_pos = fluid.data(
def _init_hyper_parameters(self):
    """Cache the model's hyper-parameters from the global config on ``self``.

    Sets ``TRIGRAM_D``, ``Neg``, ``hidden_layers``, ``hidden_acts`` and
    ``learning_rate`` from the ``hyper_parameters`` section of the config.
    """
    self.TRIGRAM_D = envs.get_global_env("hyper_parameters.TRIGRAM_D")
    self.Neg = envs.get_global_env("hyper_parameters.NEG")
    self.hidden_layers = envs.get_global_env("hyper_parameters.fc_sizes")
    self.hidden_acts = envs.get_global_env("hyper_parameters.fc_acts")
    # The accompanying config nests the learning rate under
    # ``hyper_parameters.optimizer``; the flat key is kept as a fallback so
    # configs written in the older layout still resolve.
    # NOTE(review): relies on the second positional argument of
    # envs.get_global_env being the default value, as the older calls in
    # this file (``get_global_env(key, None, namespace)``) suggest.
    legacy_learning_rate = envs.get_global_env(
        "hyper_parameters.learning_rate")
    self.learning_rate = envs.get_global_env(
        "hyper_parameters.optimizer.learning_rate", legacy_learning_rate)
def input_data(self, is_infer=False, **kwargs):
query = fluid.data(
name="query", shape=[-1, self.TRIGRAM_D], dtype='float32', lod_level=0)
doc_pos = fluid.data(
name="doc_pos",
shape=[-1, TRIGRAM_D],
shape=[-1, self.TRIGRAM_D],
dtype='float32',
lod_level=0)
self.doc_negs = [
if is_infer:
return [query, doc_pos]
doc_negs = [
fluid.data(
name="doc_neg_" + str(i),
shape=[-1, TRIGRAM_D],
shape=[-1, self.TRIGRAM_D],
dtype="float32",
lod_level=0) for i in range(Neg)
lod_level=0) for i in range(self.Neg)
]
self._data_var.append(self.query)
self._data_var.append(self.doc_pos)
for input in self.doc_negs:
self._data_var.append(input)
if self._platform != "LINUX":
self._data_loader = fluid.io.DataLoader.from_generator(
feed_list=self._data_var,
capacity=64,
use_double_buffer=False,
iterable=False)
def net(self, is_infer=False):
hidden_layers = envs.get_global_env("hyper_parameters.fc_sizes", None,
self._namespace)
hidden_acts = envs.get_global_env("hyper_parameters.fc_acts", None,
self._namespace)
return [query, doc_pos] + doc_negs
def net(self, inputs, is_infer=False):
def fc(data, hidden_layers, hidden_acts, names):
fc_inputs = [data]
for i in range(len(hidden_layers)):
......@@ -77,71 +67,31 @@ class Model(ModelBase):
fc_inputs.append(out)
return fc_inputs[-1]
query_fc = fc(self.query, hidden_layers, hidden_acts,
query_fc = fc(inputs[0], self.hidden_layers, self.hidden_acts,
['query_l1', 'query_l2', 'query_l3'])
doc_pos_fc = fc(self.doc_pos, hidden_layers, hidden_acts,
doc_pos_fc = fc(inputs[1], self.hidden_layers, self.hidden_acts,
['doc_pos_l1', 'doc_pos_l2', 'doc_pos_l3'])
self.R_Q_D_p = fluid.layers.cos_sim(query_fc, doc_pos_fc)
R_Q_D_p = fluid.layers.cos_sim(query_fc, doc_pos_fc)
if is_infer:
self._infer_results["query_doc_sim"] = R_Q_D_p
return
R_Q_D_ns = []
for i, doc_neg in enumerate(self.doc_negs):
doc_neg_fc_i = fc(doc_neg, hidden_layers, hidden_acts, [
for i in range(len(inputs)-2):
doc_neg_fc_i = fc(inputs[i+2], self.hidden_layers, self.hidden_acts, [
'doc_neg_l1_' + str(i), 'doc_neg_l2_' + str(i),
'doc_neg_l3_' + str(i)
])
R_Q_D_ns.append(fluid.layers.cos_sim(query_fc, doc_neg_fc_i))
concat_Rs = fluid.layers.concat(
input=[self.R_Q_D_p] + R_Q_D_ns, axis=-1)
input=[R_Q_D_p] + R_Q_D_ns, axis=-1)
prob = fluid.layers.softmax(concat_Rs, axis=1)
hit_prob = fluid.layers.slice(
prob, axes=[0, 1], starts=[0, 0], ends=[4, 1])
loss = -fluid.layers.reduce_sum(fluid.layers.log(hit_prob))
self.avg_cost = fluid.layers.mean(x=loss)
# Publishes the query/positive-document cosine similarity (self.R_Q_D_p,
# assigned in net()) in self._infer_results under the key 'query_doc_sim'.
def infer_results(self):
self._infer_results['query_doc_sim'] = self.R_Q_D_p
# Copies the mean loss stored by net() in self.avg_cost into self._cost.
def avg_loss(self):
self._cost = self.avg_cost
# Registers the mean loss (self.avg_cost) in self._metrics under "LOSS".
def metrics(self):
self._metrics["LOSS"] = self.avg_cost
# Assembles the training graph. The order matters: input() declares the
# data variables that net() consumes, and avg_loss()/metrics() read the
# self.avg_cost value that net(is_infer=False) assigns.
def train_net(self):
self.input()
self.net(is_infer=False)
self.avg_loss()
self.metrics()
def optimizer(self):
    """Return an SGD optimizer using ``hyper_parameters.learning_rate``."""
    lr = envs.get_global_env("hyper_parameters.learning_rate", None,
                             self._namespace)
    return fluid.optimizer.SGD(lr)
def infer_input(self):
    """Declare the inference inputs and the data loader that feeds them.

    Creates the ``query`` and ``doc_pos`` float32 data variables, each of
    shape ``[-1, TRIGRAM_D]``, stores them in ``self._infer_data_var`` and
    builds ``self._infer_data_loader`` over that list.
    """
    trigram_dim = envs.get_global_env("hyper_parameters.TRIGRAM_D", None,
                                      self._namespace)
    # Both inputs share dtype and LoD level; only name and shape are spelled
    # out per variable (a fresh shape list is passed to each call).
    shared_kwargs = dict(dtype='float32', lod_level=0)
    self.query = fluid.data(
        name="query", shape=[-1, trigram_dim], **shared_kwargs)
    self.doc_pos = fluid.data(
        name="doc_pos", shape=[-1, trigram_dim], **shared_kwargs)

    self._infer_data_var = [self.query, self.doc_pos]
    self._infer_data_loader = fluid.io.DataLoader.from_generator(
        feed_list=self._infer_data_var,
        capacity=64,
        use_double_buffer=False,
        iterable=False)
avg_cost = fluid.layers.mean(x=loss)
self._cost = avg_cost
self._metrics["LOSS"] = avg_cost
# Assembles the inference graph. The order matters: infer_input() declares
# the query/doc_pos variables, net(is_infer=True) is then run, and
# infer_results() publishes the similarity tensor.
def infer_net(self):
self.infer_input()
self.net(is_infer=True)
self.infer_results()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册