Commit 30357a8d authored by J jhjiangcs

add lenet network demo, improve mnist demo.

Parent 193e4010
......@@ -44,23 +44,37 @@ epoch_num = 5
x = pfl_mpc.data(name='x', shape=[BATCH_SIZE, 1, 28, 28], dtype='int64')
y = pfl_mpc.data(name='y', shape=[BATCH_SIZE, 10], dtype='int64')
# lenet-3 network
#conv = pfl_mpc.layers.conv2d(input=x, num_filters=16, filter_size=5, act='relu')
#pool = pfl_mpc.layers.pool2d(input=conv, pool_size=2, pool_stride=2)
#fc_1 = pfl_mpc.layers.fc(input=pool, size=100, act='relu')
# lenet-5 network
conv_1 = pfl_mpc.layers.conv2d(input=x, num_filters=16, filter_size=5, act='relu')
pool_1 = pfl_mpc.layers.pool2d(input=conv_1, pool_size=2, pool_stride=2)
conv_2 = pfl_mpc.layers.conv2d(input=pool_1, num_filters=16, filter_size=5, act='relu')
pool_2 = pfl_mpc.layers.pool2d(input=conv_2, pool_size=2, pool_stride=2)
fc_1 = pfl_mpc.layers.fc(input=pool_2, size=100, act='relu')
fc_out = pfl_mpc.layers.fc(input=fc_1, size=10)
cost, softmax = pfl_mpc.layers.softmax_with_cross_entropy(
    logits=fc_out, label=y, soft_label=True, return_softmax=True)
class Model(object):
    """Network definitions for the MNIST demo: LeNet-3 and LeNet-5."""

    def __init__(self):
        pass

    def lenet3(self):
        conv = pfl_mpc.layers.conv2d(input=x, num_filters=16, filter_size=5, act='relu')
        pool = pfl_mpc.layers.pool2d(input=conv, pool_size=2, pool_stride=2)
        fc_1 = pfl_mpc.layers.fc(input=pool, size=100, act='relu')
        fc_out = pfl_mpc.layers.fc(input=fc_1, size=10)
        cost, softmax = pfl_mpc.layers.softmax_with_cross_entropy(
            logits=fc_out, label=y, soft_label=True, return_softmax=True)
        return cost, softmax

    def lenet5(self):
        conv_1 = pfl_mpc.layers.conv2d(input=x, num_filters=16, filter_size=5, act='relu')
        pool_1 = pfl_mpc.layers.pool2d(input=conv_1, pool_size=2, pool_stride=2)
        conv_2 = pfl_mpc.layers.conv2d(input=pool_1, num_filters=16, filter_size=5, act='relu')
        pool_2 = pfl_mpc.layers.pool2d(input=conv_2, pool_size=2, pool_stride=2)
        fc_1 = pfl_mpc.layers.fc(input=pool_2, size=100, act='relu')
        fc_out = pfl_mpc.layers.fc(input=fc_1, size=10)
        cost, softmax = pfl_mpc.layers.softmax_with_cross_entropy(
            logits=fc_out, label=y, soft_label=True, return_softmax=True)
        return cost, softmax


model = Model()
cost, softmax = model.lenet5()
infer_program = fluid.default_main_program().clone(for_test=False)
......
......@@ -8,7 +8,7 @@ This document introduces how to run MNIST demo based on Paddle-MPC, which has tw
#### (1). Prepare Data
Generate encrypted training and testing data utilizing `generate_encrypted_data()` and `generate_encrypted_test_data()` in `process_data.py` script. Users can run the script with command `python process_data.py` to generate encrypted feature and label in given directory, e.g., `./mpc_data/`. Users can specify `class_num` (2 or 10) to determine the encrypted data is for `fc_sigmoid`(two classes) or `lenet`(10 classes) network. Different suffix names are used for these files to indicate the ownership of different computation parties. For instance, a file named `mnist2_feature.part0` means it is a feature file of party 0.
Generate encrypted training and testing data utilizing `generate_encrypted_data()` and `generate_encrypted_test_data()` in the `process_data.py` script. Users can run the script with the command `python process_data.py` to generate encrypted features and labels in a given directory, e.g., `./mpc_data/`. Users can specify `class_num` (2 or 10) to determine whether the encrypted data is for the `logistic_fc_sigmoid` (two-class) network or for the `lenet` and `logistic_fc_softmax` (ten-class) networks. Different suffix names are used for these files to indicate the ownership of different computation parties. For instance, a file named `mnist2_feature.part0` means it is a feature file of party 0.
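For illustration, the following is a minimal sketch of driving the same data preparation from Python instead of running the script from the command line; the helper signatures shown here are assumptions, so check `process_data.py` for the actual interface and output file names:

```python
# Hypothetical driver for data preparation; the argument passed to the two
# helpers is an assumption -- consult process_data.py for the real signatures.
import os
import process_data

mpc_data_dir = "./mpc_data/"   # directory that will hold the encrypted shares
class_num = 10                 # 2 -> logistic_fc_sigmoid, 10 -> lenet / logistic_fc_softmax

if not os.path.exists(mpc_data_dir):
    os.mkdir(mpc_data_dir)

# Each call is expected to write one share file per computation party,
# e.g. mnist10_feature.part0 / .part1 / .part2 under mpc_data_dir.
process_data.generate_encrypted_data(class_num)
process_data.generate_encrypted_test_data(class_num)
```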
#### (2). Launch Demo with A Shell Script
......
......@@ -8,7 +8,7 @@
#### 1. Prepare Data
Use `generate_encrypted_data()` and `generate_encrypted_test_data()` in the `process_data.py` script to generate encrypted training and testing data. Users can run the script directly with `python process_data.py` to generate encrypted features and labels in a specified directory (e.g., `./mpc_data/`). Users can specify the number of label classes with the `class_num` parameter to produce encrypted data suitable for the `fc_sigmoid` (two-class) or `lenet` (ten-class) network. Encrypted feature and label files for the 3 computation parties are generated in the specified directory, with file suffixes distinguishing the data of different parties. For example, `mnist2_feature.part0` is the feature data belonging to party 0.
Use `generate_encrypted_data()` and `generate_encrypted_test_data()` in the `process_data.py` script to generate encrypted training and testing data. Users can run the script directly with `python process_data.py` to generate encrypted features and labels in a specified directory (e.g., `./mpc_data/`). Users can specify the number of label classes with the `class_num` parameter to produce encrypted data suitable for the `logistic_fc_sigmoid` (two-class) or the `lenet` and `logistic_fc_softmax` (ten-class) networks. Encrypted feature and label files for the 3 computation parties are generated in the specified directory, with file suffixes distinguishing the data of different parties. For example, `mnist2_feature.part0` is the feature data belonging to party 0.
#### 2. Launch Demo with A Shell Script
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
MNIST CNN Demo (LeNet5)
"""
import sys
import os
import numpy as np
import time
import logging
import math
import paddle
import paddle.fluid as fluid
import paddle.fluid.profiler as profiler
import paddle_fl.mpc as pfl_mpc
import paddle_fl.mpc.data_utils.aby3 as aby3
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger("fluid")
logger.setLevel(logging.INFO)
role, server, port = sys.argv[1], sys.argv[2], sys.argv[3]
# Modify the host ("localhost") here if needed.
pfl_mpc.init("aby3", int(role), "localhost", server, int(port))
role = int(role)
# data preprocessing
BATCH_SIZE = 128
epoch_num = 5
x = pfl_mpc.data(name='x', shape=[BATCH_SIZE, 1, 28, 28], dtype='int64')
y = pfl_mpc.data(name='y', shape=[BATCH_SIZE, 10], dtype='int64')
fc_out = pfl_mpc.layers.fc(input=x, size=10)
cost, softmax = pfl_mpc.layers.softmax_with_cross_entropy(
    logits=fc_out, label=y, soft_label=True, return_softmax=True)
infer_program = fluid.default_main_program().clone(for_test=False)
avg_loss = pfl_mpc.layers.mean(cost)
optimizer = pfl_mpc.optimizer.SGD(learning_rate=0.1)
optimizer.minimize(avg_loss)
# prepare train and test reader
mpc_data_dir = "./mpc_data/"
if not os.path.exists(mpc_data_dir):
    raise ValueError("mpc_data_dir is not found. Please prepare encrypted data.")
# train_reader
feature_reader = aby3.load_aby3_shares(mpc_data_dir + "mnist10_feature", id=role, shape=(1, 28, 28))
label_reader = aby3.load_aby3_shares(mpc_data_dir + "mnist10_label", id=role, shape=(10,))
batch_feature = aby3.batch(feature_reader, BATCH_SIZE, drop_last=True)
batch_label = aby3.batch(label_reader, BATCH_SIZE, drop_last=True)
# test_reader
test_feature_reader = aby3.load_aby3_shares(mpc_data_dir + "mnist10_test_feature", id=role, shape=(1, 28, 28))
test_label_reader = aby3.load_aby3_shares(mpc_data_dir + "mnist10_test_label", id=role, shape=(10,))
test_batch_feature = aby3.batch(test_feature_reader, BATCH_SIZE, drop_last=True)
test_batch_label = aby3.batch(test_label_reader, BATCH_SIZE, drop_last=True)
place = fluid.CPUPlace()
# async data loader
loader = fluid.io.DataLoader.from_generator(feed_list=[x, y], capacity=BATCH_SIZE)
batch_sample = paddle.reader.compose(batch_feature, batch_label)
loader.set_batch_generator(batch_sample, places=place)
test_loader = fluid.io.DataLoader.from_generator(feed_list=[x, y], capacity=BATCH_SIZE)
test_batch_sample = paddle.reader.compose(test_batch_feature, test_batch_label)
test_loader.set_batch_generator(test_batch_sample, places=place)
# infer
def infer():
    """
    MPC infer
    """
    mpc_infer_data_dir = "./mpc_infer_data/"
    if not os.path.exists(mpc_infer_data_dir):
        os.mkdir(mpc_infer_data_dir)
    prediction_file = mpc_infer_data_dir + "mnist_debug_prediction"
    prediction_file_part = prediction_file + ".part{}".format(role)

    if os.path.exists(prediction_file_part):
        os.remove(prediction_file_part)

    step = 0
    start_time = time.time()
    for sample in test_loader():
        step += 1
        prediction = exe.run(program=infer_program, feed=sample, fetch_list=[softmax])
        with open(prediction_file_part, 'ab') as f:
            f.write(np.array(prediction).tostring())
        if step % 10 == 0:
            end_time = time.time()
            logger.info('MPC infer of step={}, cost time in seconds:{}'.format(step, (end_time - start_time)))

    end_time = time.time()
    logger.info('MPC infer time in seconds:{}'.format((end_time - start_time)))
# train
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
mpc_model_basedir = "./mpc_model/"
step = 0
start_time = time.time()
logger.info('MPC training start...')
for epoch_id in range(epoch_num):
    for sample in loader():
        step += 1
        results = exe.run(feed=sample, fetch_list=[softmax])
        if step % 100 == 0:
            end_time = time.time()
            logger.info('MPC training of epoch_id={} step={}, cost time in seconds:{}'
                        .format(epoch_id, step, (end_time - start_time)))

    # For each epoch: infer or save infer program
    #infer()
    mpc_model_dir = mpc_model_basedir + "epoch{}/party{}".format(epoch_id, role)
    fluid.io.save_inference_model(
        dirname=mpc_model_dir,
        feeded_var_names=["x", "y"],
        target_vars=[softmax],
        executor=exe,
        main_program=infer_program,
        model_filename="__model__")

end_time = time.time()
logger.info('MPC training of epoch_num={} batch_size={}, cost time in seconds:{}'
            .format(epoch_num, BATCH_SIZE, (end_time - start_time)))
# infer
infer()