From a74730af5b8762a239cd395450475f5358a488da Mon Sep 17 00:00:00 2001 From: "He, Kai" Date: Wed, 16 Sep 2020 09:53:33 +0000 Subject: [PATCH] add decrypt_and_rescale.py to mean_normalize_demo --- .../examples/mean_normalize_demo/README.md | 13 ++++--- .../decrypt_and_rescale.py | 35 +++++++++++++++++++ 2 files changed, 44 insertions(+), 4 deletions(-) create mode 100644 python/paddle_fl/mpc/examples/mean_normalize_demo/decrypt_and_rescale.py diff --git a/python/paddle_fl/mpc/examples/mean_normalize_demo/README.md b/python/paddle_fl/mpc/examples/mean_normalize_demo/README.md index 6200d07..5d590c6 100644 --- a/python/paddle_fl/mpc/examples/mean_normalize_demo/README.md +++ b/python/paddle_fl/mpc/examples/mean_normalize_demo/README.md @@ -11,8 +11,9 @@ Create a empty dir for data, and modify `data_path` in `process_data.py`, default dir path is `./data`. Then run the script with command `python prepare.py` to generate random data -for demo. Otherwise generate your own data, move them to `data_path` and modify -corresponding meta info in `prepare.py`. +for demo, which is dumped with numpy into files named `feature_data.{i}.npy` located +in `data_path`. Otherwise generate your own data, move them to `data_path`, +name them the same way, and modify the corresponding meta info in `prepare.py`. Encrypted data files of feature statstics would be generated and saved in `data_path` directory. Different suffix names are used for these files to @@ -55,5 +56,9 @@ import process_data res = process_data.decrypt_data(prepare.data_path + 'result', (2, prepare.feat_width, )) ``` -Also, `verify.py` could be used to calculate error between direct plaintext -numpy calculation and mpc mean normalize. +Or use `decrypt_and_rescale.py` to decrypt the result, rescale the feature data which has +been saved in `feature_data.{i}.npy`, and dump the normalized data to +`normalized_data.{i}.npy`, which is located in `data_path`. 
+ +Also, `verify.py` can be used to calculate the error of `f_range` and `f_mean` +between the direct plaintext numpy calculation and the MPC mean normalization. diff --git a/python/paddle_fl/mpc/examples/mean_normalize_demo/decrypt_and_rescale.py b/python/paddle_fl/mpc/examples/mean_normalize_demo/decrypt_and_rescale.py new file mode 100644 index 0000000..e257a0d --- /dev/null +++ b/python/paddle_fl/mpc/examples/mean_normalize_demo/decrypt_and_rescale.py @@ -0,0 +1,35 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Decrypt and rescale for mean normalize demo. +""" +import sys +import numpy as np +import process_data +import prepare + +data_path = prepare.data_path +# res[0] is f_range, res[1] is f_mean +# use the decrypted global f_range and f_mean to rescale the local feature data +res = process_data.decrypt_data(data_path + 'result', (2, prepare.feat_width, )) + +party = sys.argv[1] + +input = np.load(data_path + 'feature_data.' + party + '.npy') + +output = (input - res[1]) / res[0] + +np.save(data_path + 'normalized_data.' + party, output) + + -- GitLab