Commit 78e7a9b0 authored by TomorrowIsAnOtherDay

Merge branch 'zhs_es2' into develop

cmake_minimum_required (VERSION 2.6)
project (DeepES)
set(TARGET parallel_main)
########## options ##########
option(WITH_PADDLE "Compile DeepES with PaddleLite framework." OFF)
option(WITH_TORCH "Compile DeepES with Torch framework." OFF)
message("WITH_PADDLE: "${WITH_PADDLE})
message("WITH_TORCH: "${WITH_TORCH})
if (NOT (WITH_PADDLE OR WITH_TORCH))
message("ERROR: You should choose at least one framework to compile DeepES.")
return()
elseif(WITH_PADDLE AND WITH_TORCH)
message("ERROR: You cannot choose more than one framework to compile DeepES.")
return()
endif()
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
find_package(OpenMP)
if (OPENMP_FOUND)
@@ -8,19 +28,47 @@ if (OPENMP_FOUND)
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
endif()
file(GLOB src "src/*.cc")
include_directories("include")
include_directories("demo")
link_directories("/usr/lib/x86_64-linux-gnu/")
########## PaddleLite config ##########
if (WITH_PADDLE)
add_definitions(-g -O3 -pthread)
include_directories("include/paddle")
include_directories("${PROJECT_SOURCE_DIR}/inference_lite_lib/cxx/include"
"${PROJECT_SOURCE_DIR}/inference_lite_lib/third_party/mklml/include")
link_directories("${PROJECT_SOURCE_DIR}/inference_lite_lib/cxx/lib"
"${PROJECT_SOURCE_DIR}/inference_lite_lib/third_party/mklml/lib")
file(GLOB framework_src "src/paddle/*.cc")
set(demo "${PROJECT_SOURCE_DIR}/demo/paddle/cartpole_solver_parallel.cc")
########## Torch config ##########
elseif (WITH_TORCH)
list(APPEND CMAKE_PREFIX_PATH "./libtorch")
find_package(Torch REQUIRED ON)
include_directories("include/torch")
include_directories("demo/torch")
file(GLOB framework_src "src/torch/*.cc")
set(demo "${PROJECT_SOURCE_DIR}/demo/torch/cartpole_solver_parallel.cc")
else ()
message("ERROR: You should choose at least one framework to compile DeepES.")
endif()
add_executable(${TARGET} ${demo} ${src} ${framework_src})
target_link_libraries(${TARGET} gflags protobuf pthread glog)
########## PaddleLite libraries ##########
if (WITH_PADDLE)
target_link_libraries(${TARGET} -lpaddle_full_api_shared)
target_link_libraries(${TARGET} -lmklml_intel)
target_link_libraries(${TARGET} -ldl)
########## Torch libraries ##########
elseif (WITH_TORCH)
target_link_libraries(${TARGET} "${TORCH_LIBRARIES}")
endif()
@@ -7,22 +7,23 @@ DeepES is a C++ library that supports **fast validation** of ES results and is **compatible with multiple frameworks**.
## Usage example
```c++
// Instantiate an agent: the config file specifies the model to load, the sampling
// method (Gaussian / CMA sampling, ...) and the update rule (SGD / Adam), etc.
auto agent = ESAgent(config);
for (int i = 0; i < 10; ++i) {
  auto sampling_agent = agent->clone(); // clone a sampling agent
  SamplingKey key;
  sampling_agent->add_noise(key); // perturb the parameters and record the random seed in key
  int reward = evaluate(env, sampling_agent); // evaluate the perturbed parameters
  noisy_keys.push_back(key); // record the seed of the sampled noise
  noisy_rewards.push_back(reward); // record the evaluation result
}
// Update the parameters using the evaluation results and the recorded seeds,
// then repeat the loop above until convergence.
agent->update(noisy_keys, noisy_rewards);
```
## One-command demo list
- **PaddleLite**: sh ./scripts/build.sh paddle
- **Torch**: sh ./scripts/build.sh torch
- **Hand-written network (framework-free)**
## Dependencies:
@@ -33,5 +34,8 @@ predictor->update(noisy_keys, noisy_rewards);
## Additional dependencies:
### Using PaddleLite
Download the prebuilt X86 PaddleLite library, or build PaddleLite from source, to obtain the `inference_lite_lib` folder and place it in the current directory. (See: [PaddleLite X86 deployment guide](https://paddle-lite.readthedocs.io/zh/latest/demo_guides/x86.html))
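A minimal setup sketch (the source path below is illustrative; use wherever you downloaded or built PaddleLite):
```shell
# Place the PaddleLite X86 package in the repository root; build.sh expects ./inference_lite_lib.
cp -r /path/to/inference_lite_lib ./inference_lite_lib
# Build and run the PaddleLite demo.
sh ./scripts/build.sh paddle
```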
### Using Torch
Download [libtorch](https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-1.4.0%2Bcpu.zip) or build Torch from source to obtain the `libtorch` folder, and place it in the current directory.
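A minimal setup sketch, assuming the official CPU archive linked above (it unpacks to a top-level `libtorch/` directory):
```shell
# Download and unpack libtorch into the repository root; build.sh expects ./libtorch.
wget -O libtorch.zip "https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-1.4.0%2Bcpu.zip"
unzip libtorch.zip -d ./
# Build and run the Torch demo.
sh ./scripts/build.sh torch
```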
// Third party code
// This code is copied or modified from openai/gym's cartpole.py
#include <torch/torch.h>
#include <iostream>
#include <random>
#include <cassert>
#include <vector>
const double kPi = 3.1415926535898;
@@ -21,13 +23,13 @@ public:
double x_threshold = 2.4;
int steps_beyond_done = -1;
std::vector<float> state = {0, 0, 0, 0};
double reward;
bool done;
int step_ = 0;
const float* getState() {
return state.data();
}
double getReward() {
@@ -39,7 +41,13 @@ public:
}
void reset() {
std::random_device rd;
std::default_random_engine generator(rd());
std::uniform_real_distribution<float> distribution(-0.05, 0.05);
for (int i = 0; i < 4; ++i) {
state[i] = distribution(generator);
}
steps_beyond_done = -1;
step_ = 0;
}
@@ -49,10 +57,10 @@ public:
}
void step(int action) {
float x = state[0];
float x_dot = state[1];
float theta = state[2];
float theta_dot = state[3];
auto force = (action == 1) ? force_mag : -force_mag;
auto costheta = std::cos(theta);
@@ -67,7 +75,8 @@ public:
x_dot = x_dot + tau * xacc;
theta = theta + tau * theta_dot;
theta_dot = theta_dot + tau * thetaacc;
state = {x, x_dot, theta, theta_dot};
done = x < -x_threshold || x > x_threshold ||
theta < -theta_threshold_radians || theta > theta_threshold_radians ||
@@ -83,7 +92,7 @@ public:
}
else {
if (steps_beyond_done == 0) {
assert(false); // Can't do this
}
}
step_++;
......
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include <glog/logging.h>
#include <omp.h>
#include "cartpole.h"
#include "gaussian_sampling.h"
#include "es_agent.h"
#include "paddle_api.h"
using namespace DeepES;
using namespace paddle::lite_api;
const int ITER = 10;
std::shared_ptr<PaddlePredictor> create_paddle_predictor(const std::string& model_dir) {
// 1. Create CxxConfig
CxxConfig config;
config.set_model_dir(model_dir);
config.set_valid_places({
Place{TARGET(kX86), PRECISION(kFloat)},
Place{TARGET(kHost), PRECISION(kFloat)}
});
// 2. Create PaddlePredictor by CxxConfig
std::shared_ptr<PaddlePredictor> predictor = CreatePaddlePredictor<CxxConfig>(config);
return predictor;
}
// Use PaddlePredictor of CartPole model to predict the action.
std::vector<float> forward(std::shared_ptr<PaddlePredictor> predictor, const float* obs) {
std::unique_ptr<Tensor> input_tensor(std::move(predictor->GetInput(0)));
input_tensor->Resize({1, 4});
input_tensor->CopyFromCpu(obs);
predictor->Run();
std::vector<float> probs(2, 0.0);
std::unique_ptr<const Tensor> output_tensor(
std::move(predictor->GetOutput(0)));
output_tensor->CopyToCpu(probs.data());
return probs;
}
int arg_max(const std::vector<float>& vec) {
return static_cast<int>(std::distance(vec.begin(), std::max_element(vec.begin(), vec.end())));
}
float evaluate(CartPole& env, std::shared_ptr<ESAgent> agent) {
float total_reward = 0.0;
env.reset();
const float* obs = env.getState();
std::shared_ptr<PaddlePredictor> paddle_predictor;
paddle_predictor = agent->get_predictor();
while (true) {
std::vector<float> probs = forward(paddle_predictor, obs);
int act = arg_max(probs);
env.step(act);
float reward = env.getReward();
bool done = env.isDone();
total_reward += reward;
if (done) break;
obs = env.getState();
}
return total_reward;
}
int main(int argc, char* argv[]) {
std::vector<CartPole> envs;
for (int i = 0; i < ITER; ++i) {
envs.push_back(CartPole());
}
std::shared_ptr<PaddlePredictor> paddle_predictor = create_paddle_predictor("../demo/paddle/cartpole_init_model");
std::shared_ptr<ESAgent> agent = std::make_shared<ESAgent>(paddle_predictor, "../benchmark/cartpole_config.prototxt");
// Clone agents to sample (explore).
std::vector< std::shared_ptr<ESAgent> > sampling_agents;
for (int i = 0; i < ITER; ++i) {
sampling_agents.push_back(agent->clone());
}
std::vector<SamplingKey> noisy_keys;
std::vector<float> noisy_rewards(ITER, 0.0f);
noisy_keys.resize(ITER);
omp_set_num_threads(10);
for (int epoch = 0; epoch < 10000; ++epoch) {
#pragma omp parallel for schedule(dynamic, 1)
for (int i = 0; i < ITER; ++i) {
std::shared_ptr<ESAgent> sampling_agent = sampling_agents[i];
SamplingKey key;
bool success = sampling_agent->add_noise(key);
float reward = evaluate(envs[i], sampling_agent);
noisy_keys[i] = key;
noisy_rewards[i] = reward;
}
// NOTE: all parameters of sampling_agents will be updated
bool success = agent->update(noisy_keys, noisy_rewards);
int reward = evaluate(envs[0], agent);
LOG(INFO) << "Epoch:" << epoch << " Reward: " << reward;
}
}
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle import fluid
def net(obs, act_dim):
hid1_size = act_dim * 10
hid1 = fluid.layers.fc(obs, size=hid1_size)
prob = fluid.layers.fc(hid1, size=act_dim, act='softmax')
return prob
if __name__ == '__main__':
obs_dim = 4
act_dim = 2
obs = fluid.layers.data(name="obs", shape=[obs_dim], dtype='float32')
prob = net(obs, act_dim)
exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
fluid.io.save_inference_model(
dirname='cartpole_init_model',
feeded_var_names=['obs'],
target_vars=[prob],
executor=exe)
@@ -20,17 +20,18 @@
#include "cartpole.h"
#include "gaussian_sampling.h"
#include "model.h"
#include "torch_predictor.h"
#include "es_agent.h"
using namespace DeepES;
const int ITER = 10;
float evaluate(CartPole& env, std::shared_ptr<ESAgent<Model>> agent) {
float total_reward = 0.0;
env.reset();
const float* obs = env.getState();
while (true) {
torch::Tensor obs_tensor = torch::tensor({obs[0], obs[1], obs[2], obs[3]});
torch::Tensor action = agent->predict(obs_tensor);
int act = std::get<1>(action.max(-1)).item<long>();
env.step(act);
float reward = env.getReward();
@@ -50,10 +51,12 @@ int main(int argc, char* argv[]) {
}
auto model = std::make_shared<Model>(4, 2);
std::shared_ptr<ESAgent<Model>> agent = std::make_shared<ESAgent<Model>>(model, "../benchmark/cartpole_config.prototxt");
// Clone agents to sample (explore).
std::vector<std::shared_ptr<ESAgent<Model>>> sampling_agents;
for (int i = 0; i < ITER; ++i) {
sampling_agents.push_back(agent->clone());
}
std::vector<SamplingKey> noisy_keys;
@@ -63,16 +66,19 @@ int main(int argc, char* argv[]) {
for (int epoch = 0; epoch < 1000; ++epoch) {
#pragma omp parallel for schedule(dynamic, 1)
for (int i = 0; i < ITER; ++i) {
auto sampling_agent = sampling_agents[i];
SamplingKey key;
bool success = sampling_agent->add_noise(key);
float reward = evaluate(envs[i], sampling_agent);
noisy_keys[i] = key;
noisy_rewards[i] = reward;
}
// NOTE: this will also update the parameters of sampling_agents.
bool success = agent->update(noisy_keys, noisy_rewards);
// Use the original agent to evaluate (without noise).
int reward = evaluate(envs[0], agent);
LOG(INFO) << "Epoch:" << epoch << " Reward: " << reward;
}
}
@@ -41,7 +41,7 @@ public:
*@return:
* success: load configuration successfully or not.
*/
int sampling(float* noise, int64_t size);
/*@brief reconstruct the Gaussian noise given the key.
* This function is often used for updating the neural network parameters in an offline environment.
@@ -51,7 +51,7 @@ public:
* noise: a pointer to the memory that stores the noise
* size: the number of floats to be sampled.
*/
bool resampling(int key, float* noise, int64_t size);
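// Usage sketch (illustrative names, not part of the class interface): the int key returned
// by `sampling` is all that is needed to rebuild the exact same noise later:
//   int key = sampler.sampling(noise, param_size);      // draw noise, keep only the key
//   sampler.resampling(key, noise_again, param_size);   // reproduces the identical noise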
private:
float _std;
......
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DEEPES_PADDLE_ES_AGENT_H_
#define DEEPES_PADDLE_ES_AGENT_H_
#include "paddle_api.h"
#include "optimizer.h"
#include "utils.h"
#include "gaussian_sampling.h"
#include "deepes.pb.h"
#include <vector>
namespace DeepES {
typedef paddle::lite_api::PaddlePredictor PaddlePredictor;
/**
* @brief DeepES agent for PaddleLite.
*
* Users call the `clone` function to clone a sampling agent, which can call the `add_noise`
* function to add noise to its copied parameters and the `get_predictor` function to
* get a Paddle predictor with the added noise.
*
* The original agent can then use the `update` function to update the parameters based on
* the ES algorithm. Note: parameters of cloned agents will also be updated.
*/
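/* Usage sketch (based on the demo in this commit; variable names and paths are illustrative):
*
*   std::shared_ptr<PaddlePredictor> predictor = ...;  // e.g. CreatePaddlePredictor<CxxConfig>(config)
*   auto agent = std::make_shared<ESAgent>(predictor, "./cartpole_config.prototxt");
*   auto sampling_agent = agent->clone();        // cloned agent used for exploration
*   SamplingKey key;
*   sampling_agent->add_noise(key);              // perturb the copied parameters
*   // ... evaluate sampling_agent->get_predictor() in the environment ...
*   agent->update(noisy_keys, noisy_rewards);    // ES update on the original agent
*/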
class ESAgent {
public:
ESAgent();
~ESAgent();
ESAgent(
std::shared_ptr<PaddlePredictor> predictor,
std::string config_path);
/**
* @brief Clone a sampling agent
*
* Only a cloned ESAgent can call the `add_noise` function.
* Each cloned ESAgent will have a copy of original parameters.
* (support sampling in multi-thread way)
*/
std::shared_ptr<ESAgent> clone();
/**
* @brief Update parameters of predictor based on ES algorithm.
*
* Only the original (non-cloned) ESAgent can call the `update` function.
* Parameters of cloned agents will also be updated.
*/
bool update(
std::vector<SamplingKey>& noisy_keys,
std::vector<float>& noisy_rewards);
// copied parameters = original parameters + noise
bool add_noise(SamplingKey& sampling_key);
/**
* @brief Get the Paddle predictor.
*
* If _is_sampling_agent is true, returns the predictor with added noise;
* otherwise, returns the predictor without added noise.
*/
std::shared_ptr<PaddlePredictor> get_predictor();
private:
int64_t _calculate_param_size();
std::shared_ptr<PaddlePredictor> _predictor;
std::shared_ptr<PaddlePredictor> _sampling_predictor;
bool _is_sampling_agent;
std::shared_ptr<SamplingMethod> _sampling_method;
std::shared_ptr<Optimizer> _optimizer;
std::shared_ptr<DeepESConfig> _config;
int64_t _param_size;
std::vector<std::string> _param_names;
// malloc memory of noise and neg_gradients in advance.
float* _noise;
float* _neg_gradients;
};
}
#endif /* DEEPES_PADDLE_ES_AGENT_H_ */
@@ -55,7 +55,7 @@ public:
*@return:
* success: load configuration successfully or not.
*/
virtual int sampling(float* noise, int64_t size)=0;
/*@brief reconstruct the Gaussian noise given the key.
* This function is often used for updating the neural network parameters in an offline environment.
@@ -65,7 +65,7 @@ public:
* noise: a pointer to the memory that stores the noise
* size: the number of floats to be sampled.
*/
virtual bool resampling(int key, float* noise, int64_t size)=0;
bool set_seed(int seed) {
_seed = seed;
......
@@ -12,169 +12,176 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TORCH_ESAGENT_H
#define TORCH_ESAGENT_H
#include <memory>
#include <string>
#include <algorithm>
#include "sgd_optimizer.h"
#include "adam_optimizer.h"
#include "optimizer.h"
#include "utils.h"
#include "gaussian_sampling.h"
#include "deepes.pb.h"
namespace DeepES{
/**
* @brief DeepES agent for Torch.
*
* Our implementation is flexible enough to support any model that subclasses torch::nn::Module.
* That is, we can instantiate an agent by: es_agent = ESAgent<Model>(model);
* After that, users can clone an agent for multi-thread processing, add parametric noise for exploration,
* and update the parameters according to the evaluation results of the noisy parameters.
*/
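/* Usage sketch (mirrors the Torch demo in this commit; variable names are illustrative):
*
*   auto model = std::make_shared<Model>(4, 2);
*   auto agent = std::make_shared<ESAgent<Model>>(model, "./cartpole_config.prototxt");
*   auto sampling_agent = agent->clone();   // cloned agent used for exploration
*   SamplingKey key;
*   sampling_agent->add_noise(key);         // perturb the cloned model's parameters
*   torch::Tensor action = sampling_agent->predict(obs_tensor);
*   // ... collect rewards, then on the original agent:
*   agent->update(noisy_keys, noisy_rewards);
*/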
template <class T>
class ESAgent{
public:
ESAgent() {}
~ESAgent() {
delete[] _noise;
if (!_is_sampling_agent)
delete[] _neg_gradients;
}
ESAgent(std::shared_ptr<T> model, std::string config_path): _model(model) {
_is_sampling_agent = false;
_config = std::make_shared<DeepESConfig>();
load_proto_conf(config_path, *_config);
_sampling_method = std::make_shared<GaussianSampling>();
_sampling_method->load_config(*_config);
// The original agent is not used for sampling, so keep _sampling_model the same as _model for evaluation.
_sampling_model = model;
_param_size = _calculate_param_size();
std::string opt_type = _config->optimizer().type();
std::transform(opt_type.begin(),opt_type.end(),opt_type.begin(),::tolower);
if (opt_type == "sgd") {
_optimizer = std::make_shared<SGDOptimizer>(_config->optimizer().base_lr(), \
_config->optimizer().momentum());
}else if (opt_type == "adam") {
_optimizer = std::make_shared<AdamOptimizer>(_config->optimizer().base_lr(), \
_config->optimizer().beta1(), \
_config->optimizer().beta2(), \
_config->optimizer().epsilon());
}else {
// TODO: NotImplementedError
}
_noise = new float [_param_size];
_neg_gradients = new float [_param_size];
}
/**
* @brief Clone a sampling agent
*
* Only a cloned ESAgent can call the `add_noise` function.
* Each cloned ESAgent will have a copy of original parameters.
* (support sampling in multi-thread way)
*/
std::shared_ptr<ESAgent> clone() {
std::shared_ptr<ESAgent> new_agent = std::make_shared<ESAgent>();
new_agent->_model = _model;
std::shared_ptr<T> new_model = _model->clone();
new_agent->_sampling_model = new_model;
new_agent->_is_sampling_agent = true;
new_agent->_sampling_method = _sampling_method;
new_agent->_param_size = _param_size;
float* new_noise = new float [_param_size];
new_agent->_noise = new_noise;
return new_agent;
}
/**
* @brief Use the model to predict.
*
* If _is_sampling_agent is true, the sampling model with added noise is used;
* otherwise, the original model without added noise is used.
*/
torch::Tensor predict(const torch::Tensor& x) {
return _sampling_model->forward(x);
}
/**
* @brief Update parameters of model based on ES algorithm.
*
* Only the original (non-cloned) ESAgent can call the `update` function.
* Parameters of cloned agents will also be updated.
*/
bool update(std::vector<SamplingKey>& noisy_keys, std::vector<float>& noisy_rewards) {
if (_is_sampling_agent) {
LOG(ERROR) << "[DeepES] Cloned ESAgent cannot call update function, please use original ESAgent.";
return false;
}
compute_centered_ranks(noisy_rewards);
memset(_neg_gradients, 0, _param_size * sizeof(float));
for (int i = 0; i < noisy_keys.size(); ++i) {
int key = noisy_keys[i].key(0);
float reward = noisy_rewards[i];
bool success = _sampling_method->resampling(key, _noise, _param_size);
for (int64_t j = 0; j < _param_size; ++j) {
_neg_gradients[j] += _noise[j] * reward;
}
}
for (int64_t j = 0; j < _param_size; ++j) {
_neg_gradients[j] /= -1.0 * noisy_keys.size();
}
//update
auto params = _model->named_parameters();
int64_t counter = 0;
for (auto& param: params) {
torch::Tensor tensor = param.value().view({-1});
auto tensor_a = tensor.accessor<float,1>();
_optimizer->update(tensor_a, _neg_gradients+counter, tensor.size(0));
counter += tensor.size(0);
}
return true;
}
// copied parameters = original parameters + noise
bool add_noise(SamplingKey& sampling_key) {
if (!_is_sampling_agent) {
LOG(ERROR) << "[DeepES] Original ESAgent cannot call add_noise function, please use cloned ESAgent.";
return false;
}
auto sampling_params = _sampling_model->named_parameters();
auto params = _model->named_parameters();
int key = _sampling_method->sampling(_noise, _param_size);
sampling_key.add_key(key);
int64_t counter = 0;
for (auto& param: sampling_params) {
torch::Tensor sampling_tensor = param.value().view({-1});
std::string param_name = param.key();
torch::Tensor tensor = params.find(param_name)->view({-1});
auto sampling_tensor_a = sampling_tensor.accessor<float,1>();
auto tensor_a = tensor.accessor<float,1>();
for (int64_t j = 0; j < tensor.size(0); ++j) {
sampling_tensor_a[j] = tensor_a[j] + _noise[counter + j];
}
counter += tensor.size(0);
}
return true;
}
private:
int64_t _calculate_param_size() {
auto params = _model->named_parameters();
for (auto& param: params) {
torch::Tensor tensor = param.value().view({-1});
_param_size += tensor.size(0);
}
return _param_size;
}
std::shared_ptr<T> _model;
std::shared_ptr<T> _sampling_model;
bool _is_sampling_agent;
std::shared_ptr<SamplingMethod> _sampling_method;
std::shared_ptr<Optimizer> _optimizer;
std::shared_ptr<DeepESConfig> _config;
int64_t _param_size;
// malloc memory of noise and neg_gradients in advance.
float* _noise;
float* _neg_gradients;
};
}
#endif /* TORCH_ESAGENT_H */
#!/bin/bash
export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH
if [ $# != 1 ]; then
echo "You must choose one framework (paddle/torch) to compile DeepES."
exit 0
fi
if [ $1 = "paddle" ]; then
#---------------paddlelite-------------#
if [ ! -d "./inference_lite_lib" ];then
echo "Cannot find the PaddleLite library: ./inference_lite_lib"
echo "Please put the PaddleLite libraray to current folder according the instruction in README"
exit 1
fi
# Unpack the initial CartPole model if it is not already present.
if [ ! -d "./demo/paddle/cartpole_init_model" ]; then
unzip ./demo/paddle/cartpole_init_model.zip -d ./demo/paddle/
fi
FLAGS=" -DWITH_PADDLE=ON"
elif [ $1 = "torch" ]; then
#---------------libtorch-------------#
if [ ! -d "./libtorch" ];then
echo "Cannot find the torch library: ./libtorch"
echo "Please put the torch libraray to current folder according the instruction in README"
exit 1
fi
FLAGS=" -DWITH_TORCH=ON"
else
echo "Invalid arguments. [paddle/torch]"
exit 0
fi
#export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH
#----------------protobuf-------------#
cp ./src/proto/deepes.proto ./
protoc deepes.proto --cpp_out ./
mv deepes.pb.h ./include
mv deepes.pb.cc ./src
rm deepes.proto
#----------------build---------------#
echo ${FLAGS}
rm -rf build
mkdir build
cd build
cmake ../ ${FLAGS}
make -j10
#-----------------run----------------#
./parallel_main
@@ -26,17 +26,17 @@ void GaussianSampling::load_config(const DeepESConfig& config) {
set_seed(config.seed());
}
int GaussianSampling::sampling(float* noise, int64_t size) {
int key = rand();
std::default_random_engine generator(key);
std::normal_distribution<float> norm;
for (int64_t i = 0; i < size; ++i) {
*(noise + i) = norm(generator) * _std;
}
return key;
}
bool GaussianSampling::resampling(int key, float* noise, int64_t size) {
bool success = true;
if (noise == nullptr) {
success = false;
@@ -44,7 +44,7 @@ bool GaussianSampling::resampling(int key, float* noise, int size) {
else {
std::default_random_engine generator(key);
std::normal_distribution<float> norm;
for (int64_t i = 0; i < size; ++i) {
*(noise + i) = norm(generator) * _std;
}
}
......
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <vector>
#include "es_agent.h"
namespace DeepES {
typedef paddle::lite_api::PaddlePredictor PaddlePredictor;
typedef paddle::lite_api::Tensor Tensor;
typedef paddle::lite_api::shape_t shape_t;
inline int64_t ShapeProduction(const shape_t& shape) {
int64_t res = 1;
for (auto i : shape) res *= i;
return res;
}
ESAgent::ESAgent() {}
ESAgent::~ESAgent() {
delete[] _noise;
if (!_is_sampling_agent)
delete[] _neg_gradients;
}
ESAgent::ESAgent(
std::shared_ptr<PaddlePredictor> predictor,
std::string config_path) {
_is_sampling_agent = false;
_predictor = predictor;
// The original agent is not used for sampling, so keep _sampling_predictor the same as _predictor for evaluation.
_sampling_predictor = predictor;
_config = std::make_shared<DeepESConfig>();
load_proto_conf(config_path, *_config);
_sampling_method = std::make_shared<GaussianSampling>();
_sampling_method->load_config(*_config);
_optimizer = std::make_shared<SGDOptimizer>(_config->optimizer().base_lr());
_param_names = _predictor->GetParamNames();
_param_size = _calculate_param_size();
_noise = new float [_param_size];
_neg_gradients = new float [_param_size];
}
std::shared_ptr<ESAgent> ESAgent::clone() {
std::shared_ptr<PaddlePredictor> new_sampling_predictor = _predictor->Clone();
std::shared_ptr<ESAgent> new_agent = std::make_shared<ESAgent>();
float* noise = new float [_param_size];
new_agent->_predictor = _predictor;
new_agent->_sampling_predictor = new_sampling_predictor;
new_agent->_is_sampling_agent = true;
new_agent->_sampling_method = _sampling_method;
new_agent->_param_names = _param_names;
new_agent->_param_size = _param_size;
new_agent->_noise = noise;
return new_agent;
}
bool ESAgent::update(
std::vector<SamplingKey>& noisy_keys,
std::vector<float>& noisy_rewards) {
if (_is_sampling_agent) {
LOG(ERROR) << "[DeepES] Cloned ESAgent cannot call update function, please use original ESAgent.";
return false;
}
compute_centered_ranks(noisy_rewards);
memset(_neg_gradients, 0, _param_size * sizeof(float));
for (int i = 0; i < noisy_keys.size(); ++i) {
int key = noisy_keys[i].key(0);
float reward = noisy_rewards[i];
bool success = _sampling_method->resampling(key, _noise, _param_size);
for (int64_t j = 0; j < _param_size; ++j) {
_neg_gradients[j] += _noise[j] * reward;
}
}
for (int64_t j = 0; j < _param_size; ++j) {
_neg_gradients[j] /= -1.0 * noisy_keys.size();
}
//update
int64_t counter = 0;
for (std::string param_name: _param_names) {
std::unique_ptr<Tensor> tensor = _predictor->GetMutableTensor(param_name);
float* tensor_data = tensor->mutable_data<float>();
int64_t tensor_size = ShapeProduction(tensor->shape());
_optimizer->update(tensor_data, _neg_gradients + counter, tensor_size);
counter += tensor_size;
}
return true;
}
bool ESAgent::add_noise(SamplingKey& sampling_key) {
if (!_is_sampling_agent) {
LOG(ERROR) << "[DeepES] Original ESAgent cannot call add_noise function, please use cloned ESAgent.";
return false;
}
int key = _sampling_method->sampling(_noise, _param_size);
sampling_key.add_key(key);
int64_t counter = 0;
for (std::string param_name: _param_names) {
std::unique_ptr<Tensor> sample_tensor = _sampling_predictor->GetMutableTensor(param_name);
std::unique_ptr<const Tensor> tensor = _predictor->GetTensor(param_name);
int64_t tensor_size = ShapeProduction(tensor->shape());
for (int64_t j = 0; j < tensor_size; ++j) {
sample_tensor->mutable_data<float>()[j] = tensor->data<float>()[j] + _noise[counter + j];
}
counter += tensor_size;
}
return true;
}
std::shared_ptr<PaddlePredictor> ESAgent::get_predictor() {
return _sampling_predictor;
}
int64_t ESAgent::_calculate_param_size() {
int64_t param_size = 0;
for (std::string param_name: _param_names) {
std::unique_ptr<const Tensor> tensor = _predictor->GetTensor(param_name);
param_size += ShapeProduction(tensor->shape());
}
return param_size;
}
}