Unverified commit c37e0b55 authored by barry-ai, committed by GitHub

[APU] Add model cache (#4456)

Parent cde383dc
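For context: this commit teaches the APU subgraph bridge to serialize the compiled DLA model (plus the precisions and shapes of the origin output tensors) into subgraph_model_cache_dir, and to restore it on later runs instead of rebuilding online. A minimal usage sketch, modeled on the test at the end of this diff; the include path and model directory are assumptions:

#include "lite/api/paddle_api.h"  // assumed public API header of Paddle-Lite

using namespace paddle;

void BuildPredictorWithApuModelCache() {
  lite_api::CxxConfig config;
  config.set_model_dir("./mobilenet_v1_int8");  // hypothetical model path
  config.set_valid_places({lite_api::Place{TARGET(kARM), PRECISION(kFloat)},
                           lite_api::Place{TARGET(kARM), PRECISION(kInt8)},
                           lite_api::Place{TARGET(kAPU), PRECISION(kInt8)}});
  // New behavior in this commit: the APU bridge stores <md5>.dla and
  // <md5>.cfg files in this directory and reloads them on later runs.
  config.set_subgraph_model_cache_dir("./apu_model_cache");
  auto predictor = lite_api::CreatePaddlePredictor(config);
}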
......@@ -89,6 +89,14 @@ void CxxPaddleApiImpl::Init(const lite_api::CxxConfig &config) {
Context<TargetType::kNPU>::SetSubgraphModelCacheDir(
raw_predictor_->scope(), config.subgraph_model_cache_dir());
#endif
#ifdef LITE_WITH_APU
// Store the model-level configuration into the scope for kernels, and use
// exe_scope to store the execution-level configuration
Context<TargetType::kAPU>::SetSubgraphModelCacheDir(
raw_predictor_->scope(), config.subgraph_model_cache_dir());
#endif
#ifdef LITE_WITH_HUAWEI_ASCEND_NPU
Context<TargetType::kHuaweiAscendNPU>::SetHuaweiAscendDeviceID(
config.get_device_id());
......
......@@ -47,6 +47,14 @@ void LightPredictorImpl::Init(const lite_api::MobileConfig& config) {
Context<TargetType::kNPU>::SetSubgraphModelCacheDir(
raw_predictor_->scope(), config.subgraph_model_cache_dir());
#endif
#ifdef LITE_WITH_APU
// Store the model-level configuration into the scope for kernels, and use
// exe_scope to store the execution-level configuration
Context<TargetType::kAPU>::SetSubgraphModelCacheDir(
raw_predictor_->scope(), config.subgraph_model_cache_dir());
#endif
#ifdef LITE_WITH_HUAWEI_ASCEND_NPU
Context<TargetType::kHuaweiAscendNPU>::SetHuaweiAscendDeviceID(
config.get_device_id());
......
......@@ -84,10 +84,14 @@ void NeuronAdapter::InitFunctions() {
PADDLE_DLSYM(NeuronModel_addOperation);
PADDLE_DLSYM(NeuronModel_addOperationExtension);
PADDLE_DLSYM(NeuronModel_identifyInputsAndOutputs);
PADDLE_DLSYM(NeuronModel_restoreFromCompiledNetwork);
PADDLE_DLSYM(NeuronCompilation_create);
PADDLE_DLSYM(NeuronCompilation_free);
PADDLE_DLSYM(NeuronCompilation_finish);
PADDLE_DLSYM(NeuronCompilation_setCaching);
PADDLE_DLSYM(NeuronCompilation_storeCompiledNetwork);
PADDLE_DLSYM(NeuronCompilation_createForDevices);
PADDLE_DLSYM(NeuronCompilation_getCompiledNetworkSize);
PADDLE_DLSYM(NeuronExecution_create);
PADDLE_DLSYM(NeuronExecution_free);
PADDLE_DLSYM(NeuronExecution_setInput);
......@@ -179,6 +183,15 @@ int NeuronModel_identifyInputsAndOutputs(NeuronModel* model,
model, inputCount, inputs, outputCount, outputs);
}
int NeuronModel_restoreFromCompiledNetwork(NeuronModel** model,
NeuronCompilation** compilation,
const void* buffer,
const size_t size) {
return paddle::lite::NeuronAdapter::Global()
->NeuronModel_restoreFromCompiledNetwork()(
model, compilation, buffer, size);
}
int NeuronCompilation_create(NeuronModel* model,
NeuronCompilation** compilation) {
return paddle::lite::NeuronAdapter::Global()->NeuronCompilation_create()(
......@@ -195,6 +208,26 @@ int NeuronCompilation_finish(NeuronCompilation* compilation) {
compilation);
}
int NeuronCompilation_setCaching(NeuronCompilation* compilation,
const char* cacheDir,
const uint8_t* token) {
return paddle::lite::NeuronAdapter::Global()->NeuronCompilation_setCaching()(
compilation, cacheDir, token);
}
int NeuronCompilation_storeCompiledNetwork(NeuronCompilation* compilation,
void* buffer,
const size_t size) {
return paddle::lite::NeuronAdapter::Global()
->NeuronCompilation_storeCompiledNetwork()(compilation, buffer, size);
}
int NeuronCompilation_getCompiledNetworkSize(NeuronCompilation* compilation,
size_t* size) {
return paddle::lite::NeuronAdapter::Global()
->NeuronCompilation_getCompiledNetworkSize()(compilation, size);
}
int NeuronCompilation_createForDevices(NeuronModel* model,
const NeuronDevice* const* devices,
uint32_t numDevices,
......
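For reference, each PADDLE_DLSYM invocation above binds one Neuron API symbol into the corresponding <name>_ member of the adapter. The real macro is defined elsewhere in neuron_adapter.cc and is not part of this diff; a plausible sketch, assuming the adapter keeps a dlopen handle in a handle_ member:

#include <dlfcn.h>

// Assumed sketch, not the verbatim macro: resolve the symbol `name` from the
// already-opened Neuron shared library and store it into the member
// `name##_`, which has the matching `name##_Type` function-pointer type.
#define PADDLE_DLSYM(name)                                           \
  do {                                                               \
    name##_ = reinterpret_cast<name##_Type>(dlsym(handle_, #name));  \
    if (name##_ == nullptr) {                                        \
      LOG(WARNING) << "Failed to load the symbol " << #name;         \
    }                                                                \
  } while (false)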
......@@ -52,15 +52,24 @@ class NeuronAdapter final {
const uint32_t *);
using NeuronModel_identifyInputsAndOutputs_Type = int (*)(
NeuronModel *, uint32_t, const uint32_t *, uint32_t, const uint32_t *);
using NeuronModel_restoreFromCompiledNetwork_Type =
int (*)(NeuronModel **, NeuronCompilation **, const void *, const size_t);
using NeuronCompilation_create_Type = int (*)(NeuronModel *,
NeuronCompilation **);
using NeuronCompilation_free_Type = void (*)(NeuronCompilation *);
using NeuronCompilation_finish_Type = int (*)(NeuronCompilation *);
using NeuronCompilation_setCaching_Type = int (*)(NeuronCompilation *,
const char *,
const uint8_t *);
using NeuronCompilation_createForDevices_Type =
int (*)(NeuronModel *,
const NeuronDevice *const *,
uint32_t,
NeuronCompilation **);
using NeuronCompilation_storeCompiledNetwork_Type =
int (*)(NeuronCompilation *, void *, const size_t);
using NeuronCompilation_getCompiledNetworkSize_Type =
int (*)(NeuronCompilation *, size_t *);
using NeuronExecution_create_Type = int (*)(NeuronCompilation *,
NeuronExecution **);
using NeuronExecution_free_Type = void (*)(NeuronExecution *);
......@@ -78,131 +87,202 @@ class NeuronAdapter final {
const char **);
Neuron_getVersion_Type Neuron_getVersion() {
CHECK(Neuron_getVersion_ != nullptr) << "Cannot load Neuron_getVersion!";
CHECK(Neuron_getVersion_ != nullptr) << "Cannot load "
"Neuron_"
"getVersion!";
return Neuron_getVersion_;
}
NeuronModel_restoreFromCompiledNetwork_Type
NeuronModel_restoreFromCompiledNetwork() {
CHECK(NeuronModel_restoreFromCompiledNetwork_ != nullptr)
<< "Cannot load "
"NeuronModel_"
"restoreFromCompil"
"edNetwork!";
return NeuronModel_restoreFromCompiledNetwork_;
}
NeuronModel_create_Type NeuronModel_create() {
CHECK(NeuronModel_create_ != nullptr) << "Cannot load NeuronModel_create!";
CHECK(NeuronModel_create_ != nullptr) << "Cannot load "
"NeuronModel_"
"create!";
return NeuronModel_create_;
}
NeuronModel_free_Type NeuronModel_free() {
CHECK(NeuronModel_free_ != nullptr) << "Cannot load NeuronModel_free!";
CHECK(NeuronModel_free_ != nullptr) << "Cannot load "
"NeuronModel_"
"free!";
return NeuronModel_free_;
}
NeuronModel_finish_Type NeuronModel_finish() {
CHECK(NeuronModel_finish_ != nullptr) << "Cannot load NeuronModel_finish!";
CHECK(NeuronModel_finish_ != nullptr) << "Cannot load "
"NeuronModel_"
"finish!";
return NeuronModel_finish_;
}
NeuronModel_addOperand_Type NeuronModel_addOperand() {
CHECK(NeuronModel_addOperand_ != nullptr)
<< "Cannot load NeuronModel_addOperand!";
CHECK(NeuronModel_addOperand_ != nullptr) << "Cannot load "
"NeuronModel_"
"addOperand!";
return NeuronModel_addOperand_;
}
NeuronModel_setOperandValue_Type NeuronModel_setOperandValue() {
CHECK(NeuronModel_setOperandValue_ != nullptr)
<< "Cannot load NeuronModel_setOperandValue!";
CHECK(NeuronModel_setOperandValue_ != nullptr) << "Cannot load "
"NeuronModel_"
"setOperandValue!";
return NeuronModel_setOperandValue_;
}
NeuronModel_setOperandSymmPerChannelQuantParams_Type
NeuronModel_setOperandSymmPerChannelQuantParams() {
CHECK(NeuronModel_setOperandSymmPerChannelQuantParams_ != nullptr)
<< "Cannot load NeuronModel_setOperandSymmPerChannelQuantParams!";
<< "Cannot load "
"NeuronModel_"
"setOperandSymmPer"
"ChannelQuantParam"
"s!";
return NeuronModel_setOperandSymmPerChannelQuantParams_;
}
NeuronModel_addOperation_Type NeuronModel_addOperation() {
CHECK(NeuronModel_addOperation_ != nullptr)
<< "Cannot load NeuronModel_addOperation!";
CHECK(NeuronModel_addOperation_ != nullptr) << "Cannot load "
"NeuronModel_"
"addOperation!";
return NeuronModel_addOperation_;
}
NeuronModel_addOperationExtension_Type NeuronModel_addOperationExtension() {
CHECK(NeuronModel_addOperationExtension_ != nullptr)
<< "Cannot load NeuronModel_addOperationExtension!";
CHECK(NeuronModel_addOperationExtension_ != nullptr) << "Cannot load "
"NeuronModel_"
"addOperationExten"
"sion!";
return NeuronModel_addOperationExtension_;
}
NeuronModel_identifyInputsAndOutputs_Type
NeuronModel_identifyInputsAndOutputs() {
CHECK(NeuronModel_identifyInputsAndOutputs_ != nullptr)
<< "Cannot load NeuronModel_identifyInputsAndOutputs!";
<< "Cannot load "
"NeuronModel_"
"identifyInputsAnd"
"Outputs!";
return NeuronModel_identifyInputsAndOutputs_;
}
NeuronCompilation_create_Type NeuronCompilation_create() {
CHECK(NeuronCompilation_create_ != nullptr)
<< "Cannot load NeuronCompilation_create!";
CHECK(NeuronCompilation_create_ != nullptr) << "Cannot load "
"NeuronCompilation"
"_create!";
return NeuronCompilation_create_;
}
NeuronCompilation_free_Type NeuronCompilation_free() {
CHECK(NeuronCompilation_free_ != nullptr)
<< "Cannot load NeuronCompilation_free!";
CHECK(NeuronCompilation_free_ != nullptr) << "Cannot load "
"NeuronCompilation"
"_free!";
return NeuronCompilation_free_;
}
NeuronCompilation_finish_Type NeuronCompilation_finish() {
CHECK(NeuronCompilation_finish_ != nullptr)
<< "Cannot load NeuronCompilation_finish!";
CHECK(NeuronCompilation_finish_ != nullptr) << "Cannot load "
"NeuronCompilation"
"_finish!";
return NeuronCompilation_finish_;
}
NeuronCompilation_setCaching_Type NeuronCompilation_setCaching() {
CHECK(NeuronCompilation_setCaching_ != nullptr) << "Cannot load "
"NeuronCompilation"
"_setCaching!";
return NeuronCompilation_setCaching_;
}
NeuronCompilation_createForDevices_Type NeuronCompilation_createForDevices() {
CHECK(NeuronCompilation_createForDevices_ != nullptr)
<< "Cannot load NeuronCompilation_createForDevices!";
CHECK(NeuronCompilation_createForDevices_ != nullptr) << "Cannot load "
"NeuronCompilation"
"_createForDevices"
"!";
return NeuronCompilation_createForDevices_;
}
NeuronCompilation_storeCompiledNetwork_Type
NeuronCompilation_storeCompiledNetwork() {
CHECK(NeuronCompilation_storeCompiledNetwork_ != nullptr)
<< "Cannot load "
"NeuronCompilation"
"_storeCompiledNet"
"work!";
return NeuronCompilation_storeCompiledNetwork_;
}
NeuronCompilation_getCompiledNetworkSize_Type
NeuronCompilation_getCompiledNetworkSize() {
CHECK(NeuronCompilation_getCompiledNetworkSize_ != nullptr)
<< "Cannot load "
"NeuronCompilation"
"_getCompiledNetwo"
"rkSize!";
return NeuronCompilation_getCompiledNetworkSize_;
}
NeuronExecution_create_Type NeuronExecution_create() {
CHECK(NeuronExecution_create_ != nullptr)
<< "Cannot load NeuronExecution_create!";
CHECK(NeuronExecution_create_ != nullptr) << "Cannot load "
"NeuronExecution_"
"create!";
return NeuronExecution_create_;
}
NeuronExecution_free_Type NeuronExecution_free() {
CHECK(NeuronExecution_free_ != nullptr)
<< "Cannot load NeuronExecution_free!";
CHECK(NeuronExecution_free_ != nullptr) << "Cannot load "
"NeuronExecution_"
"free!";
return NeuronExecution_free_;
}
NeuronExecution_setInput_Type NeuronExecution_setInput() {
CHECK(NeuronExecution_setInput_ != nullptr)
<< "Cannot loadcl NeuronExecution_setInput!";
CHECK(NeuronExecution_setInput_ != nullptr) << "Cannot loadcl "
"NeuronExecution_"
"setInput!";
return NeuronExecution_setInput_;
}
NeuronExecution_setOutput_Type NeuronExecution_setOutput() {
CHECK(NeuronExecution_setOutput_ != nullptr)
<< "Cannot load NeuronExecution_setOutput!";
CHECK(NeuronExecution_setOutput_ != nullptr) << "Cannot load "
"NeuronExecution_"
"setOutput!";
return NeuronExecution_setOutput_;
}
NeuronExecution_compute_Type NeuronExecution_compute() {
CHECK(NeuronExecution_compute_ != nullptr)
<< "Cannot load NeuronExecution_compute!";
CHECK(NeuronExecution_compute_ != nullptr) << "Cannot load "
"NeuronExecution_"
"compute!";
return NeuronExecution_compute_;
}
Neuron_getDeviceCount_Type Neuron_getDeviceCount() {
CHECK(Neuron_getDeviceCount_ != nullptr)
<< "Cannot load Neuron_getDeviceCount!";
CHECK(Neuron_getDeviceCount_ != nullptr) << "Cannot load "
"Neuron_"
"getDeviceCount!";
return Neuron_getDeviceCount_;
}
Neuron_getDevice_Type Neuron_getDevice() {
CHECK(Neuron_getDevice_ != nullptr) << "Cannot load Neuron_getDevice!";
CHECK(Neuron_getDevice_ != nullptr) << "Cannot load "
"Neuron_"
"getDevice!";
return Neuron_getDevice_;
}
NeuronDevice_getName_Type NeuronDevice_getName() {
CHECK(NeuronDevice_getName_ != nullptr)
<< "Cannot load NeuronDevice_getName!";
CHECK(NeuronDevice_getName_ != nullptr) << "Cannot load "
"NeuronDevice_"
"getName!";
return NeuronDevice_getName_;
}
......@@ -226,11 +306,18 @@ class NeuronAdapter final {
nullptr};
NeuronModel_identifyInputsAndOutputs_Type
NeuronModel_identifyInputsAndOutputs_{nullptr};
NeuronModel_restoreFromCompiledNetwork_Type
NeuronModel_restoreFromCompiledNetwork_{nullptr};
NeuronCompilation_create_Type NeuronCompilation_create_{nullptr};
NeuronCompilation_free_Type NeuronCompilation_free_{nullptr};
NeuronCompilation_finish_Type NeuronCompilation_finish_{nullptr};
NeuronCompilation_setCaching_Type NeuronCompilation_setCaching_{nullptr};
NeuronCompilation_createForDevices_Type NeuronCompilation_createForDevices_{
nullptr};
NeuronCompilation_storeCompiledNetwork_Type
NeuronCompilation_storeCompiledNetwork_{nullptr};
NeuronCompilation_getCompiledNetworkSize_Type
NeuronCompilation_getCompiledNetworkSize_{nullptr};
NeuronExecution_create_Type NeuronExecution_create_{nullptr};
NeuronExecution_free_Type NeuronExecution_free_{nullptr};
NeuronExecution_setInput_Type NeuronExecution_setInput_{nullptr};
......
......@@ -144,6 +144,21 @@ class Context<TargetType::kAPU> {
APUContext& operator=(const APUContext& ctx) {}
std::string name() const { return "APUContext"; }
static void SetSubgraphModelCacheDir(Scope* scope,
std::string subgraph_model_cache_dir) {
auto var = scope->Var("SUBGRAPH_MODEL_CACHE_DIR");
CHECK(var);
auto data = var->GetMutable<std::string>();
CHECK(data);
*data = subgraph_model_cache_dir;
}
static std::string SubgraphModelCacheDir(Scope* scope) {
auto var = scope->FindVar("SUBGRAPH_MODEL_CACHE_DIR");
if (!var) return "";
return var->Get<std::string>();
}
};
#endif
......
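The scope variable SUBGRAPH_MODEL_CACHE_DIR is the hand-off point between the API layer and the subgraph kernel: the Init() methods in the first two hunks store the directory, and SubgraphEngine::BuildDeviceProgram() later in this diff reads it back (the kernel passes its exec_scope_, which resolves the variable through the root scope). A minimal round-trip sketch using only the two methods added above:

// API layer, at predictor initialization (the path is a placeholder):
Context<TargetType::kAPU>::SetSubgraphModelCacheDir(scope, "/data/apu_cache");

// Subgraph kernel, when building the device program; returns an empty
// string when no cache directory was configured:
std::string dir = Context<TargetType::kAPU>::SubgraphModelCacheDir(scope);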
File mode changed from 100755 to 100644
......@@ -53,7 +53,7 @@ int ConvTransposeConverter(void *ctx, OpLite *op, KernelBase *kernel) {
auto strides = op_info->GetAttr<std::vector<int>>("strides");
CHECK_EQ(strides.size(), 2L);
auto paddings = op_info->GetAttr<std::vector<int>>("paddings");
std::vector<int> paddings = op_info->GetAttr<std::vector<int>>("paddings");
auto groups = op_info->GetAttr<int>("groups");
if (groups > 1) {
LOG(WARNING) << "[NPU] only support groups == 1";
......@@ -70,7 +70,7 @@ int ConvTransposeConverter(void *ctx, OpLite *op, KernelBase *kernel) {
auto fuse_relu =
op_info->HasAttr("fuse_relu") && op_info->GetAttr<bool>("fuse_relu");
auto dilations = op_info->GetAttr<std::vector<int>>("dilations");
std::vector<int> dilations = op_info->GetAttr<std::vector<int>>("dilations");
CHECK_EQ(dilations.size(), 2L);
std::string padding_algorithm =
op_info->HasAttr("padding_algorithm")
......
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
......@@ -22,17 +22,102 @@
#include "lite/kernels/apu/bridges/graph.h"
#include "lite/kernels/apu/bridges/paddle_use_bridges.h"
#include "lite/kernels/apu/bridges/utility.h"
#include "lite/utils/io.h"
#include "lite/utils/md5.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace apu {
bool SubgraphEngine::BuildDeviceProgram() {
if (!origin_program_) {
BuildOriginProgram();
// Generate the model name using an MD5 hash based on:
// 1. the sorted variable input names
// 2. the shapes of the origin input tensors
// 3. the sorted variable output names
std::string DeviceProgram::GenerateModelName(
const std::vector<std::string>& input_names,
const std::vector<std::string>& output_names,
const std::vector<std::vector<int64_t>>& origin_idims) {
std::ostringstream os;
CHECK_EQ(input_names.size(), origin_idims.size());
for (int i = 0; i < input_names.size(); i++) {
os << input_names[i];
for (auto dim : origin_idims[i]) {
os << dim;
}
}
for (auto output_name : output_names) {
os << output_name;
}
return MD5(os.str());
}
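A worked example with hypothetical names and shapes: a single input "x" of shape {1, 3, 224, 224} and a single output "y" concatenate (dims are appended without separators) into the key string "x13224224y", so the cache files become MD5("x13224224y").dla and MD5("x13224224y").cfg:

DeviceProgram program;
// name == MD5("x13224224y"); the same names and shapes always map to the
// same cache files, and any shape change forces an online rebuild.
std::string name =
    program.GenerateModelName({"x"}, {"y"}, {{1, 3, 224, 224}});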
// Deserialize the generated model
bool DeviceProgram::LoadFromCacheFile(
const std::vector<std::string>& input_names,
const std::vector<std::string>& output_names,
const std::vector<std::vector<int64_t>>& origin_idims,
const std::string& model_cache_dir) {
int status;
// Generate the model name if not initialized
if (model_name_.empty()) {
model_name_ = GenerateModelName(input_names, output_names, origin_idims);
}
// Load from the cached model file
auto model_path = model_cache_dir + "/" + model_name_ + ".dla";
VLOG(3) << "[APU] Load model from " << model_path;
std::vector<char> compilationBuffer;
if (!ReadFile(model_path, &compilationBuffer)) {
LOG(WARNING) << "[NPU] Open " << model_path << " for reading failed!";
return false;
}
model_ = nullptr;
compilation_ = nullptr;
status = NeuronModel_restoreFromCompiledNetwork(
&model_, &compilation_, &compilationBuffer[0], compilationBuffer.size());
if (status != NEURON_NO_ERROR) {
LOG(WARNING) << "[APU] Load model failed!" << compilationBuffer.size();
return false;
}
VLOG(3) << "[APU] Complete Load model!";
// Deserialize the precisions and shapes of the origin output tensors from
// the cached configuration file
auto config_path = model_cache_dir + "/" + model_name_ + ".cfg";
VLOG(3) << "[APU] Load configuration from " << config_path;
std::vector<char> config_buffer;
if (!ReadFile(config_path, &config_buffer)) {
LOG(WARNING) << "[APU] read from " << config_path << " failed!";
return false;
}
std::string str(config_buffer.begin(), config_buffer.end());
// Parse the precisions and shapes of the output tensors
auto output_options = Split<std::string>(str, ";");
CHECK_EQ(output_options.size(), output_names.size());
origin_otypes_.resize(output_names.size());
origin_odims_.resize(output_names.size());
for (int i = 0; i < output_names.size(); i++) {
auto items = Split<std::string>(output_options[i], ":");
CHECK_EQ(items.size(), 2); // precision and shapes
origin_otypes_[i] = static_cast<PrecisionType>(std::stoi(items[0]));
origin_odims_[i] = Split<int64_t>(items[1], ",");
}
return true;
}
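The .cfg companion file is a flat string holding one <precision>:<dim0>,<dim1>,...,; record per output tensor, matching the serialization code near the end of BuildGraphAndCacheToFile below. For example, a single output of shape {1, 1000} whose PrecisionType enum value is 2 (the numeric value is an assumption for illustration) round-trips as:

// Cached configuration string for one output with shape {1, 1000}:
std::string record = "2:1,1000,;";
auto items = Split<std::string>(Split<std::string>(record, ";")[0], ":");
PrecisionType type = static_cast<PrecisionType>(std::stoi(items[0]));  // 2
std::vector<int64_t> dims = Split<int64_t>(items[1], ",");  // {1, 1000}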
bool DeviceProgram::BuildGraphAndCacheToFile(
RuntimeProgram* origin_program,
const std::vector<std::string>& input_names,
const std::vector<std::string>& output_names,
const std::vector<std::vector<int64_t>>& origin_idims,
const std::vector<Tensor*>& origin_itensors,
const std::vector<Tensor*>& origin_otensors,
const std::string& model_cache_dir) {
auto GetCurrentUS = []() -> double {
struct timeval time;
gettimeofday(&time, NULL);
......@@ -53,13 +138,14 @@ bool SubgraphEngine::BuildDeviceProgram() {
return false;
}
graph.set_model(model_);
graph.set_input_names(input_names_);
graph.set_output_names(output_names_);
graph.set_input_names(input_names);
graph.set_output_names(output_names);
// Convert all of the ops with their input vars and weights, and add them
// into the APU NIR graph
const auto& bridges = subgraph::SubgraphBridgeRegistry::Instance();
const auto& insts = origin_program_->instructions(kRootBlockIdx);
const auto& insts = origin_program->instructions(kRootBlockIdx);
for (auto& inst : insts) {
auto op = const_cast<OpLite*>(inst.op());
CHECK(op);
......@@ -82,25 +168,25 @@ bool SubgraphEngine::BuildDeviceProgram() {
// Get the index of input tensors
std::vector<uint32_t> input_indices;
for (int i = 0; i < input_names_.size(); i++) {
CHECK(graph.Has(input_names_[i])) << "[APU] Failed to find input node "
<< input_names_[i];
auto index = graph.Get(input_names_[i])->index();
for (int i = 0; i < input_names.size(); i++) {
CHECK(graph.Has(input_names[i])) << "[APU] Failed to find input node "
<< input_names[i];
auto index = graph.Get(input_names[i])->index();
input_indices.push_back(index);
VLOG(3) << "[APU] Input[" << i << "] name " << input_names_[i] << " dims "
<< origin_itensors_[i]->dims() << " index " << index;
VLOG(3) << "[APU] Input[" << i << "] name " << input_names[i] << " dims "
<< origin_itensors[i]->dims() << " index " << index;
}
// Get the index of output tensors
std::vector<uint32_t> output_indices;
for (int i = 0; i < output_names_.size(); i++) {
CHECK(graph.Has(output_names_[i])) << "[APU] Failed to find output node "
<< output_names_[i];
origin_otensors_[i]->mutable_data<int8_t>();
auto index = graph.Get(output_names_[i])->index();
for (int i = 0; i < output_names.size(); i++) {
CHECK(graph.Has(output_names[i])) << "[APU] Failed to find output node "
<< output_names[i];
origin_otensors[i]->mutable_data<int8_t>();
auto index = graph.Get(output_names[i])->index();
output_indices.push_back(index);
VLOG(3) << "[APU] Output[" << i << "] name " << output_names_[i] << " dims "
<< origin_otensors_[i]->dims() << " index " << index;
VLOG(3) << "[APU] Output[" << i << "] name " << output_names[i] << " dims "
<< origin_otensors[i]->dims() << " index " << index;
}
// Identify the input and output tensors of the neuron model
......@@ -114,7 +200,6 @@ bool SubgraphEngine::BuildDeviceProgram() {
LOG(WARNING) << "[APU] Fail to create NIR model:" << neuron_errCode;
return false;
}
VLOG(3) << "[APU] APU NIR model created!";
VLOG(1) << "[APU] APU NIR model created, Create cost "
<< GetCurrentUS() - start_time << " us";
......@@ -127,9 +212,109 @@ bool SubgraphEngine::BuildDeviceProgram() {
}
VLOG(1) << "[APU] APU DLA model created, Build cost "
<< GetCurrentUS() - start_time << " us";
CHECK_EQ(origin_otensors.size(), output_names.size());
origin_otypes_.resize(output_names.size());
origin_odims_.resize(output_names.size());
for (size_t i = 0; i < output_names.size(); i++) {
origin_otypes_[i] = origin_otensors[i]->precision();
origin_odims_[i] = origin_otensors[i]->dims().Vectorize();
}
if (!model_cache_dir.empty()) {
// Save the generated model to file
auto model_path = model_cache_dir + "/" + model_name_ + ".dla";
VLOG(3) << "[APU] Save model to " << model_path;
size_t compilationSize;
status = NeuronCompilation_getCompiledNetworkSize(compilation_,
&compilationSize);
if (status == NEURON_NO_ERROR) {
// Serialize the DLA
std::vector<char> model_buffer;
model_buffer.resize(compilationSize);
status = NeuronCompilation_storeCompiledNetwork(
compilation_, &model_buffer[0], compilationSize);
if (status != NEURON_NO_ERROR) {
LOG(WARNING) << "[APU] Serialization DLA failed!";
}
VLOG(3) << "[APU] Export the model to " << model_path;
if (!WriteFile(model_path, model_buffer)) {
LOG(WARNING) << "[APU] Open " << model_path << " for writting failed!";
}
}
// Serialize the precisions and shapes of the origin output tensors into the
// configuration file
std::ostringstream os;
for (int i = 0; i < output_names.size(); i++) {
os << static_cast<int32_t>(origin_otypes_[i]) << ":";
for (auto dim : origin_odims_[i]) {
os << dim << ",";
}
os << ";";
}
auto str = os.str();
std::vector<char> config_buffer(str.begin(), str.end());
auto config_path = model_cache_dir + "/" + model_name_ + ".cfg";
VLOG(3) << "[APU] Save configuration to " << config_path;
if (!WriteFile(config_path, config_buffer)) {
LOG(WARNING) << "[APU] Open " << config_path << " for writting failed!";
}
}
return true;
}
bool SubgraphEngine::BuildDeviceProgram() {
// Check if the cached device program exists
if (!device_programs_.count(origin_idims_)) {
auto device_program = std::make_shared<DeviceProgram>();
// Obtain the model cache dir from the APU context of the subgraph op
auto model_cache_dir =
ctx_->As<APUContext>().SubgraphModelCacheDir(exec_scope_);
VLOG(3) << "[APU] Getting subgraph_model_cache_dir: " << model_cache_dir;
// Load the cached model and configuration file if they exist
if (model_cache_dir.empty() ||
!device_program->LoadFromCacheFile(
input_names_, output_names_, origin_idims_, model_cache_dir)) {
// Build the model online: convert the Paddle ops to NIR nodes, build the
// MTK NIR graph, and compile the MTK NIR graph to a DLA
if (!origin_program_) {
BuildOriginProgram();
}
CHECK(origin_program_) << "[APU] The origin program is not initialized!";
CHECK_GT(origin_program_->instructions().size(), 0)
<< "[APU] No instructions found in the origin program!";
if (!device_program->BuildGraphAndCacheToFile(origin_program_.get(),
input_names_,
output_names_,
origin_idims_,
origin_itensors_,
origin_otensors_,
model_cache_dir)) {
return false;
}
}
if (device_program->model_ == nullptr) {
LOG(WARNING) << "dla create fail!";
return false;
}
device_programs_[origin_idims_] = device_program;
}
// Restore the shapes of the origin output tensors and reallocate their buffers
auto device_program = device_programs_[origin_idims_];
CHECK(device_program && device_program->model_);
for (int i = 0; i < output_names_.size(); i++) {
origin_otensors_[i]->Resize(device_program->origin_odims_[i]);
origin_otensors_[i]->mutable_data<int8_t>();
VLOG(3) << "[APU] Output[" << i << "] name " << output_names_[i] << " dims "
<< origin_otensors_[i]->dims() << " memory_size "
<< origin_otensors_[i]->memory_size();
}
return true;
}
bool SubgraphEngine::LaunchDeviceProgram() {
auto GetCurrentUS = []() -> double {
struct timeval time;
......@@ -137,9 +322,17 @@ bool SubgraphEngine::LaunchDeviceProgram() {
return 1e+6 * time.tv_sec + time.tv_usec;
};
if (device_programs_.count(origin_idims_) == 0 ||
device_programs_[origin_idims_]->model_ == nullptr) {
return LaunchOriginProgram();
}
auto device_program = device_programs_[origin_idims_];
auto start_time = GetCurrentUS();
NeuronExecution* run = NULL;
int neuron_errCode = NeuronExecution_create(compilation_, &run);
int neuron_errCode =
NeuronExecution_create(device_program->compilation_, &run);
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(WARNING) << "[APU] Build APU runtime failed!";
return false;
......@@ -187,11 +380,13 @@ bool SubgraphEngine::LaunchDeviceProgram() {
}
SubgraphEngine::~SubgraphEngine() {
if (compilation_) {
NeuronCompilation_free(compilation_);
}
if (model_) {
NeuronModel_free(model_);
for (auto& device_program : device_programs_) {
if (device_program.second->compilation_) {
NeuronCompilation_free(device_program.second->compilation_);
}
if (device_program.second->model_) {
NeuronModel_free(device_program.second->model_);
}
}
}
......
......@@ -14,6 +14,7 @@
#pragma once
#include <map>
#include <memory>
#include <string>
#include <vector>
......@@ -27,14 +28,43 @@ namespace lite {
namespace kernels {
namespace apu {
class DeviceProgram {
public:
DeviceProgram() {}
~DeviceProgram() {}
std::string GenerateModelName(
const std::vector<std::string>& input_names,
const std::vector<std::string>& output_names,
const std::vector<std::vector<int64_t>>& origin_idims);
bool LoadFromCacheFile(const std::vector<std::string>& input_names,
const std::vector<std::string>& output_names,
const std::vector<std::vector<int64_t>>& origin_idims,
const std::string& model_cache_dir);
bool BuildGraphAndCacheToFile(
RuntimeProgram* origin_program,
const std::vector<std::string>& input_names,
const std::vector<std::string>& output_names,
const std::vector<std::vector<int64_t>>& origin_idims,
const std::vector<Tensor*>& origin_itensors,
const std::vector<Tensor*>& origin_otensors,
const std::string& model_cache_dir);
public:
std::string model_name_{""};
std::vector<std::vector<int64_t>> origin_odims_;
std::vector<PrecisionType> origin_otypes_;
NeuronModel* model_;
NeuronCompilation* compilation_;
};
class SubgraphEngine : public subgraph::SubgraphEngineBase {
public:
SubgraphEngine(KernelContext *ctx,
SubgraphEngine(KernelContext* ctx,
int block_idx,
const std::shared_ptr<const cpp::ProgramDesc> &program_desc,
Scope *exec_scope,
const std::vector<std::string> &input_names,
const std::vector<std::string> &output_names)
const std::shared_ptr<const cpp::ProgramDesc>& program_desc,
Scope* exec_scope,
const std::vector<std::string>& input_names,
const std::vector<std::string>& output_names)
: subgraph::SubgraphEngineBase(ctx,
block_idx,
program_desc,
......@@ -48,8 +78,8 @@ class SubgraphEngine : public subgraph::SubgraphEngineBase {
bool BuildDeviceProgram() override;
bool LaunchDeviceProgram() override;
NeuronModel *model_;
NeuronCompilation *compilation_;
std::map<std::vector<std::vector<int64_t>>, std::shared_ptr<DeviceProgram>>
device_programs_;
};
class SubgraphCompute
......
......@@ -33,11 +33,13 @@ namespace paddle {
namespace lite {
TEST(MobileNetV1, test_mobilenetv1_int8_mediatek_apu) {
std::string subgraph_model_cache_dir = FLAGS_model_dir;
lite_api::CxxConfig config;
config.set_model_dir(FLAGS_model_dir);
config.set_valid_places({lite_api::Place{TARGET(kARM), PRECISION(kFloat)},
lite_api::Place{TARGET(kARM), PRECISION(kInt8)},
lite_api::Place{TARGET(kAPU), PRECISION(kInt8)}});
config.set_subgraph_model_cache_dir(subgraph_model_cache_dir);
auto predictor = lite_api::CreatePaddlePredictor(config);
std::string raw_data_dir = FLAGS_data_dir + std::string("/raw_data");
......