diff --git a/lite/api/cxx_api_impl.cc b/lite/api/cxx_api_impl.cc
index 0b5b9ad94c47a3d97492cd5b91618b184c9ef122..e147fc7ebc18d8fa213503d75a4fe68be06f3293 100644
--- a/lite/api/cxx_api_impl.cc
+++ b/lite/api/cxx_api_impl.cc
@@ -89,6 +89,14 @@ void CxxPaddleApiImpl::Init(const lite_api::CxxConfig &config) {
   Context<TargetType::kNPU>::SetSubgraphModelCacheDir(
       raw_predictor_->scope(), config.subgraph_model_cache_dir());
 #endif
+
+#ifdef LITE_WITH_APU
+  // Store the model-level configuration into scope for kernels, and use
+  // exe_scope to store the execution-level configuration
+  Context<TargetType::kAPU>::SetSubgraphModelCacheDir(
+      raw_predictor_->scope(), config.subgraph_model_cache_dir());
+#endif
+
 #ifdef LITE_WITH_HUAWEI_ASCEND_NPU
   Context<TargetType::kHuaweiAscendNPU>::SetHuaweiAscendDeviceID(
       config.get_device_id());
diff --git a/lite/api/light_api_impl.cc b/lite/api/light_api_impl.cc
index 3c5be7b9cdd340fe0fe82c589706c77875de0030..7f90a069544f0a8b4a7623ce35b34fccd46bb886 100644
--- a/lite/api/light_api_impl.cc
+++ b/lite/api/light_api_impl.cc
@@ -47,6 +47,14 @@ void LightPredictorImpl::Init(const lite_api::MobileConfig& config) {
   Context<TargetType::kNPU>::SetSubgraphModelCacheDir(
       raw_predictor_->scope(), config.subgraph_model_cache_dir());
 #endif
+
+#ifdef LITE_WITH_APU
+  // Store the model-level configuration into scope for kernels, and use
+  // exe_scope to store the execution-level configuration
+  Context<TargetType::kAPU>::SetSubgraphModelCacheDir(
+      raw_predictor_->scope(), config.subgraph_model_cache_dir());
+#endif
+
 #ifdef LITE_WITH_HUAWEI_ASCEND_NPU
   Context<TargetType::kHuaweiAscendNPU>::SetHuaweiAscendDeviceID(
       config.get_device_id());
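With both predictor entry points forwarding the cache directory into the scope, enabling APU model caching is a one-line configuration change. A minimal usage sketch follows; the model and cache paths are illustrative placeholders, not part of this patch:

```cpp
#include "paddle_api.h"  // Paddle-Lite C++ API

// Minimal sketch: point the subgraph model cache at a writable directory.
void CreatePredictorWithApuCache() {
  paddle::lite_api::CxxConfig config;
  config.set_model_dir("/data/local/tmp/mobilenet_v1_int8");
  config.set_valid_places(
      {paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt8)},
       paddle::lite_api::Place{TARGET(kAPU), PRECISION(kInt8)}});
  // Init() above stores this directory into the scope; the APU subgraph
  // kernel reads it back when it builds or restores the compiled network.
  config.set_subgraph_model_cache_dir("/data/local/tmp/apu_cache");
  auto predictor = paddle::lite_api::CreatePaddlePredictor(config);
}
```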
diff --git a/lite/backends/apu/neuron_adapter.cc b/lite/backends/apu/neuron_adapter.cc
index ff08507504b8bd7e5342c5705afb17550f37469e..bd6e41aeb19f0f71122e37e5e695f8b12467eebd 100644
--- a/lite/backends/apu/neuron_adapter.cc
+++ b/lite/backends/apu/neuron_adapter.cc
@@ -84,10 +84,14 @@ void NeuronAdapter::InitFunctions() {
   PADDLE_DLSYM(NeuronModel_addOperation);
   PADDLE_DLSYM(NeuronModel_addOperationExtension);
   PADDLE_DLSYM(NeuronModel_identifyInputsAndOutputs);
+  PADDLE_DLSYM(NeuronModel_restoreFromCompiledNetwork);
   PADDLE_DLSYM(NeuronCompilation_create);
   PADDLE_DLSYM(NeuronCompilation_free);
   PADDLE_DLSYM(NeuronCompilation_finish);
+  PADDLE_DLSYM(NeuronCompilation_setCaching);
+  PADDLE_DLSYM(NeuronCompilation_storeCompiledNetwork);
   PADDLE_DLSYM(NeuronCompilation_createForDevices);
+  PADDLE_DLSYM(NeuronCompilation_getCompiledNetworkSize);
   PADDLE_DLSYM(NeuronExecution_create);
   PADDLE_DLSYM(NeuronExecution_free);
   PADDLE_DLSYM(NeuronExecution_setInput);
@@ -179,6 +183,15 @@ int NeuronModel_identifyInputsAndOutputs(NeuronModel* model,
       model, inputCount, inputs, outputCount, outputs);
 }
 
+int NeuronModel_restoreFromCompiledNetwork(NeuronModel** model,
+                                           NeuronCompilation** compilation,
+                                           const void* buffer,
+                                           const size_t size) {
+  return paddle::lite::NeuronAdapter::Global()
+      ->NeuronModel_restoreFromCompiledNetwork()(
+          model, compilation, buffer, size);
+}
+
 int NeuronCompilation_create(NeuronModel* model,
                              NeuronCompilation** compilation) {
   return paddle::lite::NeuronAdapter::Global()->NeuronCompilation_create()(
@@ -195,6 +208,26 @@ int NeuronCompilation_finish(NeuronCompilation* compilation) {
       compilation);
 }
 
+int NeuronCompilation_setCaching(NeuronCompilation* compilation,
+                                 const char* cacheDir,
+                                 const uint8_t* token) {
+  return paddle::lite::NeuronAdapter::Global()->NeuronCompilation_setCaching()(
+      compilation, cacheDir, token);
+}
+
+int NeuronCompilation_storeCompiledNetwork(NeuronCompilation* compilation,
+                                           void* buffer,
+                                           const size_t size) {
+  return paddle::lite::NeuronAdapter::Global()
+      ->NeuronCompilation_storeCompiledNetwork()(compilation, buffer, size);
+}
+
+int NeuronCompilation_getCompiledNetworkSize(NeuronCompilation* compilation,
+                                             size_t* size) {
+  return paddle::lite::NeuronAdapter::Global()
+      ->NeuronCompilation_getCompiledNetworkSize()(compilation, size);
+}
+
 int NeuronCompilation_createForDevices(NeuronModel* model,
                                        const NeuronDevice* const* devices,
                                        uint32_t numDevices,
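The three new cache entry points are resolved the same way as the existing ones: PADDLE_DLSYM looks each symbol up once in the Neuron runtime library and caches it in a typed member pointer, and the free functions above dispatch through the typed accessor. The macro body is not part of this diff; a plausible shape, shown only for orientation (the real definition lives earlier in neuron_adapter.cc and may differ in detail, and `handle_` is the assumed dlopen() handle member):

```cpp
// Assumed sketch of PADDLE_DLSYM: resolve the symbol from the already
// dlopen()'d Neuron library and stash it in the matching member pointer.
#define PADDLE_DLSYM(func)                                          \
  do {                                                              \
    func##_ = reinterpret_cast<func##_Type>(dlsym(handle_, #func)); \
    if (func##_ == nullptr) {                                       \
      LOG(WARNING) << "Cannot find " << #func;                      \
    }                                                               \
  } while (false)
```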
diff --git a/lite/backends/apu/neuron_adapter.h b/lite/backends/apu/neuron_adapter.h
index c1b9669a98626699b126913dcc840906de4de8e0..8d57075d6c1f5e9d865b2367d119b665f4182b6e 100644
--- a/lite/backends/apu/neuron_adapter.h
+++ b/lite/backends/apu/neuron_adapter.h
@@ -52,15 +52,24 @@ class NeuronAdapter final {
                                                 const uint32_t *);
   using NeuronModel_identifyInputsAndOutputs_Type = int (*)(
       NeuronModel *, uint32_t, const uint32_t *, uint32_t, const uint32_t *);
+  using NeuronModel_restoreFromCompiledNetwork_Type =
+      int (*)(NeuronModel **, NeuronCompilation **, const void *, const size_t);
   using NeuronCompilation_create_Type = int (*)(NeuronModel *,
                                                 NeuronCompilation **);
   using NeuronCompilation_free_Type = void (*)(NeuronCompilation *);
   using NeuronCompilation_finish_Type = int (*)(NeuronCompilation *);
+  using NeuronCompilation_setCaching_Type = int (*)(NeuronCompilation *,
+                                                    const char *,
+                                                    const uint8_t *);
   using NeuronCompilation_createForDevices_Type =
       int (*)(NeuronModel *,
               const NeuronDevice *const *,
               uint32_t,
               NeuronCompilation **);
+  using NeuronCompilation_storeCompiledNetwork_Type =
+      int (*)(NeuronCompilation *, void *, const size_t);
+  using NeuronCompilation_getCompiledNetworkSize_Type =
+      int (*)(NeuronCompilation *, size_t *);
   using NeuronExecution_create_Type = int (*)(NeuronCompilation *,
                                               NeuronExecution **);
   using NeuronExecution_free_Type = void (*)(NeuronExecution *);
@@ -78,131 +87,202 @@ class NeuronAdapter final {
                                            const char **);
 
   Neuron_getVersion_Type Neuron_getVersion() {
-    CHECK(Neuron_getVersion_ != nullptr) << "Cannot load Neuron_getVersion!";
+    CHECK(Neuron_getVersion_ != nullptr) << "Cannot load "
+                                            "Neuron_"
+                                            "getVersion!";
     return Neuron_getVersion_;
   }
 
+  NeuronModel_restoreFromCompiledNetwork_Type
+  NeuronModel_restoreFromCompiledNetwork() {
+    CHECK(NeuronModel_restoreFromCompiledNetwork_ != nullptr)
+        << "Cannot load "
+           "NeuronModel_"
+           "restoreFromCompil"
+           "edNetwork!";
+    return NeuronModel_restoreFromCompiledNetwork_;
+  }
+
   NeuronModel_create_Type NeuronModel_create() {
-    CHECK(NeuronModel_create_ != nullptr) << "Cannot load NeuronModel_create!";
+    CHECK(NeuronModel_create_ != nullptr) << "Cannot load "
+                                             "NeuronModel_"
+                                             "create!";
     return NeuronModel_create_;
   }
 
   NeuronModel_free_Type NeuronModel_free() {
-    CHECK(NeuronModel_free_ != nullptr) << "Cannot load NeuronModel_free!";
+    CHECK(NeuronModel_free_ != nullptr) << "Cannot load "
+                                           "NeuronModel_"
+                                           "free!";
     return NeuronModel_free_;
   }
 
   NeuronModel_finish_Type NeuronModel_finish() {
-    CHECK(NeuronModel_finish_ != nullptr) << "Cannot load NeuronModel_finish!";
+    CHECK(NeuronModel_finish_ != nullptr) << "Cannot load "
+                                             "NeuronModel_"
+                                             "finish!";
     return NeuronModel_finish_;
   }
 
   NeuronModel_addOperand_Type NeuronModel_addOperand() {
-    CHECK(NeuronModel_addOperand_ != nullptr)
-        << "Cannot load NeuronModel_addOperand!";
+    CHECK(NeuronModel_addOperand_ != nullptr) << "Cannot load "
+                                                 "NeuronModel_"
+                                                 "addOperand!";
     return NeuronModel_addOperand_;
   }
 
   NeuronModel_setOperandValue_Type NeuronModel_setOperandValue() {
-    CHECK(NeuronModel_setOperandValue_ != nullptr)
-        << "Cannot load NeuronModel_setOperandValue!";
+    CHECK(NeuronModel_setOperandValue_ != nullptr) << "Cannot load "
+                                                      "NeuronModel_"
+                                                      "setOperandValue!";
     return NeuronModel_setOperandValue_;
   }
 
   NeuronModel_setOperandSymmPerChannelQuantParams_Type
   NeuronModel_setOperandSymmPerChannelQuantParams() {
     CHECK(NeuronModel_setOperandSymmPerChannelQuantParams_ != nullptr)
-        << "Cannot load NeuronModel_setOperandSymmPerChannelQuantParams!";
+        << "Cannot load "
+           "NeuronModel_"
+           "setOperandSymmPer"
+           "ChannelQuantParam"
+           "s!";
     return NeuronModel_setOperandSymmPerChannelQuantParams_;
   }
 
   NeuronModel_addOperation_Type NeuronModel_addOperation() {
-    CHECK(NeuronModel_addOperation_ != nullptr)
-        << "Cannot load NeuronModel_addOperation!";
+    CHECK(NeuronModel_addOperation_ != nullptr) << "Cannot load "
+                                                   "NeuronModel_"
+                                                   "addOperation!";
     return NeuronModel_addOperation_;
   }
 
   NeuronModel_addOperationExtension_Type NeuronModel_addOperationExtension() {
-    CHECK(NeuronModel_addOperationExtension_ != nullptr)
-        << "Cannot load NeuronModel_addOperationExtension!";
+    CHECK(NeuronModel_addOperationExtension_ != nullptr) << "Cannot load "
+                                                            "NeuronModel_"
+                                                            "addOperationExten"
+                                                            "sion!";
     return NeuronModel_addOperationExtension_;
   }
 
   NeuronModel_identifyInputsAndOutputs_Type
   NeuronModel_identifyInputsAndOutputs() {
     CHECK(NeuronModel_identifyInputsAndOutputs_ != nullptr)
-        << "Cannot load NeuronModel_identifyInputsAndOutputs!";
+        << "Cannot load "
+           "NeuronModel_"
+           "identifyInputsAnd"
+           "Outputs!";
     return NeuronModel_identifyInputsAndOutputs_;
   }
 
   NeuronCompilation_create_Type NeuronCompilation_create() {
-    CHECK(NeuronCompilation_create_ != nullptr)
-        << "Cannot load NeuronCompilation_create!";
+    CHECK(NeuronCompilation_create_ != nullptr) << "Cannot load "
+                                                   "NeuronCompilation"
+                                                   "_create!";
     return NeuronCompilation_create_;
   }
 
   NeuronCompilation_free_Type NeuronCompilation_free() {
-    CHECK(NeuronCompilation_free_ != nullptr)
-        << "Cannot load NeuronCompilation_free!";
+    CHECK(NeuronCompilation_free_ != nullptr) << "Cannot load "
+                                                 "NeuronCompilation"
+                                                 "_free!";
     return NeuronCompilation_free_;
   }
 
   NeuronCompilation_finish_Type NeuronCompilation_finish() {
-    CHECK(NeuronCompilation_finish_ != nullptr)
-        << "Cannot load NeuronCompilation_finish!";
+    CHECK(NeuronCompilation_finish_ != nullptr) << "Cannot load "
+                                                   "NeuronCompilation"
+                                                   "_finish!";
     return NeuronCompilation_finish_;
   }
 
+  NeuronCompilation_setCaching_Type NeuronCompilation_setCaching() {
+    CHECK(NeuronCompilation_setCaching_ != nullptr) << "Cannot load "
+                                                       "NeuronCompilation"
+                                                       "_setCaching!";
+    return NeuronCompilation_setCaching_;
+  }
+
   NeuronCompilation_createForDevices_Type NeuronCompilation_createForDevices() {
-    CHECK(NeuronCompilation_createForDevices_ != nullptr)
-        << "Cannot load NeuronCompilation_createForDevices!";
+    CHECK(NeuronCompilation_createForDevices_ != nullptr) << "Cannot load "
+                                                             "NeuronCompilation"
+                                                             "_createForDevices"
+                                                             "!";
     return NeuronCompilation_createForDevices_;
   }
 
+  NeuronCompilation_storeCompiledNetwork_Type
+  NeuronCompilation_storeCompiledNetwork() {
+    CHECK(NeuronCompilation_storeCompiledNetwork_ != nullptr)
+        << "Cannot load "
+           "NeuronCompilation"
+           "_storeCompiledNet"
+           "work!";
+    return NeuronCompilation_storeCompiledNetwork_;
+  }
+
+  NeuronCompilation_getCompiledNetworkSize_Type
+  NeuronCompilation_getCompiledNetworkSize() {
+    CHECK(NeuronCompilation_getCompiledNetworkSize_ != nullptr)
+        << "Cannot load "
+           "NeuronCompilation"
+           "_getCompiledNetwo"
+           "rkSize!";
+    return NeuronCompilation_getCompiledNetworkSize_;
+  }
+
   NeuronExecution_create_Type NeuronExecution_create() {
-    CHECK(NeuronExecution_create_ != nullptr)
-        << "Cannot load NeuronExecution_create!";
+    CHECK(NeuronExecution_create_ != nullptr) << "Cannot load "
+                                                 "NeuronExecution_"
+                                                 "create!";
     return NeuronExecution_create_;
   }
 
   NeuronExecution_free_Type NeuronExecution_free() {
-    CHECK(NeuronExecution_free_ != nullptr)
-        << "Cannot load NeuronExecution_free!";
+    CHECK(NeuronExecution_free_ != nullptr) << "Cannot load "
+                                               "NeuronExecution_"
+                                               "free!";
     return NeuronExecution_free_;
   }
 
   NeuronExecution_setInput_Type NeuronExecution_setInput() {
-    CHECK(NeuronExecution_setInput_ != nullptr)
-        << "Cannot loadcl NeuronExecution_setInput!";
+    CHECK(NeuronExecution_setInput_ != nullptr) << "Cannot load "
+                                                   "NeuronExecution_"
+                                                   "setInput!";
     return NeuronExecution_setInput_;
   }
 
   NeuronExecution_setOutput_Type NeuronExecution_setOutput() {
-    CHECK(NeuronExecution_setOutput_ != nullptr)
-        << "Cannot load NeuronExecution_setOutput!";
+    CHECK(NeuronExecution_setOutput_ != nullptr) << "Cannot load "
+                                                    "NeuronExecution_"
+                                                    "setOutput!";
     return NeuronExecution_setOutput_;
   }
 
   NeuronExecution_compute_Type NeuronExecution_compute() {
-    CHECK(NeuronExecution_compute_ != nullptr)
-        << "Cannot load NeuronExecution_compute!";
+    CHECK(NeuronExecution_compute_ != nullptr) << "Cannot load "
+                                                  "NeuronExecution_"
+                                                  "compute!";
     return NeuronExecution_compute_;
   }
 
   Neuron_getDeviceCount_Type Neuron_getDeviceCount() {
-    CHECK(Neuron_getDeviceCount_ != nullptr)
-        << "Cannot load Neuron_getDeviceCount!";
+    CHECK(Neuron_getDeviceCount_ != nullptr) << "Cannot load "
+                                                "Neuron_"
+                                                "getDeviceCount!";
     return Neuron_getDeviceCount_;
   }
 
   Neuron_getDevice_Type Neuron_getDevice() {
-    CHECK(Neuron_getDevice_ != nullptr) << "Cannot load Neuron_getDevice!";
+    CHECK(Neuron_getDevice_ != nullptr) << "Cannot load "
+                                           "Neuron_"
+                                           "getDevice!";
     return Neuron_getDevice_;
   }
 
   NeuronDevice_getName_Type NeuronDevice_getName() {
-    CHECK(NeuronDevice_getName_ != nullptr)
-        << "Cannot load NeuronDevice_getName!";
+    CHECK(NeuronDevice_getName_ != nullptr) << "Cannot load "
+                                               "NeuronDevice_"
+                                               "getName!";
     return NeuronDevice_getName_;
   }
 
@@ -226,11 +306,18 @@ class NeuronAdapter final {
       nullptr};
   NeuronModel_identifyInputsAndOutputs_Type
       NeuronModel_identifyInputsAndOutputs_{nullptr};
+  NeuronModel_restoreFromCompiledNetwork_Type
+      NeuronModel_restoreFromCompiledNetwork_{nullptr};
   NeuronCompilation_create_Type NeuronCompilation_create_{nullptr};
   NeuronCompilation_free_Type NeuronCompilation_free_{nullptr};
   NeuronCompilation_finish_Type NeuronCompilation_finish_{nullptr};
+  NeuronCompilation_setCaching_Type NeuronCompilation_setCaching_{nullptr};
   NeuronCompilation_createForDevices_Type NeuronCompilation_createForDevices_{
       nullptr};
+  NeuronCompilation_storeCompiledNetwork_Type
+      NeuronCompilation_storeCompiledNetwork_{nullptr};
+  NeuronCompilation_getCompiledNetworkSize_Type
+      NeuronCompilation_getCompiledNetworkSize_{nullptr};
   NeuronExecution_create_Type NeuronExecution_create_{nullptr};
   NeuronExecution_free_Type NeuronExecution_free_{nullptr};
   NeuronExecution_setInput_Type NeuronExecution_setInput_{nullptr};
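Taken together, the new functions split caching into three primitives: query the compiled network's size, copy it into a caller-owned buffer, and rebuild a model/compilation pair from such a buffer. A minimal round-trip sketch that mirrors how subgraph_compute.cc uses them below (error handling trimmed):

```cpp
#include <vector>

// Serialize a finished compilation into a byte buffer.
bool SaveCompiledNetwork(NeuronCompilation* compilation,
                         std::vector<char>* buffer) {
  size_t size = 0;
  if (NeuronCompilation_getCompiledNetworkSize(compilation, &size) !=
      NEURON_NO_ERROR) {
    return false;
  }
  buffer->resize(size);
  return NeuronCompilation_storeCompiledNetwork(
             compilation, buffer->data(), size) == NEURON_NO_ERROR;
}

// Rebuild a model/compilation pair from a previously stored buffer.
bool RestoreCompiledNetwork(const std::vector<char>& buffer,
                            NeuronModel** model,
                            NeuronCompilation** compilation) {
  return NeuronModel_restoreFromCompiledNetwork(
             model, compilation, buffer.data(), buffer.size()) ==
         NEURON_NO_ERROR;
}
```

Note that NeuronCompilation_setCaching is also hooked up here but is not called anywhere in this patch; the file-based .dla/.cfg path below is the only cache mechanism exercised.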
diff --git a/lite/core/context.h b/lite/core/context.h
index f140e7575b82b264e27cec00ac8eb05fcd33eb2d..6db0faffe4843d2b2dedb977605268de7be09ac1 100644
--- a/lite/core/context.h
+++ b/lite/core/context.h
@@ -144,6 +144,21 @@ class Context<TargetType::kAPU> {
   APUContext& operator=(const APUContext& ctx) {}
   std::string name() const { return "APUContext"; }
+
+  static void SetSubgraphModelCacheDir(Scope* scope,
+                                       std::string subgraph_model_cache_dir) {
+    auto var = scope->Var("SUBGRAPH_MODEL_CACHE_DIR");
+    CHECK(var);
+    auto data = var->GetMutable<std::string>();
+    CHECK(data);
+    *data = subgraph_model_cache_dir;
+  }
+
+  static std::string SubgraphModelCacheDir(Scope* scope) {
+    auto var = scope->FindVar("SUBGRAPH_MODEL_CACHE_DIR");
+    if (!var) return "";
+    return var->Get<std::string>();
+  }
 };
 #endif
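The cache directory travels from the user-facing config to the APU kernel through a named scope variable rather than a new kernel parameter. A round-trip sketch, given the predictor's root scope (namespaces abbreviated; the path is a placeholder):

```cpp
// Written once at predictor Init(), read back at subgraph build time.
// A scope that never received the variable yields an empty string, which
// the kernel treats as "caching disabled".
void Demo(paddle::lite::Scope* scope) {
  using paddle::lite::Context;
  using paddle::lite::TargetType;
  Context<TargetType::kAPU>::SetSubgraphModelCacheDir(
      scope, "/data/local/tmp/apu_cache");
  std::string dir = Context<TargetType::kAPU>::SubgraphModelCacheDir(scope);
  CHECK_EQ(dir, "/data/local/tmp/apu_cache");
}
```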
diff --git a/lite/kernels/apu/bridges/CMakeLists.txt b/lite/kernels/apu/bridges/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/lite/kernels/apu/bridges/conv_transpose_op.cc b/lite/kernels/apu/bridges/conv_transpose_op.cc
index 386c89c128e476611ebde4b337823775b5ae01a9..ecc6677ac1e8f80f473caefc44d51248e8609476 100644
--- a/lite/kernels/apu/bridges/conv_transpose_op.cc
+++ b/lite/kernels/apu/bridges/conv_transpose_op.cc
@@ -53,7 +53,7 @@ int ConvTransposeConverter(void *ctx, OpLite *op, KernelBase *kernel) {
   auto strides = op_info->GetAttr<std::vector<int>>("strides");
   CHECK_EQ(strides.size(), 2L);
-  auto paddings = op_info->GetAttr<std::vector<int>>("paddings");
+  std::vector<int> paddings = op_info->GetAttr<std::vector<int>>("paddings");
   auto groups = op_info->GetAttr<int>("groups");
   if (groups > 1) {
     LOG(WARNING) << "[NPU] only support groups == 1";
@@ -70,7 +70,7 @@ int ConvTransposeConverter(void *ctx, OpLite *op, KernelBase *kernel) {
   auto fuse_relu =
       op_info->HasAttr("fuse_relu") && op_info->GetAttr<bool>("fuse_relu");
-  auto dilations = op_info->GetAttr<std::vector<int>>("dilations");
+  std::vector<int> dilations = op_info->GetAttr<std::vector<int>>("dilations");
   CHECK_EQ(dilations.size(), 2L);
   std::string padding_algorithm =
       op_info->HasAttr("padding_algorithm")
diff --git a/lite/kernels/apu/bridges/graph.cc b/lite/kernels/apu/bridges/graph.cc
old mode 100755
new mode 100644
diff --git a/lite/kernels/apu/bridges/paddle_use_bridges.h b/lite/kernels/apu/bridges/paddle_use_bridges.h
old mode 100755
new mode 100644
diff --git a/lite/kernels/apu/bridges/utility.h b/lite/kernels/apu/bridges/utility.h
old mode 100755
new mode 100644
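The cache key used by the DeviceProgram introduced in the next file is an MD5 hash over the input names, the input shapes, and the output names, so every distinct input-shape combination maps to its own .dla/.cfg pair on disk. A small sketch of the consequence (namespaces elided; MD5() is the helper from lite/utils/md5.h that the new code itself uses):

```cpp
DeviceProgram program;
// Same graph, two batch sizes -> two different cache entries.
auto name_bs1 = program.GenerateModelName({"x"}, {"y"}, {{1, 3, 224, 224}});
auto name_bs2 = program.GenerateModelName({"x"}, {"y"}, {{2, 3, 224, 224}});
CHECK_NE(name_bs1, name_bs2);
```

Note that the key covers names and shapes only: if the weights under an unchanged model directory are updated, an old cache entry would be silently reused, so the cache directory should be cleared whenever the model itself changes.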
diff --git a/lite/kernels/apu/subgraph_compute.cc b/lite/kernels/apu/subgraph_compute.cc
old mode 100755
new mode 100644
index 5e86514478f421ece6642afdd0bfaab4025420bb..825e735a2f0709ba979fa7a2a7fc203539e8f483
--- a/lite/kernels/apu/subgraph_compute.cc
+++ b/lite/kernels/apu/subgraph_compute.cc
@@ -22,17 +22,102 @@
 #include "lite/kernels/apu/bridges/graph.h"
 #include "lite/kernels/apu/bridges/paddle_use_bridges.h"
 #include "lite/kernels/apu/bridges/utility.h"
+#include "lite/utils/io.h"
+#include "lite/utils/md5.h"
 
 namespace paddle {
 namespace lite {
 namespace kernels {
 namespace apu {
 
-bool SubgraphEngine::BuildDeviceProgram() {
-  if (!origin_program_) {
-    BuildOriginProgram();
+// Generate the model name by using md5 hashes based on:
+// 1. the sorted variable input names
+// 2. the shapes of the origin input tensors
+// 3. the sorted variable output names
+std::string DeviceProgram::GenerateModelName(
+    const std::vector<std::string>& input_names,
+    const std::vector<std::string>& output_names,
+    const std::vector<std::vector<int64_t>>& origin_idims) {
+  std::ostringstream os;
+  CHECK_EQ(input_names.size(), origin_idims.size());
+  for (int i = 0; i < input_names.size(); i++) {
+    os << input_names[i];
+    for (auto dim : origin_idims[i]) {
+      os << dim;
+    }
+  }
+  for (auto output_name : output_names) {
+    os << output_name;
+  }
+  return MD5(os.str());
+}
+
+// Deserialize the generated model
+bool DeviceProgram::LoadFromCacheFile(
+    const std::vector<std::string>& input_names,
+    const std::vector<std::string>& output_names,
+    const std::vector<std::vector<int64_t>>& origin_idims,
+    const std::string& model_cache_dir) {
+  int status;
+
+  // Generate the model name if not initialized
+  if (model_name_.empty()) {
+    model_name_ = GenerateModelName(input_names, output_names, origin_idims);
   }
+  // Load from the cached model file
+  auto model_path = model_cache_dir + "/" + model_name_ + ".dla";
+  VLOG(3) << "[APU] Load model from " << model_path;
+  std::vector<char> compilationBuffer;
+  if (!ReadFile(model_path, &compilationBuffer)) {
+    LOG(WARNING) << "[APU] Open " << model_path << " for reading failed!";
+    return false;
+  }
+  model_ = nullptr;
+  compilation_ = nullptr;
+  status = NeuronModel_restoreFromCompiledNetwork(
+      &model_, &compilation_, &compilationBuffer[0], compilationBuffer.size());
+  if (status != NEURON_NO_ERROR) {
+    LOG(WARNING) << "[APU] Load model failed! Size: "
+                 << compilationBuffer.size();
+    return false;
+  }
+
+  VLOG(3) << "[APU] Completed loading model!";
+
+  // Deserialize the precisions and shapes of the origin output tensors
+  // from the cached configuration file, so the outputs can be resized
+  // before execution
+  auto config_path = model_cache_dir + "/" + model_name_ + ".cfg";
+  VLOG(3) << "[APU] Load configuration from " << config_path;
+  std::vector<char> config_buffer;
+  if (!ReadFile(config_path, &config_buffer)) {
+    LOG(WARNING) << "[APU] read from " << config_path << " failed!";
+    return false;
+  }
+
+  std::string str(config_buffer.begin(), config_buffer.end());
+  // Parse the precision and shapes of the output tensors
+  auto output_options = Split<std::string>(str, ";");
+  CHECK_EQ(output_options.size(), output_names.size());
+  origin_otypes_.resize(output_names.size());
+  origin_odims_.resize(output_names.size());
+  for (int i = 0; i < output_names.size(); i++) {
+    auto items = Split<std::string>(output_options[i], ":");
+    CHECK_EQ(items.size(), 2);  // precision and shapes
+    origin_otypes_[i] = static_cast<PrecisionType>(std::stoi(items[0]));
+    origin_odims_[i] = Split<int64_t>(items[1], ",");
+  }
+  return true;
+}
+
+bool DeviceProgram::BuildGraphAndCacheToFile(
+    RuntimeProgram* origin_program,
+    const std::vector<std::string>& input_names,
+    const std::vector<std::string>& output_names,
+    const std::vector<std::vector<int64_t>>& origin_idims,
+    const std::vector<Tensor*>& origin_itensors,
+    const std::vector<Tensor*>& origin_otensors,
+    const std::string& model_cache_dir) {
   auto GetCurrentUS = []() -> double {
     struct timeval time;
     gettimeofday(&time, NULL);
@@ -53,13 +138,14 @@ bool SubgraphEngine::BuildDeviceProgram() {
     return false;
   }
   graph.set_model(model_);
-  graph.set_input_names(input_names_);
-  graph.set_output_names(output_names_);
+  graph.set_input_names(input_names);
+  graph.set_output_names(output_names);
 
   // Convert all of ops and their input vars and weights and added into the APU
   // NIR graph
   const auto& bridges = subgraph::SubgraphBridgeRegistry::Instance();
-  const auto& insts = origin_program_->instructions(kRootBlockIdx);
+  const auto& insts = origin_program->instructions(kRootBlockIdx);
+
   for (auto& inst : insts) {
     auto op = const_cast<OpLite*>(inst.op());
     CHECK(op);
@@ -82,25 +168,25 @@ bool SubgraphEngine::BuildDeviceProgram() {
 
   // Get the index of input tensors
   std::vector<uint32_t> input_indices;
-  for (int i = 0; i < input_names_.size(); i++) {
-    CHECK(graph.Has(input_names_[i])) << "[APU] Failed to find input node "
-                                      << input_names_[i];
-    auto index = graph.Get(input_names_[i])->index();
+  for (int i = 0; i < input_names.size(); i++) {
+    CHECK(graph.Has(input_names[i])) << "[APU] Failed to find input node "
+                                     << input_names[i];
+    auto index = graph.Get(input_names[i])->index();
     input_indices.push_back(index);
-    VLOG(3) << "[APU] Input[" << i << "] name " << input_names_[i] << " dims "
-            << origin_itensors_[i]->dims() << " index " << index;
+    VLOG(3) << "[APU] Input[" << i << "] name " << input_names[i] << " dims "
+            << origin_itensors[i]->dims() << " index " << index;
   }
 
   // Get the index of output tensors
   std::vector<uint32_t> output_indices;
-  for (int i = 0; i < output_names_.size(); i++) {
-    CHECK(graph.Has(output_names_[i])) << "[APU] Failed to find output node "
-                                       << output_names_[i];
-    origin_otensors_[i]->mutable_data<int8_t>();
-    auto index = graph.Get(output_names_[i])->index();
+  for (int i = 0; i < output_names.size(); i++) {
+    CHECK(graph.Has(output_names[i])) << "[APU] Failed to find output node "
+                                      << output_names[i];
+    origin_otensors[i]->mutable_data<int8_t>();
+    auto index = graph.Get(output_names[i])->index();
     output_indices.push_back(index);
-    VLOG(3) << "[APU] Output[" << i << "] name " << output_names_[i] << " dims "
-            << origin_otensors_[i]->dims() << " index " << index;
+    VLOG(3) << "[APU] Output[" << i << "] name " << output_names[i] << " dims "
+            << origin_otensors[i]->dims() << " index " << index;
   }
 
   // Indentify the input and output tensors of the neuron model
@@ -114,7 +200,6 @@ bool SubgraphEngine::BuildDeviceProgram() {
     LOG(WARNING) << "[APU] Fail to create NIR model:" << neuron_errCode;
     return false;
   }
-  VLOG(3) << "[APU] APU NIR model created!";
   VLOG(1) << "[APU] APU NIR model created, Create cost "
           << GetCurrentUS() - start_time << " us";
 
@@ -127,9 +212,110 @@ bool SubgraphEngine::BuildDeviceProgram() {
   }
   VLOG(1) << "[APU] APU DLA model created, Build cost "
           << GetCurrentUS() - start_time << " us";
+
+  CHECK_EQ(origin_otensors.size(), output_names.size());
+  origin_otypes_.resize(output_names.size());
+  origin_odims_.resize(output_names.size());
+  for (size_t i = 0; i < output_names.size(); i++) {
+    origin_otypes_[i] = origin_otensors[i]->precision();
+    origin_odims_[i] = origin_otensors[i]->dims().Vectorize();
+  }
+  if (!model_cache_dir.empty()) {
+    // Save the generated model to file
+    auto model_path = model_cache_dir + "/" + model_name_ + ".dla";
+    VLOG(3) << "[APU] Save model to " << model_path;
+
+    size_t compilationSize;
+    status = NeuronCompilation_getCompiledNetworkSize(compilation_,
+                                                      &compilationSize);
+    if (status == NEURON_NO_ERROR) {
+      // Serialize the DLA
+      std::vector<char> model_buffer;
+      model_buffer.resize(compilationSize);
+      status = NeuronCompilation_storeCompiledNetwork(
+          compilation_, &model_buffer[0], compilationSize);
+      if (status != NEURON_NO_ERROR) {
+        LOG(WARNING) << "[APU] Serializing the DLA failed!";
+      }
+
+      VLOG(3) << "[APU] Export the model to " << model_path;
+      if (!WriteFile(model_path, model_buffer)) {
+        LOG(WARNING) << "[APU] Open " << model_path << " for writing failed!";
+      }
+    }
+
+    // Serialize the precisions and shapes of the origin output tensors into
+    // the configuration file
+    std::ostringstream os;
+    for (int i = 0; i < output_names.size(); i++) {
+      os << static_cast<int32_t>(origin_otypes_[i]) << ":";
+      for (auto dim : origin_odims_[i]) {
+        os << dim << ",";
+      }
+      os << ";";
+    }
+    auto str = os.str();
+    std::vector<char> config_buffer(str.begin(), str.end());
+    auto config_path = model_cache_dir + "/" + model_name_ + ".cfg";
+    VLOG(3) << "[APU] Save configuration to " << config_path;
+    if (!WriteFile(config_path, config_buffer)) {
+      LOG(WARNING) << "[APU] Open " << config_path << " for writing failed!";
+    }
+  }
+
   return true;
 }
 
+bool SubgraphEngine::BuildDeviceProgram() {
+  // Check if the cached device program exists
+  if (!device_programs_.count(origin_idims_)) {
+    auto device_program = std::make_shared<DeviceProgram>();
+    // Obtain the model cache dir from the APU context of the subgraph op
+    auto model_cache_dir =
+        ctx_->As<APUContext>().SubgraphModelCacheDir(exec_scope_);
+    VLOG(3) << "[APU] Getting subgraph_model_cache_dir: " << model_cache_dir;
+    // Try to load the cached model and configuration file if they exist
+    if (model_cache_dir.empty() ||
+        !device_program->LoadFromCacheFile(
+            input_names_, output_names_, origin_idims_, model_cache_dir)) {
+      // Build the model online: convert the Paddle ops to NIR nodes, build
+      // the MTK NIR graph, and compile the NIR graph into a DLA
+      if (!origin_program_) {
+        BuildOriginProgram();
+      }
+      CHECK(origin_program_) << "[APU] The origin program is not initialized!";
+      CHECK_GT(origin_program_->instructions().size(), 0)
+          << "[APU] No instructions found in the origin program!";
+      if (!device_program->BuildGraphAndCacheToFile(origin_program_.get(),
+                                                    input_names_,
+                                                    output_names_,
+                                                    origin_idims_,
+                                                    origin_itensors_,
+                                                    origin_otensors_,
+                                                    model_cache_dir)) {
+        return false;
+      }
+    }
+    if (device_program->model_ == nullptr) {
+      LOG(WARNING) << "[APU] Failed to create the DLA model!";
+      return false;
+    }
+    device_programs_[origin_idims_] = device_program;
+  }
+
+  // Resize the origin output tensors according to the cached configuration
+  auto device_program = device_programs_[origin_idims_];
+  CHECK(device_program && device_program->model_);
+  for (int i = 0; i < output_names_.size(); i++) {
+    origin_otensors_[i]->Resize(device_program->origin_odims_[i]);
+    origin_otensors_[i]->mutable_data<int8_t>();
+    VLOG(3) << "[APU] Output[" << i << "] name " << output_names_[i]
+            << " dims " << origin_otensors_[i]->dims() << " memory_size "
+            << origin_otensors_[i]->memory_size();
+  }
+  return true;
+}
+
 bool SubgraphEngine::LaunchDeviceProgram() {
   auto GetCurrentUS = []() -> double {
     struct timeval time;
@@ -137,9 +323,17 @@ bool SubgraphEngine::LaunchDeviceProgram() {
     return 1e+6 * time.tv_sec + time.tv_usec;
   };
 
+  if (device_programs_.count(origin_idims_) == 0 ||
+      device_programs_[origin_idims_]->model_ == nullptr) {
+    return LaunchOriginProgram();
+  }
+
+  auto device_program = device_programs_[origin_idims_];
+
   auto start_time = GetCurrentUS();
   NeuronExecution* run = NULL;
-  int neuron_errCode = NeuronExecution_create(compilation_, &run);
+  int neuron_errCode =
+      NeuronExecution_create(device_program->compilation_, &run);
   if (NEURON_NO_ERROR != neuron_errCode) {
     LOG(WARNING) << "[APU] Build APU runtime failed!";
     return false;
@@ -187,11 +381,13 @@
 }
 
 SubgraphEngine::~SubgraphEngine() {
-  if (compilation_) {
-    NeuronCompilation_free(compilation_);
-  }
-  if (model_) {
-    NeuronModel_free(model_);
+  for (auto& device_program : device_programs_) {
+    if (device_program.second->compilation_) {
+      NeuronCompilation_free(device_program.second->compilation_);
+    }
+    if (device_program.second->model_) {
+      NeuronModel_free(device_program.second->model_);
+    }
   }
 }
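The .cfg file written by BuildGraphAndCacheToFile and parsed by LoadFromCacheFile is a flat ASCII encoding, one `<precision>:<dim>,<dim>,...,;` record per output. A sketch of a parse, matching the Split calls above (the precision integer is whatever the PrecisionType enumerator encodes to; 2 is assumed here for kInt8, and Split is assumed to skip the empty tokens the writer's trailing separators produce):

```cpp
// Two outputs, both (assumed) kInt8, with shapes {1, 1000} and {1, 10}.
std::string str = "2:1,1000,;2:1,10,;";
auto output_options = Split<std::string>(str, ";");  // one record per output
for (auto& option : output_options) {
  auto items = Split<std::string>(option, ":");
  auto precision = static_cast<PrecisionType>(std::stoi(items[0]));
  auto dims = Split<int64_t>(items[1], ",");
  // ... resize the corresponding output tensor with `dims` ...
}
```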
diff --git a/lite/kernels/apu/subgraph_compute.h b/lite/kernels/apu/subgraph_compute.h
index f85f315220e0b52baf1cb1388ce6edd5bc004d1a..f2c593d58da9bbf9716cf4d89a6c8844a4c004b9 100644
--- a/lite/kernels/apu/subgraph_compute.h
+++ b/lite/kernels/apu/subgraph_compute.h
@@ -14,6 +14,7 @@
 
 #pragma once
 
+#include <map>
 #include <memory>
 #include <string>
 #include <vector>
@@ -27,14 +28,43 @@ namespace lite {
 namespace kernels {
 namespace apu {
 
+class DeviceProgram {
+ public:
+  DeviceProgram() {}
+  ~DeviceProgram() {}
+  std::string GenerateModelName(
+      const std::vector<std::string>& input_names,
+      const std::vector<std::string>& output_names,
+      const std::vector<std::vector<int64_t>>& origin_idims);
+  bool LoadFromCacheFile(const std::vector<std::string>& input_names,
+                         const std::vector<std::string>& output_names,
+                         const std::vector<std::vector<int64_t>>& origin_idims,
+                         const std::string& model_cache_dir);
+  bool BuildGraphAndCacheToFile(
+      RuntimeProgram* origin_program,
+      const std::vector<std::string>& input_names,
+      const std::vector<std::string>& output_names,
+      const std::vector<std::vector<int64_t>>& origin_idims,
+      const std::vector<Tensor*>& origin_itensors,
+      const std::vector<Tensor*>& origin_otensors,
+      const std::string& model_cache_dir);
+
+ public:
+  std::string model_name_{""};
+  std::vector<std::vector<int64_t>> origin_odims_;
+  std::vector<PrecisionType> origin_otypes_;
+  NeuronModel* model_{nullptr};
+  NeuronCompilation* compilation_{nullptr};
+};
+
 class SubgraphEngine : public subgraph::SubgraphEngineBase {
  public:
-  SubgraphEngine(KernelContext *ctx,
+  SubgraphEngine(KernelContext* ctx,
                  int block_idx,
-                 const std::shared_ptr<const cpp::ProgramDesc> &program_desc,
-                 Scope *exec_scope,
-                 const std::vector<std::string> &input_names,
-                 const std::vector<std::string> &output_names)
+                 const std::shared_ptr<const cpp::ProgramDesc>& program_desc,
+                 Scope* exec_scope,
+                 const std::vector<std::string>& input_names,
+                 const std::vector<std::string>& output_names)
       : subgraph::SubgraphEngineBase(ctx,
                                      block_idx,
                                      program_desc,
@@ -48,8 +78,8 @@ class SubgraphEngine : public subgraph::SubgraphEngineBase {
   bool BuildDeviceProgram() override;
   bool LaunchDeviceProgram() override;
 
-  NeuronModel *model_;
-  NeuronCompilation *compilation_;
+  std::map<std::vector<std::vector<int64_t>>, std::shared_ptr<DeviceProgram>>
+      device_programs_;
 };
 
 class SubgraphCompute
diff --git a/lite/tests/api/test_mobilenetv1_int8_mediatek_apu.cc b/lite/tests/api/test_mobilenetv1_int8_mediatek_apu.cc
index 76b3722d2d6d4d15fb57a00b055d714ad8d2e1c5..e61b20b54a2ed0c453e28bd5e0fa19a54a7b2455 100644
--- a/lite/tests/api/test_mobilenetv1_int8_mediatek_apu.cc
+++ b/lite/tests/api/test_mobilenetv1_int8_mediatek_apu.cc
@@ -33,11 +33,13 @@ namespace paddle {
 namespace lite {
 
 TEST(MobileNetV1, test_mobilenetv1_int8_mediatek_apu) {
+  std::string subgraph_model_cache_dir = FLAGS_model_dir;
   lite_api::CxxConfig config;
   config.set_model_dir(FLAGS_model_dir);
   config.set_valid_places({lite_api::Place{TARGET(kARM), PRECISION(kFloat)},
                            lite_api::Place{TARGET(kARM), PRECISION(kInt8)},
                            lite_api::Place{TARGET(kAPU), PRECISION(kInt8)}});
+  config.set_subgraph_model_cache_dir(subgraph_model_cache_dir);
   auto predictor = lite_api::CreatePaddlePredictor(config);
 
   std::string raw_data_dir = FLAGS_data_dir + std::string("/raw_data");
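With the cache directory pointed at FLAGS_model_dir, the first execution of this test compiles the MobileNetV1 subgraph online and leaves `<md5>.dla` and `<md5>.cfg` next to the model files; later executions, and later inferences in the same run that reuse an already-seen input shape, restore the compiled network via NeuronModel_restoreFromCompiledNetwork instead of recompiling. If either cache file is missing or unreadable, the engine falls back to the online build, and LaunchDeviceProgram falls back to LaunchOriginProgram when no DLA is available, so an empty or unreadable cache costs compilation time but not correctness.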