Unverified commit aaba6447, authored by hong19860320, committed by GitHub

[cherry-pick][APU] Mtk apu add more OPs (#4287) (#4451)

Parent 5a5794b3
@@ -82,16 +82,20 @@ void NeuronAdapter::InitFunctions() {
  PADDLE_DLSYM(NeuronModel_setOperandValue);
  PADDLE_DLSYM(NeuronModel_setOperandSymmPerChannelQuantParams);
  PADDLE_DLSYM(NeuronModel_addOperation);
  PADDLE_DLSYM(NeuronModel_addOperationExtension);
  PADDLE_DLSYM(NeuronModel_identifyInputsAndOutputs);
  PADDLE_DLSYM(NeuronCompilation_create);
  PADDLE_DLSYM(NeuronCompilation_free);
  PADDLE_DLSYM(NeuronCompilation_finish);
  PADDLE_DLSYM(NeuronCompilation_createForDevices);
  PADDLE_DLSYM(NeuronExecution_create);
  PADDLE_DLSYM(NeuronExecution_free);
  PADDLE_DLSYM(NeuronExecution_setInput);
  PADDLE_DLSYM(NeuronExecution_setOutput);
  PADDLE_DLSYM(NeuronExecution_compute);
  PADDLE_DLSYM(Neuron_getDeviceCount);
  PADDLE_DLSYM(Neuron_getDevice);
  PADDLE_DLSYM(NeuronDevice_getName);
#undef PADDLE_DLSYM
}
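For context, PADDLE_DLSYM is defined just above this hunk and is not part of the diff; judging from how it is used here, it presumably expands to a dlsym lookup plus a null check, roughly like the sketch below (the handle_ member name is an assumption):

#define PADDLE_DLSYM(function)                                        \
  do {                                                                \
    /* Resolve the symbol from the already-dlopen'ed Neuron library */ \
    function##_ =                                                     \
        reinterpret_cast<function##_Type>(dlsym(handle_, #function)); \
    if (function##_ == nullptr) {                                     \
      LOG(WARNING) << "Cannot find " << #function;                    \
    }                                                                 \
  } while (false)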
@@ -146,6 +150,25 @@ int NeuronModel_addOperation(NeuronModel* model,
      model, type, inputCount, inputs, outputCount, outputs);
}
int NeuronModel_addOperationExtension(NeuronModel* model,
const char* name,
const char* vendor,
const NeuronDevice* device,
uint32_t inputCount,
const uint32_t* inputs,
uint32_t outputCount,
const uint32_t* outputs) {
return paddle::lite::NeuronAdapter::Global()
->NeuronModel_addOperationExtension()(model,
name,
vendor,
device,
inputCount,
inputs,
outputCount,
outputs);
}
int NeuronModel_identifyInputsAndOutputs(NeuronModel* model,
                                         uint32_t inputCount,
                                         const uint32_t* inputs,
@@ -172,6 +195,15 @@ int NeuronCompilation_finish(NeuronCompilation* compilation) {
      compilation);
}
int NeuronCompilation_createForDevices(NeuronModel* model,
const NeuronDevice* const* devices,
uint32_t numDevices,
NeuronCompilation** compilation) {
return paddle::lite::NeuronAdapter::Global()
->NeuronCompilation_createForDevices()(
model, devices, numDevices, compilation);
}
int NeuronExecution_create(NeuronCompilation* compilation,
                           NeuronExecution** execution) {
  return paddle::lite::NeuronAdapter::Global()->NeuronExecution_create()(
@@ -205,3 +237,18 @@ int NeuronExecution_compute(NeuronExecution* execution) {
  return paddle::lite::NeuronAdapter::Global()->NeuronExecution_compute()(
      execution);
}
int Neuron_getDeviceCount(uint32_t* numDevices) {
return paddle::lite::NeuronAdapter::Global()->Neuron_getDeviceCount()(
numDevices);
}
int Neuron_getDevice(uint32_t devIndex, NeuronDevice** device) {
return paddle::lite::NeuronAdapter::Global()->Neuron_getDevice()(devIndex,
device);
}
int NeuronDevice_getName(const NeuronDevice* device, const char** name) {
return paddle::lite::NeuronAdapter::Global()->NeuronDevice_getName()(device,
name);
}
@@ -42,12 +42,25 @@ class NeuronAdapter final {
                                            const uint32_t *,
                                            uint32_t,
                                            const uint32_t *);
using NeuronModel_addOperationExtension_Type = int (*)(NeuronModel *,
const char *,
const char *,
const NeuronDevice *,
uint32_t,
const uint32_t *,
uint32_t,
const uint32_t *);
  using NeuronModel_identifyInputsAndOutputs_Type = int (*)(
      NeuronModel *, uint32_t, const uint32_t *, uint32_t, const uint32_t *);
  using NeuronCompilation_create_Type = int (*)(NeuronModel *,
                                                NeuronCompilation **);
  using NeuronCompilation_free_Type = void (*)(NeuronCompilation *);
  using NeuronCompilation_finish_Type = int (*)(NeuronCompilation *);
using NeuronCompilation_createForDevices_Type =
int (*)(NeuronModel *,
const NeuronDevice *const *,
uint32_t,
NeuronCompilation **);
  using NeuronExecution_create_Type = int (*)(NeuronCompilation *,
                                              NeuronExecution **);
  using NeuronExecution_free_Type = void (*)(NeuronExecution *);
@@ -59,6 +72,10 @@ class NeuronAdapter final {
  using NeuronExecution_setOutput_Type = int (*)(
      NeuronExecution *, int32_t, const NeuronOperandType *, void *, size_t);
  using NeuronExecution_compute_Type = int (*)(NeuronExecution *);
using Neuron_getDeviceCount_Type = int (*)(uint32_t *);
using Neuron_getDevice_Type = int (*)(uint32_t, NeuronDevice **);
using NeuronDevice_getName_Type = int (*)(const NeuronDevice *,
const char **);
  Neuron_getVersion_Type Neuron_getVersion() {
    CHECK(Neuron_getVersion_ != nullptr) << "Cannot load Neuron_getVersion!";
@@ -105,6 +122,12 @@ class NeuronAdapter final {
    return NeuronModel_addOperation_;
  }
NeuronModel_addOperationExtension_Type NeuronModel_addOperationExtension() {
CHECK(NeuronModel_addOperationExtension_ != nullptr)
<< "Cannot load NeuronModel_addOperationExtension!";
return NeuronModel_addOperationExtension_;
}
  NeuronModel_identifyInputsAndOutputs_Type
  NeuronModel_identifyInputsAndOutputs() {
    CHECK(NeuronModel_identifyInputsAndOutputs_ != nullptr)
@@ -130,6 +153,12 @@ class NeuronAdapter final {
    return NeuronCompilation_finish_;
  }
NeuronCompilation_createForDevices_Type NeuronCompilation_createForDevices() {
CHECK(NeuronCompilation_createForDevices_ != nullptr)
<< "Cannot load NeuronCompilation_createForDevices!";
return NeuronCompilation_createForDevices_;
}
  NeuronExecution_create_Type NeuronExecution_create() {
    CHECK(NeuronExecution_create_ != nullptr)
        << "Cannot load NeuronExecution_create!";
@@ -160,6 +189,23 @@ class NeuronAdapter final {
    return NeuronExecution_compute_;
  }
Neuron_getDeviceCount_Type Neuron_getDeviceCount() {
CHECK(Neuron_getDeviceCount_ != nullptr)
<< "Cannot load Neuron_getDeviceCount!";
return Neuron_getDeviceCount_;
}
Neuron_getDevice_Type Neuron_getDevice() {
CHECK(Neuron_getDevice_ != nullptr) << "Cannot load Neuron_getDevice!";
return Neuron_getDevice_;
}
NeuronDevice_getName_Type NeuronDevice_getName() {
CHECK(NeuronDevice_getName_ != nullptr)
<< "Cannot load NeuronDevice_getName!";
return NeuronDevice_getName_;
}
 private:
  NeuronAdapter();
  NeuronAdapter(const NeuronAdapter &) = delete;
@@ -176,16 +222,23 @@ class NeuronAdapter final {
  NeuronModel_setOperandSymmPerChannelQuantParams_Type
      NeuronModel_setOperandSymmPerChannelQuantParams_{nullptr};
  NeuronModel_addOperation_Type NeuronModel_addOperation_{nullptr};
NeuronModel_addOperationExtension_Type NeuronModel_addOperationExtension_{
nullptr};
  NeuronModel_identifyInputsAndOutputs_Type
      NeuronModel_identifyInputsAndOutputs_{nullptr};
  NeuronCompilation_create_Type NeuronCompilation_create_{nullptr};
  NeuronCompilation_free_Type NeuronCompilation_free_{nullptr};
  NeuronCompilation_finish_Type NeuronCompilation_finish_{nullptr};
NeuronCompilation_createForDevices_Type NeuronCompilation_createForDevices_{
nullptr};
  NeuronExecution_create_Type NeuronExecution_create_{nullptr};
  NeuronExecution_free_Type NeuronExecution_free_{nullptr};
  NeuronExecution_setInput_Type NeuronExecution_setInput_{nullptr};
  NeuronExecution_setOutput_Type NeuronExecution_setOutput_{nullptr};
  NeuronExecution_compute_Type NeuronExecution_compute_{nullptr};
Neuron_getDeviceCount_Type Neuron_getDeviceCount_{nullptr};
Neuron_getDevice_Type Neuron_getDevice_{nullptr};
NeuronDevice_getName_Type NeuronDevice_getName_{nullptr};
};
}  // namespace lite
}  // namespace paddle
@@ -14,6 +14,8 @@ lite_cc_library(subgraph_bridge_act_op_apu SRCS act_op.cc DEPS ${apu_subgraph_br
lite_cc_library(subgraph_bridge_pool_op_apu SRCS pool_op.cc DEPS ${apu_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_softmax_op_apu SRCS softmax_op.cc DEPS ${apu_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_fc_op_apu SRCS fc_op.cc DEPS ${apu_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_concat_op_apu SRCS concat_op.cc DEPS ${apu_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_conv_transpose_op_apu SRCS conv_transpose_op.cc DEPS ${apu_subgraph_bridge_deps})
set(apu_subgraph_bridges
@@ -25,6 +27,8 @@ set(apu_subgraph_bridges
    subgraph_bridge_softmax_op_apu
    subgraph_bridge_fc_op_apu
    subgraph_bridge_pool_op_apu
    subgraph_bridge_conv_transpose_op_apu
    subgraph_bridge_concat_op_apu
    CACHE INTERNAL "apu_subgraph_bridges")
message(STATUS "+++++ apu_subgraph_bridges: ${apu_subgraph_bridges}")
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <vector>
#include "lite/core/subgraph_bridge_registry.h"
#include "lite/kernels/apu/bridges/graph.h"
#include "lite/kernels/apu/bridges/utility.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace apu {
int ConcatConverter(void* ctx, OpLite* op, KernelBase* kernel) {
CHECK(ctx != nullptr);
CHECK(op != nullptr);
auto graph = static_cast<Graph*>(ctx);
auto model = graph->model();
auto op_info = op->op_info();
auto op_type = op_info->Type();
auto scope = op->scope();
int neuron_errCode;
VLOG(3) << "[APU] Converting [" << op_type << "]";
// Get input and output vars and op attributes
auto x_names = op_info->Input("X");
auto out_name = op_info->Output("Out").front();
auto axis = op_info->GetAttr<int>("axis");
auto num = x_names.size();
// Process data layout axis change
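  // (All inputs are relaid out NCHW -> NHWC with the {0, 2, 3, 1} transpose
  // below, so a concat axis given in NCHW coordinates maps to NHWC as
  // C(1) -> 3, H(2) -> 1, W(3) -> 2; the batch axis 0 is unchanged.)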
if (axis == 1)
axis = 3;
else if (axis == 2)
axis = 1;
else if (axis == 3)
axis = 2;
// Limitation:
// All input tensors of NEURON_TENSOR_QUANT8_ASYMM must
// have the same scale and zeroPoint as the output tensor
CHECK(op_info->HasOutputScale(out_name));
auto output_scale = op_info->GetOutputScale(out_name)[0];
// Traverse all of input nodes
std::vector<std::shared_ptr<Node>> input_nodes;
NeuronOperandType xType;
for (auto& x_name : x_names) {
auto x = scope->FindMutableTensor(x_name);
auto x_dims = x->dims();
std::shared_ptr<Node> x_node = nullptr;
CHECK(op_info->HasInputScale(x_name));
auto input_scale = op_info->GetInputScale(x_name)[0];
// Add x tensor type
xType.type = NEURON_TENSOR_QUANT8_ASYMM;
xType.scale = input_scale;
xType.zeroPoint = 128;
xType.dimensionCount = x_dims.size();
std::vector<uint32_t> dims_x = {(uint32_t)x_dims[0],
(uint32_t)x_dims[2],
(uint32_t)x_dims[3],
(uint32_t)x_dims[1]};
xType.dimensions = &dims_x[0];
if (graph->Has(x_name)) {
VLOG(3) << "Graph has " << x_name;
if (graph->IsInput(x_name)) {
VLOG(3) << x_name << "is input and already exist";
x_name = "transpose_" + x_name;
}
if (graph->IsOutput(x_name)) {
VLOG(3) << x_name << "is input and output node";
x_name = "transpose_" + x_name;
}
x_node = graph->Get(x_name);
} else {
// Add input operand
if (graph->IsInput(x_name)) {
// Insert transpose for NCHW -> NHWC
insert_transpose_node(ctx,
x_name,
"transpose_" + x_name,
{(uint32_t)x_dims[0],
(uint32_t)x_dims[1],
(uint32_t)x_dims[2],
(uint32_t)x_dims[3]},
dims_x,
{0, 2, 3, 1},
xType.scale,
xType.zeroPoint);
// Change x_name because we add transpose op
x_name = "transpose_" + x_name;
x_node = graph->Get(x_name);
} else {
NeuronModel_addOperand(model, &xType);
x_node = graph->Add(x_name, dims_x);
}
} // End of else
if (x_node == nullptr) return subgraph::FAILED;
input_nodes.push_back(x_node);
VLOG(3) << "input node x: " << x_node->index()
<< ": input_scale: " << input_scale << " x_dims:" << x_dims[0]
<< ":" << x_dims[1] << ":" << x_dims
<< ", inType: " << xType.dimensions[0] << ":" << xType.dimensions[1]
<< ":" << xType.dimensions[2] << ":" << xType.dimensions[3];
} // End of for
if (input_nodes.size() != num) {
LOG(WARNING) << "Create input operand failed!";
return subgraph::FAILED;
}
// Add axis operand type
NeuronOperandType int32Type;
int32Type.type = NEURON_INT32;
int32Type.dimensionCount = 0;
std::vector<uint32_t> dims_int32 = {1};
// Add axis operand
std::shared_ptr<Node> axis_node = nullptr;
NeuronModel_addOperand(model, &int32Type); // axis
axis_node = graph->Add(out_name + "_axis", dims_int32);
VLOG(3) << "axis:" << axis;
// Add out operand type
auto out = scope->FindMutableTensor(out_name);
auto out_dims = out->dims();
NeuronOperandType outType;
outType.type = NEURON_TENSOR_QUANT8_ASYMM;
outType.scale = output_scale;
outType.zeroPoint = 128;
outType.dimensionCount = out_dims.size();
std::vector<uint32_t> dims_out = {(uint32_t)out_dims[0],
(uint32_t)out_dims[2],
(uint32_t)out_dims[3],
(uint32_t)out_dims[1]};
outType.dimensions = &dims_out[0];
// Add out operand
std::shared_ptr<Node> out_node = nullptr;
if (graph->Has(out_name)) {
out_node = graph->Get(out_name);
} else {
if (graph->IsOutput(out_name)) {
NeuronModel_addOperand(model, &outType);
out_node = graph->Add("transpose_" + out_name, dims_out);
} else {
NeuronModel_addOperand(model, &outType);
out_node = graph->Add(out_name, dims_out);
}
}
VLOG(3) << "out node idx: " << out_node->index()
<< ": output_scle: " << outType.scale
<< ", outType: " << outType.dimensions[0] << ":"
<< outType.dimensions[1] << ":" << outType.dimensions[2] << ":"
<< outType.dimensions[3];
// Set axis value
int32_t axis_val[1] = {(int32_t)axis};
NeuronModel_setOperandValue(
model, axis_node->index(), axis_val, sizeof(int32_t) * 1);
std::vector<uint32_t> addInIndex;
for (auto& node : input_nodes) {
addInIndex.push_back(node->index());
}
addInIndex.push_back(axis_node->index());
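  // NEURON_CONCATENATION expects the n input tensors followed by a single
  // scalar axis operand, and produces one output tensor.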
std::vector<uint32_t> addOutIndex = {out_node->index()};
neuron_errCode = NeuronModel_addOperation(model,
NEURON_CONCATENATION,
addInIndex.size(),
&addInIndex[0],
addOutIndex.size(),
&addOutIndex[0]);
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(WARNING) << "Add op fail:" << op_type;
return subgraph::FAILED;
}
if (graph->IsOutput(out_name)) {
// Insert transpose for NHWC -> NCHW
insert_transpose_node(ctx,
"transpose_" + out_name,
out_name,
dims_out,
{(uint32_t)out_dims[0],
(uint32_t)out_dims[1],
(uint32_t)out_dims[2],
(uint32_t)out_dims[3]},
{0, 3, 1, 2},
outType.scale,
outType.zeroPoint);
out_node = graph->Get(out_name);
if (out_node == nullptr) return subgraph::FAILED;
}
return SUCCESS;
}
} // namespace apu
} // namespace subgraph
} // namespace lite
} // namespace paddle
REGISTER_SUBGRAPH_BRIDGE(concat,
kAPU,
paddle::lite::subgraph::apu::ConcatConverter);
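The converters in this patch lean heavily on insert_transpose_node from lite/kernels/apu/bridges/utility.h, which is not part of this diff. Judging only from the call sites above, its signature is presumably along these lines (a sketch, not the actual declaration):

int insert_transpose_node(void* ctx,                       // subgraph Graph*
                          const std::string& input_name,   // operand to read
                          const std::string& output_name,  // operand to create
                          std::vector<uint32_t> input_shape,
                          std::vector<uint32_t> output_shape,
                          std::vector<int32_t> axis,       // e.g. {0, 2, 3, 1}
                          float scale,                     // quantization scale
                          int32_t zero_point);             // quantization zero point

It appears to add a transpose operation so that externally NCHW tensors can be consumed and produced by the NHWC-layout Neuron ops.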
@@ -73,7 +73,7 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  CHECK_EQ(strides.size(), 2L);
  CHECK_EQ(dilations.size(), 2L);
  bool is_depthwise_mode = ic == groups && oc == groups;
  VLOG(3) << "is_depthwise_mode: " << is_depthwise_mode;

  if (paddings.size() == 2L) {
    for (size_t i = 0; i < strides.size(); ++i) {
@@ -103,6 +103,7 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  auto filter_scale = op_info->GetInputScale(filter_name);
  CHECK(op_info->HasOutputScale(output_name));
  auto output_scale = op_info->GetOutputScale(output_name)[0];
  auto orig_output_scale = op_info->GetOutputScale(output_name)[0];

  VLOG(3) << "strides.size(): " << strides.size() << " ,groups: " << groups
          << " ,dilations: " << dilations[0] << ":" << dilations[1];
@@ -128,23 +129,32 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  std::shared_ptr<Node> input_node = nullptr;
  if (graph->Has(input_name)) {
    VLOG(3) << "Graph has " << input_name;
    // Input operand already exists
    if (graph->IsInput(input_name)) {
      VLOG(3) << input_name << " is an input and already exists";
      input_name = "transpose_" + input_name;
    }
    if (graph->IsOutput(input_name)) {
      VLOG(3) << input_name << " is both an input and an output node";
      input_name = "transpose_" + input_name;
    }
    input_node = graph->Get(input_name);
  } else {
    // Add input operand
    if (graph->IsInput(input_name)) {
      // Insert transpose for NCHW -> NHWC
      insert_transpose_node(ctx,
                            input_name,
                            "transpose_" + input_name,
                            {(uint32_t)input_dims[0],
                             (uint32_t)input_dims[1],
                             (uint32_t)input_dims[2],
                             (uint32_t)input_dims[3]},
                            dims_in,
                            {0, 2, 3, 1},
                            inType.scale,
                            inType.zeroPoint);
      input_name = "transpose_" + input_name;
      input_node = graph->Get(input_name);
      if (input_node == nullptr) return subgraph::FAILED;
@@ -153,7 +163,7 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
      input_node = graph->Add(input_name, dims_in);
    }
  }
  VLOG(3) << "input node idx: " << input_node->index()
          << ": input_scale: " << input_scale
          << ", inType: " << inType.dimensions[0] << ":" << inType.dimensions[1]
          << ":" << inType.dimensions[2] << ":" << inType.dimensions[3];
@@ -161,8 +171,7 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Add bias type
  NeuronOperandType biasType;

  // Add filter type, filter data re-layout NCHW -> NHWC
  Tensor transpose_filter;
  std::vector<uint32_t> dims_filter;
@@ -233,10 +242,11 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
    biasType.scale = 0;
  }

  auto precision = filter->precision();
  std::shared_ptr<Node> filter_node = nullptr;
  if (1 == filter_scale.size()) {
    NeuronModel_addOperand(model, &filterType);
    filter_node = graph->Add(filter_name, dims_filter);  // Operand 1: filter
    VLOG(3) << "filter node idx: " << filter_node->index()
            << " filter_scale[0]: " << filter_scale[0]
            << ": filterType: " << filterType.dimensions[0]
            << ":" << filterType.dimensions[1] << ":"
@@ -251,7 +261,7 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
      return subgraph::FAILED;
    }
  } else {
    NeuronModel_addOperand(model, &channelFilterType);  // Operand 1: filter
    filter_node = graph->Add(filter_name, dims_filter);
    VLOG(3) << "channel filter node idx: " << filter_node->index()
            << " ,scale_count:" << filter_scale.size()
@@ -280,7 +290,7 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Add biasType node value
  // A 1-D tensor, of shape [depth_out], specifying the bias.
  // For filter tensor of NEURON_TENSOR_QUANT8_SYMM_PER_CHANNEL, the bias
  // should be of NEURON_TENSOR_INT32, with zeroPoint of 0
  // and bias_scale of 0. The actual scale of each value 'i' is equal
  // to bias_scale[i] = input_scale * filter_scale[i].
  biasType.type = NEURON_TENSOR_INT32;
@@ -296,16 +306,17 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
    for (int i = 0; i < bias_dims.size(); i++)
      dims_bias.push_back(bias_dims[i]);
    biasType.dimensions = &dims_bias[0];
    NeuronModel_addOperand(model, &biasType);  // Operand 2: bias
    bias_node = graph->Add(bias_name, dims_bias);
    VLOG(3) << "node idx: " << bias_node->index()
            << ": Bias name: " << bias_name
            << " ,bias scale: " << biasType.scale
            << " ,dimensions: " << bias_dims;
  } else {
    biasType.dimensionCount = 1;
    dims_bias = {(uint32_t)output_dims[1]};
    biasType.dimensions = &dims_bias[0];
    NeuronModel_addOperand(model, &biasType);  // Operand 2: bias
    bias_node = graph->Add(filter_name + "_default_bias", dims_bias);
    VLOG(3) << "node idx: " << bias_node->index()
            << ": Bias name: default_bias "
            << " ,bias scale: " << biasType.scale
@@ -318,39 +329,51 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  std::vector<uint32_t> dims_int32 = {1};

  std::shared_ptr<Node> paddingL_node = nullptr;
  NeuronModel_addOperand(model, &int32Type);  // Operand 3: padding left
  paddingL_node = graph->Add(filter_name + "_padding_left", dims_int32);

  std::shared_ptr<Node> paddingR_node = nullptr;
  NeuronModel_addOperand(model, &int32Type);  // Operand 4: padding right
  paddingR_node = graph->Add(filter_name + "_padding_right", dims_int32);

  std::shared_ptr<Node> paddingT_node = nullptr;
  NeuronModel_addOperand(model, &int32Type);  // Operand 5: padding top
  paddingT_node = graph->Add(filter_name + "_padding_top", dims_int32);

  std::shared_ptr<Node> paddingB_node = nullptr;
  NeuronModel_addOperand(model, &int32Type);  // Operand 6: padding bottom
  paddingB_node = graph->Add(filter_name + "_padding_bottom", dims_int32);

  std::shared_ptr<Node> strideW_node = nullptr;
  NeuronModel_addOperand(model, &int32Type);  // Operand 7: stride width
  strideW_node = graph->Add(filter_name + "_stride_width", dims_int32);

  std::shared_ptr<Node> strideH_node = nullptr;
  NeuronModel_addOperand(model, &int32Type);  // Operand 8: stride height
  strideH_node = graph->Add(filter_name + "_stride_height", dims_int32);

  std::shared_ptr<Node> dm_node = nullptr;
  if (is_depthwise_mode) {
    NeuronModel_addOperand(model,
                           &int32Type);  // Operand 9: depthwise multiplier
    dm_node = graph->Add(filter_name + "_dm", dims_int32);
  }

  std::shared_ptr<Node> fuse_node = nullptr;
  NeuronModel_addOperand(model, &int32Type);  // Operand 9/10: fuse
  fuse_node = graph->Add(filter_name + "_fuse", dims_int32);
/* Check output scale */
if (is_depthwise_mode) {
for (auto s : filter_scale) {
if (output_scale < s * input_scale)
output_scale = s * input_scale + 0.000001;
}
#ifdef LITE_MEDIATEK_APU_ENABLE_REQUANT
output_scale = orig_output_scale;
#endif
}
  // Add output tensor type
  NeuronOperandType outType;
  outType.type = NEURON_TENSOR_QUANT8_ASYMM;
@@ -366,12 +389,17 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  if (graph->Has(output_name)) {
    output_node = graph->Get(output_name);
  } else {
    // Add output operand
    NeuronModel_addOperand(model, &outType);
    if (orig_output_scale != output_scale) {
      // Need to insert requant op; the result is requant_ -> transpose_ ->
      // output
      output_node = graph->Add("requant_" + output_name, dims_out);
    } else if (graph->IsOutput(output_name)) {
      // Need to insert transpose op, transpose_ -> output
      output_node = graph->Add("transpose_" + output_name, dims_out);
    } else {
      output_node = graph->Add(output_name, dims_out);
    }
  }
@@ -433,10 +461,10 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Add Stride
  int32_t stride_val[1];
  stride_val[0] = strides[1];  // Entry 1: width stride
  NeuronModel_setOperandValue(
      model, strideW_node->index(), stride_val, sizeof(int32_t) * 1);
  stride_val[0] = strides[0];  // Entry 0: height stride
  NeuronModel_setOperandValue(
      model, strideH_node->index(), stride_val, sizeof(int32_t) * 1);
@@ -460,7 +488,7 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
        model, dm_node->index(), &dm, sizeof(int32_t) * 1);
    VLOG(3) << "depthwise multiplier:" << dm;

    // Depthwise conv case
    NeuronModel_setOperandValue(
        model, fuse_node->index(), fuse_val, sizeof(int32_t) * 1);
    std::vector<uint32_t> addInIndex = {
@@ -512,19 +540,46 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
    return FAILED;
  }
// Check if Requant OP is needed
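  // (The depthwise path above may have raised output_scale so that it is at
  // least input_scale * filter_scale for every channel; when that happened,
  // a requant op is inserted here to bring the result back to the scale,
  // orig_output_scale, that the rest of the graph expects.)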
std::shared_ptr<Node> requant_node = nullptr;
if (orig_output_scale != output_scale) {
std::string requant_out_name = output_name;
VLOG(3) << "Insert requant output scale, orig:" << orig_output_scale
<< " ,output_scale:" << output_scale;
if (graph->IsOutput(output_name)) {
requant_out_name = "transpose_" + output_name;
}
insert_requant_node(ctx,
"requant_" + output_name,
requant_out_name,
dims_out,
dims_out,
output_scale,
orig_output_scale,
outType.zeroPoint);
requant_node = graph->Get(requant_out_name);
if (requant_node == nullptr) return subgraph::FAILED;
}
std::shared_ptr<Node> transpose_node = nullptr;
  if (graph->IsOutput(output_name)) {
    VLOG(3) << "Add output transpose:" << output_name;
    // Insert transpose for NHWC -> NCHW
    insert_transpose_node(ctx,
                          "transpose_" + output_name,
                          output_name,
                          dims_out,
                          {(uint32_t)output_dims[0],
                           (uint32_t)output_dims[1],
                           (uint32_t)output_dims[2],
                           (uint32_t)output_dims[3]},
                          {0, 3, 1, 2},
                          outType.scale,
                          outType.zeroPoint);
    transpose_node = graph->Get(output_name);
    if (transpose_node == nullptr) return subgraph::FAILED;
  }

  return REBUILD_WHEN_SHAPE_CHANGED;
...
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <vector>
#include "lite/core/subgraph_bridge_registry.h"
#include "lite/kernels/apu/bridges/graph.h"
#include "lite/kernels/apu/bridges/utility.h"
#include "lite/operators/conv_op.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace apu {
int ConvTransposeConverter(void *ctx, OpLite *op, KernelBase *kernel) {
CHECK(ctx != nullptr);
CHECK(op != nullptr);
auto graph = static_cast<Graph *>(ctx);
auto model = graph->model();
auto op_info = op->op_info();
auto op_type = op_info->Type();
auto scope = op->scope();
int neuron_errCode;
VLOG(3) << "[APU] Converting [" << op_type << "]";
CHECK(op_info->HasAttr("enable_int8") &&
op_info->GetAttr<bool>("enable_int8"));
// Get input, output and op attributes
auto input_name = op_info->Input("Input").front();
auto input = scope->FindMutableTensor(input_name);
auto input_dims = input->dims();
CHECK_EQ(input_dims.size(), 4);
auto filter_name = op_info->Input("Filter").front();
auto filter = scope->FindMutableTensor(filter_name);
auto filter_dims = filter->dims();
CHECK_EQ(filter_dims.size(), 4);
auto output_name = op_info->Output("Output").front();
auto strides = op_info->GetAttr<std::vector<int>>("strides");
CHECK_EQ(strides.size(), 2L);
auto paddings = op_info->GetAttr<std::vector<int>>("paddings");
auto groups = op_info->GetAttr<int>("groups");
  if (groups > 1) {
    LOG(WARNING) << "[APU] only supports groups == 1";
    return FAILED;
  }
bool with_act =
op_info->HasAttr("with_act") && op_info->GetAttr<bool>("with_act");
std::string act_type =
with_act ? op_info->GetAttr<std::string>("act_type") : "";
float leaky_relu_alpha = act_type == "leaky_relu"
? op_info->GetAttr<float>("leaky_relu_alpha")
: 0.f;
auto fuse_relu =
op_info->HasAttr("fuse_relu") && op_info->GetAttr<bool>("fuse_relu");
auto dilations = op_info->GetAttr<std::vector<int>>("dilations");
CHECK_EQ(dilations.size(), 2L);
std::string padding_algorithm =
op_info->HasAttr("padding_algorithm")
? op_info->GetAttr<std::string>("padding_algorithm")
: "";
if (paddings.size() == 2L) {
for (size_t i = 0; i < strides.size(); ++i) {
int copy_pad = *(paddings.begin() + 2 * i);
paddings.insert(paddings.begin() + 2 * i + 1, copy_pad);
}
}
CHECK_EQ(paddings.size(), 4L)
<< "[APU] Paddings size should be the same or twice as the input size."
<< paddings.size();
operators::UpdatePaddingAndDilation(&paddings,
&dilations,
strides,
padding_algorithm,
input_dims,
filter_dims);
std::vector<int> output_dims;
// Set output_dims: batches
output_dims.push_back(input_dims[0]);
  std::vector<int> output_size;
  if (op_info->HasAttr("output_size")) {
    output_size = op_info->GetAttr<std::vector<int>>("output_size");
  }
  if (output_size.size() == 2) {
    // Set output_dims: height, width
    output_dims.push_back(output_size[0]);
    output_dims.push_back(output_size[1]);
  } else {
    // Compute output size
    for (int i = 0; i < strides.size(); i++) {
      int kernel_ext = filter_dims[i + 2];
      int out_size = (input_dims[i + 2] - 1) * strides[i] + kernel_ext -
                     paddings[i * 2] - paddings[i * 2 + 1];
      output_dims.push_back(out_size);
    }
  }
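  // For example, the loop above maps input 14, stride 2, kernel 4 and
  // paddings 1 + 1 to an output size of (14 - 1) * 2 + 4 - 1 - 1 = 28.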
output_dims.push_back(filter_dims[1]);
CHECK(op_info->HasInputScale(input_name));
auto input_scale = op_info->GetInputScale(input_name)[0];
CHECK(op_info->HasInputScale(filter_name));
auto filter_scale = op_info->GetInputScale(filter_name);
CHECK(op_info->HasOutputScale(output_name));
auto output_scale = op_info->GetOutputScale(output_name)[0];
VLOG(3) << "strides.size(): " << strides.size() << " ,groups: " << groups
<< " ,dilations: " << dilations[0] << ":" << dilations[1];
VLOG(3) << "with_act: " << with_act << " ,act_type: " << act_type;
VLOG(3) << "input_dims: " << input_dims
<< " ,filter_scale size: " << filter_scale.size();
VLOG(3) << "filter_dims(Cin, Cout, H, W): " << filter_dims
<< " ,memory_size: " << filter->memory_size()
<< " ,data_size: " << filter->data_size();
// Add input tensor type
NeuronOperandType inType;
inType.type = NEURON_TENSOR_QUANT8_ASYMM;
inType.scale = input_scale;
inType.zeroPoint = 128;
inType.dimensionCount = input_dims.size();
std::vector<uint32_t> dims_in = {(uint32_t)input_dims[0],
(uint32_t)input_dims[2],
(uint32_t)input_dims[3],
(uint32_t)input_dims[1]};
inType.dimensions = &dims_in[0];
std::shared_ptr<Node> input_node = nullptr;
if (graph->Has(input_name)) {
VLOG(3) << "Graph has " << input_name;
// Input operand already created by previous OP
input_node = graph->Get(input_name);
} else {
// Add input operand
if (graph->IsInput(input_name)) {
// Insert transpose for NCHW -> NHWC
insert_transpose_node(ctx,
input_name,
"transpose_" + input_name,
{(uint32_t)input_dims[0],
(uint32_t)input_dims[1],
(uint32_t)input_dims[2],
(uint32_t)input_dims[3]},
dims_in,
{0, 2, 3, 1},
inType.scale,
inType.zeroPoint);
// Change input_name because we add transpose op
input_name = "transpose_" + input_name;
input_node = graph->Get(input_name);
if (input_node == nullptr) return subgraph::FAILED;
} else {
NeuronModel_addOperand(model, &inType);
input_node = graph->Add(input_name, dims_in);
}
}
VLOG(3) << "input node idx: " << input_node->index()
<< ": input_scale: " << input_scale
<< ", inType: " << inType.dimensions[0] << ":" << inType.dimensions[1]
<< ":" << inType.dimensions[2] << ":" << inType.dimensions[3];
// Add bias type
NeuronOperandType biasType;
// Add filter type
// Relay out filter (Cin,Cout,H,W) -> (depth_out, h, w, depth_in)
Tensor transpose_filter;
std::vector<uint32_t> dims_filter;
transpose_filter.Resize({(uint32_t)filter_dims[1],
(uint32_t)filter_dims[2],
(uint32_t)filter_dims[3],
(uint32_t)filter_dims[0]});
transposeAsym(filter->data<int8_t>(),
transpose_filter.mutable_data<uint8_t>(),
{(uint32_t)filter_dims[0],
(uint32_t)filter_dims[1],
(uint32_t)filter_dims[2],
(uint32_t)filter_dims[3]},
{1, 2, 3, 0});
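  // transposeAsym presumably both permutes the int8 filter data into the
  // (Cout, H, W, Cin) order given by the {1, 2, 3, 0} axes and shifts each
  // value by +128 into uint8, i.e. symmetric int8 -> asymmetric uint8 with
  // zeroPoint 128.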
dims_filter = {(uint32_t)filter_dims[1],
(uint32_t)filter_dims[2],
(uint32_t)filter_dims[3],
(uint32_t)filter_dims[0]};
NeuronOperandType filterType;
filterType.type = NEURON_TENSOR_QUANT8_ASYMM;
filterType.scale = filter_scale[0];
filterType.zeroPoint = 128;
filterType.dimensionCount = filter_dims.size();
filterType.dimensions = &dims_filter[0];
biasType.scale = inType.scale * filterType.scale;
std::shared_ptr<Node> filter_node = nullptr;
NeuronModel_addOperand(model, &filterType);
filter_node = graph->Add(filter_name, dims_filter);
auto precision = filter->precision();
VLOG(3) << " filter node idx: " << filter_node->index()
<< " filter_scale[0]=" << filter_scale[0]
<< " filter memory_size=" << filter->memory_size()
<< " filter precision=" << PrecisionToStr(precision)
<< " :filterType: " << filterType.dimensions[0] << ":"
<< filterType.dimensions[2] << ":" << filterType.dimensions[2] << ":"
<< filterType.dimensions[3];
memcpy(filter->mutable_data<int8_t>(),
transpose_filter.mutable_data<uint8_t>(),
filter->memory_size());
// Set filter value
neuron_errCode = NeuronModel_setOperandValue(
model, filter_node->index(), filter->raw_data(), filter->memory_size());
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(WARNING) << "Set filter operand value fail:" << neuron_errCode;
return subgraph::FAILED;
}
// Add biasType node value
// A 1-D tensor, of shape [depth_out], specifying the bias.
// For filter tensor of NEURON_TENSOR_QUANT8_ASYMM, the bias should be of
// NEURON_TENSOR_INT32 with zeroPoint of 0 and bias_scale ==
// input_scale * filter_scale
biasType.type = NEURON_TENSOR_INT32;
biasType.zeroPoint = 0;
std::vector<uint32_t> dims_bias;
std::shared_ptr<Node> bias_node = nullptr;
if (HasInputArg(op_info, scope, "Bias")) {
auto bias_name = op_info->Input("Bias").front();
auto bias = scope->FindMutableTensor(bias_name);
auto bias_dims = bias->dims();
auto channel_size = bias->dims().production();
CHECK_EQ(channel_size, filter_dims[1] * groups);
CHECK_EQ(bias_dims.size(), 1);
biasType.dimensionCount = bias_dims.size();
for (int i = 0; i < bias_dims.size(); i++)
dims_bias.push_back(bias_dims[i]);
biasType.dimensions = &dims_bias[0];
NeuronModel_addOperand(model, &biasType); // Operand 2: bias
bias_node = graph->Add(bias_name, dims_bias);
VLOG(3) << "node idx: " << bias_node->index()
<< ": Bias name: " << bias_name
<< " ,bias scale: " << biasType.scale
<< " ,dimensions: " << bias_dims
<< " ,channel_size:" << channel_size;
} else {
// Create default bias with value 0
biasType.dimensionCount = 1;
    dims_bias = {(uint32_t)output_dims[3]};  // depth_out (output_dims is NHWC)
biasType.dimensions = &dims_bias[0];
NeuronModel_addOperand(model, &biasType); // Operand 2: bias
bias_node = graph->Add(filter_name + "_default_bias", dims_bias);
VLOG(3) << "node idx: " << bias_node->index()
<< ": Bias name: default_bias "
<< " ,bias scale: " << biasType.scale
<< " ,dimensions: " << dims_bias.size();
}
NeuronOperandType int32Type;
int32Type.type = NEURON_INT32;
int32Type.dimensionCount = 0;
std::vector<uint32_t> dims_int32 = {1};
std::shared_ptr<Node> paddingL_node = nullptr;
NeuronModel_addOperand(model, &int32Type); // Operand 3: padding left
paddingL_node = graph->Add(filter_name + "_padding_left", dims_int32);
std::shared_ptr<Node> paddingR_node = nullptr;
NeuronModel_addOperand(model, &int32Type); // Operand 4: padding right
paddingR_node = graph->Add(filter_name + "_padding_right", dims_int32);
std::shared_ptr<Node> paddingT_node = nullptr;
NeuronModel_addOperand(model, &int32Type); // Operand 5: padding top
paddingT_node = graph->Add(filter_name + "_padding_top", dims_int32);
std::shared_ptr<Node> paddingB_node = nullptr;
NeuronModel_addOperand(model, &int32Type); // Operand 6: padding bottom
paddingB_node = graph->Add(filter_name + "_padding_bottom", dims_int32);
std::shared_ptr<Node> strideW_node = nullptr;
NeuronModel_addOperand(model, &int32Type); // Operand 7: stride width
strideW_node = graph->Add(filter_name + "_stride_width", dims_int32);
std::shared_ptr<Node> strideH_node = nullptr;
NeuronModel_addOperand(model, &int32Type); // Operand 8: stride height
strideH_node = graph->Add(filter_name + "_stride_height", dims_int32);
std::shared_ptr<Node> fuse_node = nullptr;
NeuronModel_addOperand(model, &int32Type); // Operand 9: fuse
fuse_node = graph->Add(filter_name + "_fuse", dims_int32);
  NeuronOperandType boolType;
  boolType.type = NEURON_BOOL;
  boolType.dimensionCount = 0;  // Must be 0 for scalars.

  std::shared_ptr<Node> layout_node = nullptr;
  NeuronModel_addOperand(model, &boolType);  // Operand 10: NCHW layout flag
  layout_node = graph->Add(filter_name + "_layout", dims_int32);
// Add output tensor type
NeuronOperandType outType;
outType.type = NEURON_TENSOR_QUANT8_ASYMM;
outType.scale = output_scale;
outType.zeroPoint = 128;
outType.dimensionCount = output_dims.size();
std::vector<uint32_t> dims_out = {(uint32_t)output_dims[0],
(uint32_t)output_dims[1],
(uint32_t)output_dims[2],
(uint32_t)output_dims[3]};
outType.dimensions = &dims_out[0];
std::shared_ptr<Node> output_node = nullptr;
if (graph->Has(output_name)) {
output_node = graph->Get(output_name);
} else {
if (graph->IsOutput(output_name)) {
NeuronModel_addOperand(model, &outType);
output_node = graph->Add("transpose_" + output_name, dims_out);
} else {
NeuronModel_addOperand(model, &outType);
output_node = graph->Add(output_name, dims_out);
}
}
VLOG(3) << "output node idx: " << output_node->index()
<< ": output_scale: " << outType.scale
<< " ,outType: " << outType.dimensions[0] << ":"
<< outType.dimensions[1] << ":" << outType.dimensions[2] << ":"
<< outType.dimensions[3];
// Add bias value
if (HasInputArg(op_info, scope, "Bias")) {
auto bias_name = op_info->Input("Bias").front();
auto bias = scope->FindMutableTensor(bias_name);
int32_t *int32_bias_data =
reinterpret_cast<int32_t *>(bias->mutable_data<float>());
float2int32(
bias->data<float>(), input_scale, filter_scale, int32_bias_data);
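    // float2int32 presumably quantizes each float bias value b as
    // round(b / (input_scale * filter_scale[0])), matching
    // biasType.scale = inType.scale * filterType.scale set above.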
VLOG(3) << "int32_bias_data: " << int32_bias_data[0] << ":"
<< int32_bias_data[1] << ":" << int32_bias_data[2] << ":"
<< int32_bias_data[3];
neuron_errCode = NeuronModel_setOperandValue(
model, bias_node->index(), bias->raw_data(), bias->memory_size());
} else {
auto int32_bias = std::make_shared<Tensor>();
int32_bias->Resize({1, output_dims[3]});
int32_bias->mutable_data<int32_t>();
VLOG(3) << "bais_default: " << int32_bias->memory_size();
memset(int32_bias->mutable_data<int32_t>(), 0, int32_bias->memory_size());
neuron_errCode = NeuronModel_setOperandValue(model,
bias_node->index(),
int32_bias->raw_data(),
int32_bias->memory_size());
bias_node->set_data(int32_bias);
}
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(WARNING) << "Set bias operand value fail:" << neuron_errCode;
return subgraph::FAILED;
}
VLOG(3) << "paddings: " << paddings[0] << ":" << paddings[1] << ":"
<< paddings[2] << ":" << paddings[3];
// Add padding value
int32_t padding_val[1];
padding_val[0] = paddings[2];
NeuronModel_setOperandValue(
model, paddingL_node->index(), padding_val, sizeof(int32_t) * 1);
padding_val[0] = paddings[3];
NeuronModel_setOperandValue(
model, paddingR_node->index(), padding_val, sizeof(int32_t) * 1);
padding_val[0] = paddings[0];
NeuronModel_setOperandValue(
model, paddingT_node->index(), padding_val, sizeof(int32_t) * 1);
padding_val[0] = paddings[1];
NeuronModel_setOperandValue(
model, paddingB_node->index(), padding_val, sizeof(int32_t) * 1);
VLOG(3) << " stride width:" << strides[1] << " height:" << strides[0];
// Add Stride
int32_t stride_val[1];
stride_val[0] = strides[1]; // entry 1: width stride
NeuronModel_setOperandValue(
model, strideW_node->index(), stride_val, sizeof(int32_t) * 1);
stride_val[0] = strides[0]; // entry 0: height stride
NeuronModel_setOperandValue(
model, strideH_node->index(), stride_val, sizeof(int32_t) * 1);
int32_t fuse_val[1] = {NEURON_FUSED_NONE};
if (act_type == "relu") {
fuse_val[0] = NEURON_FUSED_RELU;
} else if (act_type == "relu1") {
fuse_val[0] = NEURON_FUSED_RELU1;
} else if (act_type == "relu6") {
fuse_val[0] = NEURON_FUSED_RELU6;
} else if (!act_type.empty()) {
fuse_val[0] = NEURON_FUSED_NONE;
LOG(WARNING) << "Support act_type: " << act_type;
return FAILED;
}
NeuronModel_setOperandValue(
model, fuse_node->index(), fuse_val, sizeof(int32_t) * 1);
bool layout_val[] = {false};
NeuronModel_setOperandValue(
model, layout_node->index(), layout_val, sizeof(bool) * 1);
std::vector<uint32_t> addInIndex = {
input_node->index(), // 0: input
filter_node->index(), // 1: filter
bias_node->index(), // 2: bias
paddingL_node->index(), // 3: padding left
paddingR_node->index(), // 4: padding right
paddingT_node->index(), // 5: padding top
paddingB_node->index(), // 6: padding bottom
strideW_node->index(), // 7: stride width
strideH_node->index(), // 8: stride height
fuse_node->index(), // 9: fuse
layout_node->index()}; // 10: layout
std::vector<uint32_t> addOutIndex = {output_node->index()};
neuron_errCode = NeuronModel_addOperation(model,
NEURON_TRANSPOSE_CONV_2D,
addInIndex.size(),
&addInIndex[0],
addOutIndex.size(),
&addOutIndex[0]);
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(WARNING) << "Add op fail:" << op_type;
return FAILED;
}
if (graph->IsOutput(output_name)) {
// Insert transpose for NHWC -> NCHW
insert_transpose_node(ctx,
"transpose_" + output_name,
output_name,
dims_out,
{(uint32_t)output_dims[0],
(uint32_t)output_dims[1],
(uint32_t)output_dims[2],
(uint32_t)output_dims[3]},
{0, 3, 1, 2},
outType.scale,
outType.zeroPoint);
output_node = graph->Get(output_name);
if (output_node == nullptr) return subgraph::FAILED;
}
return REBUILD_WHEN_SHAPE_CHANGED;
}
} // namespace apu
} // namespace subgraph
} // namespace lite
} // namespace paddle
REGISTER_SUBGRAPH_BRIDGE(conv2d_transpose,
kAPU,
paddle::lite::subgraph::apu::ConvTransposeConverter);
@@ -29,28 +29,252 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  auto op_info = op->op_info();
  auto op_type = op_info->Type();
  auto scope = op->scope();
  int neuron_errCode;
  VLOG(3) << "[APU] Converting [" + op_type + "]";

  // Get input and output vars and op attributes
  auto x_name = op_info->Input("X").front();
  auto x = scope->FindTensor(x_name);
  auto x_dims = x->dims();
  auto y_name = op_info->Input("Y").front();
  auto y = scope->FindTensor(y_name);
  auto y_dims = y->dims();
  auto out_name = op_info->Output("Out").front();
  auto out = scope->FindTensor(out_name);
  auto out_dims = out->dims();
  auto axis = op_info->GetAttr<int>("axis");
if (axis < 0) {
axis = x_dims.size() - y_dims.size();
}
auto x_shape = x_dims.Vectorize();
auto y_shape = y_dims.Vectorize();
// Two dimensions are compatible when:
// 1. they are equal, or
// 2. one of them is 1
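  // e.g. with axis = 1, x_dims = [1, 32, 28, 28] accepts y_dims = [32, 28, 28]
  // or [32, 1, 1], but rejects [16, 28, 28].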
for (int i = axis; i < x_shape.size(); i++) {
if (x_dims[i] != y_dims[i - axis]) {
// Input 1 compatible dimensions as input0
if (y_dims[i - axis] != 1) {
LOG(WARNING) << i << ":" << axis << ":" << y_dims[i - axis];
return FAILED;
}
}
} // End of for
int32_t fuse_val[1] = {NEURON_FUSED_NONE};
  // Act node
  if (op_type == "fusion_elementwise_add_activation" ||
      op_type == "fusion_elementwise_sub_activation" ||
      op_type == "fusion_elementwise_mul_activation" ||
      op_type == "fusion_elementwise_div_activation") {
    auto act_type = op_info->GetAttr<std::string>("act_type");
if (act_type == "relu") {
fuse_val[0] = NEURON_FUSED_RELU;
} else if (act_type == "relu1") {
fuse_val[0] = NEURON_FUSED_RELU1;
} else if (act_type == "relu6") {
fuse_val[0] = NEURON_FUSED_RELU6;
} else if (!act_type.empty()) {
fuse_val[0] = NEURON_FUSED_NONE;
LOG(WARNING) << "Support act_type: " << act_type;
return FAILED;
}
} // End of if
VLOG(3) << "x_name" << x_name;
CHECK(op_info->HasInputScale(x_name));
auto x_scale = op_info->GetInputScale(x_name)[0];
CHECK(op_info->HasInputScale(y_name));
auto y_scale = op_info->GetInputScale(y_name)[0];
CHECK(op_info->HasOutputScale(out_name));
auto out_scale = op_info->GetOutputScale(out_name)[0];
// Add x tensor type
NeuronOperandType xType;
xType.type = NEURON_TENSOR_QUANT8_ASYMM;
xType.scale = x_scale;
xType.zeroPoint = 128;
xType.dimensionCount = x_dims.size();
std::vector<uint32_t> dims_x = {(uint32_t)x_dims[0],
(uint32_t)x_dims[2],
(uint32_t)x_dims[3],
(uint32_t)x_dims[1]};
xType.dimensions = &dims_x[0];
std::shared_ptr<Node> x_node = nullptr;
if (graph->Has(x_name)) {
VLOG(3) << "Graph has " << x_name;
if (graph->IsInput(x_name)) {
VLOG(3) << x_name << "is input and already exist";
x_name = "transpose_" + x_name;
}
if (graph->IsOutput(x_name)) {
VLOG(3) << x_name << "is input and output node";
x_name = "transpose_" + x_name;
}
x_node = graph->Get(x_name);
} else {
if (graph->IsInput(x_name)) {
insert_transpose_node(ctx,
x_name,
"transpose_" + x_name,
{(uint32_t)x_dims[0],
(uint32_t)x_dims[1],
(uint32_t)x_dims[2],
(uint32_t)x_dims[3]},
dims_x,
{0, 2, 3, 1},
xType.scale,
xType.zeroPoint);
// Change x name after insert transpose op for x data relayout
x_name = "transpose_" + x_name;
x_node = graph->Get(x_name);
} else {
NeuronModel_addOperand(model, &xType);
x_node = graph->Add(x_name, dims_x);
}
} // End of else
VLOG(3) << "x node idx: " << x_node->index() << "x_dims: " << x_dims
<< ": x_scale: " << x_scale << ", xType: " << xType.dimensions[0]
<< ":" << xType.dimensions[1] << ":" << xType.dimensions[2] << ":"
<< xType.dimensions[3];
// Add y tensor type
NeuronOperandType yType;
yType.type = NEURON_TENSOR_QUANT8_ASYMM;
yType.scale = y_scale;
yType.zeroPoint = 128;
yType.dimensionCount = y_dims.size();
std::vector<uint32_t> dims_y = {(uint32_t)y_dims[0],
(uint32_t)y_dims[2],
(uint32_t)y_dims[3],
(uint32_t)y_dims[1]};
yType.dimensions = &dims_y[0];
std::shared_ptr<Node> y_node = nullptr;
if (graph->Has(y_name)) {
VLOG(3) << "Graph has " << y_name;
y_node = graph->Get(y_name);
} else {
if (graph->IsInput(y_name)) {
insert_transpose_node(ctx,
y_name,
"transpose_" + y_name,
{(uint32_t)y_dims[0],
(uint32_t)y_dims[1],
(uint32_t)y_dims[2],
(uint32_t)y_dims[3]},
dims_y,
{0, 2, 3, 1},
yType.scale,
yType.zeroPoint);
y_name = "transpose_" + y_name;
y_node = graph->Get(y_name);
} else {
NeuronModel_addOperand(model, &yType);
y_node = graph->Add(y_name, dims_y);
}
}
VLOG(3) << "y node idx: " << y_node->index() << "y_dims: " << y_dims
<< ": y_scale: " << y_scale << ", yType: " << yType.dimensions[0]
<< ":" << yType.dimensions[1] << ":" << yType.dimensions[2] << ":"
<< yType.dimensions[3];
// Add fuse operand type
NeuronOperandType int32Type;
int32Type.type = NEURON_INT32;
int32Type.dimensionCount = 0;
std::vector<uint32_t> dims_int32 = {1};
// Add fuse operand
std::shared_ptr<Node> fuse_node = nullptr;
NeuronModel_addOperand(model, &int32Type); // Operand 2: fuse
fuse_node = graph->Add(out_name + "_fuse", dims_int32);
// Add out tensor type
NeuronOperandType outType;
outType.type = NEURON_TENSOR_QUANT8_ASYMM;
outType.scale = out_scale;
outType.zeroPoint = 128;
outType.dimensionCount = out_dims.size();
std::vector<uint32_t> dims_out = {(uint32_t)out_dims[0],
(uint32_t)out_dims[2],
(uint32_t)out_dims[3],
(uint32_t)out_dims[1]};
outType.dimensions = &dims_out[0];
std::shared_ptr<Node> out_node = nullptr;
if (graph->Has(out_name)) {
VLOG(3) << "Graph has " << out_name;
out_node = graph->Get(out_name);
} else {
if (graph->IsOutput(out_name)) {
NeuronModel_addOperand(model, &outType);
out_node = graph->Add("transpose_" + out_name, dims_out);
} else {
NeuronModel_addOperand(model, &outType);
out_node = graph->Add(out_name, dims_out);
}
}
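// When out is a graph output, the ADD result is produced in NHWC under the
// "transpose_" prefix and transposed back to NCHW after the op is added (see
// the insert_transpose_node call below).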
VLOG(3) << "out node idx: " << out_node->index() << ", out_dims: " << out_dims
<< ", out_scale: " << out_scale
<< ", outType: " << outType.dimensions[0] << ":"
<< outType.dimensions[1] << ":" << outType.dimensions[2] << ":"
<< outType.dimensions[3];
// Set fuse value
NeuronModel_setOperandValue(
model, fuse_node->index(), fuse_val, sizeof(int32_t) * 1);
std::vector<uint32_t> addInIndex = {
x_node->index(), // 0: A tensor
y_node->index(), // 1: A tensor of the same OperandCode,
// and compatible dimensions as input 0
fuse_node->index()}; // 2: fuse
std::vector<uint32_t> addOutIndex = {out_node->index()};
if (op_type == "elementwise_add" ||
op_type == "fusion_elementwise_add_activation") {
neuron_errCode = NeuronModel_addOperation(model,
NEURON_ADD,
addInIndex.size(),
&addInIndex[0],
addOutIndex.size(),
&addOutIndex[0]);
} else {
LOG(WARNING) << "[APU] Unsupported op type: " << op_type;
return FAILED;
}
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(WARNING) << "ADD op fail:" << op_type;
return FAILED;
}
if (graph->IsOutput(out_name)) {
// Insert transpose for NHWC -> NCHW
insert_transpose_node(ctx,
"transpose_" + out_name,
out_name,
dims_out,
{(uint32_t)out_dims[0],
(uint32_t)out_dims[1],
(uint32_t)out_dims[2],
(uint32_t)out_dims[3]},
{0, 3, 1, 2},
outType.scale,
outType.zeroPoint);
out_node = graph->Get(out_name);
if (out_node == nullptr) return FAILED;
}
return REBUILD_WHEN_SHAPE_CHANGED;
@@ -67,3 +291,6 @@ REGISTER_SUBGRAPH_BRIDGE(elementwise_add,
REGISTER_SUBGRAPH_BRIDGE(elementwise_mul,
                         kAPU,
                         paddle::lite::subgraph::apu::ElementwiseConverter);
REGISTER_SUBGRAPH_BRIDGE(fusion_elementwise_add_activation,
kAPU,
paddle::lite::subgraph::apu::ElementwiseConverter);
@@ -77,12 +77,10 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
inType.dimensions = &dims_in[0];
std::shared_ptr<Node> in_node = nullptr;
if (graph->Has(input_name)) {
in_node = graph->Get(input_name);
VLOG(3) << "Graph has " << input_name << ",index: " << in_node->index();
} else {
NeuronModel_addOperand(model, &inType);  // Operand 0: input
in_node = graph->Add(input_name, dims_in);
}
VLOG(3) << "input_scale: " << input_scale
@@ -97,7 +95,7 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
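// Note: dims_w below swaps w_dims to (w_dims[1], w_dims[0]); this matches the
// usual NNAPI-style FULLY_CONNECTED weight layout of [num_units, input_size]
// (our reading, inferred from the swap rather than stated in this diff).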
wType.dimensionCount = w_dims.size();
std::vector<uint32_t> dims_w = {(uint32_t)w_dims[1], (uint32_t)w_dims[0]};
wType.dimensions = &dims_w[0];
NeuronModel_addOperand(model, &wType);  // Operand 1: weight
std::shared_ptr<Node> w_node = nullptr;
w_node = graph->Add(w_name, dims_w);
VLOG(3) << "w_scale size: " << w_scale.size() << ",w_scale: " << w_scale[0]
@@ -119,7 +117,7 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
biasType.dimensionCount = bias_dims.size();
std::vector<uint32_t> dims_bias = {(uint32_t)bias_dims[0]};
biasType.dimensions = &dims_bias[0];
NeuronModel_addOperand(model, &biasType);  // Operand 2: bias
bias_node = graph->Add(bias_name, dims_bias);
VLOG(3) << "Bias name: " << bias_name << ", bias dims: " << bias_dims
<< ", bias scale: " << biasType.scale
@@ -128,7 +126,7 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
biasType.dimensionCount = 1;
std::vector<uint32_t> dims_bias = {(uint32_t)n};
biasType.dimensions = &dims_bias[0];
NeuronModel_addOperand(model, &biasType);  // Operand 2: bias
bias_node = graph->Add(w_name + "_default_bias", dims_bias);
}
@@ -137,7 +135,7 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
fuseType.type = NEURON_INT32;
fuseType.dimensionCount = 0;
std::vector<uint32_t> dims_int32 = {0};
NeuronModel_addOperand(model, &fuseType);  // Operand 3: fuse
std::shared_ptr<Node> fuse_node = nullptr;
fuse_node = graph->Add(w_name + "_fuse", dims_int32);
@@ -147,12 +145,13 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
outType.scale = out_scale;
outType.zeroPoint = 128;
outType.dimensionCount = 2;
std::vector<uint32_t> dims_out = {(uint32_t)out_dims[0],
                                  (uint32_t)out_dims[1]};
outType.dimensions = &dims_out[0];
VLOG(3) << "out_scale: " << out_scale
<< ", outType: " << outType.dimensions[0] << " : "
<< outType.dimensions[1];
NeuronModel_addOperand(model, &outType);
std::shared_ptr<Node> out_node = nullptr;
out_node = graph->Add(out_name, dims_out);
@@ -190,29 +189,31 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
NeuronModel_setOperandValue(model,
                            bias_node->index(),
                            bias->raw_data(),
                            bias->memory_size());  // Operand 2: bias
} else {
auto int32_bias = std::make_shared<Tensor>();
int32_bias->Resize({1, out_dims[1]});
int32_bias->mutable_data<int32_t>();
memset(int32_bias->mutable_data<int32_t>(), 0, int32_bias->memory_size());
VLOG(3) << "default: " << int32_bias->memory_size();
neuron_errCode = NeuronModel_setOperandValue(
    model,
    bias_node->index(),
    int32_bias->raw_data(),
    int32_bias->memory_size());  // Operand 2: bias
bias_node->set_data(int32_bias);
}
// Add fuse value
int32_t fuse_val[1] = {0};
NeuronModel_setOperandValue(model,
                            fuse_node->index(),
                            fuse_val,
                            sizeof(int32_t) * 1);  // Operand 3: fuse
std::vector<uint32_t> addInIndex = {in_node->index(),     // 0: input
                                    w_node->index(),      // 1: weight
                                    bias_node->index(),   // 2: bias
                                    fuse_node->index()};  // 3: fuse
std::vector<uint32_t> addOutIndex = {out_node->index()};
neuron_errCode = NeuronModel_addOperation(model,
                                          NEURON_FULLY_CONNECTED,
...
@@ -28,7 +28,7 @@ int Graph::Add(const std::string& name, std::shared_ptr<Node> node) {
LOG(FATAL) << "[APU] Node " << name << " is redefined.";
return -1;
} else {
VLOG(5) << " Add: " << name << " : " << node->index();
auto ret = nodes_.insert(
    std::make_pair(name, std::vector<std::shared_ptr<Node>>()));
CHECK(ret.second);
...
@@ -22,3 +22,6 @@ USE_SUBGRAPH_BRIDGE(elementwise_mul, kAPU);
USE_SUBGRAPH_BRIDGE(fc, kAPU);
USE_SUBGRAPH_BRIDGE(pool2d, kAPU);
USE_SUBGRAPH_BRIDGE(softmax, kAPU);
USE_SUBGRAPH_BRIDGE(concat, kAPU);
USE_SUBGRAPH_BRIDGE(fusion_elementwise_add_activation, kAPU);
USE_SUBGRAPH_BRIDGE(conv2d_transpose, kAPU);
@@ -47,14 +47,14 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto ksize = op_info->GetAttr<std::vector<int>>("ksize");
std::vector<int> paddings = op_info->GetAttr<std::vector<int>>("paddings");
// Check pool mode
if ((pooling_type == "max") || (pooling_type == "avg")) {
} else {
LOG(WARNING) << "[APU] Unsupported pooling type: " << pooling_type;
return FAILED;
}
// Check padding mode
int pad_mode = 0;
std::string padding_algorithm("");
if (op_info->HasAttr("padding_algorithm")) {
@@ -66,7 +66,7 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
pad_mode = 5;
}
// Check paddings and strides
if (paddings.size() == 2L) {
for (size_t i = 0; i < 2L; ++i) {
int copy_pad = *(paddings.begin() + 2 * i);
@@ -107,60 +107,59 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
xType.dimensions = &dims_x[0];
std::shared_ptr<Node> x_node = nullptr;
if (graph->Has(x_name)) {
VLOG(3) << "Graph has " << x_name;
x_node = graph->Get(x_name);
} else {
NeuronModel_addOperand(model, &xType);  // Operand 0: x
x_node = graph->Add(x_name, dims_x);
}
VLOG(3) << "x_scale: " << x_scale << ", xType: " << xType.dimensions[0] << ":"
<< xType.dimensions[1] << ":" << xType.dimensions[2] << ":"
<< xType.dimensions[3];
VLOG(3) << "ksize:" << ksize[0] << ":" << ksize[1];
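// The scalar operands added below follow the NNAPI-style explicit-padding
// pooling signature: input, pad left/right/top/bottom, stride w/h,
// filter w/h, fuse code (our reading of the operand comments that follow).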
NeuronOperandType int32Type;
int32Type.type = NEURON_INT32;
int32Type.dimensionCount = 0;
std::vector<uint32_t> dims_int32 = {0};
std::shared_ptr<Node> paddingL_node = nullptr;
NeuronModel_addOperand(model, &int32Type);  // Operand 1: padding left
paddingL_node = graph->Add(x_name + "_padding_left", dims_int32);
std::shared_ptr<Node> paddingR_node = nullptr;
NeuronModel_addOperand(model, &int32Type);  // Operand 2: padding right
paddingR_node = graph->Add(x_name + "_padding_right", dims_int32);
std::shared_ptr<Node> paddingT_node = nullptr;
NeuronModel_addOperand(model, &int32Type);  // Operand 3: padding top
paddingT_node = graph->Add(x_name + "_padding_top", dims_int32);
std::shared_ptr<Node> paddingB_node = nullptr;
NeuronModel_addOperand(model, &int32Type);  // Operand 4: padding bottom
paddingB_node = graph->Add(x_name + "_padding_bottom", dims_int32);
std::shared_ptr<Node> strideW_node = nullptr;
NeuronModel_addOperand(model, &int32Type);  // Operand 5: stride width
strideW_node = graph->Add(x_name + "_stride_width", dims_int32);
std::shared_ptr<Node> strideH_node = nullptr;
NeuronModel_addOperand(model, &int32Type);  // Operand 6: stride height
strideH_node = graph->Add(x_name + "_stride_height", dims_int32);
std::shared_ptr<Node> filterW_node = nullptr;
NeuronModel_addOperand(model, &int32Type);  // Operand 7: filter width
filterW_node = graph->Add(x_name + "_filter_width", dims_int32);
std::shared_ptr<Node> filterH_node = nullptr;
NeuronModel_addOperand(model, &int32Type);  // Operand 8: filter height
filterH_node = graph->Add(x_name + "_filter_height", dims_int32);
std::shared_ptr<Node> fuse_node = nullptr;
NeuronModel_addOperand(model, &int32Type);  // Operand 9: fuse
fuse_node = graph->Add(x_name + "_pool_fuse", dims_int32);
// Add output tensor type
NeuronOperandType outType;
outType.type = NEURON_TENSOR_QUANT8_ASYMM;
@@ -176,10 +175,10 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
if (graph->Has(out_name)) {
out_node = graph->Get(out_name);
} else {
NeuronModel_addOperand(model, &outType);
out_node = graph->Add(out_name, dims_out);
}
VLOG(3) << "output_scale: " << out_scale
<< ", outType: " << outType.dimensions[0] << ":"
<< outType.dimensions[1] << ":" << outType.dimensions[2] << ":"
<< outType.dimensions[3];
@@ -201,19 +200,21 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
// Add Stride
int32_t stride_val[1];
stride_val[0] = strides[1];  // Entry 1: width stride
NeuronModel_setOperandValue(
    model, strideW_node->index(), stride_val, sizeof(int32_t) * 1);
stride_val[0] = strides[0];  // Entry 0: height stride
NeuronModel_setOperandValue(
    model, strideH_node->index(), stride_val, sizeof(int32_t) * 1);
// Add filter
int32_t filter_val[1];
filter_val[0] =
    global_pooling ? x_dims[3] : ksize[1];  // Entry 1: filter width
NeuronModel_setOperandValue(
    model, filterW_node->index(), filter_val, sizeof(int32_t) * 1);
filter_val[0] =
    global_pooling ? x_dims[2] : ksize[0];  // Entry 0: filter height
NeuronModel_setOperandValue(
    model, filterH_node->index(), filter_val, sizeof(int32_t) * 1);
...
@@ -64,12 +64,10 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
xType.dimensions = &dims_x[0];
std::shared_ptr<Node> x_node = nullptr;
if (graph->Has(x_name)) {
x_node = graph->Get(x_name);
VLOG(3) << "Graph has " << x_name << ",index: " << x_node->index();
} else {
NeuronModel_addOperand(model, &xType);  // Operand 0: input
x_node = graph->Add(x_name, dims_x);
}
VLOG(3) << "input_scale size: " << input_scale
@@ -80,7 +78,7 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
NeuronOperandType betaType;
betaType.type = NEURON_FLOAT32;
betaType.dimensionCount = 0;
NeuronModel_addOperand(model, &betaType);  // Operand 1: beta
std::shared_ptr<Node> beta_node = nullptr;
beta_node = graph->Add(x_name + "_beta", dims_int32);
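// beta is the scaling factor applied to the logits (operand 1 of
// NEURON_SOFTMAX); the constant value itself is set in a part of the
// converter elided from this hunk.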
@@ -88,7 +86,7 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
NeuronOperandType axisType;
axisType.type = NEURON_INT32;
axisType.dimensionCount = 0;
NeuronModel_addOperand(model, &axisType);  // Operand 2: axis
std::shared_ptr<Node> axis_node = nullptr;
axis_node = graph->Add(x_name + "_axis", dims_int32);
@@ -99,7 +97,7 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
outType.zeroPoint = 128;
outType.dimensionCount = x_dims.size();
outType.dimensions = &dims_x[0];
NeuronModel_addOperand(model, &outType);  // Operand 3: output
std::shared_ptr<Node> out_node = nullptr;
out_node = graph->Add(out_name, dims_x);
VLOG(3) << "out_scale: " << out_scale;
@@ -112,8 +110,9 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
axis_val[0] = axis;
NeuronModel_setOperandValue(
    model, axis_node->index(), axis_val, sizeof(int32_t) * 1);
std::vector<uint32_t> addInIndex = {x_node->index(),      // 0: input
                                    beta_node->index(),   // 1: beta
                                    axis_node->index()};  // 2: axis
std::vector<uint32_t> addOutIndex = {out_node->index()};
int neuron_errCode = NeuronModel_addOperation(model,
                                              NEURON_SOFTMAX,
...
@@ -39,22 +39,43 @@ bool HasInputArg(const OpInfo* op_info,
}
}

int insert_requant_node(void* ctx,
                        const std::string& input_name,
                        const std::string& output_name,
                        std::vector<uint32_t> input_shape,
                        std::vector<uint32_t> output_shape,
                        float scale_in,
                        float scale_out,
                        int32_t zeroPoint) {
int neuron_errCode;
auto graph = static_cast<Graph*>(ctx);
auto model = graph->model();
uint32_t numDevices = 0;
CHECK_EQ(Neuron_getDeviceCount(&numDevices), NEURON_NO_ERROR);
CHECK_GT(numDevices, (uint32_t)0);
NeuronDevice* targetDevice = nullptr;
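// Enumerate the available Neuron devices and pick the MediaTek DSP
// ("mtk-dsp"), the device expected to accept the MTK_REQUANTIZE extension op
// registered below.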
for (uint32_t i = 0; i < numDevices; ++i) {
NeuronDevice* device = nullptr;
Neuron_getDevice(i, &device);
const char* name;
NeuronDevice_getName(device, &name);
if (0 == strcmp(name, "mtk-dsp")) {
targetDevice = device;
break;
}
}
if (targetDevice == nullptr) {
LOG(FATAL) << "Insert mtk_requant op fail!";
return -1;
}
// Add input
NeuronOperandType inType;
inType.type = NEURON_TENSOR_QUANT8_ASYMM;
inType.scale = scale_in;
inType.zeroPoint = zeroPoint;
inType.dimensionCount = input_shape.size();
inType.dimensions = &input_shape[0];
@@ -64,15 +85,81 @@ void insert_transpose_node(void* ctx,
VLOG(3) << "Has " << input_name;
input_node = graph->Get(input_name);
} else {
neuron_errCode = NeuronModel_addOperand(model, &inType);
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(FATAL) << "Insert mtk_requant op fail!";
return -1;
}
VLOG(3) << "Add " << input_name;
input_node = graph->Add(input_name, input_shape);
}
// Add output
NeuronOperandType outType;
outType.type = NEURON_TENSOR_QUANT8_ASYMM;
outType.scale = scale_out;
outType.zeroPoint = zeroPoint;
outType.dimensionCount = output_shape.size();
outType.dimensions = &output_shape[0];
NeuronModel_addOperand(model, &outType);
std::shared_ptr<Node> output_node = nullptr;
output_node = graph->Add(output_name, output_shape);
std::vector<uint32_t> addInIndex = {input_node->index()};
std::vector<uint32_t> addOutIndex = {output_node->index()};
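// MTK_REQUANTIZE is a MediaTek vendor extension op registered through
// NeuronModel_addOperationExtension; judging by the operand types above, it
// rescales a quant8 tensor from scale_in to scale_out without changing shape.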
neuron_errCode = NeuronModel_addOperationExtension(model,
"MTK_REQUANTIZE",
"mediatek",
targetDevice,
addInIndex.size(),
&addInIndex[0],
addOutIndex.size(),
&addOutIndex[0]);
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(FATAL) << "Insert mtk_requant op fail!";
return -1;
}
return 0;
}
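// A minimal usage sketch of insert_requant_node (hypothetical tensor names
// and scales, for illustration only):
//   insert_requant_node(ctx, "conv_out", "requant_conv_out",
//                       dims, dims, /*scale_in=*/0.5f, /*scale_out=*/0.25f,
//                       /*zeroPoint=*/128);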
int insert_transpose_node(void* ctx,
const std::string& input_name,
const std::string& output_name,
std::vector<uint32_t> input_shape,
std::vector<uint32_t> output_shape,
std::vector<int32_t> axis,
float scale,
int32_t zeroPoint) {
int neuron_errCode;
auto graph = static_cast<Graph*>(ctx);
auto model = graph->model();
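// Emits a transpose op: the input tensor plus a constant 4-element perm
// operand (axis) produce an output with output_shape; used to relayout
// between NCHW and NHWC at subgraph boundaries.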
// Add input
NeuronOperandType inType;
inType.type = NEURON_TENSOR_QUANT8_ASYMM;
inType.scale = scale;
inType.zeroPoint = zeroPoint;
inType.dimensionCount = input_shape.size();
inType.dimensions = &input_shape[0];
std::shared_ptr<Node> input_node = nullptr;
if (graph->Has(input_name)) {
VLOG(5) << "Has " << input_name;
input_node = graph->Get(input_name);
} else {
neuron_errCode = NeuronModel_addOperand(model, &inType);
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(FATAL) << "Insert transpose op fail!";
return -1;
}
VLOG(5) << "Add " << input_name;
input_node = graph->Add(input_name, input_shape);
}
// Add perm
NeuronOperandType permsType;
permsType.type = NEURON_TENSOR_INT32;
@@ -80,22 +167,22 @@ void insert_transpose_node(void* ctx,
uint32_t dims_perms[1] = {4};
permsType.dimensions = dims_perms;
neuron_errCode = NeuronModel_addOperand(model, &permsType);
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(FATAL) << "Insert transpose op fail!";
return -1;
}
std::shared_ptr<Node> perms_node = nullptr;
perms_node = graph->Add(input_name + "_perms", {4});
VLOG(5) << "axis :" << axis[0] << ":" << axis[1] << ":" << axis[2] << ":"
<< axis[3];
neuron_errCode = NeuronModel_setOperandValue(
    model, perms_node->index(), &axis[0], sizeof(int32_t) * axis.size());
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(FATAL) << "Insert transpose op fail!";
return -1;
}
// Add output
@@ -106,7 +193,7 @@ void insert_transpose_node(void* ctx,
outType.dimensionCount = output_shape.size();
outType.dimensions = &output_shape[0];
NeuronModel_addOperand(model, &outType);
std::shared_ptr<Node> output_node = nullptr;
output_node = graph->Add(output_name, output_shape);
@@ -123,8 +210,10 @@ void insert_transpose_node(void* ctx,
&addOutIndex[0]);
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(FATAL) << "Insert transpose op fail!";
}
return 0;
}
void transpose(const int8_t* input_data,
@@ -135,9 +224,9 @@ void transpose(const int8_t* input_data,
int new_index = -1;
int dim[4] = {0};
std::vector<uint32_t> shape = input_shape;
VLOG(5) << input_shape[0] << ":" << input_shape[1] << ":" << input_shape[2]
<< ":" << input_shape[3];
VLOG(5) << axis[0] << ":" << axis[1] << ":" << axis[2] << ":" << axis[3];
for (dim[0] = 0; dim[0] < input_shape[0]; dim[0]++) {
for (dim[1] = 0; dim[1] < input_shape[1]; dim[1]++) {
for (dim[2] = 0; dim[2] < input_shape[2]; dim[2]++) {
@@ -164,9 +253,9 @@ void transposeAsym(const int8_t* input_data,
int new_index = -1;
int dim[4] = {0};
std::vector<uint32_t> shape = input_shape;
VLOG(5) << input_shape[0] << ":" << input_shape[1] << ":" << input_shape[2]
<< ":" << input_shape[3];
VLOG(5) << axis[0] << ":" << axis[1] << ":" << axis[2] << ":" << axis[3];
for (dim[0] = 0; dim[0] < input_shape[0]; dim[0]++) {
for (dim[1] = 0; dim[1] < input_shape[1]; dim[1]++) {
for (dim[2] = 0; dim[2] < input_shape[2]; dim[2]++) {
@@ -177,8 +266,8 @@ void transposeAsym(const int8_t* input_data,
dim[axis[0]] * shape[axis[1]] * shape[axis[2]] * shape[axis[3]] +
dim[axis[1]] * shape[axis[2]] * shape[axis[3]] +
dim[axis[2]] * shape[axis[3]] + dim[axis[3]];
// Per-layer quantized ops are asymmetric here, so shift the int8 value by 128
output_data[new_index] = input_data[old_index] + 128;
}
}
}
...
@@ -33,14 +33,23 @@ bool HasInputArg(const OpInfo* op_info,
                 const Scope* scope,
                 const std::string& argname);

int insert_requant_node(void* ctx,
                        const std::string& input_name,
                        const std::string& output_name,
                        std::vector<uint32_t> input_shape,
                        std::vector<uint32_t> output_shape,
                        float scale_in,
                        float scale_out,
                        int32_t zeroPoint);
int insert_transpose_node(void* ctx,
const std::string& input_name,
const std::string& output_name,
std::vector<uint32_t> input_shape,
std::vector<uint32_t> output_shape,
std::vector<int32_t> axis,
float scale,
int32_t zeroPoint);
void transpose(const int8_t* input_data,
               uint8_t* output_data,
...
@@ -33,6 +33,14 @@ bool SubgraphEngine::BuildDeviceProgram() {
BuildOriginProgram();
}
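// Microsecond wall-clock timer; defined ahead of model creation (it used to
// sit just before Build) so both the create and build phases can be timed.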
auto GetCurrentUS = []() -> double {
struct timeval time;
gettimeofday(&time, NULL);
return 1e+6 * time.tv_sec + time.tv_usec;
};
auto start_time = GetCurrentUS();
unsigned int version;
Neuron_getVersion(&version);
VLOG(3) << "Neuron Adapter version: " << version;
@@ -108,18 +116,16 @@ bool SubgraphEngine::BuildDeviceProgram() {
}
VLOG(3) << "[APU] APU NIR model created!";
VLOG(1) << "[APU] APU NIR model created, Create cost "
<< GetCurrentUS() - start_time << " us";

start_time = GetCurrentUS();
compilation_ = lite::apu::Device::Global().Build(model_);
if (compilation_ == nullptr) {
LOG(WARNING) << "[APU] Build APU DLA model failed!";
return false;
}
VLOG(1) << "[APU] APU DLA model created, Build cost "
<< GetCurrentUS() - start_time << " us";
return true;
}
@@ -176,7 +182,7 @@ bool SubgraphEngine::LaunchDeviceProgram() {
}
}
NeuronExecution_free(run);
VLOG(1) << "[APU] Process cost " << GetCurrentUS() - start_time << " us";
return true;
}
...