未验证 提交 31903cf1 编写于 作者: B bzhang5 提交者: GitHub

Tensorflow serializer (#1109)

* tensorflow serializer version 1

* tf serializer version 1

* support tensorflow mobilenet

* Update CMakeLists.txt

* add tf serializer
Co-authored-by: Nbzhang <bzhang@openailab.com>
Co-authored-by: Nbzhang5 <bzhang5@users.noreply.github.com>
上级 baf47d36
...@@ -37,6 +37,45 @@ file(GLOB_RECURSE NCNN_SERIALIZER_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/ncnn/*.cpp") ...@@ -37,6 +37,45 @@ file(GLOB_RECURSE NCNN_SERIALIZER_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/ncnn/*.cpp")
# TENSORFLOW # TENSORFLOW
file(GLOB_RECURSE TF_SERIALIZER_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/tensorflow/*.cpp") file(GLOB_RECURSE TF_SERIALIZER_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/tensorflow/*.cpp")
# NOTE(review): ${serializer_src} does not appear to be defined at this point in
# the file — confirm this append is intentional (TF sources are appended below).
list(APPEND TENGINE_LIB_SRCS ${serializer_src})

# TensorFlow protobuf message definitions compiled into the serializer.
set(TF_PROTO_NAMES
    graph
    function
    node_def
    op_def
    attr_value
    tensor
    tensor_shape
    types
    versions
    resource_handle)

set(TF_PROTO_PATH     ${CMAKE_CURRENT_SOURCE_DIR}/tensorflow)
set(TF_PROTO_OUT_PATH ${CMAKE_CURRENT_BINARY_DIR}/tensorflow)

# Derive, from the single name list above: the generated .pb.cc outputs, the
# .proto inputs, and one protoc invocation per proto file.
set(TF_PROTO_SRC "")
set(TF_PROTO_FILES "")
set(TF_PROTO_GEN_COMMANDS "")
foreach(proto_name ${TF_PROTO_NAMES})
    list(APPEND TF_PROTO_SRC ${TF_PROTO_OUT_PATH}/${proto_name}.pb.cc)
    list(APPEND TF_PROTO_FILES ${TF_PROTO_PATH}/${proto_name}.proto)
    list(APPEND TF_PROTO_GEN_COMMANDS
         COMMAND ${Protobuf_PROTOC_EXECUTABLE} --cpp_out=${TF_PROTO_OUT_PATH} --proto_path=${TF_PROTO_PATH} ${TF_PROTO_PATH}/${proto_name}.proto)
endforeach()

ADD_CUSTOM_COMMAND(OUTPUT ${TF_PROTO_SRC}
                   # portable replacement for the non-Windows `mkdir -p`
                   COMMAND ${CMAKE_COMMAND} -E make_directory ${TF_PROTO_OUT_PATH}
                   ${TF_PROTO_GEN_COMMANDS}
                   # regenerate when any .proto input changes
                   DEPENDS ${TF_PROTO_FILES})

# BUGFIX: the custom target must depend on the generated sources (the custom
# command's OUTPUT), not on the output *directory* — depending on the directory
# path never triggers the protoc generation step.
ADD_CUSTOM_TARGET(TF_SERIALIZER_TARGET DEPENDS ${TF_PROTO_SRC})

include_directories(${TF_PROTO_OUT_PATH})
list(APPEND TF_SERIALIZER_SRCS ${TF_PROTO_SRC})
# the generated pb.cc # the generated pb.cc
protobuf_generate_cpp(TF_PROTO_SRCS TF_PROTO_HDRS protobuf_generate_cpp(TF_PROTO_SRCS TF_PROTO_HDRS
tensorflow/graph.proto tensorflow/graph.proto
...@@ -63,6 +102,7 @@ include_directories(${TFLITE_FLATBUFFERS_PATH}) ...@@ -63,6 +102,7 @@ include_directories(${TFLITE_FLATBUFFERS_PATH})
# DARKNET # DARKNET
file(GLOB_RECURSE DARKNET_SERIALIZER_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/darknet/*.cpp") file(GLOB_RECURSE DARKNET_SERIALIZER_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/darknet/*.cpp")
# SAVE GRAPH # SAVE GRAPH
FILE(GLOB_RECURSE SAVE_GRAPH_SRCS "${CMAKE_SOURCE_DIR}/tools/save_graph/*.cpp" "${CMAKE_SOURCE_DIR}/tools/save_graph/*.c") FILE(GLOB_RECURSE SAVE_GRAPH_SRCS "${CMAKE_SOURCE_DIR}/tools/save_graph/*.cpp" "${CMAKE_SOURCE_DIR}/tools/save_graph/*.c")
......
...@@ -370,7 +370,6 @@ int tensorflow_serializer::construct_graph() ...@@ -370,7 +370,6 @@ int tensorflow_serializer::construct_graph()
TLOG_ERR("cannot find input: %s for node: %s \n", input_name.c_str(), name.c_str()); TLOG_ERR("cannot find input: %s for node: %s \n", input_name.c_str(), name.c_str());
return false; return false;
} }
cur_node->inputs.push_back(input_node); cur_node->inputs.push_back(input_node);
input_node->outputs.push_back(cur_node); input_node->outputs.push_back(cur_node);
} }
...@@ -768,6 +767,7 @@ void tensorflow_serializer::MergeReluMinimum() ...@@ -768,6 +767,7 @@ void tensorflow_serializer::MergeReluMinimum()
int tensorflow_serializer::optimize_graph() int tensorflow_serializer::optimize_graph()
{ {
fused_node_count = 0;
/* first clean up the predictions module of TF */ /* first clean up the predictions module of TF */
auto ir = tf_graph.seq_nodes.begin(); auto ir = tf_graph.seq_nodes.begin();
...@@ -921,7 +921,6 @@ int tensorflow_serializer::optimize_graph() ...@@ -921,7 +921,6 @@ int tensorflow_serializer::optimize_graph()
break; break;
} }
ir++; ir++;
} }
...@@ -982,10 +981,15 @@ int tensorflow_serializer::optimize_graph() ...@@ -982,10 +981,15 @@ int tensorflow_serializer::optimize_graph()
if (output_node->op == "BiasAdd" || output_node->op == "Add") if (output_node->op == "BiasAdd" || output_node->op == "Add")
{ {
cur_node->biasAdd = 1;
fused_node_count++;
MergeChildNode(cur_node, output_node); MergeChildNode(cur_node, output_node);
} }
else
{
cur_node->biasAdd = 0;
}
} }
ir++; ir++;
} }
...@@ -1809,6 +1813,21 @@ int tensorflow_serializer::optimize_rnn() ...@@ -1809,6 +1813,21 @@ int tensorflow_serializer::optimize_rnn()
return true; return true;
} }
/*
 * Look up the index of the IR tensor whose name matches the given TF node's
 * name.  Returns the tensor index, or -1 when no such tensor exists.
 *
 * NOTE(review): the `t_name` parameter is accepted but never used — the
 * lookup matches against `node->name` instead.  Confirm with the callers
 * which name is actually meant to be searched.
 */
int find_tensor_index(ir_graph_t* graph, std::string t_name, TFNode* node)
{
    (void)t_name; /* see note above: currently unused */

    for (int i = 0; i < graph->tensor_num; i++)
    {
        const ir_tensor_t* const tensor = graph->tensor_list[i];
        /* first match wins; tensor names are expected to be unique */
        if (tensor->name && 0 == strcmp(tensor->name, node->name.c_str()))
            return i;
    }

    return -1;
}
int tensorflow_serializer::generate_graph(ir_graph_t* graph) int tensorflow_serializer::generate_graph(ir_graph_t* graph)
{ {
int node_number = tf_graph.seq_nodes.size(); int node_number = tf_graph.seq_nodes.size();
...@@ -1844,9 +1863,22 @@ int tensorflow_serializer::generate_graph(ir_graph_t* graph) ...@@ -1844,9 +1863,22 @@ int tensorflow_serializer::generate_graph(ir_graph_t* graph)
} }
} }
for (int i = 0; i < (int)tf_graph.seq_nodes.size(); i++) int count = 0;
if (fused_node_count > FUSE_NODE)
{ {
TFNode* tf_node = tf_graph.seq_nodes[i]; count = (int)out_graph.size();
}
else
{
count = (int)tf_graph.seq_nodes.size();
}
for (int i = 0; i < count; i++)
{
TFNode* tf_node = nullptr;
if (fused_node_count > FUSE_NODE)
tf_node = out_graph[i];
else
tf_node = tf_graph.seq_nodes[i];
if (tf_node->op == "Placeholder" || tf_node->op == "Const") if (tf_node->op == "Placeholder" || tf_node->op == "Const")
continue; continue;
...@@ -1861,36 +1893,48 @@ int tensorflow_serializer::generate_graph(ir_graph_t* graph) ...@@ -1861,36 +1893,48 @@ int tensorflow_serializer::generate_graph(ir_graph_t* graph)
{ {
ir_node = get_ir_graph_node(graph, node_idx); ir_node = get_ir_graph_node(graph, node_idx);
} }
for (int in = 0; in < tf_node->inputs.size(); in++) for (int in = 0; in < tf_node->inputs.size(); in++)
{ {
TFNode* node = tf_node->inputs[in]; TFNode* node = tf_node->inputs[in];
std::string t_name = tf_node->name;
int tensor_idx = get_ir_tensor_index_from_name(graph, node->name.c_str()); int tensor_idx = get_ir_tensor_index_from_name(graph, node->name.c_str());
ir_tensor_t* tensor = nullptr; ir_tensor_t* tensor = nullptr;
if (node->name == "Placeholder") if (node->name == "Placeholder")
{ {
continue; continue;
} }
if (tensor_idx < 0) if (tensor_idx < 0)
tensor = create_ir_tensor(graph, tf_node->name.c_str(), TENGINE_DT_FP32); {
tensor = create_ir_tensor(graph, t_name.c_str(), TENGINE_DT_FP32);
}
else else
{
tensor = get_ir_graph_tensor(graph, tensor_idx); tensor = get_ir_graph_tensor(graph, tensor_idx);
}
set_ir_node_input_tensor(ir_node, in, tensor); set_ir_node_input_tensor(ir_node, in, tensor);
input_tensors.push_back(node->name.c_str()); input_tensors.push_back(node->name.c_str());
} }
for (int out = 0; out < tf_node->outputs.size(); out++) for (int out = 0; out < tf_node->outputs.size(); out++)
{ {
TFNode* node = tf_node->outputs[out]; TFNode* node = tf_node->outputs[out];
int tensor_idx = get_ir_tensor_index_from_name(graph, node->name.c_str());
ir_tensor_t* tensor = nullptr; ir_tensor_t* tensor = nullptr;
std::string t_name = tf_node->name;
int tensor_idx = get_ir_tensor_index_from_name(graph, node->name.c_str());
if (tensor_idx < 0) if (tensor_idx < 0)
tensor = create_ir_tensor(graph, tf_node->name.c_str(), TENGINE_DT_FP32); {
tensor = create_ir_tensor(graph, t_name.c_str(), TENGINE_DT_FP32);
}
else else
{
tensor = get_ir_graph_tensor(graph, tensor_idx); tensor = get_ir_graph_tensor(graph, tensor_idx);
}
set_ir_node_output_tensor(ir_node, out, tensor); set_ir_node_output_tensor(ir_node, out, tensor);
output_tensors.push_back(node->name.c_str()); output_tensors.push_back(node->name.c_str());
} }
// printf("node op : %s \n", tf_node->op.c_str());
op_load_t loader = op_load_map[tf_node->op].second; op_load_t loader = op_load_map[tf_node->op].second;
if (loader(tf_node, tf_graph, graph, ir_node) < 0) if (loader(tf_node, tf_graph, graph, ir_node) < 0)
{ {
...@@ -1943,6 +1987,60 @@ int tensorflow_serializer::set_graph_output(ir_graph_t* graph) ...@@ -1943,6 +1987,60 @@ int tensorflow_serializer::set_graph_output(ir_graph_t* graph)
} }
set_ir_graph_output_node(graph, node_idx.data(), output_nodes.size()); set_ir_graph_output_node(graph, node_idx.data(), output_nodes.size());
return 0; return 0;
}
/*
 * Returns true when every non-Const input of `node` has already been
 * visited, i.e. the node is ready to be emitted in topological order.
 * Const producers carry no execution dependency and are ignored.
 */
bool AllInputCheck(TFNode* node, std::vector<int>& visited)
{
    for (TFNode* producer : node->inputs)
    {
        /* constants never block readiness */
        if (producer->op == "Const")
            continue;

        if (visited[producer->idx] != 1)
            return false;
    }

    return true;
}
/*
 * Re-derive a topologically ordered node list (out_graph) from tf_graph by a
 * stack-based traversal.  Used after aggressive fusion, when seq_nodes order
 * can no longer be trusted.  Returns 0 on success.
 *
 * NOTE(review): the `graph` parameter is unused here — kept for signature
 * symmetry with the other load stages.
 */
int tensorflow_serializer::DFSGraph(ir_graph_t* graph)
{
    std::stack<TFNode*> visit_stack;
    /* visited[i] == 1 once the node with idx i has been emitted.
       NOTE(review): 65535 is a hard cap on TFNode::idx — verify idx can
       never exceed it after graph construction/fusion. */
    std::vector<int> visited(65535, 0);
    /* queued[] ensures a node is pushed (and therefore emitted) at most
       once, even when reachable through several of its inputs. */
    std::vector<int> queued(65535, 0);
    std::vector<TFNode*> starts;

    int node_number = tf_graph.seq_nodes.size();

    /* Seed the traversal with every real node fed directly by a Placeholder. */
    for (int i = 0; i < node_number; i++)
    {
        TFNode* node = tf_graph.seq_nodes[i];
        if (node->op == "Placeholder" || node->op == "Const")
            continue;
        /* guard against nodes with no inputs before dereferencing inputs[0] */
        if (!node->inputs.empty() && node->inputs[0]->op == "Placeholder")
        {
            starts.push_back(node);
        }
    }

    for (size_t i = 0; i < starts.size(); i++)
    {
        visit_stack.push(starts[i]);
        queued[starts[i]->idx] = 1;
    }

    while (!visit_stack.empty())
    {
        TFNode* node = visit_stack.top();
        visit_stack.pop();

        /* BUGFIX: mark a node visited only when it is actually emitted.
           The original marked every successor visited before processing it,
           which corrupted the all-inputs-ready check for downstream nodes
           and allowed duplicate entries in out_graph. */
        visited[node->idx] = 1;
        out_graph.push_back(node);

        for (int out = 0; out < (int)node->outputs.size(); out++)
        {
            TFNode* out_node = node->outputs[out];
            if (!queued[out_node->idx] && AllInputCheck(out_node, visited))
            {
                visit_stack.push(out_node);
                queued[out_node->idx] = 1;
            }
        }
    }

    return 0;
}
...@@ -1954,6 +2052,11 @@ int tensorflow_serializer::load_graph(ir_graph_t* graph) ...@@ -1954,6 +2052,11 @@ int tensorflow_serializer::load_graph(ir_graph_t* graph)
return false; return false;
if (optimize_graph() < 0) if (optimize_graph() < 0)
return -1; return -1;
if (fused_node_count > FUSE_NODE)
{
if (DFSGraph(graph) < 0)
return -1;
}
if (set_graph_input(graph) < 0) if (set_graph_input(graph) < 0)
return -1; return -1;
fprintf(stderr, "Process 2: Finish set graph input \n"); fprintf(stderr, "Process 2: Finish set graph input \n");
...@@ -2246,7 +2349,6 @@ int load_softmax(TFNode* tf_node, TFGraph& tf_graph, ir_graph_t* graph, ir_node_ ...@@ -2246,7 +2349,6 @@ int load_softmax(TFNode* tf_node, TFGraph& tf_graph, ir_graph_t* graph, ir_node_
} }
else else
param->axis = 1; param->axis = 1;
return 0; return 0;
} }
int load_relu(TFNode* tf_node, TFGraph& tf_graph, ir_graph_t* graph, ir_node_t* node) int load_relu(TFNode* tf_node, TFGraph& tf_graph, ir_graph_t* graph, ir_node_t* node)
...@@ -2392,6 +2494,75 @@ int load_pad(TFNode* tf_node, TFGraph& tf_graph, ir_graph_t* graph, ir_node_t* n ...@@ -2392,6 +2494,75 @@ int load_pad(TFNode* tf_node, TFGraph& tf_graph, ir_graph_t* graph, ir_node_t* n
param->pad_3_w = paddings[7]; param->pad_3_w = paddings[7];
return 0; return 0;
} }
/*
 * Load a TF ConcatV2 node into a Tengine concat node.
 *
 * The concat axis is read from the Const axis input's tensor value
 * (pb_defs[1]); TF numbers axes in NHWC order, so the value is remapped to
 * NCHW through NCHW_axis_swap.  Falls back to axis 3 when no "value"
 * attribute is present.  Returns 0 on success, -1 on an unsupported axis.
 */
int load_concat(TFNode* tf_node, TFGraph& tf_graph, ir_graph_t* graph, ir_node_t* node)
{
    struct concat_param* param = (struct concat_param*)node->op.param_mem;
    const tensorflow::NodeDef* node_def = tf_node->pb_defs[1];
    tensorflow::AttrValue value;

    if (GetAttrValue(node_def, "value", value))
    {
        const tensorflow::TensorProto& tf_tensor = value.tensor();
        int axis = tf_tensor.int_val(0);

        /* BUGFIX: normalize negative axes (TF allows e.g. -1) and
           bounds-check before indexing the 4-entry NCHW_axis_swap table;
           the original indexed it unchecked. */
        if (axis < 0)
            axis += 4;
        if (axis < 0 || axis > 3)
        {
            TLOG_ERR("concat: unsupported axis %d \n", axis);
            return -1;
        }
        param->axis = NCHW_axis_swap[axis];
    }
    else
    {
        param->axis = 3;
    }

    /* loaders return 0 on success by convention (original returned `true`) */
    return 0;
}
/*
 * Load a TF MatMul node as a Tengine FC node.
 *
 * Reads transpose_a/transpose_b from the NodeDef.  When the weight matrix is
 * not already transposed (transB == 0), the weight tensor is transposed in
 * place so it matches the (num_output, k) layout FC expects, and its dims are
 * swapped accordingly.  Returns 0 on success, -1 on allocation failure.
 */
int load_gemm(TFNode* tf_node, TFGraph& tf_graph, ir_graph_t* graph, ir_node_t* node)
{
    TFNode* input1 = tf_node->inputs[1];
    struct gemm_param* param = (struct gemm_param*)node->op.param_mem;
    const tensorflow::NodeDef* node_def = tf_node->pb_defs[0];
    tensorflow::AttrValue value;

    if (GetAttrValue(node_def, "transpose_a", value))
    {
        param->transA = value.b();
    }
    if (GetAttrValue(node_def, "transpose_b", value))
    {
        param->transB = value.b();
    }
    param->alpha = 1;
    param->beta = 1;

    ir_tensor_t* weight_tensor = input1->ir_tensor;
    if (!param->transB)
    {
        /* transpose the (k x n) weights to (n x k) in place and swap dims */
        int k = weight_tensor->dims[0];
        int n = weight_tensor->dims[1];
        weight_tensor->dims[0] = n;
        weight_tensor->dims[1] = k;

        float* tmp = (float*)malloc((size_t)k * n * sizeof(float));
        /* BUGFIX: check the allocation before writing through it */
        if (tmp == nullptr)
            return -1;

        float* data = (float*)weight_tensor->data;
        for (int i = 0; i < n; i++)
            for (int j = 0; j < k; j++)
            {
                tmp[i * k + j] = data[j * n + i];
            }
        memcpy(data, tmp, (size_t)n * k * sizeof(float));
        free(tmp);
    }

    /* NOTE(review): the same param_mem is reinterpreted here as fc_param to
       set the output size — confirm gemm_param and fc_param layouts are
       compatible for this aliasing. */
    struct fc_param* fcparam = (struct fc_param*)node->op.param_mem;
    fcparam->num_output = weight_tensor->dims[0];

    return 0;
}
void tensorflow_serializer::register_op_load() void tensorflow_serializer::register_op_load()
{ {
op_load_map["AvgPool"] = std::pair<int, op_load_t>(OP_POOL, load_pool); op_load_map["AvgPool"] = std::pair<int, op_load_t>(OP_POOL, load_pool);
...@@ -2415,4 +2586,6 @@ void tensorflow_serializer::register_op_load() ...@@ -2415,4 +2586,6 @@ void tensorflow_serializer::register_op_load()
op_load_map["AddN"] = std::pair<int, op_load_t>(OP_ELTWISE, load_eltwise); op_load_map["AddN"] = std::pair<int, op_load_t>(OP_ELTWISE, load_eltwise);
op_load_map["Mean"] = std::pair<int, op_load_t>(OP_REDUCTION, load_reduction); op_load_map["Mean"] = std::pair<int, op_load_t>(OP_REDUCTION, load_reduction);
op_load_map["Pad"] = std::pair<int, op_load_t>(OP_PAD, load_pad); op_load_map["Pad"] = std::pair<int, op_load_t>(OP_PAD, load_pad);
op_load_map["ConcatV2"] = std::pair<int, op_load_t>(OP_CONCAT, load_concat);
op_load_map["MatMul"] = std::pair<int, op_load_t>(OP_FC, load_gemm);
} }
\ No newline at end of file
...@@ -32,6 +32,8 @@ ...@@ -32,6 +32,8 @@
#include <set> #include <set>
#include <algorithm> #include <algorithm>
#include <fstream> #include <fstream>
#include <queue>
#include <stack>
#include "graph.pb.h" #include "graph.pb.h"
...@@ -59,6 +61,8 @@ extern "C" { ...@@ -59,6 +61,8 @@ extern "C" {
#define TF_RNN_GRU 1 #define TF_RNN_GRU 1
#define TF_RNN_BASIC_LSTM 2 #define TF_RNN_BASIC_LSTM 2
#define TF_RNN_BASIC_RNN 3 #define TF_RNN_BASIC_RNN 3
#define FUSE_NODE 10
static int NCHW_axis_swap[] = {0, 2, 3, 1};
struct TFNode struct TFNode
{ {
...@@ -72,6 +76,9 @@ struct TFNode ...@@ -72,6 +76,9 @@ struct TFNode
ir_tensor_t* ir_tensor; ir_tensor_t* ir_tensor;
bool no_static_node; bool no_static_node;
int BNAddType; int BNAddType;
std::vector<std::string> in_tensors;
std::vector<std::string> out_tensors;
int biasAdd;
TFNode() TFNode()
{ {
...@@ -245,10 +252,15 @@ private: ...@@ -245,10 +252,15 @@ private:
int FuseComposedBN(TFNode* cur_node); int FuseComposedBN(TFNode* cur_node);
int optimize_rnn(); int optimize_rnn();
void CleanupResizeNearestNeighbor(); void CleanupResizeNearestNeighbor();
int DFSGraph(ir_graph_t* graph);
tensorflow::GraphDef tf_net; tensorflow::GraphDef tf_net;
TFGraph tf_graph; TFGraph tf_graph;
std::vector<std::string> input_tensors; std::vector<std::string> input_tensors;
std::vector<std::string> output_tensors; std::vector<std::string> output_tensors;
std::set<TFNode*> ck_graph;
std::vector<TFNode*> out_graph;
int fused_node_count;
}; };
#endif #endif
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册