Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MindSpore
mindspore
提交
bc4c5afc
M
mindspore
项目概览
MindSpore
/
mindspore
通知
35
Star
15
Fork
15
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
bc4c5afc
编写于
9月 04, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
9月 04, 2020
浏览文件
操作
浏览文件
下载
差异文件
!5667 add kernel select after optimize pass
Merge pull request !5667 from zyli2020/code_refactor
上级
f3742fdc
c3d69186
变更
12
隐藏空白更改
内联
并排
Showing
12 changed file
with
103 addition
and
58 deletion
+103
-58
mindspore/ccsrc/backend/kernel_compiler/gpu/nn/activation_grad_kernel.h
...c/backend/kernel_compiler/gpu/nn/activation_grad_kernel.h
+9
-3
mindspore/ccsrc/backend/optimizer/CMakeLists.txt
mindspore/ccsrc/backend/optimizer/CMakeLists.txt
+5
-1
mindspore/ccsrc/backend/optimizer/gpu/batch_norm_add_relu_fusion.cc
...ccsrc/backend/optimizer/gpu/batch_norm_add_relu_fusion.cc
+3
-1
mindspore/ccsrc/backend/optimizer/gpu/batch_norm_add_relu_grad_fusion.cc
.../backend/optimizer/gpu/batch_norm_add_relu_grad_fusion.cc
+4
-1
mindspore/ccsrc/backend/optimizer/gpu/batch_norm_relu_fusion.cc
...ore/ccsrc/backend/optimizer/gpu/batch_norm_relu_fusion.cc
+3
-1
mindspore/ccsrc/backend/optimizer/gpu/batch_norm_relu_grad_fusion.cc
...csrc/backend/optimizer/gpu/batch_norm_relu_grad_fusion.cc
+3
-1
mindspore/ccsrc/backend/session/gpu_session.cc
mindspore/ccsrc/backend/session/gpu_session.cc
+18
-41
mindspore/ccsrc/backend/session/gpu_session.h
mindspore/ccsrc/backend/session/gpu_session.h
+0
-5
mindspore/ccsrc/runtime/device/gpu/cuda_env_checker.cc
mindspore/ccsrc/runtime/device/gpu/cuda_env_checker.cc
+1
-1
mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.cc
mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.cc
+29
-2
mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.h
mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.h
+24
-1
tests/ut/cpp/CMakeLists.txt
tests/ut/cpp/CMakeLists.txt
+4
-0
未找到文件。
mindspore/ccsrc/backend/kernel_compiler/gpu/nn/activation_grad_kernel.h
浏览文件 @
bc4c5afc
...
...
@@ -96,9 +96,15 @@ class ActivationGradGpuKernel : public GpuKernel {
const
int
split_dim
=
4
;
if
(
input_shape
.
size
()
<=
split_dim
)
{
ShapeNdTo4d
(
input_shape
,
&
shape
);
CHECK_CUDNN_RET_WITH_EXCEPT
(
cudnnSetTensor4dDescriptor
(
data_descriptor_
,
CUDNN_TENSOR_NCHW
,
cudnn_data_type_
,
shape
[
0
],
shape
[
1
],
shape
[
2
],
shape
[
3
]),
"SetTensor4dDescriptor failed"
);
if
(
AnfAlgo
::
GetInputFormat
(
kernel_node
,
0
)
==
kOpFormat_NHWC
)
{
CHECK_CUDNN_RET_WITH_EXCEPT
(
cudnnSetTensor4dDescriptor
(
data_descriptor_
,
CUDNN_TENSOR_NHWC
,
cudnn_data_type_
,
shape
[
0
],
shape
[
3
],
shape
[
1
],
shape
[
2
]),
"cudnnSetTensor4dDescriptor failed"
);
}
else
{
CHECK_CUDNN_RET_WITH_EXCEPT
(
cudnnSetTensor4dDescriptor
(
data_descriptor_
,
CUDNN_TENSOR_NCHW
,
cudnn_data_type_
,
shape
[
0
],
shape
[
1
],
shape
[
2
],
shape
[
3
]),
"cudnnSetTensor4dDescriptor failed"
);
}
}
else
{
CudnnSetTensorNdDescriptor
(
input_shape
,
data_descriptor_
,
cudnn_data_type_
);
}
...
...
mindspore/ccsrc/backend/optimizer/CMakeLists.txt
浏览文件 @
bc4c5afc
...
...
@@ -2,7 +2,6 @@ file(GLOB_RECURSE _PREACTIVATE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"common/*.cc"
"mem_reuse/*.cc"
"pass/*.cc"
"gpu/*.cc"
)
if
(
ENABLE_D
)
...
...
@@ -10,5 +9,10 @@ if (ENABLE_D)
list
(
APPEND _PREACTIVATE_SRC_LIST
${
_D_SRC_LIST
}
)
endif
()
if
(
ENABLE_GPU
)
file
(
GLOB_RECURSE _GPU_SRC_LIST RELATIVE
${
CMAKE_CURRENT_SOURCE_DIR
}
"gpu/*.cc"
)
list
(
APPEND _PREACTIVATE_SRC_LIST
${
_GPU_SRC_LIST
}
)
endif
()
set_property
(
SOURCE
${
_PREACTIVATE_SRC_LIST
}
PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_PRE_ACT
)
add_library
(
_mindspore_backend_optimizer_obj OBJECT
${
_PREACTIVATE_SRC_LIST
}
)
mindspore/ccsrc/backend/optimizer/gpu/batch_norm_add_relu_fusion.cc
浏览文件 @
bc4c5afc
...
...
@@ -23,6 +23,7 @@
#include "ir/primitive.h"
#include "utils/utils.h"
#include "backend/optimizer/common/helper.h"
#include "runtime/device/gpu/kernel_info_setter.h"
namespace
mindspore
{
namespace
opt
{
...
...
@@ -46,7 +47,7 @@ const AnfNodePtr BatchNormAddReluFusion::Process(const FuncGraphPtr &graph, cons
auto
batch_norm_ex
=
AnfAlgo
::
GetInputNode
(
utils
::
cast
<
CNodePtr
>
(
tuple_get_item
),
0
);
MS_EXCEPTION_IF_NULL
(
batch_norm_ex
);
if
(
AnfAlgo
::
Get
OutputInferDataType
(
batch_norm_ex
,
0
)
!=
kNumberTypeFloat16
)
{
if
(
AnfAlgo
::
Get
InputFormat
(
batch_norm_ex
,
0
)
!=
kOpFormat_NHWC
)
{
return
nullptr
;
}
...
...
@@ -83,6 +84,7 @@ const AnfNodePtr BatchNormAddReluFusion::Process(const FuncGraphPtr &graph, cons
auto
manager
=
graph
->
manager
();
MS_EXCEPTION_IF_NULL
(
manager
);
manager
->
Replace
(
batch_norm_ex
,
fused_batch_norm_with_add_relu
);
device
::
gpu
::
SetKernelInfo
(
fused_batch_norm_with_add_relu
);
return
tuple_get_item
;
}
}
// namespace opt
...
...
mindspore/ccsrc/backend/optimizer/gpu/batch_norm_add_relu_grad_fusion.cc
浏览文件 @
bc4c5afc
...
...
@@ -24,6 +24,7 @@
#include "ir/primitive.h"
#include "utils/utils.h"
#include "backend/optimizer/common/helper.h"
#include "runtime/device/gpu/kernel_info_setter.h"
namespace
mindspore
{
namespace
opt
{
...
...
@@ -123,7 +124,8 @@ const AnfNodePtr BatchNormAddReluGradFusion::Process(const FuncGraphPtr &graph,
const
EquivPtr
&
)
const
{
MS_EXCEPTION_IF_NULL
(
graph
);
MS_EXCEPTION_IF_NULL
(
node
);
if
(
AnfAlgo
::
GetOutputInferDataType
(
node
,
0
)
!=
kNumberTypeFloat16
)
{
if
(
AnfAlgo
::
GetInputFormat
(
node
,
0
)
!=
kOpFormat_NHWC
)
{
return
nullptr
;
}
...
...
@@ -169,6 +171,7 @@ const AnfNodePtr BatchNormAddReluGradFusion::Process(const FuncGraphPtr &graph,
AnfAlgo
::
CopyNodeAttrs
(
node
,
fused_batch_norm_add_relu_grad
);
SetShapeAndType
(
fused_batch_norm_add_relu_grad
,
node
,
relu_grad
);
ReplaceOutput
(
graph
,
node
,
relu_grad
,
fused_batch_norm_add_relu_grad
);
device
::
gpu
::
SetKernelInfo
(
fused_batch_norm_add_relu_grad
);
return
nullptr
;
}
}
// namespace opt
...
...
mindspore/ccsrc/backend/optimizer/gpu/batch_norm_relu_fusion.cc
浏览文件 @
bc4c5afc
...
...
@@ -23,6 +23,7 @@
#include "ir/primitive.h"
#include "utils/utils.h"
#include "backend/optimizer/common/helper.h"
#include "runtime/device/gpu/kernel_info_setter.h"
namespace
mindspore
{
namespace
opt
{
...
...
@@ -43,7 +44,7 @@ const AnfNodePtr BatchNormReluFusion::Process(const FuncGraphPtr &graph, const A
auto
batch_norm_ex
=
AnfAlgo
::
GetInputNode
(
utils
::
cast
<
CNodePtr
>
(
tuple_get_item
),
0
);
MS_EXCEPTION_IF_NULL
(
batch_norm_ex
);
if
(
AnfAlgo
::
Get
OutputInferDataType
(
batch_norm_ex
,
0
)
!=
kNumberTypeFloat16
)
{
if
(
AnfAlgo
::
Get
InputFormat
(
batch_norm_ex
,
0
)
!=
kOpFormat_NHWC
)
{
return
nullptr
;
}
...
...
@@ -78,6 +79,7 @@ const AnfNodePtr BatchNormReluFusion::Process(const FuncGraphPtr &graph, const A
auto
manager
=
graph
->
manager
();
MS_EXCEPTION_IF_NULL
(
manager
);
manager
->
Replace
(
batch_norm_ex
,
fused_batch_norm_with_relu
);
device
::
gpu
::
SetKernelInfo
(
fused_batch_norm_with_relu
);
return
tuple_get_item
;
}
}
// namespace opt
...
...
mindspore/ccsrc/backend/optimizer/gpu/batch_norm_relu_grad_fusion.cc
浏览文件 @
bc4c5afc
...
...
@@ -23,6 +23,7 @@
#include "ir/primitive.h"
#include "utils/utils.h"
#include "backend/optimizer/common/helper.h"
#include "runtime/device/gpu/kernel_info_setter.h"
namespace
mindspore
{
namespace
opt
{
...
...
@@ -38,7 +39,7 @@ const AnfNodePtr BatchNormReluGradFusion::Process(const FuncGraphPtr &graph, con
MS_EXCEPTION_IF_NULL
(
graph
);
MS_EXCEPTION_IF_NULL
(
node
);
if
(
AnfAlgo
::
Get
OutputInferDataType
(
node
,
0
)
!=
kNumberTypeFloat16
)
{
if
(
AnfAlgo
::
Get
InputFormat
(
node
,
0
)
!=
kOpFormat_NHWC
)
{
return
nullptr
;
}
...
...
@@ -84,6 +85,7 @@ const AnfNodePtr BatchNormReluGradFusion::Process(const FuncGraphPtr &graph, con
}
AnfAlgo
::
SetOutputInferTypeAndShape
(
outputs_type
,
outputs_shape
,
fused_batch_norm_grad_with_relu
.
get
());
AnfAlgo
::
CopyNodeAttrs
(
node
,
fused_batch_norm_grad_with_relu
);
device
::
gpu
::
SetKernelInfo
(
fused_batch_norm_grad_with_relu
);
return
fused_batch_norm_grad_with_relu
;
}
}
// namespace opt
...
...
mindspore/ccsrc/backend/session/gpu_session.cc
浏览文件 @
bc4c5afc
...
...
@@ -53,10 +53,10 @@ using AnfAlgo = mindspore::session::AnfRuntimeAlgorithm;
void
GPUSession
::
SelectKernel
(
const
std
::
shared_ptr
<
KernelGraph
>
&
kernel_graph
)
const
{
MS_EXCEPTION_IF_NULL
(
kernel_graph
);
bool
graph_format_transform
=
Is
SupportFormatTransform
(
kernel_graph
);
device
::
gpu
::
FormatTransformChecker
::
GetInstance
().
Check
SupportFormatTransform
(
kernel_graph
);
for
(
const
auto
&
kernel_node
:
kernel_graph
->
execution_order
())
{
MS_EXCEPTION_IF_NULL
(
kernel_node
);
device
::
gpu
::
SetKernelInfo
(
kernel_node
,
graph_format_transform
);
device
::
gpu
::
SetKernelInfo
(
kernel_node
);
}
}
...
...
@@ -82,12 +82,6 @@ void GPUSession::Optimize(const std::shared_ptr<KernelGraph> &kernel_graph) {
pm
->
AddPass
(
std
::
make_shared
<
opt
::
ReplaceBNGradCastFusion
>
());
pm
->
AddPass
(
std
::
make_shared
<
opt
::
ReplaceMomentumCastFusion
>
());
pm
->
AddPass
(
std
::
make_shared
<
opt
::
ReplaceAddNFusion
>
());
if
(
IsSupportFormatTransform
(
kernel_graph
)
&&
context_ptr
->
get_param
<
int
>
(
MS_CTX_EXECUTION_MODE
)
!=
kPynativeMode
)
{
pm
->
AddPass
(
std
::
make_shared
<
opt
::
BatchNormReluFusion
>
());
pm
->
AddPass
(
std
::
make_shared
<
opt
::
BatchNormReluGradFusion
>
());
pm
->
AddPass
(
std
::
make_shared
<
opt
::
BatchNormAddReluFusion
>
());
// pm->AddPass(std::make_shared<opt::BatchNormAddReluGradFusion>());
}
optimizer
->
AddPassManager
(
pm
);
(
void
)
optimizer
->
Optimize
(
kernel_graph
);
kernel_graph
->
SetExecOrderByDefault
();
...
...
@@ -96,6 +90,10 @@ void GPUSession::Optimize(const std::shared_ptr<KernelGraph> &kernel_graph) {
void
GPUSession
::
HardwareOptimize
(
const
std
::
shared_ptr
<
KernelGraph
>
&
kernel_graph
)
{
auto
optimizer
=
std
::
make_shared
<
opt
::
GraphOptimizer
>
();
auto
pm
=
std
::
make_shared
<
opt
::
PassManager
>
();
pm
->
AddPass
(
std
::
make_shared
<
opt
::
BatchNormReluFusion
>
());
pm
->
AddPass
(
std
::
make_shared
<
opt
::
BatchNormReluGradFusion
>
());
pm
->
AddPass
(
std
::
make_shared
<
opt
::
BatchNormAddReluFusion
>
());
// pm->AddPass(std::make_shared<opt::BatchNormAddReluGradFusion>());
pm
->
AddPass
(
std
::
make_shared
<
opt
::
InsertFormatTransformOp
>
());
pm
->
AddPass
(
std
::
make_shared
<
opt
::
RemoveFormatTransformPair
>
());
pm
->
AddPass
(
std
::
make_shared
<
opt
::
RemoveRedundantFormatTransform
>
());
...
...
@@ -201,28 +199,6 @@ void GPUSession::Execute(const std::shared_ptr<KernelGraph> &kernel_graph) const
}
}
bool
GPUSession
::
IsSupportFormatTransform
(
const
std
::
shared_ptr
<
KernelGraph
>
&
kernel_graph
)
const
{
auto
kernels
=
kernel_graph
->
execution_order
();
size_t
conv_cnt
=
0
;
size_t
bn_cnt
=
0
;
for
(
const
auto
&
kernel
:
kernels
)
{
auto
kernel_name
=
AnfAlgo
::
GetCNodeName
(
kernel
);
if
(
kernel_name
==
prim
::
kPrimLayerNorm
->
name
())
{
return
false
;
}
if
(
kernel_name
==
prim
::
kPrimConv2D
->
name
())
{
conv_cnt
++
;
}
if
(
kernel_name
==
prim
::
kPrimFusedBatchNormEx
->
name
())
{
bn_cnt
++
;
}
}
if
(
conv_cnt
==
kConv2dCount
&&
bn_cnt
==
kFusedBatchNormCount
)
{
return
false
;
}
return
true
;
}
GraphId
GPUSession
::
CompileGraph
(
const
AnfNodePtrList
&
lst
,
const
AnfNodePtrList
&
outputs
)
{
// Construct graph, if successfully, graph_sum_ + 1
auto
graph_id
=
graph_sum_
;
...
...
@@ -232,26 +208,27 @@ GraphId GPUSession::CompileGraph(const AnfNodePtrList &lst, const AnfNodePtrList
auto
context_ptr
=
MsContext
::
GetInstance
();
MS_EXCEPTION_IF_NULL
(
context_ptr
);
bool
save_graphs
=
context_ptr
->
get_param
<
bool
>
(
MS_CTX_SAVE_GRAPHS_FLAG
);
// Optimize
// Dump .pb graph before graph optimization
if
(
save_graphs
)
{
DumpIRProto
(
graph
,
"before_opt_"
+
std
::
to_string
(
graph_id
));
}
// Graph optimization irrelevant to device data format
Optimize
(
graph
);
// Select kernel build info
SelectKernel
(
graph
);
// Graph optimization relevant to device data format
HardwareOptimize
(
graph
);
// Dump .pb graph after graph optimization
if
(
save_graphs
)
{
DumpIRProto
(
graph
,
"after_opt_"
+
std
::
to_string
(
graph_id
));
}
#if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU))
// Assign parameter keys.
AssignParamKey
(
graph
);
#endif
// Start gpu kernel runtime
StartKernelRT
();
// Dump .pb graph before hardware optimization
if
(
save_graphs
)
{
DumpIRProto
(
graph
,
"before_hwopt_"
+
std
::
to_string
(
graph_id
));
}
// HardwareOptimize
HardwareOptimize
(
graph
);
// Dump .pb graph after hardware optimization
if
(
save_graphs
)
{
DumpIRProto
(
graph
,
"after_hwopt_"
+
std
::
to_string
(
graph_id
));
}
// Assign CUDA streams
AssignStream
(
graph
);
// Hide NopOp from execution graph
...
...
mindspore/ccsrc/backend/session/gpu_session.h
浏览文件 @
bc4c5afc
...
...
@@ -67,8 +67,6 @@ class GPUSession : public SessionBasic {
void
Execute
(
const
std
::
shared_ptr
<
KernelGraph
>
&
kernel_graph
)
const
;
bool
IsSupportFormatTransform
(
const
std
::
shared_ptr
<
KernelGraph
>
&
kernel_graph
)
const
;
#ifdef ENABLE_DEBUGGER
void
Dump
(
const
std
::
shared_ptr
<
KernelGraph
>
&
kernel_graph
)
const
;
...
...
@@ -82,9 +80,6 @@ class GPUSession : public SessionBasic {
void
PostLoadTensor
(
const
std
::
shared_ptr
<
KernelGraph
>
&
kernel_graph
)
const
;
#endif
static
constexpr
size_t
kConv2dCount
=
96
;
static
constexpr
size_t
kFusedBatchNormCount
=
94
;
};
using
GPUSessionPtr
=
std
::
shared_ptr
<
GPUSession
>
;
MS_REG_SESSION
(
kGPUDevice
,
GPUSession
);
...
...
mindspore/ccsrc/runtime/device/gpu/cuda_env_checker.cc
浏览文件 @
bc4c5afc
...
...
@@ -47,7 +47,7 @@ bool CudaEnvChecker::CheckNvccInPath() {
};
auto
cuda_paths
=
GetCudaRealPaths
();
find_nvcc_
=
any_of
(
cuda_paths
.
begin
(),
cuda_paths
.
end
(),
checker
);
find_nvcc_
=
std
::
any_of
(
cuda_paths
.
begin
(),
cuda_paths
.
end
(),
checker
);
already_check_nvcc_
=
true
;
return
find_nvcc_
;
}
...
...
mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.cc
浏览文件 @
bc4c5afc
...
...
@@ -165,6 +165,9 @@ bool IsNeedProcessFormatInfo(const CNodePtr &kernel_node, const std::vector<Type
if
(
ms_context
->
get_param
<
int
>
(
MS_CTX_EXECUTION_MODE
)
==
kPynativeMode
)
{
return
false
;
}
if
(
!
FormatTransformChecker
::
GetInstance
().
format_transform
())
{
return
false
;
}
if
(
!
AnfAlgo
::
IsRealCNodeKernel
(
kernel_node
))
{
return
false
;
}
...
...
@@ -232,7 +235,31 @@ void UpdateKernelFormatInfo(const CNodePtr &kernel_node, const std::vector<TypeI
}
}
// namespace
void
SetKernelInfo
(
const
CNodePtr
&
kernel_node
,
bool
graph_format_transform
)
{
void
FormatTransformChecker
::
CheckSupportFormatTransform
(
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
kernel_graph
)
{
auto
kernels
=
kernel_graph
->
execution_order
();
size_t
conv_cnt
=
0
;
size_t
bn_cnt
=
0
;
for
(
const
auto
&
kernel
:
kernels
)
{
auto
kernel_name
=
AnfAlgo
::
GetCNodeName
(
kernel
);
if
(
kernel_name
==
prim
::
kPrimLayerNorm
->
name
())
{
format_transform_
=
false
;
return
;
}
if
(
kernel_name
==
prim
::
kPrimConv2D
->
name
())
{
conv_cnt
++
;
}
if
(
kernel_name
==
prim
::
kPrimFusedBatchNormEx
->
name
())
{
bn_cnt
++
;
}
}
if
(
conv_cnt
==
kConv2dCount
&&
bn_cnt
==
kFusedBatchNormCount
)
{
format_transform_
=
false
;
return
;
}
format_transform_
=
true
;
}
void
SetKernelInfo
(
const
CNodePtr
&
kernel_node
)
{
std
::
vector
<
std
::
string
>
inputs_format
;
std
::
vector
<
TypeId
>
inputs_type
;
for
(
size_t
input_index
=
0
;
input_index
<
AnfAlgo
::
GetInputTensorNum
(
kernel_node
);
++
input_index
)
{
...
...
@@ -246,7 +273,7 @@ void SetKernelInfo(const CNodePtr &kernel_node, bool graph_format_transform) {
outputs_type
.
push_back
(
AnfAlgo
::
GetOutputInferDataType
(
kernel_node
,
output_index
));
}
std
::
string
origin_data_format
=
kOpFormat_DEFAULT
;
if
(
graph_format_transform
&&
IsNeedProcessFormatInfo
(
kernel_node
,
inputs_type
))
{
if
(
IsNeedProcessFormatInfo
(
kernel_node
,
inputs_type
))
{
UpdateKernelFormatInfo
(
kernel_node
,
inputs_type
,
&
inputs_format
,
&
outputs_format
,
&
origin_data_format
);
}
std
::
shared_ptr
<
KernelBuildInfo
::
KernelBuildInfoBuilder
>
builder
=
...
...
mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.h
浏览文件 @
bc4c5afc
...
...
@@ -20,11 +20,13 @@
#include <utility>
#include <string>
#include <vector>
#include <memory>
#include <map>
#include "ir/anf.h"
#include "ir/dtype.h"
#include "utils/utils.h"
#include "frontend/operator/ops.h"
#include "backend/session/kernel_graph.h"
namespace
mindspore
{
namespace
device
{
...
...
@@ -53,7 +55,28 @@ static std::map<std::string, std::pair<std::vector<size_t>, std::vector<size_t>>
{
prim
::
kPrimAddN
->
name
(),
{{},
{
0
}}},
};
void
SetKernelInfo
(
const
CNodePtr
&
kernel_node
,
bool
graph_format_transform
=
false
);
void
SetKernelInfo
(
const
CNodePtr
&
kernel_node
);
class
FormatTransformChecker
{
public:
void
CheckSupportFormatTransform
(
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
kernel_graph
);
bool
format_transform
()
const
{
return
format_transform_
;
}
static
FormatTransformChecker
&
GetInstance
()
{
static
FormatTransformChecker
instance
;
return
instance
;
}
private:
FormatTransformChecker
()
=
default
;
~
FormatTransformChecker
()
=
default
;
FormatTransformChecker
(
const
FormatTransformChecker
&
);
FormatTransformChecker
&
operator
=
(
const
FormatTransformChecker
&
);
bool
format_transform_
{
true
};
static
constexpr
size_t
kConv2dCount
=
96
;
static
constexpr
size_t
kFusedBatchNormCount
=
94
;
};
class
KernelAttr
{
public:
...
...
tests/ut/cpp/CMakeLists.txt
浏览文件 @
bc4c5afc
...
...
@@ -133,6 +133,10 @@ list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/frontend/parallel/
list
(
REMOVE_ITEM MINDSPORE_SRC_LIST
"../../../mindspore/ccsrc/frontend/parallel/ps/scheduler.cc"
)
list
(
REMOVE_ITEM MINDSPORE_SRC_LIST
"../../../mindspore/ccsrc/frontend/parallel/ps/optimizer_info.cc"
)
list
(
REMOVE_ITEM MINDSPORE_SRC_LIST
"../../../mindspore/ccsrc/frontend/parallel/ps/optimizer_info_builder.cc"
)
list
(
REMOVE_ITEM MINDSPORE_SRC_LIST
"../../../mindspore/ccsrc/backend/optimizer/gpu/batch_norm_add_relu_fusion.cc"
)
list
(
REMOVE_ITEM MINDSPORE_SRC_LIST
"../../../mindspore/ccsrc/backend/optimizer/gpu/batch_norm_add_relu_grad_fusion.cc"
)
list
(
REMOVE_ITEM MINDSPORE_SRC_LIST
"../../../mindspore/ccsrc/backend/optimizer/gpu/batch_norm_relu_fusion.cc"
)
list
(
REMOVE_ITEM MINDSPORE_SRC_LIST
"../../../mindspore/ccsrc/backend/optimizer/gpu/batch_norm_relu_grad_fusion.cc"
)
add_library
(
_ut_mindspore_obj OBJECT
${
MINDSPORE_SRC_LIST
}
)
add_library
(
_ut_ut_obj OBJECT
${
UT_SRCS
}
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录