Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
0d720653
MegEngine
项目概览
MegEngine 天元
/
MegEngine
9 个月 前同步成功
通知
392
Star
4702
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
0d720653
编写于
1月 25, 2021
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refactor(megdnn): add default algo for convolution forward
GitOrigin-RevId: a12a7d399ac9f365ca7770e9b8c50cd4e88cddce
上级
659217ac
变更
13
隐藏空白更改
内联
并排
Showing
13 changed file
with
528 addition
and
223 deletion
+528
-223
dnn/src/cuda/convolution/forward/algos.cpp
dnn/src/cuda/convolution/forward/algos.cpp
+172
-0
dnn/src/cuda/convolution/forward/algos.h
dnn/src/cuda/convolution/forward/algos.h
+111
-0
dnn/src/cuda/convolution/opr_impl.cpp
dnn/src/cuda/convolution/opr_impl.cpp
+17
-95
dnn/src/cuda/convolution/opr_impl.h
dnn/src/cuda/convolution/opr_impl.h
+58
-60
dnn/test/common/checker.h
dnn/test/common/checker.h
+24
-0
dnn/test/common/convolution.cpp
dnn/test/common/convolution.cpp
+21
-3
dnn/test/common/opr_proxy.h
dnn/test/common/opr_proxy.h
+4
-3
dnn/test/cuda/chanwise_convolution.cpp
dnn/test/cuda/chanwise_convolution.cpp
+47
-25
dnn/test/cuda/convolution.cpp
dnn/test/cuda/convolution.cpp
+7
-4
dnn/test/cuda/dilated_convolution.cpp
dnn/test/cuda/dilated_convolution.cpp
+18
-11
dnn/test/cuda/group_conv.cpp
dnn/test/cuda/group_conv.cpp
+11
-6
src/opr/impl/search_policy/algo_chooser.cpp
src/opr/impl/search_policy/algo_chooser.cpp
+32
-13
src/opr/include/megbrain/opr/search_policy/algo_chooser.h
src/opr/include/megbrain/opr/search_policy/algo_chooser.h
+6
-3
未找到文件。
dnn/src/cuda/convolution/forward/algos.cpp
0 → 100644
浏览文件 @
0d720653
/**
* \file dnn/src/cuda/convolution/forward/algos.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/cuda/convolution/forward/algos.h"
#include "src/cuda/conv_bias/opr_impl.h"
#include "src/cuda/conv_bias/algo.h"
#include "src/common/algo_base.h"
#include "src/common/algo_chooser.h"
using namespace megdnn;
using namespace cuda;

namespace {
//! Build the configuration of the ConvBias sub-operator that is equivalent
//! to a plain forward convolution: returns the (empty-shaped) bias / z
//! layouts whose dtypes match the inputs, plus the forwarded ConvBias param.
std::pair<TensorLayoutArray, ConvBiasForward::Param> sub_opr_config(
        const TensorLayout& src, const TensorLayout& filter,
        const TensorLayout& dst, const ConvolutionForwardImpl* opr) {
    auto conv_param = opr->param();

    // Pick the bias dtype implied by the input dtypes: quantized inputs
    // accumulate into a 32-bit (quantized) integer whose scale is the
    // product of the src and filter scales; float inputs keep their dtype.
    DType bias_type;
    switch (src.dtype.enumv()) {
        case DTypeEnum::QuantizedS8:
            bias_type = dtype::QuantizedS32(
                    src.dtype.param<dtype::QuantizedS8>().scale *
                    filter.dtype.param<dtype::QuantizedS8>().scale);
            break;
        case DTypeEnum::Quantized8Asymm:
            bias_type = dtype::QuantizedS32(
                    src.dtype.param<dtype::Quantized8Asymm>().scale *
                    filter.dtype.param<dtype::Quantized8Asymm>().scale);
            break;
        case DTypeEnum::Uint8:
        case DTypeEnum::Int8:
            bias_type = dtype::Int32{};
            break;
        case DTypeEnum::Quantized4Asymm:
            bias_type = dtype::QuantizedS32(
                    src.dtype.param<dtype::Quantized4Asymm>().scale *
                    filter.dtype.param<dtype::Quantized4Asymm>().scale);
            break;
        default:
            megdnn_assert(src.dtype.category() == DTypeCategory::FLOAT);
            bias_type = src.dtype;
            break;
    }

    std::pair<TensorLayoutArray, ConvBiasForward::Param> ret;
    // ConvBias with IDENTITY non-linearity behaves exactly like Convolution.
    ret.second = {param::ConvBias::NonlineMode::IDENTITY,
                  conv_param.mode,      conv_param.sparse,
                  conv_param.format,    conv_param.pad_h,
                  conv_param.pad_w,     conv_param.stride_h,
                  conv_param.stride_w,  conv_param.dilate_h,
                  conv_param.dilate_w,  conv_param.compute_mode};
    // Empty bias / z layouts: only their dtypes matter to the sub operator.
    ret.first.push_back(TensorLayout({}, bias_type));
    ret.first.push_back(TensorLayout({}, dst.dtype));
    return ret;
}
}  // namespace
//! Register all available forward-convolution algorithms and index them by
//! their descriptor for O(1) lookup.
ConvolutionForwardImpl::AlgoPack::AlgoPack() {
    all_algos.push_back(&algo_default);
    for (auto&& i : all_algos) {
        m_all_algos_map.emplace(i->info().desc, i);
    }
}

ConvolutionForwardImpl::AlgoPack ConvolutionForwardImpl::sm_algo_pack;

MEGDNN_DEF_GET_ALGO_FROM_DESC(ConvolutionForwardImpl)
// SizeArgs: lightweight aggregate of the operator and the layouts involved
// in a forward convolution; used by availability / workspace-size queries.
ConvolutionForwardImpl::AlgoBase::SizeArgs::SizeArgs(
        ConvolutionForwardImpl* o, const TensorLayout& src,
        const TensorLayout& filter, const TensorLayout& dst)
        : opr{o},
          layout_src{&src},
          layout_filter{&filter},
          layout_dst{&dst} {}

// ExecArgs: SizeArgs plus the concrete tensors and the workspace used at
// execution time; layouts are taken from the tensors themselves.
ConvolutionForwardImpl::AlgoBase::ExecArgs::ExecArgs(
        ConvolutionForwardImpl* opr, _megdnn_tensor_in src,
        _megdnn_tensor_in filter, _megdnn_tensor_out dst,
        _megdnn_workspace workspace)
        : SizeArgs(opr, src.layout, filter.layout, dst.layout),
          tensor_src{src},
          tensor_filter{filter},
          tensor_dst{dst},
          workspace{workspace} {}
// Human-readable summary of the argument layouts, for error messages / logs.
std::string ConvolutionForwardImpl::AlgoBase::SizeArgs::to_string() const {
    return megdnn_mangle(ssprintf("src=%s, filter=%s, dst=%s",
                                  layout_src->to_string().c_str(),
                                  layout_filter->to_string().c_str(),
                                  layout_dst->to_string().c_str()));
}
/* ===================== default algo ===================== */
//! Describe the single ConvBias sub-operator this algorithm delegates to,
//! so the algo chooser can profile / select it recursively.
std::vector<Algorithm::SearchItem>
ConvolutionForwardImpl::AlgoDefault::get_subopr_list(
        const TensorLayoutArray& layouts, const OperatorBase* opr) const {
    auto&& cfg = sub_opr_config(
            layouts[0], layouts[1], layouts[2],
            static_cast<const ConvolutionForwardImpl*>(opr));

    // src, filter, bias, z, dst: the five-layout signature of ConvBias.
    TensorLayoutArray conv_bias_layouts = {layouts[0], layouts[1],
                                           cfg.first[0], cfg.first[1],
                                           layouts[2]};
    std::string serialized_param;
    Algorithm::serialize_write_pod(cfg.second, serialized_param);
    return {{Algorithm::OprType::CONVBIAS_FORWARD, serialized_param,
             conv_bias_layouts}};
}
//! Available iff the underlying ConvBias operator can find an algorithm for
//! the equivalent (bias-less, z-less) problem.
bool ConvolutionForwardImpl::AlgoDefault::is_available(
        const SizeArgs& args) const {
    auto conv_bias_opr =
            args.opr->handle()->create_operator<ConvBiasForward>();
    auto&& cfg = sub_opr_config(*args.layout_src, *args.layout_filter,
                                *args.layout_dst, args.opr);
    conv_bias_opr->param() = cfg.second;
    return get_algorithm(
            static_cast<ConvBiasForwardImpl*>(conv_bias_opr.get()),
            *args.layout_src, *args.layout_filter, cfg.first[0], cfg.first[1],
            *args.layout_dst);
}
//! Workspace size is exactly that of the delegated ConvBias operator,
//! queried with the caller-chosen sub-policy (if any) installed.
size_t ConvolutionForwardImpl::AlgoDefault::get_workspace_in_bytes(
        const SizeArgs& args) const {
    auto conv_bias_opr =
            args.opr->handle()->create_operator<ConvBiasForward>();
    auto&& policy = args.opr->execution_policy();
    // Forward the explicitly-selected sub-policy to the ConvBias operator;
    // exactly one sub-policy (the ConvBias one) is expected.
    if (policy.algo.valid() && !policy.sub_policy.empty()) {
        megdnn_assert(policy.sub_policy.size() == 1);
        conv_bias_opr->execution_policy() = policy.sub_policy[0];
    }
    auto&& cfg = sub_opr_config(*args.layout_src, *args.layout_filter,
                                *args.layout_dst, args.opr);
    conv_bias_opr->param() = cfg.second;
    return conv_bias_opr->get_workspace_in_bytes(
            *args.layout_src, *args.layout_filter, cfg.first[0], cfg.first[1],
            *args.layout_dst, nullptr);
}
//! Execute by delegating to ConvBias with null bias / z tensors.
void ConvolutionForwardImpl::AlgoDefault::exec(const ExecArgs& args) const {
    auto conv_bias_opr =
            args.opr->handle()->create_operator<ConvBiasForward>();
    auto&& policy = args.opr->execution_policy();
    // Consistency fix: mirror get_workspace_in_bytes() and forward the
    // sub-policy only when one was actually provided. The original guarded
    // on algo.valid() alone and then asserted sub_policy.size() == 1, so a
    // valid top-level algo with an empty sub-policy would assert-fail here
    // while the workspace query for the same policy succeeded.
    if (policy.algo.valid() && !policy.sub_policy.empty()) {
        megdnn_assert(policy.sub_policy.size() == 1);
        conv_bias_opr->execution_policy() = policy.sub_policy[0];
    }
    auto&& cfg = sub_opr_config(*args.layout_src, *args.layout_filter,
                                *args.layout_dst, args.opr);
    conv_bias_opr->param() = cfg.second;
    // Bias and z are passed as null tensors with the deduced layouts; the
    // IDENTITY non-linearity makes this equivalent to a plain convolution.
    conv_bias_opr->exec(args.tensor_src, args.tensor_filter,
                        {nullptr, cfg.first[0]}, {nullptr, cfg.first[1]},
                        args.tensor_dst, nullptr, args.workspace);
}

// vim: syntax=cpp.doxygen
dnn/src/cuda/convolution/forward/algos.h
0 → 100644
浏览文件 @
0d720653
/**
* \file dnn/src/cuda/convolution/forward/algos.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#pragma once
#include "megdnn/oprs.h"
#include "src/common/algo_base.h"
#include "src/common/metahelper.h"
#include "src/common/utils.h"
#include "src/cuda/convolution/opr_impl.h"
#include <unordered_map>
namespace
megdnn
{
namespace
cuda
{
/*!
 * \brief base class for convolutionForward algos
 *
 */
class ConvolutionForwardImpl::AlgoBase : public Algorithm {
protected:
    //! non-virtual protected dtor: instances are only destroyed through the
    //! concrete type (they live in the static AlgoPack)
    ~AlgoBase() = default;

public:
    enum class AlgoType : uint32_t {
        CUDA_DEFAULT,
    };
    //! descriptor -> algorithm lookup table, filled by AlgoPack
    using Mapper = std::unordered_map<AlgorithmDesc, AlgoBase*>;

    AlgoBase() : Algorithm() { m_handle_type = Handle::HandleType::CUDA; }

    //! operator + layouts of a forward convolution problem
    struct SizeArgs {
        ConvolutionForwardImpl* opr;
        const TensorLayout *layout_src, *layout_filter, *layout_dst;

        std::string to_string() const;
        SizeArgs(ConvolutionForwardImpl* opr, const TensorLayout& src,
                 const TensorLayout& filter, const TensorLayout& dst);
    };
    //! SizeArgs plus concrete tensors and workspace for execution
    struct ExecArgs : public SizeArgs {
        TensorND tensor_src, tensor_filter, tensor_dst;
        Workspace workspace;

        ExecArgs(ConvolutionForwardImpl* opr, _megdnn_tensor_in src,
                 _megdnn_tensor_in filter, _megdnn_tensor_out dst,
                 _megdnn_workspace workspace);
    };
    virtual bool is_available(const SizeArgs& args) const = 0;
    virtual size_t get_workspace_in_bytes(const SizeArgs& args) const = 0;
    virtual void exec(const ExecArgs&) const = 0;

    //! available and fits within the given workspace limit
    bool is_available_wk(const SizeArgs& args, size_t limit) const {
        return is_available(args) && get_workspace_in_bytes(args) <= limit;
    }

    //! available, fits within \p limit, and (if required) reproducible
    bool is_available_reproducible(
            const SizeArgs& args, bool reproducible = true,
            size_t limit = std::numeric_limits<size_t>::max()) const {
        return (!reproducible || is_reproducible()) &&
               is_available_wk(args, limit);
    }

    //! assert the provided workspace is large enough; returns *this for
    //! chaining (e.g. algo->check_workspace(...).exec(...))
    AlgoBase& check_workspace(const SizeArgs& args,
                              const Workspace& workspace) {
        auto req = get_workspace_in_bytes(args);
        megdnn_assert(req <= workspace.size,
                      "convolution fwd algo %s: required workspace %zu bytes, "
                      "got %zu",
                      name(), req, workspace.size);
        return *this;
    }
};
//! Default algorithm: delegates the whole forward convolution to an
//! equivalent ConvBias operator (IDENTITY non-linearity, null bias / z).
class ConvolutionForwardImpl::AlgoDefault final : public AlgoBase {
public:
    AlgoDefault() = default;
    bool is_available(const SizeArgs&) const override;
    size_t get_workspace_in_bytes(const SizeArgs& /* args */) const override;
    const char* name() const override { return "DEFAULT"; }
    void exec(const ExecArgs&) const override;
    //! reproducibility is delegated to the chosen ConvBias sub-algorithm
    bool is_reproducible() const override { return true; }

    //! expose the single ConvBias sub-operator for recursive algo search
    std::vector<SearchItem> get_subopr_list(
            const TensorLayoutArray& layouts,
            const OperatorBase* opr) const override;

    MEGDNN_DECL_ALGO_TYPE(CUDA_DEFAULT)
};
//! Container that owns every forward-convolution algorithm instance and the
//! descriptor -> algorithm map used by MEGDNN_DEF_GET_ALGO_FROM_DESC.
class ConvolutionForwardImpl::AlgoPack : NonCopyableObj {
private:
    AlgoBase::Mapper m_all_algos_map;

public:
    AlgoPack();

    AlgoDefault algo_default;

    //! all algorithms, in heuristic preference order
    std::vector<AlgoBase*> all_algos;

    const AlgoBase::Mapper& all_algos_map() const { return m_all_algos_map; }
};

}  // namespace cuda
}  // namespace megdnn

// vim: syntax=cpp.doxygen
dnn/src/cuda/convolution/opr_impl.cpp
浏览文件 @
0d720653
...
...
@@ -12,6 +12,7 @@
#include "src/cuda/convolution/opr_impl.h"
#include "megdnn/dtype.h"
#include "src/cuda/convolution/helper.h"
#include "src/cuda/convolution/forward/algos.h"
#include "src/cuda/convolution/backward_data/algo.h"
#include "src/cuda/convolution/backward_filter/algo.h"
#include "src/cuda/conv_bias/opr_impl.h"
...
...
@@ -28,108 +29,34 @@ using namespace convolution;
TO_STRING(CUDNN_MINOR) "." TO_STRING(CUDNN_PATCHLEVEL)
/* ============== ConvolutionForwardImpl ============== */
ConvolutionForwardImpl
::
ConvBiasExtraData
ConvolutionForwardImpl
::
conv_bias_extra_data
(
const
TensorLayout
&
src
,
const
TensorLayout
&
filter
,
const
TensorLayout
&
dst
)
{
auto
conv_param
=
param
();
DType
bias_type
;
if
(
src
.
dtype
.
enumv
()
==
DTypeEnum
::
QuantizedS8
)
{
bias_type
=
dtype
::
QuantizedS32
(
src
.
dtype
.
param
<
dtype
::
QuantizedS8
>
().
scale
*
filter
.
dtype
.
param
<
dtype
::
QuantizedS8
>
().
scale
);
}
else
if
(
src
.
dtype
.
enumv
()
==
DTypeEnum
::
Quantized8Asymm
)
{
bias_type
=
dtype
::
QuantizedS32
(
src
.
dtype
.
param
<
dtype
::
Quantized8Asymm
>
().
scale
*
filter
.
dtype
.
param
<
dtype
::
Quantized8Asymm
>
().
scale
);
}
else
if
(
src
.
dtype
.
enumv
()
==
DTypeEnum
::
Uint8
||
src
.
dtype
.
enumv
()
==
DTypeEnum
::
Int8
)
{
bias_type
=
dtype
::
Int32
{};
}
else
if
(
src
.
dtype
.
enumv
()
==
DTypeEnum
::
Quantized4Asymm
)
{
bias_type
=
dtype
::
QuantizedS32
(
src
.
dtype
.
param
<
dtype
::
Quantized4Asymm
>
().
scale
*
filter
.
dtype
.
param
<
dtype
::
Quantized4Asymm
>
().
scale
);
}
else
{
megdnn_assert
(
src
.
dtype
.
category
()
==
DTypeCategory
::
FLOAT
);
bias_type
=
src
.
dtype
;
}
ConvBiasExtraData
ret
=
{
this
->
handle
()
->
create_operator
<
ConvBiasForward
>
(),
TensorLayout
(
bias_type
),
TensorLayout
(
dst
.
dtype
)};
ret
.
convbias_opr
->
param
()
=
{
param
::
ConvBias
::
NonlineMode
::
IDENTITY
,
conv_param
.
mode
,
conv_param
.
sparse
,
conv_param
.
format
,
conv_param
.
pad_h
,
conv_param
.
pad_w
,
conv_param
.
stride_h
,
conv_param
.
stride_w
,
conv_param
.
dilate_h
,
conv_param
.
dilate_w
,
conv_param
.
compute_mode
};
ret
.
convbias_opr
->
execution_policy
()
=
{
this
->
execution_policy
().
algo
,
{}};
return
ret
;
}
ConvolutionForwardImpl
::
Algorithm
*
ConvolutionForwardImpl
::
get_algorithm_heuristic
(
const
TensorLayout
&
src
,
const
TensorLayout
&
filter
,
const
TensorLayout
&
dst
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
auto
extra_data
=
conv_bias_extra_data
(
src
,
filter
,
dst
);
return
static_cast
<
ConvBiasForwardImpl
*>
(
extra_data
.
convbias_opr
.
get
())
->
get_algorithm_heuristic
(
src
,
filter
,
extra_data
.
bias_layout
,
extra_data
.
z_layout
,
dst
,
workspace_limit_in_bytes
,
reproducible
);
}
ConvolutionForwardImpl
::
Algorithm
*
ConvolutionForwardImpl
::
get_algorithm_from_desc
(
const
ConvolutionForward
::
AlgorithmDesc
&
desc
)
{
auto
conv_param
=
param
();
auto
convbias_opr
=
this
->
handle
()
->
create_operator
<
ConvBiasForward
>
();
convbias_opr
->
param
()
=
{
param
::
ConvBias
::
NonlineMode
::
IDENTITY
,
conv_param
.
mode
,
conv_param
.
sparse
,
conv_param
.
format
,
conv_param
.
pad_h
,
conv_param
.
pad_w
,
conv_param
.
stride_h
,
conv_param
.
stride_w
,
conv_param
.
dilate_h
,
conv_param
.
dilate_w
,
conv_param
.
compute_mode
};
convbias_opr
->
execution_policy
()
=
{
this
->
execution_policy
().
algo
,
{}};
return
static_cast
<
ConvBiasForwardImpl
*>
(
convbias_opr
.
get
())
->
get_algorithm_from_desc
(
desc
);
AlgoBase
::
SizeArgs
args
{
this
,
src
,
filter
,
dst
};
MEGDNN_MARK_USED_VAR
(
workspace_limit_in_bytes
);
MEGDNN_MARK_USED_VAR
(
reproducible
);
return
&
sm_algo_pack
.
algo_default
;
}
std
::
vector
<
ConvolutionForwardImpl
::
Algorithm
*>
ConvolutionForwardImpl
::
get_all_algorithms
(
const
TensorLayout
&
src
,
const
TensorLayout
&
filter
,
const
TensorLayout
&
dst
)
{
auto
extra_data
=
conv_bias_extra_data
(
src
,
filter
,
dst
);
return
static_cast
<
ConvBiasForwardImpl
*>
(
extra_data
.
convbias_opr
.
get
())
->
get_all_algorithms
(
src
,
filter
,
extra_data
.
bias_layout
,
extra_data
.
z_layout
,
dst
);
AlgoBase
::
SizeArgs
args
{
this
,
src
,
filter
,
dst
};
return
megdnn
::
get_all_algorithms
<
ConvolutionForwardImpl
>
(
args
);
}
size_t
ConvolutionForwardImpl
::
get_workspace_in_bytes
(
const
TensorLayout
&
src
,
const
TensorLayout
&
filter
,
const
TensorLayout
&
dst
,
const
PreprocessedFilter
*
preprocessed_filter
)
{
auto
extra_data
=
conv_bias_extra_data
(
src
,
filter
,
dst
);
return
static_cast
<
ConvBiasForwardImpl
*>
(
extra_data
.
convbias_opr
.
get
())
->
get_workspace_in_bytes
(
src
,
filter
,
extra_data
.
bias_layout
,
extra_data
.
z_layout
,
dst
,
reinterpret_cast
<
const
ConvolutionBase
<
param
::
ConvBias
>::
PreprocessedFilter
*>
(
preprocessed_filter
));
MEGDNN_MARK_USED_VAR
(
preprocessed_filter
);
AlgoBase
::
SizeArgs
args
{
this
,
src
,
filter
,
dst
};
return
megdnn
::
get_algorithm
(
this
,
src
,
filter
,
dst
)
->
get_workspace_in_bytes
(
args
);
}
void
ConvolutionForwardImpl
::
exec
(
_megdnn_tensor_in
src
,
...
...
@@ -137,20 +64,15 @@ void ConvolutionForwardImpl::exec(_megdnn_tensor_in src,
_megdnn_tensor_out
dst
,
const
PreprocessedFilter
*
preprocessed_filter
,
_megdnn_workspace
workspace
)
{
auto
extra_data
=
conv_bias_extra_data
(
src
.
layout
,
filter
.
layout
,
dst
.
layout
);
TensorND
bias
(
nullptr
,
extra_data
.
bias_layout
);
TensorND
z
(
nullptr
,
extra_data
.
z_layout
);
return
static_cast
<
ConvBiasForwardImpl
*>
(
extra_data
.
convbias_opr
.
get
())
->
exec
(
src
,
filter
,
bias
,
z
,
dst
,
reinterpret_cast
<
const
ConvolutionBase
<
param
::
ConvBias
>::
PreprocessedFilter
*>
(
preprocessed_filter
),
workspace
);
check_exec
(
src
.
layout
,
filter
.
layout
,
dst
.
layout
,
workspace
.
size
,
preprocessed_filter
);
AlgoBase
::
ExecArgs
args
(
this
,
src
,
filter
,
dst
,
workspace
);
auto
&&
algo
=
get_algorithm
(
this
,
src
.
layout
,
filter
.
layout
,
dst
.
layout
);
algo
->
check_workspace
(
args
,
workspace
).
exec
(
args
);
}
const
char
*
ConvolutionForwardImpl
::
get_algorithm_set_name
()
const
{
return
"CUDA
CONV0+CUDNN"
CUDNN_VERSION_STR
;
return
"CUDA
CONVOLUTION_FORWARD"
;
}
/* ============== ConvolutionBackwardDataImpl ============== */
...
...
dnn/src/cuda/convolution/opr_impl.h
浏览文件 @
0d720653
...
...
@@ -6,7 +6,8 @@
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#pragma once
...
...
@@ -16,58 +17,56 @@
namespace
megdnn
{
namespace
cuda
{
class
ConvolutionForwardImpl
:
public
ConvolutionForward
{
public:
using
ConvolutionForward
::
ConvolutionForward
;
void
exec
(
_megdnn_tensor_in
src
,
_megdnn_tensor_in
filter
,
_megdnn_tensor_out
dst
,
const
PreprocessedFilter
*
preprocessed_filter
,
_megdnn_workspace
workspace
)
override
;
size_t
get_workspace_in_bytes
(
const
TensorLayout
&
src
,
const
TensorLayout
&
filter
,
const
TensorLayout
&
dst
,
const
PreprocessedFilter
*
preprocessed_filter
)
override
;
const
char
*
get_algorithm_set_name
()
const
override
;
SmallVector
<
TensorLayout
>
deduce_preprocessed_filter_layout
(
const
TensorLayout
&
,
const
TensorLayout
&
,
const
TensorLayout
&
)
override
{
return
{};
}
size_t
get_preprocess_workspace_in_bytes
(
const
TensorLayout
&
,
const
TensorLayout
&
,
const
TensorLayout
&
)
override
{
return
0
;
}
void
exec_preprocess
(
const
TensorLayout
&
,
_megdnn_tensor_in
,
const
TensorLayout
&
,
PreprocessedFilter
*
,
_megdnn_workspace
)
override
{
megdnn_throw
(
"cuda exec_preprocess has not implemeted yet"
);
}
Algorithm
*
get_algorithm_from_desc
(
const
AlgorithmDesc
&
desc
)
override
;
protected:
struct
ConvBiasExtraData
{
std
::
unique_ptr
<
ConvBiasForward
>
convbias_opr
;
TensorLayout
bias_layout
;
TensorLayout
z_layout
;
};
std
::
vector
<
Algorithm
*>
get_all_algorithms
(
const
TensorLayout
&
src
,
const
TensorLayout
&
filter
,
const
TensorLayout
&
dst
)
override
;
Algorithm
*
get_algorithm_heuristic
(
const
TensorLayout
&
src
,
const
TensorLayout
&
filter
,
const
TensorLayout
&
dst
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
override
;
private:
ConvBiasExtraData
conv_bias_extra_data
(
const
TensorLayout
&
,
const
TensorLayout
&
,
const
TensorLayout
&
);
class
ConvolutionForwardImpl
:
public
ConvolutionForward
{
public:
using
ConvolutionForward
::
ConvolutionForward
;
void
exec
(
_megdnn_tensor_in
src
,
_megdnn_tensor_in
filter
,
_megdnn_tensor_out
dst
,
const
PreprocessedFilter
*
preprocessed_filter
,
_megdnn_workspace
workspace
)
override
;
size_t
get_workspace_in_bytes
(
const
TensorLayout
&
src
,
const
TensorLayout
&
filter
,
const
TensorLayout
&
dst
,
const
PreprocessedFilter
*
preprocessed_filter
)
override
;
const
char
*
get_algorithm_set_name
()
const
override
;
SmallVector
<
TensorLayout
>
deduce_preprocessed_filter_layout
(
const
TensorLayout
&
,
const
TensorLayout
&
,
const
TensorLayout
&
)
override
{
return
{};
}
size_t
get_preprocess_workspace_in_bytes
(
const
TensorLayout
&
,
const
TensorLayout
&
,
const
TensorLayout
&
)
override
{
return
0
;
}
void
exec_preprocess
(
const
TensorLayout
&
,
_megdnn_tensor_in
,
const
TensorLayout
&
,
PreprocessedFilter
*
,
_megdnn_workspace
)
override
{
megdnn_throw
(
"cuda exec_preprocess has not implemeted yet"
);
}
Algorithm
*
get_algorithm_from_desc
(
const
AlgorithmDesc
&
desc
)
override
;
class
AlgoBase
;
class
AlgoDefault
;
class
AlgoPack
;
static
const
AlgoPack
&
algo_pack
()
{
return
sm_algo_pack
;
}
protected:
std
::
vector
<
Algorithm
*>
get_all_algorithms
(
const
TensorLayout
&
src
,
const
TensorLayout
&
filter
,
const
TensorLayout
&
dst
)
override
;
Algorithm
*
get_algorithm_heuristic
(
const
TensorLayout
&
src
,
const
TensorLayout
&
filter
,
const
TensorLayout
&
dst
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
override
;
private:
static
AlgoPack
sm_algo_pack
;
};
class
ConvolutionBackwardDataImpl
:
public
ConvolutionBackwardData
{
...
...
@@ -122,6 +121,7 @@ protected:
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
override
;
private:
Algorithm
*
get_algorithm_heuristic
(
const
TensorLayout
&
filter
,
const
CanonizedFilterMeta
&
filter_meta
,
...
...
@@ -141,12 +141,10 @@ public:
size_t
get_workspace_in_bytes
(
const
TensorLayout
&
src
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
)
override
;
AlgorithmInfo
get_algorithm_info_heuristic
(
const
TensorLayout
&
src
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
const
CanonizedFilterMeta
&
grad_meta
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
AlgorithmInfo
get_algorithm_info_heuristic
(
const
TensorLayout
&
src
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
const
CanonizedFilterMeta
&
grad_meta
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
return
get_algorithm_heuristic
(
src
,
diff
,
grad
,
grad_meta
,
workspace_limit_in_bytes
,
reproducible
)
->
info
();
...
...
@@ -162,7 +160,6 @@ public:
->
info
();
}
const
char
*
get_algorithm_set_name
()
const
override
;
class
AlgoBase
;
...
...
@@ -187,6 +184,7 @@ protected:
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
override
;
private:
Algorithm
*
get_algorithm_heuristic
(
const
TensorLayout
&
src
,
const
TensorLayout
&
diff
,
...
...
dnn/test/common/checker.h
浏览文件 @
0d720653
...
...
@@ -532,6 +532,30 @@ private:
bool
*
m_require_algo
;
};
template
<
typename
Opr
>
void
construct_sub_execution_policy_heuristic
(
ExecutionPolicy
&
policy
,
const
TensorLayoutArray
&
layouts
,
const
std
::
string
&
param
,
Handle
*
handle
)
{
megdnn_assert
(
layouts
.
size
()
==
OprTrait
<
Opr
>::
arity
);
auto
opr
=
handle
->
create_operator
<
Opr
>
();
opr
->
param
()
=
Algorithm
::
deserialize_read_pod
<
typename
Opr
::
Param
>
(
param
);
if
(
!
policy
.
algo
.
valid
())
{
policy
.
algo
=
AlgoProxy
<
Opr
,
OprTrait
<
Opr
>::
arity
>::
get_algorithm_info_heuristic
(
opr
.
get
(),
layouts
).
desc
;
}
Algorithm
*
algo
=
opr
->
get_algorithm_from_desc
(
policy
.
algo
);
std
::
vector
<
Algorithm
::
SearchItem
>&&
sub_items
=
algo
->
get_subopr_list
(
layouts
,
opr
.
get
());
FOREACH_OPR_TYPE_DISPATCH
(
sub_items
,
{
policy
.
sub_policy
.
push_back
(
ExecutionPolicy
{});
construct_sub_execution_policy_heuristic
<
_Opr
>
(
policy
.
sub_policy
.
back
(),
_item
.
layouts
,
_item
.
param
,
handle
);
});
}
}
// namespace test
}
// namespace megdnn
...
...
dnn/test/common/convolution.cpp
浏览文件 @
0d720653
...
...
@@ -570,6 +570,8 @@ void convolution::test_conv_config_combinations(int k_size,
.
set_param
(
param
);
auto
opr
=
checker
.
opr
();
opr
->
param
()
=
param
;
std
::
string
param_str
;
Algorithm
::
serialize_write_pod
(
opr
->
param
(),
param_str
);
TensorLayout
ily
{
ishp
,
inp_type
},
fly
{
fshp
,
inp_type
},
oly
;
oly
.
dtype
=
out_type
;
opr
->
deduce_layout
(
ily
,
fly
,
oly
);
...
...
@@ -581,10 +583,14 @@ void convolution::test_conv_config_combinations(int k_size,
for
(
auto
algo
:
opr
->
get_all_algorithms_info
(
ily
,
fly
,
oly
))
{
used_algos
.
insert
(
algo
.
desc
);
opr
->
execution_policy
().
algo
=
algo
.
desc
;
construct_sub_execution_policy_heuristic
<
ConvolutionForward
>
(
opr
->
execution_policy
(),
{
ily
,
fly
,
oly
},
param_str
,
opr
->
handle
());
checker
.
set_epsilon
(
eps_getter
(
dtype
==
1
,
0
,
algo
.
name
.
c_str
()))
.
execs
({
ishp
,
fshp
,
{}});
opr
->
execution_policy
()
.
algo
.
reset
()
;
opr
->
execution_policy
()
=
{}
;
ASSERT_TRUE
(
checker
.
prev_succ
())
<<
errmsg
(
algo
.
name
.
c_str
());
}
...
...
@@ -597,13 +603,19 @@ void convolution::test_conv_config_combinations(int k_size,
auto
opr
=
checker_bwd_data
.
opr
();
opr
->
param
()
=
param
;
std
::
string
param_str
;
Algorithm
::
serialize_write_pod
(
opr
->
param
(),
param_str
);
for
(
auto
algo
:
opr
->
get_all_algorithms_info
(
fly
,
oly
,
ily
))
{
used_algos_bwd_data
.
insert
(
algo
.
desc
);
opr
->
execution_policy
().
algo
=
algo
.
desc
;
construct_sub_execution_policy_heuristic
<
ConvolutionBackwardData
>
(
opr
->
execution_policy
(),
{
fly
,
oly
,
ily
},
param_str
,
opr
->
handle
());
checker_bwd_data
.
set_epsilon
(
eps_getter
(
dtype
==
1
,
1
,
algo
.
name
.
c_str
()))
.
execl
({
fly
,
oly
,
ily
});
opr
->
execution_policy
()
.
algo
.
reset
()
;
opr
->
execution_policy
()
=
{}
;
ASSERT_TRUE
(
checker_bwd_data
.
prev_succ
())
<<
errmsg
(
algo
.
name
.
c_str
());
}
...
...
@@ -618,13 +630,19 @@ void convolution::test_conv_config_combinations(int k_size,
auto
opr
=
checker_bwd_filter
.
opr
();
opr
->
param
()
=
param
;
std
::
string
param_str
;
Algorithm
::
serialize_write_pod
(
opr
->
param
(),
param_str
);
for
(
auto
algo
:
opr
->
get_all_algorithms_info
(
ily
,
oly
,
fly
))
{
used_algos_bwd_flt
.
insert
(
algo
.
desc
);
opr
->
execution_policy
().
algo
=
algo
.
desc
;
construct_sub_execution_policy_heuristic
<
ConvolutionBackwardFilter
>
(
opr
->
execution_policy
(),
{
ily
,
oly
,
fly
},
param_str
,
opr
->
handle
());
checker_bwd_filter
.
set_epsilon
(
eps_getter
(
dtype
==
1
,
2
,
algo
.
name
.
c_str
()))
.
execl
({
ily
,
oly
,
fly
});
opr
->
execution_policy
()
.
algo
.
reset
()
;
opr
->
execution_policy
()
=
{}
;
ASSERT_TRUE
(
checker_bwd_filter
.
prev_succ
())
<<
errmsg
(
algo
.
name
.
c_str
());
}
...
...
dnn/test/common/opr_proxy.h
浏览文件 @
0d720653
...
...
@@ -338,6 +338,7 @@ struct OprProxyProfilingBase
FastRunCache
&
cache
)
{
megdnn_assert
(
layouts
.
size
()
==
arity
);
auto
opr
=
handle
->
create_operator
<
Opr
>
();
opr
->
param
()
=
Algorithm
::
deserialize_read_pod
<
typename
Opr
::
Param
>
(
param
);
SmallVector
<
size_t
>
sizes_in_bytes
;
...
...
@@ -427,9 +428,9 @@ struct OprProxyProfilingBase
auto
&&
search_items
=
flatten_search_space
(
layouts
,
param_str
,
opr
->
handle
());
FOREACH_OPR_TYPE_DISPATCH
(
search_items
,
{
OprProxyProfilingBase
<
_Opr
>::
search
(
_item
.
layouts
,
param_str
,
W
,
opr
->
handle
(),
warmup_times
,
exec_times
,
cache
);
OprProxyProfilingBase
<
_Opr
>::
search
(
_item
.
layouts
,
_item
.
param
,
W
,
opr
->
handle
()
,
warmup_times
,
exec_times
,
cache
);
});
construct_execution_policy
(
layouts
,
param_str
,
opr
->
handle
(),
cache
,
...
...
dnn/test/cuda/chanwise_convolution.cpp
浏览文件 @
0d720653
...
...
@@ -273,10 +273,14 @@ TEST_F(CUDA, CHANWISE_CONVOLUTION_FORWARD) {
Checker
<
Convolution
>
checker
(
handle_cuda
());
bool
require_algo
=
false
;
checker
.
set_before_exec_callback
(
AlgoChecker
<
ConvolutionForward
>
(
ConvBiasForward
::
algo_name
<
ConvBiasForward
::
DirectParam
>
(
"CHANNEL_WISE"
,
{})
.
c_str
(),
ExecutionPolicyAlgoName
{
"DEFAULT"
,
{{
ConvBiasForward
::
algo_name
<
ConvBiasForward
::
DirectParam
>
(
"CHANNEL_WISE"
,
{})
.
c_str
(),
{}}}},
&
require_algo
));
for
(
auto
dtype
:
std
::
vector
<
DType
>
{
dtype
::
Float32
(),
dtype
::
Float16
()})
{
checker
.
set_dtype
(
0
,
dtype
).
set_dtype
(
1
,
dtype
).
set_dtype
(
2
,
dtype
);
if
(
dtype
.
enumv
()
==
DTypeEnum
::
Float16
)
...
...
@@ -306,8 +310,12 @@ TEST_F(CUDA, CHANWISE_CONVOLUTION_FORWARD_SMALL) {
Checker
<
Convolution
>
checker
(
handle_cuda
());
bool
require_algo
=
false
;
checker
.
set_before_exec_callback
(
AlgoChecker
<
ConvolutionForward
>
(
ConvBiasForward
::
algo_name
<
ConvBiasForward
::
DirectParam
>
(
"CHANNEL_WISE_SMALL"
,
{}).
c_str
(),
ExecutionPolicyAlgoName
{
"DEFAULT"
,
{{
ConvBiasForward
::
algo_name
<
ConvBiasForward
::
DirectParam
>
(
"CHANNEL_WISE_SMALL"
,
{})
.
c_str
(),
{}}}},
&
require_algo
));
for
(
auto
dtype
:
std
::
vector
<
DType
>
{
dtype
::
Float32
(),
...
...
@@ -338,6 +346,7 @@ TEST_F(CUDA, CHANWISE_CONVOLUTION_BACKWARD_DATA) {
bool
require_algo
=
false
;
checker
.
set_before_exec_callback
(
AlgoChecker
<
ConvolutionBackwardData
>
(
"CHANNEL_WISE"
,
&
require_algo
));
for
(
auto
dtype
:
std
::
vector
<
DType
>
{
dtype
::
Float32
(),
dtype
::
Float16
()})
{
checker
.
set_dtype
(
0
,
dtype
).
set_dtype
(
1
,
dtype
).
set_dtype
(
2
,
dtype
);
if
(
dtype
.
enumv
()
==
DTypeEnum
::
Float16
)
...
...
@@ -368,9 +377,8 @@ TEST_F(CUDA, CHANWISE_CONVOLUTION_BACKWARD_DATA) {
TEST_F
(
CUDA
,
CHANWISE_CONVOLUTION_BACKWARD_DATA_SMALL
)
{
Checker
<
ConvolutionBackwardData
>
checker
(
handle_cuda
());
bool
require_algo
=
false
;
checker
.
set_before_exec_callback
(
AlgoChecker
<
ConvolutionBackwardData
>
(
"CHANNEL_WISE_SMALL"
,
&
require_algo
));
checker
.
set_before_exec_callback
(
AlgoChecker
<
ConvolutionBackwardData
>
(
"CHANNEL_WISE_SMALL"
,
&
require_algo
));
for
(
auto
dtype
:
std
::
vector
<
DType
>
{
dtype
::
Float32
(),
#if CUDA_VERSION >= 9000
...
...
@@ -396,10 +404,14 @@ TEST_F(CUDA, CHANWISE_CONVOLUTION_BACKWARD_FILTER) {
Checker
<
ConvolutionBackwardFilter
>
checker
(
handle_cuda
());
bool
require_algo
=
false
;
checker
.
set_before_exec_callback
(
AlgoChecker
<
ConvolutionBackwardFilter
>
(
"CHANNEL_WISE"
,
&
require_algo
));
"CHANNEL_WISE"
,
&
require_algo
));
UniformFloatRNG
rng
(
-
0.1
,
0.1
);
for
(
auto
dtype
:
std
::
vector
<
DType
>
{
dtype
::
Float32
(),
dtype
::
Float16
()})
{
checker
.
set_dtype
(
0
,
dtype
).
set_dtype
(
1
,
dtype
).
set_dtype
(
2
,
dtype
).
set_rng
(
0
,
&
rng
).
set_rng
(
1
,
&
rng
);
checker
.
set_dtype
(
0
,
dtype
)
.
set_dtype
(
1
,
dtype
)
.
set_dtype
(
2
,
dtype
)
.
set_rng
(
0
,
&
rng
)
.
set_rng
(
1
,
&
rng
);
if
(
dtype
.
enumv
()
==
DTypeEnum
::
Float16
)
checker
.
set_epsilon
(
2e-1
);
// simple case
...
...
@@ -514,7 +526,7 @@ TEST_F(CUDA, CHANWISE_CONVOLUTION_BENCH_ALL_ALGO_FWD) {
auto
run
=
[
&
](
size_t
N
,
size_t
C
,
size_t
IH
,
size_t
IW
,
size_t
FH
,
size_t
FW
)
{
checker
.
proxy
()
->
target_execution_policy
.
algo
.
reset
()
;
checker
.
proxy
()
->
target_execution_policy
=
{}
;
checker
.
execs
({{
N
,
C
,
IH
,
IW
},
{
C
,
1
,
1
,
FH
,
FW
},
{}});
};
...
...
@@ -614,7 +626,7 @@ TEST_F(CUDA, BENCHMARK_CHANWISE_CONV_ALL_ALGO_FORWARD) {
.
set_dtype
(
2
,
dtype
::
Float32
())
.
set_rng
(
0
,
&
rng
)
.
set_rng
(
1
,
&
rng
);
bencher
.
proxy
()
->
target_execution_policy
.
algo
.
reset
()
;
bencher
.
proxy
()
->
target_execution_policy
=
{}
;
auto
time_in_ms_fp32
=
bencher
.
execs
({
src
,
filter
,
{}})
/
RUNS
;
bencher
.
set_param
(
param
)
...
...
@@ -623,7 +635,7 @@ TEST_F(CUDA, BENCHMARK_CHANWISE_CONV_ALL_ALGO_FORWARD) {
.
set_dtype
(
2
,
dtype
::
Float16
())
.
set_rng
(
0
,
&
rng
)
.
set_rng
(
1
,
&
rng
);
bencher
.
proxy
()
->
target_execution_policy
.
algo
.
reset
()
;
bencher
.
proxy
()
->
target_execution_policy
=
{}
;
auto
time_in_ms_fp16
=
bencher
.
execs
({
src
,
filter
,
{}})
/
RUNS
;
bencher
.
proxy
()
->
target_execution_policy
.
algo
.
reset
();
...
...
@@ -677,10 +689,13 @@ TEST_F(CUDA, BENCHMARK_CHANWISE_CONV_FORWARD_FLOAT) {
CUBenchmarker
<
ConvolutionForward
>
bencher
(
handle_cuda
());
size_t
RUNS
=
1
;
bencher
.
set_display
(
false
).
set_times
(
RUNS
);
bencher
.
set_before_exec_callback
(
AlgoChecker
<
ConvolutionForward
>
(
ConvBiasForward
::
algo_name
<
ConvBiasForward
::
DirectParam
>
(
"CHANNEL_WISE"
,
{})
.
c_str
()));
bencher
.
set_before_exec_callback
(
AlgoChecker
<
ConvolutionForward
>
(
ExecutionPolicyAlgoName
{
"DEFAULT"
,
{{
ConvBiasForward
::
algo_name
<
ConvBiasForward
::
DirectParam
>
(
"CHANNEL_WISE"
,
{})
.
c_str
(),
{}}}}));
Convolution
::
Param
param
;
param
.
format
=
ConvBias
::
Param
::
Format
::
NCHW
;
...
...
@@ -783,17 +798,24 @@ TEST_F(CUDA, BENCHMARK_CHANWISE_CONV_FORWARD_FLOAT_SMALL) {
.
set_dtype
(
2
,
dtype
::
Float32
())
.
set_rng
(
0
,
&
rng
)
.
set_rng
(
1
,
&
rng
)
.
set_before_exec_callback
(
AlgoChecker
<
ConvolutionForward
>
(
ConvBiasForward
::
algo_name
<
ConvBiasForward
::
DirectParam
>
(
"CHANNEL_WISE"
,
{})
.
c_str
()));
.
set_before_exec_callback
(
AlgoChecker
<
ConvolutionForward
>
(
ExecutionPolicyAlgoName
{
"DEFAULT"
,
{{
ConvBiasForward
::
algo_name
<
ConvBiasForward
::
DirectParam
>
(
"CHANNEL_WISE"
,
{})
.
c_str
(),
{}}}}));
auto
time_in_ms_fp32_normal
=
bencher
.
execs
({
src
,
filter
,
{}})
/
RUNS
;
bencher
.
set_before_exec_callback
(
AlgoChecker
<
ConvolutionForward
>
(
ConvBiasForward
::
algo_name
<
ConvBiasForward
::
DirectParam
>
(
"CHANNEL_WISE"
,
{})
.
c_str
()));
ExecutionPolicyAlgoName
{
"DEFAULT"
,
{{
ConvBiasForward
::
algo_name
<
ConvBiasForward
::
DirectParam
>
(
"CHANNEL_WISE"
,
{})
.
c_str
(),
{}}}}));
auto
time_in_ms_fp32_small
=
bencher
.
execs
({
src
,
filter
,
{}})
/
RUNS
;
bencher
.
set_param
(
param
)
...
...
dnn/test/cuda/convolution.cpp
浏览文件 @
0d720653
...
...
@@ -135,10 +135,13 @@ TEST_F(CUDA, CONV_FORWARD_MATMUL_NCHW4) {
.
set_rng
(
1
,
&
int_rng
)
.
set_param
(
param
);
checker
.
set_before_exec_callback
(
AlgoChecker
<
Convolution
>
(
ConvBiasForward
::
algo_name
<
ConvBiasForward
::
MatmulParam
>
(
"MATMUL8X8X32"
,
{})
.
c_str
()));
checker
.
set_before_exec_callback
(
AlgoChecker
<
ConvolutionForward
>
(
ExecutionPolicyAlgoName
{
"DEFAULT"
,
{{
ConvBiasForward
::
algo_name
<
ConvBiasForward
::
MatmulParam
>
(
"MATMUL8X8X32"
,
{})
.
c_str
(),
{}}}}));
param
.
sparse
=
Convolution
::
Param
::
Sparse
::
DENSE
;
param
.
pad_h
=
param
.
pad_w
=
1
;
...
...
dnn/test/cuda/dilated_convolution.cpp
浏览文件 @
0d720653
...
...
@@ -30,19 +30,26 @@ TEST_F(CUDA, DILATED_CONVOLUTION_FORWARD)
auto
args
=
get_dilated_args
();
Checker
<
ConvolutionForward
>
checker
(
handle_cuda
());
#if CUDNN_VERSION >= 7500
checker
.
set_before_exec_callback
(
AlgoChecker
<
ConvolutionForward
>
(
ConvBiasForward
::
algo_name
<
ConvBiasForward
::
DefaultParam
>
(
"CUDNN:Convolution:CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_"
"PRECOMP_"
"GEMM"
CUDNN_VERSION_STRING
,
{})
.
c_str
()));
checker
.
set_before_exec_callback
(
AlgoChecker
<
ConvolutionForward
>
(
ExecutionPolicyAlgoName
{
"DEFAULT"
,
{{
ConvBiasForward
::
algo_name
<
ConvBiasForward
::
DefaultParam
>
(
"CUDNN:Convolution:CUDNN_CONVOLUTION_FWD_ALGO_"
"IMPLICIT_"
"PRECOMP_"
"GEMM"
CUDNN_VERSION_STRING
,
{})
.
c_str
(),
{}}}}));
printf
(
"cudnn version >= 7.5, use cudnn impl for dilated convolution
\n
"
);
#else
checker
.
set_before_exec_callback
(
AlgoChecker
<
ConvolutionForward
>
(
ConvBiasForward
::
algo_name
<
ConvBiasForward
::
MatmulParam
>
(
"MATMUL"
,
{})
.
c_str
()));
checker
.
set_before_exec_callback
(
AlgoChecker
<
ConvolutionForward
>
(
ExecutionPolicyAlgoName
{
"DEFAULT"
,
{{
ConvBiasForward
::
algo_name
<
ConvBiasForward
::
MatmulParam
>
(
"MATMUL"
,
{})
.
c_str
(),
{}}}}));
#endif
NormalRNG
default_rng
;
for
(
auto
&&
arg
:
args
)
{
...
...
dnn/test/cuda/group_conv.cpp
浏览文件 @
0d720653
...
...
@@ -116,12 +116,17 @@ TEST_F(CUDA, GROUP_CONV_FORWARD_1x1) {
std
::
string
conv1x1_name
=
ConvBiasForward
::
algo_name
<
ConvBiasForward
::
MatmulParam
>
(
"MATMUL1X1"
,
{});
checker
.
set_before_exec_callback
(
AlgoChecker
<
Convolution
>
(
ConvBiasForward
::
algo_name
<
ConvBiasForward
::
DirectParam
>
(
ssprintf
(
"%s:%s"
,
"CUDA:GROUP_CONV"
,
conv1x1_name
.
c_str
()),
{})
.
c_str
()));
checker
.
set_before_exec_callback
(
AlgoChecker
<
ConvolutionForward
>
(
ExecutionPolicyAlgoName
{
"DEFAULT"
,
{{
ConvBiasForward
::
algo_name
<
ConvBiasForward
::
DirectParam
>
(
ssprintf
(
"%s:%s"
,
"CUDA:GROUP_CONV"
,
conv1x1_name
.
c_str
())
.
c_str
(),
{})
.
c_str
(),
{}}}}));
#endif
Convolution
::
Param
param
;
param
.
sparse
=
Convolution
::
Param
::
Sparse
::
GROUP
;
...
...
src/opr/impl/search_policy/algo_chooser.cpp
浏览文件 @
0d720653
...
...
@@ -231,7 +231,7 @@ void AlgoChooser<Opr>::profile(ExeContext& ctx, bool require_reproducible) {
algo
.
name
.
c_str
(),
str_on_inp_shape
.
c_str
());
ImplExecutionPolicy
policy
;
policy
.
algo
=
algo
.
desc
;
ctx
.
construct_execution_policy
_from_cache
(
require_reproducible
,
policy
);
ctx
.
construct_execution_policy
(
require_reproducible
,
policy
);
if
(
ctx
.
get_workspace_size_bytes
(
policy
)
>=
workspace_limit
)
continue
;
...
...
@@ -302,7 +302,7 @@ AlgoChooser<Opr>::choose_by_profile(ExeContext& ctx, bool require_reproducible,
});
}
typename
AlgoChooser
<
Opr
>::
ImplExecutionPolicy
policy
;
ctx
.
construct_execution_policy
_from_cache
(
require_reproducible
,
policy
);
ctx
.
construct_execution_policy
(
require_reproducible
,
policy
);
return
policy
;
MIDOUT_E
}
...
...
@@ -324,6 +324,11 @@ size_t AlgoChooser<Opr>::setup_algo(const FixedTensorLayouts& layouts,
ImplExecutionPolicy
policy
;
if
(
auto
algo_choose_hook
=
mgb_opr
->
algo_chooser
())
{
policy
=
algo_choose_hook
(
mgb_opr
);
ctx
.
construct_execution_policy
(
mgb_opr
->
execution_policy
().
strategy
==
mixin
::
AlgoChooserHelper
::
ExecutionPolicy
::
Strategy
::
HEURISTIC_REPRODUCIBLE
,
policy
,
false
);
}
if
(
!
policy
.
algo
.
valid
())
{
policy
=
get_policy
(
ctx
);
...
...
@@ -520,13 +525,26 @@ AlgoChooser<Opr>::ExeContext::get_all_candidates() const {
}
template
<
typename
Opr
>
void
AlgoChooser
<
Opr
>::
ExeContext
::
construct_execution_policy
_from_cache
(
void
AlgoChooser
<
Opr
>::
ExeContext
::
construct_execution_policy
(
bool
require_reproducible
,
typename
AlgoChooser
<
Opr
>::
ImplExecutionPolicy
&
policy
)
const
{
typename
AlgoChooser
<
Opr
>::
ImplExecutionPolicy
&
policy
,
bool
retrive_from_cache
)
const
{
if
(
!
policy
.
algo
.
valid
())
{
policy
.
algo
=
get_profile_result_from_cache
(
require_reproducible
).
desc
;
if
(
retrive_from_cache
)
{
policy
.
algo
=
get_profile_result_from_cache
(
require_reproducible
).
desc
;
}
else
{
auto
workspace_limit
=
WorkspaceLimitGetter
::
get_workspace_limit
(
owner_graph
(),
m_cn
,
m_execution_policy
.
workspace_limit
);
policy
.
algo
=
APPLY
(
m_megdnn_opr
->
get_algorithm_info_heuristic
(
args
...,
workspace_limit
,
require_reproducible
),
m_layouts
)
.
desc
;
}
mgb_assert
(
policy
.
algo
.
valid
(),
"No cache found, maybe some error occured"
);
"No algo found from cache or heuristic, maybe some error "
"occured"
);
}
Algorithm
*
algo
=
m_megdnn_opr
->
get_algorithm_from_desc
(
policy
.
algo
);
...
...
@@ -544,8 +562,9 @@ void AlgoChooser<Opr>::ExeContext::construct_execution_policy_from_cache(
_item
.
param
,
m_base_mgb_opr
,
m_cn
,
m_execution_policy
,
m_allow_weight_preprocess
);
policy
.
sub_policy
.
push_back
({});
sub_ctx
.
construct_execution_policy_from_cache
(
require_reproducible
,
policy
.
sub_policy
.
back
());
sub_ctx
.
construct_execution_policy
(
require_reproducible
,
policy
.
sub_policy
.
back
(),
retrive_from_cache
);
});
return
;
...
...
@@ -672,11 +691,11 @@ AlgoChooser<Opr>::ExeContext::construct_fake_preprocess_filter() const {
AlgoChooser<megdnn::Opr>::ExeContext::get_workspace_size_bytes( \
const typename AlgoChooser<megdnn::Opr>::ImplExecutionPolicy& \
policy) const; \
template void
AlgoChooser<megdnn::Opr>::ExeContext::
\
construct_execution_policy_from_cache(
\
bool require_reproducible,
\
typename AlgoChooser<megdnn::Opr>::ImplExecutionPolicy&
\
policy) const;
\
template void
\
AlgoChooser<megdnn::Opr>::ExeContext::construct_execution_policy(
\
bool require_reproducible,
\
typename AlgoChooser<megdnn::Opr>::ImplExecutionPolicy& policy,
\
bool retrive_from_cache) const;
\
template Maybe<AlgoChooserProfileCache::ResultEntry> \
AlgoChooser<megdnn::Opr>::ExeContext::profile_single_algo( \
const typename AlgoChooser<megdnn::Opr>::ImplExecutionPolicy& \
...
...
src/opr/include/megbrain/opr/search_policy/algo_chooser.h
浏览文件 @
0d720653
...
...
@@ -129,13 +129,16 @@ public:
ImplAlgo
get_profile_result_from_cache
(
bool
require_reproducible
)
const
;
/**
* \brief construct execution policy from cache.
* \brief construct execution policy from cache
or heuristic
.
*
* \param require_reproducible select algo which is reproducible
* \param policy execution policy
* \param retrive_from_cache retrive algo from cache if set True, get
* from heuristic otherwise.
*/
void
construct_execution_policy_from_cache
(
bool
require_reproducible
,
ImplExecutionPolicy
&
policy
)
const
;
void
construct_execution_policy
(
bool
require_reproducible
,
ImplExecutionPolicy
&
policy
,
bool
retrive_from_cache
=
true
)
const
;
private:
Maybe
<
PreprocessFilter
<
Opr
>>
construct_fake_preprocess_filter
()
const
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录