Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
75eebb7c
MegEngine
项目概览
MegEngine 天元
/
MegEngine
9 个月 前同步成功
通知
392
Star
4702
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
75eebb7c
编写于
7月 20, 2020
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
feat(opr): use weight preprocess feature of MegDNN
GitOrigin-RevId: 779041f8a87051e58d5e0ca289773b05a261a8a0
上级
66509a54
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
535 addition
and
78 deletion
+535
-78
dnn/include/megdnn/oprs/nn.h
dnn/include/megdnn/oprs/nn.h
+12
-7
src/opr/impl/dnn/convolution.cpp
src/opr/impl/dnn/convolution.cpp
+242
-67
src/opr/include/megbrain/opr/dnn/convolution.h
src/opr/include/megbrain/opr/dnn/convolution.h
+57
-3
src/opr/test/dnn/convolution.cpp
src/opr/test/dnn/convolution.cpp
+224
-1
未找到文件。
dnn/include/megdnn/oprs/nn.h
浏览文件 @
75eebb7c
...
@@ -51,6 +51,17 @@ protected:
...
@@ -51,6 +51,17 @@ protected:
};
};
using
SeparableConv
=
SeparableConvForward
;
using
SeparableConv
=
SeparableConvForward
;
namespace
detail
{
struct
PreprocessedFilter
{
//! user data; its lifetime should be bound to MegDNN Convolution
//! operator
void
*
algorithm_id
;
TensorNDArray
tensors
;
};
}
// namespace intl
/**
/**
* \brief base class for convolution operation
* \brief base class for convolution operation
*
*
...
@@ -131,13 +142,7 @@ public:
...
@@ -131,13 +142,7 @@ public:
return
flag
;
return
flag
;
}
}
};
};
using
PreprocessedFilter
=
detail
::
PreprocessedFilter
;
struct
PreprocessedFilter
{
//! user data; its lifetime should be bound to MegDNN Convolution
//! operator
void
*
algorithm_id
;
TensorNDArray
tensors
;
};
protected:
protected:
// Check or deduce output DType
// Check or deduce output DType
...
...
src/opr/impl/dnn/convolution.cpp
浏览文件 @
75eebb7c
...
@@ -10,6 +10,7 @@
...
@@ -10,6 +10,7 @@
*/
*/
#include "megbrain/opr/dnn/convolution.h"
#include "megbrain/opr/dnn/convolution.h"
#include "megbrain/opr/io.h"
#include "megbrain/graph/grad_impl.h"
#include "megbrain/graph/grad_impl.h"
#include "megbrain/system.h"
#include "megbrain/system.h"
...
@@ -95,67 +96,14 @@ MGB_FOREACH_FASTRUN_OPR(cb)
...
@@ -95,67 +96,14 @@ MGB_FOREACH_FASTRUN_OPR(cb)
#undef cb
#undef cb
template
<
class
MGBOpr
>
struct
OprAttributeTrait
{
static
bool
is_weights_persistent
(
const
MGBOpr
*
)
{
return
false
;
}
};
template
<
>
struct
OprAttributeTrait
<
opr
::
ConvBias
>
{
//! return true if the flag of weights is PERSISTENT_DEVICE_VALUE, false
//! otherwise. True means weights can be tranformed in the first run.
static
bool
is_weights_persistent
(
const
opr
::
ConvBias
*
opr
)
{
return
opr
->
input
()[
1
]
->
contain_flag
(
VarNode
::
Flag
::
PERSISTENT_DEVICE_VALUE
);
}
};
template
<
typename
Opr
>
constexpr
bool
opr_supports_preprocess
()
{
return
std
::
is_same
<
Opr
,
megdnn
::
ConvolutionForward
>::
value
||
std
::
is_same
<
Opr
,
megdnn
::
ConvBias
>::
value
;
}
template
<
typename
Opr
>
template
<
typename
Opr
>
struct
OprArityTrait
;
struct
OprArityTrait
;
#define APPLY(statement, ...) \
mgb::apply([&](const auto&... args) { return statement; }, \
std::tuple_cat(__VA_ARGS__))
template
<
typename
Opr
,
int
_arity_in
,
int
_arity_out
>
template
<
typename
Opr
,
int
_arity_in
,
int
_arity_out
>
struct
OprArityTraitTmpl
{
struct
OprArityTraitTmpl
{
static
constexpr
int
arity_in
=
_arity_in
;
static
constexpr
int
arity_in
=
_arity_in
;
static
constexpr
int
arity_out
=
_arity_out
;
static
constexpr
int
arity_out
=
_arity_out
;
static
constexpr
int
arity
=
arity_in
+
arity_out
;
static
constexpr
int
arity
=
arity_in
+
arity_out
;
using
Algorithm
=
typename
Opr
::
Algorithm
;
using
TensorLayoutArray
=
std
::
array
<
TensorLayout
,
arity
>
;
static
size_t
get_workspace_in_bytes
(
Opr
*
opr
,
Algorithm
*
algo
,
const
TensorLayoutArray
&
layouts
)
{
opr
->
execution_policy
()
=
{
algo
};
size_t
workspace_size
;
if_constexpr
<
opr_supports_preprocess
<
Opr
>
()
>
([
&
](
auto
)
{
workspace_size
=
APPLY
(
opr
->
get_workspace_in_bytes
(
args
...,
nullptr
),
layouts
);
},
/* else */
[
&
](
auto
)
{
workspace_size
=
APPLY
(
opr
->
get_workspace_in_bytes
(
args
...),
layouts
);
});
return
workspace_size
;
}
static
void
exec
(
Opr
*
opr
,
const
std
::
array
<
DeviceTensorND
,
arity_in
>&
inp_val
,
const
std
::
array
<
DeviceTensorND
,
arity_out
>&
out_val
,
megdnn
::
Workspace
&
workspace
)
{
if_constexpr
<
opr_supports_preprocess
<
Opr
>
()
>
([
&
](
auto
)
{
APPLY
(
opr
->
exec
(
args
.
as_megdnn
()...,
nullptr
,
workspace
),
inp_val
,
out_val
);
},
/* else */
[
&
](
auto
)
{
APPLY
(
opr
->
exec
(
args
.
as_megdnn
()...,
workspace
),
inp_val
,
out_val
);
});
}
};
};
#define INST_ARITY(_Opr, _in, _out) \
#define INST_ARITY(_Opr, _in, _out) \
...
@@ -179,6 +127,26 @@ INST_ARITY(megdnn::DeformableConvBackwardData, 5, 3);
...
@@ -179,6 +127,26 @@ INST_ARITY(megdnn::DeformableConvBackwardData, 5, 3);
#undef INST_ARITY
#undef INST_ARITY
template
<
typename
Opr
>
constexpr
bool
opr_supports_preprocess
()
{
return
std
::
is_same
<
Opr
,
megdnn
::
ConvolutionForward
>::
value
||
std
::
is_same
<
Opr
,
megdnn
::
ConvBias
>::
value
;
}
template
<
typename
Opr
,
bool
has_prep
>
struct
PreprocessFilterImpl
{
using
T
=
union
{};
};
template
<
typename
Opr
>
struct
PreprocessFilterImpl
<
Opr
,
true
>
{
using
T
=
typename
Opr
::
PreprocessedFilter
;
};
template
<
typename
Opr
>
using
PreprocessFilter
=
typename
PreprocessFilterImpl
<
Opr
,
opr_supports_preprocess
<
Opr
>
()
>::
T
;
// timeout delta to be added with fastest known algorithm for new algos
// timeout delta to be added with fastest known algorithm for new algos
constexpr
double
TIMEOUT_TOLERANCE
=
2
;
constexpr
double
TIMEOUT_TOLERANCE
=
2
;
...
@@ -225,6 +193,7 @@ public:
...
@@ -225,6 +193,7 @@ public:
CompNode
::
Locator
comp_node_loc
;
CompNode
::
Locator
comp_node_loc
;
ConvTensorShapes
shapes
;
ConvTensorShapes
shapes
;
typename
Opr
::
Param
opr_param
;
typename
Opr
::
Param
opr_param
;
bool
allow_weight_preprocess
;
//! filled by profile()
//! filled by profile()
mutable
double
actual_timeout
;
mutable
double
actual_timeout
;
...
@@ -277,6 +246,10 @@ double TimedProfiler<Opr>::init_timeout_setting() {
...
@@ -277,6 +246,10 @@ double TimedProfiler<Opr>::init_timeout_setting() {
return
0
;
return
0
;
}
}
#define APPLY(statement, ...) \
mgb::apply([&](const auto&... args) { return statement; }, \
std::tuple_cat(__VA_ARGS__))
template
<
typename
Opr
>
template
<
typename
Opr
>
typename
TimedProfiler
<
Opr
>::
TResult
TimedProfiler
<
Opr
>::
prof_impl
(
typename
TimedProfiler
<
Opr
>::
TResult
TimedProfiler
<
Opr
>::
prof_impl
(
const
TParam
&
raw_param
)
{
const
TParam
&
raw_param
)
{
...
@@ -324,6 +297,16 @@ typename TimedProfiler<Opr>::TResult TimedProfiler<Opr>::prof_impl(
...
@@ -324,6 +297,16 @@ typename TimedProfiler<Opr>::TResult TimedProfiler<Opr>::prof_impl(
megdnn_opr
->
execution_policy
()
=
{
algo
};
megdnn_opr
->
execution_policy
()
=
{
algo
};
}
}
// Allocate preprocessed weight buffers.
TensorLayoutArray
preprocessed_layout
;
if_constexpr
<
opr_supports_preprocess
<
Opr
>
()
>
([
&
](
auto
_
)
{
if
(
param
.
allow_weight_preprocess
)
{
preprocessed_layout
=
APPLY
(
_
(
megdnn_opr
)
->
deduce_preprocessed_filter_layout
(
args
...),
layouts
);
}
});
{
{
// first allocate a whole chunk to avoid memory fragmentation (here we
// first allocate a whole chunk to avoid memory fragmentation (here we
// rely on memory allocator to reuse memory)
// rely on memory allocator to reuse memory)
...
@@ -332,6 +315,9 @@ typename TimedProfiler<Opr>::TResult TimedProfiler<Opr>::prof_impl(
...
@@ -332,6 +315,9 @@ typename TimedProfiler<Opr>::TResult TimedProfiler<Opr>::prof_impl(
for
(
int
i
=
0
;
i
<
arity
;
++
i
)
{
for
(
int
i
=
0
;
i
<
arity
;
++
i
)
{
tot_size
+=
layouts
[
i
].
span
().
high_byte
+
align
;
tot_size
+=
layouts
[
i
].
span
().
high_byte
+
align
;
}
}
for
(
const
auto
&
layout
:
preprocessed_layout
)
{
tot_size
+=
layout
.
span
().
high_byte
+
align
;
}
tot_size
+=
param
.
workspace
;
tot_size
+=
param
.
workspace
;
DeviceTensorStorage
storage
{
cn
};
DeviceTensorStorage
storage
{
cn
};
storage
.
ensure_size
(
tot_size
);
storage
.
ensure_size
(
tot_size
);
...
@@ -362,15 +348,46 @@ typename TimedProfiler<Opr>::TResult TimedProfiler<Opr>::prof_impl(
...
@@ -362,15 +348,46 @@ typename TimedProfiler<Opr>::TResult TimedProfiler<Opr>::prof_impl(
mdn_workspace
.
raw_ptr
=
workspace
.
raw_ptr
();
mdn_workspace
.
raw_ptr
=
workspace
.
raw_ptr
();
}
}
// allocate storage for preprocessed filter
SmallVector
<
DeviceTensorND
>
flt_val
(
preprocessed_layout
.
size
());
for
(
size_t
i
=
0
;
i
<
preprocessed_layout
.
size
();
i
++
)
{
flt_val
[
i
]
=
{
cn
,
preprocessed_layout
[
i
],
preprocessed_layout
[
i
].
dtype
,
preprocessed_layout
[
i
].
format
};
}
for
(
int
i
=
0
;
i
<
arity_in
;
++
i
)
{
for
(
int
i
=
0
;
i
<
arity_in
;
++
i
)
{
fill_zero_dev_tensor
(
inp_val
[
i
]);
fill_zero_dev_tensor
(
inp_val
[
i
]);
}
}
PreprocessFilter
<
Opr
>
prep_flt
;
if_constexpr
<
opr_supports_preprocess
<
Opr
>
()
>
([
&
](
auto
_
)
{
if
(
!
preprocessed_layout
.
empty
())
{
auto
&&
pf
=
_
(
prep_flt
);
pf
.
algorithm_id
=
nullptr
;
pf
.
tensors
.
resize
(
flt_val
.
size
());
for
(
size_t
i
=
0
;
i
<
flt_val
.
size
();
i
++
)
{
pf
.
tensors
[
i
]
=
flt_val
[
i
].
as_megdnn
();
}
APPLY
(
_
(
megdnn_opr
)
->
exec_preprocess
(
args
...,
&
pf
,
mdn_workspace
),
std
::
forward_as_tuple
(
layouts
[
0
],
inp_val
[
1
].
as_megdnn
()),
array_skip
<
2
>
(
layouts
));
}
});
RealTimer
timer
;
RealTimer
timer
;
auto
ev_start
=
cn
.
create_event
(
CompNode
::
Event
::
NEED_TIMER
),
auto
ev_start
=
cn
.
create_event
(
CompNode
::
Event
::
NEED_TIMER
),
ev_end
=
cn
.
create_event
(
CompNode
::
Event
::
NEED_TIMER
);
ev_end
=
cn
.
create_event
(
CompNode
::
Event
::
NEED_TIMER
);
ev_start
->
record
();
ev_start
->
record
();
OprArityTrait
<
Opr
>::
exec
(
megdnn_opr
.
get
(),
inp_val
,
out_val
,
mdn_workspace
);
if_constexpr
<
opr_supports_preprocess
<
Opr
>
()
>
([
&
](
auto
_
)
{
auto
&&
opr
=
_
(
megdnn_opr
);
PreprocessFilter
<
Opr
>*
pf
=
preprocessed_layout
.
empty
()
?
nullptr
:
&
prep_flt
;
APPLY
(
opr
->
exec
(
args
.
as_megdnn
()...,
pf
,
mdn_workspace
),
inp_val
,
out_val
);
},
/* else */
[
&
](
auto
_
)
{
APPLY
(
_
(
megdnn_opr
)
->
exec
(
args
.
as_megdnn
()...,
mdn_workspace
),
inp_val
,
out_val
);
});
ev_end
->
record
();
ev_end
->
record
();
double
next_report_time
=
0.5
;
double
next_report_time
=
0.5
;
...
@@ -425,13 +442,15 @@ class AlgoChooser {
...
@@ -425,13 +442,15 @@ class AlgoChooser {
const
ConvTensorLayouts
&
m_layouts
;
const
ConvTensorLayouts
&
m_layouts
;
Opr
*
m_megdnn_opr
;
Opr
*
m_megdnn_opr
;
const
MGBOpr
*
m_mgb_opr
;
const
MGBOpr
*
m_mgb_opr
;
bool
m_allow_weight_preprocess
;
public:
public:
ExeContext
(
const
ConvTensorLayouts
&
layouts
,
Opr
*
megdnn_opr
,
ExeContext
(
const
ConvTensorLayouts
&
layouts
,
Opr
*
megdnn_opr
,
const
MGBOpr
*
mgb_opr
)
const
MGBOpr
*
mgb_opr
,
bool
allow_weight_preprocess
)
:
m_layouts
{
layouts
},
:
m_layouts
{
layouts
},
m_megdnn_opr
{
megdnn_opr
},
m_megdnn_opr
{
megdnn_opr
},
m_mgb_opr
{
mgb_opr
}
{
m_mgb_opr
{
mgb_opr
},
m_allow_weight_preprocess
{
allow_weight_preprocess
}
{
mgb_assert
(
m_layouts
.
size
()
==
layouts
.
size
());
mgb_assert
(
m_layouts
.
size
()
==
layouts
.
size
());
static_assert
(
static_assert
(
std
::
tuple_size
<
ConvTensorLayouts
>::
value
==
3
||
std
::
tuple_size
<
ConvTensorLayouts
>::
value
==
3
||
...
@@ -499,8 +518,23 @@ class AlgoChooser {
...
@@ -499,8 +518,23 @@ class AlgoChooser {
//! get workspace size required for specific algo
//! get workspace size required for specific algo
size_t
get_workspace_size_bytes
(
ImplAlgo
algo
)
const
{
size_t
get_workspace_size_bytes
(
ImplAlgo
algo
)
const
{
return
OprArityTrait
<
Opr
>::
get_workspace_in_bytes
(
m_megdnn_opr
,
m_megdnn_opr
->
execution_policy
()
=
{
algo
};
algo
,
m_layouts
);
size_t
result
;
if_constexpr
<
opr_supports_preprocess
<
Opr
>
()
>
([
&
](
auto
_
)
{
auto
&&
opr
=
_
(
m_megdnn_opr
);
auto
prep
=
construct_fake_preprocess_filter
();
PreprocessFilter
<
Opr
>*
prep_ptr
=
prep
.
valid
()
?
&
prep
.
val
()
:
nullptr
;
result
=
std
::
max
(
APPLY
(
opr
->
get_preprocess_workspace_in_bytes
(
args
...),
m_layouts
),
APPLY
(
opr
->
get_workspace_in_bytes
(
args
...,
prep_ptr
),
m_layouts
));
},
/* else */
[
&
](
auto
_
)
{
result
=
APPLY
(
_
(
m_megdnn_opr
)
->
get_workspace_in_bytes
(
args
...),
m_layouts
);
});
return
result
;
}
}
/*!
/*!
...
@@ -525,6 +559,28 @@ class AlgoChooser {
...
@@ -525,6 +559,28 @@ class AlgoChooser {
*/
*/
void
modify_param_with_weights_preprocessed
(
void
modify_param_with_weights_preprocessed
(
typename
TimedProfiler
<
Opr
>::
Param
&
param
)
const
{}
typename
TimedProfiler
<
Opr
>::
Param
&
param
)
const
{}
Maybe
<
PreprocessFilter
<
Opr
>>
construct_fake_preprocess_filter
()
const
{
Maybe
<
PreprocessFilter
<
Opr
>>
result
=
None
;
if_constexpr
<
opr_supports_preprocess
<
Opr
>
()
>
([
&
](
auto
_
)
{
if
(
!
m_allow_weight_preprocess
)
return
;
auto
opr
=
_
(
m_megdnn_opr
);
auto
layout
=
APPLY
(
opr
->
deduce_preprocessed_filter_layout
(
args
...),
m_layouts
);
if
(
layout
.
empty
())
return
;
result
=
PreprocessFilter
<
Opr
>
{};
auto
&
res
=
result
.
val
();
res
.
algorithm_id
=
nullptr
;
res
.
tensors
.
resize
(
layout
.
size
());
for
(
size_t
i
=
0
;
i
<
layout
.
size
();
i
++
)
{
res
.
tensors
[
i
]
=
megdnn
::
TensorND
(
nullptr
,
layout
[
i
]);
}
});
return
result
;
}
};
};
//! entrance for getting algorithm according to execution strategy
//! entrance for getting algorithm according to execution strategy
...
@@ -571,12 +627,13 @@ public:
...
@@ -571,12 +627,13 @@ public:
* \brief setup algorithm and return workspace size
* \brief setup algorithm and return workspace size
*/
*/
static
size_t
setup_algo
(
const
ConvTensorLayouts
&
layouts
,
Opr
*
megdnn_opr
,
static
size_t
setup_algo
(
const
ConvTensorLayouts
&
layouts
,
Opr
*
megdnn_opr
,
const
MGBOpr
*
mgb_opr
)
{
const
MGBOpr
*
mgb_opr
,
bool
allow_weight_preprocess
=
false
)
{
if
(
WorkspaceLimitGetter
::
is_prealloc_run
(
mgb_opr
->
owner_graph
()))
{
if
(
WorkspaceLimitGetter
::
is_prealloc_run
(
mgb_opr
->
owner_graph
()))
{
return
0
;
return
0
;
}
}
ExeContext
ctx
(
layouts
,
megdnn_opr
,
mgb_opr
);
ExeContext
ctx
(
layouts
,
megdnn_opr
,
mgb_opr
,
allow_weight_preprocess
);
auto
algo
=
get_algo
(
ctx
);
auto
algo
=
get_algo
(
ctx
);
size_t
workspace
=
ctx
.
get_workspace_size_bytes
(
algo
);
size_t
workspace
=
ctx
.
get_workspace_size_bytes
(
algo
);
...
@@ -780,9 +837,6 @@ Maybe<AlgoChooserProfileCache::ResultEntry>
...
@@ -780,9 +837,6 @@ Maybe<AlgoChooserProfileCache::ResultEntry>
AlgoChooser
<
Opr
>::
ExeContext
::
profile_single_algo
(
ImplAlgo
algo
,
AlgoChooser
<
Opr
>::
ExeContext
::
profile_single_algo
(
ImplAlgo
algo
,
double
&
timeout
)
const
{
double
&
timeout
)
const
{
typename
TimedProfiler
<
Opr
>::
Param
param
;
typename
TimedProfiler
<
Opr
>::
Param
param
;
bool
is_weights_persistent
=
OprAttributeTrait
<
typename
MegDNNOpr2MGBOpr
<
Opr
>::
MGBOpr
>::
is_weights_persistent
(
m_mgb_opr
);
auto
name
=
algo
->
name
();
auto
name
=
algo
->
name
();
// force check copy size <= dest len-1 from gcc8 for safe
// force check copy size <= dest len-1 from gcc8 for safe
auto
len
=
sizeof
(
param
.
algo_name
);
auto
len
=
sizeof
(
param
.
algo_name
);
...
@@ -806,8 +860,9 @@ AlgoChooser<Opr>::ExeContext::profile_single_algo(ImplAlgo algo,
...
@@ -806,8 +860,9 @@ AlgoChooser<Opr>::ExeContext::profile_single_algo(ImplAlgo algo,
for
(
size_t
i
=
0
;
i
<
param
.
shapes
.
size
();
++
i
)
for
(
size_t
i
=
0
;
i
<
param
.
shapes
.
size
();
++
i
)
param
.
shapes
[
i
]
=
m_layouts
[
i
];
param
.
shapes
[
i
]
=
m_layouts
[
i
];
param
.
opr_param
=
m_megdnn_opr
->
param
();
param
.
opr_param
=
m_megdnn_opr
->
param
();
param
.
allow_weight_preprocess
=
m_allow_weight_preprocess
;
if
(
is_weights_persistent
)
{
if
(
m_allow_weight_preprocess
)
{
modify_param_with_weights_preprocessed
(
param
);
modify_param_with_weights_preprocessed
(
param
);
}
}
...
@@ -911,6 +966,78 @@ AlgoChooserProfileCache& mixin::Convolution::profile_cache() const {
...
@@ -911,6 +966,78 @@ AlgoChooserProfileCache& mixin::Convolution::profile_cache() const {
return
*
m_profile_cache
;
return
*
m_profile_cache
;
}
}
class
mixin
::
WeightPreprocessExecutor
::
PreprocessedFilterExecDep
final
:
public
cg
::
GraphExecutable
::
ExecDependency
{
std
::
unique_ptr
<
PreprocessedFilter
>
m_pf
;
SmallVector
<
DeviceTensorND
>
m_filter_storage
;
public:
explicit
PreprocessedFilterExecDep
(
std
::
unique_ptr
<
PreprocessedFilter
>
preprocessed_filter
,
SmallVector
<
DeviceTensorND
>
filter_storage
)
:
m_pf
(
std
::
move
(
preprocessed_filter
)),
m_filter_storage
(
std
::
move
(
filter_storage
))
{}
};
void
mixin
::
WeightPreprocessExecutor
::
mixin_update_preprocessed_filter
(
cg
::
OperatorNodeBase
&
opr
)
{
if
(
!
mixin_allow_weight_preprocess
(
opr
))
return
;
auto
new_layout
=
deduce_preprocessed_filter_layout
();
if
(
new_layout
.
empty
())
{
// Weight preprocess was needed before, but no longer needed.
if
(
m_preprocessed_filter
)
{
m_preprocessed_filter
.
reset
();
m_filter_storage
.
clear
();
}
return
;
}
bool
should_update
=
false
;
size_t
new_size
=
new_layout
.
size
();
if
(
!
m_preprocessed_filter
||
m_preprocessed_filter
->
tensors
.
size
()
!=
new_size
)
{
should_update
=
true
;
}
else
{
for
(
size_t
i
=
0
;
i
<
new_size
;
i
++
)
{
if
(
!
new_layout
[
i
].
eq_layout
(
m_preprocessed_filter
->
tensors
[
i
].
layout
))
{
should_update
=
true
;
break
;
}
}
}
if
(
!
should_update
)
return
;
if
(
!
m_preprocessed_filter
)
{
m_preprocessed_filter
.
reset
(
new
PreprocessedFilter
{});
}
m_preprocessed_filter
->
tensors
.
resize
(
new_size
);
m_filter_storage
.
resize
(
new_size
);
m_preprocessed_filter
->
algorithm_id
=
nullptr
;
for
(
size_t
i
=
0
;
i
<
new_size
;
i
++
)
{
m_filter_storage
[
i
]
=
{
opr
.
output
(
0
)
->
comp_node
(),
new_layout
[
i
],
new_layout
[
i
].
dtype
,
new_layout
[
i
].
format
};
m_preprocessed_filter
->
tensors
[
i
]
=
m_filter_storage
[
i
].
as_megdnn
();
}
scn_do_execute_preprocess
();
}
void
mixin
::
WeightPreprocessExecutor
::
record_preprocessed_weight
(
cg
::
GraphExecutable
::
ExecDependencyArray
&
deps
)
{
deps
.
emplace_back
(
new
PreprocessedFilterExecDep
{
std
::
move
(
m_preprocessed_filter
),
std
::
move
(
m_filter_storage
)});
}
bool
mixin
::
WeightPreprocessExecutor
::
mixin_allow_weight_preprocess
(
const
cg
::
OperatorNodeBase
&
opr
)
const
{
bool
param_merged
=
opr
.
input
(
1
)
->
owner_opr
()
->
same_type
<
opr
::
MultipleDeviceTensorHolder
>
();
return
opr
.
input
(
1
)
->
contain_flag
(
VarNode
::
Flag
::
PERSISTENT_DEVICE_VALUE
)
&&
(
cg
::
is_const_var_value
(
opr
.
input
(
1
))
||
param_merged
);
}
/* ==================== ConvolutionForward ==================== */
/* ==================== ConvolutionForward ==================== */
IMPL_CONV
(
ConvolutionForward
,
"conv_fwd"
);
IMPL_CONV
(
ConvolutionForward
,
"conv_fwd"
);
...
@@ -971,7 +1098,7 @@ size_t ConvolutionForward::get_workspace_size_bytes(
...
@@ -971,7 +1098,7 @@ size_t ConvolutionForward::get_workspace_size_bytes(
input
(
0
)
->
format
()},
input
(
0
)
->
format
()},
{
input_shapes
[
1
],
input
(
1
)
->
dtype
(),
input
(
1
)
->
format
()},
{
input_shapes
[
1
],
input
(
1
)
->
dtype
(),
input
(
1
)
->
format
()},
{
output_shapes
[
0
],
output
(
0
)
->
dtype
(),
output
(
0
)
->
format
()}},
{
output_shapes
[
0
],
output
(
0
)
->
dtype
(),
output
(
0
)
->
format
()}},
megdnn_opr
(),
this
);
megdnn_opr
(),
this
,
allow_weight_preprocess
()
);
}
}
void
ConvolutionForward
::
init_output_format
()
{
void
ConvolutionForward
::
init_output_format
()
{
...
@@ -980,9 +1107,14 @@ void ConvolutionForward::init_output_format() {
...
@@ -980,9 +1107,14 @@ void ConvolutionForward::init_output_format() {
}
}
void
ConvolutionForward
::
scn_do_execute
()
{
void
ConvolutionForward
::
scn_do_execute
()
{
if
(
input
(
1
)
->
contain_flag
(
VarNode
::
Flag
::
PERSISTENT_DEVICE_VALUE
)
&&
cg
::
is_const_var_value
(
input
(
1
)))
{
update_preprocessed_filter
();
}
megdnn_opr
()
->
exec
(
input
(
0
)
->
dev_tensor
().
as_megdnn
(),
megdnn_opr
()
->
exec
(
input
(
0
)
->
dev_tensor
().
as_megdnn
(),
input
(
1
)
->
dev_tensor
().
as_megdnn
(),
input
(
1
)
->
dev_tensor
().
as_megdnn
(),
output
(
0
)
->
dev_tensor
().
as_megdnn
(),
nullptr
,
output
(
0
)
->
dev_tensor
().
as_megdnn
(),
preprocessed_filter
(),
intl
::
get_megdnn_workspace_from_var
(
output
().
back
()));
intl
::
get_megdnn_workspace_from_var
(
output
().
back
()));
}
}
...
@@ -1012,6 +1144,20 @@ void ConvolutionForward::get_output_var_shape(
...
@@ -1012,6 +1144,20 @@ void ConvolutionForward::get_output_var_shape(
void
ConvolutionForward
::
record_execute_deps
(
void
ConvolutionForward
::
record_execute_deps
(
cg
::
GraphExecutable
::
ExecDependencyArray
&
deps
)
{
cg
::
GraphExecutable
::
ExecDependencyArray
&
deps
)
{
record_megdnn_opr
(
deps
);
record_megdnn_opr
(
deps
);
record_preprocessed_weight
(
deps
);
}
SmallVector
<
TensorLayout
>
ConvolutionForward
::
deduce_preprocessed_filter_layout
()
{
return
megdnn_opr
()
->
deduce_preprocessed_filter_layout
(
input
(
0
)
->
layout
(),
input
(
1
)
->
layout
(),
output
(
0
)
->
layout
());
}
void
ConvolutionForward
::
scn_do_execute_preprocess
()
{
megdnn_opr
()
->
exec_preprocess
(
input
(
0
)
->
layout
(),
input
(
1
)
->
dev_tensor
().
as_megdnn
(),
output
(
0
)
->
layout
(),
preprocessed_filter
(),
intl
::
get_megdnn_workspace_from_var
(
output
().
back
()));
}
}
/* ==================== ConvolutionBackwardData ==================== */
/* ==================== ConvolutionBackwardData ==================== */
...
@@ -1504,10 +1650,12 @@ size_t ConvBiasForward::get_workspace_size_bytes(
...
@@ -1504,10 +1650,12 @@ size_t ConvBiasForward::get_workspace_size_bytes(
i2
,
i2
,
i3
,
i3
,
{
output_shapes
[
0
],
output
(
0
)
->
dtype
(),
output
(
0
)
->
format
()}},
{
output_shapes
[
0
],
output
(
0
)
->
dtype
(),
output
(
0
)
->
format
()}},
mo
,
this
);
mo
,
this
,
allow_weight_preprocess
()
);
}
}
void
ConvBiasForward
::
scn_do_execute
()
{
void
ConvBiasForward
::
scn_do_execute
()
{
update_preprocessed_filter
();
auto
&&
inp
=
input
();
auto
&&
inp
=
input
();
auto
mo
=
megdnn_opr
();
auto
mo
=
megdnn_opr
();
if
(
inp
.
size
()
==
2
)
{
if
(
inp
.
size
()
==
2
)
{
...
@@ -1621,6 +1769,33 @@ megdnn::param::MatrixMul::Format ConvBiasForward::get_matmul_format(
...
@@ -1621,6 +1769,33 @@ megdnn::param::MatrixMul::Format ConvBiasForward::get_matmul_format(
}
}
}
}
SmallVector
<
TensorLayout
>
ConvBiasForward
::
deduce_preprocessed_filter_layout
()
{
TensorLayout
i2
,
i3
;
if
(
input
().
size
()
>
2
)
{
i2
=
input
(
2
)
->
layout
();
}
if
(
input
().
size
()
>
3
)
{
i3
=
input
(
3
)
->
layout
();
}
return
megdnn_opr
()
->
deduce_preprocessed_filter_layout
(
input
(
0
)
->
layout
(),
input
(
1
)
->
layout
(),
i2
,
i3
,
output
(
0
)
->
layout
());
}
void
ConvBiasForward
::
scn_do_execute_preprocess
()
{
TensorLayout
bias_layout
(
output
(
0
)
->
dtype
()),
z_layout
(
output
(
0
)
->
dtype
());
if
(
input
().
size
()
>
2
)
{
bias_layout
=
input
(
2
)
->
layout
();
}
if
(
input
().
size
()
>
3
)
{
z_layout
=
input
(
3
)
->
layout
();
}
megdnn_opr
()
->
exec_preprocess
(
input
(
0
)
->
layout
(),
input
(
1
)
->
dev_tensor
().
as_megdnn
(),
bias_layout
,
z_layout
,
output
(
0
)
->
layout
(),
preprocessed_filter
(),
intl
::
get_megdnn_workspace_from_var
(
output
().
back
()));
}
/* ===================== LocalShareForward ==================== */
/* ===================== LocalShareForward ==================== */
IMPL_CONV
(
LocalShareForward
,
"local_share"
);
IMPL_CONV
(
LocalShareForward
,
"local_share"
);
...
...
src/opr/include/megbrain/opr/dnn/convolution.h
浏览文件 @
75eebb7c
...
@@ -72,13 +72,52 @@ class Convolution {
...
@@ -72,13 +72,52 @@ class Convolution {
cg
::
OperatorNodeBase
*
self
);
cg
::
OperatorNodeBase
*
self
);
};
};
class
WeightPreprocessExecutor
:
public
cg
::
OperatorNodeMixinBase
{
class
PreprocessedFilterExecDep
;
using
PreprocessedFilter
=
megdnn
::
detail
::
PreprocessedFilter
;
std
::
unique_ptr
<
PreprocessedFilter
>
m_preprocessed_filter
;
SmallVector
<
DeviceTensorND
>
m_filter_storage
;
protected:
//! this should only be called in scn_do_execute or similar functions (i.e.
//! post dispatch-to-ExecEnv)
void
mixin_update_preprocessed_filter
(
OperatorNodeBase
&
opr
);
void
record_preprocessed_weight
(
cg
::
GraphExecutable
::
ExecDependencyArray
&
deps
);
PreprocessedFilter
*
preprocessed_filter
()
const
{
return
m_preprocessed_filter
.
get
();
}
bool
mixin_allow_weight_preprocess
(
const
OperatorNodeBase
&
opr
)
const
;
virtual
SmallVector
<
TensorLayout
>
deduce_preprocessed_filter_layout
()
=
0
;
virtual
void
scn_do_execute_preprocess
()
=
0
;
};
}
// namespace mixin
}
// namespace mixin
namespace
intl
{
namespace
intl
{
//! glue class to apply mixin::WeightPreprocessExecutor
template
<
class
Base
=
cg
::
OperatorNodeBase
,
class
MixinImpl
=
mixin
::
WeightPreprocessExecutor
>
class
OprWithWeightPreprocess
:
public
mixin
::
CheckBase
<
Base
>::
Base
,
public
MixinImpl
{
protected:
using
Base
::
Base
;
void
update_preprocessed_filter
()
{
this
->
mixin_update_preprocessed_filter
(
*
this
);
}
bool
allow_weight_preprocess
()
const
{
return
this
->
mixin_allow_weight_preprocess
(
*
this
);
}
};
using
ConvBiasBase
=
cg
::
SingleCNOperatorNode
<
using
ConvBiasBase
=
cg
::
SingleCNOperatorNode
<
cg
::
OutshapePureByInshapeOpr
<>
,
cg
::
OutshapePureByInshapeOpr
<>
,
mixin
::
MegDNNOprHolderImpl
<
megdnn
::
ConvBiasForward
>>
;
mixin
::
MegDNNOprHolderImpl
<
megdnn
::
ConvBiasForward
>>
;
using
ConvBiasForwardBase
=
WorkspaceSizeInfer
<
ConvBiasBase
>
;
using
ConvBiasForwardBase
=
OprWithWeightPreprocess
<
WorkspaceSizeInfer
<
ConvBiasBase
>>
;
using
DeformableConvBackwardDataT
=
cg
::
SingleCNOperatorNode
<
using
DeformableConvBackwardDataT
=
cg
::
SingleCNOperatorNode
<
cg
::
OutshapePureByInshapeOpr
<>
,
cg
::
OutshapePureByInshapeOpr
<>
,
...
@@ -90,12 +129,20 @@ namespace intl {
...
@@ -90,12 +129,20 @@ namespace intl {
mixin
::
MegDNNOprHolderImpl
<
megdnn
::
BatchConvBiasForward
>>
;
mixin
::
MegDNNOprHolderImpl
<
megdnn
::
BatchConvBiasForward
>>
;
using
BatchConvBiasForwardBase
=
WorkspaceSizeInfer
<
BatchConvBiasBase
>
;
using
BatchConvBiasForwardBase
=
WorkspaceSizeInfer
<
BatchConvBiasBase
>
;
using
ConvolutionForwardBase
=
WorkspaceSizeInfer
<
using
ConvolutionForwardBase
=
OprWithWeightPreprocess
<
typename
MegDNNOprWrapperFwdBase
<
megdnn
::
ConvolutionForward
>::
Base
>
;
WorkspaceSizeInfer
<
typename
MegDNNOprWrapperFwdBase
<
megdnn
::
ConvolutionForward
>::
Base
>>
;
}
// namespace intl
}
// namespace intl
namespace
testing
{
class
ConvolutionTestingPeer
;
}
// namespace testing
MGB_DEFINE_OPR_CLASS
(
ConvolutionForward
,
MGB_DEFINE_OPR_CLASS
(
ConvolutionForward
,
intl
::
ConvolutionForwardBase
,
public
mixin
::
Convolution
)
// {
intl
::
ConvolutionForwardBase
,
public
mixin
::
Convolution
)
// {
void
init_profile_cache
()
override
;
void
init_profile_cache
()
override
;
void
init_output_dtype
()
override
;
void
init_output_dtype
()
override
;
size_t
get_workspace_size_bytes
(
size_t
get_workspace_size_bytes
(
...
@@ -109,6 +156,10 @@ MGB_DEFINE_OPR_CLASS(ConvolutionForward,
...
@@ -109,6 +156,10 @@ MGB_DEFINE_OPR_CLASS(ConvolutionForward,
TensorShapeArray
&
out_shape
)
const
override
final
;
TensorShapeArray
&
out_shape
)
const
override
final
;
void
record_execute_deps
(
void
record_execute_deps
(
cg
::
GraphExecutable
::
ExecDependencyArray
&
deps
)
override
;
cg
::
GraphExecutable
::
ExecDependencyArray
&
deps
)
override
;
SmallVector
<
TensorLayout
>
deduce_preprocessed_filter_layout
()
override
;
void
scn_do_execute_preprocess
()
override
;
friend
testing
::
ConvolutionTestingPeer
;
public:
public:
ConvolutionForward
(
VarNode
*
src
,
VarNode
*
filter
,
ConvolutionForward
(
VarNode
*
src
,
VarNode
*
filter
,
...
@@ -142,7 +193,10 @@ MGB_DEFINE_OPR_CLASS(ConvBiasForward, intl::ConvBiasForwardBase,
...
@@ -142,7 +193,10 @@ MGB_DEFINE_OPR_CLASS(ConvBiasForward, intl::ConvBiasForwardBase,
void
record_execute_deps
(
void
record_execute_deps
(
cg
::
GraphExecutable
::
ExecDependencyArray
&
deps
)
override
{
cg
::
GraphExecutable
::
ExecDependencyArray
&
deps
)
override
{
this
->
record_megdnn_opr
(
deps
);
this
->
record_megdnn_opr
(
deps
);
this
->
record_preprocessed_weight
(
deps
);
}
}
SmallVector
<
TensorLayout
>
deduce_preprocessed_filter_layout
()
override
;
void
scn_do_execute_preprocess
()
override
;
public:
public:
//! src * filter
//! src * filter
...
...
src/opr/test/dnn/convolution.cpp
浏览文件 @
75eebb7c
...
@@ -21,6 +21,8 @@
...
@@ -21,6 +21,8 @@
#include "megbrain/gopt/inference.h"
#include "megbrain/gopt/inference.h"
#include "megbrain/opr/tensor_manip.h"
#include "megbrain/opr/tensor_manip.h"
#include <gmock/gmock.h>
#include <cmath>
#include <cmath>
#include <random>
#include <random>
...
@@ -244,7 +246,6 @@ opr::Convolution::Param convert_to_conv_param(
...
@@ -244,7 +246,6 @@ opr::Convolution::Param convert_to_conv_param(
param
.
dilate_w
,
param
.
sparse
,
param
.
format
};
param
.
dilate_w
,
param
.
sparse
,
param
.
format
};
};
};
#endif
#endif
}
// anonymous namespace
TEST
(
TestOprDNN
,
ConvolutionForward
)
{
TEST
(
TestOprDNN
,
ConvolutionForward
)
{
uint32_t
ih
=
10
,
ic
=
16
,
oc
=
32
,
ph
=
0
,
sh
=
1
,
fh
=
2
;
uint32_t
ih
=
10
,
ic
=
16
,
oc
=
32
,
ph
=
0
,
sh
=
1
,
fh
=
2
;
...
@@ -1172,6 +1173,7 @@ TEST(TestOprDNN, ConvBiasForward) {
...
@@ -1172,6 +1173,7 @@ TEST(TestOprDNN, ConvBiasForward) {
{
1
,
OC
,
1
,
1
}},
{
1
,
OC
,
1
,
1
}},
opt3
);
opt3
);
};
};
run
(
1
,
1
,
1
,
5
,
5
,
1
,
1
);
run
(
1
,
1
,
1
,
5
,
5
,
3
,
3
);
run
(
1
,
1
,
1
,
5
,
5
,
3
,
3
);
run
(
2
,
3
,
4
,
5
,
5
,
3
,
3
);
run
(
2
,
3
,
4
,
5
,
5
,
3
,
3
);
run
(
3
,
3
,
4
,
224
,
223
,
3
,
3
);
run
(
3
,
3
,
4
,
224
,
223
,
3
,
3
);
...
@@ -2124,4 +2126,225 @@ TEST(TestOprDNN, ConvolutionMultiCompNode) {
...
@@ -2124,4 +2126,225 @@ TEST(TestOprDNN, ConvolutionMultiCompNode) {
#endif
#endif
}
// anonymous namespace
namespace
mgb
{
namespace
opr
{
namespace
testing
{
class
ConvolutionTestingPeer
{
opr
::
ConvolutionForward
&
m_conv_opr
;
public:
explicit
ConvolutionTestingPeer
(
cg
::
OperatorNodeBase
*
opr
)
:
m_conv_opr
(
opr
->
cast_final_safe
<
opr
::
ConvolutionForward
>
())
{}
void
set_megdnn_opr
(
std
::
unique_ptr
<
megdnn
::
ConvolutionForward
>
megdnn_opr
)
{
m_conv_opr
.
set_megdnn_opr
(
std
::
move
(
megdnn_opr
));
}
};
}
// namespace testing
}
// namespace opr
}
// namespace mgb
// test-local helpers below live in an anonymous namespace (internal linkage)
namespace {

// pull frequently used names into the local scope
using megdnn::TensorND;
using megdnn::Workspace;
using opr::testing::ConvolutionTestingPeer;
// gmock stand-in for megdnn::ConvolutionForward: lets tests script return
// values for algorithm selection / workspace queries and verify exactly how
// the graph operator drives exec_preprocess()/exec().
// NOTE(review): constructed from an existing operator only to reuse its
// handle(); the original operator itself is not otherwise used here.
class MockConvolutionForward : public megdnn::ConvolutionForward {
    // reported by get_algorithm_set_name(); tests pass the current test name
    const char* m_algorithm_set_name;

public:
    MockConvolutionForward(megdnn::ConvolutionForward* orig,
                           const char* algo_set_name)
            : megdnn::ConvolutionForward(orig->handle()),
              m_algorithm_set_name(algo_set_name) {}

    // forward execution; preprocessed_filter is null when no preprocessing
    // was performed
    MOCK_METHOD5(exec, void(_megdnn_tensor_in src, _megdnn_tensor_in filter,
                            _megdnn_tensor_out dst,
                            const PreprocessedFilter* preprocessed_filter,
                            _megdnn_workspace workspace));
    // one-time weight preprocessing; fills preprocessed_filter->tensors
    MOCK_METHOD5(exec_preprocess,
                 void(const TensorLayout& src_layout, _megdnn_tensor_in filter,
                      const TensorLayout& dst_layout,
                      PreprocessedFilter* preprocessed_filter,
                      _megdnn_workspace workspace));
    MOCK_METHOD4(get_workspace_in_bytes,
                 size_t(const TensorLayout& src, const TensorLayout& filter,
                        const TensorLayout& dst,
                        const PreprocessedFilter* preprocessed_filter));
    // empty result means "no preprocessing needed" for the chosen algorithm
    MOCK_METHOD3(deduce_preprocessed_filter_layout,
                 SmallVector<TensorLayout>(const TensorLayout& src,
                                           const TensorLayout& filter,
                                           const TensorLayout& dst));
    MOCK_METHOD3(get_preprocess_workspace_in_bytes,
                 size_t(const TensorLayout& src, const TensorLayout& filter,
                        const TensorLayout& dst));
    MOCK_METHOD3(get_all_algorithms,
                 std::vector<Algorithm*>(const TensorLayout& p0,
                                         const TensorLayout& p1,
                                         const TensorLayout& p2));
    MOCK_METHOD5(get_algorithm_heuristic,
                 Algorithm*(const TensorLayout& p0, const TensorLayout& p1,
                            const TensorLayout& p2,
                            size_t workspace_limit_in_bytes,
                            bool reproducible));

    const char* get_algorithm_set_name() const override {
        return m_algorithm_set_name;
    }
};
class
MockAlgorithm
:
public
megdnn
::
detail
::
Algorithm
{
const
char
*
m_name
;
public:
MockAlgorithm
(
const
char
*
name
=
"NotImportant"
)
:
m_name
(
name
)
{}
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
return
m_name
;
}
virtual
~
MockAlgorithm
()
=
default
;
};
class
TestWeightPreprocess
:
public
::
testing
::
Test
{
protected:
CompNode
comp_node
;
std
::
shared_ptr
<
ComputingGraph
>
graph
;
std
::
shared_ptr
<
HostTensorND
>
x_host
;
MockConvolutionForward
*
mock_conv_ptr
;
SymbolVar
y
;
HostTensorND
y_host
;
std
::
unique_ptr
<
cg
::
AsyncExecutable
>
func
;
MockConvolutionForward
&
mock_conv
()
{
return
*
mock_conv_ptr
;
}
void
SetUp
()
override
{
constexpr
uint32_t
ih
=
10
,
ic
=
16
,
oc
=
32
,
ph
=
0
,
sh
=
1
,
fh
=
2
,
iw
=
ih
;
comp_node
=
CompNode
::
load
(
"cpux"
);
graph
=
ComputingGraph
::
make
();
TensorShape
x_shape
{
1
,
ic
,
ih
,
iw
},
w_shape
{
oc
,
ic
,
fh
,
fh
};
x_host
=
std
::
make_shared
<
HostTensorND
>
(
comp_node
,
x_shape
);
auto
x
=
opr
::
Host2DeviceCopy
::
make
(
*
graph
,
x_host
);
auto
w
=
opr
::
ImmutableTensor
::
make
(
*
graph
,
{
comp_node
,
w_shape
});
Param
param
;
param
.
pad_h
=
param
.
pad_w
=
ph
;
param
.
stride_h
=
param
.
stride_w
=
sh
;
param
.
format
=
Param
::
Format
::
NCHW
;
y
=
opr
::
ConvolutionForward
::
make
(
x
,
w
,
param
);
auto
&
opr
=
y
.
node
()
->
owner_opr
()
->
cast_final
<
opr
::
ConvolutionForward
>
();
auto
mock
=
std
::
make_unique
<
MockConvolutionForward
>
(
opr
.
megdnn_opr
(),
::
testing
::
UnitTest
::
GetInstance
()
->
current_test_info
()
->
name
());
mock_conv_ptr
=
mock
.
get
();
ConvolutionTestingPeer
{
&
opr
}.
set_megdnn_opr
(
std
::
move
(
mock
));
func
=
graph
->
compile
({
make_callback_copy
(
y
,
y_host
)});
}
void
run
()
{
func
->
execute
().
wait
();
}
void
TearDown
()
override
{
func
.
reset
();
// Triggers mock check
graph
.
reset
();
x_host
.
reset
();
}
};
// When deduce_preprocessed_filter_layout() returns an empty list (meaning the
// algorithm needs no weight preprocessing), the operator must never call
// exec_preprocess() and must pass a null PreprocessedFilter to exec().
TEST_F(TestWeightPreprocess, NoPreprocessNeeded) {
    using ::testing::_;
    using ::testing::Return;
    auto& mock = mock_conv();

    MockAlgorithm algo;
    EXPECT_CALL(mock, get_algorithm_heuristic(_, _, _, _, _))
            .WillRepeatedly(Return(&algo));
    EXPECT_CALL(mock, get_workspace_in_bytes(_, _, _, _))
            .WillRepeatedly(Return(0));
    EXPECT_CALL(mock, get_preprocess_workspace_in_bytes(_, _, _))
            .WillRepeatedly(Return(0));

    {
        ::testing::InSequence seq;
        // Return empty preprocess filters, indicating no need to preprocess
        EXPECT_CALL(mock, deduce_preprocessed_filter_layout(_, _, _))
                .WillRepeatedly(Return(SmallVector<TensorLayout>{}));
        // no preprocessing call allowed, and exec must get a null filter
        EXPECT_CALL(mock, exec_preprocess(_, _, _, _, _)).Times(0);
        EXPECT_CALL(mock, exec(_, _, _, nullptr, _));
        run();
    }
}
// When preprocessing is needed, exec_preprocess() must run exactly once (with
// the workspace size it asked for and pre-allocated output tensors), and every
// subsequent exec() must receive the same preprocessed filter contents.
TEST_F(TestWeightPreprocess, PreprocessCalledOnlyOnce) {
    using ::testing::_;
    using ::testing::Return;
    using ::testing::Field;
    using ::testing::Invoke;
    using ::testing::Expectation;
    using PF = MockConvolutionForward::PreprocessedFilter;

    auto& mock = mock_conv();
    MockAlgorithm algo;
    // two preprocessed-weight tensors the operator must allocate
    SmallVector<TensorLayout> filter_layout{{{1, 2, 3, 4}, dtype::Float32()},
                                            {{5, 6, 7, 8}, dtype::Float32()}};

    EXPECT_CALL(mock, deduce_preprocessed_filter_layout(_, _, _))
            .WillRepeatedly(Return(filter_layout));

    // algorithm selection must precede both workspace-size queries
    Expectation algo_call =
            EXPECT_CALL(mock, get_algorithm_heuristic(_, _, _, _, _))
                    .WillOnce(Return(&algo));
    Expectation ws_call = EXPECT_CALL(mock, get_workspace_in_bytes(_, _, _, _))
                                  .After(algo_call)
                                  .WillOnce(Return(0));
    Expectation pre_ws_call =
            EXPECT_CALL(mock, get_preprocess_workspace_in_bytes(_, _, _))
                    .After(algo_call)
                    .WillOnce(Return(233));
    {
        ::testing::InSequence seq;
        // exec_preprocess should be called only once, with workspace allocated
        int salt = 0;
        EXPECT_CALL(mock, exec_preprocess(_, _, _, _, _))
                .After(ws_call, pre_ws_call)
                .WillOnce(Invoke([&](const TensorLayout&, _megdnn_tensor_in,
                                     const TensorLayout&, PF* pf,
                                     _megdnn_workspace workspace) {
                    // workspace must match get_preprocess_workspace_in_bytes()
                    ASSERT_EQ(workspace.size, 233);
                    ASSERT_NE(pf, nullptr);
                    // tag the filter so exec() can prove it got the same one
                    pf->algorithm_id = &salt;
                    ASSERT_EQ(pf->tensors.size(), 2);
                    ASSERT_TRUE(pf->tensors[0].layout.eq_shape({1, 2, 3, 4}));
                    ASSERT_TRUE(pf->tensors[1].layout.eq_shape({5, 6, 7, 8}));
                    ASSERT_NE(pf->tensors[0].raw_ptr, nullptr);
                    ASSERT_NE(pf->tensors[1].raw_ptr, nullptr);
                    // write sentinel values the exec() checks below rely on
                    pf->tensors[0].ptr<float>()[0] = 114.514f;
                    pf->tensors[1].ptr<float>()[0] = 1926.0817f;
                }));

        // Run the graph multiple times.
        for (int i = 0; i < 3; i++) {
            if (i > 0) {
                // after the first run, preprocessing must not happen again
                EXPECT_CALL(mock, exec_preprocess(_, _, _, _, _)).Times(0);
            }
            EXPECT_CALL(mock, exec(_, _, _, _, _))
                    .WillOnce(Invoke([&](_megdnn_tensor_in, _megdnn_tensor_in,
                                         _megdnn_tensor_out, const PF* pf,
                                         _megdnn_workspace) {
                        // same filter object and same contents every run
                        ASSERT_NE(pf, nullptr);
                        ASSERT_EQ(pf->algorithm_id, &salt);
                        ASSERT_EQ(pf->tensors[0].ptr<float>()[0], 114.514f);
                        ASSERT_EQ(pf->tensors[1].ptr<float>()[0], 1926.0817f);
                    }));
            run();
        }
    }
}
}
// anonymous namespace
>>>>>>> 11c3561ca... feat(opr): use weight preprocess feature of MegDNN
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录