Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
840b1d5c
O
Opencv
项目概览
Greenplum
/
Opencv
9 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
840b1d5c
编写于
1月 11, 2023
作者:
Z
zihaomu
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add depthwise add fuse
上级
9208dcb0
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
41 addition
and
10 deletion
+41
-10
modules/dnn/src/layers/fast_convolution/depthwise_convolution.cpp
...dnn/src/layers/fast_convolution/depthwise_convolution.cpp
+33
-7
modules/dnn/src/layers/fast_convolution/fast_convolution.cpp
modules/dnn/src/layers/fast_convolution/fast_convolution.cpp
+2
-2
modules/dnn/src/layers/fast_convolution/fast_convolution.hpp
modules/dnn/src/layers/fast_convolution/fast_convolution.hpp
+1
-1
modules/dnn/test/test_onnx_importer.cpp
modules/dnn/test/test_onnx_importer.cpp
+5
-0
未找到文件。
modules/dnn/src/layers/fast_convolution/depthwise_convolution.cpp
浏览文件 @
840b1d5c
...
...
@@ -24,7 +24,7 @@ static void depthWiseBlockConv2D(const float* wptr,
const
float
*
inptr_
,
int
height
,
int
width
,
float
*
outptr_
,
int
out_d
,
int
outH
,
int
outW
)
int
out_d
,
int
outH
,
int
outW
,
bool
fusedAdd
)
{
const
float
w00_
=
wptr
[
0
],
w01_
=
wptr
[
1
],
w02_
=
wptr
[
2
],
w10
=
wptr
[
3
],
w11
=
wptr
[
4
],
w12
=
wptr
[
5
],
...
...
@@ -57,6 +57,8 @@ static void depthWiseBlockConv2D(const float* wptr,
out
=
imgptr0
[
0
]
*
w01
+
imgptr0
[
dilation_w
]
*
w02
+
imgptr1
[
0
]
*
w11
+
imgptr1
[
dilation_w
]
*
w12
+
imgptr2
[
0
]
*
w21
+
imgptr2
[
dilation_w
]
*
w22
+
bias
;
if
(
fusedAdd
)
out
+=
outptr
[
0
];
if
(
relu
)
out
=
out
>
0.
f
?
out
:
out
*
relu_coeff
;
outptr
[
0
]
=
out
;
...
...
@@ -65,6 +67,10 @@ static void depthWiseBlockConv2D(const float* wptr,
#if CV_SIMD128
const
int
VEC_NLANES
=
4
;
if
(
fusedAdd
)
outW1
=
max
(
out_j
,
outW1
-
outW1
%
VEC_NLANES
);
v_float32x4
vw00
=
v_setall_f32
(
w00
);
v_float32x4
vw01
=
v_setall_f32
(
w01
);
v_float32x4
vw02
=
v_setall_f32
(
w02
);
...
...
@@ -104,6 +110,8 @@ static void depthWiseBlockConv2D(const float* wptr,
v_float32x4
vout
=
v00
*
vw00
+
v01
*
vw01
+
v02
*
vw02
+
v10
*
vw10
+
v11
*
vw11
+
v12
*
vw12
+
v20
*
vw20
+
v21
*
vw21
+
v22
*
vw22
+
vbias
;
if
(
fusedAdd
)
vout
=
v_load
(
outptr
+
out_j
)
+
vout
;
if
(
relu
)
vout
=
v_select
(
vout
>
z
,
vout
,
vout
*
vrc
);
v_store
(
outptr
+
out_j
,
vout
);
...
...
@@ -134,6 +142,8 @@ static void depthWiseBlockConv2D(const float* wptr,
v10
*
vw10
+
v11
*
vw11
+
v12
*
vw12
+
v20
*
vw20
+
v21
*
vw21
+
v22
*
vw22
+
vbias
;
if
(
fusedAdd
)
vout
=
v_load
(
outptr
+
out_j
)
+
vout
;
if
(
relu
)
vout
=
v_select
(
vout
>
z
,
vout
,
vout
*
vrc
);
v_store
(
outptr
+
out_j
,
vout
);
...
...
@@ -148,6 +158,8 @@ static void depthWiseBlockConv2D(const float* wptr,
out
=
imgptr0
[
in_j
]
*
w00
+
imgptr0
[
in_j
+
dilation_w
]
*
w01
+
imgptr0
[
in_j
+
dilation_w
*
2
]
*
w02
+
imgptr1
[
in_j
]
*
w10
+
imgptr1
[
in_j
+
dilation_w
]
*
w11
+
imgptr1
[
in_j
+
dilation_w
*
2
]
*
w12
+
imgptr2
[
in_j
]
*
w20
+
imgptr2
[
in_j
+
dilation_w
]
*
w21
+
imgptr2
[
in_j
+
dilation_w
*
2
]
*
w22
+
bias
;
if
(
fusedAdd
)
out
+=
outptr
[
out_j
];
if
(
relu
)
out
=
out
>
0.
f
?
out
:
out
*
relu_coeff
;
outptr
[
out_j
]
=
out
;
...
...
@@ -175,6 +187,8 @@ static void depthWiseBlockConv2D(const float* wptr,
out
=
imgptr0
[
in_j0
]
*
w00
*
s0
+
imgptr0
[
in_j1
]
*
w01
*
s1
+
imgptr0
[
in_j2
]
*
w02
*
s2
+
imgptr1
[
in_j0
]
*
w10
*
s0
+
imgptr1
[
in_j1
]
*
w11
*
s1
+
imgptr1
[
in_j2
]
*
w12
*
s2
+
imgptr2
[
in_j0
]
*
w20
*
s0
+
imgptr2
[
in_j1
]
*
w21
*
s1
+
imgptr2
[
in_j2
]
*
w22
*
s2
+
bias
;
if
(
fusedAdd
)
out
+=
outptr
[
out_j
];
if
(
relu
)
out
=
out
>
0.
f
?
out
:
out
*
relu_coeff
;
outptr
[
out_j
]
=
out
;
...
...
@@ -187,7 +201,7 @@ static void depthWiseBlockConv1D(const float* wptr,
const
float
*
biasptr
,
const
float
*
relu
,
const
float
*
inptr_
,
int
width
,
float
*
outptr_
,
int
out_d
,
int
outW
)
int
out_d
,
int
outW
,
bool
fusedAdd
)
{
const
float
w00_
=
wptr
[
0
],
w01_
=
wptr
[
1
],
w02_
=
wptr
[
2
];
int
outW1
=
min
(
outW
,
(
width
-
dilation_w
*
(
kernel_w
-
1
)
+
pad_l
)
/
stride_w
);
...
...
@@ -201,7 +215,8 @@ static void depthWiseBlockConv1D(const float* wptr,
if
(
pad_l
>
0
)
{
out
=
imgptr0
[
0
]
*
w01
+
imgptr0
[
dilation_w
]
*
w02
+
bias
;
if
(
fusedAdd
)
out
+=
outptr
[
0
];
if
(
relu
)
out
=
out
>
0.
f
?
out
:
out
*
relu_coeff
;
outptr
[
0
]
=
out
;
...
...
@@ -210,6 +225,8 @@ static void depthWiseBlockConv1D(const float* wptr,
#if CV_SIMD128
const
int
VEC_NLANES
=
4
;
if
(
fusedAdd
)
outW1
=
max
(
out_j
,
outW1
-
outW1
%
VEC_NLANES
);
v_float32x4
vw00
=
v_setall_f32
(
w00
);
v_float32x4
vw01
=
v_setall_f32
(
w01
);
v_float32x4
vw02
=
v_setall_f32
(
w02
);
...
...
@@ -235,6 +252,8 @@ static void depthWiseBlockConv1D(const float* wptr,
v02
=
v_load
(
imgptr0
+
in_j
+
dilation_w
*
2
);
v_float32x4
vout
=
v00
*
vw00
+
v01
*
vw01
+
v02
*
vw02
+
vbias
;
if
(
fusedAdd
)
vout
=
v_load
(
outptr
+
out_j
)
+
vout
;
if
(
relu
)
vout
=
v_select
(
vout
>
z
,
vout
,
vout
*
vrc
);
v_store
(
outptr
+
out_j
,
vout
);
...
...
@@ -258,6 +277,9 @@ static void depthWiseBlockConv1D(const float* wptr,
v_float32x4
vout
=
v00
*
vw00
+
v01
*
vw01
+
v02
*
vw02
+
vbias
;
if
(
fusedAdd
)
vout
=
v_load
(
outptr
+
out_j
)
+
vout
;
if
(
relu
)
vout
=
v_select
(
vout
>
z
,
vout
,
vout
*
vrc
);
v_store
(
outptr
+
out_j
,
vout
);
...
...
@@ -270,6 +292,8 @@ static void depthWiseBlockConv1D(const float* wptr,
{
int
in_j
=
out_j
*
stride_w
-
pad_l
;
out
=
imgptr0
[
in_j
]
*
w00
+
imgptr0
[
in_j
+
dilation_w
]
*
w01
+
imgptr0
[
in_j
+
dilation_w
*
2
]
*
w02
+
bias
;
if
(
fusedAdd
)
out
+=
outptr
[
out_j
];
if
(
relu
)
out
=
out
>
0.
f
?
out
:
out
*
relu_coeff
;
outptr
[
out_j
]
=
out
;
...
...
@@ -295,6 +319,8 @@ static void depthWiseBlockConv1D(const float* wptr,
s2
=
0.
f
;
}
out
=
imgptr0
[
in_j0
]
*
w00
*
s0
+
imgptr0
[
in_j1
]
*
w01
*
s1
+
imgptr0
[
in_j2
]
*
w02
*
s2
+
bias
;
if
(
fusedAdd
)
out
+=
outptr
[
out_j
];
if
(
relu
)
out
=
out
>
0.
f
?
out
:
out
*
relu_coeff
;
outptr
[
out_j
]
=
out
;
...
...
@@ -302,7 +328,7 @@ static void depthWiseBlockConv1D(const float* wptr,
}
void
runDepthwise
(
InputArray
_input
,
OutputArray
_output
,
const
Ptr
<
FastConv
>&
conv
,
ActivationLayer
*
activ_
,
const
std
::
vector
<
float
>&
reluslope
)
const
std
::
vector
<
float
>&
reluslope
,
bool
fusedAdd
)
{
Mat
input
=
_input
.
getMat
();
Mat
output
=
_output
.
getMat
();
...
...
@@ -349,7 +375,7 @@ void runDepthwise(InputArray _input, OutputArray _output, const Ptr<FastConv>& c
#if CV_TRY_AVX2 || CV_TRY_AVX || CV_TRY_RVV
// TODO: remove the following limitation, need change code in layers_common.simd.hpp.
bool
canRunOpt
=
Wi
>=
16
+
dilation_w
*
(
Wk
-
1
);
bool
canRunOpt
=
Wi
>=
16
+
dilation_w
*
(
Wk
-
1
)
&&
!
fusedAdd
;
#endif
std
::
vector
<
int
>
ofstab_
(
3
*
ksize
,
0
);
int
*
ofstab
=
ofstab_
.
data
();
...
...
@@ -399,11 +425,11 @@ void runDepthwise(InputArray _input, OutputArray _output, const Ptr<FastConv>& c
else
#endif
depthWiseBlockConv2D
(
weights
,
Hk
,
Wk
,
stride_h
,
stride_w
,
dilation_h
,
dilation_w
,
pad_top
,
pad_left
,
bias
,
relu
,
inptr0
,
Hi
,
Wi
,
outptr0
,
c
,
H0
,
W0
);
pad_top
,
pad_left
,
bias
,
relu
,
inptr0
,
Hi
,
Wi
,
outptr0
,
c
,
H0
,
W0
,
fusedAdd
);
}
else
// conv_dim == CONV_1D, spatial branch for depth-wise Conv1D.
{
depthWiseBlockConv1D
(
weights
,
Wk
,
stride_w
,
dilation_w
,
pad_left
,
bias
,
relu
,
inptr0
,
Wi
,
outptr0
,
c
,
W0
);
depthWiseBlockConv1D
(
weights
,
Wk
,
stride_w
,
dilation_w
,
pad_left
,
bias
,
relu
,
inptr0
,
Wi
,
outptr0
,
c
,
W0
,
fusedAdd
);
}
if
(
activ
)
...
...
modules/dnn/src/layers/fast_convolution/fast_convolution.cpp
浏览文件 @
840b1d5c
...
...
@@ -369,8 +369,8 @@ void runFastConv(InputArray _input, OutputArray _output, const Ptr<FastConv>& co
if
(
conv
->
conv_type
==
_FX_CONV_TYPE_DEPTHWISE
)
{
// Depthwise-Convolution layer should not be followed by Add layer.
CV_Assert
(
fusedAddMat
.
empty
()
&&
(
conv_dim
==
CONV_1D
||
conv_dim
==
CONV_2D
));
return
runDepthwise
(
input
,
output
,
conv
,
actLayer
.
get
(),
reluslope
);
CV_Assert
((
conv_dim
==
CONV_1D
||
conv_dim
==
CONV_2D
));
return
runDepthwise
(
input
,
output
,
conv
,
actLayer
.
get
(),
reluslope
,
fusedAdd
);
}
MatShape
inputShape
=
shape
(
input
);
...
...
modules/dnn/src/layers/fast_convolution/fast_convolution.hpp
浏览文件 @
840b1d5c
...
...
@@ -100,7 +100,7 @@ void runFastConv(InputArray _input, OutputArray _output, const Ptr<FastConv>& co
const
Ptr
<
ActivationLayer
>&
actLayer
,
const
std
::
vector
<
float
>&
reluslope
,
bool
fusedAdd
);
void
runDepthwise
(
InputArray
_input
,
OutputArray
_output
,
const
Ptr
<
FastConv
>&
conv
,
ActivationLayer
*
activ
,
const
std
::
vector
<
float
>&
reluslope
);
const
std
::
vector
<
float
>&
reluslope
,
bool
fusedAdd
);
int
runWinograd63
(
InputArray
_input
,
InputArray
_fusedAddMat
,
OutputArray
_output
,
const
Ptr
<
FastConv
>&
conv
,
int
ntasks
,
float
minval
,
float
maxval
,
ActivationLayer
*
activ
,
bool
ifMinMaxAct
);
...
...
modules/dnn/test/test_onnx_importer.cpp
浏览文件 @
840b1d5c
...
...
@@ -1726,6 +1726,11 @@ TEST_P(Test_ONNX_layers, ConvResizePool1d)
testONNXModels
(
"conv_resize_pool_1d"
);
}
TEST_P
(
Test_ONNX_layers
,
DepthWiseAdd
)
{
testONNXModels
(
"depthwiseconv_add"
);
}
TEST_P
(
Test_ONNX_layers
,
SubFromConst
)
{
testONNXModels
(
"sub_from_const1"
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录