Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
OpenCV
opencv
提交
c3910807
O
opencv
项目概览
OpenCV
/
opencv
上一次同步 8 个月
通知
981
Star
71099
Fork
55580
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
未验证
提交
c3910807
编写于
12月 03, 2021
作者:
A
Anna Khakimova
提交者:
GitHub
12月 03, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Merge pull request #21177 from anna-khakimova:ak/simd_mulc
* GAPI Fluid: SIMD for MulC kernel. * Changes for MulDouble kernel.
上级
c5b8b568
变更
9
隐藏空白更改
内联
并排
Showing
9 changed file
with
459 addition
and
68 deletion
+459
-68
modules/gapi/perf/common/gapi_core_perf_tests.hpp
modules/gapi/perf/common/gapi_core_perf_tests.hpp
+2
-2
modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp
modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp
+24
-14
modules/gapi/perf/cpu/gapi_core_perf_tests_cpu.cpp
modules/gapi/perf/cpu/gapi_core_perf_tests_cpu.cpp
+7
-5
modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp
modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp
+12
-10
modules/gapi/perf/gpu/gapi_core_perf_tests_gpu.cpp
modules/gapi/perf/gpu/gapi_core_perf_tests_gpu.cpp
+4
-2
modules/gapi/src/backends/fluid/gfluidcore.cpp
modules/gapi/src/backends/fluid/gfluidcore.cpp
+74
-32
modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp
modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp
+27
-0
modules/gapi/src/backends/fluid/gfluidcore_func.hpp
modules/gapi/src/backends/fluid/gfluidcore_func.hpp
+23
-0
modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp
modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp
+286
-3
未找到文件。
modules/gapi/perf/common/gapi_core_perf_tests.hpp
浏览文件 @
c3910807
...
...
@@ -33,8 +33,8 @@ namespace opencv_test
class
SubCPerfTest
:
public
TestPerfParams
<
tuple
<
compare_f
,
cv
::
Size
,
MatType
,
int
,
cv
::
GCompileArgs
>>
{};
class
SubRCPerfTest
:
public
TestPerfParams
<
tuple
<
cv
::
Size
,
MatType
,
int
,
cv
::
GCompileArgs
>>
{};
class
MulPerfTest
:
public
TestPerfParams
<
tuple
<
compare_f
,
cv
::
Size
,
MatType
,
int
,
double
,
cv
::
GCompileArgs
>>
{};
class
MulDoublePerfTest
:
public
TestPerfParams
<
tuple
<
cv
::
Size
,
MatType
,
int
,
cv
::
GCompileArgs
>>
{};
class
MulCPerfTest
:
public
TestPerfParams
<
tuple
<
cv
::
Size
,
MatType
,
int
,
cv
::
GCompileArgs
>>
{};
class
MulDoublePerfTest
:
public
TestPerfParams
<
tuple
<
c
ompare_f
,
c
v
::
Size
,
MatType
,
int
,
cv
::
GCompileArgs
>>
{};
class
MulCPerfTest
:
public
TestPerfParams
<
tuple
<
c
ompare_f
,
c
v
::
Size
,
MatType
,
int
,
cv
::
GCompileArgs
>>
{};
class
DivPerfTest
:
public
TestPerfParams
<
tuple
<
compare_f
,
cv
::
Size
,
MatType
,
int
,
double
,
cv
::
GCompileArgs
>>
{};
class
DivCPerfTest
:
public
TestPerfParams
<
tuple
<
cv
::
Size
,
MatType
,
int
,
cv
::
GCompileArgs
>>
{};
class
DivRCPerfTest
:
public
TestPerfParams
<
tuple
<
compare_f
,
cv
::
Size
,
MatType
,
int
,
cv
::
GCompileArgs
>>
{};
...
...
modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp
浏览文件 @
c3910807
...
...
@@ -257,17 +257,21 @@ PERF_TEST_P_(MulPerfTest, TestPerformance)
PERF_TEST_P_
(
MulDoublePerfTest
,
TestPerformance
)
{
Size
sz
=
get
<
0
>
(
GetParam
());
MatType
type
=
get
<
1
>
(
GetParam
());
int
dtype
=
get
<
2
>
(
GetParam
());
cv
::
GCompileArgs
compile_args
=
get
<
3
>
(
GetParam
());
compare_f
cmpF
;
cv
::
Size
sz
;
MatType
type
=
-
1
;
int
dtype
=
-
1
;
double
scale
=
1.0
;
cv
::
GCompileArgs
compile_args
;
std
::
tie
(
cmpF
,
sz
,
type
,
dtype
,
compile_args
)
=
GetParam
();
auto
&
rng
=
cv
::
theRNG
();
double
d
=
rng
.
uniform
(
0.0
,
10.0
);
initMatrixRandU
(
type
,
sz
,
dtype
,
false
);
// OpenCV code ///////////////////////////////////////////////////////////
cv
::
multiply
(
in_mat1
,
d
,
out_mat_ocv
,
1
,
dtype
);
cv
::
multiply
(
in_mat1
,
d
,
out_mat_ocv
,
scale
,
dtype
);
// G-API code ////////////////////////////////////////////////////////////
cv
::
GMat
in1
,
out
;
...
...
@@ -285,8 +289,9 @@ PERF_TEST_P_(MulDoublePerfTest, TestPerformance)
}
// Comparison ////////////////////////////////////////////////////////////
// FIXIT unrealiable check: EXPECT_EQ(0, cv::countNonZero(out_mat_gapi != out_mat_ocv));
EXPECT_EQ
(
out_mat_gapi
.
size
(),
sz
);
{
EXPECT_TRUE
(
cmpF
(
out_mat_gapi
,
out_mat_ocv
));
}
SANITY_CHECK_NOTHING
();
}
...
...
@@ -295,15 +300,19 @@ PERF_TEST_P_(MulDoublePerfTest, TestPerformance)
PERF_TEST_P_
(
MulCPerfTest
,
TestPerformance
)
{
Size
sz
=
get
<
0
>
(
GetParam
());
MatType
type
=
get
<
1
>
(
GetParam
());
int
dtype
=
get
<
2
>
(
GetParam
());
cv
::
GCompileArgs
compile_args
=
get
<
3
>
(
GetParam
());
compare_f
cmpF
;
cv
::
Size
sz
;
MatType
type
=
-
1
;
int
dtype
=
-
1
;
double
scale
=
1.0
;
cv
::
GCompileArgs
compile_args
;
std
::
tie
(
cmpF
,
sz
,
type
,
dtype
,
compile_args
)
=
GetParam
();
initMatsRandU
(
type
,
sz
,
dtype
,
false
);
// OpenCV code ///////////////////////////////////////////////////////////
cv
::
multiply
(
in_mat1
,
sc
,
out_mat_ocv
,
1
,
dtype
);
cv
::
multiply
(
in_mat1
,
sc
,
out_mat_ocv
,
scale
,
dtype
);
// G-API code ////////////////////////////////////////////////////////////
cv
::
GMat
in1
,
out
;
...
...
@@ -322,8 +331,9 @@ PERF_TEST_P_(MulCPerfTest, TestPerformance)
}
// Comparison ////////////////////////////////////////////////////////////
// FIXIT unrealiable check: EXPECT_EQ(0, cv::countNonZero(out_mat_gapi != out_mat_ocv));
EXPECT_EQ
(
out_mat_gapi
.
size
(),
sz
);
{
EXPECT_TRUE
(
cmpF
(
out_mat_gapi
,
out_mat_ocv
));
}
SANITY_CHECK_NOTHING
();
}
...
...
modules/gapi/perf/cpu/gapi_core_perf_tests_cpu.cpp
浏览文件 @
c3910807
...
...
@@ -56,13 +56,15 @@ INSTANTIATE_TEST_CASE_P(MulPerfTestCPU, MulPerfTest,
Values
(
cv
::
compile_args
(
CORE_CPU
))));
INSTANTIATE_TEST_CASE_P
(
MulDoublePerfTestCPU
,
MulDoublePerfTest
,
Combine
(
Values
(
szSmall128
,
szVGA
,
sz720p
,
sz1080p
),
Values
(
CV_8UC1
,
CV_8UC3
,
CV_16UC1
,
CV_16SC1
,
CV_32FC1
),
Values
(
-
1
,
CV_8U
,
CV_16U
,
CV_32F
),
Values
(
cv
::
compile_args
(
CORE_CPU
))));
Combine
(
Values
(
AbsExact
().
to_compare_f
()),
Values
(
szSmall128
,
szVGA
,
sz720p
,
sz1080p
),
Values
(
CV_8UC1
,
CV_8UC3
,
CV_16UC1
,
CV_16SC1
,
CV_32FC1
),
Values
(
-
1
,
CV_8U
,
CV_16U
,
CV_32F
),
Values
(
cv
::
compile_args
(
CORE_CPU
))));
INSTANTIATE_TEST_CASE_P
(
MulCPerfTestCPU
,
MulCPerfTest
,
Combine
(
Values
(
szSmall128
,
szVGA
,
sz720p
,
sz1080p
),
Combine
(
Values
(
AbsExact
().
to_compare_f
()),
Values
(
szSmall128
,
szVGA
,
sz720p
,
sz1080p
),
Values
(
CV_8UC1
,
CV_8UC3
,
CV_16UC1
,
CV_16SC1
,
CV_32FC1
),
Values
(
-
1
,
CV_8U
,
CV_16U
,
CV_32F
),
Values
(
cv
::
compile_args
(
CORE_CPU
))));
...
...
modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp
浏览文件 @
c3910807
...
...
@@ -52,17 +52,19 @@ INSTANTIATE_TEST_CASE_P(SubPerfTestFluid, SubPerfTest,
Values
(
2.0
),
Values
(
cv
::
compile_args
(
CORE_FLUID
))));
// INSTANTIATE_TEST_CASE_P(MulDoublePerfTestFluid, MulDoublePerfTest,
// Combine(Values(szSmall128, szVGA, sz720p, sz1080p),
// Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
// Values(-1, CV_8U, CV_16U, CV_32F),
// Values(cv::compile_args(CORE_FLUID))));
INSTANTIATE_TEST_CASE_P
(
MulDoublePerfTestFluid
,
MulDoublePerfTest
,
Combine
(
Values
(
Tolerance_FloatRel_IntAbs
(
1e-6
,
1
).
to_compare_f
()),
Values
(
szSmall128
,
szVGA
,
sz720p
,
sz1080p
),
Values
(
CV_8UC1
,
CV_8UC3
,
CV_16SC1
,
CV_32FC1
),
Values
(
-
1
,
CV_8U
,
CV_32F
),
Values
(
cv
::
compile_args
(
CORE_FLUID
))));
// INSTANTIATE_TEST_CASE_P(MulCPerfTestFluid, MulCPerfTest,
// Combine(Values(szSmall128, szVGA, sz720p, sz1080p),
// Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
// Values(-1, CV_8U, CV_16U, CV_32F),
// Values(cv::compile_args(CORE_FLUID))));
INSTANTIATE_TEST_CASE_P
(
MulCPerfTestFluid
,
MulCPerfTest
,
Combine
(
Values
(
Tolerance_FloatRel_IntAbs
(
1e-6
,
1
).
to_compare_f
()),
Values
(
szSmall128
,
szVGA
,
sz720p
,
sz1080p
),
Values
(
CV_8UC1
,
CV_8UC3
,
CV_16UC1
,
CV_16SC1
,
CV_32FC1
),
Values
(
-
1
,
CV_8U
,
CV_16U
,
CV_16S
,
CV_32F
),
Values
(
cv
::
compile_args
(
CORE_FLUID
))));
INSTANTIATE_TEST_CASE_P
(
DivPerfTestFluid
,
DivPerfTest
,
Combine
(
Values
(
AbsExact
().
to_compare_f
()),
...
...
modules/gapi/perf/gpu/gapi_core_perf_tests_gpu.cpp
浏览文件 @
c3910807
...
...
@@ -54,13 +54,15 @@ INSTANTIATE_TEST_CASE_P(MulPerfTestGPU, MulPerfTest,
Values
(
cv
::
compile_args
(
CORE_GPU
))));
INSTANTIATE_TEST_CASE_P
(
MulDoublePerfTestGPU
,
MulDoublePerfTest
,
Combine
(
Values
(
szSmall128
,
szVGA
,
sz720p
,
sz1080p
),
Combine
(
Values
(
AbsExact
().
to_compare_f
()),
Values
(
szSmall128
,
szVGA
,
sz720p
,
sz1080p
),
Values
(
CV_8UC1
,
CV_8UC3
,
CV_16UC1
,
CV_16SC1
,
CV_32FC1
),
Values
(
-
1
,
CV_8U
,
CV_16U
,
CV_32F
),
Values
(
cv
::
compile_args
(
CORE_GPU
))));
INSTANTIATE_TEST_CASE_P
(
MulCPerfTestGPU
,
MulCPerfTest
,
Combine
(
Values
(
szSmall128
,
szVGA
,
sz720p
,
sz1080p
),
Combine
(
Values
(
AbsExact
().
to_compare_f
()),
Values
(
szSmall128
,
szVGA
,
sz720p
,
sz1080p
),
Values
(
CV_8UC1
,
CV_8UC3
,
CV_16UC1
,
CV_16SC1
,
CV_32FC1
),
Values
(
-
1
,
CV_8U
,
CV_16U
,
CV_32F
),
Values
(
cv
::
compile_args
(
CORE_GPU
))));
...
...
modules/gapi/src/backends/fluid/gfluidcore.cpp
浏览文件 @
c3910807
...
...
@@ -1265,12 +1265,12 @@ CV_ALWAYS_INLINE void run_arithm_s(Buffer &dst, const View &src, const float sca
{
case
ARITHM_ADD
:
{
int
w
=
0
;
int
w
=
0
;
#if CV_SIMD
w
=
addc_simd
(
in
,
scalar
,
out
,
length
,
chan
);
w
=
addc_simd
(
in
,
scalar
,
out
,
length
,
chan
);
#endif
for
(;
w
<
length
;
++
w
)
out
[
w
]
=
add
<
DST
>
(
in
[
w
],
scalar
[
w
%
chan
]);
for
(;
w
<
length
;
++
w
)
out
[
w
]
=
add
<
DST
>
(
in
[
w
],
scalar
[
w
%
chan
]);
break
;
}
...
...
@@ -1284,12 +1284,17 @@ CV_ALWAYS_INLINE void run_arithm_s(Buffer &dst, const View &src, const float sca
out
[
w
]
=
sub
<
DST
>
(
in
[
w
],
scalar
[
w
%
chan
]);
break
;
}
// TODO: optimize miltiplication and division
case
ARITHM_MULTIPLY
:
for
(
int
w
=
0
;
w
<
width
;
w
++
)
for
(
int
c
=
0
;
c
<
chan
;
c
++
)
out
[
chan
*
w
+
c
]
=
mul
<
DST
>
(
in
[
chan
*
w
+
c
],
scalar
[
c
],
scale
);
{
int
w
=
0
;
#if CV_SIMD
w
=
mulc_simd
(
in
,
scalar
,
out
,
length
,
chan
,
scale
);
#endif
for
(;
w
<
width
;
++
w
)
for
(
int
c
=
0
;
c
<
chan
;
++
c
)
out
[
chan
*
w
+
c
]
=
mul
<
DST
>
(
in
[
chan
*
w
+
c
],
scalar
[
c
],
scale
);
break
;
}
case
ARITHM_DIVIDE
:
for
(
int
w
=
0
;
w
<
width
;
w
++
)
for
(
int
c
=
0
;
c
<
chan
;
c
++
)
...
...
@@ -1539,45 +1544,73 @@ GAPI_FLUID_KERNEL(GFluidSubRC, cv::gapi::core::GSubRC, false)
}
};
GAPI_FLUID_KERNEL
(
GFluidMulC
,
cv
::
gapi
::
core
::
GMulC
,
fals
e
)
GAPI_FLUID_KERNEL
(
GFluidMulC
,
cv
::
gapi
::
core
::
GMulC
,
tru
e
)
{
static
const
int
Window
=
1
;
static
void
run
(
const
View
&
src
,
const
cv
::
Scalar
&
_scalar
,
int
/*dtype*/
,
Buffer
&
dst
)
static
void
run
(
const
View
&
src
,
const
cv
::
Scalar
&
_scalar
,
int
/*dtype*/
,
Buffer
&
dst
,
Buffer
&
scratch
)
{
const
float
scalar
[
4
]
=
{
static_cast
<
float
>
(
_scalar
[
0
]),
static_cast
<
float
>
(
_scalar
[
1
]),
static_cast
<
float
>
(
_scalar
[
2
]),
static_cast
<
float
>
(
_scalar
[
3
])
};
const
float
scale
=
1.
f
;
GAPI_Assert
(
src
.
meta
().
chan
<=
4
);
if
(
dst
.
y
()
==
0
)
{
const
int
chan
=
src
.
meta
().
chan
;
float
*
sc
=
scratch
.
OutLine
<
float
>
();
for
(
int
i
=
0
;
i
<
scratch
.
length
();
++
i
)
sc
[
i
]
=
static_cast
<
float
>
(
_scalar
[
i
%
chan
]);
}
const
float
*
scalar
=
scratch
.
OutLine
<
float
>
();
const
float
scale
=
1.0
;
// DST SRC OP __VA_ARGS__
UNARY_
(
uchar
,
uchar
,
run_arithm_s
,
dst
,
src
,
scalar
,
ARITHM_MULTIPLY
,
scale
);
UNARY_
(
uchar
,
short
,
run_arithm_s
,
dst
,
src
,
scalar
,
ARITHM_MULTIPLY
,
scale
);
UNARY_
(
uchar
,
float
,
run_arithm_s
,
dst
,
src
,
scalar
,
ARITHM_MULTIPLY
,
scale
);
UNARY_
(
short
,
short
,
run_arithm_s
,
dst
,
src
,
scalar
,
ARITHM_MULTIPLY
,
scale
);
UNARY_
(
float
,
uchar
,
run_arithm_s
,
dst
,
src
,
scalar
,
ARITHM_MULTIPLY
,
scale
);
UNARY_
(
float
,
short
,
run_arithm_s
,
dst
,
src
,
scalar
,
ARITHM_MULTIPLY
,
scale
);
UNARY_
(
float
,
float
,
run_arithm_s
,
dst
,
src
,
scalar
,
ARITHM_MULTIPLY
,
scale
);
UNARY_
(
uchar
,
uchar
,
run_arithm_s
,
dst
,
src
,
scalar
,
ARITHM_MULTIPLY
,
scale
);
UNARY_
(
uchar
,
ushort
,
run_arithm_s
,
dst
,
src
,
scalar
,
ARITHM_MULTIPLY
,
scale
);
UNARY_
(
uchar
,
short
,
run_arithm_s
,
dst
,
src
,
scalar
,
ARITHM_MULTIPLY
,
scale
);
UNARY_
(
uchar
,
float
,
run_arithm_s
,
dst
,
src
,
scalar
,
ARITHM_MULTIPLY
,
scale
);
UNARY_
(
ushort
,
ushort
,
run_arithm_s
,
dst
,
src
,
scalar
,
ARITHM_MULTIPLY
,
scale
);
UNARY_
(
ushort
,
short
,
run_arithm_s
,
dst
,
src
,
scalar
,
ARITHM_MULTIPLY
,
scale
);
UNARY_
(
ushort
,
uchar
,
run_arithm_s
,
dst
,
src
,
scalar
,
ARITHM_MULTIPLY
,
scale
);
UNARY_
(
ushort
,
float
,
run_arithm_s
,
dst
,
src
,
scalar
,
ARITHM_MULTIPLY
,
scale
);
UNARY_
(
short
,
short
,
run_arithm_s
,
dst
,
src
,
scalar
,
ARITHM_MULTIPLY
,
scale
);
UNARY_
(
short
,
ushort
,
run_arithm_s
,
dst
,
src
,
scalar
,
ARITHM_MULTIPLY
,
scale
);
UNARY_
(
short
,
uchar
,
run_arithm_s
,
dst
,
src
,
scalar
,
ARITHM_MULTIPLY
,
scale
);
UNARY_
(
short
,
float
,
run_arithm_s
,
dst
,
src
,
scalar
,
ARITHM_MULTIPLY
,
scale
);
UNARY_
(
float
,
uchar
,
run_arithm_s
,
dst
,
src
,
scalar
,
ARITHM_MULTIPLY
,
scale
);
UNARY_
(
float
,
ushort
,
run_arithm_s
,
dst
,
src
,
scalar
,
ARITHM_MULTIPLY
,
scale
);
UNARY_
(
float
,
short
,
run_arithm_s
,
dst
,
src
,
scalar
,
ARITHM_MULTIPLY
,
scale
);
UNARY_
(
float
,
float
,
run_arithm_s
,
dst
,
src
,
scalar
,
ARITHM_MULTIPLY
,
scale
);
CV_Error
(
cv
::
Error
::
StsBadArg
,
"unsupported combination of types"
);
}
static
void
initScratch
(
const
GMatDesc
&
,
const
GScalarDesc
&
,
int
,
Buffer
&
scratch
)
{
initScratchBuffer
(
scratch
);
}
static
void
resetScratch
(
Buffer
&
/*scratch*/
)
{
}
};
GAPI_FLUID_KERNEL
(
GFluidMulCOld
,
cv
::
gapi
::
core
::
GMulCOld
,
fals
e
)
GAPI_FLUID_KERNEL
(
GFluidMulCOld
,
cv
::
gapi
::
core
::
GMulCOld
,
tru
e
)
{
static
const
int
Window
=
1
;
static
void
run
(
const
View
&
src
,
double
_scalar
,
int
/*dtype*/
,
Buffer
&
dst
)
static
void
run
(
const
View
&
src
,
double
_scalar
,
int
/*dtype*/
,
Buffer
&
dst
,
Buffer
&
scratch
)
{
const
float
scalar
[
4
]
=
{
static_cast
<
float
>
(
_scalar
),
static_cast
<
float
>
(
_scalar
),
static_cast
<
float
>
(
_scalar
),
static_cast
<
float
>
(
_scalar
)
};
GAPI_Assert
(
src
.
meta
().
chan
<=
4
);
if
(
dst
.
y
()
==
0
)
{
float
*
sc
=
scratch
.
OutLine
<
float
>
();
for
(
int
i
=
0
;
i
<
scratch
.
length
();
++
i
)
sc
[
i
]
=
static_cast
<
float
>
(
_scalar
);
}
const
float
*
scalar
=
scratch
.
OutLine
<
float
>
();
const
float
scale
=
1.
f
;
// DST SRC OP __VA_ARGS__
...
...
@@ -1591,6 +1624,15 @@ GAPI_FLUID_KERNEL(GFluidMulCOld, cv::gapi::core::GMulCOld, false)
CV_Error
(
cv
::
Error
::
StsBadArg
,
"unsupported combination of types"
);
}
static
void
initScratch
(
const
GMatDesc
&
,
double
,
int
,
Buffer
&
scratch
)
{
initScratchBuffer
(
scratch
);
}
static
void
resetScratch
(
Buffer
&
/*scratch*/
)
{
}
};
GAPI_FLUID_KERNEL
(
GFluidDivC
,
cv
::
gapi
::
core
::
GDivC
,
false
)
...
...
modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp
浏览文件 @
c3910807
...
...
@@ -138,6 +138,33 @@ SUBC_SIMD(float, float)
#undef SUBC_SIMD
#define MULC_SIMD(SRC, DST) \
int mulc_simd(const SRC in[], const float scalar[], DST out[], \
const int length, const int chan, const float scale) \
{ \
CV_CPU_DISPATCH(mulc_simd, (in, scalar, out, length, chan, scale), \
CV_CPU_DISPATCH_MODES_ALL); \
}
MULC_SIMD
(
uchar
,
uchar
)
MULC_SIMD
(
ushort
,
uchar
)
MULC_SIMD
(
short
,
uchar
)
MULC_SIMD
(
float
,
uchar
)
MULC_SIMD
(
short
,
short
)
MULC_SIMD
(
ushort
,
short
)
MULC_SIMD
(
uchar
,
short
)
MULC_SIMD
(
float
,
short
)
MULC_SIMD
(
ushort
,
ushort
)
MULC_SIMD
(
uchar
,
ushort
)
MULC_SIMD
(
short
,
ushort
)
MULC_SIMD
(
float
,
ushort
)
MULC_SIMD
(
uchar
,
float
)
MULC_SIMD
(
ushort
,
float
)
MULC_SIMD
(
short
,
float
)
MULC_SIMD
(
float
,
float
)
#undef MULC_SIMD
}
// namespace fluid
}
// namespace gapi
}
// namespace cv
...
...
modules/gapi/src/backends/fluid/gfluidcore_func.hpp
浏览文件 @
c3910807
...
...
@@ -106,6 +106,29 @@ SUBC_SIMD(float, float)
#undef SUBC_SIMD
#define MULC_SIMD(SRC, DST) \
int mulc_simd(const SRC in[], const float scalar[], DST out[], \
const int length, const int chan, const float scale);
MULC_SIMD
(
uchar
,
uchar
)
MULC_SIMD
(
ushort
,
uchar
)
MULC_SIMD
(
short
,
uchar
)
MULC_SIMD
(
float
,
uchar
)
MULC_SIMD
(
short
,
short
)
MULC_SIMD
(
ushort
,
short
)
MULC_SIMD
(
uchar
,
short
)
MULC_SIMD
(
float
,
short
)
MULC_SIMD
(
ushort
,
ushort
)
MULC_SIMD
(
uchar
,
ushort
)
MULC_SIMD
(
short
,
ushort
)
MULC_SIMD
(
float
,
ushort
)
MULC_SIMD
(
uchar
,
float
)
MULC_SIMD
(
ushort
,
float
)
MULC_SIMD
(
short
,
float
)
MULC_SIMD
(
float
,
float
)
#undef MULC_SIMD
}
// namespace fluid
}
// namespace gapi
}
// namespace cv
...
...
modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp
浏览文件 @
c3910807
...
...
@@ -127,6 +127,30 @@ SUBC_SIMD(float, float)
#undef SUBC_SIMD
#define MULC_SIMD(SRC, DST) \
int mulc_simd(const SRC in[], const float scalar[], DST out[], \
const int length, const int chan, const float scale);
MULC_SIMD
(
uchar
,
uchar
)
MULC_SIMD
(
ushort
,
uchar
)
MULC_SIMD
(
short
,
uchar
)
MULC_SIMD
(
float
,
uchar
)
MULC_SIMD
(
short
,
short
)
MULC_SIMD
(
ushort
,
short
)
MULC_SIMD
(
uchar
,
short
)
MULC_SIMD
(
float
,
short
)
MULC_SIMD
(
ushort
,
ushort
)
MULC_SIMD
(
uchar
,
ushort
)
MULC_SIMD
(
short
,
ushort
)
MULC_SIMD
(
float
,
ushort
)
MULC_SIMD
(
uchar
,
float
)
MULC_SIMD
(
ushort
,
float
)
MULC_SIMD
(
short
,
float
)
MULC_SIMD
(
float
,
float
)
#undef MULC_SIMD
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
struct
scale_tag
{};
...
...
@@ -870,12 +894,13 @@ MUL_SIMD(float, float)
//-------------------------
//
// Fluid kernels: AddC
// Fluid kernels: AddC
, SubC
//
//-------------------------
struct
add_tag
{};
struct
sub_tag
{};
struct
mul_tag
{};
CV_ALWAYS_INLINE
void
arithmOpScalar_pack_store_c3
(
short
*
outx
,
const
v_int32
&
c1
,
const
v_int32
&
c2
,
const
v_int32
&
c3
,
...
...
@@ -909,6 +934,12 @@ CV_ALWAYS_INLINE v_float32 oper(sub_tag, const v_float32& a, const v_float32& sc
return
a
-
sc
;
}
CV_ALWAYS_INLINE
v_float32
oper
(
mul_tag
,
const
v_float32
&
a
,
const
v_float32
&
sc
)
{
return
a
*
sc
;
}
//-------------------------------------------------------------------------------------------------
template
<
typename
oper_tag
,
typename
SRC
,
typename
DST
>
CV_ALWAYS_INLINE
typename
std
::
enable_if
<
(
std
::
is_same
<
DST
,
ushort
>::
value
||
...
...
@@ -957,7 +988,7 @@ CV_ALWAYS_INLINE
typename
std
::
enable_if
<
std
::
is_same
<
DST
,
short
>::
value
||
std
::
is_same
<
DST
,
ushort
>::
value
,
void
>::
type
arithmOpScalar_simd_c3_impl
(
oper_tag
t
,
const
SRC
*
inx
,
DST
*
outx
,
const
v_float32
&
s1
,
const
v_float32
&
s2
,
const
v_float32
&
s3
,
const
int
nlanes
)
const
v_float32
&
s3
,
const
int
nlanes
)
{
v_float32
a1
=
vg_load_f32
(
inx
);
v_float32
a2
=
vg_load_f32
(
&
inx
[
nlanes
/
2
]);
...
...
@@ -1089,7 +1120,7 @@ CV_ALWAYS_INLINE int arithmOpScalar_simd_common(oper_tag t, const SRC in[],
return
x
;
}
//-------------------------------------------------------------------------------------------------
#define ADDC_SIMD(SRC, DST) \
int addc_simd(const SRC in[], const float scalar[], DST out[], \
...
...
@@ -1129,6 +1160,8 @@ ADDC_SIMD(float, float)
#undef ADDC_SIMD
//-------------------------------------------------------------------------------------------------
#define SUBC_SIMD(SRC, DST) \
int subc_simd(const SRC in[], const float scalar[], DST out[], \
const int length, const int chan) \
...
...
@@ -1167,6 +1200,256 @@ SUBC_SIMD(float, float)
#undef SUBC_SIMD
//-------------------------
//
// Fluid kernels: MulC
//
//-------------------------
template
<
typename
SRC
,
typename
DST
>
CV_ALWAYS_INLINE
typename
std
::
enable_if
<
std
::
is_same
<
DST
,
short
>::
value
||
std
::
is_same
<
DST
,
ushort
>::
value
,
void
>::
type
mulc_scale_simd_c3_impl
(
const
SRC
*
inx
,
DST
*
outx
,
const
v_float32
&
s1
,
const
v_float32
&
s2
,
const
v_float32
&
s3
,
const
v_float32
&
scale
,
const
int
nlanes
)
{
v_float32
a1
=
vg_load_f32
(
inx
);
v_float32
a2
=
vg_load_f32
(
&
inx
[
nlanes
/
2
]);
v_float32
a3
=
vg_load_f32
(
&
inx
[
nlanes
]);
v_float32
a4
=
vg_load_f32
(
&
inx
[
3
*
nlanes
/
2
]);
v_float32
a5
=
vg_load_f32
(
&
inx
[
2
*
nlanes
]);
v_float32
a6
=
vg_load_f32
(
&
inx
[
5
*
nlanes
/
2
]);
arithmOpScalar_pack_store_c3
(
outx
,
v_round
(
scale
*
a1
*
s1
),
v_round
(
scale
*
a2
*
s2
),
v_round
(
scale
*
a3
*
s3
),
v_round
(
scale
*
a4
*
s1
),
v_round
(
scale
*
a5
*
s2
),
v_round
(
scale
*
a6
*
s3
));
}
//-------------------------------------------------------------------------------------------------
template
<
typename
SRC
>
CV_ALWAYS_INLINE
void
mulc_scale_simd_c3_impl
(
const
SRC
*
inx
,
uchar
*
outx
,
const
v_float32
&
s1
,
const
v_float32
&
s2
,
const
v_float32
&
s3
,
const
v_float32
&
scale
,
const
int
nlanes
)
{
vx_store
(
outx
,
v_pack_u
(
v_pack
(
v_round
(
scale
*
vg_load_f32
(
inx
)
*
s1
),
v_round
(
scale
*
vg_load_f32
(
&
inx
[
nlanes
/
4
])
*
s2
)),
v_pack
(
v_round
(
scale
*
vg_load_f32
(
&
inx
[
nlanes
/
2
])
*
s3
),
v_round
(
scale
*
vg_load_f32
(
&
inx
[
3
*
nlanes
/
4
])
*
s1
))));
vx_store
(
&
outx
[
nlanes
],
v_pack_u
(
v_pack
(
v_round
(
scale
*
vg_load_f32
(
&
inx
[
nlanes
])
*
s2
),
v_round
(
scale
*
vg_load_f32
(
&
inx
[
5
*
nlanes
/
4
])
*
s3
)),
v_pack
(
v_round
(
scale
*
vg_load_f32
(
&
inx
[
3
*
nlanes
/
2
])
*
s1
),
v_round
(
scale
*
vg_load_f32
(
&
inx
[
7
*
nlanes
/
4
])
*
s2
))));
vx_store
(
&
outx
[
2
*
nlanes
],
v_pack_u
(
v_pack
(
v_round
(
scale
*
vg_load_f32
(
&
inx
[
2
*
nlanes
])
*
s3
),
v_round
(
scale
*
vg_load_f32
(
&
inx
[
9
*
nlanes
/
4
])
*
s1
)),
v_pack
(
v_round
(
scale
*
vg_load_f32
(
&
inx
[
5
*
nlanes
/
2
])
*
s2
),
v_round
(
scale
*
vg_load_f32
(
&
inx
[
11
*
nlanes
/
4
])
*
s3
))));
}
//-------------------------------------------------------------------------------------------------
template
<
typename
SRC
>
CV_ALWAYS_INLINE
void
mulc_scale_simd_c3_impl
(
const
SRC
*
in
,
float
*
out
,
const
v_float32
&
s1
,
const
v_float32
&
s2
,
const
v_float32
&
s3
,
const
v_float32
&
scale
,
const
int
nlanes
)
{
v_float32
a1
=
vg_load_f32
(
in
);
v_float32
a2
=
vg_load_f32
(
&
in
[
nlanes
]);
v_float32
a3
=
vg_load_f32
(
&
in
[
2
*
nlanes
]);
vx_store
(
out
,
scale
*
a1
*
s1
);
vx_store
(
&
out
[
nlanes
],
scale
*
a2
*
s2
);
vx_store
(
&
out
[
2
*
nlanes
],
scale
*
a3
*
s3
);
}
//-------------------------------------------------------------------------------------------------
template
<
typename
SRC
,
typename
DST
>
CV_ALWAYS_INLINE
int
mulc_scale_simd_c3
(
const
SRC
in
[],
const
float
scalar
[],
DST
out
[],
const
int
length
,
const
float
_scale
)
{
constexpr
int
chan
=
3
;
constexpr
int
nlanes
=
vector_type_of_t
<
DST
>::
nlanes
;
constexpr
int
lanes
=
chan
*
nlanes
;
if
(
length
<
lanes
)
return
0
;
v_float32
scale
=
vx_setall_f32
(
_scale
);
v_float32
s1
=
vx_load
(
scalar
);
#if CV_SIMD_WIDTH == 32
v_float32
s2
=
vx_load
(
&
scalar
[
2
]);
v_float32
s3
=
vx_load
(
&
scalar
[
1
]);
#else
v_float32
s2
=
vx_load
(
&
scalar
[
1
]);
v_float32
s3
=
vx_load
(
&
scalar
[
2
]);
#endif
int
x
=
0
;
for
(;;)
{
for
(;
x
<=
length
-
lanes
;
x
+=
lanes
)
{
mulc_scale_simd_c3_impl
(
&
in
[
x
],
&
out
[
x
],
s1
,
s2
,
s3
,
scale
,
nlanes
);
}
if
(
x
<
length
)
{
x
=
length
-
lanes
;
continue
;
// process unaligned tail
}
break
;
}
return
x
;
}
//-------------------------------------------------------------------------------------------------
template
<
typename
SRC
,
typename
DST
>
CV_ALWAYS_INLINE
typename
std
::
enable_if
<
(
std
::
is_same
<
DST
,
ushort
>::
value
||
std
::
is_same
<
DST
,
short
>::
value
),
void
>::
type
mulc_scale_simd_common_impl
(
const
SRC
*
inx
,
DST
*
outx
,
const
v_float32
&
sc
,
const
v_float32
&
scale
,
const
int
nlanes
)
{
v_float32
a1
=
vg_load_f32
(
inx
);
v_float32
a2
=
vg_load_f32
(
&
inx
[
nlanes
/
2
]);
v_store_i16
(
outx
,
v_round
(
scale
*
a1
*
sc
),
v_round
(
scale
*
a2
*
sc
));
}
//-------------------------------------------------------------------------------------------------
template
<
typename
SRC
>
CV_ALWAYS_INLINE
void
mulc_scale_simd_common_impl
(
const
SRC
*
inx
,
uchar
*
outx
,
const
v_float32
&
sc
,
const
v_float32
&
scale
,
const
int
nlanes
)
{
v_float32
a1
=
vg_load_f32
(
inx
);
v_float32
a2
=
vg_load_f32
(
&
inx
[
nlanes
/
4
]);
v_float32
a3
=
vg_load_f32
(
&
inx
[
nlanes
/
2
]);
v_float32
a4
=
vg_load_f32
(
&
inx
[
3
*
nlanes
/
4
]);
vx_store
(
outx
,
v_pack_u
(
v_pack
(
v_round
(
scale
*
a1
*
sc
),
v_round
(
scale
*
a2
*
sc
)),
v_pack
(
v_round
(
scale
*
a3
*
sc
),
v_round
(
scale
*
a4
*
sc
))));
}
//-------------------------------------------------------------------------------------------------
template
<
typename
SRC
>
CV_ALWAYS_INLINE
void
mulc_scale_simd_common_impl
(
const
SRC
*
inx
,
float
*
outx
,
const
v_float32
&
sc
,
const
v_float32
&
scale
,
const
int
)
{
v_float32
a1
=
vg_load_f32
(
inx
);
vx_store
(
outx
,
scale
*
a1
*
sc
);
}
//-------------------------------------------------------------------------------------------------
template
<
typename
SRC
,
typename
DST
>
CV_ALWAYS_INLINE
int
mulc_scale_simd_common
(
const
SRC
in
[],
const
float
scalar
[],
DST
out
[],
const
int
length
,
const
float
_scale
)
{
constexpr
int
nlanes
=
vector_type_of_t
<
DST
>::
nlanes
;
if
(
length
<
nlanes
)
return
0
;
v_float32
_scalar
=
vx_load
(
scalar
);
v_float32
scale
=
vx_setall_f32
(
_scale
);
int
x
=
0
;
for
(;;)
{
for
(;
x
<=
length
-
nlanes
;
x
+=
nlanes
)
{
mulc_scale_simd_common_impl
(
&
in
[
x
],
&
out
[
x
],
_scalar
,
scale
,
nlanes
);
}
if
(
x
<
length
)
{
x
=
length
-
nlanes
;
continue
;
// process unaligned tail
}
break
;
}
return
x
;
}
#define MULC_SIMD(SRC, DST) \
int mulc_simd(const SRC in[], const float scalar[], DST out[], \
const int length, const int chan, const float scale) \
{ \
mul_tag op_t; \
switch (chan) \
{ \
case 1: \
case 2: \
case 4: \
{ \
if (std::fabs(scale - 1.0f) <= FLT_EPSILON) \
{ \
return arithmOpScalar_simd_common(op_t, in, scalar, \
out, length); \
} \
else \
{ \
return mulc_scale_simd_common(in, scalar, out, length, scale); \
} \
} \
case 3: \
{ \
if (std::fabs(scale - 1.0f) <= FLT_EPSILON) \
{ \
return arithmOpScalar_simd_c3(op_t, in, scalar, \
out, length); \
} \
else \
{ \
return mulc_scale_simd_c3(in, scalar, out, length, scale); \
} \
} \
default: \
GAPI_Assert(chan <= 4); \
break; \
} \
return 0; \
}
MULC_SIMD
(
uchar
,
uchar
)
MULC_SIMD
(
ushort
,
uchar
)
MULC_SIMD
(
short
,
uchar
)
MULC_SIMD
(
float
,
uchar
)
MULC_SIMD
(
short
,
short
)
MULC_SIMD
(
ushort
,
short
)
MULC_SIMD
(
uchar
,
short
)
MULC_SIMD
(
float
,
short
)
MULC_SIMD
(
ushort
,
ushort
)
MULC_SIMD
(
uchar
,
ushort
)
MULC_SIMD
(
short
,
ushort
)
MULC_SIMD
(
float
,
ushort
)
MULC_SIMD
(
uchar
,
float
)
MULC_SIMD
(
ushort
,
float
)
MULC_SIMD
(
short
,
float
)
MULC_SIMD
(
float
,
float
)
#undef MULC_SIMD
#endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
CV_CPU_OPTIMIZATION_NAMESPACE_END
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录