Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
OpenCV
opencv
提交
329abb5b
O
opencv
项目概览
OpenCV
/
opencv
上一次同步 9 个月
通知
992
Star
71100
Fork
55581
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
329abb5b
编写于
4月 26, 2018
作者:
L
Li Peng
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
dnn fp16 support
Signed-off-by:
N
Li Peng
<
peng.li@intel.com
>
上级
bb8ff2c4
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
113 addition
and
22 deletion
+113
-22
modules/dnn/src/dnn.cpp
modules/dnn/src/dnn.cpp
+112
-22
modules/dnn/src/precomp.hpp
modules/dnn/src/precomp.hpp
+1
-0
未找到文件。
modules/dnn/src/dnn.cpp
浏览文件 @
329abb5b
...
...
@@ -499,7 +499,7 @@ public:
}
}
void
reuseOrCreate
(
const
MatShape
&
shape
,
const
LayerPin
&
lp
,
Mat
&
dst
,
bool
forceCreate
)
void
reuseOrCreate
(
const
MatShape
&
shape
,
const
LayerPin
&
lp
,
Mat
&
dst
,
bool
forceCreate
,
bool
use_half
)
{
if
(
!
DNN_DISABLE_MEMORY_OPTIMIZATIONS
&&
!
forceCreate
)
{
...
...
@@ -540,14 +540,14 @@ public:
{
// if dst already has been allocated with total(shape) elements,
// it won't be recreated and pointer of dst.data remains the same.
dst
.
create
(
shape
,
CV_32F
);
dst
.
create
(
shape
,
use_half
?
CV_16S
:
CV_32F
);
addHost
(
lp
,
dst
);
}
}
void
allocateBlobsForLayer
(
LayerData
&
ld
,
const
LayerShapes
&
layerShapes
,
std
::
vector
<
LayerPin
>&
pinsForInternalBlobs
,
bool
forceCreate
=
false
)
bool
forceCreate
=
false
,
bool
use_half
=
false
)
{
CV_TRACE_FUNCTION
();
...
...
@@ -618,7 +618,7 @@ public:
reuse
(
ld
.
inputBlobsId
[
0
],
blobPin
);
}
else
reuseOrCreate
(
shapes
[
index
],
blobPin
,
*
blobs
[
index
],
forceCreate
);
reuseOrCreate
(
shapes
[
index
],
blobPin
,
*
blobs
[
index
],
forceCreate
,
use_half
);
}
}
}
...
...
@@ -656,7 +656,7 @@ static Ptr<BackendWrapper> wrapMat(int backendId, int targetId, cv::Mat& m)
{
if
(
targetId
==
DNN_TARGET_CPU
)
return
Ptr
<
BackendWrapper
>
();
else
if
(
targetId
==
DNN_TARGET_OPENCL
)
else
if
(
IS_DNN_OPENCL_TARGET
(
targetId
)
)
return
OpenCLBackendWrapper
::
create
(
m
);
else
CV_Error
(
Error
::
StsNotImplemented
,
"Unknown target identifier"
);
...
...
@@ -721,6 +721,7 @@ struct Net::Impl
bool
netWasAllocated
;
bool
fusion
;
std
::
vector
<
int64
>
layersTimings
;
Mat
output_blob
;
Ptr
<
BackendWrapper
>
wrap
(
Mat
&
host
)
{
...
...
@@ -737,7 +738,7 @@ struct Net::Impl
Ptr
<
BackendWrapper
>
baseBuffer
=
backendWrappers
[
data
];
if
(
preferableBackend
==
DNN_BACKEND_DEFAULT
)
{
CV_Assert
(
preferableTarget
==
DNN_TARGET_OPENCL
);
CV_Assert
(
IS_DNN_OPENCL_TARGET
(
preferableTarget
)
);
return
OpenCLBackendWrapper
::
create
(
baseBuffer
,
host
);
}
else
if
(
preferableBackend
==
DNN_BACKEND_HALIDE
)
...
...
@@ -849,7 +850,7 @@ struct Net::Impl
if
(
!
netWasAllocated
||
this
->
blobsToKeep
!=
blobsToKeep_
)
{
if
(
preferableBackend
==
DNN_BACKEND_DEFAULT
&&
preferableTarget
==
DNN_TARGET_OPENCL
)
if
(
preferableBackend
==
DNN_BACKEND_DEFAULT
&&
IS_DNN_OPENCL_TARGET
(
preferableTarget
)
)
#ifndef HAVE_OPENCL
{
CV_LOG_WARNING
(
NULL
,
"DNN: OpenCL target is not available in this OpenCV build, switching to CPU."
);
...
...
@@ -1034,7 +1035,7 @@ struct Net::Impl
{
CV_TRACE_FUNCTION
();
if
(
preferableBackend
==
DNN_BACKEND_DEFAULT
)
CV_Assert
(
preferableTarget
==
DNN_TARGET_CPU
||
preferableTarget
==
DNN_TARGET_OPENCL
);
CV_Assert
(
preferableTarget
==
DNN_TARGET_CPU
||
IS_DNN_OPENCL_TARGET
(
preferableTarget
)
);
else
if
(
preferableBackend
==
DNN_BACKEND_HALIDE
)
initHalideBackend
();
else
if
(
preferableBackend
==
DNN_BACKEND_INFERENCE_ENGINE
)
...
...
@@ -1369,7 +1370,9 @@ struct Net::Impl
std
::
vector
<
LayerPin
>
pinsForInternalBlobs
;
blobManager
.
allocateBlobsForLayer
(
ld
,
layerShapesIt
->
second
,
pinsForInternalBlobs
,
preferableBackend
==
DNN_BACKEND_INFERENCE_ENGINE
);
preferableBackend
==
DNN_BACKEND_INFERENCE_ENGINE
,
preferableBackend
==
DNN_BACKEND_DEFAULT
&&
preferableTarget
==
DNN_TARGET_OPENCL_FP16
);
ld
.
outputBlobsWrappers
.
resize
(
ld
.
outputBlobs
.
size
());
for
(
int
i
=
0
;
i
<
ld
.
outputBlobs
.
size
();
++
i
)
{
...
...
@@ -1439,7 +1442,7 @@ struct Net::Impl
// some other layers.
// TODO: OpenCL target support more fusion styles.
if
(
preferableBackend
==
DNN_BACKEND_DEFAULT
&&
preferableTarget
==
DNN_TARGET_OPENCL
&&
if
(
preferableBackend
==
DNN_BACKEND_DEFAULT
&&
IS_DNN_OPENCL_TARGET
(
preferableTarget
)
&&
(
!
cv
::
ocl
::
useOpenCL
()
||
(
ld
.
layerInstance
->
type
!=
"Convolution"
&&
ld
.
layerInstance
->
type
!=
"MVN"
))
)
continue
;
...
...
@@ -1478,8 +1481,8 @@ struct Net::Impl
continue
;
// Go to the next layer.
// For now, OpenCL target supports fusion with activation of ReLU/ChannelsPReLU/Power/Tanh
if
(
preferableTarget
!=
DNN_TARGET_OPENCL
||
(
preferableTarget
==
DNN_TARGET_OPENCL
&&
if
(
!
IS_DNN_OPENCL_TARGET
(
preferableTarget
)
||
(
IS_DNN_OPENCL_TARGET
(
preferableTarget
)
&&
nextData
&&
((
nextData
->
type
==
"ReLU"
)
||
(
nextData
->
type
==
"ChannelsPReLU"
)
||
...
...
@@ -1502,7 +1505,7 @@ struct Net::Impl
ld
.
outputBlobs
=
layers
[
lpNext
.
lid
].
outputBlobs
;
ld
.
outputBlobsWrappers
=
layers
[
lpNext
.
lid
].
outputBlobsWrappers
;
if
(
preferableTarget
==
DNN_TARGET_OPENCL
)
if
(
IS_DNN_OPENCL_TARGET
(
preferableTarget
)
)
{
if
(
!
activData
->
consumers
.
empty
()
)
{
...
...
@@ -1514,7 +1517,7 @@ struct Net::Impl
}
// fuse convolution layer followed by eltwise + relu
if
(
preferableTarget
==
DNN_TARGET_OPENCL
)
if
(
IS_DNN_OPENCL_TARGET
(
preferableTarget
)
)
{
Ptr
<
EltwiseLayer
>
nextEltwiseLayer
;
if
(
nextData
)
...
...
@@ -1727,6 +1730,13 @@ struct Net::Impl
for
(
int
i
=
0
;
i
<
layers
[
0
].
outputBlobs
.
size
();
i
++
)
{
CV_Assert
(
layers
[
0
].
outputBlobs
[
i
].
total
());
if
(
layers
[
0
].
outputBlobs
[
i
].
depth
()
==
CV_32F
&&
preferableBackend
==
DNN_BACKEND_DEFAULT
&&
preferableTarget
==
DNN_TARGET_OPENCL_FP16
)
{
Mat
mat
=
layers
[
0
].
outputBlobs
[
i
].
clone
();
convertFp16
(
mat
,
layers
[
0
].
outputBlobs
[
i
]);
}
inputShapes
.
push_back
(
shape
(
layers
[
0
].
outputBlobs
[
i
]));
}
LayersShapesMap
layersShapes
;
...
...
@@ -1772,7 +1782,7 @@ struct Net::Impl
{
if
(
!
ld
.
skip
)
{
if
(
preferableBackend
==
DNN_BACKEND_DEFAULT
&&
preferableTarget
==
DNN_TARGET_OPENCL
)
if
(
preferableBackend
==
DNN_BACKEND_DEFAULT
&&
IS_DNN_OPENCL_TARGET
(
preferableTarget
)
)
{
std
::
vector
<
UMat
>
umat_outputBlobs
=
OpenCLBackendWrapper
::
getUMatVector
(
ld
.
outputBlobsWrappers
);
layer
->
forward
(
OpenCLBackendWrapper
::
getUMatVector
(
ld
.
inputBlobsWrappers
),
...
...
@@ -1937,7 +1947,14 @@ struct Net::Impl
// Transfer data to CPU if it's required.
ld
.
outputBlobsWrappers
[
pin
.
oid
]
->
copyToHost
();
}
return
ld
.
outputBlobs
[
pin
.
oid
];
if
(
ld
.
outputBlobs
[
pin
.
oid
].
depth
()
==
CV_16S
)
{
convertFp16
(
ld
.
outputBlobs
[
pin
.
oid
],
output_blob
);
return
output_blob
;
}
else
return
ld
.
outputBlobs
[
pin
.
oid
];
}
Mat
getBlob
(
String
outputName
)
...
...
@@ -2080,7 +2097,7 @@ void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
if
(
outputBlobs
.
isUMat
())
{
outputBlobs
.
assign
(
ld
.
outputBlobs
[
pin
.
oid
]
.
getUMat
(
ACCESS_RW
));
outputBlobs
.
assign
(
impl
->
getBlob
(
layerName
)
.
getUMat
(
ACCESS_RW
));
}
else
if
(
outputBlobs
.
isMat
())
{
...
...
@@ -2096,17 +2113,33 @@ void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
ld
.
outputBlobsWrappers
[
i
]
->
copyToHost
();
}
}
std
::
vector
<
Mat
>
&
outputvec
=
*
(
std
::
vector
<
Mat
>
*
)
outputBlobs
.
getObj
();
outputvec
=
ld
.
outputBlobs
;
if
(
ld
.
outputBlobs
[
0
].
depth
()
==
CV_32F
)
{
std
::
vector
<
Mat
>
&
outputvec
=
*
(
std
::
vector
<
Mat
>
*
)
outputBlobs
.
getObj
();
outputvec
=
ld
.
outputBlobs
;
}
else
{
std
::
vector
<
Mat
>
&
outputvec
=
*
(
std
::
vector
<
Mat
>
*
)
outputBlobs
.
getObj
();
outputvec
.
resize
(
ld
.
outputBlobs
.
size
());
for
(
int
i
=
0
;
i
<
outputvec
.
size
();
i
++
)
convertFp16
(
ld
.
outputBlobs
[
i
],
outputvec
[
i
]);
}
}
else
if
(
outputBlobs
.
isUMatVector
())
{
std
::
vector
<
UMat
>
&
outputvec
=
*
(
std
::
vector
<
UMat
>
*
)
outputBlobs
.
getObj
();
if
(
impl
->
preferableBackend
==
DNN_BACKEND_DEFAULT
&&
impl
->
preferableTarget
==
DNN_TARGET_OPENCL
)
IS_DNN_OPENCL_TARGET
(
impl
->
preferableTarget
)
)
{
outputvec
=
OpenCLBackendWrapper
::
getUMatVector
(
ld
.
outputBlobsWrappers
);
if
(
impl
->
preferableTarget
==
DNN_TARGET_OPENCL
)
outputvec
=
OpenCLBackendWrapper
::
getUMatVector
(
ld
.
outputBlobsWrappers
);
else
if
(
impl
->
preferableTarget
==
DNN_TARGET_OPENCL_FP16
)
{
std
::
vector
<
UMat
>
out_vec
=
OpenCLBackendWrapper
::
getUMatVector
(
ld
.
outputBlobsWrappers
);
outputvec
.
resize
(
out_vec
.
size
());
for
(
int
i
=
0
;
i
<
out_vec
.
size
();
i
++
)
convertFp16
(
out_vec
[
i
],
outputvec
[
i
]);
}
}
else
{
...
...
@@ -2194,6 +2227,16 @@ void Net::setPreferableTarget(int targetId)
if
(
impl
->
preferableTarget
!=
targetId
)
{
impl
->
preferableTarget
=
targetId
;
if
(
IS_DNN_OPENCL_TARGET
(
targetId
))
{
#ifndef HAVE_OPENCL
impl
->
preferableTarget
=
DNN_TARGET_CPU
;
#else
bool
fp16
=
ocl
::
Device
::
getDefault
().
isExtensionSupported
(
"cl_khr_fp16"
);
if
(
!
fp16
&&
targetId
==
DNN_TARGET_OPENCL_FP16
)
impl
->
preferableTarget
=
DNN_TARGET_OPENCL
;
#endif
}
impl
->
netWasAllocated
=
false
;
impl
->
clear
();
}
...
...
@@ -2222,7 +2265,17 @@ void Net::setInput(InputArray blob, const String& name)
ld
.
outputBlobs
.
resize
(
std
::
max
(
pin
.
oid
+
1
,
(
int
)
ld
.
requiredOutputs
.
size
())
);
ld
.
outputBlobsWrappers
.
resize
(
ld
.
outputBlobs
.
size
());
MatShape
prevShape
=
shape
(
ld
.
outputBlobs
[
pin
.
oid
]);
Mat
blob_
=
blob
.
getMat
();
Mat
blob_
;
if
(
impl
->
preferableBackend
==
DNN_BACKEND_DEFAULT
&&
impl
->
preferableTarget
==
DNN_TARGET_OPENCL_FP16
)
{
Mat
blob_mat
=
blob
.
getMat
();
convertFp16
(
blob_mat
,
blob_
);
}
else
{
blob_
=
blob
.
getMat
();
}
bool
oldShape
=
prevShape
==
shape
(
blob_
);
if
(
oldShape
)
{
...
...
@@ -2747,6 +2800,43 @@ void Layer::forward_fallback(InputArrayOfArrays inputs_arr, OutputArrayOfArrays
CV_TRACE_FUNCTION
();
CV_TRACE_ARG_VALUE
(
name
,
"name"
,
name
.
c_str
());
if
(
preferableTarget
==
DNN_TARGET_OPENCL_FP16
&&
inputs_arr
.
depth
()
==
CV_16S
)
{
std
::
vector
<
UMat
>
inputs
;
std
::
vector
<
UMat
>
outputs
;
std
::
vector
<
UMat
>
internals
;
std
::
vector
<
UMat
>
orig_inputs
;
std
::
vector
<
UMat
>
orig_outputs
;
std
::
vector
<
UMat
>
orig_internals
;
inputs_arr
.
getUMatVector
(
orig_inputs
);
outputs_arr
.
getUMatVector
(
orig_outputs
);
internals_arr
.
getUMatVector
(
orig_internals
);
inputs
.
resize
(
orig_inputs
.
size
());
for
(
size_t
i
=
0
;
i
<
orig_inputs
.
size
();
i
++
)
convertFp16
(
orig_inputs
[
i
],
inputs
[
i
]);
outputs
.
resize
(
orig_outputs
.
size
());
for
(
size_t
i
=
0
;
i
<
orig_outputs
.
size
();
i
++
)
outputs
[
i
].
create
(
shape
(
orig_outputs
[
i
]),
CV_32F
);
internals
.
resize
(
orig_internals
.
size
());
for
(
size_t
i
=
0
;
i
<
orig_internals
.
size
();
i
++
)
internals
[
i
].
create
(
shape
(
orig_internals
[
i
]),
CV_32F
);
forward
(
inputs
,
outputs
,
internals
);
for
(
size_t
i
=
0
;
i
<
outputs
.
size
();
i
++
)
convertFp16
(
outputs
[
i
],
orig_outputs
[
i
]);
// sync results back
outputs_arr
.
assign
(
orig_outputs
);
internals_arr
.
assign
(
orig_internals
);
return
;
}
std
::
vector
<
Mat
>
inpvec
;
std
::
vector
<
Mat
>
outputs
;
std
::
vector
<
Mat
>
internals
;
...
...
modules/dnn/src/precomp.hpp
浏览文件 @
329abb5b
...
...
@@ -64,6 +64,7 @@
namespace
cv
{
namespace
dnn
{
CV__DNN_EXPERIMENTAL_NS_BEGIN
// Evaluates to true when `id` equals DNN_TARGET_OPENCL or DNN_TARGET_OPENCL_FP16.
// The argument is parenthesized so that expression arguments (e.g. `x & mask` or
// `cond ? a : b`) are compared as a whole instead of being re-associated by
// operator precedence. Note that `id` may still be evaluated twice.
#define IS_DNN_OPENCL_TARGET(id) ((id) == DNN_TARGET_OPENCL || (id) == DNN_TARGET_OPENCL_FP16)
Mutex
&
getInitializationMutex
();
void
initializeLayerFactory
();
CV__DNN_EXPERIMENTAL_NS_END
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录