Commit 329abb5b authored by Li Peng

dnn fp16 support

Signed-off-by: Li Peng <peng.li@intel.com>
Parent bb8ff2c4
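For context before the diff, a minimal sketch of how a caller opts into the new fp16 path. The model and image file names are placeholders; DNN_TARGET_OPENCL_FP16 and the fallback behaviour are what this commit introduces.

    #include <opencv2/dnn.hpp>
    #include <opencv2/imgcodecs.hpp>
    using namespace cv;
    using namespace cv::dnn;

    // Placeholder model files; any of the readNet* loaders works the same way.
    Net net = readNetFromCaffe("deploy.prototxt", "weights.caffemodel");
    net.setPreferableBackend(DNN_BACKEND_DEFAULT);
    // Request half-precision OpenCL inference. As the setPreferableTarget hunk
    // below shows, this silently falls back to DNN_TARGET_OPENCL when the device
    // lacks cl_khr_fp16, and to DNN_TARGET_CPU when built without OpenCL.
    net.setPreferableTarget(DNN_TARGET_OPENCL_FP16);

    Mat blob = blobFromImage(imread("input.jpg"));  // fp32 (CV_32F) blob
    net.setInput(blob);       // converted to CV_16S half floats internally
    Mat out = net.forward();  // results are converted back to CV_32F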
@@ -499,7 +499,7 @@ public:
}
}
-void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool forceCreate)
+void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool forceCreate, bool use_half)
{
if (!DNN_DISABLE_MEMORY_OPTIMIZATIONS && !forceCreate)
{
@@ -540,14 +540,14 @@
{
// if dst has already been allocated with total(shape) elements,
// it won't be recreated and the pointer of dst.data remains the same.
-dst.create(shape, CV_32F);
+dst.create(shape, use_half ? CV_16S : CV_32F);
addHost(lp, dst);
}
}
void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes,
std::vector<LayerPin>& pinsForInternalBlobs,
-bool forceCreate = false)
+bool forceCreate = false, bool use_half = false)
{
CV_TRACE_FUNCTION();
@@ -618,7 +618,7 @@ public:
reuse(ld.inputBlobsId[0], blobPin);
}
else
-reuseOrCreate(shapes[index], blobPin, *blobs[index], forceCreate);
+reuseOrCreate(shapes[index], blobPin, *blobs[index], forceCreate, use_half);
}
}
}
@@ -656,7 +656,7 @@ static Ptr<BackendWrapper> wrapMat(int backendId, int targetId, cv::Mat& m)
{
if (targetId == DNN_TARGET_CPU)
return Ptr<BackendWrapper>();
-else if (targetId == DNN_TARGET_OPENCL)
+else if (IS_DNN_OPENCL_TARGET(targetId))
return OpenCLBackendWrapper::create(m);
else
CV_Error(Error::StsNotImplemented, "Unknown target identifier");
@@ -721,6 +721,7 @@ struct Net::Impl
bool netWasAllocated;
bool fusion;
std::vector<int64> layersTimings;
+Mat output_blob;
Ptr<BackendWrapper> wrap(Mat& host)
{
@@ -737,7 +738,7 @@ struct Net::Impl
Ptr<BackendWrapper> baseBuffer = backendWrappers[data];
if (preferableBackend == DNN_BACKEND_DEFAULT)
{
-CV_Assert(preferableTarget == DNN_TARGET_OPENCL);
+CV_Assert(IS_DNN_OPENCL_TARGET(preferableTarget));
return OpenCLBackendWrapper::create(baseBuffer, host);
}
else if (preferableBackend == DNN_BACKEND_HALIDE)
@@ -849,7 +850,7 @@ struct Net::Impl
if (!netWasAllocated || this->blobsToKeep != blobsToKeep_)
{
-if (preferableBackend == DNN_BACKEND_DEFAULT && preferableTarget == DNN_TARGET_OPENCL)
+if (preferableBackend == DNN_BACKEND_DEFAULT && IS_DNN_OPENCL_TARGET(preferableTarget))
#ifndef HAVE_OPENCL
{
CV_LOG_WARNING(NULL, "DNN: OpenCL target is not available in this OpenCV build, switching to CPU.");
@@ -1034,7 +1035,7 @@ struct Net::Impl
{
CV_TRACE_FUNCTION();
if (preferableBackend == DNN_BACKEND_DEFAULT)
-CV_Assert(preferableTarget == DNN_TARGET_CPU || preferableTarget == DNN_TARGET_OPENCL);
+CV_Assert(preferableTarget == DNN_TARGET_CPU || IS_DNN_OPENCL_TARGET(preferableTarget));
else if (preferableBackend == DNN_BACKEND_HALIDE)
initHalideBackend();
else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE)
@@ -1369,7 +1370,9 @@ struct Net::Impl
std::vector<LayerPin> pinsForInternalBlobs;
blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs,
-                                  preferableBackend == DNN_BACKEND_INFERENCE_ENGINE);
+                                  preferableBackend == DNN_BACKEND_INFERENCE_ENGINE,
+                                  preferableBackend == DNN_BACKEND_DEFAULT &&
+                                  preferableTarget == DNN_TARGET_OPENCL_FP16);
ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
for (int i = 0; i < ld.outputBlobs.size(); ++i)
{
@@ -1439,7 +1442,7 @@ struct Net::Impl
// some other layers.
// TODO: OpenCL target support more fusion styles.
-if ( preferableBackend == DNN_BACKEND_DEFAULT && preferableTarget == DNN_TARGET_OPENCL &&
+if ( preferableBackend == DNN_BACKEND_DEFAULT && IS_DNN_OPENCL_TARGET(preferableTarget) &&
(!cv::ocl::useOpenCL() || (ld.layerInstance->type != "Convolution" &&
ld.layerInstance->type != "MVN")) )
continue;
@@ -1478,8 +1481,8 @@ struct Net::Impl
continue; // Go to the next layer.
// For now, OpenCL target support fusion with activation of ReLU/ChannelsPReLU/Power/Tanh
-if ( preferableTarget != DNN_TARGET_OPENCL ||
-    (preferableTarget == DNN_TARGET_OPENCL &&
+if ( !IS_DNN_OPENCL_TARGET(preferableTarget) ||
+    (IS_DNN_OPENCL_TARGET(preferableTarget) &&
nextData &&
((nextData->type == "ReLU") ||
(nextData->type == "ChannelsPReLU") ||
@@ -1502,7 +1505,7 @@ struct Net::Impl
ld.outputBlobs = layers[lpNext.lid].outputBlobs;
ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
-if ( preferableTarget == DNN_TARGET_OPENCL )
+if ( IS_DNN_OPENCL_TARGET(preferableTarget) )
{
if ( !activData->consumers.empty() )
{
@@ -1514,7 +1517,7 @@ struct Net::Impl
}
// fuse convolution layer followed by eltwise + relu
-if ( preferableTarget == DNN_TARGET_OPENCL )
+if ( IS_DNN_OPENCL_TARGET(preferableTarget) )
{
Ptr<EltwiseLayer> nextEltwiseLayer;
if( nextData )
@@ -1727,6 +1730,13 @@ struct Net::Impl
for(int i = 0; i < layers[0].outputBlobs.size(); i++)
{
CV_Assert(layers[0].outputBlobs[i].total());
+if (layers[0].outputBlobs[i].depth() == CV_32F &&
+    preferableBackend == DNN_BACKEND_DEFAULT &&
+    preferableTarget == DNN_TARGET_OPENCL_FP16)
+{
+    Mat mat = layers[0].outputBlobs[i].clone();
+    convertFp16(mat, layers[0].outputBlobs[i]);
+}
inputShapes.push_back(shape(layers[0].outputBlobs[i]));
}
LayersShapesMap layersShapes;
@@ -1772,7 +1782,7 @@ struct Net::Impl
{
if( !ld.skip )
{
-if (preferableBackend == DNN_BACKEND_DEFAULT && preferableTarget == DNN_TARGET_OPENCL)
+if (preferableBackend == DNN_BACKEND_DEFAULT && IS_DNN_OPENCL_TARGET(preferableTarget))
{
std::vector<UMat> umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
layer->forward(OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers),
@@ -1937,7 +1947,14 @@ struct Net::Impl
// Transfer data to CPU if it's required.
ld.outputBlobsWrappers[pin.oid]->copyToHost();
}
-return ld.outputBlobs[pin.oid];
+if (ld.outputBlobs[pin.oid].depth() == CV_16S)
+{
+    convertFp16(ld.outputBlobs[pin.oid], output_blob);
+    return output_blob;
+}
+else
+    return ld.outputBlobs[pin.oid];
}
Mat getBlob(String outputName)
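A note on the storage convention assumed everywhere in this patch: OpenCV has no dedicated half-float Mat depth at this point, so fp16 tensors travel in CV_16S containers, and the existing core routine cv::convertFp16 does the packing and unpacking in both directions. A minimal round trip:

    Mat f32 = Mat::ones(1, 4, CV_32F), f16, back;
    convertFp16(f32, f16);   // CV_32F -> CV_16S: IEEE half floats stored as shorts
    convertFp16(f16, back);  // CV_16S -> CV_32F: restores the float values
    CV_Assert(f16.depth() == CV_16S && back.depth() == CV_32F);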
@@ -2080,7 +2097,7 @@ void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
if (outputBlobs.isUMat())
{
-outputBlobs.assign(ld.outputBlobs[pin.oid].getUMat(ACCESS_RW));
+outputBlobs.assign(impl->getBlob(layerName).getUMat(ACCESS_RW));
}
else if (outputBlobs.isMat())
{
@@ -2096,17 +2113,33 @@ void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
ld.outputBlobsWrappers[i]->copyToHost();
}
}
-std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
-outputvec = ld.outputBlobs;
+if (ld.outputBlobs[0].depth() == CV_32F)
+{
+    std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
+    outputvec = ld.outputBlobs;
+} else {
+    std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
+    outputvec.resize(ld.outputBlobs.size());
+    for (int i = 0; i < outputvec.size(); i++)
+        convertFp16(ld.outputBlobs[i], outputvec[i]);
+}
}
else if (outputBlobs.isUMatVector())
{
std::vector<UMat> & outputvec = *(std::vector<UMat> *)outputBlobs.getObj();
if (impl->preferableBackend == DNN_BACKEND_DEFAULT &&
-    impl->preferableTarget == DNN_TARGET_OPENCL)
+    IS_DNN_OPENCL_TARGET(impl->preferableTarget))
{
-outputvec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
+if (impl->preferableTarget == DNN_TARGET_OPENCL)
+    outputvec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
+else if (impl->preferableTarget == DNN_TARGET_OPENCL_FP16)
+{
+    std::vector<UMat> out_vec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
+    outputvec.resize(out_vec.size());
+    for (int i = 0; i < out_vec.size(); i++)
+        convertFp16(out_vec[i], outputvec[i]);
+}
}
else
{
@@ -2194,6 +2227,16 @@ void Net::setPreferableTarget(int targetId)
if( impl->preferableTarget != targetId )
{
impl->preferableTarget = targetId;
+if (IS_DNN_OPENCL_TARGET(targetId))
+{
+#ifndef HAVE_OPENCL
+    impl->preferableTarget = DNN_TARGET_CPU;
+#else
+    bool fp16 = ocl::Device::getDefault().isExtensionSupported("cl_khr_fp16");
+    if (!fp16 && targetId == DNN_TARGET_OPENCL_FP16)
+        impl->preferableTarget = DNN_TARGET_OPENCL;
+#endif
+}
impl->netWasAllocated = false;
impl->clear();
}
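Because the fallback above is silent, a caller who wants to know which target actually took effect can probe the device first; a sketch reusing the same extension query as the hunk above (net is assumed to be a loaded cv::dnn::Net):

    // Mirrors the capability test setPreferableTarget now performs internally.
    bool haveFp16 = ocl::Device::getDefault().isExtensionSupported("cl_khr_fp16");
    net.setPreferableTarget(haveFp16 ? DNN_TARGET_OPENCL_FP16 : DNN_TARGET_OPENCL);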
@@ -2222,7 +2265,17 @@ void Net::setInput(InputArray blob, const String& name)
ld.outputBlobs.resize( std::max(pin.oid+1, (int)ld.requiredOutputs.size()) );
ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
MatShape prevShape = shape(ld.outputBlobs[pin.oid]);
-Mat blob_ = blob.getMat();
+Mat blob_;
+if (impl->preferableBackend == DNN_BACKEND_DEFAULT &&
+    impl->preferableTarget == DNN_TARGET_OPENCL_FP16)
+{
+    Mat blob_mat = blob.getMat();
+    convertFp16(blob_mat, blob_);
+}
+else
+{
+    blob_ = blob.getMat();
+}
bool oldShape = prevShape == shape(blob_);
if (oldShape)
{
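The forward_fallback change below is what keeps unported layers working under fp16: when a layer has no native fp16 kernel and receives CV_16S inputs, it converts them up to fp32, runs the existing fp32 implementation, and converts the outputs back to fp16, so individual layers can gain native fp16 support incrementally.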
@@ -2747,6 +2800,43 @@ void Layer::forward_fallback(InputArrayOfArrays inputs_arr, OutputArrayOfArrays
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
+if (preferableTarget == DNN_TARGET_OPENCL_FP16 && inputs_arr.depth() == CV_16S)
+{
+    std::vector<UMat> inputs;
+    std::vector<UMat> outputs;
+    std::vector<UMat> internals;
+    std::vector<UMat> orig_inputs;
+    std::vector<UMat> orig_outputs;
+    std::vector<UMat> orig_internals;
+    inputs_arr.getUMatVector(orig_inputs);
+    outputs_arr.getUMatVector(orig_outputs);
+    internals_arr.getUMatVector(orig_internals);
+    inputs.resize(orig_inputs.size());
+    for (size_t i = 0; i < orig_inputs.size(); i++)
+        convertFp16(orig_inputs[i], inputs[i]);
+    outputs.resize(orig_outputs.size());
+    for (size_t i = 0; i < orig_outputs.size(); i++)
+        outputs[i].create(shape(orig_outputs[i]), CV_32F);
+    internals.resize(orig_internals.size());
+    for (size_t i = 0; i < orig_internals.size(); i++)
+        internals[i].create(shape(orig_internals[i]), CV_32F);
+    forward(inputs, outputs, internals);
+    for (size_t i = 0; i < outputs.size(); i++)
+        convertFp16(outputs[i], orig_outputs[i]);
+    // sync results back
+    outputs_arr.assign(orig_outputs);
+    internals_arr.assign(orig_internals);
+    return;
+}
std::vector<Mat> inpvec;
std::vector<Mat> outputs;
std::vector<Mat> internals;
@@ -64,6 +64,7 @@
namespace cv { namespace dnn {
CV__DNN_EXPERIMENTAL_NS_BEGIN
+#define IS_DNN_OPENCL_TARGET(id) (id == DNN_TARGET_OPENCL || id == DNN_TARGET_OPENCL_FP16)
Mutex& getInitializationMutex();
void initializeLayerFactory();
CV__DNN_EXPERIMENTAL_NS_END