提交 9d02d42a 编写于 作者: A Alexander Alekhin

dnn(ocl4dnn): don't use getUMat()

especially in CPU only processing
上级 0101fa78
......@@ -2518,7 +2518,7 @@ void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
if (outputBlobs.isUMat())
{
outputBlobs.assign(impl->getBlob(layerName).getUMat(ACCESS_RW));
impl->getBlob(layerName).copyTo(outputBlobs);
}
else if (outputBlobs.isMat())
{
......@@ -2566,7 +2566,7 @@ void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
{
outputvec.resize(ld.outputBlobs.size());
for (int i = 0; i < outputvec.size(); ++i)
outputvec[i] = ld.outputBlobs[i].getUMat(ACCESS_RW);
ld.outputBlobs[i].copyTo(outputvec[i]);
}
}
}
......
......@@ -172,8 +172,8 @@ public:
if (umat_weight.empty())
{
umat_weight = weights_.getUMat(ACCESS_READ);
umat_bias = bias_.getUMat(ACCESS_READ);
weights_.copyTo(umat_weight);
bias_.copyTo(umat_bias);
}
UMat &inpBlob = inputs[0];
......
......@@ -1452,8 +1452,10 @@ public:
if (umat_weights.empty())
{
transpose(blobs[0].reshape(1, inpCn), umat_weights);
umat_biases = hasBias() ? blobs[1].reshape(1, outCn).getUMat(ACCESS_READ) :
UMat::zeros(outCn, 1, CV_32F);
if (hasBias())
blobs[1].reshape(1, outCn).copyTo(umat_biases);
else
umat_biases = UMat::zeros(outCn, 1, CV_32F);
}
String buildopt = format("-DT=%s ", ocl::typeToStr(inputs[0].type()));
......
......@@ -969,11 +969,12 @@ struct ChannelsPReLUFunctor
{
typedef ChannelsPReLULayer Layer;
Mat scale;
#ifdef HAVE_OPENCL
UMat scale_umat;
#endif
explicit ChannelsPReLUFunctor(const Mat& scale_=Mat()) : scale(scale_)
{
scale_umat = scale.getUMat(ACCESS_READ);
}
bool supportBackend(int backendId, int)
......@@ -1021,6 +1022,9 @@ struct ChannelsPReLUFunctor
#ifdef HAVE_OPENCL
bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{
if (scale_umat.empty())
scale.copyTo(scale_umat);
std::vector<UMat> inputs;
std::vector<UMat> outputs;
......
......@@ -96,12 +96,6 @@ public:
biasMat = blobs[1] = blobs[1].reshape(1, 1);
else
biasMat = Mat::zeros(1, numOutput, weightsMat.type());
#ifdef HAVE_OPENCL
size_t n = blobs.size();
umat_blobs.resize(n);
for (int i = 0; i < n; i++) umat_blobs[i] = blobs[i].getUMat(ACCESS_READ);
#endif
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
......@@ -276,6 +270,8 @@ public:
virtual void finalize(InputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE
{
innerProductOp.release();
umat_blobs.clear();
half_blobs.clear();
}
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, InputArrayOfArrays internals)
......@@ -288,13 +284,17 @@ public:
outs.getUMatVector(outputs);
int axisCan = clamp(axis, inputs[0].dims);
int numOutput = umat_blobs[0].size[0];
int innerSize = umat_blobs[0].size[1];
int numOutput = blobs[0].size[0];
int innerSize = blobs[0].size[1];
int outerSize = total(shape(inputs[0]), 0, axisCan);
bool ret = true;
if (innerProductOp.empty())
{
size_t n = blobs.size();
umat_blobs.resize(n);
for (int i = 0; i < n; i++) blobs[i].copyTo(umat_blobs[i]);
OCL4DNNInnerProductConfig config;
config.num_output = numOutput;
config.bias_term = bias;
......
......@@ -71,6 +71,9 @@ public:
}
Mat scale, shift;
#ifdef HAVE_OPENCL
UMat umat_scale, umat_shift;
#endif
bool fuse_batch_norm;
Ptr<ReLULayer> activ_relu;
......@@ -105,6 +108,10 @@ public:
for( i = 0; i < splitDim; i++ )
newRows *= inputs[0].size[i];
zeroDev = inputs[0].total() == newRows;
#ifdef HAVE_OPENCL
umat_scale.release();
umat_shift.release();
#endif
}
virtual bool supportBackend(int backendId) CV_OVERRIDE
......@@ -118,8 +125,13 @@ public:
#ifdef HAVE_OPENCL
bool fast_forward_ocl(std::vector<UMat> &inputs, std::vector<UMat> &outputs)
{
UMat bnorm_weight = scale.empty() ? UMat() : scale.getUMat(ACCESS_READ);
UMat bnorm_bias = shift.empty() ? UMat() : shift.getUMat(ACCESS_READ);
if (umat_scale.empty() && !scale.empty())
scale.copyTo(umat_scale);
if (umat_shift.empty() && !shift.empty())
shift.copyTo(umat_shift);
UMat& bnorm_weight = umat_scale;
UMat& bnorm_bias = umat_shift;
bool use_half = (inputs[0].depth() == CV_16S);
String opts = format(" -DT=%s -DT4=%s -Dconvert_T=%s", use_half ? "half" : "float",
use_half ? "half4" : "float4", use_half ? "convert_half4" : "convert_float4");
......@@ -177,6 +189,13 @@ public:
bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
{
if (umat_scale.empty() && !scale.empty())
scale.copyTo(umat_scale);
if (umat_shift.empty() && !shift.empty())
shift.copyTo(umat_shift);
UMat& bnorm_weight = umat_scale;
UMat& bnorm_bias = umat_shift;
std::vector<UMat> inputs;
std::vector<UMat> outputs;
......@@ -192,8 +211,6 @@ public:
if (inputs[0].depth() == CV_16S)
return false;
UMat bnorm_weight = scale.empty() ? UMat() : scale.getUMat(ACCESS_READ);
UMat bnorm_bias = shift.empty() ? UMat() : shift.getUMat(ACCESS_READ);
String opts = format(" -DT=float -DT4=float4 -Dconvert_T=convert_float4");
for (size_t inpIdx = 0; inpIdx < inputs.size(); inpIdx++)
......
......@@ -60,6 +60,9 @@ public:
int coords, classes, anchors, classfix;
float thresh, nmsThreshold;
bool useSoftmax, useLogistic;
#ifdef HAVE_OPENCL
UMat blob_umat;
#endif
RegionLayerImpl(const LayerParams& params)
{
......@@ -123,6 +126,9 @@ public:
#ifdef HAVE_OPENCL
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{
if (blob_umat.empty())
blobs[0].copyTo(blob_umat);
std::vector<UMat> inputs;
std::vector<UMat> outputs;
......@@ -135,7 +141,6 @@ public:
CV_Assert(inputs.size() >= 1);
int const cell_size = classes + coords + 1;
UMat blob_umat = blobs[0].getUMat(ACCESS_READ);
for (size_t ii = 0; ii < outputs.size(); ii++)
{
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册