Commit a9807d8f authored by Dmitry Kurtaev

Allocate new memory for optimized concat to prevent collisions.

Add a flag to disable memory reusing in dnn module.
Parent 3542c98d
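The collision mentioned in the first line of the message can be sketched with plain cv::Mat outside the dnn module: when an optimized concat output is only a view into a buffer that the blob manager later hands to another layer, the next writer overwrites the concat result, while a clone() gives the output its own allocation. A minimal, hypothetical sketch (not code from this patch):

    // Minimal standalone sketch of the collision described above (illustrative,
    // not dnn code): an output built as a view over a pooled buffer is corrupted
    // when the pool hands that buffer to another writer; a clone() is not.
    #include <opencv2/core.hpp>
    #include <iostream>

    int main()
    {
        cv::Mat pooled(1, 8, CV_32F, cv::Scalar(1.0f));       // buffer owned by a reuse pool
        cv::Mat aliasedOut = pooled.colRange(0, 4);            // concat output kept as a view
        cv::Mat clonedOut  = pooled.colRange(0, 4).clone();    // what this commit does instead

        pooled.setTo(cv::Scalar(0.0f));                        // another layer reuses the buffer

        std::cout << aliasedOut.at<float>(0, 0) << "\n";       // 0: collided with the reuser
        std::cout << clonedOut.at<float>(0, 0)  << "\n";       // 1: owns its memory, unaffected
        return 0;
    }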
@@ -97,3 +97,8 @@ if(BUILD_PERF_TESTS)
     endif()
   endif()
 endif()
+
+ocv_option(${the_module}_REUSE_MEMORY "Enable reusing strategy of memory management" ON)
+if (${the_module}_REUSE_MEMORY)
+  add_definitions(-DREUSE_DNN_MEMORY=1)
+endif()
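The option only controls a compile-time macro: with it ON, -DREUSE_DNN_MEMORY=1 is defined and the reuse search in the hunks below is compiled in; with it OFF, every request falls through to a fresh allocation. A tiny self-contained demo of the same gating pattern (illustrative names, not dnn code):

    // Demo of the compile-time gating used below: building with -DREUSE_DNN_MEMORY=1
    // (what the CMake option adds) compiles the reuse branch in; without it, every
    // request allocates fresh memory. pickFromPool is an illustrative stand-in.
    #include <cstddef>
    #include <iostream>
    #include <vector>

    static std::vector<float> pickFromPool(std::size_t n, std::vector<float>& pool)
    {
    #ifdef REUSE_DNN_MEMORY
        if (pool.size() >= n)                       // reuse an existing buffer if it fits
            return std::vector<float>(pool.begin(), pool.begin() + n);
    #endif
        (void)pool;                                  // reuse disabled, or nothing fits
        return std::vector<float>(n);                // allocate fresh memory
    }

    int main()
    {
        std::vector<float> pool(16, 1.0f);
        std::cout << pickFromPool(8, pool)[0] << "\n";  // prints 1 with reuse, 0 without
        return 0;
    }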
@@ -367,43 +367,42 @@ public:
         }
     }
 
-    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool force)
+    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst)
     {
+#ifdef REUSE_DNN_MEMORY
         Mat bestBlob;
         LayerPin bestBlobPin;
-        if( !force )
-        {
-            std::map<LayerPin, Mat>::iterator hostIt;
-            std::map<LayerPin, int>::iterator refIt;
+        std::map<LayerPin, Mat>::iterator hostIt;
+        std::map<LayerPin, int>::iterator refIt;
 
-            const int targetTotal = total(shape);
-            int bestBlobTotal = INT_MAX;
+        const int targetTotal = total(shape);
+        int bestBlobTotal = INT_MAX;
 
-            for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt)
+        for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt)
+        {
+            refIt = refCounter.find(hostIt->first);
+            // Use only blobs that had references before because if not,
+            // it might be used as output.
+            if (refIt != refCounter.end() && refIt->second == 0)
             {
-                refIt = refCounter.find(hostIt->first);
-                // Use only blobs that had references before because if not,
-                // it might be used as output.
-                if (refIt != refCounter.end() && refIt->second == 0)
+                Mat& unusedBlob = hostIt->second;
+                if (unusedBlob.total() >= targetTotal &&
+                    unusedBlob.total() < bestBlobTotal)
                 {
-                    Mat& unusedBlob = hostIt->second;
-                    if (unusedBlob.total() >= targetTotal &&
-                        unusedBlob.total() < bestBlobTotal)
-                    {
-                        bestBlobPin = hostIt->first;
-                        bestBlob = unusedBlob;
-                        bestBlobTotal = unusedBlob.total();
-                    }
+                    bestBlobPin = hostIt->first;
+                    bestBlob = unusedBlob;
+                    bestBlobTotal = unusedBlob.total();
                 }
             }
         }
 
         if (!bestBlob.empty())
         {
             reuse(bestBlobPin, lp);
-            dst = Mat(shape, CV_32F, bestBlob.data);
+            dst = bestBlob.reshape(1, 1).colRange(0, targetTotal).reshape(1, shape);
         }
         else
+#endif // REUSE_DNN_MEMORY
         {
             // if dst already has been allocated with total(shape) elements,
             // it won't be recrreated and pointer of dst.data remains the same.
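Both the old and the new assignment in the reuse branch above avoid copying data; one observable difference is that the raw-pointer constructor returns a header that does not reference-count the pooled allocation, while the reshape/colRange chain shares it, so the reused memory stays alive as long as dst does. A standalone illustration with plain cv::Mat (hypothetical names, not dnn code):

    // Standalone illustration of one difference between the two assignments above:
    // a Mat built over a raw pointer carries no reference to the pooled allocation,
    // while the reshape/colRange chain shares it through Mat's reference counting.
    #include <opencv2/core.hpp>
    #include <iostream>
    #include <vector>

    int main()
    {
        std::vector<int> shape = {2, 3};          // MatShape is a std::vector<int> in dnn
        const int targetTotal = 2 * 3;

        cv::Mat bestBlob(1, 16, CV_32F, cv::Scalar(0));
        cv::Mat rawHeader(shape, CV_32F, bestBlob.data);      // old form: no refcount
        cv::Mat refHeader = bestBlob.reshape(1, 1)            // new form: refcounted view
                                    .colRange(0, targetTotal)
                                    .reshape(1, shape);

        std::cout << (rawHeader.u == nullptr) << " "      // 1: header does not track the allocation
                  << (refHeader.u == bestBlob.u) << "\n"; // 1: header shares the pooled allocation
        return 0;
    }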
@@ -412,34 +411,32 @@ public:
         }
     }
 
-    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, UMat &umat_dst, bool force)
+    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, UMat &umat_dst)
     {
+#ifdef REUSE_DNN_MEMORY
         UMat bestBlob;
         LayerPin bestBlobPin;
-        if( !force )
-        {
-            std::map<LayerPin, UMat>::iterator hostIt;
-            std::map<LayerPin, int>::iterator refIt;
+        std::map<LayerPin, UMat>::iterator hostIt;
+        std::map<LayerPin, int>::iterator refIt;
 
-            const int targetTotal = total(shape);
-            int bestBlobTotal = INT_MAX;
+        const int targetTotal = total(shape);
+        int bestBlobTotal = INT_MAX;
 
-            for (hostIt = umat_memHosts.begin(); hostIt != umat_memHosts.end(); ++hostIt)
+        for (hostIt = umat_memHosts.begin(); hostIt != umat_memHosts.end(); ++hostIt)
+        {
+            refIt = refCounter.find(hostIt->first);
+            // Use only blobs that had references before because if not,
+            // it might be used as output.
+            if (refIt != refCounter.end() && refIt->second == 0)
             {
-                refIt = refCounter.find(hostIt->first);
-                // Use only blobs that had references before because if not,
-                // it might be used as output.
-                if (refIt != refCounter.end() && refIt->second == 0)
+                UMat& unusedBlob = hostIt->second;
+                if (unusedBlob.total() >= targetTotal &&
+                    unusedBlob.total() < bestBlobTotal)
                 {
-                    UMat& unusedBlob = hostIt->second;
-                    if (unusedBlob.total() >= targetTotal &&
-                        unusedBlob.total() < bestBlobTotal)
-                    {
-                        bestBlobPin = hostIt->first;
-                        bestBlob = unusedBlob;
-                        bestBlobTotal = unusedBlob.total();
-                    }
+                    bestBlobPin = hostIt->first;
+                    bestBlob = unusedBlob;
+                    bestBlobTotal = unusedBlob.total();
                 }
             }
         }
@@ -449,6 +446,7 @@ public:
             umat_dst.create(shape, CV_32F);
         }
         else
+#endif // REUSE_DNN_MEMORY
        {
             // if dst already has been allocated with total(shape) elements,
             // it won't be recrreated and pointer of dst.data remains the same.
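Both overloads above keep the same selection rule: among pooled blobs whose reference count has dropped to zero, take the smallest one that still holds at least total(shape) elements, otherwise fall through to a fresh allocation. A compact standalone restatement of that rule (illustrative identifiers, not dnn symbols):

    // Standalone restatement of the best-fit search above: given the element counts
    // of currently unreferenced buffers, return the id of the smallest buffer that
    // is still >= the requested size, or -1 if none fits (caller allocates fresh).
    #include <cstddef>
    #include <cstdint>
    #include <iostream>
    #include <map>

    static int pickBestFit(const std::map<int, std::size_t>& unusedTotals,
                           std::size_t targetTotal)
    {
        int bestId = -1;
        std::size_t bestTotal = SIZE_MAX;
        for (std::map<int, std::size_t>::const_iterator it = unusedTotals.begin();
             it != unusedTotals.end(); ++it)
        {
            if (it->second >= targetTotal && it->second < bestTotal)
            {
                bestId = it->first;
                bestTotal = it->second;
            }
        }
        return bestId;
    }

    int main()
    {
        std::map<int, std::size_t> unused;
        unused[0] = 100; unused[1] = 640; unused[2] = 512;
        std::cout << pickBestFit(unused, 300) << "\n";  // prints 2: 512 is the tightest fit
        return 0;
    }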
@@ -458,8 +456,7 @@ public:
     }
 
     void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes,
-                               std::vector<LayerPin>& pinsForInternalBlobs,
-                               bool maximizeReuse)
+                               std::vector<LayerPin>& pinsForInternalBlobs)
     {
         CV_TRACE_FUNCTION();
         bool use_umat = (preferableBackend == DNN_BACKEND_DEFAULT &&
@@ -530,7 +527,6 @@ public:
         }
 
         std::map<int, std::vector<int> >::reverse_iterator it;
-        bool force = !maximizeReuse && ld.inputBlobsId.size() > 1;
         for(it = idxSizes.rbegin(); it != idxSizes.rend(); it++)
         {
             for(int j = 0; j < it->second.size(); j++)
@@ -539,7 +535,7 @@ public:
                 if (total(shapes[index]))
                 {
                     LayerPin blobPin(ld.id, index);
-                    if (index < outShapes.size() && inPlace && !force)
+                    if (index < outShapes.size() && inPlace)
                     {
                         if (use_umat)
                         {
@@ -558,9 +554,9 @@ public:
                     else
                     {
                         if (use_umat)
-                            reuseOrCreate(shapes[index], blobPin, *umat_blobs[index], force);
+                            reuseOrCreate(shapes[index], blobPin, *umat_blobs[index]);
                         else
-                            reuseOrCreate(shapes[index], blobPin, *blobs[index], force);
+                            reuseOrCreate(shapes[index], blobPin, *blobs[index]);
                     }
                 }
             }
@@ -1111,8 +1107,7 @@ struct Net::Impl
         CV_Assert(layerShapesIt != layersShapes.end());
 
         std::vector<LayerPin> pinsForInternalBlobs;
-        bool maximizeReuse = preferableBackend == DNN_BACKEND_HALIDE;
-        blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs, maximizeReuse);
+        blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs);
         ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
         for (int i = 0; i < ld.outputBlobs.size(); ++i)
         {
@@ -1415,6 +1410,9 @@ struct Net::Impl
                 if( i >= ninputs )
                 {
+                    // Allocate new memory to prevent collisions during memory
+                    // reusing (see https://github.com/opencv/opencv/pull/10456).
+                    output = output.clone();
                     Range chrange[] = { Range::all(), Range::all(), Range::all(), Range::all() };
                     int ofs = 0;
                     for( i = 0; i < ninputs; i++ )