提交 a6748df5 编写于 作者: A Alexander Smorkalov

Merge branch 4.x

......@@ -74,6 +74,10 @@ if(POLICY CMP0077)
cmake_policy(SET CMP0077 NEW) # CMake 3.13+: option() honors normal variables.
endif()
if(POLICY CMP0146)
cmake_policy(SET CMP0146 OLD) # CMake 3.27+: use CMake FindCUDA if available.
endif()
#
# Configure OpenCV CMake hooks
#
......
......@@ -643,4 +643,69 @@ INSTANTIATE_TEST_CASE_P(/**/, Layer_ScatterND, testing::Values(std::make_tuple(D
INSTANTIATE_TEST_CASE_P(/**/, Layer_LayerNorm, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
INSTANTIATE_TEST_CASE_P(/**/, Layer_LayerNormExpanded, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
// Test parameters: input shape packed into a Vec4i, output dimension,
// is_matmul flag, and the (backend, target) pair to run on.
typedef TestBaseWithParam<tuple<Vec4i, int, bool, tuple<Backend, Target> > > Layer_FullyConnected;
// Times net.forward() for a network made of a single "InnerProduct" layer
// with random input and random weights (no bias, transB enabled).
PERF_TEST_P_(Layer_FullyConnected, fc)
{
    const auto& param = GetParam();
    const Vec4i packedShape = get<0>(param);
    const int outDims = get<1>(param);
    const bool isMatMul = get<2>(param);
    const int backendId = get<0>(get<3>(param));
    const int targetId = get<1>(get<3>(param));

    // Copy the leading non-zero entries; the first zero terminates the shape.
    std::vector<int> inpShape;
    inpShape.reserve(4);
    for (int i = 0; i < 4 && packedShape[i] != 0; ++i)
        inpShape.push_back(packedShape[i]);

    Mat input(inpShape, CV_32F);
    randn(input, 0, 1);

    // The layer operates along the last input axis.
    const int axis = input.dims - 1;

    std::vector<int> weightShape;
    if (isMatMul) {
        // Same rank as the input, with the second-to-last dim replaced by the output size.
        weightShape = inpShape;
        weightShape[weightShape.size() - 2] = outDims;
    } else {
        // Plain 2D weights: outDims x (number of elements from `axis` to the end).
        weightShape.push_back(outDims);
        weightShape.push_back((int)input.total(axis, input.dims));
    }
    Mat weights(weightShape, CV_32F);
    randn(weights, 0, 1);

    LayerParams lp;
    lp.set("axis", axis);
    lp.set("is_matmul", weights.dims > 2);
    lp.set("bias_term", false);
    lp.set("transB", true);
    lp.set("num_output", (int)weights.total(0, weights.dims - 1));
    lp.blobs.resize(1, weights);

    Net net;
    net.addLayerToPrev("matmul", "InnerProduct", lp);
    net.setInput(input);
    net.setPreferableBackend(backendId);
    net.setPreferableTarget(targetId);

    // warmup
    Mat output = net.forward();

    TEST_CYCLE()
    {
        net.forward();
    }
    SANITY_CHECK_NOTHING();
}
// Registers the Layer_FullyConnected perf test for the parameter sets passed to Combine():
// two input shapes, three output dimensions, both is_matmul values, and every
// backend/target pair returned by dnnBackendsAndTargets().
// NOTE(review): the first shape lists only three dims; presumably the 4th Vec4i entry is 0,
// which the test body treats as the end of the shape — confirm.
INSTANTIATE_TEST_CASE_P(/**/, Layer_FullyConnected, Combine(
Values( // input size
Vec4i(5, 512, 384),
Vec4i(5, 16, 512, 128)
),
Values(256, 512, 1024), // output dimension
testing::Bool(), // is_matmul
dnnBackendsAndTargets()
));
} // namespace
......@@ -248,6 +248,11 @@ void selu(const Stream& stream, Span<T> output, View<T> input, T alpha, T gamma)
generic_op<T, SeluFunctor<T>>(stream, output, input, {alpha, gamma});
}
template <class T>
void gelu(const Stream& stream, Span<T> output, View<T> input) {
generic_op<T, GeluFunctor<T>>(stream, output, input);
}
template <class T>
void sign(const Stream& stream, Span<T> output, View<T> input) {
generic_op<T, SignFunctor<T>>(stream, output, input);
......@@ -324,6 +329,7 @@ template void tan<__half>(const Stream&, Span<__half>, View<__half>);
template void celu<__half>(const Stream&, Span<__half>, View<__half>, __half);
template void hardsigmoid<__half>(const Stream&, Span<__half>, View<__half>, __half, __half);
template void selu<__half>(const Stream&, Span<__half>, View<__half>, __half, __half);
template void gelu<__half>(const Stream&, Span<__half>, View<__half>);
template void thresholdedrelu<__half>(const Stream&, Span<__half>, View<__half>, __half);
template void power<__half>(const Stream&, Span<__half>, View<__half>, __half, __half, __half);
template void exp<__half>(const Stream&, Span<__half>, View<__half>, __half, __half);
......@@ -366,6 +372,7 @@ template void tan<float>(const Stream&, Span<float>, View<float>);
template void celu<float>(const Stream&, Span<float>, View<float>, float);
template void hardsigmoid<float>(const Stream&, Span<float>, View<float>, float, float);
template void selu<float>(const Stream&, Span<float>, View<float>, float, float);
template void gelu<float>(const Stream&, Span<float>, View<float>);
template void thresholdedrelu<float>(const Stream&, Span<float>, View<float>, float);
template void power<float>(const Stream&, Span<float>, View<float>, float, float, float);
template void exp<float>(const Stream&, Span<float>, View<float>, float, float);
......
......@@ -588,6 +588,21 @@ struct SeluFunctor {
T alpha, gamma;
};
// Device functor computing GELU(x) = 0.5 * x * (1 + erf(x * M_SQRT1_2)).
template <class T>
struct GeluFunctor {
    // GELU has no tunable parameters, so Params carries no data.
    struct Params {
        CUDA4DNN_HOST_DEVICE Params() { }
    };

    CUDA4DNN_DEVICE GeluFunctor() { }
    CUDA4DNN_DEVICE GeluFunctor(const Params&) { }

    // Evaluates GELU for a single element; constants are converted to T first
    // so the whole expression is computed in T.
    CUDA4DNN_DEVICE T operator()(T value) {
        using csl::device::erf;
        const T point_five = static_cast<T>(0.5f);
        const T unit = static_cast<T>(1.f);
        const T inv_sqrt2 = static_cast<T>(M_SQRT1_2);
        return point_five * value * (unit + erf(value * inv_sqrt2));
    }
};
template <class T>
struct ThresholdedReluFunctor {
struct Params {
......
......@@ -114,6 +114,9 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
template <class T>
void selu(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, T alpha, T gamma);
template <class T>
void gelu(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);
template <class T>
void thresholdedrelu(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, T alpha);
......
......@@ -537,6 +537,20 @@ namespace cv { namespace dnn { namespace cuda4dnn {
const T alpha, gamma;
};
// CUDA backend operation for GELU: owns a stream and forwards calculate()
// to kernels::gelu<T> on that stream.
template <class T>
class GeluOp final : public BaseOp<GeluOp, T> {
    // Stream handed to the kernel call.
    csl::Stream stream;

public:
    GeluOp(csl::Stream stream_) : stream(std::move(stream_)) { }

    // Computes GELU of `input` into `output`.
    void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
    {
        kernels::gelu<T>(stream, output, input);
    }
};
template <class T>
class ThresholdedReluOp final : public BaseOp<ThresholdedReluOp, T> {
public:
......
......@@ -111,7 +111,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
* or there might be several weights
* or we don't have to scale
*/
if (weight != 1.0)
if (weight != static_cast<T>(1.0f))
{
kernels::scale1_with_bias1<T>(stream, output, input, weight, 1.0);
}
......
......@@ -121,7 +121,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
new_coords
);
if (nms_iou_threshold > 0) {
if (nms_iou_threshold > static_cast<T>(0.0f)) {
auto output_mat = output_wrapper->getMutableHostMat();
CV_Assert(output_mat.type() == CV_32F);
for (int i = 0; i < input.get_axis_size(0); i++) {
......
......@@ -446,66 +446,6 @@ void InfEngineNgraphNet::addOutput(const Ptr<InfEngineNgraphNode>& node)
requestedOutputs.insert({name, node.get()});
}
// Registers `ptr` in all_nodes under the friendly name of the node it points to.
// emplace() keeps an existing entry if that name is already registered.
void InfEngineNgraphNet::setNodePtr(std::shared_ptr<ngraph::Node>* ptr) {
    const auto& friendlyName = (*ptr)->get_friendly_name();
    all_nodes.emplace(friendlyName, ptr);
}
// Walks the nodes of the last entry in `components` and, for every node that is
// not a parameter, output or constant, looks it up in all_nodes by friendly name;
// when found, the registered shared_ptr is reset and the entry is erased.
void InfEngineNgraphNet::release()
{
// FIXIT release should not be conditional, release ALL
for (auto& node : components.back()) {
// The node-kind checks are spelled as ngraph::op free functions when the
// version macro below holds, and as Node member functions otherwise.
#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4)
if (!(ngraph::op::is_parameter(node) || ngraph::op::is_output(node) || ngraph::op::is_constant(node)) ) {
#else
if (!(node->is_parameter() || node->is_output() || node->is_constant()) ) {
#endif
auto it = all_nodes.find(node->get_friendly_name());
if (it != all_nodes.end()) {
// Drop the reference held through the registered pointer, then forget the entry.
it->second->reset();
all_nodes.erase(it);
}
}
}
}
// Depth-first traversal that collects into `comp` every node reachable from `node`
// through either its users or its inputs. `used` maps friendly names to a
// visited flag and is updated as nodes are reached.
void InfEngineNgraphNet::dfs(std::shared_ptr<ngraph::Node>& node,
                             std::vector<std::shared_ptr<ngraph::Node>>& comp,
                             std::unordered_map<std::string, bool>& used) {
    used[node->get_friendly_name()] = true;
    comp.push_back(node);

    // Neighbours are the users of this node followed by the producers of its inputs.
    auto neighbours = node->get_users();
    const size_t numInputs = node->get_input_size();
    for (size_t idx = 0; idx < numInputs; ++idx) {
        neighbours.push_back(node->input_value(idx).get_node()->shared_from_this());
    }

    for (auto& next : neighbours) {
        if (used[next->get_friendly_name()])
            continue;
        dfs(next, comp, used);
    }
}
int InfEngineNgraphNet::getNumComponents()
{
if (!components.empty()) {
return components.size();
}
std::unordered_map<std::string, bool> used;
auto inputs = ngraph_function->get_ordered_ops();
for (auto& node : inputs) {
used.emplace(node->get_friendly_name(), false);
}
for (auto& node : inputs) {
if (!used[node->get_friendly_name()]) {
std::vector<std::shared_ptr<ngraph::Node>> current_comp;
dfs(node, current_comp, used);
components.push_back(current_comp);
}
}
return components.size();
}
void InfEngineNgraphNet::createNet(Target targetId) {
if (!hasNetOwner)
{
......@@ -524,46 +464,7 @@ void InfEngineNgraphNet::createNet(Target targetId) {
}
CV_Assert_N(!inputs_vec.empty(), !outs.empty());
ngraph_function = std::make_shared<ngraph::Function>(outs, inputs_vec);
int num_comp = getNumComponents();
CV_LOG_DEBUG(NULL, "DNN/IE: number of subgraphs: " << num_comp);
if (num_comp > 1) {
for (int i = num_comp - 1; i >= 0; --i) {
ngraph::ResultVector outputs;
ngraph::ParameterVector inps;
for (auto& node : components.back()) {
#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4)
if (ngraph::op::is_parameter(node)) {
#else
if (node->is_parameter()) {
#endif
CV_LOG_DEBUG(NULL, "DNN/IE: subgraph[" << i << "]: +input[" << inps.size() << "] = '" << node->get_friendly_name() << "'");
auto parameter = std::dynamic_pointer_cast<ngraph::op::Parameter>(node);
inps.push_back(parameter);
}
#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4)
else if (ngraph::op::is_output(node)) {
#else
else if (node->is_output()) {
#endif
CV_LOG_DEBUG(NULL, "DNN/IE: subgraph[" << i << "]: +output[" << outputs.size() << "] = '" << node->get_friendly_name() << "'");
auto result = std::dynamic_pointer_cast<ngraph::op::Result>(node);
outputs.push_back(result);
}
}
CV_LOG_DEBUG(NULL, "DNN/IE: subgraph[" << i << ": nodes=" << components.back().size() << " inputs=" << inps.size() << " outputs=" << outputs.size());
isInit = false;
CV_Assert_N(!inps.empty(), !outputs.empty());
ngraph_function = std::make_shared<ngraph::Function>(outputs, inps);
release();
components.pop_back();
init(targetId);
}
} else {
release();
components.clear();
init(targetId);
}
init(targetId);
}
}
......
......@@ -50,22 +50,14 @@ public:
void addBlobs(const std::vector<cv::Ptr<BackendWrapper> >& ptrs);
void createNet(Target targetId);
void setNodePtr(std::shared_ptr<ngraph::Node>* ptr);
void reset();
//private:
detail::NetImplBase& netImpl_;
void release();
int getNumComponents();
void dfs(std::shared_ptr<ngraph::Node>& node, std::vector<std::shared_ptr<ngraph::Node>>& comp,
std::unordered_map<std::string, bool>& used);
ngraph::ParameterVector inputs_vec;
std::shared_ptr<ngraph::Function> ngraph_function;
std::vector<std::vector<std::shared_ptr<ngraph::Node>>> components;
std::unordered_map<std::string, std::shared_ptr<ngraph::Node>* > all_nodes;
InferenceEngine::ExecutableNetwork netExec;
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2022_1)
......
......@@ -221,7 +221,7 @@ public:
{
return backendId == DNN_BACKEND_OPENCV ||
(backendId == DNN_BACKEND_CUDA && !_groupByClasses) ||
(backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && !_locPredTransposed && _bboxesNormalized);
backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
......@@ -1006,9 +1006,30 @@ public:
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
{
CV_Assert(nodes.size() == 3);
auto& box_logits = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
auto& class_preds = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
auto& proposals = nodes[2].dynamicCast<InfEngineNgraphNode>()->node;
auto box_logits = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
auto class_preds = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
auto proposals = nodes[2].dynamicCast<InfEngineNgraphNode>()->node;
if (_locPredTransposed) {
// Convert box predictions from yxYX to xyXY
box_logits = std::make_shared<ngraph::op::v1::Reshape>(box_logits,
std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{3}, std::vector<int32_t>{0, -1, 2}),
true
);
int axis = 2;
box_logits = std::make_shared<ngraph::op::v1::Reverse>(box_logits,
std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{1}, &axis),
ngraph::op::v1::Reverse::Mode::INDEX
);
}
auto shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{2}, std::vector<int32_t>{0, -1});
box_logits = std::make_shared<ngraph::op::v1::Reshape>(box_logits, shape, true);
class_preds = std::make_shared<ngraph::op::v1::Reshape>(class_preds, shape, true);
proposals = std::make_shared<ngraph::op::v1::Reshape>(proposals,
std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{3}, std::vector<int32_t>{0, _varianceEncodedInTarget ? 1 : 2, -1}),
true
);
ngraph::op::DetectionOutputAttrs attrs;
attrs.num_classes = _numClasses;
......
......@@ -821,7 +821,7 @@ struct GeluFunctor : public BaseDefaultFunctor<GeluFunctor>
bool supportBackend(int backendId, int)
{
return backendId == DNN_BACKEND_OPENCV;
return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA;
}
inline float calculate(float x) const
......@@ -829,6 +829,13 @@ struct GeluFunctor : public BaseDefaultFunctor<GeluFunctor>
return 0.5f * x * (1.0f + erf(x * M_SQRT1_2));
}
#ifdef HAVE_CUDA
Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
{
return make_cuda_node<cuda4dnn::GeluOp>(target, stream);
}
#endif
int64 getFLOPSPerElement() const { return 100; }
};
......
......@@ -180,15 +180,12 @@ public:
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
bool tranAorB = transA || transB;
#ifdef HAVE_INF_ENGINE
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
return axis == 1 && !tranAorB;
#endif
return backendId == DNN_BACKEND_OPENCV ||
backendId == DNN_BACKEND_CUDA ||
(backendId == DNN_BACKEND_HALIDE && haveHalide() && axis == 1 && !tranAorB) ||
(backendId == DNN_BACKEND_WEBNN && axis == 1 && !tranAorB) ||
backendId == DNN_BACKEND_CANN ||
backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH ||
(backendId == DNN_BACKEND_VKCOM && haveVulkan() && !tranAorB);
}
......@@ -630,8 +627,10 @@ public:
if(input_wrapper->getRank() == inp2Dim)
return make_cuda_node<cuda4dnn::MatMulOp>(preferableTarget, std::move(context->stream), std::move(context->cublas_handle), oriMat, biasMat_, transA, transB);
else
else {
CV_LOG_INFO(NULL, "DNN/CUDA: no implementation for MatMul with rank " << input_wrapper->getRank());
return Ptr<BackendNode>();
}
}
auto flatten_start_axis = normalize_axis(axis, input_wrapper->getRank());
......@@ -800,17 +799,26 @@ public:
if (nodes.size() == 2)
{
auto& inp2 = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
matmul = std::make_shared<ngraph::op::MatMul>(ieInpNode, inp2, false, false);
matmul = std::make_shared<ngraph::op::MatMul>(ieInpNode, inp2, transA, transB);
}
else
{
std::vector<int64_t> data = {(int64_t)ieInpNode->get_shape()[0], (int64_t)blobs[0].size[1]};
auto new_shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, data.data());
auto inp = std::make_shared<ngraph::op::v1::Reshape>(ieInpNode, new_shape, true);
std::vector<size_t> weight_shape{(size_t)blobs[0].size[0], (size_t)blobs[0].size[1]};
std::vector<int> shape(1 + normalize_axis(axis, ieInpNode->get_shape().size()), 0);
shape[shape.size() - 1] = -1;
auto inp = std::make_shared<ngraph::op::v1::Reshape>(
ieInpNode,
std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{shape.size()}, shape.data()),
true
);
std::vector<size_t> weight_shape;
if (isMatMul) {
weight_shape = getShape<size_t>(oriMat);
} else {
weight_shape = {(size_t)blobs[0].size[0], (size_t)blobs[0].size[1]};
}
auto ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, weight_shape, blobs[0].data);
matmul = std::make_shared<ngraph::op::MatMul>(inp, ieWeights, false, true);
matmul = std::make_shared<ngraph::op::MatMul>(inp, ieWeights, transA, transB);
}
if (bias) {
......
......@@ -13,6 +13,7 @@ Implementation of Batch Normalization layer.
#include "layers_common.hpp"
#include "../op_cuda.hpp"
#include "../op_halide.hpp"
#include "../ie_ngraph.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#include <opencv2/core/utils/logger.hpp>
......@@ -41,6 +42,7 @@ public:
{
return backendId == DNN_BACKEND_OPENCV ||
backendId == DNN_BACKEND_CUDA ||
backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH ||
(backendId == DNN_BACKEND_HALIDE && haveHalide() && !poolPad.width && !poolPad.height);
}
......@@ -181,6 +183,50 @@ public:
#endif // HAVE_HALIDE
return Ptr<BackendNode>();
}
#ifdef HAVE_DNN_NGRAPH
// Builds the nGraph subgraph for max-unpooling.
// nodes[0] provides the pooled features and nodes[1] the indices; both are flattened
// to 1D, the features are scattered into a zero-filled 1D constant at the given
// indices, and the result is reshaped to the shape reported by getMemoryShapes().
// Returns a backend node wrapping the final Reshape.
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                    const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
{
    // Both nodes[0] and nodes[1] are dereferenced below; fail loudly instead of
    // reading past the end of the vector.
    CV_Assert(nodes.size() >= 2);
    auto features = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
    auto indices = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;

    // Convert the nGraph input shapes to MatShape so the output shape can be queried.
    std::vector<MatShape> inpShapes(nodes.size());
    std::vector<MatShape> outShapes, internals;
    for (size_t i = 0; i < nodes.size(); ++i) {
        std::vector<size_t> shape = nodes[i].dynamicCast<InfEngineNgraphNode>()->node->get_shape();
        inpShapes[i] = std::vector<int>(shape.begin(), shape.end());
    }
    getMemoryShapes(inpShapes, 1, outShapes, internals);

    // Flat zero-filled buffer with as many elements as the output.
    Mat zeros = Mat::zeros(1, total(outShapes[0]), CV_32F);
    auto zeroInp = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{zeros.total()}, zeros.data);

    // Target shape {-1}: flatten features and indices to 1D.
    int newShape = -1;
    features = std::make_shared<ngraph::op::v1::Reshape>(
        features,
        std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{1}, &newShape),
        true
    );
    indices = std::make_shared<ngraph::op::v1::Reshape>(
        indices,
        std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{1}, &newShape),
        true
    );
    // Convert the indices to i64 unless they already have an integer (i32/i64) element type.
    if (indices->get_element_type() != ngraph::element::i32 && indices->get_element_type() != ngraph::element::i64) {
        indices = std::make_shared<ngraph::op::Convert>(indices, ngraph::element::i64);
    }

    // Scatter along axis 0 of the flattened buffer.
    int axis = 0;
    std::shared_ptr<ngraph::Node> unpool = std::make_shared<ngraph::op::ScatterElementsUpdate>(zeroInp, indices, features,
        std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{1}, &axis));

    // Restore the output shape computed by getMemoryShapes().
    auto shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{outShapes[0].size()}, outShapes[0].data());
    unpool = std::make_shared<ngraph::op::v1::Reshape>(unpool, shape, true);

    return Ptr<BackendNode>(new InfEngineNgraphNode(unpool));
}
#endif // HAVE_DNN_NGRAPH
};
Ptr<MaxUnpoolLayer> MaxUnpoolLayer::create(const LayerParams& params)
......
......@@ -209,7 +209,7 @@ public:
#ifdef HAVE_INF_ENGINE
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
{
return !computeMaxIdx && type != STOCHASTIC && kernel_size.size() > 1 && (kernel_size.size() != 3 || !isArmComputePlugin());
return type != STOCHASTIC && kernel_size.size() > 1 && (kernel_size.size() != 3 || !isArmComputePlugin());
}
#endif
if (backendId == DNN_BACKEND_OPENCV)
......@@ -613,9 +613,17 @@ public:
return Ptr<BackendNode>(new InfEngineNgraphNode(reduce_sum));
}
else if (type == MAX) {
auto max_pool = std::make_shared<ngraph::op::v1::MaxPool>(ieInpNode, ngraph::Strides(strides),
ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size),
rounding_type, pad_type);
std::shared_ptr<ngraph::Node> max_pool;
if (computeMaxIdx) {
std::vector<size_t> dilations(kernel_size.size(), 1);
max_pool = std::make_shared<ngraph::op::v8::MaxPool>(ieInpNode, ngraph::Strides(strides), ngraph::Strides(dilations),
ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size),
rounding_type, pad_type);
} else {
max_pool = std::make_shared<ngraph::op::v1::MaxPool>(ieInpNode, ngraph::Strides(strides),
ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size),
rounding_type, pad_type);
}
return Ptr<BackendNode>(new InfEngineNgraphNode(max_pool));
}
else if (type == ROI) {
......
......@@ -425,7 +425,7 @@ public:
dtype* p_dst = dst.ptr<dtype>();
size_t main_index = start / last_unreduced_dim;
size_t loop = start / last_unreduced_dim;
size_t loop = start % last_unreduced_dim;
size_t origin = unprojected_steps[main_index] + loop * last_unreduced_step;
for (int i = start; i < end; ++i) {
Op accumulator(n_reduce, p_src[origin + projected_steps[0]]);
......
......@@ -410,7 +410,10 @@ public:
}
attrs.shape_calculation_mode = ngraph::op::v4::Interpolate::ShapeCalcMode::SIZES;
if (alignCorners) {
CV_Assert(!halfPixelCenters || !alignCorners);
if (halfPixelCenters) {
attrs.coordinate_transformation_mode = ngraph::op::v4::Interpolate::CoordinateTransformMode::HALF_PIXEL;
} else if (alignCorners) {
attrs.coordinate_transformation_mode = ngraph::op::v4::Interpolate::CoordinateTransformMode::ALIGN_CORNERS;
}
......@@ -427,7 +430,10 @@ public:
}
attrs.shape_calculation_mode = ngraph::op::v4::Interpolate::ShapeCalcMode::sizes;
if (alignCorners) {
CV_Assert(!halfPixelCenters || !alignCorners);
if (halfPixelCenters) {
attrs.coordinate_transformation_mode = ngraph::op::v4::Interpolate::CoordinateTransformMode::half_pixel;
} else if (alignCorners) {
attrs.coordinate_transformation_mode = ngraph::op::v4::Interpolate::CoordinateTransformMode::align_corners;
}
......
......@@ -476,13 +476,14 @@ void NetImplOpenVINO::initBackend(const std::vector<LayerPin>& blobsToKeep_)
{
int lid = ld.inputBlobsId[i].lid;
int oid = ld.inputBlobsId[i].oid;
if (oid == 0 || lid == 0)
continue;
auto ieInpNode = inputNodes[i].dynamicCast<InfEngineNgraphNode>();
const auto& ngraph_input_node = ieInpNode->node;
CV_LOG_DEBUG(NULL, "DNN/IE: bind output port " << lid << ":" << oid << " (" << ngraph_input_node->get_friendly_name() << ":" << ngraph_input_node->get_type_info().name << ")");
if ((oid == 0 && ngraph_input_node->get_output_size() == 1) || lid == 0)
continue;
// Handle parameters from other subnets. Output port is not used in this case
#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4)
if ((ngraph::op::is_parameter(ngraph_input_node) || ngraph::op::is_constant(ngraph_input_node)) &&
......@@ -549,7 +550,6 @@ void NetImplOpenVINO::initBackend(const std::vector<LayerPin>& blobsToKeep_)
break;
}
}
ieNode->net->setNodePtr(&ieNode->node);
net->addBlobs(ld.inputBlobsWrappers);
net->addBlobs(ld.outputBlobsWrappers);
......
......@@ -1385,13 +1385,19 @@ void ONNXImporter::parseSplit(LayerParams& layerParams, const opencv_onnx::NodeP
CV_Assert(constBlobs.find(node_proto.input(1)) != constBlobs.end());
Mat splitsBlob = getBlob(node_proto, 1);
int splitSize = splitsBlob.total();
std::vector<int> slicePoints(splitSize - 1, splitsBlob.at<int>(0));
for (int i = 1; i < splitSize - 1; ++i)
if (splitSize == 1)
{
slicePoints[i] = slicePoints[i - 1] + splitsBlob.at<int>(i);
layerParams.set("num_split", 1);
}
else
{
std::vector<int> slicePoints(splitSize - 1, splitsBlob.at<int>(0));
for (int i = 1; i < splitSize - 1; ++i)
{
slicePoints[i] = slicePoints[i - 1] + splitsBlob.at<int>(i);
}
layerParams.set("slice_point", DictValue::arrayInt(&slicePoints[0], slicePoints.size()));
}
layerParams.set("slice_point", DictValue::arrayInt(&slicePoints[0], slicePoints.size()));
}
else
{
......@@ -1965,9 +1971,11 @@ void ONNXImporter::parseGemm(LayerParams& layerParams, const opencv_onnx::NodePr
}
int transB = layerParams.get<int>("transB", 0);
int secondInpDims;
if (constBlobs.find(node_proto.input(1)) != constBlobs.end())
{
Mat weights = getBlob(node_proto, 1);
secondInpDims = weights.dims;
if (transA == 0) // optimized barnch, for now, we can only optimize the Gemm when transA = 0.
{
......@@ -1993,7 +2001,10 @@ void ONNXImporter::parseGemm(LayerParams& layerParams, const opencv_onnx::NodePr
}
}
else
{
layerParams.set("transB", transB == 1);
secondInpDims = outShapes[node_proto.input(1)].size();
}
if (node_proto.input_size() == 3)
{
......@@ -2002,7 +2013,7 @@ void ONNXImporter::parseGemm(LayerParams& layerParams, const opencv_onnx::NodePr
}
layerParams.set("bias_term", node_proto.input_size() == 3);
layerParams.set("is_matmul", true);
layerParams.set("is_matmul", secondInpDims > 2);
addLayer(layerParams, node_proto);
}
......@@ -2045,7 +2056,7 @@ void ONNXImporter::parseMatMul(LayerParams& layerParams, const opencv_onnx::Node
layerParams.blobs.push_back(transBlob);
int numOutput = layerParams.blobs[0].total(0, secondInpDims - 1);
layerParams.set("num_output", numOutput);
layerParams.set("is_matmul", true);
layerParams.set("is_matmul", secondInpDims > 2);
} else
secondInpDims = outShapes[node_proto.input(1)].size();
......
......@@ -731,21 +731,23 @@ TEST_P(Test_Caffe_nets, FasterRCNN_vgg16)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
#endif
double scoreDiff = 0.0;
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000)
// Check 'backward_compatible_check || in_out_elements_equal' failed at core/src/op/reshape.cpp:427:
// While validating node 'v1::Reshape bbox_pred_reshape (bbox_pred[0]:f32{1,84}, Constant_265242[0]:i64{4}) -> (f32{?,?,?,?})' with friendly_name 'bbox_pred_reshape':
// Requested output shape {1,6300,4,1} is incompatible with input shape {1, 84}
double scoreDiff = 0.0, iouDiff = 0.0;
#if defined(INF_ENGINE_RELEASE)
if (target == DNN_TARGET_MYRIAD)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
if (target == DNN_TARGET_OPENCL_FP16)
scoreDiff = 0.02;
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) {
iouDiff = 0.02;
if (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16) {
scoreDiff = 0.04;
iouDiff = 0.06;
}
}
#endif
static Mat ref = (Mat_<float>(3, 7) << 0, 2, 0.949398, 99.2454, 210.141, 601.205, 462.849,
0, 7, 0.997022, 481.841, 92.3218, 722.685, 175.953,
0, 12, 0.993028, 133.221, 189.377, 350.994, 563.166);
testFaster("faster_rcnn_vgg16.prototxt", "VGG16_faster_rcnn_final.caffemodel", ref, scoreDiff);
testFaster("faster_rcnn_vgg16.prototxt", "VGG16_faster_rcnn_final.caffemodel", ref, scoreDiff, iouDiff);
}
TEST_P(Test_Caffe_nets, FasterRCNN_zf)
......@@ -766,9 +768,6 @@ TEST_P(Test_Caffe_nets, FasterRCNN_zf)
);
#endif
if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_OPENCL_FP16)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_MYRIAD)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
......@@ -779,7 +778,14 @@ TEST_P(Test_Caffe_nets, FasterRCNN_zf)
static Mat ref = (Mat_<float>(3, 7) << 0, 2, 0.90121, 120.407, 115.83, 570.586, 528.395,
0, 7, 0.988779, 469.849, 75.1756, 718.64, 186.762,
0, 12, 0.967198, 138.588, 206.843, 329.766, 553.176);
testFaster("faster_rcnn_zf.prototxt", "ZF_faster_rcnn_final.caffemodel", ref);
double scoreDiff = 0.0, iouDiff = 0.0;
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) {
scoreDiff = 0.02;
iouDiff = 0.13;
}
testFaster("faster_rcnn_zf.prototxt", "ZF_faster_rcnn_final.caffemodel", ref, scoreDiff, iouDiff);
}
TEST_P(Test_Caffe_nets, RFCN)
......@@ -802,8 +808,8 @@ TEST_P(Test_Caffe_nets, RFCN)
iouDiff = 0.12;
}
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000)
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16)
#if defined(INF_ENGINE_RELEASE)
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
{
scoreDiff = 0.1f;
iouDiff = 0.2f;
......
......@@ -102,11 +102,14 @@ TEST(Test_Darknet, read_yolo_voc_stream)
class Test_Darknet_layers : public DNNTestLayer
{
public:
void testDarknetLayer(const std::string& name, bool hasWeights = false, bool testBatchProcessing = true)
void testDarknetLayer(const std::string& name, bool hasWeights = false, bool testBatchProcessing = true,
double l1 = 0.0, double lInf = 0.0)
{
SCOPED_TRACE(name);
Mat inp = blobFromNPY(findDataFile("dnn/darknet/" + name + "_in.npy"));
Mat ref = blobFromNPY(findDataFile("dnn/darknet/" + name + "_out.npy"));
l1 = l1 ? l1 : default_l1;
lInf = lInf ? lInf : default_lInf;
std::string cfg = findDataFile("dnn/darknet/" + name + ".cfg");
std::string model = "";
......@@ -120,7 +123,7 @@ public:
net.setPreferableTarget(target);
net.setInput(inp);
Mat out = net.forward();
normAssert(out, ref, "", default_l1, default_lInf);
normAssert(out, ref, "", l1, lInf);
if (inp.size[0] == 1 && testBatchProcessing) // test handling of batch size
{
......@@ -166,8 +169,8 @@ public:
}*/
ASSERT_EQ(out2.dims, ref2.dims) << ref.dims;
normAssert(out2(ranges0), ref2, "", default_l1, default_lInf);
normAssert(out2(ranges1), ref2, "", default_l1, default_lInf);
normAssert(out2(ranges0), ref2, "", l1, lInf);
normAssert(out2(ranges1), ref2, "", l1, lInf);
}
}
};
......@@ -1046,7 +1049,7 @@ TEST_P(Test_Darknet_layers, region)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
#endif
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000)
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2022010000)
// accuracy on CPU, OpenCL
// Expected: (normL1) <= (l1), actual: 0.000358148 vs 1e-05
// |ref| = 1.207319974899292
......@@ -1116,7 +1119,12 @@ TEST_P(Test_Darknet_layers, connected)
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_CPU_FP16)
applyTestTag(CV_TEST_TAG_DNN_SKIP_CPU_FP16);
testDarknetLayer("connected", true);
double l1 = 0.0;
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL)
{
l1 = 3e-5;
}
testDarknetLayer("connected", true, true, l1);
}
TEST_P(Test_Darknet_layers, relu)
......
......@@ -361,22 +361,9 @@ TEST_P(MaxPooling, Accuracy)
Backend backendId = get<0>(get<5>(GetParam()));
Target targetId = get<1>(get<5>(GetParam()));
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2018050000)
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && targetId == DNN_TARGET_MYRIAD
&& inSize == Size(7, 6) && kernel == Size(3, 2)
&& (stride == Size(1, 1) || stride == Size(2, 2))
&& (pad == Size(0, 1) || pad == Size(1, 1))
)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
#endif
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2018050000)
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && targetId == DNN_TARGET_MYRIAD
&& (kernel == Size(2, 2) || kernel == Size(3, 2))
&& stride == Size(1, 1) && (pad == Size(0, 0) || pad == Size(0, 1))
)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
#endif
// https://github.com/openvinotoolkit/openvino/issues/18731
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && stride != Size(1, 1))
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000)
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && targetId == DNN_TARGET_MYRIAD
......@@ -467,6 +454,11 @@ TEST_P(FullyConnected, Accuracy)
{
l1 = 0.01;
}
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && targetId == DNN_TARGET_OPENCL)
{
l1 = 5e-3;
lInf = 7e-3;
}
#endif
if (targetId == DNN_TARGET_CUDA_FP16)
l1 = 0.015;
......
......@@ -465,8 +465,8 @@ TEST_P(DNNTestHighLevelAPI, predict)
const std::string modelPath = getOpenVINOModel(modelName, isFP16);
ASSERT_FALSE(modelPath.empty()) << modelName;
std::string xmlPath = findDataFile(modelPath + ".xml");
std::string binPath = findDataFile(modelPath + ".bin");
std::string xmlPath = findDataFile(modelPath + ".xml", false);
std::string binPath = findDataFile(modelPath + ".bin", false);
Model model(xmlPath, binPath);
Mat frame = imread(findDataFile("dnn/googlenet_1.png"));
......
......@@ -215,7 +215,13 @@ TEST_P(Test_Caffe_layers, InnerProduct)
if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_CPU_FP16)
applyTestTag(CV_TEST_TAG_DNN_SKIP_CPU_FP16);
testLayerUsingCaffeModels("layer_inner_product", true);
double l1 = 0.0, lInf = 0.0;
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
{
l1 = 5e-3;
lInf = 2e-2;
}
testLayerUsingCaffeModels("layer_inner_product", true, true, l1, lInf);
}
TEST_P(Test_Caffe_layers, Pooling_max)
......
......@@ -447,14 +447,17 @@ TEST_P(Test_Model, DetectionOutput)
{
if (backend == DNN_BACKEND_OPENCV)
scoreDiff = 4e-3;
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2022010000)
else if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
scoreDiff = 4e-2;
#endif
else
scoreDiff = 2e-2;
iouDiff = 1.8e-1;
}
#if defined(INF_ENGINE_RELEASE)
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
{
scoreDiff = 0.05;
iouDiff = 0.08;
}
#endif
testDetectModel(weights_file, config_file, img_path, refClassIds, refConfidences, refBoxes,
scoreDiff, iouDiff, confThreshold, nmsThreshold, size, mean);
......
......@@ -579,9 +579,7 @@ CASE(test_dropout_default_mask_ratio)
CASE(test_dropout_default_old)
// no filter
CASE(test_dropout_default_ratio)
#if SKIP_SET_1
SKIP;
#endif
// no filter
CASE(test_dropout_random_old)
// no filter
CASE(test_dynamicquantizelinear)
......
......@@ -52,7 +52,7 @@ public:
}
void testONNXModels(const String& basename, const Extension ext = npy,
const double l1 = 0, const float lInf = 0, const bool useSoftmax = false,
double l1 = 0, double lInf = 0, const bool useSoftmax = false,
bool checkNoFallbacks = true, int numInps = 1)
{
String onnxmodel = _tf("models/" + basename + ".onnx", required);
......@@ -102,7 +102,12 @@ public:
netSoftmax.setInput(ref);
ref = netSoftmax.forward();
}
normAssert(ref, out, "", l1 ? l1 : default_l1, lInf ? lInf : default_lInf);
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL)
{
l1 = std::max(l1, 1.4e-3);
lInf = std::max(lInf, 8e-3);
}
normAssert(ref, out, basename.c_str(), l1 ? l1 : default_l1, lInf ? lInf : default_lInf);
if (checkNoFallbacks)
expectNoFallbacksFromIE(net);
}
......
......@@ -1816,6 +1816,11 @@ TEST_P(Test_TensorFlow_nets, Mask_RCNN)
double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CPU_FP16) ? 0.2 : 2e-5;
double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CPU_FP16) ? 0.018 : default_lInf;
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
{
scoreDiff = std::max(scoreDiff, 0.06);
iouDiff = std::max(iouDiff, 0.01);
}
normAssertDetections(refDetections, outDetections, "", /*threshold for zero confidence*/1e-5, scoreDiff, iouDiff);
// Output size of masks is NxCxHxW where
......
......@@ -20,6 +20,14 @@ namespace opencv_test { namespace {
using namespace cv;
using namespace cv::dnn;
// Fixture for the TFLite importer tests. It is instantiated over
// dnnBackendsAndTargets(), so every helper runs with the fixture's current
// `backend` / `target` pair.
// For all helpers, passing 0 for `l1` / `lInf` selects the fixture's
// default_l1 / default_lInf tolerances.
class Test_TFLite : public DNNTestLayer {
public:
// Configures `net` with the fixture's backend and target, checks the input
// shapes and feeds `input` to the network.
void testModel(Net& net, const std::string& modelName, const Mat& input, double l1 = 0, double lInf = 0);
// Loads "dnn/tflite/<modelName>.tflite" and forwards to the Net& overload.
void testModel(const std::string& modelName, const Mat& input, double l1 = 0, double lInf = 0);
// Builds the input blob from "cv/shared/lena.png" (scaled by 1/255, resized to
// `inpSize`, with the R and B channels swapped) and forwards to the overload above.
void testModel(const std::string& modelName, const Size& inpSize, double l1 = 0, double lInf = 0);
// Single-layer check: reads the input from "dnn/tflite/<modelName>_inp.npy" and
// the network from "dnn/tflite/<modelName>.tflite".
void testLayer(const std::string& modelName, double l1 = 0, double lInf = 0);
};
void testInputShapes(const Net& net, const std::vector<Mat>& inps) {
std::vector<MatShape> inLayerShapes;
std::vector<MatShape> outLayerShapes;
......@@ -31,8 +39,14 @@ void testInputShapes(const Net& net, const std::vector<Mat>& inps) {
}
}
void testModel(Net& net, const std::string& modelName, const Mat& input, double l1 = 1e-5, double lInf = 1e-4)
void Test_TFLite::testModel(Net& net, const std::string& modelName, const Mat& input, double l1, double lInf)
{
l1 = l1 ? l1 : default_l1;
lInf = lInf ? lInf : default_lInf;
net.setPreferableBackend(backend);
net.setPreferableTarget(target);
testInputShapes(net, {input});
net.setInput(input);
......@@ -48,20 +62,20 @@ void testModel(Net& net, const std::string& modelName, const Mat& input, double
}
}
void testModel(const std::string& modelName, const Mat& input, double l1 = 1e-5, double lInf = 1e-4)
void Test_TFLite::testModel(const std::string& modelName, const Mat& input, double l1, double lInf)
{
Net net = readNet(findDataFile("dnn/tflite/" + modelName + ".tflite", false));
testModel(net, modelName, input, l1, lInf);
}
void testModel(const std::string& modelName, const Size& inpSize, double l1 = 1e-5, double lInf = 1e-4)
void Test_TFLite::testModel(const std::string& modelName, const Size& inpSize, double l1, double lInf)
{
Mat input = imread(findDataFile("cv/shared/lena.png"));
input = blobFromImage(input, 1.0 / 255, inpSize, 0, true);
testModel(modelName, input, l1, lInf);
}
void testLayer(const std::string& modelName, double l1 = 1e-5, double lInf = 1e-4)
void Test_TFLite::testLayer(const std::string& modelName, double l1, double lInf)
{
Mat inp = blobFromNPY(findDataFile("dnn/tflite/" + modelName + "_inp.npy"));
Net net = readNet(findDataFile("dnn/tflite/" + modelName + ".tflite"));
......@@ -69,29 +83,66 @@ void testLayer(const std::string& modelName, double l1 = 1e-5, double lInf = 1e-
}
// https://google.github.io/mediapipe/solutions/face_mesh
TEST(Test_TFLite, face_landmark)
TEST_P(Test_TFLite, face_landmark)
{
testModel("face_landmark", Size(192, 192), 2e-5, 2e-4);
if (backend == DNN_BACKEND_CUDA && target == DNN_TARGET_CUDA_FP16)
applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA_FP16);
double l1 = 2e-5, lInf = 2e-4;
if (target == DNN_TARGET_CPU_FP16 || target == DNN_TARGET_CUDA_FP16 || target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD ||
(backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL))
{
l1 = 0.15;
lInf = 0.82;
}
testModel("face_landmark", Size(192, 192), l1, lInf);
}
// https://google.github.io/mediapipe/solutions/face_detection
TEST(Test_TFLite, face_detection_short_range)
TEST_P(Test_TFLite, face_detection_short_range)
{
testModel("face_detection_short_range", Size(128, 128));
double l1 = 0, lInf = 2e-4;
if (target == DNN_TARGET_CPU_FP16 || target == DNN_TARGET_CUDA_FP16 || target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD ||
(backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL))
{
l1 = 0.04;
lInf = 0.8;
}
testModel("face_detection_short_range", Size(128, 128), l1, lInf);
}
// https://google.github.io/mediapipe/solutions/selfie_segmentation
TEST(Test_TFLite, selfie_segmentation)
TEST_P(Test_TFLite, selfie_segmentation)
{
testModel("selfie_segmentation", Size(256, 256));
double l1 = 0, lInf = 0;
if (target == DNN_TARGET_CPU_FP16 || target == DNN_TARGET_CUDA_FP16 || target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD ||
(backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL))
{
l1 = 0.01;
lInf = 0.48;
}
testModel("selfie_segmentation", Size(256, 256), l1, lInf);
}
TEST(Test_TFLite, max_unpooling)
TEST_P(Test_TFLite, max_unpooling)
{
if (backend == DNN_BACKEND_CUDA)
applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA);
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target != DNN_TARGET_CPU) {
if (target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
if (target == DNN_TARGET_OPENCL) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
}
if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
// Max Unpooling is a numerically unstable operation, and a small difference between frameworks
// might lead to a positional difference of maximal elements in the tensor, so this test checks
// the behavior of the Max Unpooling layer only.
Net net = readNet(findDataFile("dnn/tflite/hair_segmentation.tflite", false));
net.setPreferableBackend(backend);
net.setPreferableTarget(target);
Mat input = imread(findDataFile("cv/shared/lena.png"));
cvtColor(input, input, COLOR_BGR2RGBA);
......@@ -101,7 +152,15 @@ TEST(Test_TFLite, max_unpooling)
net.setInput(input);
std::vector<std::vector<Mat> > outs;
net.forward(outs, {"p_re_lu_1", "max_pooling_with_argmax2d", "conv2d_86", "max_unpooling2d_2"});
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) {
// TODO: seems like a bug with retrieving intermediate tensors
net.forward(outs, {"conv2d_transpose_4", "p_re_lu_1", "max_pooling_with_argmax2d", "conv2d_86", "max_unpooling2d_2"});
outs.erase(outs.begin());
}
else {
net.forward(outs, {"p_re_lu_1", "max_pooling_with_argmax2d", "conv2d_86", "max_unpooling2d_2"});
}
ASSERT_EQ(outs.size(), 4);
ASSERT_EQ(outs[0].size(), 1);
ASSERT_EQ(outs[1].size(), 2);
......@@ -117,6 +176,8 @@ TEST(Test_TFLite, max_unpooling)
ASSERT_EQ(poolOut.size, poolIds.size);
ASSERT_EQ(poolOut.size, unpoolInp.size);
ASSERT_EQ(countNonZero(poolInp), poolInp.total());
for (int c = 0; c < 32; ++c) {
float *poolInpData = poolInp.ptr<float>(0, c);
float *poolOutData = poolOut.ptr<float>(0, c);
......@@ -135,15 +196,19 @@ TEST(Test_TFLite, max_unpooling)
}
}
EXPECT_EQ(poolInpData[maxIdx], poolOutData[y * 64 + x]) << errMsg;
EXPECT_EQ(poolIdsData[y * 64 + x], (float)maxIdx) << errMsg;
if (backend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) {
EXPECT_EQ(poolIdsData[y * 64 + x], (float)maxIdx) << errMsg;
}
EXPECT_EQ(unpoolOutData[maxIdx], unpoolInpData[y * 64 + x]) << errMsg;
}
}
}
}
TEST(Test_TFLite, EfficientDet_int8) {
TEST_P(Test_TFLite, EfficientDet_int8) {
Net net = readNet(findDataFile("dnn/tflite/coco_efficientdet_lite0_v1_1.0_quant_2021_09_06.tflite", false));
net.setPreferableBackend(backend);
net.setPreferableTarget(target);
Mat img = imread(findDataFile("dnn/dog416.png"));
Mat blob = blobFromImage(img, 1.0, Size(320, 320));
......@@ -158,10 +223,18 @@ TEST(Test_TFLite, EfficientDet_int8) {
normAssertDetections(ref, out, "", 0.5, 0.05, 0.1);
}
TEST(Test_TFLite, replicate_by_pack) {
testLayer("replicate_by_pack");
TEST_P(Test_TFLite, replicate_by_pack) {
double l1 = 0, lInf = 0;
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL)
{
l1 = 4e-4;
lInf = 2e-3;
}
testLayer("replicate_by_pack", l1, lInf);
}
INSTANTIATE_TEST_CASE_P(/**/, Test_TFLite, dnnBackendsAndTargets());
}} // namespace
#endif // OPENCV_TEST_DNN_TFLITE
......@@ -39,6 +39,12 @@ public:
GAPI_WRAP
PyParams& cfgAddExecutionProvider(ep::DirectML ep);
GAPI_WRAP
PyParams& cfgAddExecutionProvider(ep::CUDA ep);
GAPI_WRAP
PyParams& cfgAddExecutionProvider(ep::TensorRT ep);
GAPI_WRAP
PyParams& cfgDisableMemPattern();
......
......@@ -32,6 +32,56 @@ namespace onnx {
*/
namespace ep {
/**
 * @brief This structure provides functions
 * that fill inference options for CUDA Execution Provider.
 * Please follow https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#cuda-execution-provider
 */
struct GAPI_EXPORTS_W_SIMPLE CUDA {
    // NB: Used from python.
    /// @private -- Exclude this constructor from OpenCV documentation
    GAPI_WRAP
    CUDA() = default;

    /** @brief Class constructor.

    Constructs CUDA parameters based on device type information.

    @param dev_id Target device id to use.
    */
    GAPI_WRAP
    explicit CUDA(const int dev_id)
        : device_id(dev_id) {
    }

    /// Target device id. Initialized to 0 so that a default-constructed object
    /// (e.g. one created through the default constructor exposed to Python)
    /// never carries an indeterminate value.
    int device_id = 0;
};
/**
 * @brief This structure provides functions
 * that fill inference options for TensorRT Execution Provider.
 * Please follow https://onnxruntime.ai/docs/execution-providers/TensorRT-ExecutionProvider.html#tensorrt-execution-provider
 */
struct GAPI_EXPORTS_W_SIMPLE TensorRT {
    // NB: Used from python.
    /// @private -- Exclude this constructor from OpenCV documentation
    GAPI_WRAP
    TensorRT() = default;

    /** @brief Class constructor.

    Constructs TensorRT parameters based on device type information.

    @param dev_id Target device id to use.
    */
    GAPI_WRAP
    explicit TensorRT(const int dev_id)
        : device_id(dev_id) {
    }

    /// Target device id. Initialized to 0 so that a default-constructed object
    /// (e.g. one created through the default constructor exposed to Python)
    /// never carries an indeterminate value.
    int device_id = 0;
};
/**
* @brief This structure provides functions
* that fill inference options for ONNX OpenVINO Execution Provider.
......@@ -143,7 +193,11 @@ public:
DeviceDesc ddesc;
};
using EP = cv::util::variant<cv::util::monostate, OpenVINO, DirectML>;
using EP = cv::util::variant< cv::util::monostate
, OpenVINO
, DirectML
, CUDA
, TensorRT>;
} // namespace ep
......@@ -431,6 +485,34 @@ public:
return *this;
}
/** @brief Adds execution provider for runtime.

The function is used to add ONNX Runtime CUDA Execution Provider options.
The options object is moved into the list of execution providers kept in the
network description, after any providers added earlier.

@param ep CUDA Execution Provider options (consumed by the call).
@see cv::gapi::onnx::ep::CUDA.
@return a reference to the modified object.
*/
Params<Net>& cfgAddExecutionProvider(ep::CUDA&& ep) {
desc.execution_providers.emplace_back(std::move(ep));
return *this;
}
/** @brief Adds execution provider for runtime.

The function is used to add ONNX Runtime TensorRT Execution Provider options.
The options object is moved into the list of execution providers kept in the
network description, after any providers added earlier.

@param ep TensorRT Execution Provider options (consumed by the call).
@see cv::gapi::onnx::ep::TensorRT.
@return a reference to the modified object.
*/
Params<Net>& cfgAddExecutionProvider(ep::TensorRT&& ep) {
desc.execution_providers.emplace_back(std::move(ep));
return *this;
}
/** @brief Disables the memory pattern optimization.
@return the reference on modified object.
......@@ -491,6 +573,16 @@ public:
desc.execution_providers.emplace_back(std::move(ep));
}
/** @see onnx::Params::cfgAddExecutionProvider.

Appends the CUDA Execution Provider options to desc.execution_providers;
`ep` is moved from. Unlike the typed overload, nothing is returned.
*/
void cfgAddExecutionProvider(ep::CUDA&& ep) {
desc.execution_providers.emplace_back(std::move(ep));
}
/** @see onnx::Params::cfgAddExecutionProvider.

Appends the TensorRT Execution Provider options to desc.execution_providers;
`ep` is moved from. Unlike the typed overload, nothing is returned.
*/
void cfgAddExecutionProvider(ep::TensorRT&& ep) {
desc.execution_providers.emplace_back(std::move(ep));
}
/** @see onnx::Params::cfgDisableMemPattern. */
void cfgDisableMemPattern() {
desc.disable_mem_pattern = true;
......
......@@ -31,6 +31,8 @@ using map_string_and_vector_float = std::map<std::string, std::vector<float>>;
using map_int_and_double = std::map<int, double>;
using ep_OpenVINO = cv::gapi::onnx::ep::OpenVINO;
using ep_DirectML = cv::gapi::onnx::ep::DirectML;
using ep_CUDA = cv::gapi::onnx::ep::CUDA;
using ep_TensorRT = cv::gapi::onnx::ep::TensorRT;
// NB: Python wrapper generate T_U for T<U>
// This behavior is only observed for inputs
......
......@@ -33,6 +33,18 @@ cv::gapi::onnx::PyParams::cfgAddExecutionProvider(cv::gapi::onnx::ep::DirectML e
return *this;
}
// Python-facing wrapper: takes the CUDA Execution Provider options by value,
// moves them into the wrapped parameters object (m_priv) and returns *this so
// that configuration calls can be chained.
cv::gapi::onnx::PyParams&
cv::gapi::onnx::PyParams::cfgAddExecutionProvider(cv::gapi::onnx::ep::CUDA ep) {
m_priv->cfgAddExecutionProvider(std::move(ep));
return *this;
}
// Python-facing wrapper: takes the TensorRT Execution Provider options by value,
// moves them into the wrapped parameters object (m_priv) and returns *this so
// that configuration calls can be chained.
cv::gapi::onnx::PyParams&
cv::gapi::onnx::PyParams::cfgAddExecutionProvider(cv::gapi::onnx::ep::TensorRT ep) {
m_priv->cfgAddExecutionProvider(std::move(ep));
return *this;
}
cv::gapi::onnx::PyParams&
cv::gapi::onnx::PyParams::cfgDisableMemPattern() {
m_priv->cfgDisableMemPattern();
......
......@@ -145,9 +145,39 @@ public:
void run();
};
// Registers the ONNX Runtime CUDA Execution Provider on `session_options`,
// selecting the device given by `cuda_ep.device_id`; all other provider
// options are left value-initialized.
// A std::exception raised while appending the provider is reported through
// cv::util::throw_error as a std::runtime_error carrying the original message.
static void addCUDAExecutionProvider(Ort::SessionOptions *session_options,
                                     const cv::gapi::onnx::ep::CUDA &cuda_ep) {
    OrtCUDAProviderOptions cuda_options{};
    cuda_options.device_id = cuda_ep.device_id;

    try {
        session_options->AppendExecutionProvider_CUDA(cuda_options);
    } catch (const std::exception &err) {
        const std::string message =
            std::string("ONNX Backend: Failed to enable CUDA"
                        " Execution Provider: ") + err.what();
        cv::util::throw_error(std::runtime_error(message));
    }
}
// Registers the ONNX Runtime TensorRT Execution Provider on `session_options`,
// selecting the device given by `trt_ep.device_id`; all other provider
// options are left value-initialized.
// A std::exception raised while appending the provider is reported through
// cv::util::throw_error as a std::runtime_error carrying the original message.
static void addTensorRTExecutionProvider(Ort::SessionOptions *session_options,
                                         const cv::gapi::onnx::ep::TensorRT &trt_ep) {
    OrtTensorRTProviderOptions trt_options{};
    trt_options.device_id = trt_ep.device_id;

    try {
        session_options->AppendExecutionProvider_TensorRT(trt_options);
    } catch (const std::exception &err) {
        const std::string message =
            std::string("ONNX Backend: Failed to enable TensorRT"
                        " Execution Provider: ") + err.what();
        cv::util::throw_error(std::runtime_error(message));
    }
}
static void addOpenVINOExecutionProvider(Ort::SessionOptions *session_options,
const cv::gapi::onnx::ep::OpenVINO &ov_ep) {
OrtOpenVINOProviderOptions options;
OrtOpenVINOProviderOptions options{};
options.device_type = ov_ep.device_type.c_str();
options.cache_dir = ov_ep.cache_dir.c_str();
options.num_of_threads = ov_ep.num_of_threads;
......@@ -181,6 +211,18 @@ static void addExecutionProvider(Ort::SessionOptions *session_options,
addDMLExecutionProvider(session_options, dml_ep);
break;
}
case ep::EP::index_of<ep::CUDA>(): {
GAPI_LOG_INFO(NULL, "CUDA Execution Provider is added.");
const auto &cuda_ep = cv::util::get<ep::CUDA>(execution_provider);
addCUDAExecutionProvider(session_options, cuda_ep);
break;
}
case ep::EP::index_of<ep::TensorRT>(): {
GAPI_LOG_INFO(NULL, "TensorRT Execution Provider is added.");
const auto &trt_ep = cv::util::get<ep::TensorRT>(execution_provider);
addTensorRTExecutionProvider(session_options, trt_ep);
break;
}
default:
GAPI_LOG_INFO(NULL, "CPU Execution Provider is added.");
break;
......
......@@ -260,6 +260,10 @@ typedef uint32_t __u32;
#define V4L2_CID_IRIS_ABSOLUTE (V4L2_CID_CAMERA_CLASS_BASE+17)
#endif
// Fallback for headers that do not define v4l2_fourcc_be: the regular
// v4l2_fourcc() code with bit 31 set.
// NOTE(review): presumably bit 31 marks the big-endian variant of the format --
// confirm against <linux/videodev2.h>.
#ifndef v4l2_fourcc_be
#define v4l2_fourcc_be(a, b, c, d) (v4l2_fourcc(a, b, c, d) | (1U << 31))
#endif
#ifndef V4L2_PIX_FMT_Y10
#define V4L2_PIX_FMT_Y10 v4l2_fourcc('Y', '1', '0', ' ')
#endif
......
......@@ -65,7 +65,7 @@ inline std::string fourccToStringSafe(int fourcc)
{
std::string res = fourccToString(fourcc);
// TODO: return hex values for invalid characters
std::transform(res.begin(), res.end(), res.begin(), [](uint8_t c) { return (c >= '0' && c <= 'z') ? c : (c == ' ' ? '_' : 'x'); });
std::transform(res.begin(), res.end(), res.begin(), [](char c) -> char { return (c >= '0' && c <= 'z') ? c : (c == ' ' ? '_' : 'x'); });
return res;
}
......
......@@ -22,6 +22,9 @@
#include <linux/videodev2.h>
// workarounds for older versions
// Fallback for headers that do not define v4l2_fourcc_be: the regular
// v4l2_fourcc() code with bit 31 set.
// NOTE(review): presumably bit 31 marks the big-endian variant of the format --
// confirm against <linux/videodev2.h>.
#ifndef v4l2_fourcc_be
#define v4l2_fourcc_be(a, b, c, d) (v4l2_fourcc(a, b, c, d) | (1U << 31))
#endif
#ifndef V4L2_PIX_FMT_Y10
#define V4L2_PIX_FMT_Y10 v4l2_fourcc('Y', '1', '0', ' ')
#endif
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册