/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "test_precomp.hpp"
#include "npy_blob.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#include <opencv2/dnn/layer.details.hpp>  // CV_DNN_REGISTER_LAYER_CLASS

namespace opencv_test
{

using namespace std;
using namespace testing;
using namespace cv;
using namespace cv::dnn;

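// Build the path to a test data file: "dnn/<filename>", or "dnn/torch/<filename>"
// when inTorchDir is true, resolved through findDataFile().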
template<typename TStr>
static std::string _tf(TStr filename, bool inTorchDir = true)
{
    String path = "dnn/";
    if (inTorchDir)
        path += "torch/";
    path += filename;
    return findDataFile(path, false);
}

TEST(Torch_Importer, simple_read)
{
    Net net;
    ASSERT_NO_THROW(net = readNetFromTorch(_tf("net_simple_net.txt"), false));
    ASSERT_FALSE(net.empty());
}

class Test_Torch_layers : public DNNTestLayer
{
public:
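    // Runs a serialized Torch net from the test data set: loads the
    // "<prefix>_input" and "<prefix>_output" blobs, forwards the net up to
    // outLayerName (the last layer when empty), and compares the result with
    // the reference using the given l1/lInf tolerances (zero selects the
    // defaults for the current target). When check2ndBlob is set, the second
    // output blob is verified as well.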
    void runTorchNet(const String& prefix, String outLayerName = "",
                     bool check2ndBlob = false, bool isBinary = false,
                     double l1 = 0.0, double lInf = 0.0)
    {
        String suffix = (isBinary) ? ".dat" : ".txt";

        Mat inp, outRef;
        ASSERT_NO_THROW( inp = readTorchBlob(_tf(prefix + "_input" + suffix), isBinary) );
        ASSERT_NO_THROW( outRef = readTorchBlob(_tf(prefix + "_output" + suffix), isBinary) );

        checkBackend(backend, target, &inp, &outRef);

        Net net = readNetFromTorch(_tf(prefix + "_net" + suffix), isBinary);
        ASSERT_FALSE(net.empty());

        net.setPreferableBackend(backend);
        net.setPreferableTarget(target);

        if (outLayerName.empty())
            outLayerName = net.getLayerNames().back();

        net.setInput(inp);
        std::vector<Mat> outBlobs;
        net.forward(outBlobs, outLayerName);
        l1 = l1 ? l1 : default_l1;
        lInf = lInf ? lInf : default_lInf;
        normAssert(outRef, outBlobs[0], "", l1, lInf);

        if (check2ndBlob && backend != DNN_BACKEND_INFERENCE_ENGINE)
        {
            Mat out2 = outBlobs[1];
            Mat ref2 = readTorchBlob(_tf(prefix + "_output_2" + suffix), isBinary);
            normAssert(out2, ref2, "", l1, lInf);
        }
    }
};

TEST_P(Test_Torch_layers, run_convolution)
{
    // Output reference values are in range [23.4018, 72.0181]
    double l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.08 : default_l1;
    double lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.42 : default_lInf;
    runTorchNet("net_conv", "", false, true, l1, lInf);
}

TEST_P(Test_Torch_layers, run_pool_max)
{
    if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
        throw SkipTestException("");
    runTorchNet("net_pool_max", "", true);
}

TEST_P(Test_Torch_layers, run_pool_ave)
{
    runTorchNet("net_pool_ave");
}

TEST_P(Test_Torch_layers, run_reshape_change_batch_size)
{
    runTorchNet("net_reshape");
}

TEST_P(Test_Torch_layers, run_reshape)
{
    runTorchNet("net_reshape_batch");
    runTorchNet("net_reshape_channels", "", false, true);
}

TEST_P(Test_Torch_layers, run_reshape_single_sample)
{
    // Reference output values in range [14.4586, 18.4492].
    runTorchNet("net_reshape_single_sample", "", false, false,
                (target == DNN_TARGET_MYRIAD || target == DNN_TARGET_OPENCL_FP16) ? 0.0073 : default_l1,
                (target == DNN_TARGET_MYRIAD || target == DNN_TARGET_OPENCL_FP16) ? 0.025 : default_lInf);
}

TEST_P(Test_Torch_layers, run_linear)
{
    if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
        throw SkipTestException("");
    runTorchNet("net_linear_2d");
}

TEST_P(Test_Torch_layers, run_concat)
{
    runTorchNet("net_concat", "l5_torchMerge");
}

TEST_P(Test_Torch_layers, run_depth_concat)
{
    runTorchNet("net_depth_concat", "", false, true, 0.0,
                target == DNN_TARGET_OPENCL_FP16 ? 0.021 : 0.0);
}

TEST_P(Test_Torch_layers, run_deconv)
{
    runTorchNet("net_deconv");
}

TEST_P(Test_Torch_layers, run_batch_norm)
{
    runTorchNet("net_batch_norm", "", false, true);
}

TEST_P(Test_Torch_layers, net_prelu)
{
    runTorchNet("net_prelu");
}

TEST_P(Test_Torch_layers, net_cadd_table)
{
    runTorchNet("net_cadd_table");
}

TEST_P(Test_Torch_layers, net_softmax)
{
    runTorchNet("net_softmax");
    runTorchNet("net_softmax_spatial");
}

TEST_P(Test_Torch_layers, net_logsoftmax)
{
    runTorchNet("net_logsoftmax");
    runTorchNet("net_logsoftmax_spatial");
}

TEST_P(Test_Torch_layers, net_lp_pooling)
{
    runTorchNet("net_lp_pooling_square", "", false, true);
    runTorchNet("net_lp_pooling_power", "", false, true);
}

TEST_P(Test_Torch_layers, net_conv_gemm_lrn)
{
    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
        throw SkipTestException("");
    runTorchNet("net_conv_gemm_lrn", "", false, true,
                target == DNN_TARGET_OPENCL_FP16 ? 0.046 : 0.0,
                target == DNN_TARGET_OPENCL_FP16 ? 0.023 : 0.0);
}

TEST_P(Test_Torch_layers, net_inception_block)
{
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE == 2018030000
    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
        throw SkipTestException("");
#endif
    runTorchNet("net_inception_block", "", false, true);
}

TEST_P(Test_Torch_layers, net_normalize)
{
    runTorchNet("net_normalize", "", false, true);
}

TEST_P(Test_Torch_layers, net_padding)
{
    runTorchNet("net_padding", "", false, true);
    runTorchNet("net_spatial_zero_padding", "", false, true);
    runTorchNet("net_spatial_reflection_padding", "", false, true);
}

TEST_P(Test_Torch_layers, net_non_spatial)
{
    if (backend == DNN_BACKEND_INFERENCE_ENGINE &&
        (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
        throw SkipTestException("");
    runTorchNet("net_non_spatial", "", false, true);
}

TEST_P(Test_Torch_layers, run_parallel)
{
    if (backend != DNN_BACKEND_OPENCV || target != DNN_TARGET_CPU)
        throw SkipTestException("");
    runTorchNet("net_parallel", "l5_torchMerge");
}

TEST_P(Test_Torch_layers, net_residual)
{
    runTorchNet("net_residual", "", false, true);
}

class Test_Torch_nets : public DNNTestLayer {};

TEST_P(Test_Torch_nets, OpenFace_accuracy)
{
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE < 2018030000
    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
        throw SkipTestException("Test is enabled starts from OpenVINO 2018R3");
#endif
    checkBackend();
    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16)
        throw SkipTestException("");

    const string model = findDataFile("dnn/openface_nn4.small2.v1.t7", false);
    Net net = readNetFromTorch(model);

    net.setPreferableBackend(backend);
    net.setPreferableTarget(target);

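    // OpenFace expects a 96x96 floating-point RGB image in [0, 1], so the
    // sample is converted, rescaled and resized before blob packing.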
    Mat sample = imread(findDataFile("cv/shared/lena.png", false));
    Mat sampleF32(sample.size(), CV_32FC3);
    sample.convertTo(sampleF32, sampleF32.type());
    sampleF32 /= 255;
    resize(sampleF32, sampleF32, Size(96, 96), 0, 0, INTER_NEAREST);

    Mat inputBlob = blobFromImage(sampleF32, 1.0, Size(), Scalar(), /*swapRB*/true);

    net.setInput(inputBlob);
    Mat out = net.forward();

    Mat outRef = readTorchBlob(_tf("net_openface_output.dat"), true);
    normAssert(out, outRef, "", default_l1, default_lInf);
}

TEST_P(Test_Torch_nets, ENet_accuracy)
{
    checkBackend();
    if (backend == DNN_BACKEND_INFERENCE_ENGINE ||
        (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16))
        throw SkipTestException("");

    Net net;
    {
        const string model = findDataFile("dnn/Enet-model-best.net", false);
        net = readNetFromTorch(model, true);
        ASSERT_FALSE(net.empty());
    }

    net.setPreferableBackend(backend);
    net.setPreferableTarget(target);

    Mat sample = imread(_tf("street.png", false));
    Mat inputBlob = blobFromImage(sample, 1./255, Size(), Scalar(), /*swapRB*/true);

    net.setInput(inputBlob, "");
    Mat out = net.forward();
    Mat ref = blobFromNPY(_tf("torch_enet_prob.npy", false));
    // Due to numerical instability in Pooling-Unpooling layers (index jittering),
    // the thresholds for ENet have to be relaxed. Accuracy was verified on the
    // Cityscapes dataset; the difference in mIoU relative to Torch is 1e-4%.
    normAssert(ref, out, "", 0.00044, /*target == DNN_TARGET_CPU ? 0.453 : */0.5);

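    // Forward the same input a few more times to make sure repeated runs
    // stay within the same accuracy thresholds.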
    const int N = 3;
    for (int i = 0; i < N; i++)
    {
        net.setInput(inputBlob, "");
        Mat out = net.forward();
        normAssert(ref, out, "", 0.00044, /*target == DNN_TARGET_CPU ? 0.453 : */0.5);
    }
}

// Check accuracy of style transfer models from https://github.com/jcjohnson/fast-neural-style
// th fast_neural_style.lua \
//   -input_image ~/opencv_extra/testdata/dnn/googlenet_1.png \
//   -output_image lena.png \
//   -median_filter 0 \
//   -image_size 0 \
//   -model models/eccv16/starry_night.t7
// th fast_neural_style.lua \
//   -input_image ~/opencv_extra/testdata/dnn/googlenet_1.png \
//   -output_image lena.png \
//   -median_filter 0 \
//   -image_size 0 \
//   -model models/instance_norm/feathers.t7
TEST_P(Test_Torch_nets, FastNeuralStyle_accuracy)
{
    checkBackend();
    std::string models[] = {"dnn/fast_neural_style_eccv16_starry_night.t7",
                            "dnn/fast_neural_style_instance_norm_feathers.t7"};
    std::string targets[] = {"dnn/lena_starry_night.png", "dnn/lena_feathers.png"};

    for (int i = 0; i < 2; ++i)
    {
        const string model = findDataFile(models[i], false);
        Net net = readNetFromTorch(model);

        net.setPreferableBackend(backend);
        net.setPreferableTarget(target);

        Mat img = imread(findDataFile("dnn/googlenet_1.png", false));
        Mat inputBlob = blobFromImage(img, 1.0, Size(), Scalar(103.939, 116.779, 123.68), false);

        net.setInput(inputBlob);
        Mat out = net.forward();

        // Deprocessing: restore the subtracted channel means and clamp to [0, 255].
        getPlane(out, 0, 0) += 103.939;
        getPlane(out, 0, 1) += 116.779;
        getPlane(out, 0, 2) += 123.68;
        out = cv::min(cv::max(0, out), 255);

        Mat ref = imread(findDataFile(targets[i]));
        Mat refBlob = blobFromImage(ref, 1.0, Size(), Scalar(), false);

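        // FP16 and Myriad targets accumulate larger numeric error, so compare
        // the mean absolute difference instead of strict per-element norms.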
        if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD)
        {
            double normL1 = cvtest::norm(refBlob, out, cv::NORM_L1) / refBlob.total();
            if (target == DNN_TARGET_MYRIAD)
                EXPECT_LE(normL1, 4.0f);
            else
                EXPECT_LE(normL1, 0.6f);
        }
        else
            normAssert(out, refBlob, "", 0.5, 1.1);
    }
}

INSTANTIATE_TEST_CASE_P(/**/, Test_Torch_nets, dnnBackendsAndTargets());

// Test a custom layer
// https://github.com/torch/nn/blob/master/doc/convolution.md#nn.SpatialUpSamplingNearest
class SpatialUpSamplingNearestLayer CV_FINAL : public Layer
{
public:
    SpatialUpSamplingNearestLayer(const LayerParams &params) : Layer(params)
    {
        scale = params.get<int>("scale_factor");
    }

    static Ptr<Layer> create(LayerParams& params)
    {
        return Ptr<Layer>(new SpatialUpSamplingNearestLayer(params));
    }

    virtual bool getMemoryShapes(const std::vector<std::vector<int> > &inputs,
                                 const int requiredOutputs,
                                 std::vector<std::vector<int> > &outputs,
                                 std::vector<std::vector<int> > &internals) const CV_OVERRIDE
    {
        std::vector<int> outShape(4);
        outShape[0] = inputs[0][0];  // batch size
        outShape[1] = inputs[0][1];  // number of channels
        outShape[2] = scale * inputs[0][2];
        outShape[3] = scale * inputs[0][3];
        outputs.assign(1, outShape);
        return false;
    }

    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);

        Mat& inp = inputs[0];
        Mat& out = outputs[0];
        const int outHeight = out.size[2];
        const int outWidth = out.size[3];
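        // Nearest-neighbor upsampling: resize every (batch, channel) plane of
        // the input independently to the output size.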
        for (int n = 0; n < inp.size[0]; ++n)
        {
            for (int ch = 0; ch < inp.size[1]; ++ch)
            {
                resize(getPlane(inp, n, ch), getPlane(out, n, ch),
                       Size(outWidth, outHeight), 0, 0, INTER_NEAREST);
            }
        }
    }

private:
    int scale;
};

TEST_P(Test_Torch_layers, upsampling_nearest)
{
    // Test a custom layer.
    CV_DNN_REGISTER_LAYER_CLASS(SpatialUpSamplingNearest, SpatialUpSamplingNearestLayer);
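    // Make sure the custom layer gets unregistered even if the test fails,
    // so the follow-up run below exercises the built-in implementation.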
    try
    {
        runTorchNet("net_spatial_upsampling_nearest", "", false, true);
    }
    catch (...)
    {
        LayerFactory::unregisterLayer("SpatialUpSamplingNearest");
        throw;
    }
    LayerFactory::unregisterLayer("SpatialUpSamplingNearest");

    // Test an implemented layer.
    runTorchNet("net_spatial_upsampling_nearest", "", false, true);
}

INSTANTIATE_TEST_CASE_P(/**/, Test_Torch_layers, dnnBackendsAndTargets());

} // namespace opencv_test