test_batchnorm.py 13.3 KB
Newer Older
1 2 3 4 5 6 7 8
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9
import functools
10 11 12 13 14 15 16 17
import multiprocessing as mp
import platform

import numpy as np
import pytest

import megengine as mge
import megengine.distributed as dist
M
Megvii Engine Team 已提交
18
from megengine import Tensor
19
from megengine.core._trace_option import use_symbolic_shape
20 21
from megengine.module import BatchNorm1d, BatchNorm2d, SyncBatchNorm

22 23
# Closeness assertion shared by the tests below: np.testing.assert_allclose
# with both the absolute and the relative tolerance fixed at 5e-6.
_assert_allclose = functools.partial(np.testing.assert_allclose, atol=5e-6, rtol=5e-6)

24 25 26 27 28

@pytest.mark.skipif(
    platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
)
@pytest.mark.skipif(
    platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
)
@pytest.mark.isolated_distributed
def test_syncbn():
    """Check SyncBatchNorm across several ranks against a NumPy reference.

    The full input of every step is split along its last axis into one
    equally sized slice per rank.  Each rank runs in its own process, feeds
    its slices through a ``SyncBatchNorm`` layer and asserts that

    * the output of the last step matches the matching slice of the
      reference output (normalised with full-batch mean / biased variance);
    * ``running_mean`` / ``running_var`` match the reference running
      statistics accumulated over all steps (momentum-weighted, using the
      unbiased variance).

    A worker returns immediately, without asserting anything, when fewer
    GPUs than ranks are available, so the test passes vacuously in that case.
    """
    nr_chan = 8
    data_shape = (3, nr_chan, 4, 16)
    momentum = 0.9
    eps = 1e-5
    running_mean = np.zeros((1, nr_chan, 1, 1), dtype=np.float32)
    running_var = np.ones((1, nr_chan, 1, 1), dtype=np.float32)
    steps = 4
    nr_ranks = 2
    # Width of the slice of the last axis handed to each rank.  Derived from
    # the shape instead of being hard-coded so it stays correct if either
    # data_shape or nr_ranks is changed.
    cols_per_rank = data_shape[3] // nr_ranks
    # NOTE(review): presumably port 0 lets the server pick a free port, which
    # is then read back below -- confirm against the dist.Server docs.
    server = dist.Server(0)
    port = server.py_server_port

    def worker(rank, data, yv_expect, running_mean, running_var):
        """Run all steps on one rank and compare against the reference."""
        if mge.get_device_count("gpu") < nr_ranks:
            return
        dist.init_process_group("localhost", port, nr_ranks, rank, rank)
        bn = SyncBatchNorm(nr_chan, momentum=momentum, eps=eps)
        for step in range(steps):
            yv = bn(Tensor(data[step]))

        # Only the output of the final step is compared.
        _assert_allclose(yv.numpy(), yv_expect)
        _assert_allclose(bn.running_mean.numpy(), running_mean)
        _assert_allclose(bn.running_var.numpy(), running_var)

    # Build the full-batch inputs and the NumPy reference statistics.
    xv = []
    for step in range(steps):
        xv.append(np.random.normal(loc=2.3, size=data_shape).astype(np.float32))
        # Move channels last and flatten so axis 0 enumerates every sample
        # that contributes to a channel's statistics.
        xv_transposed = np.transpose(xv[step], [0, 2, 3, 1]).reshape(
            (data_shape[0] * data_shape[2] * data_shape[3], nr_chan)
        )

        mean = np.mean(xv_transposed, axis=0).reshape(1, nr_chan, 1, 1)

        var_biased = np.var(xv_transposed, axis=0).reshape((1, nr_chan, 1, 1))
        sd = np.sqrt(var_biased + eps)

        var_unbiased = np.var(xv_transposed, axis=0, ddof=1).reshape((1, nr_chan, 1, 1))
        running_mean = running_mean * momentum + mean * (1 - momentum)
        running_var = running_var * momentum + var_unbiased * (1 - momentum)

        yv_expect = (xv[step] - mean) / sd

    # data[rank][step] is the slice of step's input that belongs to rank.
    data = [
        [
            xv[step][:, :, :, rank * cols_per_rank : (rank + 1) * cols_per_rank]
            for step in range(steps)
        ]
        for rank in range(nr_ranks)
    ]

    procs = []
    for rank in range(nr_ranks):
        p = mp.Process(
            target=worker,
            args=(
                rank,
                data[rank],
                yv_expect[:, :, :, rank * cols_per_rank : (rank + 1) * cols_per_rank],
                running_mean,
                running_var,
            ),
        )
        p.start()
        procs.append(p)

    # Join every worker before asserting so that a failure in one rank does
    # not leave the remaining processes un-joined.  A worker still alive
    # after the timeout is terminated instead of being left running; its
    # non-zero exit code then fails the assertion below.
    exit_codes = []
    for p in procs:
        p.join(10)
        if p.is_alive():
            p.terminate()
            p.join()
        exit_codes.append(p.exitcode)

    assert all(
        code == 0 for code in exit_codes
    ), "worker exit codes: {}".format(exit_codes)


def test_batchnorm():
    """Check BatchNorm1d on (N, C, L) input against a NumPy reference.

    Training phase (3 iterations): the output must equal the input
    normalised with the per-channel batch mean and biased variance, and the
    layer's running statistics must follow the momentum update computed with
    the batch mean and the unbiased variance.

    Inference phase (``training = False``): two forward passes on the same
    input must give identical outputs, must leave the running statistics
    untouched, and the output must equal the input normalised with the
    running statistics.
    """
    nr_chan = 8
    data_shape = (3, nr_chan, 4)
    momentum = 0.9
    bn = BatchNorm1d(nr_chan, momentum=momentum)
    running_mean = np.zeros((1, nr_chan, 1), dtype=np.float32)
    running_var = np.ones((1, nr_chan, 1), dtype=np.float32)
    for _ in range(3):
        xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
        mean = np.mean(np.mean(xv, axis=0, keepdims=True), axis=2, keepdims=True)
        # Channels last, then flatten: axis 0 enumerates every sample that
        # contributes to a channel's statistics.
        xv_transposed = np.transpose(xv, [0, 2, 1]).reshape(
            (data_shape[0] * data_shape[2], nr_chan)
        )

        var_biased = np.var(xv_transposed, axis=0).reshape((1, nr_chan, 1))
        sd = np.sqrt(var_biased + bn.eps)

        var_unbiased = np.var(xv_transposed, axis=0, ddof=1).reshape((1, nr_chan, 1))
        running_mean = running_mean * momentum + mean * (1 - momentum)
        running_var = running_var * momentum + var_unbiased * (1 - momentum)

        yv = bn(Tensor(xv))
        yv_expect = (xv - mean) / sd

        _assert_allclose(yv.numpy(), yv_expect)
        # Flatten both sides so the comparison does not depend on the shape
        # in which the layer stores its statistics.
        _assert_allclose(bn.running_mean.numpy().reshape(-1), running_mean.reshape(-1))
        _assert_allclose(bn.running_var.numpy().reshape(-1), running_var.reshape(-1))

    # test set 'training' flag to False
    mean_backup = bn.running_mean.numpy()
    var_backup = bn.running_var.numpy()
    bn.training = False
    xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
    data = Tensor(xv)
    yv1 = bn(data)
    yv2 = bn(data)
    np.testing.assert_equal(yv1.numpy(), yv2.numpy())
    np.testing.assert_equal(mean_backup, bn.running_mean.numpy())
    np.testing.assert_equal(var_backup, bn.running_var.numpy())
    yv_expect = (xv - running_mean) / np.sqrt(running_var + bn.eps)
    _assert_allclose(yv1.numpy(), yv_expect)
141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166


@pytest.mark.skipif(
    platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
)
def test_syncbn1d():
    """Check SyncBatchNorm on (N, C, L) input in a single process.

    Training phase (3 iterations): the output must equal the input
    normalised with the per-channel batch mean and biased variance, and the
    layer's running statistics must follow the momentum update computed with
    the batch mean and the unbiased variance.

    Inference phase (``training = False``): two forward passes on the same
    input must give identical outputs, must leave the running statistics
    untouched, and the output must equal the input normalised with the
    running statistics.
    """
    nr_chan = 8
    data_shape = (3, nr_chan, 4)
    momentum = 0.9
    bn = SyncBatchNorm(nr_chan, momentum=momentum)
    running_mean = np.zeros((1, nr_chan, 1), dtype=np.float32)
    running_var = np.ones((1, nr_chan, 1), dtype=np.float32)
    for _ in range(3):
        xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
        mean = np.mean(np.mean(xv, axis=0, keepdims=True), axis=2, keepdims=True)
        # Channels last, then flatten: axis 0 enumerates every sample that
        # contributes to a channel's statistics.
        xv_transposed = np.transpose(xv, [0, 2, 1]).reshape(
            (data_shape[0] * data_shape[2], nr_chan)
        )

        var_biased = np.var(xv_transposed, axis=0).reshape((1, nr_chan, 1))
        sd = np.sqrt(var_biased + bn.eps)

        var_unbiased = np.var(xv_transposed, axis=0, ddof=1).reshape((1, nr_chan, 1))
        running_mean = running_mean * momentum + mean * (1 - momentum)
        running_var = running_var * momentum + var_unbiased * (1 - momentum)

        yv = bn(Tensor(xv))
        yv_expect = (xv - mean) / sd

        _assert_allclose(yv.numpy(), yv_expect)
        # Flatten both sides so the comparison does not depend on the shape
        # in which the layer stores its statistics.
        _assert_allclose(bn.running_mean.numpy().reshape(-1), running_mean.reshape(-1))
        _assert_allclose(bn.running_var.numpy().reshape(-1), running_var.reshape(-1))

    # test set 'training' flag to False
    mean_backup = bn.running_mean.numpy()
    var_backup = bn.running_var.numpy()
    bn.training = False
    xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
    data = Tensor(xv)
    yv1 = bn(data)
    yv2 = bn(data)
    np.testing.assert_equal(yv1.numpy(), yv2.numpy())
    np.testing.assert_equal(mean_backup, bn.running_mean.numpy())
    np.testing.assert_equal(var_backup, bn.running_var.numpy())
    yv_expect = (xv - running_mean) / np.sqrt(running_var + bn.eps)
    _assert_allclose(yv1.numpy(), yv_expect)
187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210


def test_batchnorm2d():
    """Check BatchNorm2d on (N, C, H, W) input against a NumPy reference.

    Training phase (3 iterations): the output must equal the input
    normalised with the per-channel batch mean and biased variance, and the
    layer's running statistics must follow the momentum update computed with
    the batch mean and the unbiased variance.

    Inference phase (``training = False``): two forward passes on the same
    input must give identical outputs, must leave the running statistics
    untouched, and the output must equal the input normalised with the
    running statistics.
    """
    nr_chan = 8
    data_shape = (3, nr_chan, 16, 16)
    momentum = 0.9
    bn = BatchNorm2d(nr_chan, momentum=momentum)
    running_mean = np.zeros((1, nr_chan, 1, 1), dtype=np.float32)
    running_var = np.ones((1, nr_chan, 1, 1), dtype=np.float32)
    for _ in range(3):
        xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
        # Channels last, then flatten: axis 0 enumerates every sample that
        # contributes to a channel's statistics.
        xv_transposed = np.transpose(xv, [0, 2, 3, 1]).reshape(
            (data_shape[0] * data_shape[2] * data_shape[3], nr_chan)
        )

        mean = np.mean(xv_transposed, axis=0).reshape(1, nr_chan, 1, 1)

        var_biased = np.var(xv_transposed, axis=0).reshape((1, nr_chan, 1, 1))
        sd = np.sqrt(var_biased + bn.eps)

        var_unbiased = np.var(xv_transposed, axis=0, ddof=1).reshape((1, nr_chan, 1, 1))
        running_mean = running_mean * momentum + mean * (1 - momentum)
        running_var = running_var * momentum + var_unbiased * (1 - momentum)

        yv = bn(Tensor(xv))
        yv_expect = (xv - mean) / sd

        _assert_allclose(yv.numpy(), yv_expect)
        _assert_allclose(bn.running_mean.numpy(), running_mean)
        _assert_allclose(bn.running_var.numpy(), running_var)

    # test set 'training' flag to False
    mean_backup = bn.running_mean.numpy()
    var_backup = bn.running_var.numpy()
    bn.training = False
    xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
    data = Tensor(xv)
    yv1 = bn(data)
    yv2 = bn(data)
    np.testing.assert_equal(yv1.numpy(), yv2.numpy())
    np.testing.assert_equal(mean_backup, bn.running_mean.numpy())
    np.testing.assert_equal(var_backup, bn.running_var.numpy())
    yv_expect = (xv - running_mean) / np.sqrt(running_var + bn.eps)
    _assert_allclose(yv1.numpy(), yv_expect)
231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257


@pytest.mark.skipif(
    platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
)
def test_syncbn2d():
    """Check SyncBatchNorm on (N, C, H, W) input in a single process.

    Training phase (3 iterations): the output must equal the input
    normalised with the per-channel batch mean and biased variance, and the
    layer's running statistics must follow the momentum update computed with
    the batch mean and the unbiased variance.

    Inference phase (``training = False``): two forward passes on the same
    input must give identical outputs, must leave the running statistics
    untouched, and the output must equal the input normalised with the
    running statistics.
    """
    nr_chan = 8
    data_shape = (3, nr_chan, 16, 16)
    momentum = 0.9
    bn = SyncBatchNorm(nr_chan, momentum=momentum)
    running_mean = np.zeros((1, nr_chan, 1, 1), dtype=np.float32)
    running_var = np.ones((1, nr_chan, 1, 1), dtype=np.float32)
    for _ in range(3):
        xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
        # Channels last, then flatten: axis 0 enumerates every sample that
        # contributes to a channel's statistics.
        xv_transposed = np.transpose(xv, [0, 2, 3, 1]).reshape(
            (data_shape[0] * data_shape[2] * data_shape[3], nr_chan)
        )

        mean = np.mean(xv_transposed, axis=0).reshape(1, nr_chan, 1, 1)

        var_biased = np.var(xv_transposed, axis=0).reshape((1, nr_chan, 1, 1))
        sd = np.sqrt(var_biased + bn.eps)

        var_unbiased = np.var(xv_transposed, axis=0, ddof=1).reshape((1, nr_chan, 1, 1))
        running_mean = running_mean * momentum + mean * (1 - momentum)
        running_var = running_var * momentum + var_unbiased * (1 - momentum)

        yv = bn(Tensor(xv))
        yv_expect = (xv - mean) / sd

        _assert_allclose(yv.numpy(), yv_expect)
        _assert_allclose(bn.running_mean.numpy(), running_mean)
        _assert_allclose(bn.running_var.numpy(), running_var)

    # test set 'training' flag to False
    mean_backup = bn.running_mean.numpy()
    var_backup = bn.running_var.numpy()
    bn.training = False
    xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
    data = Tensor(xv)
    yv1 = bn(data)
    yv2 = bn(data)
    np.testing.assert_equal(yv1.numpy(), yv2.numpy())
    np.testing.assert_equal(mean_backup, bn.running_mean.numpy())
    np.testing.assert_equal(var_backup, bn.running_var.numpy())
    yv_expect = (xv - running_mean) / np.sqrt(running_var + bn.eps)
    _assert_allclose(yv1.numpy(), yv_expect)
278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296


def test_batchnorm_no_stats():
    """Check BatchNorm1d built with ``track_running_stats=False``.

    Runs four iterations on (N, C, L) input and switches ``training`` off
    from the third one.  In every iteration, including those after the
    switch, the output is expected to equal the input normalised with the
    per-channel mean and biased variance of the current batch.
    """
    nr_chan = 8
    data_shape = (3, nr_chan, 4)
    # Use nr_chan rather than a repeated literal so the layer and the
    # reference shapes cannot drift apart.
    bn = BatchNorm1d(nr_chan, track_running_stats=False)
    for i in range(4):
        if i == 2:
            bn.training = False
        xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
        mean = np.mean(np.mean(xv, axis=0, keepdims=True), axis=2, keepdims=True)
        # Channels last, then flatten: axis 0 enumerates every sample that
        # contributes to a channel's statistics.
        xv_transposed = np.transpose(xv, [0, 2, 1]).reshape(
            (data_shape[0] * data_shape[2], nr_chan)
        )
        var = np.var(xv_transposed, axis=0).reshape((1, nr_chan, 1))
        sd = np.sqrt(var + bn.eps)

        yv = bn(Tensor(xv))
        yv_expect = (xv - mean) / sd

        _assert_allclose(yv.numpy(), yv_expect)
301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322


@pytest.mark.skipif(
    platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
)
def test_syncbn_no_stats():
    """Check SyncBatchNorm built with ``track_running_stats=False`` (1d input).

    Runs four iterations on (N, C, L) input and switches ``training`` off
    from the third one.  In every iteration, including those after the
    switch, the output is expected to equal the input normalised with the
    per-channel mean and biased variance of the current batch.
    """
    nr_chan = 8
    data_shape = (3, nr_chan, 4)
    # Use nr_chan rather than a repeated literal so the layer and the
    # reference shapes cannot drift apart.
    bn = SyncBatchNorm(nr_chan, track_running_stats=False)
    for i in range(4):
        if i == 2:
            bn.training = False
        xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
        mean = np.mean(np.mean(xv, axis=0, keepdims=True), axis=2, keepdims=True)
        # Channels last, then flatten: axis 0 enumerates every sample that
        # contributes to a channel's statistics.
        xv_transposed = np.transpose(xv, [0, 2, 1]).reshape(
            (data_shape[0] * data_shape[2], nr_chan)
        )
        var = np.var(xv_transposed, axis=0).reshape((1, nr_chan, 1))
        sd = np.sqrt(var + bn.eps)

        yv = bn(Tensor(xv))
        yv_expect = (xv - mean) / sd

        _assert_allclose(yv.numpy(), yv_expect)
327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344


def test_batchnorm2d_no_stats():
    """Check BatchNorm2d built with ``track_running_stats=False``.

    Runs four iterations on (N, C, H, W) input and switches ``training`` off
    from the third one.  In every iteration, including those after the
    switch, the output is expected to equal the input normalised with the
    per-channel mean and biased variance of the current batch.
    """
    nr_chan = 8
    data_shape = (3, nr_chan, 16, 16)
    # Use nr_chan rather than a repeated literal so the layer and the
    # reference shapes cannot drift apart.
    bn = BatchNorm2d(nr_chan, track_running_stats=False)
    for i in range(4):
        if i == 2:
            bn.training = False
        xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
        # Channels last, then flatten: axis 0 enumerates every sample that
        # contributes to a channel's statistics.
        xv_transposed = np.transpose(xv, [0, 2, 3, 1]).reshape(
            (data_shape[0] * data_shape[2] * data_shape[3], nr_chan)
        )

        mean = np.mean(xv_transposed, axis=0).reshape(1, nr_chan, 1, 1)
        var = np.var(xv_transposed, axis=0).reshape((1, nr_chan, 1, 1))
        sd = np.sqrt(var + bn.eps)

        yv = bn(Tensor(xv))
        yv_expect = (xv - mean) / sd

        _assert_allclose(yv.numpy(), yv_expect)
349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369


@pytest.mark.skipif(
    platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
)
def test_syncbn2d_no_stats():
    """Check SyncBatchNorm built with ``track_running_stats=False`` (2d input).

    Runs four iterations on (N, C, H, W) input and switches ``training`` off
    from the third one.  In every iteration, including those after the
    switch, the output is expected to equal the input normalised with the
    per-channel mean and biased variance of the current batch.
    """
    nr_chan = 8
    data_shape = (3, nr_chan, 16, 16)
    # Use nr_chan rather than a repeated literal so the layer and the
    # reference shapes cannot drift apart.
    bn = SyncBatchNorm(nr_chan, track_running_stats=False)
    for i in range(4):
        if i == 2:
            bn.training = False
        xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
        # Channels last, then flatten: axis 0 enumerates every sample that
        # contributes to a channel's statistics.
        xv_transposed = np.transpose(xv, [0, 2, 3, 1]).reshape(
            (data_shape[0] * data_shape[2] * data_shape[3], nr_chan)
        )

        mean = np.mean(xv_transposed, axis=0).reshape(1, nr_chan, 1, 1)
        var = np.var(xv_transposed, axis=0).reshape((1, nr_chan, 1, 1))
        sd = np.sqrt(var + bn.eps)

        yv = bn(Tensor(xv))
        yv_expect = (xv - mean) / sd

        _assert_allclose(yv.numpy(), yv_expect)