# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import numpy as np

from ..tensor import Tensor
from .elemwise import abs, log
from .nn import indexing_one_hot, logsigmoid, logsumexp, relu
from .tensor import where

__all__ = [
    "l1_loss",
    "square_loss",
    "cross_entropy",
    "binary_cross_entropy",
    "hinge_loss",
]


def l1_loss(pred: Tensor, label: Tensor) -> Tensor:
    r"""
    Calculates the mean absolute error (MAE) between
    each element in the pred :math:`x` and label :math:`y`.

    The mean absolute error can be described as:

    .. math:: \ell(x,y) = mean\left(L \right)

    where

    .. math::

        L = \{l_1,\dots,l_N\}, \quad
        l_n = \left| x_n - y_n \right|,

    :math:`x` and :math:`y` are tensors of arbitrary shapes with a total
    of :math:`N` elements each.

    :param pred: predicted result from model.
    :param label: ground truth to compare.
    :return: loss value.

    Examples:

    .. testcode::

        import numpy as np
        import megengine as mge
        import megengine.functional as F

        ipt = mge.tensor(np.array([3, 3, 3, 3]).astype(np.float32))
        tgt = mge.tensor(np.array([2, 8, 6, 1]).astype(np.float32))
        loss = F.nn.l1_loss(ipt, tgt)
        print(loss.numpy())

    Outputs:

    .. testoutput::

        2.75

    """
    diff = pred - label
    return abs(diff).mean()


def square_loss(pred: Tensor, label: Tensor) -> Tensor:
    r"""
    Calculates the mean squared error (squared L2 norm) between
    each element in the pred :math:`x` and label :math:`y`.

    The mean squared error can be described as:

    .. math:: \ell(x, y) = mean\left( L \right)

    where

    .. math::

        L = \{l_1,\dots,l_N\}, \quad
        l_n = \left( x_n - y_n \right)^2,

    :math:`x` and :math:`y` are tensors of arbitrary shapes with a total
    of :math:`N` elements each.

    :param pred: predicted result from model.
    :param label: ground truth to compare.
    :return: loss value.

    Shape:
        - pred: :math:`(N, *)` where :math:`*` means any number of additional
          dimensions.
        - label: :math:`(N, *)`. Same shape as ``pred``.

    Examples:

    .. testcode::

        import numpy as np
        import megengine as mge
        import megengine.functional as F

        ipt = mge.tensor(np.array([3, 3, 3, 3]).astype(np.float32))
        tgt = mge.tensor(np.array([2, 8, 6, 1]).astype(np.float32))
        loss = F.nn.square_loss(ipt, tgt)
        print(loss.numpy())

    Outputs:

    .. testoutput::

        9.75

    """
    diff = pred - label
    return (diff ** 2).mean()


def cross_entropy(
    pred: Tensor,
    label: Tensor,
    axis: int = 1,
    with_logits: bool = True,
    label_smooth: float = 0,
) -> Tensor:
    r"""
    Computes the multi-class cross entropy loss (using logits by default).

    By default (``with_logits`` is True), ``pred`` is assumed to be logits and
    class probabilities are given by softmax.

    It has better numerical stability compared with sequential calls to :func:`~.softmax` and :func:`~.cross_entropy`.

    When using label smoothing, the label distribution is as follows:

    .. math:: y^{LS}_{k}=y_{k}\left(1-\alpha\right)+\alpha/K

    where :math:`y^{LS}` and :math:`y` are the new and original label distributions respectively,
    :math:`k` is the index of the label distribution, :math:`\alpha` is ``label_smooth``
    and :math:`K` is the number of classes.

    :param pred: input tensor representing the predicted probability.
    :param label: input tensor representing the classification label.
    :param axis: an axis along which softmax will be applied. Default: 1
    :param with_logits: whether to apply softmax first. Default: True
    :param label_smooth: a label smoothing parameter that re-distributes the target distribution. Default: 0
    :return: loss value.

    Examples:

    .. testcode::

        import numpy as np
        from megengine import tensor
        import megengine.functional as F

        data_shape = (1, 2)
        label_shape = (1, )
        pred = tensor(np.array([0, 0], dtype=np.float32).reshape(data_shape))
        label = tensor(np.ones(label_shape, dtype=np.int32))
        loss = F.nn.cross_entropy(pred, label)
        print(loss.numpy().round(decimals=4))

    Outputs:

    .. testoutput::

        0.6931

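    With label smoothing, part of the target probability mass is spread uniformly
    over the classes; an illustrative call (assuming ``label_smooth=0.1``):

    .. testcode::

        import numpy as np
        from megengine import tensor
        import megengine.functional as F

        pred = tensor(np.array([[1.0, 3.0]], dtype=np.float32))
        label = tensor(np.ones((1,), dtype=np.int32))
        loss = F.nn.cross_entropy(pred, label, label_smooth=0.1)
        print(loss.numpy().round(decimals=4))

    Outputs:

    .. testoutput::

        0.2269
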
    """
    n0 = pred.ndim
    n1 = label.ndim
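    # ``label`` holds class indices, so it must have exactly one fewer
    # dimension than ``pred`` (the class axis is the one removed)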
    assert n0 == n1 + 1, (
        "target ndim must be one less than input ndim; input_ndim={} "
        "target_ndim={}".format(n0, n1)
    )

    ls = label_smooth

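    # with_logits: loss = logsumexp(pred) - pred[label]  (log-softmax folded into the loss)
    # otherwise:   ``pred`` already holds probabilities, loss = -log(pred[label])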
    if with_logits:
        logZ = logsumexp(pred, axis).mean()
        primary_term = indexing_one_hot(pred, label, axis).mean()
    else:
        logZ = 0
        primary_term = log(indexing_one_hot(pred, label, axis)).mean()
    if ls is None or (type(ls) in (int, float) and ls == 0):
        return logZ - primary_term
    if not with_logits:
        pred = log(pred)
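    # label smoothing: (1 - ls) weights the target-class term, while ls is spread
    # uniformly over all classes via the mean of the (log-)predictions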
    return logZ - ls * pred.mean() - (1 - ls) * primary_term


def binary_cross_entropy(
    pred: Tensor, label: Tensor, with_logits: bool = True
) -> Tensor:
    r"""
    Computes the binary cross entropy loss (using logits by default).

    By default (``with_logits`` is True), ``pred`` is assumed to be logits and
    class probabilities are given by sigmoid.

    :param pred: `(N, *)`, where `*` means any number of additional dimensions.
    :param label: `(N, *)`, same shape as the input.
    :param with_logits: bool, whether to apply sigmoid first. Default: True
    :return: loss value.

    Examples:

    .. testcode::

        import numpy as np
        from megengine import tensor
        import megengine.functional as F

        pred = tensor(np.array([0, 0], dtype=np.float32).reshape(1, 2))
        label = tensor(np.ones((1, 2), dtype=np.float32))
        loss = F.nn.binary_cross_entropy(pred, label)
        print(loss.numpy().round(decimals=4))

    Outputs:

    .. testoutput::

        0.6931

    """
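    # without logits, ``pred`` already holds probabilities, so apply the binary
    # cross entropy definition -(y*log(p) + (1 - y)*log(1 - p)) directly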
    if not with_logits:
        return -(label * log(pred) + (1 - label) * log(1 - pred)).mean()
    # logsigmoid(pred) and logsigmoid(-pred) share a common sub-expression;
    # hopefully the backend will optimize this
    return -(label * logsigmoid(pred) + (1 - label) * logsigmoid(-pred)).mean()


def hinge_loss(pred: Tensor, label: Tensor, norm: str = "L1") -> Tensor:
    r"""
    Calculates the hinge loss, which is often used in SVMs.

    The hinge loss can be described as:

    .. math:: loss(x, y) = \frac{1}{N}\sum_i\sum_j\max(0, 1 - x_{ij} y_{ij})

    :param pred: input tensor representing the predicted probability, shape is `(N, C)`.
    :param label: input tensor representing the binary classification label, shape is `(N, C)`.
    :param norm: specify the norm to calculate the loss, should be "L1" or "L2". Default: "L1"
    :return: loss value.

    Examples:

    .. testcode::

        from megengine import tensor
        import megengine.functional as F

        pred = tensor([[0.5, -0.5, 0.1], [-0.6, 0.7, 0.8]], dtype="float32")
        label = tensor([[1, -1, -1], [-1, 1, 1]], dtype="float32")
        loss = F.nn.hinge_loss(pred, label)
        print(loss.numpy())

    Outputs:

    .. testoutput::

        1.5

    """
    norm = norm.upper()
    assert norm in ["L1", "L2"], "norm must be L1 or L2"
    # hinge margin: max(0, 1 - pred * label); label is expected to be +1 or -1
    loss = relu(1.0 - pred * label)
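    # "L1" sums the per-sample margins, "L2" sums the squared margins;
    # both are then averaged over the batch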
    if norm == "L1":
        return loss.sum(axis=1).mean()
    else:
        return (loss ** 2).sum(axis=1).mean()