# -*- coding: utf-8 -*- # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") # # Copyright (c) 2014-2020 Megvii Inc. All rights reserved. # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. import megengine as mge import megengine.functional as F import numpy as np from megengine.core import tensor, Tensor from official.vision.detection.layers import basic def get_focal_loss( score: Tensor, label: Tensor, ignore_label: int = -1, background: int = 0, alpha: float = 0.5, gamma: float = 0, norm_type: str = "fg", ) -> Tensor: r"""Focal Loss for Dense Object Detection: .. math:: FL(p_t) = -\alpha_t(1-p_t)^\gamma \log(p_t) Args: score (Tensor): the predicted score with the shape of :math:`(B, A, C)` label (Tensor): the assigned label of boxes with shape of :math:`(B, A)` ignore_label (int): the value of ignore class. Default: -1 background (int): the value of background class. Default: 0 alpha (float): parameter to mitigate class imbalance. Default: 0.5 gamma (float): parameter to mitigate easy/hard loss imbalance. Default: 0 norm_type (str): current support 'fg', 'none': 'fg': loss will be normalized by number of fore-ground samples 'none": not norm Returns: the calculated focal loss. """ mask = 1 - (label == ignore_label) valid_label = label * mask score_shp = score.shape zero_mat = mge.zeros( F.concat([score_shp[0], score_shp[1], score_shp[2] + 1], axis=0), dtype=np.float32, ) one_mat = mge.ones( F.concat([score_shp[0], score_shp[1], tensor(1)], axis=0), dtype=np.float32, ) one_hot = basic.indexing_set_one_hot( zero_mat, 2, valid_label.astype(np.int32), one_mat )[:, :, 1:] pos_part = F.power(1 - score, gamma) * one_hot * F.log(score) neg_part = F.power(score, gamma) * (1 - one_hot) * F.log(1 - score) loss = -(alpha * pos_part + (1 - alpha) * neg_part).sum(axis=2) * mask if norm_type == "fg": positive_mask = label > background return loss.sum() / F.maximum(positive_mask.sum(), 1) elif norm_type == "none": return loss.sum() else: raise NotImplementedError def get_smooth_l1_loss( pred_bbox: Tensor, gt_bbox: Tensor, label: Tensor, sigma: int = 3, background: int = 0, ignore_label: int = -1, fix_smooth_l1: bool = False, norm_type: str = "fg", ) -> Tensor: r"""Smooth l1 loss used in RetinaNet. Args: pred_bbox (Tensor): the predicted bbox with the shape of :math:`(B, A, 4)` gt_bbox (Tensor): the ground-truth bbox with the shape of :math:`(B, A, 4)` label (Tensor): the assigned label of boxes with shape of :math:`(B, A)` sigma (int): the parameter of smooth l1 loss. Default: 1 background (int): the value of background class. Default: 0 ignore_label (int): the value of ignore class. Default: -1 fix_smooth_l1 (bool): is to use huber loss, default is False to use original smooth-l1 norm_type (str): current support 'fg', 'all', 'none': 'fg': loss will be normalized by number of fore-ground samples 'all': loss will be normalized by number of all samples 'none': not norm Returns: the calculated smooth l1 loss. """ pred_bbox = pred_bbox.reshape(-1, 4) gt_bbox = gt_bbox.reshape(-1, 4) label = label.reshape(-1) valid_mask = 1 - (label == ignore_label) fg_mask = (1 - (label == background)) * valid_mask losses = get_smooth_l1_base(pred_bbox, gt_bbox, sigma, is_fix=fix_smooth_l1) if norm_type == "fg": loss = (losses.sum(axis=1) * fg_mask).sum() / F.maximum(fg_mask.sum(), 1) elif norm_type == "all": raise NotImplementedError else: raise NotImplementedError return loss def get_smooth_l1_base( pred_bbox: Tensor, gt_bbox: Tensor, sigma: float, is_fix: bool = False, ): r""" Args: pred_bbox (Tensor): the predicted bbox with the shape of :math:`(N, 4)` gt_bbox (Tensor): the ground-truth bbox with the shape of :math:`(N, 4)` sigma (int): the parameter of smooth l1 loss. is_fix (bool): is to use huber loss, default is False to use original smooth-l1 Returns: the calculated smooth l1 loss. """ if is_fix: sigma = 1 / sigma cond_point = sigma x = pred_bbox - gt_bbox abs_x = F.abs(x) in_mask = abs_x < cond_point out_mask = 1 - in_mask in_loss = 0.5 * (x ** 2) out_loss = sigma * abs_x - 0.5 * (sigma ** 2) loss = in_loss * in_mask + out_loss * out_mask else: sigma2 = sigma ** 2 cond_point = 1 / sigma2 x = pred_bbox - gt_bbox abs_x = F.abs(x) in_mask = abs_x < cond_point out_mask = 1 - in_mask in_loss = 0.5 * (sigma * x) ** 2 out_loss = abs_x - 0.5 / sigma2 loss = in_loss * in_mask + out_loss * out_mask return loss