diff --git a/official/vision/detection/layers/basic/norm.py b/official/vision/detection/layers/basic/norm.py
index 487dd17c580342cb3a0eab8354aa21acdf8bba89..5d0463a333c259267268c3ecdb8c3f17ba7a16c9 100644
--- a/official/vision/detection/layers/basic/norm.py
+++ b/official/vision/detection/layers/basic/norm.py
@@ -58,7 +58,7 @@ class FrozenBatchNorm2d(M.Module):
 def get_norm(norm, out_channels=None):
     """
     Args:
-        norm (str): currently support "BN" and "FrozenBN"
+        norm (str): currently support "BN", "SyncBN" and "FrozenBN"
 
     Returns:
         M.Module or None: the normalization layer
@@ -66,7 +66,11 @@ def get_norm(norm, out_channels=None):
     if isinstance(norm, str):
         if len(norm) == 0:
             return None
-        norm = {"BN": M.BatchNorm2d, "FrozenBN": FrozenBatchNorm2d}[norm]
+        norm = {
+            "BN": M.BatchNorm2d,
+            "SyncBN": M.SyncBatchNorm,
+            "FrozenBN": FrozenBatchNorm2d
+        }[norm]
     if out_channels is not None:
         return norm(out_channels)
     else:
diff --git a/official/vision/detection/layers/det/loss.py b/official/vision/detection/layers/det/loss.py
index c355d52c3930973369f0c435bc01db6a6ba4adca..1d48adba183cc81c70994af2a59173d758398428 100644
--- a/official/vision/detection/layers/det/loss.py
+++ b/official/vision/detection/layers/det/loss.py
@@ -151,7 +151,5 @@ def get_smooth_l1_base(
     abs_x = F.abs(x)
     in_loss = 0.5 * x ** 2 * sigma2
     out_loss = abs_x - 0.5 / sigma2
-    in_mask = abs_x < cond_point
-    out_mask = 1 - in_mask
-    loss = in_loss * in_mask + out_loss * out_mask
+    loss = F.where(abs_x < cond_point, in_loss, out_loss)
     return loss
diff --git a/official/vision/detection/layers/det/retinanet.py b/official/vision/detection/layers/det/retinanet.py
index a6b7b49b8530561f1b2868e17038e980f018898d..df6d195685124a520de28a72cb1959e754a3b917 100644
--- a/official/vision/detection/layers/det/retinanet.py
+++ b/official/vision/detection/layers/det/retinanet.py
@@ -28,7 +28,9 @@ class RetinaNetHead(M.Module):
         num_classes = cfg.num_classes
         num_convs = 4
         prior_prob = cfg.cls_prior_prob
-        num_anchors = [len(cfg.anchor_ratios) * len(cfg.anchor_scales)] * 5
+        num_anchors = [len(cfg.anchor_ratios) * len(cfg.anchor_scales)] * len(
+            input_shape
+        )
 
         assert (
             len(set(num_anchors)) == 1
diff --git a/official/vision/detection/models/retinanet.py b/official/vision/detection/models/retinanet.py
index 255d447e99a2319d394c7f5f534a0f24dba2dab8..1013819788d75d75b9500dcfa7eeb85680526dde 100644
--- a/official/vision/detection/models/retinanet.py
+++ b/official/vision/detection/models/retinanet.py
@@ -53,7 +53,7 @@ class RetinaNet(M.Module):
             bottom_up=bottom_up,
             in_features=["res3", "res4", "res5"],
             out_channels=out_channels,
-            norm="",
+            norm=self.cfg.fpn_norm,
             top_block=layers.LastLevelP6P7(in_channels_p6p7, out_channels),
         )
 
@@ -97,7 +97,8 @@ class RetinaNet(M.Module):
         ]
 
         anchors_list = [
-            self.anchor_gen(features[i], self.stride_list[i]) for i in range(5)
+            self.anchor_gen(features[i], self.stride_list[i])
+            for i in range(len(features))
         ]
 
         all_level_box_cls = F.sigmoid(F.concat(box_cls_list, axis=1))
@@ -196,18 +197,19 @@
 class RetinaNetConfig:
     def __init__(self):
         self.resnet_norm = "FrozenBN"
+        self.fpn_norm = ""
         self.backbone_freeze_at = 2
 
         # ------------------------ data cfg -------------------------- #
         self.train_dataset = dict(
             name="coco",
             root="train2017",
-            ann_file="instances_train2017.json"
+            ann_file="annotations/instances_train2017.json",
         )
         self.test_dataset = dict(
             name="coco",
             root="val2017",
-            ann_file="instances_val2017.json"
+            ann_file="annotations/instances_val2017.json",
         )
         self.train_image_short_size = 800
         self.train_image_max_size = 1333
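Note: the F.where rewrite in loss.py above is behavior-preserving — selecting between the two branches with a boolean condition yields exactly what the old mask arithmetic computed, without materializing the masks. A minimal NumPy sketch of the same smooth-L1 base (NumPy stands in for megengine.functional here; the sigma and sample values are purely illustrative):

    import numpy as np

    def smooth_l1_base(x, sigma):
        # Piecewise smooth L1, mirroring get_smooth_l1_base in loss.py:
        # quadratic where |x| < 1 / sigma**2, linear outside.
        sigma2 = sigma ** 2
        cond_point = 1 / sigma2
        abs_x = np.abs(x)
        in_loss = 0.5 * x ** 2 * sigma2
        out_loss = abs_x - 0.5 / sigma2
        return np.where(abs_x < cond_point, in_loss, out_loss)

    x = np.array([-2.0, -0.3, 0.0, 0.3, 2.0])
    # Old formulation: in_loss * in_mask + out_loss * out_mask (sigma = 1).
    mask = (np.abs(x) < 1.0).astype(x.dtype)
    old = 0.5 * x ** 2 * mask + (np.abs(x) - 0.5) * (1 - mask)
    assert np.allclose(smooth_l1_base(x, sigma=1.0), old)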
diff --git a/official/vision/detection/retinanet_res50_coco_1x_800size.py b/official/vision/detection/retinanet_res50_coco_1x_800size.py
index bdb9cc4a009781889be7193d89a1dd1f5fce6b47..8324290dd5cb48678fd2583980ddc4df88dd6656 100644
--- a/official/vision/detection/retinanet_res50_coco_1x_800size.py
+++ b/official/vision/detection/retinanet_res50_coco_1x_800size.py
@@ -11,33 +11,17 @@
 from megengine import hub
 
 from official.vision.detection import models
 
 
-class CustomRetinaNetConfig(models.RetinaNetConfig):
-    def __init__(self):
-        super().__init__()
-
-        # ------------------------ data cfg -------------------------- #
-        self.train_dataset = dict(
-            name="coco",
-            root="train2017",
-            ann_file="annotations/instances_train2017.json"
-        )
-        self.test_dataset = dict(
-            name="coco",
-            root="val2017",
-            ann_file="annotations/instances_val2017.json"
-        )
-
-
 @hub.pretrained(
     "https://data.megengine.org.cn/models/weights/"
     "retinanet_d3f58dce_res50_1x_800size_36dot0.pkl"
 )
 def retinanet_res50_coco_1x_800size(batch_size=1, **kwargs):
-    r"""ResNet-18 model from
+    r"""
+    RetinaNet trained from COCO dataset.
 
     `"RetinaNet" <https://arxiv.org/abs/1708.02002>`_
     """
-    return models.RetinaNet(RetinaNetConfig(), batch_size=batch_size, **kwargs)
+    return models.RetinaNet(models.RetinaNetConfig(), batch_size=batch_size, **kwargs)
 
 
 Net = models.RetinaNet
-Cfg = CustomRetinaNetConfig
+Cfg = models.RetinaNetConfig
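Note: with fpn_norm defaulting to "", the FPN still builds with no normalization unless a config opts in: get_norm (patched above) maps the empty string to None and the new "SyncBN" key to M.SyncBatchNorm. A quick sketch of that lookup, assuming the module is importable from its file path as shown in this diff; the out_channels value is illustrative:

    import megengine.module as M
    from official.vision.detection.layers.basic.norm import get_norm

    assert get_norm("") is None                # "" -> no norm layer (old FPN default)
    bn = get_norm("SyncBN", out_channels=256)  # "SyncBN" -> M.SyncBatchNorm(256)
    assert isinstance(bn, M.SyncBatchNorm)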
diff --git a/official/vision/detection/retinanet_res50_coco_1x_800size_syncbn.py b/official/vision/detection/retinanet_res50_coco_1x_800size_syncbn.py
new file mode 100644
index 0000000000000000000000000000000000000000..363a542ed2268a83eeabca889167e4be8730bb17
--- /dev/null
+++ b/official/vision/detection/retinanet_res50_coco_1x_800size_syncbn.py
@@ -0,0 +1,32 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+from megengine import hub
+
+from official.vision.detection import models
+
+
+class CustomRetinaNetConfig(models.RetinaNetConfig):
+    def __init__(self):
+        super().__init__()
+
+        self.resnet_norm = "SyncBN"
+        self.fpn_norm = "SyncBN"
+        self.backbone_freeze_at = 0
+
+
+def retinanet_res50_coco_1x_800size_syncbn(batch_size=1, **kwargs):
+    r"""
+    RetinaNet with SyncBN trained from COCO dataset.
+    `"RetinaNet" <https://arxiv.org/abs/1708.02002>`_
+    """
+    return models.RetinaNet(CustomRetinaNetConfig(), batch_size=batch_size, **kwargs)
+
+
+Net = models.RetinaNet
+Cfg = CustomRetinaNetConfig
diff --git a/official/vision/detection/retinanet_res50_objects365_1x_800size.py b/official/vision/detection/retinanet_res50_objects365_1x_800size.py
index 028cebffc29f00698cf92f212af6dcc66c66f6cf..951d09a9cecc44ae9122ec12ee50511cc1c25d8b 100644
--- a/official/vision/detection/retinanet_res50_objects365_1x_800size.py
+++ b/official/vision/detection/retinanet_res50_objects365_1x_800size.py
@@ -19,23 +19,25 @@ class CustomRetinaNetConfig(models.RetinaNetConfig):
         self.train_dataset = dict(
             name="objects365",
             root="train",
-            ann_file="annotations/objects365_train_20190423.json"
+            ann_file="annotations/objects365_train_20190423.json",
         )
         self.test_dataset = dict(
             name="objects365",
             root="val",
-            ann_file="annotations/objects365_val_20190423.json"
+            ann_file="annotations/objects365_val_20190423.json",
         )
+        self.num_classes = 365
 
         # ------------------------ training cfg ---------------------- #
         self.nr_images_epoch = 400000
 
 
 def retinanet_objects365_res50_1x_800size(batch_size=1, **kwargs):
-    r"""ResNet-18 model from
+    r"""
+    RetinaNet trained from Objects365 dataset.
 
     `"RetinaNet" <https://arxiv.org/abs/1708.02002>`_
     """
-    return models.RetinaNet(RetinaNetConfig(), batch_size=batch_size, **kwargs)
+    return models.RetinaNet(CustomRetinaNetConfig(), batch_size=batch_size, **kwargs)
 
 
 Net = models.RetinaNet
diff --git a/official/vision/detection/tools/inference.py b/official/vision/detection/tools/inference.py
index b8db4cdb1f2810d104b7e2169c79339830003e20..c1a35560cd4f3b54db17d80febbc30a02c9062df 100644
--- a/official/vision/detection/tools/inference.py
+++ b/official/vision/detection/tools/inference.py
@@ -47,7 +47,10 @@ def main():
     current_network = importlib.import_module(os.path.basename(args.file).split(".")[0])
     model = current_network.Net(current_network.Cfg(), batch_size=1)
     model.eval()
-    model.load_state_dict(mge.load(args.model)["state_dict"])
+    state_dict = mge.load(args.model)
+    if "state_dict" in state_dict:
+        state_dict = state_dict["state_dict"]
+    model.load_state_dict(state_dict)
 
     evaluator = DetEvaluator(model)
diff --git a/official/vision/detection/tools/test.py b/official/vision/detection/tools/test.py
index f9f66d4e4d9675da21b5f5dcf39a6d7835984010..4fe87a5157096a06753a9220c4e02f16b702966d 100644
--- a/official/vision/detection/tools/test.py
+++ b/official/vision/detection/tools/test.py
@@ -235,7 +235,10 @@ def worker(
     model = current_network.Net(current_network.Cfg(), batch_size=1)
     model.eval()
     evaluator = DetEvaluator(model)
-    model.load_state_dict(mge.load(model_file)["state_dict"])
+    state_dict = mge.load(model_file)
+    if "state_dict" in state_dict:
+        state_dict = state_dict["state_dict"]
+    model.load_state_dict(state_dict)
 
     loader = build_dataloader(worker_id, total_worker, data_dir, model.cfg)
     for data_dict in loader:
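Note: inference.py and test.py now accept either a bare state dict or a training checkpoint that nests one under "state_dict", so released weight files and mid-training snapshots load through the same path. A self-contained sketch of the two layouts this guards against (the stand-in Conv2d module, the "epoch" key, and the file name are illustrative):

    import megengine as mge
    import megengine.module as M

    model = M.Conv2d(3, 8, 3)  # stand-in for the detector

    # Training checkpoints typically wrap the weights with metadata,
    # while released weight files may be the bare dict itself.
    mge.save({"state_dict": model.state_dict(), "epoch": 12}, "ckpt.pkl")

    state_dict = mge.load("ckpt.pkl")
    if "state_dict" in state_dict:  # same unwrapping as the patch above
        state_dict = state_dict["state_dict"]
    model.load_state_dict(state_dict)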
diff --git a/official/vision/detection/tools/train.py b/official/vision/detection/tools/train.py
index c8a8c0a5460e59aecc6fc4243ffa5b4c6b67fe40..f93093d4ddcb4a4db71b4fc26b9542271050e7f3 100644
--- a/official/vision/detection/tools/train.py
+++ b/official/vision/detection/tools/train.py
@@ -232,14 +232,35 @@ def main():
         worker(0, 1, args)
 
 
+def build_sampler(train_dataset, batch_size, aspect_grouping=[1]):
+    def _compute_aspect_ratios(dataset):
+        aspect_ratios = []
+        for i in range(len(dataset)):
+            info = dataset.get_img_info(i)
+            aspect_ratios.append(info["height"] / info["width"])
+        return aspect_ratios
+
+    def _quantize(x, bins):
+        return list(map(lambda y: bisect.bisect_right(sorted(bins), y), x))
+
+    if len(aspect_grouping) == 0:
+        return Infinite(RandomSampler(train_dataset, batch_size, drop_last=True))
+
+    aspect_ratios = _compute_aspect_ratios(train_dataset)
+    group_ids = _quantize(aspect_ratios, aspect_grouping)
+    return Infinite(GroupedRandomSampler(train_dataset, batch_size, group_ids))
+
+
 def build_dataloader(batch_size, data_dir, cfg):
     train_dataset = data_mapper[cfg.train_dataset["name"]](
         os.path.join(data_dir, cfg.train_dataset["name"], cfg.train_dataset["root"]),
-        os.path.join(data_dir, cfg.train_dataset["name"], cfg.train_dataset["ann_file"]),
+        os.path.join(
+            data_dir, cfg.train_dataset["name"], cfg.train_dataset["ann_file"]
+        ),
         remove_images_without_annotations=True,
         order=["image", "boxes", "boxes_category", "info"],
     )
-    train_sampler = Infinite(RandomSampler(train_dataset, batch_size, drop_last=True))
+    train_sampler = build_sampler(train_dataset, batch_size)
     train_dataloader = DataLoader(
         train_dataset,
         sampler=train_sampler,
@@ -259,6 +280,45 @@ def build_dataloader(batch_size, data_dir, cfg):
     return {"train": train_dataloader}
 
 
+class GroupedRandomSampler(RandomSampler):
+    def __init__(
+        self,
+        dataset,
+        batch_size,
+        group_ids,
+        indices=None,
+        world_size=None,
+        rank=None,
+        seed=None,
+    ):
+        super().__init__(dataset, batch_size, False, indices, world_size, rank, seed)
+        self.group_ids = group_ids
+        assert len(group_ids) == len(dataset)
+        groups = np.unique(self.group_ids).tolist()
+
+        # buffer the indices of each group until batch size is reached
+        self.buffer_per_group = {k: [] for k in groups}
+
+    def batch(self):
+        indices = list(self.sample())
+        if self.world_size > 1:
+            indices = self.scatter(indices)
+
+        batch_index = []
+        for ind in indices:
+            group_id = self.group_ids[ind]
+            group_buffer = self.buffer_per_group[group_id]
+            group_buffer.append(ind)
+            if len(group_buffer) == self.batch_size:
+                batch_index.append(group_buffer)
+                self.buffer_per_group[group_id] = []
+
+        return iter(batch_index)
+
+    def __len__(self):
+        raise NotImplementedError("len() of GroupedRandomSampler is not well-defined.")
+
+
 class DetectionPadCollator(Collator):
     def __init__(self, pad_value: float = 0.0):
         super().__init__()
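Note: to see how the aspect-ratio grouping added above forms batches, here is a self-contained sketch of the same logic with made-up ratios: _quantize buckets each height/width ratio against the aspect_grouping bin edges, and GroupedRandomSampler.batch only emits a batch once a bucket fills, so each batch holds similarly shaped images and padding in DetectionPadCollator stays small.

    import bisect

    aspect_grouping = [1]                       # single bin edge: wide vs. tall
    ratios = [0.75, 1.33, 0.5, 1.5, 0.66, 1.0]  # illustrative height / width values

    # Same bucketing as _quantize in train.py.
    group_ids = [bisect.bisect_right(sorted(aspect_grouping), r) for r in ratios]
    # group_ids == [0, 1, 0, 1, 0, 1]

    # Mirror of GroupedRandomSampler.batch with batch_size = 2: indices are
    # buffered per group and flushed as soon as a group reaches batch_size.
    batch_size, buffers, batches = 2, {0: [], 1: []}, []
    for idx, gid in enumerate(group_ids):
        buffers[gid].append(idx)
        if len(buffers[gid]) == batch_size:
            batches.append(buffers[gid])
            buffers[gid] = []
    # batches == [[0, 2], [1, 3]]; indices 4 and 5 stay buffered for later
    assert batches == [[0, 2], [1, 3]]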