Unverified commit fe984212, authored by flashrunrun, committed by GitHub

Merge pull request #10 from MegEngine/courses

Courses
## Topic
Advanced Model Building and Training II: Object Detection
## Instructor
Wang Feng<br/>
A graduate of the Institute of Computing Technology, Chinese Academy of Sciences, and an algorithm researcher in Megvii's foundational detection group. He won first place in the ICDAR ArT text detection task, and in MegEngine he is responsible for reproducing and maintaining the detection models as well as designing and implementing the Data module.
## Summary
In this lesson we study the methods and overall workflow of generic object detection and, using Faster-RCNN as an example, walk through how MegEngine implements a generic object detection pipeline. Topics include data preparation, model building, training and testing, and sublinear memory, a powerful technique for saving GPU memory.
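As a preview of the memory-saving part, below is a minimal sketch of how sublinear memory can be switched on for a traced training step. It is not taken from the course materials: `SublinearMemoryConfig` and the `sublinear_memory_config` argument of `trace` are assumed from the MegEngine 1.x `megengine.jit` API and may differ in the version you have installed.

```python
# Hedged sketch: enable sublinear memory for a static-graph training step.
# Assumed MegEngine 1.x API: jit.trace(symbolic=True, sublinear_memory_config=...).
import numpy as np
import megengine
import megengine.functional as F
import megengine.module as M
import megengine.optimizer as optim
from megengine.autodiff import GradManager
from megengine.jit import SublinearMemoryConfig, trace

net = M.Linear(16, 4)                          # tiny stand-in for a detector
gm = GradManager().attach(net.parameters())
opt = optim.SGD(net.parameters(), lr=0.01)

# Sublinear memory trades recomputation for a lower peak memory footprint;
# it only takes effect when the step runs as a traced static graph.
@trace(symbolic=True, sublinear_memory_config=SublinearMemoryConfig())
def train_step(data, label):
    with gm:
        loss = F.loss.cross_entropy(net(data), label)
        gm.backward(loss)
    opt.step()
    opt.clear_grad()
    return loss

loss = train_step(
    megengine.tensor(np.random.rand(8, 16).astype("float32")),
    megengine.tensor(np.random.randint(0, 4, size=(8,)).astype("int32")),
)
```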
## Materials
[4.模型构建和训练进阶 II:物体检测(anchor).ipynb](./notebooks/4.模型构建和训练进阶%20II:物体检测(anchor).ipynb)<br/>
[4.模型构建和训练进阶 II:物体检测(nms).ipynb](./notebooks/4.模型构建和训练进阶%20II:物体检测(nms).ipynb)<br/>
[4.模型构建和训练进阶 II:物体检测(transform_pipeline).ipynb](./notebooks/4.模型构建和训练进阶%20II:物体检测(transform_pipeline).ipynb)<br/>
## Video
https://www.bilibili.com/video/BV1hT4y1L7Z8
## Homework
Learn how to use MegEngine's hub: upload an image of your own and run inference on it with the Faster-RCNN or RetinaNet model from the hub.
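A possible starting point is sketched below. It is a guess at the workflow, not a reference solution: the entry-point name `retinanet_res50_coco_1x_800size` is an assumption, so list the hub entries first and pick one of the Faster-RCNN or RetinaNet names that actually exists; the detection pre- and post-processing is left to the inference tools in the MegEngine/Models repository.

```python
# Hedged sketch for the homework. The entry-point name below is assumed;
# megengine.hub.list() shows the names that the Models repo really exports.
import megengine.hub as hub

print(hub.list("megengine/models"))        # inspect the available entry points

model = hub.load(
    "megengine/models",
    "retinanet_res50_coco_1x_800size",     # assumed RetinaNet entry-point name
    pretrained=True,                       # download pretrained COCO weights
)
model.eval()
# Feeding your own image requires the detector's preprocessing (resizing,
# normalization) and postprocessing (score threshold, NMS); reuse the inference
# script shipped with MegEngine/Models rather than re-implementing it here.
```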
### How to submit
Send a screenshot of your successful run together with your personal information to: mgesupport@megvii.com
**Email subject:** MegEngine introductory course, assignment 4
**Email body**
* Screenshot
* Name:
* School (company):
* Phone:
* Mailing address:
{
"cells": [
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
"import math\n",
"import numpy as np\n",
"\n",
"from megengine import Tensor, tensor\n",
"import megengine.functional as F\n",
"\n",
"\n",
"def meshgrid(x, y):\n",
" assert len(x.shape) == 1\n",
" assert len(y.shape) == 1\n",
" mesh_shape = (y.shape[0], x.shape[0])\n",
" mesh_x = x.broadcast(mesh_shape)\n",
" mesh_y = y.reshape(-1, 1).broadcast(mesh_shape)\n",
" return mesh_x, mesh_y\n",
"\n",
"\n",
"def create_anchor_grid(featmap_size, offsets, stride):\n",
" step_x, step_y = featmap_size\n",
" shift = offsets * stride\n",
"\n",
" grid_x = F.arange(shift, step_x * stride + shift, step=stride)\n",
" grid_y = F.arange(shift, step_y * stride + shift, step=stride)\n",
" grids_x, grids_y = meshgrid(grid_y, grid_x)\n",
" return grids_x.reshape(-1), grids_y.reshape(-1)\n",
"\n",
"\n",
"class AnchorGenerator:\n",
"\n",
" def __init__(\n",
" self,\n",
" anchor_scales: list = [[32], [64], [128], [256], [512]],\n",
" anchor_ratios: list = [[0.5, 1, 2]],\n",
" strides: list = [4, 8, 16, 32, 64],\n",
" offset: float = 0,\n",
" ):\n",
" super().__init__()\n",
" self.anchor_scales = np.array(anchor_scales, dtype=np.float32)\n",
" self.anchor_ratios = np.array(anchor_ratios, dtype=np.float32)\n",
" self.strides = strides\n",
" self.offset = offset\n",
" self.num_features = len(strides)\n",
"\n",
" self.base_anchors = self._different_level_anchors(anchor_scales, anchor_ratios)\n",
"\n",
" def _different_level_anchors(self, scales, ratios):\n",
" if len(scales) == 1:\n",
" scales *= self.num_features\n",
" assert len(scales) == self.num_features\n",
"\n",
" if len(ratios) == 1:\n",
" ratios *= self.num_features\n",
" assert len(ratios) == self.num_features\n",
" return [\n",
" tensor(self.generate_base_anchors(scale, ratio))\n",
" # self.generate_base_anchors(scale, ratio)\n",
" for scale, ratio in zip(scales, ratios)\n",
" ]\n",
"\n",
" def generate_base_anchors(self, scales, ratios):\n",
" base_anchors = []\n",
" areas = [s ** 2.0 for s in scales]\n",
" for area in areas:\n",
" for ratio in ratios:\n",
" w = math.sqrt(area / ratio)\n",
" h = ratio * w\n",
" # center-based anchor\n",
" x0, y0, x1, y1 = -w / 2.0, -h / 2.0, w / 2.0, h / 2.0\n",
" base_anchors.append([x0, y0, x1, y1])\n",
" return base_anchors\n",
"\n",
" def generate_anchors_by_features(self, sizes):\n",
" all_anchors = []\n",
" assert len(sizes) == self.num_features, (\n",
" \"input features expected {}, got {}\".format(self.num_features, len(sizes))\n",
" )\n",
" for size, stride, base_anchor in zip(sizes, self.strides, self.base_anchors):\n",
" grid_x, grid_y = create_anchor_grid(size, self.offset, stride)\n",
" # FIXME: If F.stack works, change to stack\n",
" grid_x, grid_y = grid_x.reshape(-1, 1), grid_y.reshape(-1, 1)\n",
" grids = F.concat([grid_x, grid_y, grid_x, grid_y], axis=1)\n",
" all_anchors.append(\n",
" (grids.reshape(-1, 1, 4) + base_anchor.reshape(1, -1, 4)).reshape(-1, 4)\n",
" )\n",
" return all_anchors\n",
" \n",
" def __call__(self, featmaps):\n",
" feat_sizes = [fmap.shape[-2:] for fmap in featmaps]\n",
" return self.generate_anchors_by_features(feat_sizes)\n",
"\n",
" @property\n",
" def anchor_dim(self):\n",
" return 4"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"shape of feature map: (1, 3, 400, 480)\n",
"shape of feature map: (1, 3, 200, 240)\n",
"shape of feature map: (1, 3, 100, 120)\n",
"shape of feature map: (1, 3, 50, 60)\n",
"shape of feature map: (1, 3, 25, 30)\n"
]
}
],
"source": [
"from megengine.random import gaussian\n",
"shape = [1, 3, 25, 30]\n",
"shape_list = reversed([shape[:2] + [s * 2**i for s in shape[2:]] for i in range(5)])\n",
"feature_maps = [gaussian(shape) for shape in shape_list]\n",
"for featmap in feature_maps:\n",
" print(\"shape of feature map: {}\".format(featmap.shape))"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"anchor shape: (576000, 4)\n",
"576000 = 400 * 480 * 3\n",
"anchor shape: (144000, 4)\n",
"144000 = 200 * 240 * 3\n",
"anchor shape: (36000, 4)\n",
"36000 = 100 * 120 * 3\n",
"anchor shape: (9000, 4)\n",
"9000 = 50 * 60 * 3\n",
"anchor shape: (2250, 4)\n",
"2250 = 25 * 30 * 3\n"
]
}
],
"source": [
"anchor_generator = AnchorGenerator()\n",
"anchors_list = anchor_generator(feature_maps)\n",
"for anchors, fmap in zip(anchors_list, feature_maps):\n",
" print(\"anchor shape: {}\".format(anchors.shape))\n",
" print(\"{} = {} * {} * 3\".format(anchors.shape[0], *fmap.shape[2:]))"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tensor([1264. 1048. 1296. 1080.])\n",
"Tensor([ 608. 1032. 672. 1096.])\n",
"Tensor([1216. 992. 1344. 1120.])\n",
"Tensor([ 512. 928. 768. 1184.])\n",
"Tensor([1024. 768. 1536. 1280.])\n"
]
}
],
"source": [
"for anchors in anchors_list:\n",
" print(anchors[anchors.shape[0] * 2 // 3 + 1])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"\n",
"def cpu_nms(dets, thresh):\n",
" x1 = np.ascontiguousarray(dets[:, 0])\n",
" y1 = np.ascontiguousarray(dets[:, 1])\n",
" x2 = np.ascontiguousarray(dets[:, 2])\n",
" y2 = np.ascontiguousarray(dets[:, 3])\n",
"\n",
" areas = (x2 - x1) * (y2 - y1)\n",
" order = dets[:, 4].argsort()[::-1]\n",
" keep = list()\n",
"\n",
" while order.size > 0:\n",
" pick_ind = order[0]\n",
" keep.append(pick_ind)\n",
"\n",
" xx1 = np.maximum(x1[pick_ind], x1[order[1:]])\n",
" yy1 = np.maximum(y1[pick_ind], y1[order[1:]])\n",
" xx2 = np.minimum(x2[pick_ind], x2[order[1:]])\n",
" yy2 = np.minimum(y2[pick_ind], y2[order[1:]])\n",
"\n",
" inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)\n",
" iou = inter / (areas[pick_ind] + areas[order[1:]] - inter)\n",
"\n",
" order = order[np.where(iou <= thresh)[0] + 1]\n",
"\n",
" return keep"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"before nms:\n",
" [[ 11.5 12. 311.4 410.6 0.85]\n",
" [ 0.5 1. 300.4 400.5 0.97]\n",
" [ 200.5 300. 700.4 1000.6 0.65]\n",
" [ 250.5 310. 700.4 1000.6 0.72]]\n",
"after nms:\n",
" [[ 0.5 1. 300.4 400.5 0.97]\n",
" [ 250.5 310. 700.4 1000.6 0.72]]\n"
]
}
],
"source": [
"dets = np.array([\n",
" [11.5, 12.0, 311.4, 410.6, 0.85],\n",
" [0.5, 1.0, 300.4, 400.5, 0.97],\n",
" [200.5, 300.0, 700.4, 1000.6, 0.65],\n",
" [250.5, 310.0, 700.4, 1000.6, 0.72],\n",
"])\n",
"np.set_printoptions(suppress=True)\n",
"print(\"before nms:\\n\", dets)\n",
"keep = cpu_nms(dets, 0.5)\n",
"print(\"after nms:\\n\", dets[keep])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 4
}