Unverified commit fe984212, authored by flashrunrun, committed by GitHub

Merge pull request #10 from MegEngine/courses

Courses
## Topic
Advanced Model Building and Training II: Object Detection
## Instructor
Wang Feng<br/>
A graduate of the Institute of Computing Technology, Chinese Academy of Sciences, and an algorithm researcher in Megvii's foundational detection group. He won first place in the ICDAR ArT text detection task, and in MegEngine he is responsible for reproducing and maintaining the detection models as well as designing and implementing the Data module.
## Summary
In this lesson we study the methods and overall workflow of generic object detection and, using Faster-RCNN as an example, walk through how MegEngine implements a generic object detection pipeline. Topics include data preparation, model building, training and testing, and sublinear memory, a powerful technique for saving GPU memory.
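As a preview of the memory-saving part, below is a minimal sketch of how sublinear memory can be switched on for a traced training step. It is not taken from the course materials: `SublinearMemoryConfig` and the `sublinear_memory_config` argument of `trace` are assumed from the MegEngine 1.x `megengine.jit` API and may differ in the version you have installed.

```python
# Hedged sketch: enable sublinear memory for a static-graph training step.
# Assumed MegEngine 1.x API: jit.trace(symbolic=True, sublinear_memory_config=...).
import numpy as np
import megengine
import megengine.functional as F
import megengine.module as M
import megengine.optimizer as optim
from megengine.autodiff import GradManager
from megengine.jit import SublinearMemoryConfig, trace

net = M.Linear(16, 4)                          # tiny stand-in for a detector
gm = GradManager().attach(net.parameters())
opt = optim.SGD(net.parameters(), lr=0.01)

# Sublinear memory trades recomputation for a lower peak memory footprint;
# it only takes effect when the step runs as a traced static graph.
@trace(symbolic=True, sublinear_memory_config=SublinearMemoryConfig())
def train_step(data, label):
    with gm:
        loss = F.loss.cross_entropy(net(data), label)
        gm.backward(loss)
    opt.step()
    opt.clear_grad()
    return loss

loss = train_step(
    megengine.tensor(np.random.rand(8, 16).astype("float32")),
    megengine.tensor(np.random.randint(0, 4, size=(8,)).astype("int32")),
)
```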
## Materials
[4.模型构建和训练进阶 II:物体检测(anchor).ipynb](./notebooks/4.模型构建和训练进阶%20II:物体检测(anchor).ipynb)<br/>
[4.模型构建和训练进阶 II:物体检测(nms).ipynb](./notebooks/4.模型构建和训练进阶%20II:物体检测(nms).ipynb)<br/>
[4.模型构建和训练进阶 II:物体检测(transform_pipeline).ipynb](./notebooks/4.模型构建和训练进阶%20II:物体检测(transform_pipeline).ipynb)<br/>
## Video
https://www.bilibili.com/video/BV1hT4y1L7Z8
## Homework
Learn how to use MegEngine's hub: upload an image of your own and run inference on it with the Faster-RCNN or RetinaNet model from the hub.
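A possible starting point is sketched below. It is a guess at the workflow, not a reference solution: the entry-point name `retinanet_res50_coco_1x_800size` is an assumption, so list the hub entries first and pick one of the Faster-RCNN or RetinaNet names that actually exists; the detection pre- and post-processing is left to the inference tools in the MegEngine/Models repository.

```python
# Hedged sketch for the homework. The entry-point name below is assumed;
# megengine.hub.list() shows the names that the Models repo really exports.
import megengine.hub as hub

print(hub.list("megengine/models"))        # inspect the available entry points

model = hub.load(
    "megengine/models",
    "retinanet_res50_coco_1x_800size",     # assumed RetinaNet entry-point name
    pretrained=True,                       # download pretrained COCO weights
)
model.eval()
# Feeding your own image requires the detector's preprocessing (resizing,
# normalization) and postprocessing (score threshold, NMS); reuse the inference
# script shipped with MegEngine/Models rather than re-implementing it here.
```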
### How to submit
Send a screenshot of your successful run together with your personal information to: mgesupport@megvii.com
**Email subject:** MegEngine introductory course, assignment 4
**Email body**
* Screenshot
* Name:
* School (company):
* Phone:
* Mailing address:
{
"cells": [
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
"import math\n",
"import numpy as np\n",
"\n",
"from megengine import Tensor, tensor\n",
"import megengine.functional as F\n",
"\n",
"\n",
"def meshgrid(x, y):\n",
" assert len(x.shape) == 1\n",
" assert len(y.shape) == 1\n",
" mesh_shape = (y.shape[0], x.shape[0])\n",
" mesh_x = x.broadcast(mesh_shape)\n",
" mesh_y = y.reshape(-1, 1).broadcast(mesh_shape)\n",
" return mesh_x, mesh_y\n",
"\n",
"\n",
"def create_anchor_grid(featmap_size, offsets, stride):\n",
" step_x, step_y = featmap_size\n",
" shift = offsets * stride\n",
"\n",
" grid_x = F.arange(shift, step_x * stride + shift, step=stride)\n",
" grid_y = F.arange(shift, step_y * stride + shift, step=stride)\n",
" grids_x, grids_y = meshgrid(grid_y, grid_x)\n",
" return grids_x.reshape(-1), grids_y.reshape(-1)\n",
"\n",
"\n",
"class AnchorGenerator:\n",
"\n",
" def __init__(\n",
" self,\n",
" anchor_scales: list = [[32], [64], [128], [256], [512]],\n",
" anchor_ratios: list = [[0.5, 1, 2]],\n",
" strides: list = [4, 8, 16, 32, 64],\n",
" offset: float = 0,\n",
" ):\n",
" super().__init__()\n",
" self.anchor_scales = np.array(anchor_scales, dtype=np.float32)\n",
" self.anchor_ratios = np.array(anchor_ratios, dtype=np.float32)\n",
" self.strides = strides\n",
" self.offset = offset\n",
" self.num_features = len(strides)\n",
"\n",
" self.base_anchors = self._different_level_anchors(anchor_scales, anchor_ratios)\n",
"\n",
" def _different_level_anchors(self, scales, ratios):\n",
" if len(scales) == 1:\n",
" scales *= self.num_features\n",
" assert len(scales) == self.num_features\n",
"\n",
" if len(ratios) == 1:\n",
" ratios *= self.num_features\n",
" assert len(ratios) == self.num_features\n",
" return [\n",
" tensor(self.generate_base_anchors(scale, ratio))\n",
" # self.generate_base_anchors(scale, ratio)\n",
" for scale, ratio in zip(scales, ratios)\n",
" ]\n",
"\n",
" def generate_base_anchors(self, scales, ratios):\n",
" base_anchors = []\n",
" areas = [s ** 2.0 for s in scales]\n",
" for area in areas:\n",
" for ratio in ratios:\n",
" w = math.sqrt(area / ratio)\n",
" h = ratio * w\n",
" # center-based anchor\n",
" x0, y0, x1, y1 = -w / 2.0, -h / 2.0, w / 2.0, h / 2.0\n",
" base_anchors.append([x0, y0, x1, y1])\n",
" return base_anchors\n",
"\n",
" def generate_anchors_by_features(self, sizes):\n",
" all_anchors = []\n",
" assert len(sizes) == self.num_features, (\n",
" \"input features expected {}, got {}\".format(self.num_features, len(sizes))\n",
" )\n",
" for size, stride, base_anchor in zip(sizes, self.strides, self.base_anchors):\n",
" grid_x, grid_y = create_anchor_grid(size, self.offset, stride)\n",
" # FIXME: If F.stack works, change to stack\n",
" grid_x, grid_y = grid_x.reshape(-1, 1), grid_y.reshape(-1, 1)\n",
" grids = F.concat([grid_x, grid_y, grid_x, grid_y], axis=1)\n",
" all_anchors.append(\n",
" (grids.reshape(-1, 1, 4) + base_anchor.reshape(1, -1, 4)).reshape(-1, 4)\n",
" )\n",
" return all_anchors\n",
" \n",
" def __call__(self, featmaps):\n",
" feat_sizes = [fmap.shape[-2:] for fmap in featmaps]\n",
" return self.generate_anchors_by_features(feat_sizes)\n",
"\n",
" @property\n",
" def anchor_dim(self):\n",
" return 4"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"shape of feature map: (1, 3, 400, 480)\n",
"shape of feature map: (1, 3, 200, 240)\n",
"shape of feature map: (1, 3, 100, 120)\n",
"shape of feature map: (1, 3, 50, 60)\n",
"shape of feature map: (1, 3, 25, 30)\n"
]
}
],
"source": [
"from megengine.random import gaussian\n",
"shape = [1, 3, 25, 30]\n",
"shape_list = reversed([shape[:2] + [s * 2**i for s in shape[2:]] for i in range(5)])\n",
"feature_maps = [gaussian(shape) for shape in shape_list]\n",
"for featmap in feature_maps:\n",
" print(\"shape of feature map: {}\".format(featmap.shape))"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"anchor shape: (576000, 4)\n",
"576000 = 400 * 480 * 3\n",
"anchor shape: (144000, 4)\n",
"144000 = 200 * 240 * 3\n",
"anchor shape: (36000, 4)\n",
"36000 = 100 * 120 * 3\n",
"anchor shape: (9000, 4)\n",
"9000 = 50 * 60 * 3\n",
"anchor shape: (2250, 4)\n",
"2250 = 25 * 30 * 3\n"
]
}
],
"source": [
"anchor_generator = AnchorGenerator()\n",
"anchors_list = anchor_generator(feature_maps)\n",
"for anchors, fmap in zip(anchors_list, feature_maps):\n",
" print(\"anchor shape: {}\".format(anchors.shape))\n",
" print(\"{} = {} * {} * 3\".format(anchors.shape[0], *fmap.shape[2:]))"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tensor([1264. 1048. 1296. 1080.])\n",
"Tensor([ 608. 1032. 672. 1096.])\n",
"Tensor([1216. 992. 1344. 1120.])\n",
"Tensor([ 512. 928. 768. 1184.])\n",
"Tensor([1024. 768. 1536. 1280.])\n"
]
}
],
"source": [
"for anchors in anchors_list:\n",
" print(anchors[anchors.shape[0] * 2 // 3 + 1])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"\n",
"def cpu_nms(dets, thresh):\n",
" x1 = np.ascontiguousarray(dets[:, 0])\n",
" y1 = np.ascontiguousarray(dets[:, 1])\n",
" x2 = np.ascontiguousarray(dets[:, 2])\n",
" y2 = np.ascontiguousarray(dets[:, 3])\n",
"\n",
" areas = (x2 - x1) * (y2 - y1)\n",
" order = dets[:, 4].argsort()[::-1]\n",
" keep = list()\n",
"\n",
" while order.size > 0:\n",
" pick_ind = order[0]\n",
" keep.append(pick_ind)\n",
"\n",
" xx1 = np.maximum(x1[pick_ind], x1[order[1:]])\n",
" yy1 = np.maximum(y1[pick_ind], y1[order[1:]])\n",
" xx2 = np.minimum(x2[pick_ind], x2[order[1:]])\n",
" yy2 = np.minimum(y2[pick_ind], y2[order[1:]])\n",
"\n",
" inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)\n",
" iou = inter / (areas[pick_ind] + areas[order[1:]] - inter)\n",
"\n",
" order = order[np.where(iou <= thresh)[0] + 1]\n",
"\n",
" return keep"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"before nms:\n",
" [[ 11.5 12. 311.4 410.6 0.85]\n",
" [ 0.5 1. 300.4 400.5 0.97]\n",
" [ 200.5 300. 700.4 1000.6 0.65]\n",
" [ 250.5 310. 700.4 1000.6 0.72]]\n",
"after nms:\n",
" [[ 0.5 1. 300.4 400.5 0.97]\n",
" [ 250.5 310. 700.4 1000.6 0.72]]\n"
]
}
],
"source": [
"dets = np.array([\n",
" [11.5, 12.0, 311.4, 410.6, 0.85],\n",
" [0.5, 1.0, 300.4, 400.5, 0.97],\n",
" [200.5, 300.0, 700.4, 1000.6, 0.65],\n",
" [250.5, 310.0, 700.4, 1000.6, 0.72],\n",
"])\n",
"np.set_printoptions(suppress=True)\n",
"print(\"before nms:\\n\", dets)\n",
"keep = cpu_nms(dets, 0.5)\n",
"print(\"after nms:\\n\", dets[keep])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 4
}