提交 848decd5 编写于 作者: Z zhangwen31

[cuda][kernel]fix: yolo_box cuda kernel updated with paddle-fluid

上级 85b61a05
......@@ -26,6 +26,7 @@ namespace lite {
namespace subgraph {
namespace bm {
// fixme: yolo box has updated, check arm kernel to get more info
int YoloBoxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
CHECK(ctx != nullptr);
CHECK(op != nullptr);
......
......@@ -49,9 +49,12 @@ __host__ __device__ inline void GetYoloBox(T* box,
int index,
int stride,
int img_height,
int img_width) {
box[0] = (i + sigmoid<T>(x[index])) * img_width / grid_size;
box[1] = (j + sigmoid<T>(x[index + stride])) * img_height / grid_size;
int img_width,
float scale,
float bias) {
box[0] = (i + sigmoid<T>(x[index]) * scale + bias) * img_width / grid_size;
box[1] = (j + sigmoid<T>(x[index + stride]) * scale + bias) * img_height /
grid_size;
box[2] = std::exp(x[index + 2 * stride]) * anchors[2 * an_idx] * img_width /
input_size;
box[3] = std::exp(x[index + 3 * stride]) * anchors[2 * an_idx + 1] *
......@@ -63,12 +66,16 @@ __host__ __device__ inline void CalcDetectionBox(T* boxes,
T* box,
const int box_idx,
const int img_height,
const int img_width) {
const int img_width,
bool clip_bbox) {
boxes[box_idx] = box[0] - box[2] / 2;
boxes[box_idx + 1] = box[1] - box[3] / 2;
boxes[box_idx + 2] = box[0] + box[2] / 2;
boxes[box_idx + 3] = box[1] + box[3] / 2;
if (!clip_bbox) {
return;
}
boxes[box_idx] = boxes[box_idx] > 0 ? boxes[box_idx] : static_cast<T>(0);
boxes[box_idx + 1] =
boxes[box_idx + 1] > 0 ? boxes[box_idx + 1] : static_cast<T>(0);
......@@ -106,7 +113,10 @@ __global__ void KeYoloBoxFw(const T* input,
const int an_num,
const int class_num,
const int box_num,
int input_size) {
int input_size,
bool clip_bbox,
float scale,
float bias) {
int tid = blockIdx.x * blockDim.x + threadIdx.x;
int stride = blockDim.x * gridDim.x;
T box[4];
......@@ -141,9 +151,11 @@ __global__ void KeYoloBoxFw(const T* input,
box_idx,
grid_num,
img_height,
img_width);
img_width,
scale,
bias);
box_idx = (i * box_num + j * grid_num + k * w + l) * 4;
CalcDetectionBox<T>(boxes, box, box_idx, img_height, img_width);
CalcDetectionBox<T>(boxes, box, box_idx, img_height, img_width, clip_bbox);
int label_idx =
GetEntryIndex(i, j, k * w + l, an_num, an_stride, grid_num, 5);
......@@ -152,7 +164,7 @@ __global__ void KeYoloBoxFw(const T* input,
scores, input, label_idx, score_idx, class_num, conf, grid_num);
}
}
// fixme: yolo box has updated, check arm kernel to get more info
void YoloBoxCompute::Run() {
auto& param = this->Param<param_t>();
auto& ctx = this->ctx_->template As<CUDAContext>();
......@@ -166,6 +178,9 @@ void YoloBoxCompute::Run() {
int class_num = param.class_num;
float conf_thresh = param.conf_thresh;
int downsample_ratio = param.downsample_ratio;
bool clip_bbox = param.clip_bbox;
float scale_x_y = param.scale_x_y;
float bias = -0.5 * (scale_x_y - 1.);
const float* input = X->data<float>();
const int* imgsize = ImgSize->data<int>();
......@@ -207,7 +222,10 @@ void YoloBoxCompute::Run() {
an_num,
class_num,
box_num,
input_size);
input_size,
clip_bbox,
scale_x_y,
bias);
cudaError_t error = cudaGetLastError();
if (error != cudaSuccess) LOG(INFO) << cudaGetErrorString(error);
}
......
......@@ -35,9 +35,12 @@ inline static void get_yolo_box(float* box,
int index,
int stride,
int img_height,
int img_width) {
box[0] = (i + sigmoid(x[index])) * img_width / grid_size;
box[1] = (j + sigmoid(x[index + stride])) * img_height / grid_size;
int img_width,
float scale,
float bias) {
box[0] = (i + sigmoid(x[index]) * scale + bias) * img_width / grid_size;
box[1] =
(j + sigmoid(x[index + stride] * scale + bias)) * img_height / grid_size;
box[2] = std::exp(x[index + 2 * stride]) * anchors[2 * an_idx] * img_width /
input_size;
box[3] = std::exp(x[index + 3 * stride]) * anchors[2 * an_idx + 1] *
......@@ -58,12 +61,15 @@ inline static void calc_detection_box(float* boxes,
float* box,
const int box_idx,
const int img_height,
const int img_width) {
const int img_width,
bool clip_bbox) {
boxes[box_idx] = box[0] - box[2] / 2;
boxes[box_idx + 1] = box[1] - box[3] / 2;
boxes[box_idx + 2] = box[0] + box[2] / 2;
boxes[box_idx + 3] = box[1] + box[3] / 2;
if (!clip_bbox) {
return;
}
boxes[box_idx] = boxes[box_idx] > 0 ? boxes[box_idx] : static_cast<float>(0);
boxes[box_idx + 1] =
boxes[box_idx + 1] > 0 ? boxes[box_idx + 1] : static_cast<float>(0);
......@@ -100,7 +106,10 @@ static void YoloBoxRef(const T* input,
const int an_num,
const int class_num,
const int box_num,
int input_size) {
int input_size,
bool clip_bbox,
float scale,
float bias) {
const int stride = h * w;
const int an_stride = (class_num + 5) * stride;
float box[4];
......@@ -132,9 +141,12 @@ static void YoloBoxRef(const T* input,
box_idx,
stride,
img_height,
img_width);
img_width,
scale,
bias);
box_idx = (i * box_num + j * stride + k * w + l) * 4;
calc_detection_box(boxes, box, box_idx, img_height, img_width);
calc_detection_box(
boxes, box, box_idx, img_height, img_width, clip_bbox);
int label_idx =
get_entry_index(i, j, k * w + l, an_num, an_stride, stride, 5);
......@@ -163,6 +175,9 @@ TEST(yolo_box, normal) {
param.downsample_ratio = 2;
param.conf_thresh = 0.5;
param.class_num = cls;
param.clip_bbox = true;
param.scale_x_y = 1.0;
float bias = -0.5 * (param.scale_x_y - 1.);
int m = h * w * param.anchors.size() / 2;
x.Resize({n, c, h, w});
......@@ -240,7 +255,10 @@ TEST(yolo_box, normal) {
param.anchors.size() / 2,
cls,
m,
param.downsample_ratio * h);
param.downsample_ratio * h,
param.clip_bbox,
param.scale_x_y,
bias);
for (int i = 0; i < boxes.numel(); i++) {
EXPECT_NEAR(boxes_cpu_data[i], boxes_ref_data[i], 1e-5);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册