提交 db415913 编写于 作者: joker3212 提交者: Gines

CUDA version of cvMatToOpInput (#1212)

上级 99fb8c59
......@@ -87,7 +87,41 @@ namespace op
// CUDA version (if #Gpus > n)
else
{
error("Not implemented yet.", __LINE__, __FUNCTION__, __FILE__);
// (Re)Allocate temporary memory
const unsigned int inputImageSize = 3 * cvInputData.rows * cvInputData.cols;
const unsigned int outputImageSize = 3 * netInputSizes[i].x * netInputSizes[i].y;
if (pInputMaxSize < inputImageSize)
{
pInputMaxSize = inputImageSize;
// Free temporary memory
cudaFree(pInputImageCuda);
cudaFree(pInputImageReorderedCuda);
// Re-allocate memory
cudaMalloc((void**)&pInputImageCuda, sizeof(unsigned char) * inputImageSize);
cudaMalloc((void**)&pInputImageReorderedCuda, sizeof(float) * inputImageSize);
}
if (pOutputMaxSize < outputImageSize)
{
pOutputMaxSize = outputImageSize;
// Free temporary memory
cudaFree(pOutputImageCuda);
// Re-allocate memory
cudaMalloc((void**)&pOutputImageCuda, sizeof(float) * outputImageSize);
}
// Copy image to GPU
cudaMemcpy(
pInputImageCuda, cvInputData.data, sizeof(unsigned char) * inputImageSize,
cudaMemcpyHostToDevice);
// Resize image on GPU
reorderAndCast(pInputImageReorderedCuda, pInputImageCuda, cvInputData.cols, cvInputData.rows, 3);
resizeAndMergeRGBGPU(
pOutputImageCuda, pInputImageReorderedCuda, cvInputData.cols, cvInputData.rows,
netInputSizes[i].x, netInputSizes[i].y, (float)scaleInputToNetInputs[i]);
// Copy back to CPU
inputNetData[i].reset({1, 3, netInputSizes.at(i).y, netInputSizes.at(i).x});
cudaMemcpy(
inputNetData[i].getPtr(), pOutputImageCuda, sizeof(float) * outputImageSize,
cudaMemcpyDeviceToHost);
}
}
return inputNetData;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册