From db415913279e2ae4e2c4dd71fd7fc7504759d57e Mon Sep 17 00:00:00 2001 From: joker3212 Date: Mon, 6 May 2019 23:56:19 -0400 Subject: [PATCH] CUDA version of cvMatToOpInput (#1212) --- src/openpose/core/cvMatToOpInput.cpp | 36 +++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/src/openpose/core/cvMatToOpInput.cpp b/src/openpose/core/cvMatToOpInput.cpp index 301b2113..601ccad9 100644 --- a/src/openpose/core/cvMatToOpInput.cpp +++ b/src/openpose/core/cvMatToOpInput.cpp @@ -87,7 +87,41 @@ namespace op // CUDA version (if #Gpus > n) else { - error("Not implemented yet.", __LINE__, __FUNCTION__, __FILE__); + // (Re)Allocate temporary memory + const unsigned int inputImageSize = 3 * cvInputData.rows * cvInputData.cols; + const unsigned int outputImageSize = 3 * netInputSizes[i].x * netInputSizes[i].y; + if (pInputMaxSize < inputImageSize) + { + pInputMaxSize = inputImageSize; + // Free temporary memory + cudaFree(pInputImageCuda); + cudaFree(pInputImageReorderedCuda); + // Re-allocate memory + cudaMalloc((void**)&pInputImageCuda, sizeof(unsigned char) * inputImageSize); + cudaMalloc((void**)&pInputImageReorderedCuda, sizeof(float) * inputImageSize); + } + if (pOutputMaxSize < outputImageSize) + { + pOutputMaxSize = outputImageSize; + // Free temporary memory + cudaFree(pOutputImageCuda); + // Re-allocate memory + cudaMalloc((void**)&pOutputImageCuda, sizeof(float) * outputImageSize); + } + // Copy image to GPU + cudaMemcpy( + pInputImageCuda, cvInputData.data, sizeof(unsigned char) * inputImageSize, + cudaMemcpyHostToDevice); + // Resize image on GPU + reorderAndCast(pInputImageReorderedCuda, pInputImageCuda, cvInputData.cols, cvInputData.rows, 3); + resizeAndMergeRGBGPU( + pOutputImageCuda, pInputImageReorderedCuda, cvInputData.cols, cvInputData.rows, + netInputSizes[i].x, netInputSizes[i].y, (float)scaleInputToNetInputs[i]); + // Copy back to CPU + inputNetData[i].reset({1, 3, netInputSizes.at(i).y, netInputSizes.at(i).x}); + cudaMemcpy( + inputNetData[i].getPtr(), pOutputImageCuda, sizeof(float) * outputImageSize, + cudaMemcpyDeviceToHost); } } return inputNetData; -- GitLab