提交 e3b80524 编写于 作者: G gineshidalgo99

CPU version released

上级 7325aa32
......@@ -110,14 +110,16 @@ set(GPU_MODE CUDA CACHE STRING "Select the acceleration GPU library or CPU other
# else ()
# set_property(CACHE GPU_MODE PROPERTY STRINGS CPU_ONLY)
# endif ()
set_property(CACHE GPU_MODE PROPERTY STRINGS CUDA)
set_property(CACHE GPU_MODE PROPERTY STRINGS CUDA CPU_ONLY)
# Look for CUDA
set(CUDA_FOUND FALSE)
if (${GPU_MODE} MATCHES "CUDA")
find_package(CUDA)
endif (${GPU_MODE} MATCHES "CUDA")
# Look for OpenCL
set(OpenCL_FOUND FALSE)
set(CUDA_VERSION_MAJOR 0)
if (${GPU_MODE} MATCHES "OPENCL")
find_package(OpenCL)
endif (${GPU_MODE} MATCHES "OPENCL")
......@@ -136,9 +138,10 @@ if (${GPU_MODE} MATCHES "CUDA")
add_definitions(-DUSE_CUDA)
message(STATUS "Building with CUDA.")
elseif (${GPU_MODE} MATCHES "CPU_ONLY")
add_definitions(-DUSE_CPU_ONLY)
message(STATUS "Building CPU Only.")
# OpenPose flag for Caffe
add_definitions(-DCPU_ONLY)
message(STATUS "Building CPU Only.")
elseif (${GPU_MODE} MATCHES "OPENCL")
# OpenPose flag for Caffe
add_definitions(-DUSE_OPENCL)
......
......@@ -28,7 +28,8 @@
## Latest Features
- Mar 2017: Improved [**3-D keypoint reconstruction module**](doc/3d_reconstruction_demo.md) (from multiple camera views)!
- Mar 2018: [**CPU version**](doc/installation.md#cpu-version)!
- Mar 2018: Improved [**3-D keypoint reconstruction module**](doc/3d_reconstruction_demo.md) (from multiple camera views)!
- Sep 2017: [**CMake**](doc/installation.md) installer and **IP camera** support!
- Jul 2017: [**Windows portable binaries and demo**](https://github.com/CMU-Perceptual-Computing-Lab/openpose/releases)!
- Jul 2017: **Hands** released!
......
# Copied from Caffe
if(CPU_ONLY)
if (CPU_ONLY)
return()
endif()
endif ()
################################################################################################
# Remove duplicates from list(s)
......@@ -10,9 +10,9 @@ endif()
# op_list_unique(<list_variable> [<list_variable>] [...])
macro(op_list_unique)
foreach(__lst ${ARGN})
if(${__lst})
if (${__lst})
list(REMOVE_DUPLICATES ${__lst})
endif()
endif ()
endforeach()
endmacro()
......@@ -29,7 +29,7 @@ endif ()
# Usage:
# op_detect_installed_gpus(out_variable)
function(op_detect_installed_gpus out_variable)
if(NOT CUDA_gpu_detect_output)
if (NOT CUDA_gpu_detect_output)
set(__cufile ${PROJECT_BINARY_DIR}/detect_cuda_archs.cu)
file(WRITE ${__cufile} ""
......@@ -53,20 +53,20 @@ function(op_detect_installed_gpus out_variable)
RESULT_VARIABLE __nvcc_res OUTPUT_VARIABLE __nvcc_out
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
if(__nvcc_res EQUAL 0)
if (__nvcc_res EQUAL 0)
if (NOT WIN32)
string(REPLACE "2.1" "2.1(2.0)" __nvcc_out "${__nvcc_out}")
endif (NOT WIN32)
set(CUDA_gpu_detect_output ${__nvcc_out} CACHE INTERNAL "Returned GPU architetures from op_detect_gpus tool" FORCE)
endif()
endif()
endif ()
endif ()
if(NOT CUDA_gpu_detect_output)
if (NOT CUDA_gpu_detect_output)
message(STATUS "Automatic GPU detection failed. Building for all known architectures.")
set(${out_variable} ${Caffe_known_gpu_archs} PARENT_SCOPE)
else()
set(${out_variable} ${CUDA_gpu_detect_output} PARENT_SCOPE)
endif()
endif ()
endfunction()
......@@ -78,10 +78,10 @@ function(op_select_nvcc_arch_flags out_variable)
# List of arch names
set(__archs_names "Fermi" "Kepler" "Maxwell" "Pascal" "All" "Manual")
set(__archs_name_default "All")
if(NOT CMAKE_CROSSCOMPILING)
if (NOT CMAKE_CROSSCOMPILING)
list(APPEND __archs_names "Auto")
set(__archs_name_default "Auto")
endif()
endif ()
# set CUDA_ARCH strings (so it will be seen as dropbox in CMake-Gui)
# set(CUDA_ARCH ${__archs_name_default} CACHE STRING "Select target NVIDIA GPU achitecture.")
......@@ -89,35 +89,35 @@ function(op_select_nvcc_arch_flags out_variable)
# mark_as_advanced(CUDA_ARCH)
# verify CUDA_ARCH value
if(NOT ";${__archs_names};" MATCHES ";${CUDA_ARCH};")
if (NOT ";${__archs_names};" MATCHES ";${CUDA_ARCH};")
string(REPLACE ";" ", " __archs_names "${__archs_names}")
message(FATAL_ERROR "Only ${__archs_names} architeture names are supported.")
endif()
endif ()
if(${CUDA_ARCH} STREQUAL "Manual")
if (${CUDA_ARCH} STREQUAL "Manual")
set(CUDA_ARCH_BIN ${Caffe_known_gpu_archs} CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported")
set(CUDA_ARCH_PTX "50" CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for")
# mark_as_advanced(CUDA_ARCH_BIN CUDA_ARCH_PTX)
else()
unset(CUDA_ARCH_BIN CACHE)
unset(CUDA_ARCH_PTX CACHE)
endif()
endif ()
if(${CUDA_ARCH} STREQUAL "Fermi" AND NOT WIN32)
if (${CUDA_ARCH} STREQUAL "Fermi" AND NOT WIN32)
set(__cuda_arch_bin "20 21(20)")
elseif(${CUDA_ARCH} STREQUAL "Kepler")
elseif (${CUDA_ARCH} STREQUAL "Kepler")
set(__cuda_arch_bin "30 35")
elseif(${CUDA_ARCH} STREQUAL "Maxwell")
elseif (${CUDA_ARCH} STREQUAL "Maxwell")
set(__cuda_arch_bin "50 52")
elseif(${CUDA_ARCH} STREQUAL "Pascal")
elseif (${CUDA_ARCH} STREQUAL "Pascal")
set(__cuda_arch_bin "60 61")
elseif(${CUDA_ARCH} STREQUAL "All")
elseif (${CUDA_ARCH} STREQUAL "All")
set(__cuda_arch_bin ${Caffe_known_gpu_archs})
elseif(${CUDA_ARCH} STREQUAL "Auto")
elseif (${CUDA_ARCH} STREQUAL "Auto")
op_detect_installed_gpus(__cuda_arch_bin)
else() # (${CUDA_ARCH} STREQUAL "Manual")
set(__cuda_arch_bin ${CUDA_ARCH_BIN})
endif()
endif ()
# remove dots and convert to lists
string(REGEX REPLACE "\\." "" __cuda_arch_bin "${__cuda_arch_bin}")
......@@ -131,7 +131,7 @@ function(op_select_nvcc_arch_flags out_variable)
# Tell NVCC to add binaries for the specified GPUs
foreach(__arch ${__cuda_arch_bin})
if(__arch MATCHES "([0-9]+)\\(([0-9]+)\\)")
if (__arch MATCHES "([0-9]+)\\(([0-9]+)\\)")
# User explicitly specified PTX for the concrete BIN
list(APPEND __nvcc_flags -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1})
list(APPEND __nvcc_archs_readable sm_${CMAKE_MATCH_1})
......@@ -139,7 +139,7 @@ function(op_select_nvcc_arch_flags out_variable)
# User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN
list(APPEND __nvcc_flags -gencode arch=compute_${__arch},code=sm_${__arch})
list(APPEND __nvcc_archs_readable sm_${__arch})
endif()
endif ()
endforeach()
# Tell NVCC to add PTX intermediate code for the specified architectures
......@@ -166,13 +166,13 @@ macro(op_cuda_compile objlist_variable)
endforeach()
if(UNIX OR APPLE)
if (UNIX OR APPLE)
list(APPEND CUDA_NVCC_FLAGS -Xcompiler -fPIC)
endif()
endif ()
if(APPLE)
if (APPLE)
list(APPEND CUDA_NVCC_FLAGS -Xcompiler -Wno-unused-function)
endif()
endif ()
cuda_compile(cuda_objcs ${ARGN})
......@@ -197,18 +197,18 @@ function(detect_cuDNN)
DOC "Path to cuDNN include directory." )
# dynamic libs have different suffix in mac and linux
if(APPLE)
if (APPLE)
set(CUDNN_LIB_NAME "libcudnn.dylib")
else()
set(CUDNN_LIB_NAME "libcudnn.so")
endif()
endif ()
get_filename_component(__libpath_hist ${CUDA_CUDART_LIBRARY} PATH)
find_library(CUDNN_LIBRARY NAMES ${CUDNN_LIB_NAME}
PATHS ${CUDNN_ROOT} $ENV{CUDNN_ROOT} ${CUDNN_INCLUDE} ${__libpath_hist} ${__libpath_hist}/../lib
DOC "Path to cuDNN library.")
if(CUDNN_INCLUDE AND CUDNN_LIBRARY)
if (CUDNN_INCLUDE AND CUDNN_LIBRARY)
set(HAVE_CUDNN TRUE PARENT_SCOPE)
set(CUDNN_FOUND TRUE PARENT_SCOPE)
......@@ -228,23 +228,23 @@ function(detect_cuDNN)
string(REGEX REPLACE "define CUDNN_PATCHLEVEL * +([0-9]+)" "\\1"
CUDNN_VERSION_PATCH "${CUDNN_VERSION_PATCH}")
if(NOT CUDNN_VERSION_MAJOR)
if (NOT CUDNN_VERSION_MAJOR)
set(CUDNN_VERSION "???")
else()
set(CUDNN_VERSION "${CUDNN_VERSION_MAJOR}.${CUDNN_VERSION_MINOR}.${CUDNN_VERSION_PATCH}")
endif()
endif ()
message(STATUS "Found cuDNN: ver. ${CUDNN_VERSION} found (include: ${CUDNN_INCLUDE}, library: ${CUDNN_LIBRARY})")
string(COMPARE LESS "${CUDNN_VERSION_MAJOR}" 3 cuDNNVersionIncompatible)
if(cuDNNVersionIncompatible)
if (cuDNNVersionIncompatible)
message(FATAL_ERROR "cuDNN version >3 is required.")
endif()
endif ()
set(CUDNN_VERSION "${CUDNN_VERSION}" PARENT_SCOPE)
mark_as_advanced(CUDNN_INCLUDE CUDNN_LIBRARY CUDNN_ROOT)
endif()
endif ()
endfunction()
################################################################################################
......@@ -254,9 +254,9 @@ endfunction()
find_package(CUDA 5.5 QUIET)
find_cuda_helper_libs(curand) # cmake 2.8.7 compartibility which doesn't search for curand
if(NOT CUDA_FOUND)
if (NOT CUDA_FOUND)
return()
endif()
endif ()
set(HAVE_CUDA TRUE)
message(STATUS "CUDA detected: " ${CUDA_VERSION})
......@@ -265,14 +265,14 @@ list(APPEND Caffe_LINKER_LIBS PUBLIC ${CUDA_CUDART_LIBRARY}
${CUDA_curand_LIBRARY} ${CUDA_CUBLAS_LIBRARIES})
# cudnn detection
if(USE_CUDNN)
if (USE_CUDNN)
detect_cuDNN()
if(HAVE_CUDNN)
if (HAVE_CUDNN)
list(APPEND Caffe_DEFINITIONS PUBLIC -DUSE_CUDNN)
list(APPEND Caffe_INCLUDE_DIRS PUBLIC ${CUDNN_INCLUDE})
list(APPEND Caffe_LINKER_LIBS PUBLIC ${CUDNN_LIBRARY})
endif()
endif()
endif ()
endif ()
# setting nvcc arch flags
op_select_nvcc_arch_flags(NVCC_FLAGS_EXTRA)
......@@ -281,11 +281,11 @@ message(STATUS "Added CUDA NVCC flags for: ${NVCC_FLAGS_EXTRA_readable}")
# Boost 1.55 workaround, see https://svn.boost.org/trac/boost/ticket/9392 or
# https://github.com/ComputationalRadiationPhysics/picongpu/blob/master/src/picongpu/CMakeLists.txt
if(Boost_VERSION EQUAL 105500)
if (Boost_VERSION EQUAL 105500)
message(STATUS "Cuda + Boost 1.55: Applying noinline work around")
# avoid warning for CMake >= 2.8.12
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} \"-DBOOST_NOINLINE=__attribute__((noinline))\" ")
endif()
endif ()
# disable some nvcc diagnostics that appear in boost, glog, gflags, opencv, etc.
foreach(diag cc_clobber_ignored integer_sign_change useless_using_declaration set_but_not_used)
......@@ -293,20 +293,20 @@ foreach(diag cc_clobber_ignored integer_sign_change useless_using_declaration se
endforeach()
# setting default testing device
if(NOT CUDA_TEST_DEVICE)
if (NOT CUDA_TEST_DEVICE)
set(CUDA_TEST_DEVICE -1)
endif()
endif ()
mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD)
mark_as_advanced(CUDA_SDK_ROOT_DIR CUDA_SEPARABLE_COMPILATION)
# Handle clang/libc++ issue
if(APPLE)
if (APPLE)
op_detect_darwin_version(OSX_VERSION)
# OSX 10.9 and higher uses clang/libc++ by default which is incompatible with old CUDA toolkits
if(OSX_VERSION VERSION_GREATER 10.8)
if (OSX_VERSION VERSION_GREATER 10.8)
# enabled by default if and only if CUDA version is less than 7.0
op_option(USE_libstdcpp "Use libstdc++ instead of libc++" (CUDA_VERSION VERSION_LESS 7.0))
endif()
endif()
\ No newline at end of file
endif ()
endif ()
......@@ -12,12 +12,13 @@ OpenPose - Installation
8. [Uninstallation](#uninstallation)
9. [Optional Settings](#optional-settings)
1. [MPI Model](#mpi-model)
2. [3D Reconstruction Module](#3d-reconstruction-module)
3. [Compiling without cuDNN](#compiling-without-cudnn)
4. [Custom Caffe (Ubuntu Only)](#custom-caffe-ubuntu-only)
5. [Custom OpenCV (Ubuntu Only)](#custom-opencv-ubuntu-only)
6. [Doxygen Documentation Autogeneration (Ubuntu Only)](#doxygen-documentation-autogeneration-ubuntu-only)
7. [CMake Command Line Configuration (Ubuntu Only)](#cmake-command-line-configuration-ubuntu-only)
2. [CPU Version](#cpu-version)
3. [3D Reconstruction Module](#3d-reconstruction-module)
4. [Compiling without cuDNN](#compiling-without-cudnn)
5. [Custom Caffe (Ubuntu Only)](#custom-caffe-ubuntu-only)
6. [Custom OpenCV (Ubuntu Only)](#custom-opencv-ubuntu-only)
7. [Doxygen Documentation Autogeneration (Ubuntu Only)](#doxygen-documentation-autogeneration-ubuntu-only)
8. [CMake Command Line Configuration (Ubuntu Only)](#cmake-command-line-configuration-ubuntu-only)
......@@ -37,11 +38,15 @@ This installation section is only intended if you plan to modify the OpenPose co
## Requirements
- NVIDIA graphics card with at least 1.6 GB available (the `nvidia-smi` command checks the available GPU memory in Ubuntu).
- At least 2 GB of free RAM memory.
- Highly recommended: cuDNN and a CPU with at least 8 cores.
Requirements for the default configuration (you might need more resources with a greater `--net_resolution` and/or `scale_number` or less resources by reducing the net resolution and/or using the MPI and MPI_4 models):
Note: These requirements assume the default configuration (i.e. `--net_resolution "656x368"` and `--scale_number 1`). You might need more resources (with a greater net resolution and/or number of scales) or fewer resources (with a smaller net resolution and/or using the MPI and MPI_4 models).
- Nvidia GPU version:
- NVIDIA graphics card with at least 1.6 GB available (the `nvidia-smi` command checks the available GPU memory in Ubuntu).
- At least 2 GB of free RAM memory.
- Highly recommended: cuDNN.
- CPU version:
- Around 8GB of free RAM memory.
- Highly recommended: a CPU with at least 8 cores.
......@@ -87,17 +92,18 @@ The instructions in this section describe the steps to build OpenPose using CMak
1. Download and install CMake GUI:
- Ubuntu: run the command `sudo apt-get install cmake-qt-gui`. Note: If you prefer to use CMake through the command line, see [Cmake Command Line Build](#cmake-command-line-build-ubuntu-only).
- Windows: download and install the latest CMake win64-x64 msi installer from the [CMake website](https://cmake.org/download/), called `cmake-X.X.X-win64-x64.msi`.
2. [**CUDA 8**](https://developer.nvidia.com/cuda-80-ga2-download-archive):
- Ubuntu: Run `sudo ubuntu/install_cuda.sh` or alternatively download and install it from their website.
- Windows: Install CUDA 8.0 after Visual Studio 2015 is installed to assure that the CUDA installation will generate all necessary files for VS. If CUDA was already installed, re-install CUDA after installing VS!
- **IMPORTANT**: As of a recent Windows update, you have to download the Nvidia [drivers](http://www.nvidia.com/Download/index.aspx) drivers first, and then install CUDA without the Graphics Driver flag or else your system might hang.
3. [**cuDNN 5.1**](https://developer.nvidia.com/cudnn):
- Ubuntu: Run `sudo ubuntu/install_cudnn.sh` or alternatively download and install it from their website.
- Windows (and Ubuntu if manual installation): In order to manually install it, just unzip it and copy (merge) the contents on the CUDA folder, usually `/usr/local/cuda/` in Ubuntu and `C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0` in Windows.
4. Ubuntu - Other prerequisites:
2. Nvidia GPU version prerequisites:
1. [**CUDA 8**](https://developer.nvidia.com/cuda-80-ga2-download-archive):
- Ubuntu: Run `sudo ubuntu/install_cuda.sh` or alternatively download and install it from their website.
- Windows: Install CUDA 8.0 after Visual Studio 2015 is installed to assure that the CUDA installation will generate all necessary files for VS. If CUDA was already installed, re-install CUDA after installing VS!
- **IMPORTANT**: As of a recent Windows update, you have to download the Nvidia [drivers](http://www.nvidia.com/Download/index.aspx) first, and then install CUDA without the Graphics Driver flag or else your system might hang.
2. [**cuDNN 5.1**](https://developer.nvidia.com/cudnn):
- Ubuntu: Run `sudo ubuntu/install_cudnn.sh` or alternatively download and install it from their website.
- Windows (and Ubuntu if manual installation): In order to manually install it, just unzip it and copy (merge) its contents into the CUDA folder, usually `/usr/local/cuda/` in Ubuntu and `C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0` in Windows.
3. Ubuntu - Other prerequisites:
- Caffe prerequisites: By default, OpenPose uses Caffe under the hood. If you have not used Caffe previously, install its dependencies by running `sudo bash ./ubuntu/install_cmake.sh`.
- OpenCV must be already installed on your machine. It can be installed with `apt-get install libopencv-dev`. You can also use your own compiled OpenCV version.
5. Windows - **Microsoft Visual Studio (VS) 2015 Enterprise Update 3**:
4. Windows - **Microsoft Visual Studio (VS) 2015 Enterprise Update 3**:
- If **Visual Studio 2017 Community** is desired, we do not officially support it, but it might be compiled by first [enabling CUDA 8.0 in VS2017](https://stackoverflow.com/questions/43745099/using-cuda-with-visual-studio-2017?answertab=active#tab-top), or by using **VS2017 with CUDA 9** after checking the `.vcxproj` file and changing the necessary paths from CUDA 8 to 9.
- VS 2015 Enterprise Update 1 will give some compiler errors and VS 2015 Community has not been tested.
5. Windows - **Caffe, OpenCV, and Caffe prerequisites**:
......@@ -214,6 +220,25 @@ By default, the body MPI model is not downloaded. You can download it by turning
#### CPU Version
To manually select the CPU Version, open CMake GUI mentioned above, and set the `GPU_MODE` flag to `CPU_ONLY`. **NOTE: Accuracy of the CPU version is ~1% higher than CUDA version, so the results will vary.**
- On Ubuntu, OpenPose will link against the Intel MKL version (Math Kernel Library) of Caffe. Alternatively, the user can choose his own Caffe version, by unselecting `USE_MKL` and selecting his own Caffe path.
- On Windows, it will use the default version of Caffe or one provided by the user on the CPU.
The default CPU version processes ~0.2 images per second on Ubuntu (~50x slower than GPU), while the MKL version provides a roughly 2x speedup at ~0.4 images per second. As of now, OpenPose does not support MKL on Windows, but it will at a later date. Also, the MKL version does not support unfixed resolution, so processing a folder of images with different resolutions requires a fixed net resolution, e.g. the `--net_resolution 656x368` flag.
The user can configure the environment variables `MKL_NUM_THREADS` and `OMP_NUM_THREADS`. They are set at an optimum parameter level by default (i.e., to the number of threads of the machine). However, they can be tweaked by running the following commands in the terminal window, right before running any OpenPose application. E.g.:
```
# Optimal number = Number of threads (used by default)
export MKL_NUM_THREADS="8"
export OMP_NUM_THREADS="8"
```
Do note that increasing the number of threads results in more memory use. You can check the [OpenPose benchmark](https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/doc/faq.md#speed-up-and-benchmark) for more information about speed and memory requirements in several CPUs and GPUs.
#### 3D Reconstruction Module
You can include the 3D reconstruction module by:
......
......@@ -67,6 +67,8 @@ There are 2 alternatives to save the OpenPose output.
2. (Deprecated) The `write_keypoint` flag uses the OpenCV cv::FileStorage default formats, i.e. JSON (available after OpenCV 3.0), XML, and YML. Note that it does not include any information other than keypoints.
Both of them follow the keypoint ordering described in the [Keypoint Ordering](#keypoint-ordering) section.
### Keypoint Ordering
......
OpenPose Library - Latest Released Features
====================================
- Mar 2017: Improved [**3-D keypoint reconstruction module**](doc/3d_reconstruction_demo.md) (from multiple camera views)!
- Mar 2018: [**CPU version**](doc/installation.md#cpu-version)!
- Mar 2018: Improved [**3-D keypoint reconstruction module**](doc/3d_reconstruction_demo.md) (from multiple camera views)!
- Sep 2017: [**CMake**](doc/installation.md) installer and **IP camera** support!
- Jul 2017: [**Windows portable binaries and demo**](https://github.com/CMU-Perceptual-Computing-Lab/openpose/releases)!
- Jul 2017: **Hands** released!
......
// ------------------------- OpenPose Resize Layer Testing -------------------------
#include <chrono> // `std::chrono::` functions and classes, e.g. std::chrono::milliseconds
// GFlags: DEFINE_bool, _int32, _int64, _uint64, _double, _string
#include <gflags/gflags.h>
// Allow Google Flags in Ubuntu 14
#ifndef GFLAGS_GFLAGS_H_
namespace gflags = google;
#endif
#include <openpose/headers.hpp>
#include <openpose/gpu/cuda.hpp>
#ifdef USE_CAFFE
#include <caffe/net.hpp>
#endif
#include <openpose/net/resizeAndMergeBase.hpp>
DEFINE_string(image_path, "examples/media/COCO_val2014_000000000192.jpg", "Process the desired image.");
#ifdef USE_CUDA
#include <chrono> // `std::chrono::` functions and classes, e.g. std::chrono::milliseconds
// GFlags: DEFINE_bool, _int32, _int64, _uint64, _double, _string
#include <gflags/gflags.h>
// Allow Google Flags in Ubuntu 14
#ifndef GFLAGS_GFLAGS_H_
namespace gflags = google;
#endif
#ifdef USE_CAFFE
#include <caffe/net.hpp>
#endif
cv::Mat gpuResize(cv::Mat& img, cv::Size newSize)
{
#ifdef USE_CUDA
DEFINE_string(image_path, "examples/media/COCO_val2014_000000000192.jpg", "Process the desired image.");
cv::Mat gpuResize(cv::Mat& img, const cv::Size& newSize)
{
#ifdef USE_CUDA
// Upload to Source to GPU
float* cpuPtr = &img.at<float>(0);
float* gpuPtr;
cudaMallocHost((void **)&gpuPtr, img.size().width * img.size().height * sizeof(float));
cudaMemcpy(gpuPtr, cpuPtr, img.size().width * img.size().height * sizeof(float),
cudaMemcpyHostToDevice);
// Upload to Dest to GPU
cv::Mat newImg = cv::Mat(newSize,CV_32FC1,cv::Scalar(0));
float* newCpuPtr = &newImg.at<float>(0);
float* newGpuPtr;
cudaMallocHost((void **)&newGpuPtr, newSize.width * newSize.height * sizeof(float));
cudaMemcpy(newGpuPtr, newCpuPtr, newSize.width * newSize.height * sizeof(float),
cudaMemcpyHostToDevice);
std::vector<const float*> sourcePtrs;
sourcePtrs.emplace_back(gpuPtr);
std::array<int, 4> targetSize = {1,1,newImg.size().height,newImg.size().width};
std::array<int, 4> sourceSize = {1,1,img.size().height,img.size().width};
std::vector<std::array<int, 4>> sourceSizes;
sourceSizes.emplace_back(sourceSize);
op::resizeAndMergeGpu(newGpuPtr, sourcePtrs, targetSize, sourceSizes);
cudaMemcpy(newCpuPtr, newGpuPtr, newImg.size().width * newImg.size().height * sizeof(float),
cudaMemcpyDeviceToHost);
cudaFree(gpuPtr);
cudaFree(newGpuPtr);
return newImg;
#else
UNUSED(img);
UNUSED(newSize);
op::error("OpenPose must be compiled with the `USE_CAFFE` & `USE_CUDA` macro definitions in order to run"
" this functionality.", __LINE__, __FUNCTION__, __FILE__);
#endif
}
cv::Mat cpuResize(cv::Mat& img, cv::Size newSize)
{
// Upload to Source to GPU
float* cpuPtr = &img.at<float>(0);
float* gpuPtr;
cudaMallocHost((void **)&gpuPtr, img.size().width * img.size().height * sizeof(float));
cudaMemcpy(gpuPtr, cpuPtr, img.size().width * img.size().height * sizeof(float),
cudaMemcpyHostToDevice);
// Upload to Dest to GPU
cv::Mat newImg = cv::Mat(newSize,CV_32FC1,cv::Scalar(0));
float* newCpuPtr = &newImg.at<float>(0);
float* newGpuPtr;
cudaMallocHost((void **)&newGpuPtr, newSize.width * newSize.height * sizeof(float));
cudaMemcpy(newGpuPtr, newCpuPtr, newSize.width * newSize.height * sizeof(float),
cudaMemcpyHostToDevice);
std::vector<const float*> sourcePtrs;
sourcePtrs.emplace_back(gpuPtr);
sourcePtrs.emplace_back(cpuPtr);
std::array<int, 4> targetSize = {1,1,newImg.size().height,newImg.size().width};
std::array<int, 4> sourceSize = {1,1,img.size().height,img.size().width};
std::vector<std::array<int, 4>> sourceSizes;
sourceSizes.emplace_back(sourceSize);
op::resizeAndMergeGpu(newGpuPtr, sourcePtrs, targetSize, sourceSizes);
cudaMemcpy(newCpuPtr, newGpuPtr, newImg.size().width * newImg.size().height * sizeof(float),
cudaMemcpyDeviceToHost);
op::resizeAndMergeCpu(&newImg.at<float>(0), sourcePtrs, targetSize, sourceSizes);
cudaFree(gpuPtr);
cudaFree(newGpuPtr);
return newImg;
#else
op::error("OpenPose must be compiled with the `USE_CAFFE` & `USE_CUDA` macro definitions in order to run"
" this functionality.", __LINE__, __FUNCTION__, __FILE__);
#endif
}
cv::Mat cpuResize(cv::Mat& img, cv::Size newSize)
{
// Upload to Source to GPU
float* cpuPtr = &img.at<float>(0);
// Upload to Dest to GPU
cv::Mat newImg = cv::Mat(newSize,CV_32FC1,cv::Scalar(0));
std::vector<const float*> sourcePtrs;
sourcePtrs.emplace_back(cpuPtr);
std::array<int, 4> targetSize = {1,1,newImg.size().height,newImg.size().width};
std::array<int, 4> sourceSize = {1,1,img.size().height,img.size().width};
std::vector<std::array<int, 4>> sourceSizes;
sourceSizes.emplace_back(sourceSize);
op::resizeAndMergeCpu(&newImg.at<float>(0), sourcePtrs, targetSize, sourceSizes);
return newImg;
}
int resizeTest()
{
// logging_level
cv::Mat img = op::loadImage(FLAGS_image_path, CV_LOAD_IMAGE_GRAYSCALE);
if(img.empty())
op::error("Could not open or find the image: " + FLAGS_image_path, __LINE__, __FUNCTION__, __FILE__);
img.convertTo(img, CV_32FC1);
img = cpuResize(img, cv::Size(img.size().width/4,img.size().height/4));
img*=0.005;
cv::Mat gpuImg = gpuResize(img, cv::Size(img.size().width*8,img.size().height*8));
cv::Mat cpuImg = cpuResize(img, cv::Size(img.size().width*8,img.size().height*8));
cv::imshow("gpuImg", gpuImg);
cv::imshow("cpuImg", cpuImg);
op::log("Done");
cv::waitKey(0);
return 0;
}
}
int resizeTest()
{
// logging_level
cv::Mat img = op::loadImage(FLAGS_image_path, CV_LOAD_IMAGE_GRAYSCALE);
if(img.empty())
op::error("Could not open or find the image: " + FLAGS_image_path, __LINE__, __FUNCTION__, __FILE__);
img.convertTo(img, CV_32FC1);
img = cpuResize(img, cv::Size(img.size().width/4,img.size().height/4));
img*=0.005;
cv::Mat gpuImg = gpuResize(img, cv::Size(img.size().width*8,img.size().height*8));
cv::Mat cpuImg = cpuResize(img, cv::Size(img.size().width*8,img.size().height*8));
cv::imshow("gpuImg", gpuImg);
cv::imshow("cpuImg", cpuImg);
op::log("Done");
cv::waitKey(0);
return 0;
}
#endif
int main(int argc, char *argv[])
{
// Parsing command line flags
gflags::ParseCommandLineFlags(&argc, &argv, true);
#ifdef USE_CUDA
// Parsing command line flags
gflags::ParseCommandLineFlags(&argc, &argv, true);
// Running handFromJsonTest
return resizeTest();
// Running handFromJsonTest
return resizeTest();
#else
op::error("OpenPose must be compiled with the `USE_CAFFE` & `USE_CUDA` macro definitions in order to run"
" this functionality.", __LINE__, __FUNCTION__, __FILE__);
return 0;
#endif
}
......@@ -137,27 +137,27 @@ namespace op
try
{
#ifdef USE_CAFFE
// Initialize net
#ifdef USE_OPENCL
caffe::Caffe::set_mode(caffe::Caffe::GPU);
caffe::Caffe::SelectDevice(upImpl->mGpuId, true);
upImpl->upCaffeNet.reset(new caffe::Net<float>{upImpl->mCaffeProto, caffe::TEST,
caffe::Caffe::GetDefaultDevice()});
upImpl->upCaffeNet->CopyTrainedLayersFrom(upImpl->mCaffeTrainedModel);
op::OpenCL::getInstance(upImpl->mGpuId, CL_DEVICE_TYPE_GPU, true);
#else
#ifdef USE_CUDA
caffe::Caffe::set_mode(caffe::Caffe::GPU);
caffe::Caffe::SetDevice(upImpl->mGpuId);
#else
caffe::Caffe::set_mode(caffe::Caffe::CPU);
#endif
upImpl->upCaffeNet.reset(new caffe::Net<float>{upImpl->mCaffeProto, caffe::TEST});
upImpl->upCaffeNet->CopyTrainedLayersFrom(upImpl->mCaffeTrainedModel);
#ifdef USE_CUDA
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
#endif
#endif
// Initialize net
#ifdef USE_OPENCL
caffe::Caffe::set_mode(caffe::Caffe::GPU);
caffe::Caffe::SelectDevice(upImpl->mGpuId, true);
upImpl->upCaffeNet.reset(new caffe::Net<float>{upImpl->mCaffeProto, caffe::TEST,
caffe::Caffe::GetDefaultDevice()});
upImpl->upCaffeNet->CopyTrainedLayersFrom(upImpl->mCaffeTrainedModel);
op::OpenCL::getInstance(upImpl->mGpuId, CL_DEVICE_TYPE_GPU, true);
#else
#ifdef USE_CUDA
caffe::Caffe::set_mode(caffe::Caffe::GPU);
caffe::Caffe::SetDevice(upImpl->mGpuId);
#else
caffe::Caffe::set_mode(caffe::Caffe::CPU);
#endif
upImpl->upCaffeNet.reset(new caffe::Net<float>{upImpl->mCaffeProto, caffe::TEST});
upImpl->upCaffeNet->CopyTrainedLayersFrom(upImpl->mCaffeTrainedModel);
#ifdef USE_CUDA
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
#endif
#endif
// Set spOutputBlob
upImpl->spOutputBlob = upImpl->upCaffeNet->blob_by_name(upImpl->mLastBlobName);
if (upImpl->spOutputBlob == nullptr)
......
......@@ -123,11 +123,17 @@ namespace op
+ std::to_string(wrapperStructPose.outputSize.x) + "x"
+ std::to_string(wrapperStructPose.outputSize.y) + ").",
__LINE__, __FUNCTION__, __FILE__);
if (wrapperStructOutput.writeVideoFps <= 0 && wrapperStructInput.producerSharedPtr->get(CV_CAP_PROP_FPS) > 0)
if (wrapperStructOutput.writeVideoFps <= 0
&& wrapperStructInput.producerSharedPtr->get(CV_CAP_PROP_FPS) > 0)
error("Set `--camera_fps` for this producer, as its frame rate is unknown.",
__LINE__, __FUNCTION__, __FILE__);
#ifdef USE_CPU_ONLY
if (wrapperStructPose.scalesNumber > 1)
error("Temporarily, the number of scales (`--scale_number`) cannot be greater than 1 for"
" `CPU_ONLY` version.", __LINE__, __FUNCTION__, __FILE__);
#endif
// Net input resolution cannot be reshaped for Caffe OpenCL and MKL versions, only for CUDA version
#if defined USE_MKL || defined CPU_ONLY
#if defined USE_MKL || defined USE_CPU_ONLY
// If image_dir and netInputSize == -1 --> error
if ((wrapperStructInput.producerSharedPtr == nullptr
|| wrapperStructInput.producerSharedPtr->getType() == ProducerType::ImageDirectory)
......
......@@ -330,6 +330,7 @@ ifeq ($(USE_CUDA), 1)
COMMON_FLAGS += -DUSE_CUDA
else
COMMON_FLAGS += -DCPU_ONLY # For Caffe
COMMON_FLAGS += -DUSE_CPU_ONLY
endif
LIBRARY_DIRS += $(LIB_BUILD_DIR)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册