CPU version released

e3b80524 · gineshidalgo99 · 7325aa32 · e3b80524 · e3b80524 · e3b80524
10 changed file
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -110,14 +110,16 @@ set(GPU_MODE CUDA CACHE STRING "Select the acceleration GPU library or CPU other
 # else ()
 #   set_property(CACHE GPU_MODE PROPERTY STRINGS CPU_ONLY)
 # endif ()
-set_property(CACHE GPU_MODE PROPERTY STRINGS CUDA)
+set_property(CACHE GPU_MODE PROPERTY STRINGS CUDA CPU_ONLY)

 # Look for CUDA
+set(CUDA_FOUND FALSE)
+set(CUDA_VERSION_MAJOR 0)
 if (${GPU_MODE} MATCHES "CUDA")
  find_package(CUDA)
 endif (${GPU_MODE} MATCHES "CUDA")
 # Look for OpenCL
 set(OpenCL_FOUND FALSE)
 if (${GPU_MODE} MATCHES "OPENCL")
  find_package(OpenCL)
 endif (${GPU_MODE} MATCHES "OPENCL")
@@ -136,9 +138,10 @@ if (${GPU_MODE} MATCHES "CUDA")
  add_definitions(-DUSE_CUDA)
  message(STATUS "Building with CUDA.")
 elseif (${GPU_MODE} MATCHES "CPU_ONLY")
+  add_definitions(-DUSE_CPU_ONLY)
+  message(STATUS "Building CPU Only.")
  # OpenPose flag for Caffe
  add_definitions(-DCPU_ONLY)
-  message(STATUS "Building CPU Only.")
 elseif (${GPU_MODE} MATCHES "OPENCL")
  # OpenPose flag for Caffe
  add_definitions(-DUSE_OPENCL)

--- a/README.md
+++ b/README.md
@@ -28,7 +28,8 @@


 ## Latest Features
- Mar 2017: Improved [**3-D keypoint reconstruction module**](doc/3d_reconstruction_demo.md) (from multiple camera views)!
+- Mar 2018: [**CPU version**](doc/installation.md#cpu-version)!
+- Mar 2018: Improved [**3-D keypoint reconstruction module**](doc/3d_reconstruction_demo.md) (from multiple camera views)!
 - Sep 2017: [**CMake**](doc/installation.md) installer and **IP camera** support!
 - Jul 2017: [**Windows portable binaries and demo**](https://github.com/CMU-Perceptual-Computing-Lab/openpose/releases)!
 - Jul 2017: **Hands** released!

--- a/cmake/Cuda.cmake
+++ b/cmake/Cuda.cmake
 # Copied from Caffe

-if(CPU_ONLY)
+if (CPU_ONLY)
  return()
-endif()
+endif ()

 ################################################################################################
 # Remove duplicates from list(s)
@@ -10,9 +10,9 @@ endif()
 #   op_list_unique(<list_variable> [<list_variable>] [...])
 macro(op_list_unique)
  foreach(__lst ${ARGN})
-    if(${__lst})
+    if (${__lst})
      list(REMOVE_DUPLICATES ${__lst})
-    endif()
+    endif ()
  endforeach()
 endmacro()

@@ -29,7 +29,7 @@ endif ()
 # Usage:
 #   op_detect_installed_gpus(out_variable)
 function(op_detect_installed_gpus out_variable)
-  if(NOT CUDA_gpu_detect_output)
+  if (NOT CUDA_gpu_detect_output)
    set(__cufile ${PROJECT_BINARY_DIR}/detect_cuda_archs.cu)

    file(WRITE ${__cufile} ""
@@ -53,20 +53,20 @@ function(op_detect_installed_gpus out_variable)
                    RESULT_VARIABLE __nvcc_res OUTPUT_VARIABLE __nvcc_out
                    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)

-    if(__nvcc_res EQUAL 0)
+    if (__nvcc_res EQUAL 0)
      if (NOT WIN32)
        string(REPLACE "2.1" "2.1(2.0)" __nvcc_out "${__nvcc_out}")
      endif (NOT WIN32)
      set(CUDA_gpu_detect_output ${__nvcc_out} CACHE INTERNAL "Returned GPU architetures from op_detect_gpus tool" FORCE)
-    endif()
-  endif()
+    endif ()
+  endif ()

-  if(NOT CUDA_gpu_detect_output)
+  if (NOT CUDA_gpu_detect_output)
    message(STATUS "Automatic GPU detection failed. Building for all known architectures.")
    set(${out_variable} ${Caffe_known_gpu_archs} PARENT_SCOPE)
  else()
    set(${out_variable} ${CUDA_gpu_detect_output} PARENT_SCOPE)
-  endif()
+  endif ()
 endfunction()


@@ -78,10 +78,10 @@ function(op_select_nvcc_arch_flags out_variable)
  # List of arch names
  set(__archs_names "Fermi" "Kepler" "Maxwell" "Pascal" "All" "Manual")
  set(__archs_name_default "All")
-  if(NOT CMAKE_CROSSCOMPILING)
+  if (NOT CMAKE_CROSSCOMPILING)
    list(APPEND __archs_names "Auto")
    set(__archs_name_default "Auto")
-  endif()
+  endif ()

  # set CUDA_ARCH strings (so it will be seen as dropbox in CMake-Gui)
  # set(CUDA_ARCH ${__archs_name_default} CACHE STRING "Select target NVIDIA GPU achitecture.")
@@ -89,35 +89,35 @@ function(op_select_nvcc_arch_flags out_variable)
  # mark_as_advanced(CUDA_ARCH)

  # verify CUDA_ARCH value
-  if(NOT ";${__archs_names};" MATCHES ";${CUDA_ARCH};")
+  if (NOT ";${__archs_names};" MATCHES ";${CUDA_ARCH};")
    string(REPLACE ";" ", " __archs_names "${__archs_names}")
    message(FATAL_ERROR "Only ${__archs_names} architeture names are supported.")
-  endif()
+  endif ()

-  if(${CUDA_ARCH} STREQUAL "Manual")
+  if (${CUDA_ARCH} STREQUAL "Manual")
    set(CUDA_ARCH_BIN ${Caffe_known_gpu_archs} CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported")
    set(CUDA_ARCH_PTX "50"                     CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for")
    # mark_as_advanced(CUDA_ARCH_BIN CUDA_ARCH_PTX)
  else()
    unset(CUDA_ARCH_BIN CACHE)
    unset(CUDA_ARCH_PTX CACHE)
-  endif()
+  endif ()

-  if(${CUDA_ARCH} STREQUAL "Fermi" AND NOT WIN32)
+  if (${CUDA_ARCH} STREQUAL "Fermi" AND NOT WIN32)
    set(__cuda_arch_bin "20 21(20)")
-  elseif(${CUDA_ARCH} STREQUAL "Kepler")
+  elseif (${CUDA_ARCH} STREQUAL "Kepler")
    set(__cuda_arch_bin "30 35")
-  elseif(${CUDA_ARCH} STREQUAL "Maxwell")
+  elseif (${CUDA_ARCH} STREQUAL "Maxwell")
    set(__cuda_arch_bin "50 52")
-  elseif(${CUDA_ARCH} STREQUAL "Pascal")
+  elseif (${CUDA_ARCH} STREQUAL "Pascal")
    set(__cuda_arch_bin "60 61")
-  elseif(${CUDA_ARCH} STREQUAL "All")
+  elseif (${CUDA_ARCH} STREQUAL "All")
    set(__cuda_arch_bin ${Caffe_known_gpu_archs})
-  elseif(${CUDA_ARCH} STREQUAL "Auto")
+  elseif (${CUDA_ARCH} STREQUAL "Auto")
    op_detect_installed_gpus(__cuda_arch_bin)
  else()  # (${CUDA_ARCH} STREQUAL "Manual")
    set(__cuda_arch_bin ${CUDA_ARCH_BIN})
-  endif()
+  endif ()

  # remove dots and convert to lists
  string(REGEX REPLACE "\\." "" __cuda_arch_bin "${__cuda_arch_bin}")
@@ -131,7 +131,7 @@ function(op_select_nvcc_arch_flags out_variable)

  # Tell NVCC to add binaries for the specified GPUs
  foreach(__arch ${__cuda_arch_bin})
-    if(__arch MATCHES "([0-9]+)\\(([0-9]+)\\)")
+    if (__arch MATCHES "([0-9]+)\\(([0-9]+)\\)")
      # User explicitly specified PTX for the concrete BIN
      list(APPEND __nvcc_flags -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1})
      list(APPEND __nvcc_archs_readable sm_${CMAKE_MATCH_1})
@@ -139,7 +139,7 @@ function(op_select_nvcc_arch_flags out_variable)
      # User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN
      list(APPEND __nvcc_flags -gencode arch=compute_${__arch},code=sm_${__arch})
      list(APPEND __nvcc_archs_readable sm_${__arch})
-    endif()
+    endif ()
  endforeach()

  # Tell NVCC to add PTX intermediate code for the specified architectures
@@ -166,13 +166,13 @@ macro(op_cuda_compile objlist_variable)

  endforeach()

-  if(UNIX OR APPLE)
+  if (UNIX OR APPLE)
    list(APPEND CUDA_NVCC_FLAGS -Xcompiler -fPIC)
-  endif()
+  endif ()

-  if(APPLE)
+  if (APPLE)
    list(APPEND CUDA_NVCC_FLAGS -Xcompiler -Wno-unused-function)
-  endif()
+  endif ()

  cuda_compile(cuda_objcs ${ARGN})

@@ -197,18 +197,18 @@ function(detect_cuDNN)
            DOC "Path to cuDNN include directory." )

  # dynamic libs have different suffix in mac and linux
-  if(APPLE)
+  if (APPLE)
    set(CUDNN_LIB_NAME "libcudnn.dylib")
  else()
    set(CUDNN_LIB_NAME "libcudnn.so")
-  endif()
+  endif ()

  get_filename_component(__libpath_hist ${CUDA_CUDART_LIBRARY} PATH)
  find_library(CUDNN_LIBRARY NAMES ${CUDNN_LIB_NAME}
   PATHS ${CUDNN_ROOT} $ENV{CUDNN_ROOT} ${CUDNN_INCLUDE} ${__libpath_hist} ${__libpath_hist}/../lib
   DOC "Path to cuDNN library.")
  
-  if(CUDNN_INCLUDE AND CUDNN_LIBRARY)
+  if (CUDNN_INCLUDE AND CUDNN_LIBRARY)
    set(HAVE_CUDNN  TRUE PARENT_SCOPE)
    set(CUDNN_FOUND TRUE PARENT_SCOPE)

@@ -228,23 +228,23 @@ function(detect_cuDNN)
    string(REGEX REPLACE "define CUDNN_PATCHLEVEL * +([0-9]+)" "\\1"
           CUDNN_VERSION_PATCH "${CUDNN_VERSION_PATCH}")

-    if(NOT CUDNN_VERSION_MAJOR)
+    if (NOT CUDNN_VERSION_MAJOR)
      set(CUDNN_VERSION "???")
    else()
      set(CUDNN_VERSION "${CUDNN_VERSION_MAJOR}.${CUDNN_VERSION_MINOR}.${CUDNN_VERSION_PATCH}")
-    endif()
+    endif ()

    message(STATUS "Found cuDNN: ver. ${CUDNN_VERSION} found (include: ${CUDNN_INCLUDE}, library: ${CUDNN_LIBRARY})")

    string(COMPARE LESS "${CUDNN_VERSION_MAJOR}" 3 cuDNNVersionIncompatible)
-    if(cuDNNVersionIncompatible)
+    if (cuDNNVersionIncompatible)
      message(FATAL_ERROR "cuDNN version >3 is required.")
-    endif()
+    endif ()

    set(CUDNN_VERSION "${CUDNN_VERSION}" PARENT_SCOPE)
    mark_as_advanced(CUDNN_INCLUDE CUDNN_LIBRARY CUDNN_ROOT)

-  endif()
+  endif ()
 endfunction()

 ################################################################################################
@@ -254,9 +254,9 @@ endfunction()
 find_package(CUDA 5.5 QUIET)
 find_cuda_helper_libs(curand)  # cmake 2.8.7 compartibility which doesn't search for curand

-if(NOT CUDA_FOUND)
+if (NOT CUDA_FOUND)
  return()
-endif()
+endif ()

 set(HAVE_CUDA TRUE)
 message(STATUS "CUDA detected: " ${CUDA_VERSION})
@@ -265,14 +265,14 @@ list(APPEND Caffe_LINKER_LIBS PUBLIC ${CUDA_CUDART_LIBRARY}
                                     ${CUDA_curand_LIBRARY} ${CUDA_CUBLAS_LIBRARIES})

 # cudnn detection
-if(USE_CUDNN)
+if (USE_CUDNN)
  detect_cuDNN()
-  if(HAVE_CUDNN)
+  if (HAVE_CUDNN)
    list(APPEND Caffe_DEFINITIONS PUBLIC -DUSE_CUDNN)
    list(APPEND Caffe_INCLUDE_DIRS PUBLIC ${CUDNN_INCLUDE})
    list(APPEND Caffe_LINKER_LIBS PUBLIC ${CUDNN_LIBRARY})
-  endif()
-endif()
+  endif ()
+endif ()

 # setting nvcc arch flags
 op_select_nvcc_arch_flags(NVCC_FLAGS_EXTRA)
@@ -281,11 +281,11 @@ message(STATUS "Added CUDA NVCC flags for: ${NVCC_FLAGS_EXTRA_readable}")

 # Boost 1.55 workaround, see https://svn.boost.org/trac/boost/ticket/9392 or
 # https://github.com/ComputationalRadiationPhysics/picongpu/blob/master/src/picongpu/CMakeLists.txt
-if(Boost_VERSION EQUAL 105500)
+if (Boost_VERSION EQUAL 105500)
  message(STATUS "Cuda + Boost 1.55: Applying noinline work around")
  # avoid warning for CMake >= 2.8.12
  set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} \"-DBOOST_NOINLINE=__attribute__((noinline))\" ")
-endif()
+endif ()

 # disable some nvcc diagnostic that apears in boost, glog, glags, opencv, etc.
 foreach(diag cc_clobber_ignored integer_sign_change useless_using_declaration set_but_not_used)
@@ -293,20 +293,20 @@ foreach(diag cc_clobber_ignored integer_sign_change useless_using_declaration se
 endforeach()

 # setting default testing device
-if(NOT CUDA_TEST_DEVICE)
+if (NOT CUDA_TEST_DEVICE)
  set(CUDA_TEST_DEVICE -1)
-endif()
+endif ()

 mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD)
 mark_as_advanced(CUDA_SDK_ROOT_DIR CUDA_SEPARABLE_COMPILATION)

 # Handle clang/libc++ issue
-if(APPLE)
+if (APPLE)
  op_detect_darwin_version(OSX_VERSION)

  # OSX 10.9 and higher uses clang/libc++ by default which is incompatible with old CUDA toolkits
-  if(OSX_VERSION VERSION_GREATER 10.8)
+  if (OSX_VERSION VERSION_GREATER 10.8)
    # enabled by default if and only if CUDA version is less than 7.0
    op_option(USE_libstdcpp "Use libstdc++ instead of libc++" (CUDA_VERSION VERSION_LESS 7.0))
-  endif()
-endif()
\ No newline at end of file
+  endif ()
+endif ()
--- a/doc/installation.md
+++ b/doc/installation.md
@@ -12,12 +12,13 @@ OpenPose - Installation
 8. [Uninstallation](#uninstallation)
 9. [Optional Settings](#optional-settings)
    1. [MPI Model](#mpi-model)
-    2. [3D Reconstruction Module](#3d-reconstruction-module)
-    3. [Compiling without cuDNN](#compiling-without-cudnn)
-    4. [Custom Caffe (Ubuntu Only)](#custom-caffe-ubuntu-only)
-    5. [Custom OpenCV (Ubuntu Only)](#custom-opencv-ubuntu-only)
-    6. [Doxygen Documentation Autogeneration (Ubuntu Only)](#doxygen-documentation-autogeneration-ubuntu-only)
-    7. [CMake Command Line Configuration (Ubuntu Only)](#cmake-command-line-configuration-ubuntu-only)
+    2. [CPU Version](#cpu-version)
+    3. [3D Reconstruction Module](#3d-reconstruction-module)
+    4. [Compiling without cuDNN](#compiling-without-cudnn)
+    5. [Custom Caffe (Ubuntu Only)](#custom-caffe-ubuntu-only)
+    6. [Custom OpenCV (Ubuntu Only)](#custom-opencv-ubuntu-only)
+    7. [Doxygen Documentation Autogeneration (Ubuntu Only)](#doxygen-documentation-autogeneration-ubuntu-only)
+    8. [CMake Command Line Configuration (Ubuntu Only)](#cmake-command-line-configuration-ubuntu-only)



@@ -37,11 +38,15 @@ This installation section is only intended if you plan to modify the OpenPose co


 ## Requirements
- NVIDIA graphics card with at least 1.6 GB available (the `nvidia-smi` command checks the available GPU memory in Ubuntu).
- At least 2 GB of free RAM memory.
- Highly recommended: cuDNN and a CPU with at least 8 cores.
+Requirements for the default configuration (you might need more resources with a greater `--net_resolution` and/or `--scale_number`, or fewer resources by reducing the net resolution and/or using the MPI and MPI_4 models):

-Note: These requirements assume the default configuration (i.e. `--net_resolution "656x368"` and `scale_number 1`). You might need more (with a greater net resolution and/or number of scales) or less resources (with smaller net resolution and/or using the MPI and MPI_4 models).
+- Nvidia GPU version:
+    - NVIDIA graphics card with at least 1.6 GB available (the `nvidia-smi` command checks the available GPU memory in Ubuntu).
+    - At least 2 GB of free RAM memory.
+    - Highly recommended: cuDNN.
+- CPU version:
+    - Around 8GB of free RAM memory.
+- Highly recommended: a CPU with at least 8 cores.



@@ -87,17 +92,18 @@ The instructions in this section describe the steps to build OpenPose using CMak
 1. Download and install CMake GUI:
    - Ubuntu: run the command `sudo apt-get install cmake-qt-gui`. Note: If you prefer to use CMake through the command line, see [Cmake Command Line Build](#cmake-command-line-build-ubuntu-only).
    - Windows: download and install the latest CMake win64-x64 msi installer from the [CMake website](https://cmake.org/download/), called `cmake-X.X.X-win64-x64.msi`.
-2. [**CUDA 8**](https://developer.nvidia.com/cuda-80-ga2-download-archive):
-    - Ubuntu: Run `sudo ubuntu/install_cuda.sh` or alternatively download and install it from their website.
-    - Windows: Install CUDA 8.0 after Visual Studio 2015 is installed to assure that the CUDA installation will generate all necessary files for VS. If CUDA was already installed, re-install CUDA after installing VS!
-    - **IMPORTANT**: As of a recent Windows update, you have to download the Nvidia [drivers](http://www.nvidia.com/Download/index.aspx) drivers first, and then install CUDA without the Graphics Driver flag or else your system might hang.
-3. [**cuDNN 5.1**](https://developer.nvidia.com/cudnn):
-    - Ubuntu: Run `sudo ubuntu/install_cudnn.sh` or alternatively download and install it from their website.
-    - Windows (and Ubuntu if manual installation): In order to manually install it, just unzip it and copy (merge) the contents on the CUDA folder, usually `/usr/local/cuda/` in Ubuntu and `C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0` in Windows.
-4. Ubuntu - Other prerequisites:
+2. Nvidia GPU version prerequisites:
+    1. [**CUDA 8**](https://developer.nvidia.com/cuda-80-ga2-download-archive):
+        - Ubuntu: Run `sudo ubuntu/install_cuda.sh` or alternatively download and install it from their website.
+        - Windows: Install CUDA 8.0 after Visual Studio 2015 is installed to assure that the CUDA installation will generate all necessary files for VS. If CUDA was already installed, re-install CUDA after installing VS!
+        - **IMPORTANT**: As of a recent Windows update, you have to download the Nvidia [drivers](http://www.nvidia.com/Download/index.aspx) first, and then install CUDA without the Graphics Driver flag or else your system might hang.
+    2. [**cuDNN 5.1**](https://developer.nvidia.com/cudnn):
+        - Ubuntu: Run `sudo ubuntu/install_cudnn.sh` or alternatively download and install it from their website.
+        - Windows (and Ubuntu if manual installation): In order to manually install it, just unzip it and copy (merge) the contents on the CUDA folder, usually `/usr/local/cuda/` in Ubuntu and `C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0` in Windows.
+3. Ubuntu - Other prerequisites:
    - Caffe prerequisites: By default, OpenPose uses Caffe under the hood. If you have not used Caffe previously, install its dependencies by running `sudo bash ./ubuntu/install_cmake.sh`.
    - OpenCV must be already installed on your machine. It can be installed with `apt-get install libopencv-dev`. You can also use your own compiled OpenCV version.
-5. Windows - **Microsoft Visual Studio (VS) 2015 Enterprise Update 3**:
+4. Windows - **Microsoft Visual Studio (VS) 2015 Enterprise Update 3**:
    - If **Visual Studio 2017 Community** is desired, we do not officially support it, but it might be compiled by firstly [enabling CUDA 8.0 in VS2017](https://stackoverflow.com/questions/43745099/using-cuda-with-visual-studio-2017?answertab=active#tab-top) or use **VS2017 with CUDA 9** by checking the `.vcxproj` file and changing the necessary paths from CUDA 8 to 9.
    - VS 2015 Enterprise Update 1 will give some compiler errors and VS 2015 Community has not been tested.
 5. Windows - **Caffe, OpenCV, and Caffe prerequisites**:
@@ -214,6 +220,25 @@ By default, the body MPI model is not downloaded. You can download it by turning



+#### CPU Version
+To manually select the CPU Version, open CMake GUI mentioned above, and set the `GPU_MODE` flag to `CPU_ONLY`. **NOTE: Accuracy of the CPU version is ~1% higher than CUDA version, so the results will vary.**
+
+- On Ubuntu, OpenPose will link against the Intel MKL version (Math Kernel Library) of Caffe. Alternatively, the user can choose his own Caffe version, by unselecting `USE_MKL` and selecting his own Caffe path. 
+- On Windows, it will use the default version of Caffe or one provided by the user on the CPU.
+
+The default CPU version processes ~0.2 images per second on Ubuntu (~50x slower than GPU), while the MKL version provides a roughly 2x speedup at ~0.4 images per second. As of now, OpenPose does not support MKL on Windows, but it will at a later date. Also, the MKL version does not support a variable net resolution, so running OpenPose on a folder of images with different resolutions requires fixing the resolution, e.g., with the `--net_resolution 656x368` flag.
+
+The user can configure the environment variables `MKL_NUM_THREADS` and `OMP_NUM_THREADS`. They are set at an optimum parameter level by default (i.e., to the number of threads of the machine). However, they can be tweaked by running the following commands in the terminal window, right before running any OpenPose application. E.g.:
+```
+# Optimal number = Number of threads (used by default)
+export MKL_NUM_THREADS="8"
+export OMP_NUM_THREADS="8"
+```
+
+Do note that increasing the number of threads results in more memory use. You can check the [OpenPose benchmark](https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/doc/faq.md#speed-up-and-benchmark) for more information about speed and memory requirements in several CPUs and GPUs.
+
+
+
 #### 3D Reconstruction Module
 You can include the 3D reconstruction module by:


--- a/doc/output.md
+++ b/doc/output.md
@@ -67,6 +67,8 @@ There are 2 alternatives to save the OpenPose output.

 2. (Deprecated) The `write_keypoint` flag uses the OpenCV cv::FileStorage default formats, i.e. JSON (available after OpenCV 3.0), XML, and YML. Note that it does not include any other information othern than keypoints.

+Both of them follow the keypoint ordering described in the [Keypoint Ordering](#keypoint-ordering) section.
+


 ### Keypoint Ordering

--- a/doc/released_features.md
+++ b/doc/released_features.md
 OpenPose Library - Latest Released Features
 ====================================

- Mar 2017: Improved [**3-D keypoint reconstruction module**](doc/3d_reconstruction_demo.md) (from multiple camera views)!
+- Mar 2018: [**CPU version**](doc/installation.md#cpu-version)!
+- Mar 2018: Improved [**3-D keypoint reconstruction module**](doc/3d_reconstruction_demo.md) (from multiple camera views)!
 - Sep 2017: [**CMake**](doc/installation.md) installer and **IP camera** support!
 - Jul 2017: [**Windows portable binaries and demo**](https://github.com/CMU-Perceptual-Computing-Lab/openpose/releases)!
 - Jul 2017: **Hands** released!

--- a/examples/tests/resizeTest.cpp
+++ b/examples/tests/resizeTest.cpp
 // ------------------------- OpenPose Resize Layer Testing -------------------------

-#include <chrono> // `std::chrono::` functions and classes, e.g. std::chrono::milliseconds
-// GFlags: DEFINE_bool, _int32, _int64, _uint64, _double, _string
-#include <gflags/gflags.h>
-// Allow Google Flags in Ubuntu 14
-#ifndef GFLAGS_GFLAGS_H_
-namespace gflags = google;
-#endif
 #include <openpose/headers.hpp>
-#include <openpose/gpu/cuda.hpp>
-#ifdef USE_CAFFE
-    #include <caffe/net.hpp>
-#endif
-#include <openpose/net/resizeAndMergeBase.hpp>
-
-DEFINE_string(image_path,               "examples/media/COCO_val2014_000000000192.jpg",     "Process the desired image.");
+#ifdef USE_CUDA
+    #include <chrono> // `std::chrono::` functions and classes, e.g. std::chrono::milliseconds
+    // GFlags: DEFINE_bool, _int32, _int64, _uint64, _double, _string
+    #include <gflags/gflags.h>
+    // Allow Google Flags in Ubuntu 14
+    #ifndef GFLAGS_GFLAGS_H_
+    namespace gflags = google;
+    #endif
+    #ifdef USE_CAFFE
+        #include <caffe/net.hpp>
+    #endif

-cv::Mat gpuResize(cv::Mat& img, cv::Size newSize)
-{
-    #ifdef USE_CUDA
+    DEFINE_string(image_path,               "examples/media/COCO_val2014_000000000192.jpg",     "Process the desired image.");
+
+    cv::Mat gpuResize(cv::Mat& img, const cv::Size& newSize)
+    {
+        #ifdef USE_CUDA
+            // Copy the source image into device memory (cudaMalloc, so that cudaFree below is the matching release)
+            float* cpuPtr = &img.at<float>(0);
+            float* gpuPtr;
+            cudaMalloc((void **)&gpuPtr, img.size().width * img.size().height * sizeof(float));
+            cudaMemcpy(gpuPtr, cpuPtr, img.size().width * img.size().height * sizeof(float),
+                       cudaMemcpyHostToDevice);
+
+            // Copy the zero-initialized destination image into device memory
+            cv::Mat newImg = cv::Mat(newSize,CV_32FC1,cv::Scalar(0));
+            float* newCpuPtr = &newImg.at<float>(0);
+            float* newGpuPtr;
+            cudaMalloc((void **)&newGpuPtr, newSize.width * newSize.height * sizeof(float));
+            cudaMemcpy(newGpuPtr, newCpuPtr, newSize.width * newSize.height * sizeof(float),
+                       cudaMemcpyHostToDevice);
+
+            std::vector<const float*> sourcePtrs;
+            sourcePtrs.emplace_back(gpuPtr);
+            std::array<int, 4> targetSize = {1,1,newImg.size().height,newImg.size().width};
+            std::array<int, 4> sourceSize = {1,1,img.size().height,img.size().width};
+            std::vector<std::array<int, 4>> sourceSizes;
+            sourceSizes.emplace_back(sourceSize);
+            op::resizeAndMergeGpu(newGpuPtr, sourcePtrs, targetSize, sourceSizes);
+            cudaMemcpy(newCpuPtr, newGpuPtr, newImg.size().width * newImg.size().height * sizeof(float),
+                       cudaMemcpyDeviceToHost);
+
+            cudaFree(gpuPtr);
+            cudaFree(newGpuPtr);
+            return newImg;
+        #else
+            UNUSED(img);
+            UNUSED(newSize);
+            op::error("OpenPose must be compiled with the `USE_CAFFE` & `USE_CUDA` macro definitions in order to run"
+                  " this functionality.", __LINE__, __FUNCTION__, __FILE__);
+        #endif
+    }
+
+    cv::Mat cpuResize(cv::Mat& img, cv::Size newSize)
+    {
        // Upload to Source to GPU
        float* cpuPtr = &img.at<float>(0);
-        float* gpuPtr;
-        cudaMallocHost((void **)&gpuPtr, img.size().width * img.size().height * sizeof(float));
-        cudaMemcpy(gpuPtr, cpuPtr, img.size().width * img.size().height * sizeof(float),
-                   cudaMemcpyHostToDevice);

        // Upload to Dest to GPU
        cv::Mat newImg = cv::Mat(newSize,CV_32FC1,cv::Scalar(0));
-        float* newCpuPtr = &newImg.at<float>(0);
-        float* newGpuPtr;
-        cudaMallocHost((void **)&newGpuPtr, newSize.width * newSize.height * sizeof(float));
-        cudaMemcpy(newGpuPtr, newCpuPtr, newSize.width * newSize.height * sizeof(float),
-                   cudaMemcpyHostToDevice);

        std::vector<const float*> sourcePtrs;
-        sourcePtrs.emplace_back(gpuPtr);
+        sourcePtrs.emplace_back(cpuPtr);
        std::array<int, 4> targetSize = {1,1,newImg.size().height,newImg.size().width};
        std::array<int, 4> sourceSize = {1,1,img.size().height,img.size().width};
        std::vector<std::array<int, 4>> sourceSizes;
        sourceSizes.emplace_back(sourceSize);
-        op::resizeAndMergeGpu(newGpuPtr, sourcePtrs, targetSize, sourceSizes);
-        cudaMemcpy(newCpuPtr, newGpuPtr, newImg.size().width * newImg.size().height * sizeof(float),
-                   cudaMemcpyDeviceToHost);
+        op::resizeAndMergeCpu(&newImg.at<float>(0), sourcePtrs, targetSize, sourceSizes);

-        cudaFree(gpuPtr);
-        cudaFree(newGpuPtr);
        return newImg;
-    #else
-        op::error("OpenPose must be compiled with the `USE_CAFFE` & `USE_CUDA` macro definitions in order to run"
-              " this functionality.", __LINE__, __FUNCTION__, __FILE__);
-    #endif
-}
-
-cv::Mat cpuResize(cv::Mat& img, cv::Size newSize)
-{
-    // Upload to Source to GPU
-    float* cpuPtr = &img.at<float>(0);
-
-    // Upload to Dest to GPU
-    cv::Mat newImg = cv::Mat(newSize,CV_32FC1,cv::Scalar(0));
-
-    std::vector<const float*> sourcePtrs;
-    sourcePtrs.emplace_back(cpuPtr);
-    std::array<int, 4> targetSize = {1,1,newImg.size().height,newImg.size().width};
-    std::array<int, 4> sourceSize = {1,1,img.size().height,img.size().width};
-    std::vector<std::array<int, 4>> sourceSizes;
-    sourceSizes.emplace_back(sourceSize);
-    op::resizeAndMergeCpu(&newImg.at<float>(0), sourcePtrs, targetSize, sourceSizes);
-
-    return newImg;
-}
-
-int resizeTest()
-{
-    // logging_level
-    cv::Mat img = op::loadImage(FLAGS_image_path, CV_LOAD_IMAGE_GRAYSCALE);
-    if(img.empty())
-        op::error("Could not open or find the image: " + FLAGS_image_path, __LINE__, __FUNCTION__, __FILE__);
-    img.convertTo(img, CV_32FC1);
-    img = cpuResize(img, cv::Size(img.size().width/4,img.size().height/4));
-    img*=0.005;
-
-    cv::Mat gpuImg = gpuResize(img, cv::Size(img.size().width*8,img.size().height*8));
-    cv::Mat cpuImg = cpuResize(img, cv::Size(img.size().width*8,img.size().height*8));
-    cv::imshow("gpuImg", gpuImg);
-    cv::imshow("cpuImg", cpuImg);
-
-    op::log("Done");
-    cv::waitKey(0);
-
-    return 0;
-}
+    }
+
+    int resizeTest()
+    {
+        // logging_level
+        cv::Mat img = op::loadImage(FLAGS_image_path, CV_LOAD_IMAGE_GRAYSCALE);
+        if(img.empty())
+            op::error("Could not open or find the image: " + FLAGS_image_path, __LINE__, __FUNCTION__, __FILE__);
+        img.convertTo(img, CV_32FC1);
+        img = cpuResize(img, cv::Size(img.size().width/4,img.size().height/4));
+        img*=0.005;
+
+        cv::Mat gpuImg = gpuResize(img, cv::Size(img.size().width*8,img.size().height*8));
+        cv::Mat cpuImg = cpuResize(img, cv::Size(img.size().width*8,img.size().height*8));
+        cv::imshow("gpuImg", gpuImg);
+        cv::imshow("cpuImg", cpuImg);
+
+        op::log("Done");
+        cv::waitKey(0);
+
+        return 0;
+    }
+#endif

 int main(int argc, char *argv[])
 {
-    // Parsing command line flags
-    gflags::ParseCommandLineFlags(&argc, &argv, true);
+    #ifdef USE_CUDA
+        // Parsing command line flags
+        gflags::ParseCommandLineFlags(&argc, &argv, true);

-    // Running handFromJsonTest
-    return resizeTest();
+        // Running handFromJsonTest
+        return resizeTest();
+    #else
+        op::error("OpenPose must be compiled with the `USE_CAFFE` & `USE_CUDA` macro definitions in order to run"
+              " this functionality.", __LINE__, __FUNCTION__, __FILE__);
+        return 0;
+    #endif
 }
--- a/src/openpose/net/netCaffe.cpp
+++ b/src/openpose/net/netCaffe.cpp
@@ -137,27 +137,27 @@ namespace op
        try
        {
            #ifdef USE_CAFFE
-               // Initialize net
-               #ifdef USE_OPENCL
-                   caffe::Caffe::set_mode(caffe::Caffe::GPU);
-                   caffe::Caffe::SelectDevice(upImpl->mGpuId, true);
-                   upImpl->upCaffeNet.reset(new caffe::Net<float>{upImpl->mCaffeProto, caffe::TEST,
-                                            caffe::Caffe::GetDefaultDevice()});
-                   upImpl->upCaffeNet->CopyTrainedLayersFrom(upImpl->mCaffeTrainedModel);
-                   op::OpenCL::getInstance(upImpl->mGpuId, CL_DEVICE_TYPE_GPU, true);
-               #else
-                   #ifdef USE_CUDA
-                       caffe::Caffe::set_mode(caffe::Caffe::GPU);
-                       caffe::Caffe::SetDevice(upImpl->mGpuId);
-                   #else
-                       caffe::Caffe::set_mode(caffe::Caffe::CPU);
-                   #endif
-                   upImpl->upCaffeNet.reset(new caffe::Net<float>{upImpl->mCaffeProto, caffe::TEST});
-                   upImpl->upCaffeNet->CopyTrainedLayersFrom(upImpl->mCaffeTrainedModel);
-                   #ifdef USE_CUDA
-                       cudaCheck(__LINE__, __FUNCTION__, __FILE__);
-                   #endif
-               #endif
+                // Initialize net
+                #ifdef USE_OPENCL
+                    caffe::Caffe::set_mode(caffe::Caffe::GPU);
+                    caffe::Caffe::SelectDevice(upImpl->mGpuId, true);
+                    upImpl->upCaffeNet.reset(new caffe::Net<float>{upImpl->mCaffeProto, caffe::TEST,
+                                             caffe::Caffe::GetDefaultDevice()});
+                    upImpl->upCaffeNet->CopyTrainedLayersFrom(upImpl->mCaffeTrainedModel);
+                    op::OpenCL::getInstance(upImpl->mGpuId, CL_DEVICE_TYPE_GPU, true);
+                #else
+                    #ifdef USE_CUDA
+                        caffe::Caffe::set_mode(caffe::Caffe::GPU);
+                        caffe::Caffe::SetDevice(upImpl->mGpuId);
+                    #else
+                        caffe::Caffe::set_mode(caffe::Caffe::CPU);
+                    #endif
+                    upImpl->upCaffeNet.reset(new caffe::Net<float>{upImpl->mCaffeProto, caffe::TEST});
+                    upImpl->upCaffeNet->CopyTrainedLayersFrom(upImpl->mCaffeTrainedModel);
+                    #ifdef USE_CUDA
+                        cudaCheck(__LINE__, __FUNCTION__, __FILE__);
+                    #endif
+                #endif
                // Set spOutputBlob
                upImpl->spOutputBlob = upImpl->upCaffeNet->blob_by_name(upImpl->mLastBlobName);
                if (upImpl->spOutputBlob == nullptr)

--- a/src/openpose/wrapper/wrapperAuxiliary.cpp
+++ b/src/openpose/wrapper/wrapperAuxiliary.cpp
@@ -123,11 +123,17 @@ namespace op
                      + std::to_string(wrapperStructPose.outputSize.x) + "x"
                      + std::to_string(wrapperStructPose.outputSize.y) + ").",
                      __LINE__, __FUNCTION__, __FILE__);
-            if (wrapperStructOutput.writeVideoFps <= 0 && wrapperStructInput.producerSharedPtr->get(CV_CAP_PROP_FPS) > 0)
+            if (wrapperStructOutput.writeVideoFps <= 0
+                && wrapperStructInput.producerSharedPtr->get(CV_CAP_PROP_FPS) > 0)
                error("Set `--camera_fps` for this producer, as its frame rate is unknown.",
                      __LINE__, __FUNCTION__, __FILE__);
+            #ifdef USE_CPU_ONLY
+                if (wrapperStructPose.scalesNumber > 1)
+                    error("Temporarily, the number of scales (`--scale_number`) cannot be greater than 1 for"
+                          " `CPU_ONLY` version.", __LINE__, __FUNCTION__, __FILE__);
+            #endif
            // Net input resolution cannot be reshaped for Caffe OpenCL and MKL versions, only for CUDA version
-            #if defined USE_MKL || defined CPU_ONLY
+            #if defined USE_MKL || defined USE_CPU_ONLY
                // If image_dir and netInputSize == -1 --> error
                if ((wrapperStructInput.producerSharedPtr == nullptr
                     || wrapperStructInput.producerSharedPtr->getType() == ProducerType::ImageDirectory)

--- a/ubuntu/Makefile.example
+++ b/ubuntu/Makefile.example
@@ -330,6 +330,7 @@ ifeq ($(USE_CUDA), 1)
 	COMMON_FLAGS += -DUSE_CUDA
 else
 	COMMON_FLAGS += -DCPU_ONLY # For Caffe
+	COMMON_FLAGS += -DUSE_CPU_ONLY
 endif

 LIBRARY_DIRS += $(LIB_BUILD_DIR)