From 4147ea04614930b514a468ce3bcab5cfecc43676 Mon Sep 17 00:00:00 2001
From: gineshidalgo99
Date: Fri, 3 Nov 2017 17:47:08 -0400
Subject: [PATCH] Updated Caffe

---
 3rdparty/Versions.txt                              |   2 +-
 3rdparty/caffe/Makefile                            |   2 +-
 3rdparty/caffe/docs/development.md                 |   2 +-
 3rdparty/caffe/docs/install_apt.md                 |   7 +-
 3rdparty/caffe/docs/tutorial/layers.md             |   2 +-
 3rdparty/caffe/examples/web_demo/readme.md         |   2 +-
 3rdparty/caffe/include/caffe/filler.hpp            |   6 +-
 .../include/caffe/layers/accuracy_layer.hpp        |   4 +
 .../caffe/layers/infogain_loss_layer.hpp           |  21 +-
 3rdparty/caffe/python/caffe/io.py                  |   2 +-
 3rdparty/caffe/python/caffe/test/test_net.py       |  50 +--
 .../caffe/src/caffe/layers/accuracy_layer.cpp      |  33 +-
 .../caffe/src/caffe/layers/accuracy_layer.cu       | 147 +++++++
 .../src/caffe/test/test_accuracy_layer.cpp         | 360 ++++++++++--------
 3rdparty/caffe/src/caffe/test/test_filler.cpp      |  43 ++-
 15 files changed, 452 insertions(+), 231 deletions(-)
 create mode 100644 3rdparty/caffe/src/caffe/layers/accuracy_layer.cu

diff --git a/3rdparty/Versions.txt b/3rdparty/Versions.txt
index daacb355..bf26b8f4 100644
--- a/3rdparty/Versions.txt
+++ b/3rdparty/Versions.txt
@@ -1,6 +1,6 @@
 Unix:
     - Caffe:
-        - Version 1.0.0, extracted from GitHub on 09/30/2017 from the current master branch.
+        - Version 1.0.0, extracted from GitHub on 11/03/2017 from the current master branch.
         - Link: https://github.com/BVLC/caffe
 
 Windows:
diff --git a/3rdparty/caffe/Makefile b/3rdparty/caffe/Makefile
index 8674f3a7..ca6f3c19 100644
--- a/3rdparty/caffe/Makefile
+++ b/3rdparty/caffe/Makefile
@@ -699,6 +699,6 @@ $(DISTRIBUTE_DIR): all py | $(DISTRIBUTE_SUBDIRS)
 	install -m 644 $(DYNAMIC_NAME) $(DISTRIBUTE_DIR)/lib
 	cd $(DISTRIBUTE_DIR)/lib; rm -f $(DYNAMIC_NAME_SHORT); ln -s $(DYNAMIC_VERSIONED_NAME_SHORT) $(DYNAMIC_NAME_SHORT)
 	# add python - it's not the standard way, indeed...
-	cp -r python $(DISTRIBUTE_DIR)/python
+	cp -r python $(DISTRIBUTE_DIR)/
 
 -include $(DEPS)
diff --git a/3rdparty/caffe/docs/development.md b/3rdparty/caffe/docs/development.md
index ec05bbee..36cd3995 100644
--- a/3rdparty/caffe/docs/development.md
+++ b/3rdparty/caffe/docs/development.md
@@ -116,5 +116,5 @@ To get a list of all options `googletest` provides, simply pass the `--help` fla
 
 - **Run `make lint` to check C++ code.**
 - Wrap lines at 80 chars.
-- Follow [Google C++ style](http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml) and [Google python style](http://google-styleguide.googlecode.com/svn/trunk/pyguide.html) + [PEP 8](http://legacy.python.org/dev/peps/pep-0008/).
+- Follow [Google C++ style](https://google.github.io/styleguide/cppguide.html) and [Google python style](https://google.github.io/styleguide/pyguide.html) + [PEP 8](http://legacy.python.org/dev/peps/pep-0008/).
 - Remember that “a foolish consistency is the hobgoblin of little minds,” so use your best judgement to write the clearest code for your particular case.
diff --git a/3rdparty/caffe/docs/install_apt.md b/3rdparty/caffe/docs/install_apt.md
index b6cb1c2d..e361a92d 100644
--- a/3rdparty/caffe/docs/install_apt.md
+++ b/3rdparty/caffe/docs/install_apt.md
@@ -40,6 +40,7 @@ Continue with [compilation](installation.html#compilation).
 
     sudo apt-get install libprotobuf-dev libleveldb-dev libsnappy-dev libopencv-dev libhdf5-serial-dev protobuf-compiler
     sudo apt-get install --no-install-recommends libboost-all-dev
+    sudo apt-get install libgflags-dev libgoogle-glog-dev liblmdb-dev
 
 **CUDA**: Install by `apt-get` or the NVIDIA `.run` package.
 The NVIDIA package tends to follow more recent library and driver versions, but the installation is more manual.
@@ -54,12 +55,6 @@ This can be skipped for CPU-only installation.
 
 CUDA 8 is required on Ubuntu 16.04.
 
-**Remaining dependencies, 14.04**
-
-Everything is packaged in 14.04.
-
-    sudo apt-get install libgflags-dev libgoogle-glog-dev liblmdb-dev
-
 **Remaining dependencies, 12.04**
 
 These dependencies need manual installation in 12.04.
diff --git a/3rdparty/caffe/docs/tutorial/layers.md b/3rdparty/caffe/docs/tutorial/layers.md
index 2faacc58..78a46f3a 100644
--- a/3rdparty/caffe/docs/tutorial/layers.md
+++ b/3rdparty/caffe/docs/tutorial/layers.md
@@ -87,7 +87,7 @@ Layers:
 * [ELU](layers/elu.html) - exponential linear rectification.
 * [Sigmoid](layers/sigmoid.html)
 * [TanH](layers/tanh.html)
-* [Absolute Value](layers/abs.html)
+* [Absolute Value](layers/absval.html)
 * [Power](layers/power.html) - f(x) = (shift + scale * x) ^ power.
 * [Exp](layers/exp.html) - f(x) = base ^ (shift + scale * x).
 * [Log](layers/log.html) - f(x) = log(x).
diff --git a/3rdparty/caffe/examples/web_demo/readme.md b/3rdparty/caffe/examples/web_demo/readme.md
index fe74b9ef..e50c4f10 100644
--- a/3rdparty/caffe/examples/web_demo/readme.md
+++ b/3rdparty/caffe/examples/web_demo/readme.md
@@ -11,7 +11,7 @@ priority: 10
 ## Requirements
 
 The demo server requires Python with some dependencies.
-To make sure you have the dependencies, please run `pip install -r examples/web_demo/requirements.txt`, and also make sure that you've compiled the Python Caffe interface and that it is on your `PYTHONPATH` (see [installation instructions](/installation.html)).
+To make sure you have the dependencies, please run `pip install -r examples/web_demo/requirements.txt`, and also make sure that you've compiled the Python Caffe interface and that it is on your `PYTHONPATH` (see [installation instructions](http://caffe.berkeleyvision.org/installation.html)).
 
 Make sure that you have obtained the Reference CaffeNet Model and the ImageNet Auxiliary Data:
diff --git a/3rdparty/caffe/include/caffe/filler.hpp b/3rdparty/caffe/include/caffe/filler.hpp
index dad9ad46..bb92ded7 100644
--- a/3rdparty/caffe/include/caffe/filler.hpp
+++ b/3rdparty/caffe/include/caffe/filler.hpp
@@ -250,10 +250,10 @@ class BilinearFiller : public Filler<Dtype> {
     CHECK_EQ(blob->width(), blob->height()) << "Filter must be square";
     Dtype* data = blob->mutable_cpu_data();
     int f = ceil(blob->width() / 2.);
-    float c = (2 * f - 1 - f % 2) / (2. * f);
+    Dtype c = (blob->width() - 1) / (2. * f);
     for (int i = 0; i < blob->count(); ++i) {
-      float x = i % blob->width();
-      float y = (i / blob->width()) % blob->height();
+      Dtype x = i % blob->width();
+      Dtype y = (i / blob->width()) % blob->height();
       data[i] = (1 - fabs(x / f - c)) * (1 - fabs(y / f - c));
     }
     CHECK_EQ(this->filler_param_.sparse(), -1)
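Note on the `filler.hpp` hunk above: with the new constant, `x / f - c` equals `(x - (width - 1) / 2) / f`, so the tent function is centered on the kernel midpoint for both even and odd widths, which the old `(2 * f - 1 - f % 2) / (2. * f)` expression did not guarantee. A self-contained sketch of the weights the updated filler produces (not part of the patch; `bilinear_weight` is an illustrative helper name):

```cpp
#include <cmath>
#include <cstdio>

// Weight at position (x, y) of a square w x w bilinear upsampling kernel,
// mirroring the updated BilinearFiller formula.
double bilinear_weight(int x, int y, int w) {
  const double f = std::ceil(w / 2.);   // upsampling factor
  const double c = (w - 1) / (2. * f);  // kernel center, in units of f
  return (1. - std::fabs(x / f - c)) * (1. - std::fabs(y / f - c));
}

int main() {
  const int w = 4;  // e.g. a 4x4 kernel for 2x upsampling
  for (int y = 0; y < w; ++y) {
    for (int x = 0; x < w; ++x)
      std::printf("%6.4f ", bilinear_weight(x, y, w));
    std::printf("\n");
  }
  return 0;  // rows/columns form a symmetric tent: 0.25 0.75 0.75 0.25
}
```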
diff --git a/3rdparty/caffe/include/caffe/layers/accuracy_layer.hpp b/3rdparty/caffe/include/caffe/layers/accuracy_layer.hpp
index a9ad3225..dd2247b9 100644
--- a/3rdparty/caffe/include/caffe/layers/accuracy_layer.hpp
+++ b/3rdparty/caffe/include/caffe/layers/accuracy_layer.hpp
@@ -68,6 +68,8 @@ class AccuracyLayer : public Layer<Dtype> {
    */
   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
       const vector<Blob<Dtype>*>& top);
+  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
 
   /// @brief Not implemented -- AccuracyLayer cannot be used as a loss.
@@ -77,6 +79,8 @@ class AccuracyLayer : public Layer<Dtype> {
       if (propagate_down[i]) { NOT_IMPLEMENTED; }
     }
   }
+  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
+      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
 
   int label_axis_, outer_num_, inner_num_;
diff --git a/3rdparty/caffe/include/caffe/layers/infogain_loss_layer.hpp b/3rdparty/caffe/include/caffe/layers/infogain_loss_layer.hpp
index edecde82..3b3caa27 100644
--- a/3rdparty/caffe/include/caffe/layers/infogain_loss_layer.hpp
+++ b/3rdparty/caffe/include/caffe/layers/infogain_loss_layer.hpp
@@ -13,20 +13,21 @@
 namespace caffe {
 
 /**
- * @brief A generalization of MultinomialLogisticLossLayer that takes an
+ * @brief A generalization of SoftmaxWithLossLayer that takes an
  *        "information gain" (infogain) matrix specifying the "value" of all label
  *        pairs.
  *
- * Equivalent to the MultinomialLogisticLossLayer if the infogain matrix is the
+ * Equivalent to the SoftmaxWithLossLayer if the infogain matrix is the
  * identity.
  *
  * @param bottom input Blob vector (length 2-3)
  *   -# @f$ (N \times C \times H \times W) @f$
- *      the predictions @f$ \hat{p} @f$, a Blob with values in
- *      @f$ [0, 1] @f$ indicating the predicted probability of each of the
- *      @f$ K = CHW @f$ classes. Each prediction vector @f$ \hat{p}_n @f$
- *      should sum to 1 as in a probability distribution: @f$
- *      \forall n \sum\limits_{k=1}^K \hat{p}_{nk} = 1 @f$.
+ *      the predictions @f$ x @f$, a Blob with values in
+ *      @f$ [-\infty, +\infty] @f$ indicating the predicted score for each of
+ *      the @f$ K = CHW @f$ classes. This layer maps these scores to a
+ *      probability distribution over classes using the softmax function
+ *      @f$ \hat{p}_{nk} = \exp(x_{nk}) /
+ *      \left[\sum_{k'} \exp(x_{nk'})\right] @f$ (see SoftmaxLayer).
  *   -# @f$ (N \times 1 \times 1 \times 1) @f$
  *      the labels @f$ l @f$, an integer-valued Blob with values
  *      @f$ l_n \in [0, 1, 2, ..., K - 1] @f$
@@ -35,7 +36,7 @@
  *      (\b optional) the infogain matrix @f$ H @f$. This must be provided as
  *      the third bottom blob input if not provided as the infogain_mat in the
  *      InfogainLossParameter. If @f$ H = I @f$, this layer is equivalent to the
- *      MultinomialLogisticLossLayer.
+ *      SoftmaxWithLossLayer.
  * @param top output Blob vector (length 1)
  *   -# @f$ (1 \times 1 \times 1 \times 1) @f$
  *      the computed infogain multinomial logistic loss: @f$ E =
@@ -98,8 +99,8 @@ class InfogainLossLayer : public LossLayer<Dtype> {
    *      infogain matrix, if provided as bottom[2])
    * @param bottom input Blob vector (length 2-3)
    *   -# @f$ (N \times C \times H \times W) @f$
-   *      the predictions @f$ \hat{p} @f$; Backward computes diff
-   *      @f$ \frac{\partial E}{\partial \hat{p}} @f$
+   *      the predictions @f$ x @f$; Backward computes diff
+   *      @f$ \frac{\partial E}{\partial x} @f$
    *   -# @f$ (N \times 1 \times 1 \times 1) @f$
    *      the labels -- ignored as we can't compute their error gradients
    *   -# @f$ (1 \times 1 \times K \times K) @f$
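The hunk above is cut off by the hunk boundary right after `@f$ E =`. For reference, the formula that context line continues with in the upstream BVLC/caffe header (quoted here for readability, not part of the patch) is:

```latex
E = \frac{-1}{N} \sum_{n=1}^{N} H_{l_n} \log(\hat{p}_n)
  = \frac{-1}{N} \sum_{n=1}^{N} \sum_{k=1}^{K} H_{l_n,k} \log(\hat{p}_{n,k})
```

where @f$ H_{l_n} @f$ denotes row @f$ l_n @f$ of the infogain matrix, so with @f$ H = I @f$ the expression reduces to the usual softmax cross-entropy.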
diff --git a/3rdparty/caffe/python/caffe/io.py b/3rdparty/caffe/python/caffe/io.py
index 966c164c..d61f765b 100644
--- a/3rdparty/caffe/python/caffe/io.py
+++ b/3rdparty/caffe/python/caffe/io.py
@@ -323,7 +323,7 @@ def resize_image(im, new_dims, interp_order=1):
         # skimage is fast but only understands {1,3} channel images
         # in [0, 1].
         im_std = (im - im_min) / (im_max - im_min)
-        resized_std = resize(im_std, new_dims, order=interp_order)
+        resized_std = resize(im_std, new_dims, order=interp_order, mode='constant')
         resized_im = resized_std * (im_max - im_min) + im_min
     else:
         # the image is a constant -- avoid divide by 0
diff --git a/3rdparty/caffe/python/caffe/test/test_net.py b/3rdparty/caffe/python/caffe/test/test_net.py
index afd27690..ee1d38c3 100644
--- a/3rdparty/caffe/python/caffe/test/test_net.py
+++ b/3rdparty/caffe/python/caffe/test/test_net.py
@@ -72,41 +72,41 @@ class TestNet(unittest.TestCase):
         self.net.backward()
 
     def test_forward_start_end(self):
-        conv_blob=self.net.blobs['conv'];
-        ip_blob=self.net.blobs['ip_blob'];
-        sample_data=np.random.uniform(size=conv_blob.data.shape);
-        sample_data=sample_data.astype(np.float32);
-        conv_blob.data[:]=sample_data;
-        forward_blob=self.net.forward(start='ip',end='ip');
-        self.assertIn('ip_blob',forward_blob);
-
-        manual_forward=[];
+        conv_blob=self.net.blobs['conv']
+        ip_blob=self.net.blobs['ip_blob']
+        sample_data=np.random.uniform(size=conv_blob.data.shape)
+        sample_data=sample_data.astype(np.float32)
+        conv_blob.data[:]=sample_data
+        forward_blob=self.net.forward(start='ip',end='ip')
+        self.assertIn('ip_blob',forward_blob)
+
+        manual_forward=[]
         for i in range(0,conv_blob.data.shape[0]):
             dot=np.dot(self.net.params['ip'][0].data,
-                       conv_blob.data[i].reshape(-1));
-            manual_forward.append(dot+self.net.params['ip'][1].data);
-        manual_forward=np.array(manual_forward);
+                       conv_blob.data[i].reshape(-1))
+            manual_forward.append(dot+self.net.params['ip'][1].data)
+        manual_forward=np.array(manual_forward)
 
-        np.testing.assert_allclose(ip_blob.data,manual_forward,rtol=1e-3);
+        np.testing.assert_allclose(ip_blob.data,manual_forward,rtol=1e-3,atol=1e-5)
 
     def test_backward_start_end(self):
-        conv_blob=self.net.blobs['conv'];
-        ip_blob=self.net.blobs['ip_blob'];
+        conv_blob=self.net.blobs['conv']
+        ip_blob=self.net.blobs['ip_blob']
         sample_data=np.random.uniform(size=ip_blob.data.shape)
-        sample_data=sample_data.astype(np.float32);
-        ip_blob.diff[:]=sample_data;
-        backward_blob=self.net.backward(start='ip',end='ip');
-        self.assertIn('conv',backward_blob);
+        sample_data=sample_data.astype(np.float32)
+        ip_blob.diff[:]=sample_data
+        backward_blob=self.net.backward(start='ip',end='ip')
+        self.assertIn('conv',backward_blob)
 
-        manual_backward=[];
+        manual_backward=[]
         for i in range(0,conv_blob.data.shape[0]):
             dot=np.dot(self.net.params['ip'][0].data.transpose(),
-                       sample_data[i].reshape(-1));
-            manual_backward.append(dot);
-        manual_backward=np.array(manual_backward);
-        manual_backward=manual_backward.reshape(conv_blob.data.shape);
+                       sample_data[i].reshape(-1))
+            manual_backward.append(dot)
+        manual_backward=np.array(manual_backward)
+        manual_backward=manual_backward.reshape(conv_blob.data.shape)
 
-        np.testing.assert_allclose(conv_blob.diff,manual_backward,rtol=1e-3);
+        np.testing.assert_allclose(conv_blob.diff,manual_backward,rtol=1e-3,atol=1e-5)
 
     def test_clear_param_diffs(self):
         # Run a forward/backward step to have non-zero diffs
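The `test_net.py` hunks drop stray semicolons and add `atol=1e-5` to the `assert_allclose` checks. The reason for the added absolute tolerance: NumPy's documented criterion is `|actual - desired| <= atol + rtol * |desired|`, so with a relative tolerance alone, entries whose expected value is near zero fail spuriously. A minimal sketch of that criterion (the `allclose` helper is illustrative, not from the patch):

```cpp
#include <cassert>
#include <cmath>

// Mirrors numpy.testing.assert_allclose:
// passes iff |actual - desired| <= atol + rtol * |desired|.
bool allclose(double actual, double desired,
              double rtol = 1e-3, double atol = 1e-5) {
  return std::fabs(actual - desired) <= atol + rtol * std::fabs(desired);
}

int main() {
  assert(allclose(1.0005, 1.0));          // covered by rtol
  assert(allclose(5e-6, 0.0));            // near zero: only atol can pass this
  assert(!allclose(5e-6, 0.0, 1e-3, 0));  // rtol alone fails at desired == 0
  return 0;
}
```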
diff --git a/3rdparty/caffe/src/caffe/layers/accuracy_layer.cpp b/3rdparty/caffe/src/caffe/layers/accuracy_layer.cpp
index 4eddbb5c..392829e6 100644
--- a/3rdparty/caffe/src/caffe/layers/accuracy_layer.cpp
+++ b/3rdparty/caffe/src/caffe/layers/accuracy_layer.cpp
@@ -52,8 +52,6 @@ void AccuracyLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
   const Dtype* bottom_label = bottom[1]->cpu_data();
   const int dim = bottom[0]->count() / outer_num_;
   const int num_labels = bottom[0]->shape(label_axis_);
-  vector<Dtype> maxval(top_k_+1);
-  vector<int> max_id(top_k_+1);
   if (top.size() > 1) {
     caffe_set(nums_buffer_.count(), Dtype(0), nums_buffer_.mutable_cpu_data());
     caffe_set(top[1]->count(), Dtype(0), top[1]->mutable_cpu_data());
@@ -66,25 +64,22 @@ void AccuracyLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
       if (has_ignore_label_ && label_value == ignore_label_) {
         continue;
       }
-      if (top.size() > 1) ++nums_buffer_.mutable_cpu_data()[label_value];
       DCHECK_GE(label_value, 0);
       DCHECK_LT(label_value, num_labels);
+      if (top.size() > 1) ++nums_buffer_.mutable_cpu_data()[label_value];
+      const Dtype prob_of_true_class = bottom_data[i * dim
+                                                   + label_value * inner_num_
+                                                   + j];
+      int num_better_predictions = -1;  // true_class also counts as "better"
       // Top-k accuracy
-      std::vector<std::pair<Dtype, int> > bottom_data_vector;
-      for (int k = 0; k < num_labels; ++k) {
-        bottom_data_vector.push_back(std::make_pair(
-            bottom_data[i * dim + k * inner_num_ + j], k));
+      for (int k = 0; k < num_labels && num_better_predictions < top_k_; ++k) {
+        num_better_predictions +=
+          (bottom_data[i * dim + k * inner_num_ + j] >= prob_of_true_class);
       }
-      std::partial_sort(
-          bottom_data_vector.begin(), bottom_data_vector.begin() + top_k_,
-          bottom_data_vector.end(), std::greater<std::pair<Dtype, int> >());
-      // check if true label is in top k predictions
-      for (int k = 0; k < top_k_; k++) {
-        if (bottom_data_vector[k].second == label_value) {
-          ++accuracy;
-          if (top.size() > 1) ++top[1]->mutable_cpu_data()[label_value];
-          break;
-        }
+      // check if there are less than top_k_ predictions
+      if (num_better_predictions < top_k_) {
+        ++accuracy;
+        if (top.size() > 1) ++top[1]->mutable_cpu_data()[label_value];
       }
       ++count;
     }
@@ -102,6 +97,10 @@ void AccuracyLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
   // Accuracy layer should not be used as a loss function.
 }
 
+#ifdef CPU_ONLY
+STUB_GPU(AccuracyLayer);
+#endif
+
 INSTANTIATE_CLASS(AccuracyLayer);
 REGISTER_LAYER_CLASS(Accuracy);
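The rewritten CPU path above (and the GPU kernels below) replace the old partial sort with a counting argument: the true label is in the top k iff fewer than k other classes score at least as high as the true class, with the true class itself discounted by starting the counter at -1. That turns an O(K log k) sort per sample into a single O(K) pass with early exit. A standalone sketch of the same logic, assuming a dense score vector (`in_top_k` and `scores` are illustrative names, not from the patch):

```cpp
#include <cassert>
#include <vector>

// Returns true iff scores[label] is among the top_k largest entries,
// counting ties in favor of the true class (>= comparison), as the
// updated AccuracyLayer does.
bool in_top_k(const std::vector<float>& scores, int label, int top_k) {
  const float true_score = scores[label];
  int num_better = -1;  // the true class itself also satisfies >=
  for (size_t k = 0; k < scores.size() && num_better < top_k; ++k) {
    num_better += (scores[k] >= true_score);
  }
  return num_better < top_k;
}

int main() {
  const std::vector<float> scores = {0.1f, 0.7f, 0.15f, 0.05f};
  assert(in_top_k(scores, 1, 1));   // class 1 is the argmax
  assert(!in_top_k(scores, 2, 1));  // class 2 is only second best
  assert(in_top_k(scores, 2, 2));   // ...so it is inside the top 2
  return 0;
}
```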
diff --git a/3rdparty/caffe/src/caffe/layers/accuracy_layer.cu b/3rdparty/caffe/src/caffe/layers/accuracy_layer.cu
new file mode 100644
index 00000000..a8cff936
--- /dev/null
+++ b/3rdparty/caffe/src/caffe/layers/accuracy_layer.cu
@@ -0,0 +1,147 @@
+#include <vector>
+
+#include "caffe/layers/accuracy_layer.hpp"
+#include "caffe/util/math_functions.hpp"
+
+
+namespace caffe {
+
+template <typename Dtype>
+__global__ void AccuracyForwardGPU(const int nthreads,
+          const Dtype* bottom_data, const Dtype* label, Dtype* acc,
+          const int num, const int dim, const int spatial_dim,
+          const int num_labels, const int top_k,
+          const bool has_ignore_label_, const int ignore_label_,
+          Dtype* counts) {
+  CUDA_KERNEL_LOOP(index, nthreads) {
+    const int n = index / spatial_dim;
+    const int s = index % spatial_dim;
+    const int label_value = static_cast<int>(label[n * spatial_dim + s]);
+    const Dtype prob_of_true_class = bottom_data[n * dim
+                                                 + label_value * spatial_dim
+                                                 + s];
+    int num_better_predictions = -1;  // true_class also counts as "better"
+    if (has_ignore_label_ && label_value == ignore_label_) {
+      acc[index] = 0;
+      counts[index] = 0;
+    } else {
+      for (int k = 0; k < num_labels && num_better_predictions < top_k; k++) {
+        num_better_predictions +=
+          (bottom_data[n * dim + k * spatial_dim + s] >= prob_of_true_class);
+      }
+      acc[index] = (num_better_predictions < top_k);
+      counts[index] = 1;
+    }
+  }
+}
+
+template <typename Dtype>
+__global__ void AccuracyForwardWithPerClassGPU(const int nthreads,
+          const Dtype* bottom_data, const Dtype* label,
+          Dtype* acc, Dtype* counts,
+          const int num, const int dim, const int spatial_dim,
+          const int num_labels, const int top_k,
+          const bool has_ignore_label_, const int ignore_label_) {
+  CUDA_KERNEL_LOOP(index, nthreads) {
+    const int n = index / spatial_dim;
+    const int s = index % spatial_dim;
+    const int label_value = static_cast<int>(label[n * spatial_dim + s]);
+    const Dtype prob_of_true_class = bottom_data[n * dim
+                                                 + label_value * spatial_dim
+                                                 + s];
+    if (has_ignore_label_ && label_value == ignore_label_) {
+      // nothing to be done.
+    } else {
+      int num_better_predictions = -1;  // true_class also counts as "better"
+      for (int k = 0; k < num_labels && num_better_predictions < top_k; k++) {
+        num_better_predictions +=
+          (bottom_data[n * dim + k * spatial_dim + s] >= prob_of_true_class);
+      }
+      acc[label_value*nthreads + index] += (num_better_predictions < top_k);
+      counts[label_value*nthreads + index] = 1;
+    }
+  }
+}
+
+template <typename Dtype>
+void AccuracyLayer<Dtype>::Forward_gpu(
+    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
+  const Dtype* bottom_data = bottom[0]->gpu_data();
+  const Dtype* bottom_label = bottom[1]->gpu_data();
+  const int dim = bottom[0]->count() / outer_num_;
+  const int num_labels = bottom[0]->shape(label_axis_);
+  const int nthreads = outer_num_ * inner_num_;
+  // Since this memory is not used for anything,
+  // we use it here to avoid having to allocate new GPU
+  // memory to accumulate intermediate results in the kernel.
+  Dtype* acc_data = bottom[0]->mutable_gpu_diff();
+  if (top.size() == 1) {
+    // simple case - report only global accuracy.
+
+    // Similarly, this memory is never used elsewhere, and thus we can use it
+    // to avoid having to allocate additional GPU memory.
+    Dtype* counts = bottom[1]->mutable_gpu_diff();
+    // NOLINT_NEXT_LINE(whitespace/operators)
+    AccuracyForwardGPU<Dtype><<<CAFFE_GET_BLOCKS(nthreads),
+        CAFFE_CUDA_NUM_THREADS>>>(nthreads, bottom_data, bottom_label,
+        acc_data, outer_num_, dim, inner_num_, num_labels, top_k_,
+        has_ignore_label_, ignore_label_, counts);
+    Dtype acc;
+    caffe_gpu_asum(nthreads, acc_data, &acc);
+    Dtype valid_count;
+    caffe_gpu_asum(nthreads, counts, &valid_count);
+    if (valid_count > 0) {
+      top[0]->mutable_cpu_data()[0] = acc / valid_count;
+    } else {
+      top[0]->mutable_cpu_data()[0] = 0;
+    }
+  } else {
+    // need to report per-class accuracy as well
+
+    // allocate space for more detailed "counts"
+    nums_buffer_.ReshapeLike(*bottom[0]);
+    Dtype* counts = nums_buffer_.mutable_gpu_data();
+
+    caffe_gpu_set(bottom[0]->count(), Dtype(0), acc_data);
+    caffe_gpu_set(nums_buffer_.count(), Dtype(0), counts);
+
+    // NOLINT_NEXT_LINE(whitespace/operators)
+    AccuracyForwardWithPerClassGPU<Dtype><<<CAFFE_GET_BLOCKS(nthreads),
+        CAFFE_CUDA_NUM_THREADS>>>(nthreads, bottom_data, bottom_label,
+        acc_data, counts, outer_num_, dim, inner_num_, num_labels, top_k_,
+        has_ignore_label_, ignore_label_);
+
+    // get the overall accuracy
+    Dtype acc;
+    caffe_gpu_asum(bottom[0]->count(), acc_data, &acc);
+    Dtype valid_count;
+    caffe_gpu_asum(nums_buffer_.count(), counts, &valid_count);
+    if (valid_count > 0) {
+      top[0]->mutable_cpu_data()[0] = acc / valid_count;
+    } else {
+      top[0]->mutable_cpu_data()[0] = 0;
+    }
+
+    // get per-class accuracy
+    Dtype* per_class_acc = top[1]->mutable_cpu_data();
+    for (int l = 0; l < num_labels; l++) {
+      caffe_gpu_asum(nthreads, acc_data + l*nthreads, per_class_acc+l);
+      caffe_gpu_asum(nthreads, counts + l*nthreads, &valid_count);
+      if (valid_count > 0) {
+        per_class_acc[l] /= valid_count;
+      } else {
+        per_class_acc[l] = 0;
+      }
+    }
+  }
+}
+
+
+template <typename Dtype>
+void AccuracyLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
+    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
+  if (propagate_down[1]) { NOT_IMPLEMENTED; }
+}
+
+INSTANTIATE_LAYER_GPU_FUNCS(AccuracyLayer);
+}  // namespace caffe
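Two design points in the new `.cu` file are worth spelling out. First, since Accuracy never backpropagates, `bottom[0]->mutable_gpu_diff()` and `bottom[1]->mutable_gpu_diff()` are free to serve as scratch buffers, avoiding extra GPU allocations. Second, `AccuracyForwardWithPerClassGPU` lays its scratch out as a `num_labels x nthreads` matrix (`acc[label_value * nthreads + index]`), so each class occupies a contiguous block of `nthreads` entries and the per-class totals reduce with one `caffe_gpu_asum` over `acc_data + l * nthreads`. A sequential sketch of that layout and reduction (illustrative names; the real reductions run on the GPU):

```cpp
#include <numeric>
#include <vector>

int main() {
  const int num_labels = 3, nthreads = 4;
  // Flat [num_labels x nthreads] scratch, zero-initialized as in the patch.
  std::vector<float> acc(num_labels * nthreads, 0.f);

  // Kernel analogue: thread `index` whose true class is `label` records a hit.
  auto record_hit = [&](int label, int index) {
    acc[label * nthreads + index] += 1.f;
  };
  record_hit(0, 1);
  record_hit(2, 0);
  record_hit(2, 3);

  // Host analogue of caffe_gpu_asum over each contiguous class block.
  std::vector<float> per_class(num_labels);
  for (int l = 0; l < num_labels; ++l) {
    per_class[l] = std::accumulate(acc.begin() + l * nthreads,
                                   acc.begin() + (l + 1) * nthreads, 0.f);
  }
  // per_class == {1, 0, 2}
  return 0;
}
```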
diff --git a/3rdparty/caffe/src/caffe/test/test_accuracy_layer.cpp b/3rdparty/caffe/src/caffe/test/test_accuracy_layer.cpp
index 6fe808bd..e5cc9d5e 100644
--- a/3rdparty/caffe/src/caffe/test/test_accuracy_layer.cpp
+++ b/3rdparty/caffe/src/caffe/test/test_accuracy_layer.cpp
@@ -13,8 +13,10 @@
 
 namespace caffe {
 
-template <typename Dtype>
-class AccuracyLayerTest : public CPUDeviceTest<Dtype> {
+template <typename TypeParam>
+class AccuracyLayerTest : public MultiDeviceTest<TypeParam> {
+  typedef typename TypeParam::Dtype Dtype;
+
  protected:
   AccuracyLayerTest()
       : blob_bottom_data_(new Blob<Dtype>()),
@@ -69,11 +71,12 @@ class AccuracyLayerTest : public CPUDeviceTest<Dtype> {
   int top_k_;
 };
 
-TYPED_TEST_CASE(AccuracyLayerTest, TestDtypes);
+TYPED_TEST_CASE(AccuracyLayerTest, TestDtypesAndDevices);
 
 TYPED_TEST(AccuracyLayerTest, TestSetup) {
+  typedef typename TypeParam::Dtype Dtype;
   LayerParameter layer_param;
-  AccuracyLayer<TypeParam> layer(layer_param);
+  AccuracyLayer<Dtype> layer(layer_param);
   layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
   EXPECT_EQ(this->blob_top_->num(), 1);
   EXPECT_EQ(this->blob_top_->channels(), 1);
@@ -82,11 +85,12 @@ TYPED_TEST(AccuracyLayerTest, TestSetup) {
 }
 
 TYPED_TEST(AccuracyLayerTest, TestSetupTopK) {
+  typedef typename TypeParam::Dtype Dtype;
   LayerParameter layer_param;
   AccuracyParameter* accuracy_param =
       layer_param.mutable_accuracy_param();
   accuracy_param->set_top_k(5);
-  AccuracyLayer<TypeParam> layer(layer_param);
+  AccuracyLayer<Dtype> layer(layer_param);
   layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
   EXPECT_EQ(this->blob_top_->num(), 1);
   EXPECT_EQ(this->blob_top_->channels(), 1);
@@ -95,8 +99,8 @@ TYPED_TEST(AccuracyLayerTest, TestSetupTopK) {
 }
 
 TYPED_TEST(AccuracyLayerTest, TestSetupOutputPerClass) {
+  typedef typename TypeParam::Dtype Dtype;
   LayerParameter layer_param;
-  AccuracyLayer<TypeParam> layer(layer_param);
+  AccuracyLayer<Dtype> layer(layer_param);
   layer.SetUp(this->blob_bottom_vec_, this->blob_top_per_class_vec_);
   EXPECT_EQ(this->blob_top_->num(), 1);
   EXPECT_EQ(this->blob_top_->channels(), 1);
@@ -108,33 +113,39 @@ TYPED_TEST(AccuracyLayerTest, TestSetupOutputPerClass) {
   EXPECT_EQ(this->blob_top_per_class_->width(), 1);
 }
 
-TYPED_TEST(AccuracyLayerTest, TestForwardCPU) {
+TYPED_TEST(AccuracyLayerTest, TestForward) {
+  typedef typename TypeParam::Dtype Dtype;
   LayerParameter layer_param;
-  AccuracyLayer<TypeParam> layer(layer_param);
+  AccuracyLayer<Dtype> layer(layer_param);
   layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
-  layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
-
-  TypeParam max_value;
-  int max_id;
-  int num_correct_labels = 0;
-  for (int i = 0; i < 100; ++i) {
-    max_value = -FLT_MAX;
-    max_id = 0;
-    for (int j = 0; j < 10; ++j) {
-      if (this->blob_bottom_data_->data_at(i, j, 0, 0) > max_value) {
-        max_value = this->blob_bottom_data_->data_at(i, j, 0, 0);
-        max_id = j;
+
+  // repeat the forward
+  for (int iter = 0; iter < 3; iter++) {
+    layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+
+    Dtype max_value;
+    int max_id;
+    int num_correct_labels = 0;
+    for (int i = 0; i < 100; ++i) {
+      max_value = -FLT_MAX;
+      max_id = 0;
+      for (int j = 0; j < 10; ++j) {
+        if (this->blob_bottom_data_->data_at(i, j, 0, 0) > max_value) {
+          max_value = this->blob_bottom_data_->data_at(i, j, 0, 0);
+          max_id = j;
+        }
+      }
+      if (max_id == this->blob_bottom_label_->data_at(i, 0, 0, 0)) {
+        ++num_correct_labels;
       }
     }
-    if (max_id == this->blob_bottom_label_->data_at(i, 0, 0, 0)) {
-      ++num_correct_labels;
-    }
+    EXPECT_NEAR(this->blob_top_->data_at(0, 0, 0, 0),
+                num_correct_labels / Dtype(100.0), 1e-4);
   }
-  EXPECT_NEAR(this->blob_top_->data_at(0, 0, 0, 0),
-              num_correct_labels / 100.0, 1e-4);
 }
 
 TYPED_TEST(AccuracyLayerTest, TestForwardWithSpatialAxes) {
+  typedef typename TypeParam::Dtype Dtype;
   this->blob_bottom_data_->Reshape(2, 10, 4, 5);
   vector<int> label_shape(3);
   label_shape[0] = 2; label_shape[1] = 4; label_shape[2] = 5;
@@ -142,195 +153,218 @@ TYPED_TEST(AccuracyLayerTest, TestForwardWithSpatialAxes) {
   this->FillBottoms();
   LayerParameter layer_param;
   layer_param.mutable_accuracy_param()->set_axis(1);
-  AccuracyLayer<TypeParam> layer(layer_param);
+  AccuracyLayer<Dtype> layer(layer_param);
   layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
-  layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
-
-  TypeParam max_value;
-  const int num_labels = this->blob_bottom_label_->count();
-  int max_id;
-  int num_correct_labels = 0;
-  vector<int> label_offset(3);
-  for (int n = 0; n < this->blob_bottom_data_->num(); ++n) {
-    for (int h = 0; h < this->blob_bottom_data_->height(); ++h) {
-      for (int w = 0; w < this->blob_bottom_data_->width(); ++w) {
-        max_value = -FLT_MAX;
-        max_id = 0;
-        for (int c = 0; c < this->blob_bottom_data_->channels(); ++c) {
-          const TypeParam pred_value =
-              this->blob_bottom_data_->data_at(n, c, h, w);
-          if (pred_value > max_value) {
-            max_value = pred_value;
-            max_id = c;
+
+  // repeat the forward
+  for (int iter = 0; iter < 3; iter++) {
+    layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+
+    Dtype max_value;
+    const int num_labels = this->blob_bottom_label_->count();
+    int max_id;
+    int num_correct_labels = 0;
+    vector<int> label_offset(3);
+    for (int n = 0; n < this->blob_bottom_data_->num(); ++n) {
+      for (int h = 0; h < this->blob_bottom_data_->height(); ++h) {
+        for (int w = 0; w < this->blob_bottom_data_->width(); ++w) {
+          max_value = -FLT_MAX;
+          max_id = 0;
+          for (int c = 0; c < this->blob_bottom_data_->channels(); ++c) {
+            const Dtype pred_value =
+                this->blob_bottom_data_->data_at(n, c, h, w);
+            if (pred_value > max_value) {
+              max_value = pred_value;
+              max_id = c;
+            }
+          }
+          label_offset[0] = n; label_offset[1] = h; label_offset[2] = w;
+          const int correct_label =
+              static_cast<int>(this->blob_bottom_label_->data_at(label_offset));
+          if (max_id == correct_label) {
+            ++num_correct_labels;
           }
-        }
-        label_offset[0] = n; label_offset[1] = h; label_offset[2] = w;
-        const int correct_label =
-            static_cast<int>(this->blob_bottom_label_->data_at(label_offset));
-        if (max_id == correct_label) {
-          ++num_correct_labels;
         }
       }
     }
+    EXPECT_NEAR(this->blob_top_->data_at(0, 0, 0, 0),
+                num_correct_labels / Dtype(num_labels), 1e-4);
   }
-  EXPECT_NEAR(this->blob_top_->data_at(0, 0, 0, 0),
-              num_correct_labels / TypeParam(num_labels), 1e-4);
 }
 
 TYPED_TEST(AccuracyLayerTest, TestForwardIgnoreLabel) {
+  typedef typename TypeParam::Dtype Dtype;
   LayerParameter layer_param;
-  const TypeParam kIgnoreLabelValue = -1;
+  const Dtype kIgnoreLabelValue = -1;
   layer_param.mutable_accuracy_param()->set_ignore_label(kIgnoreLabelValue);
-  AccuracyLayer<TypeParam> layer(layer_param);
+  AccuracyLayer<Dtype> layer(layer_param);
   // Manually set some labels to the ignore label value (-1).
   this->blob_bottom_label_->mutable_cpu_data()[2] = kIgnoreLabelValue;
   this->blob_bottom_label_->mutable_cpu_data()[5] = kIgnoreLabelValue;
   this->blob_bottom_label_->mutable_cpu_data()[32] = kIgnoreLabelValue;
   layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
-  layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
-
-  TypeParam max_value;
-  int max_id;
-  int num_correct_labels = 0;
-  int count = 0;
-  for (int i = 0; i < 100; ++i) {
-    if (kIgnoreLabelValue == this->blob_bottom_label_->data_at(i, 0, 0, 0)) {
-      continue;
-    }
-    ++count;
-    max_value = -FLT_MAX;
-    max_id = 0;
-    for (int j = 0; j < 10; ++j) {
-      if (this->blob_bottom_data_->data_at(i, j, 0, 0) > max_value) {
-        max_value = this->blob_bottom_data_->data_at(i, j, 0, 0);
-        max_id = j;
+
+  // repeat the forward
+  for (int iter = 0; iter < 3; iter++) {
+    layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+
+    Dtype max_value;
+    int max_id;
+    int num_correct_labels = 0;
+    int count = 0;
+    for (int i = 0; i < 100; ++i) {
+      if (kIgnoreLabelValue == this->blob_bottom_label_->data_at(i, 0, 0, 0)) {
+        continue;
+      }
+      ++count;
+      max_value = -FLT_MAX;
+      max_id = 0;
+      for (int j = 0; j < 10; ++j) {
+        if (this->blob_bottom_data_->data_at(i, j, 0, 0) > max_value) {
+          max_value = this->blob_bottom_data_->data_at(i, j, 0, 0);
+          max_id = j;
+        }
+      }
+      if (max_id == this->blob_bottom_label_->data_at(i, 0, 0, 0)) {
+        ++num_correct_labels;
       }
     }
-    if (max_id == this->blob_bottom_label_->data_at(i, 0, 0, 0)) {
-      ++num_correct_labels;
-    }
+    EXPECT_EQ(count, 97);  // We set 3 out of 100 labels to kIgnoreLabelValue.
+    EXPECT_NEAR(this->blob_top_->data_at(0, 0, 0, 0),
+                num_correct_labels / Dtype(count), 1e-4);
   }
-  EXPECT_EQ(count, 97);  // We set 3 out of 100 labels to kIgnoreLabelValue.
-  EXPECT_NEAR(this->blob_top_->data_at(0, 0, 0, 0),
-              num_correct_labels / TypeParam(count), 1e-4);
 }
 
-TYPED_TEST(AccuracyLayerTest, TestForwardCPUTopK) {
+TYPED_TEST(AccuracyLayerTest, TestForwardTopK) {
+  typedef typename TypeParam::Dtype Dtype;
   LayerParameter layer_param;
   AccuracyParameter* accuracy_param = layer_param.mutable_accuracy_param();
   accuracy_param->set_top_k(this->top_k_);
-  AccuracyLayer<TypeParam> layer(layer_param);
+  AccuracyLayer<Dtype> layer(layer_param);
   layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
-  layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
-
-  TypeParam current_value;
-  int current_rank;
-  int num_correct_labels = 0;
-  for (int i = 0; i < 100; ++i) {
-    for (int j = 0; j < 10; ++j) {
-      current_value = this->blob_bottom_data_->data_at(i, j, 0, 0);
-      current_rank = 0;
-      for (int k = 0; k < 10; ++k) {
-        if (this->blob_bottom_data_->data_at(i, k, 0, 0) > current_value) {
-          ++current_rank;
+
+  // repeat the forward
+  for (int iter = 0; iter < 3; iter++) {
+    layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+
+    Dtype current_value;
+    int current_rank;
+    int num_correct_labels = 0;
+    for (int i = 0; i < 100; ++i) {
+      for (int j = 0; j < 10; ++j) {
+        current_value = this->blob_bottom_data_->data_at(i, j, 0, 0);
+        current_rank = 0;
+        for (int k = 0; k < 10; ++k) {
+          if (this->blob_bottom_data_->data_at(i, k, 0, 0) > current_value) {
+            ++current_rank;
+          }
+        }
+        if (current_rank < this->top_k_ &&
+            j == this->blob_bottom_label_->data_at(i, 0, 0, 0)) {
+          ++num_correct_labels;
         }
-      }
-      if (current_rank < this->top_k_ &&
-          j == this->blob_bottom_label_->data_at(i, 0, 0, 0)) {
-        ++num_correct_labels;
       }
     }
+    EXPECT_NEAR(this->blob_top_->data_at(0, 0, 0, 0),
+                num_correct_labels / Dtype(100.0), 1e-4);
   }
-  EXPECT_NEAR(this->blob_top_->data_at(0, 0, 0, 0),
-              num_correct_labels / 100.0, 1e-4);
 }
 
-TYPED_TEST(AccuracyLayerTest, TestForwardCPUPerClass) {
+TYPED_TEST(AccuracyLayerTest, TestForwardPerClass) {
+  typedef typename TypeParam::Dtype Dtype;
   LayerParameter layer_param;
-  AccuracyLayer<TypeParam> layer(layer_param);
+  AccuracyLayer<Dtype> layer(layer_param);
   layer.SetUp(this->blob_bottom_vec_, this->blob_top_per_class_vec_);
-  layer.Forward(this->blob_bottom_vec_, this->blob_top_per_class_vec_);
-
-  TypeParam max_value;
-  int max_id;
-  int num_correct_labels = 0;
-  const int num_class = this->blob_top_per_class_->num();
-  vector<int> correct_per_class(num_class, 0);
-  vector<int> num_per_class(num_class, 0);
-  for (int i = 0; i < 100; ++i) {
-    max_value = -FLT_MAX;
-    max_id = 0;
-    for (int j = 0; j < 10; ++j) {
-      if (this->blob_bottom_data_->data_at(i, j, 0, 0) > max_value) {
-        max_value = this->blob_bottom_data_->data_at(i, j, 0, 0);
-        max_id = j;
+  // repeat the forward
+  for (int iter = 0; iter < 3; iter++) {
+    layer.Forward(this->blob_bottom_vec_, this->blob_top_per_class_vec_);
+
+    Dtype max_value;
+    int max_id;
+    int num_correct_labels = 0;
+    const int num_class = this->blob_top_per_class_->num();
+    vector<int> correct_per_class(num_class, 0);
+    vector<int> num_per_class(num_class, 0);
+    for (int i = 0; i < 100; ++i) {
+      max_value = -FLT_MAX;
+      max_id = 0;
+      for (int j = 0; j < 10; ++j) {
+        if (this->blob_bottom_data_->data_at(i, j, 0, 0) > max_value) {
+          max_value = this->blob_bottom_data_->data_at(i, j, 0, 0);
+          max_id = j;
+        }
+      }
+      ++num_per_class[this->blob_bottom_label_->data_at(i, 0, 0, 0)];
+      if (max_id == this->blob_bottom_label_->data_at(i, 0, 0, 0)) {
+        ++num_correct_labels;
+        ++correct_per_class[max_id];
       }
     }
-    ++num_per_class[this->blob_bottom_label_->data_at(i, 0, 0, 0)];
-    if (max_id == this->blob_bottom_label_->data_at(i, 0, 0, 0)) {
-      ++num_correct_labels;
-      ++correct_per_class[max_id];
+    EXPECT_NEAR(this->blob_top_->data_at(0, 0, 0, 0),
+                num_correct_labels / 100.0, 1e-4);
+    for (int i = 0; i < num_class; ++i) {
+      Dtype accuracy_per_class = (num_per_class[i] > 0 ?
+          static_cast<Dtype>(correct_per_class[i]) / num_per_class[i] : 0);
+      EXPECT_NEAR(this->blob_top_per_class_->data_at(i, 0, 0, 0),
+                  accuracy_per_class, 1e-4);
     }
   }
-  EXPECT_NEAR(this->blob_top_->data_at(0, 0, 0, 0),
-              num_correct_labels / 100.0, 1e-4);
-  for (int i = 0; i < num_class; ++i) {
-    TypeParam accuracy_per_class = (num_per_class[i] > 0 ?
-        static_cast<TypeParam>(correct_per_class[i]) / num_per_class[i] : 0);
-    EXPECT_NEAR(this->blob_top_per_class_->data_at(i, 0, 0, 0),
-                accuracy_per_class, 1e-4);
-  }
 }
 
-TYPED_TEST(AccuracyLayerTest, TestForwardCPUPerClassWithIgnoreLabel) {
+TYPED_TEST(AccuracyLayerTest, TestForwardPerClassWithIgnoreLabel) {
+  typedef typename TypeParam::Dtype Dtype;
   LayerParameter layer_param;
-  const TypeParam kIgnoreLabelValue = -1;
+  const Dtype kIgnoreLabelValue = -1;
   layer_param.mutable_accuracy_param()->set_ignore_label(kIgnoreLabelValue);
-  AccuracyLayer<TypeParam> layer(layer_param);
+  AccuracyLayer<Dtype> layer(layer_param);
   // Manually set some labels to the ignore label value (-1).
   this->blob_bottom_label_->mutable_cpu_data()[2] = kIgnoreLabelValue;
   this->blob_bottom_label_->mutable_cpu_data()[5] = kIgnoreLabelValue;
   this->blob_bottom_label_->mutable_cpu_data()[32] = kIgnoreLabelValue;
   layer.SetUp(this->blob_bottom_vec_, this->blob_top_per_class_vec_);
-  layer.Forward(this->blob_bottom_vec_, this->blob_top_per_class_vec_);
-
-  TypeParam max_value;
-  int max_id;
-  int num_correct_labels = 0;
-  const int num_class = this->blob_top_per_class_->num();
-  vector<int> correct_per_class(num_class, 0);
-  vector<int> num_per_class(num_class, 0);
-  int count = 0;
-  for (int i = 0; i < 100; ++i) {
-    if (kIgnoreLabelValue == this->blob_bottom_label_->data_at(i, 0, 0, 0)) {
-      continue;
-    }
-    ++count;
-    max_value = -FLT_MAX;
-    max_id = 0;
-    for (int j = 0; j < 10; ++j) {
-      if (this->blob_bottom_data_->data_at(i, j, 0, 0) > max_value) {
-        max_value = this->blob_bottom_data_->data_at(i, j, 0, 0);
-        max_id = j;
+
+  // repeat the forward
+  for (int iter = 0; iter < 3; iter++) {
+    layer.Forward(this->blob_bottom_vec_, this->blob_top_per_class_vec_);
+
+    Dtype max_value;
+    int max_id;
+    int num_correct_labels = 0;
+    const int num_class = this->blob_top_per_class_->num();
+    vector<int> correct_per_class(num_class, 0);
+    vector<int> num_per_class(num_class, 0);
+    int count = 0;
+    for (int i = 0; i < 100; ++i) {
+      if (kIgnoreLabelValue == this->blob_bottom_label_->data_at(i, 0, 0, 0)) {
+        continue;
+      }
+      ++count;
+      max_value = -FLT_MAX;
+      max_id = 0;
+      for (int j = 0; j < 10; ++j) {
+        if (this->blob_bottom_data_->data_at(i, j, 0, 0) > max_value) {
+          max_value = this->blob_bottom_data_->data_at(i, j, 0, 0);
+          max_id = j;
+        }
+      }
+      ++num_per_class[this->blob_bottom_label_->data_at(i, 0, 0, 0)];
+      if (max_id == this->blob_bottom_label_->data_at(i, 0, 0, 0)) {
+        ++num_correct_labels;
+        ++correct_per_class[max_id];
       }
     }
-    ++num_per_class[this->blob_bottom_label_->data_at(i, 0, 0, 0)];
-    if (max_id == this->blob_bottom_label_->data_at(i, 0, 0, 0)) {
-      ++num_correct_labels;
-      ++correct_per_class[max_id];
+    EXPECT_EQ(count, 97);
+    EXPECT_NEAR(this->blob_top_->data_at(0, 0, 0, 0),
+                num_correct_labels / Dtype(count), 1e-4);
+    for (int i = 0; i < 10; ++i) {
+      Dtype accuracy_per_class = (num_per_class[i] > 0 ?
+          static_cast<Dtype>(correct_per_class[i]) / num_per_class[i] : 0);
+      EXPECT_NEAR(this->blob_top_per_class_->data_at(i, 0, 0, 0),
+                  accuracy_per_class, 1e-4);
     }
   }
-  EXPECT_EQ(count, 97);
-  EXPECT_NEAR(this->blob_top_->data_at(0, 0, 0, 0),
-              num_correct_labels / TypeParam(count), 1e-4);
-  for (int i = 0; i < 10; ++i) {
-    TypeParam accuracy_per_class = (num_per_class[i] > 0 ?
-        static_cast<TypeParam>(correct_per_class[i]) / num_per_class[i] : 0);
-    EXPECT_NEAR(this->blob_top_per_class_->data_at(i, 0, 0, 0),
-                accuracy_per_class, 1e-4);
-  }
 }
 
 }  // namespace caffe
diff --git a/3rdparty/caffe/src/caffe/test/test_filler.cpp b/3rdparty/caffe/src/caffe/test/test_filler.cpp
index 26e9b217..f84d707b 100644
--- a/3rdparty/caffe/src/caffe/test/test_filler.cpp
+++ b/3rdparty/caffe/src/caffe/test/test_filler.cpp
@@ -29,7 +29,7 @@ TYPED_TEST(ConstantFillerTest, TestFill) {
   const int count = this->blob_->count();
   const TypeParam* data = this->blob_->cpu_data();
   for (int i = 0; i < count; ++i) {
-    EXPECT_GE(data[i], this->filler_param_.value());
+    EXPECT_EQ(data[i], this->filler_param_.value());
   }
 }
 
@@ -238,4 +238,45 @@ TYPED_TEST(MSRAFillerTest, TestFillAverage) {
   this->test_params(FillerParameter_VarianceNorm_AVERAGE, n);
 }
 
+template <typename Dtype>
+class BilinearFillerTest : public ::testing::Test {
+ protected:
+  BilinearFillerTest() : filler_param_() {}
+  virtual void test_params(const int n) {
+    this->blob_ = new Blob<Dtype>(1000, 2, n, n);
+    this->filler_.reset(new BilinearFiller<Dtype>(this->filler_param_));
+    this->filler_->Fill(blob_);
+    EXPECT_TRUE(this->blob_);
+    const int outer_num = this->blob_->count(0, 2);
+    const int inner_num = this->blob_->count(2, 4);
+    const Dtype* data = this->blob_->cpu_data();
+    int f = ceil(this->blob_->width() / 2.);
+    Dtype c = (this->blob_->width() - 1) / (2. * f);
+    for (int i = 0; i < outer_num; ++i) {
+      for (int j = 0; j < inner_num; ++j) {
+        Dtype x = j % this->blob_->width();
+        Dtype y = (j / this->blob_->width()) % this->blob_->height();
+        Dtype expected_value = (1 - fabs(x / f - c)) * (1 - fabs(y / f - c));
+        const Dtype actual_value = data[i * inner_num + j];
+        EXPECT_NEAR(expected_value, actual_value, 0.01);
+      }
+    }
+  }
+  virtual ~BilinearFillerTest() { delete blob_; }
+  Blob<Dtype>* blob_;
+  FillerParameter filler_param_;
+  shared_ptr<BilinearFiller<Dtype> > filler_;
+};
+
+TYPED_TEST_CASE(BilinearFillerTest, TestDtypes);
+
+TYPED_TEST(BilinearFillerTest, TestFillOdd) {
+  const int n = 7;
+  this->test_params(n);
+}
+TYPED_TEST(BilinearFillerTest, TestFillEven) {
+  const int n = 6;
+  this->test_params(n);
+}
+
 }  // namespace caffe
-- 
GitLab