# lenet_consolidated_solver.prototxt consolidates the lenet_solver, lenet_train,
# and lenet_test prototxts into a single file.  It also adds an additional test
# net which runs on the training set, e.g., for the purpose of comparing
# train/test accuracy (accuracy is computed only on the test set in the included
# LeNet example).  This is mainly included as an example of using these features
# (specify NetParameters directly in the solver, specify multiple test nets)
# if desired.
#
# Carry out testing every 500 training iterations.
test_interval: 500
# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.01
momentum: 0.9
weight_decay: 0.0005
# The learning rate policy
lr_policy: "inv"
gamma: 0.0001
power: 0.75
# Display every 100 iterations
display: 100
# The maximum number of iterations
max_iter: 10000
# snapshot intermediate results
snapshot: 5000
snapshot_prefix: "examples/mnist/lenet"
# Set a random_seed for repeatable results.
# (For results that vary due to random initialization, comment out the below
# line, or set to a negative integer -- e.g. "random_seed: -1")
random_seed: 1701
# solver mode: CPU or GPU
solver_mode: GPU

# We test on both the test and train set using "stages".  The TEST DATA layers
# each have a stage, either 'test-on-train-set' or 'test-on-test-set'.
# test_iter specifies how many forward passes the test should carry out.
# In the case of MNIST, we have test batch size 100 and 100 test iterations,
# covering the full 10,000 testing images.
test_iter: 100
test_state: { stage: "test-on-test-set" }
# The train set has 60K images, so we run 600 test iters (600 * 100 = 60K).
test_iter: 600
test_state: { stage: "test-on-train-set" }

# The net protocol buffer definition
net_param {
  name: "LeNet"
  layers {
    name: "mnist"
    type: DATA
    top: "data"
    top: "label"
    data_param {
      source: "examples/mnist/mnist_train_lmdb"
      backend: LMDB
      batch_size: 64
    }
    transform_param {
      scale: 0.00390625
    }
    include: { phase: TRAIN }
  }
  layers {
    name: "mnist"
    type: DATA
    top: "data"
    top: "label"
    data_param {
      source: "examples/mnist/mnist_test_lmdb"
      backend: LMDB
      batch_size: 100
    }
    transform_param {
      scale: 0.00390625
    }
    include: {
      phase: TEST
      stage: "test-on-test-set"
    }
  }
  layers {
    name: "mnist"
    type: DATA
    top: "data"
    top: "label"
    data_param {
      source: "examples/mnist/mnist_train_lmdb"
      backend: LMDB
      batch_size: 100
    }
    transform_param {
      scale: 0.00390625
    }
    include: {
      phase: TEST
      stage: "test-on-train-set"
    }
  }
  layers {
    name: "conv1"
    type: CONVOLUTION
    bottom: "data"
    top: "conv1"
    blobs_lr: 1
    blobs_lr: 2
    convolution_param {
      num_output: 20
      kernel_size: 5
      stride: 1
      weight_filler {
        type: "xavier"
      }
      bias_filler {
        type: "constant"
      }
    }
  }
  layers {
    name: "pool1"
    type: POOLING
    bottom: "conv1"
    top: "pool1"
    pooling_param {
      pool: MAX
      kernel_size: 2
      stride: 2
    }
  }
  layers {
    name: "conv2"
    type: CONVOLUTION
    bottom: "pool1"
    top: "conv2"
    blobs_lr: 1
    blobs_lr: 2
    convolution_param {
      num_output: 50
      kernel_size: 5
      stride: 1
      weight_filler {
        type: "xavier"
      }
      bias_filler {
        type: "constant"
      }
    }
  }
  layers {
    name: "pool2"
    type: POOLING
    bottom: "conv2"
    top: "pool2"
    pooling_param {
      pool: MAX
      kernel_size: 2
      stride: 2
    }
  }
  layers {
    name: "ip1"
    type: INNER_PRODUCT
    bottom: "pool2"
    top: "ip1"
    blobs_lr: 1
    blobs_lr: 2
    inner_product_param {
      num_output: 500
      weight_filler {
        type: "xavier"
      }
      bias_filler {
        type: "constant"
      }
    }
  }
  layers {
    name: "relu1"
    type: RELU
    bottom: "ip1"
    top: "ip1"
  }
  layers {
    name: "ip2"
    type: INNER_PRODUCT
    bottom: "ip1"
    top: "ip2"
    blobs_lr: 1
    blobs_lr: 2
    inner_product_param {
      num_output: 10
      weight_filler {
        type: "xavier"
      }
      bias_filler {
        type: "constant"
      }
    }
  }
  layers {
    name: "accuracy"
    type: ACCURACY
    bottom: "ip2"
    bottom: "label"
    top: "accuracy"
  }
  layers {
    name: "loss"
    type: SOFTMAX_LOSS
    bottom: "ip2"
    bottom: "label"
    top: "loss"
  }
}

# Expected results for first and last 500 iterations:
# (with portions of log omitted for brevity)
#
# Iteration 0, Testing net (#0)
# Test score #0: 0.067
# Test score #1: 2.30256
# Iteration 0, Testing net (#1)
# Test score #0: 0.0670334
# Test score #1: 2.30258
# Iteration 100, lr = 0.00992565
# Iteration 100, loss = 0.280585
# Iteration 200, lr = 0.00985258
# Iteration 200, loss = 0.345601
# Iteration 300, lr = 0.00978075
# Iteration 300, loss = 0.172217
# Iteration 400, lr = 0.00971013
# Iteration 400, loss = 0.261836
# Iteration 500, lr = 0.00964069
# Iteration 500, loss = 0.157803
# Iteration 500, Testing net (#0)
# Test score #0: 0.968
# Test score #1: 0.0993772
# Iteration 500, Testing net (#1)
# Test score #0: 0.965883
# Test score #1: 0.109374
#
# [...]
#
# Iteration 9500, Testing net (#0)
# Test score #0: 0.9899
# Test score #1: 0.0308299
# Iteration 9500, Testing net (#1)
# Test score #0: 0.996816
# Test score #1: 0.0118238
# Iteration 9600, lr = 0.00603682
# Iteration 9600, loss = 0.0126215
# Iteration 9700, lr = 0.00601382
# Iteration 9700, loss = 0.00579304
# Iteration 9800, lr = 0.00599102
# Iteration 9800, loss = 0.00500633
# Iteration 9900, lr = 0.00596843
# Iteration 9900, loss = 0.00796607
# Iteration 10000, lr = 0.00594604
# Iteration 10000, loss = 0.00271736
# Iteration 10000, Testing net (#0)
# Test score #0: 0.9914
# Test score #1: 0.0276671
# Iteration 10000, Testing net (#1)
# Test score #0: 0.997782
# Test score #1: 0.00908085