\n", + " **W1**\n", + " | \n", + "\n", + " [[ 0. 0. 0.]\n", + " [ 0. 0. 0.]]\n", + " | \n", + "
\n", + " **b1**\n", + " | \n", + "\n", + " [[ 0.]\n", + " [ 0.]]\n", + " | \n", + "
\n", + " **W2**\n", + " | \n", + "\n", + " [[ 0. 0.]]\n", + " | \n", + "
\n", + " **b2**\n", + " | \n", + "\n", + " [[ 0.]]\n", + " | \n", + "
\n", + " **W1**\n", + " | \n", + "\n", + " [[ 17.88628473 4.36509851 0.96497468]\n", + " [-18.63492703 -2.77388203 -3.54758979]]\n", + " | \n", + "
\n", + " **b1**\n", + " | \n", + "\n", + " [[ 0.]\n", + " [ 0.]]\n", + " | \n", + "
\n", + " **W2**\n", + " | \n", + "\n", + " [[-0.82741481 -6.27000677]]\n", + " | \n", + "
\n", + " **b2**\n", + " | \n", + "\n", + " [[ 0.]]\n", + " | \n", + "
\n", + " **W1**\n", + " | \n", + "\n", + " [[ 1.78862847 0.43650985]\n", + " [ 0.09649747 -1.8634927 ]\n", + " [-0.2773882 -0.35475898]\n", + " [-0.08274148 -0.62700068]]\n", + " | \n", + "
\n", + " **b1**\n", + " | \n", + "\n", + " [[ 0.]\n", + " [ 0.]\n", + " [ 0.]\n", + " [ 0.]]\n", + " | \n", + "
\n", + " **W2**\n", + " | \n", + "\n", + " [[-0.03098412 -0.33744411 -0.92904268 0.62552248]]\n", + " | \n", + "
\n", + " **b2**\n", + " | \n", + "\n", + " [[ 0.]]\n", + " | \n", + "
\n", + " **Model**\n", + " | \n", + "\n", + " **Train accuracy**\n", + " | \n", + "\n", + " **Problem/Comment**\n", + " | \n", + "\n", + "\n", + " 3-layer NN with zeros initialization\n", + " | \n", + "\n", + " 50%\n", + " | \n", + "\n", + " fails to break symmetry\n", + " | \n", + "
\n", + " 3-layer NN with large random initialization\n", + " | \n", + "\n", + " 83%\n", + " | \n", + "\n", + " too large weights \n", + " | \n", + "
\n", + " 3-layer NN with He initialization\n", + " | \n", + "\n", + " 99%\n", + " | \n", + "\n", + " recommended method\n", + " | \n", + "
\n", + " **cost**\n", + " | \n", + "\n", + " 1.78648594516\n", + " | \n", + " \n", + "
\n", + " **dW1**\n", + " | \n", + "\n", + " [[-0.25604646 0.12298827 -0.28297129]\n", + " [-0.17706303 0.34536094 -0.4410571 ]]\n", + " | \n", + "
\n", + " **dW2**\n", + " | \n", + "\n", + " [[ 0.79276486 0.85133918]\n", + " [-0.0957219 -0.01720463]\n", + " [-0.13100772 -0.03750433]]\n", + " | \n", + "
\n", + " **dW3**\n", + " | \n", + "\n", + " [[-1.77691347 -0.11832879 -0.09397446]]\n", + " | \n", + "
\n", + " **dA1**\n", + " | \n", + "\n", + " [[ 0.36544439 0. -0.00188233 0. -0.17408748]\n", + " [ 0.65515713 0. -0.00337459 0. -0. ]]\n", + " | \n", + " \n", + "
\n", + " **dA2**\n", + " | \n", + "\n", + " [[ 0.58180856 0. -0.00299679 0. -0.27715731]\n", + " [ 0. 0.53159854 -0. 0.53159854 -0.34089673]\n", + " [ 0. 0. -0.00292733 0. -0. ]]\n", + " | \n", + " \n", + "
\n", + " **model**\n", + " | \n", + "\n", + " **train accuracy**\n", + " | \n", + "\n", + " **test accuracy**\n", + " | \n", + "\n", + "\n", + " 3-layer NN without regularization\n", + " | \n", + "\n", + " 95%\n", + " | \n", + "\n", + " 91.5%\n", + " | \n", + "
\n", + " 3-layer NN with L2-regularization\n", + " | \n", + "\n", + " 94%\n", + " | \n", + "\n", + " 93%\n", + " | \n", + "
\n", + " 3-layer NN with dropout\n", + " | \n", + "\n", + " 93%\n", + " | \n", + "\n", + " 95%\n", + " | \n", + "