/**
 * @file
 * @author [Deep Raval](https://github.com/imdeep2905)
 *
 * @brief Implementation of [Multilayer Perceptron]
 * (https://en.wikipedia.org/wiki/Multilayer_perceptron).
 *
 * @details
 * A multilayer perceptron (MLP) is a class of feedforward artificial neural
 * network (ANN). The term MLP is used ambiguously: sometimes loosely to mean
 * any feedforward ANN, and sometimes strictly to refer to networks composed
 * of multiple layers of perceptrons (with threshold activation). Multilayer
 * perceptrons are sometimes colloquially referred to as "vanilla" neural
 * networks, especially when they have a single hidden layer.
 *
 * An MLP consists of at least three layers of nodes: an input layer, a hidden
 * layer and an output layer. Except for the input nodes, each node is a neuron
 * that uses a nonlinear activation function. MLP utilizes a supervised learning
 * technique called backpropagation for training. Its multiple layers and
 * non-linear activation distinguish MLP from a linear perceptron. It can
 * distinguish data that is not linearly separable.
 *
 * See [Backpropagation](https://en.wikipedia.org/wiki/Backpropagation) for
 * the training algorithm.
 *
 * \note This implementation uses mini-batch gradient descent as the
 * optimizer and MSE as the loss function. Bias terms are not included.
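 *
 * For a layer with kernal (weight matrix) \f$W\f$ and activation
 * \f$\sigma\f$, the forward pass below computes
 * \f$a^{(l)} = \sigma\left(a^{(l-1)} W\right)\f$, and training minimizes
 * the mean squared error
 * \f$L = \frac{1}{N} \sum_{i=1}^{N} \lVert \hat{y}_i - y_i \rVert^2\f$.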
 */

#include <algorithm>
#include <cassert>
#include <chrono>
#include <cmath>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <valarray>
#include <vector>

#include "vector_ops.hpp"  // Custom header file for vector operations

/** \namespace machine_learning
 * \brief Machine learning algorithms
 */
namespace machine_learning {
/** \namespace neural_network
 * \brief Neural Network or Multilayer Perceptron
 */
namespace neural_network {
/** \namespace activations
 * \brief Various activation functions used in Neural network
 */
namespace activations {
/**
 * Sigmoid function
 * @param x Value
 * @return Returns sigmoid(x)
 */
double sigmoid(const double &x) { return 1.0 / (1.0 + std::exp(-x)); }

/**
 * Derivative of sigmoid function (takes the already-activated value as input)
 * @param x value of sigmoid(z)
 * @return Returns sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z))
 */
double dsigmoid(const double &x) { return x * (1 - x); }

/**
 * Relu function
 * @param x Value
 * @returns relu(x)
 */
double relu(const double &x) { return std::max(0.0, x); }

/**
 * Derivative of relu function
 * @param x Value
 * @returns derivative of relu(x)
 */
double drelu(const double &x) { return x >= 0.0 ? 1.0 : 0.0; }

/**
 * Tanh function
 * @param x Value
 * @return Returns tanh(x)
 */
double tanh(const double &x) { return 2 / (1 + std::exp(-2 * x)) - 1; }

/**
 * Derivative of tanh function (takes the already-activated value as input)
 * @param x value of tanh(z)
 * @return Returns tanh'(z) = 1 - tanh(z)^2
 */
double dtanh(const double &x) { return 1 - x * x; }
}  // namespace activations
/** \namespace util_functions
 * \brief Various utility functions used in Neural network
 */
namespace util_functions {
/**
 * Square function
 * @param x Value
 * @return Returns x * x
 */
double square(const double &x) { return x * x; }
/**
 * Identity function
 * @param x Value
 * @return Returns x
 */
double identity_function(const double &x) { return x; }
}  // namespace util_functions
/** \namespace layers
 * \brief This namespace contains layers used
 * in MLP.
 */
namespace layers {
/**
 * neural_network::layers::DenseLayer class is used to store all necessary
 * information about the layers (i.e. neurons, activation and kernal). This
 * class is used by NeuralNetwork class to store layers.
 *
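 * A hypothetical construction sketch (shapes chosen for illustration):
 * \code
 * // 6-neuron relu layer with a randomly initialized 4x6 kernal
 * neural_network::layers::DenseLayer hidden(6, "relu", {4, 6}, true);
 * \endcode
 *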
 */
class DenseLayer {
 public:
    // To store activation function and its derivative
    double (*activation_function)(const double &);
    double (*dactivation_function)(const double &);
    int neurons;             // To store number of neurons (used in summary)
    std::string activation;  // To store activation name (used in summary)
    std::vector<std::valarray<double>> kernal;  // To store kernal (aka weights)

    /**
     * Constructor for neural_network::layers::DenseLayer class
     * @param neurons number of neurons
     * @param activation activation function for layer
     * @param kernal_shape shape of kernal
     * @param random_kernal flag for whether to initialize kernal randomly
     */
    DenseLayer(const int &neurons, const std::string &activation,
               const std::pair<size_t, size_t> &kernal_shape,
               const bool &random_kernal) {
        // Choosing activation (and it's derivative)
        if (activation == "sigmoid") {
            activation_function = neural_network::activations::sigmoid;
            dactivation_function = neural_network::activations::dsigmoid;
        } else if (activation == "relu") {
            activation_function = neural_network::activations::relu;
            dactivation_function = neural_network::activations::drelu;
        } else if (activation == "tanh") {
            activation_function = neural_network::activations::tanh;
            dactivation_function = neural_network::activations::dtanh;
        } else if (activation == "none") {
            // Set identity function in case none is supplied
            activation_function =
                neural_network::util_functions::identity_function;
            dactivation_function =
                neural_network::util_functions::identity_function;
        } else {
            // If supplied activation is invalid
            std::cerr << "ERROR (" << __func__ << ") : ";
            std::cerr << "Invalid argument. Expected {none, sigmoid, relu, "
                         "tanh} got ";
            std::cerr << activation << std::endl;
            std::exit(EXIT_FAILURE);
        }
        this->activation = activation;  // Setting activation name
        this->neurons = neurons;        // Setting number of neurons
        // Initialize kernal according to flag
        if (random_kernal) {
            uniform_random_initialization(kernal, kernal_shape, -1.0, 1.0);
        } else {
            unit_matrix_initialization(kernal, kernal_shape);
        }
    }
    /**
     * Constructor for neural_network::layers::DenseLayer class
     * @param neurons number of neurons
     * @param activation activation function for layer
     * @param kernal values of kernal (useful in loading model)
     */
    DenseLayer(const int &neurons, const std::string &activation,
               const std::vector<std::valarray<double>> &kernal) {
        // Choosing activation (and it's derivative)
        if (activation == "sigmoid") {
            activation_function = neural_network::activations::sigmoid;
            dactivation_function = neural_network::activations::dsigmoid;
        } else if (activation == "relu") {
            activation_function = neural_network::activations::relu;
            dactivation_function = neural_network::activations::drelu;
        } else if (activation == "tanh") {
            activation_function = neural_network::activations::tanh;
            dactivation_function = neural_network::activations::dtanh;
        } else if (activation == "none") {
            // Set identity function in case none is supplied
            activation_function =
                neural_network::util_functions::identity_function;
            dactivation_function =
                neural_network::util_functions::identity_function;
        } else {
            // If supplied activation is invalid
            std::cerr << "ERROR (" << __func__ << ") : ";
            std::cerr << "Invalid argument. Expected {none, sigmoid, relu, "
                         "tanh} got ";
            std::cerr << activation << std::endl;
            std::exit(EXIT_FAILURE);
        }
        this->activation = activation;  // Setting activation name
        this->neurons = neurons;        // Setting number of neurons
        this->kernal = kernal;          // Setting supplied kernal values
    }

    /**
     * Copy Constructor for class DenseLayer.
     *
     * @param layer instance of class to be copied.
     */
    DenseLayer(const DenseLayer &layer) = default;

    /**
     * Destructor for class DenseLayer.
     */
    ~DenseLayer() = default;

    /**
     * Copy assignment operator for class DenseLayer
     */
    DenseLayer &operator=(const DenseLayer &layer) = default;

    /**
     * Move constructor for class DenseLayer
     */
    DenseLayer(DenseLayer &&) = default;

    /**
     * Move assignment operator for class DenseLayer
     */
    DenseLayer &operator=(DenseLayer &&) = default;
};
}  // namespace layers
/**
 * NeuralNetwork class implements an MLP. This class is
 * used by the end user to create and train networks.
 *
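 * A minimal usage sketch (mirroring test() at the bottom of this file):
 * \code
 * machine_learning::neural_network::NeuralNetwork net(
 *     {{4, "none"}, {6, "relu"}, {3, "sigmoid"}});
 * net.summary();
 * \endcode
 *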
 */
class NeuralNetwork {
 private:
    std::vector<neural_network::layers::DenseLayer> layers;  // To store layers
    /**
     * Private Constructor for class NeuralNetwork. This constructor
     * is used internally to load model.
     * @param config vector containing pair (neurons, activation)
     * @param kernals vector containing all pretrained kernals
     */
    NeuralNetwork(
        const std::vector<std::pair<int, std::string>> &config,
        const std::vector<std::vector<std::valarray<double>>> &kernals) {
        // First layer should not have activation
        if (config.begin()->second != "none") {
            std::cerr << "ERROR (" << __func__ << ") : ";
            std::cerr
                << "First layer can't have activation other than none got "
                << config.begin()->second;
            std::cerr << std::endl;
            std::exit(EXIT_FAILURE);
        }
        // Network should have at least two layers
        if (config.size() <= 1) {
            std::cerr << "ERROR (" << __func__ << ") : ";
            std::cerr << "Invalid size of network, ";
            std::cerr << "Atleast two layers are required";
            std::exit(EXIT_FAILURE);
        }
        // Reconstructing all pretrained layers
        for (size_t i = 0; i < config.size(); i++) {
            layers.emplace_back(neural_network::layers::DenseLayer(
                config[i].first, config[i].second, kernals[i]));
        }
        std::cout << "INFO: Network constructed successfully" << std::endl;
    }
    /**
     * Private function to get detailed predictions (i.e.
     * activated neuron values). This function is used in
     * backpropagation, single predict and batch predict.
     * @param X input vector
     * @return activated neuron values of every layer (input included)
     */
    std::vector<std::vector<std::valarray<double>>>
    __detailed_single_prediction(const std::vector<std::valarray<double>> &X) {
        std::vector<std::vector<std::valarray<double>>> details;
        std::vector<std::valarray<double>> current_pass = X;
        details.emplace_back(X);
        for (const auto &l : layers) {
            current_pass = multiply(current_pass, l.kernal);
            current_pass = apply_function(current_pass, l.activation_function);
            details.emplace_back(current_pass);
        }
        return details;
    }

 public:
    /**
     * Default Constructor for class NeuralNetwork. This constructor
     * is used to create an empty NeuralNetwork object.
     */
    NeuralNetwork() = default;

    /**
     * Constructor for class NeuralNetwork. This constructor
     * is used by the end user.
     * @param config vector containing pair (neurons, activation)
     */
    explicit NeuralNetwork(
        const std::vector<std::pair<int, std::string>> &config) {
        // First layer should not have activation
        if (config.begin()->second != "none") {
            std::cerr << "ERROR (" << __func__ << ") : ";
            std::cerr
                << "First layer can't have activation other than none got "
                << config.begin()->second;
            std::cerr << std::endl;
            std::exit(EXIT_FAILURE);
        }
        // Network should have at least two layers
        if (config.size() <= 1) {
            std::cerr << "ERROR (" << __func__ << ") : ";
            std::cerr << "Invalid size of network, ";
            std::cerr << "Atleast two layers are required";
            std::exit(EXIT_FAILURE);
        }
        // Separately creating first layer so it can have unit matrix
        // as kernal.
        layers.push_back(neural_network::layers::DenseLayer(
            config[0].first, config[0].second,
            {config[0].first, config[0].first}, false));
        // Creating remaining layers
        for (size_t i = 1; i < config.size(); i++) {
            layers.push_back(neural_network::layers::DenseLayer(
                config[i].first, config[i].second,
                {config[i - 1].first, config[i].first}, true));
        }
        std::cout << "INFO: Network constructed successfully" << std::endl;
    }

    /**
     * Copy Constructor for class NeuralNetwork.
     *
     * @param model instance of class to be copied.
     */
    NeuralNetwork(const NeuralNetwork &model) = default;

    /**
     * Destructor for class NeuralNetwork.
     */
    ~NeuralNetwork() = default;

    /**
     * Copy assignment operator for class NeuralNetwork
     */
    NeuralNetwork &operator=(const NeuralNetwork &model) = default;

    /**
     * Move constructor for class NeuralNetwork
     */
    NeuralNetwork(NeuralNetwork &&) = default;

    /**
     * Move assignment operator for class NeuralNetwork
     */
    NeuralNetwork &operator=(NeuralNetwork &&) = default;

    /**
     * Function to get X and Y from csv file (where X = data, Y = label)
     * @param file_name csv file name
     * @param last_label true if the label is in the last column, false if it
     * is in the first
     * @param normalize flag for whether to normalize data
     * @param slip_lines number of lines to skip
     * @return returns pair of X and Y
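     *
     * For illustration, a row of a 4-feature classification csv with the
     * label in the last column could look like (hypothetical values):
     * `5.1,3.5,1.4,0.2,0`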
     */
    std::pair<std::vector<std::vector<std::valarray<double>>>,
              std::vector<std::vector<std::valarray<double>>>>
    get_XY_from_csv(const std::string &file_name, const bool &last_label,
                    const bool &normalize, const int &slip_lines = 1) {
        std::ifstream in_file;                          // Ifstream to read file
        in_file.open(file_name.c_str(), std::ios::in);  // Open file
        // If there is any problem in opening file
        if (!in_file.is_open()) {
            std::cerr << "ERROR (" << __func__ << ") : ";
            std::cerr << "Unable to open file: " << file_name << std::endl;
            std::exit(EXIT_FAILURE);
        }
        std::vector<std::vector<std::valarray<double>>> X,
            Y;             // To store X and Y
        std::string line;  // To store each line
        // Skip lines
        for (int i = 0; i < slip_lines; i++) {
            std::getline(in_file, line, '\n');  // Ignore line
        }
        // While the file has lines remaining
        while (std::getline(in_file, line, '\n')) {
            std::valarray<double> x_data,
                y_data;                  // To store single sample and label
            std::stringstream ss(line);  // Constructing stringstream from line
            std::string token;  // To store each token in line (separated by ',')
            while (std::getline(ss, token, ',')) {  // For each token
                // Insert numerical value of token in x_data
                x_data = insert_element(x_data, std::stod(token));
            }
            // If label is in last column
            if (last_label) {
                y_data.resize(this->layers.back().neurons);
                // If task is classification
                if (y_data.size() > 1) {
                    y_data[x_data[x_data.size() - 1]] = 1;
                }
                // If task is regression (of single value)
                else {
                    y_data[0] = x_data[x_data.size() - 1];
                }
                x_data = pop_back(x_data);  // Remove label from x_data
            } else {
                y_data.resize(this->layers.back().neurons);
                // If task is classification
                if (y_data.size() > 1) {
                    y_data[x_data[0]] = 1;
                }
                // If task is regression (of single value)
                else {
                    y_data[0] = x_data[0];
                }
                x_data = pop_front(x_data);  // Remove label from x_data
            }
            // Push collected X_data and y_data in X and Y
            X.push_back({x_data});
            Y.push_back({y_data});
        }
        // Normalize training data if flag is set
        if (normalize) {
            // Scale data between 0.01 and 1.0 using min-max scaler
            X = minmax_scaler(X, 0.01, 1.0);
        }
        in_file.close();         // Closing file
        return make_pair(X, Y);  // Return pair of X and Y
    }

    /**
     * Function to get prediction of model on single sample.
     * @param X array of feature vectors
     * @return returns predictions as vector
     */
    std::vector<std::valarray<double>> single_predict(
        const std::vector<std::valarray<double>> &X) {
        // Get activations of all layers
        auto activations = this->__detailed_single_prediction(X);
        // Return activations of last layer (actual predicted values)
        return activations.back();
    }

    /**
     * Function to get prediction of model on batch
     * @param X array of feature vectors
     * @return returns predicted values as vector
     */
    std::vector<std::vector<std::valarray<double>>> batch_predict(
        const std::vector<std::vector<std::valarray<double>>> &X) {
        // Store predicted values
        std::vector<std::vector<std::valarray<double>>> predicted_batch(
            X.size());
        for (size_t i = 0; i < X.size(); i++) {  // For every sample
            // Push predicted values
            predicted_batch[i] = this->single_predict(X[i]);
        }
        return predicted_batch;  // Return predicted values
    }

    /**
     * Function to fit model on supplied data
     * @param X array of feature vectors
     * @param Y array of target values
     * @param epochs number of epochs (default = 100)
     * @param learning_rate learning rate (default = 0.01)
     * @param batch_size batch size for gradient descent (default = 32)
     * @param shuffle flag for whether to shuffle data (default = true)
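     *
     * Each kernal is updated with the averaged mini-batch gradient:
     * \f$ W \leftarrow W - \eta \cdot \frac{1}{B} \sum_{i=1}^{B} \nabla_W L_i \f$,
     * where \f$\eta\f$ is the learning rate and \f$B\f$ the batch size.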
     */
    void fit(const std::vector<std::vector<std::valarray<double>>> &X_,
             const std::vector<std::vector<std::valarray<double>>> &Y_,
             const int &epochs = 100, const double &learning_rate = 0.01,
             const size_t &batch_size = 32, const bool &shuffle = true) {
        std::vector<std::vector<std::valarray<double>>> X = X_, Y = Y_;
        // Both label and input data should have same size
        if (X.size() != Y.size()) {
            std::cerr << "ERROR (" << __func__ << ") : ";
            std::cerr << "X and Y in fit have different sizes" << std::endl;
            std::exit(EXIT_FAILURE);
        }
        std::cout << "INFO: Training Started" << std::endl;
        for (int epoch = 1; epoch <= epochs; epoch++) {  // For every epoch
            // Shuffle X and Y if flag is set
            if (shuffle) {
                equal_shuffle(X, Y);
            }
            auto start =
                std::chrono::high_resolution_clock::now();  // Start clock
            double loss = 0,
                   acc = 0;  // Initialize performance metrics with zero
            // For each starting index of batch
            for (size_t batch_start = 0; batch_start < X.size();
                 batch_start += batch_size) {
                for (size_t i = batch_start;
                     i < std::min(X.size(), batch_start + batch_size); i++) {
                    std::vector<std::valarray<double>> grad, cur_error,
                        predicted;
                    auto activations = this->__detailed_single_prediction(X[i]);
                    // Gradients vector to store gradients for all layers
                    // They will be averaged and applied to kernal
                    std::vector<std::vector<std::valarray<double>>> gradients;
                    gradients.resize(this->layers.size());
                    // First initialize gradients to zero
                    for (size_t j = 0; j < gradients.size(); j++) {
                        zeroes_initialization(
                            gradients[j], get_shape(this->layers[j].kernal));
                    }
                    predicted = activations.back();  // Predicted vector
                    cur_error = predicted - Y[i];    // Error vector
                    // Calculating loss with MSE
                    loss += sum(apply_function(
                        cur_error, neural_network::util_functions::square));
                    // If prediction is correct
                    if (argmax(predicted) == argmax(Y[i])) {
                        acc += 1;
                    }
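                    // Backpropagation, matching the loop below: with
                    // out_j = activations[j + 1] and in_j = activations[j],
                    //   delta_j = cur_error (Hadamard) f'(out_j)
                    //   grad_j  = in_j^T x delta_j
                    //   next cur_error = delta_j x W_j^T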
                    // For every layer (except first) starting from last one
                    for (size_t j = this->layers.size() - 1; j >= 1; j--) {
                        // Backpropogating errors
                        cur_error = hadamard_product(
                            cur_error,
                            apply_function(
                                activations[j + 1],
                                this->layers[j].dactivation_function));
                        // Calculating gradient for current layer
                        grad = multiply(transpose(activations[j]), cur_error);
                        // Change error according to current kernal values
                        cur_error = multiply(cur_error,
                                             transpose(this->layers[j].kernal));
                        // Adding gradient values to collection of gradients
                        gradients[j] = gradients[j] + grad / double(batch_size);
                    }
                    // Applying gradients
                    for (size_t j = this->layers.size() - 1; j >= 1; j--) {
                        // Updating kernal (aka weights)
                        this->layers[j].kernal = this->layers[j].kernal -
                                                 gradients[j] * learning_rate;
                    }
                }
            }
            auto stop =
                std::chrono::high_resolution_clock::now();  // Stopping the clock
            // Calculate time taken by epoch
            auto duration =
                std::chrono::duration_cast<std::chrono::microseconds>(stop -
                                                                      start);
            loss /= X.size();        // Averaging loss
            acc /= X.size();         // Averaging accuracy
            std::cout.precision(4);  // set output precision to 4
            // Printing training stats
            std::cout << "Training: Epoch " << epoch << '/' << epochs;
            std::cout << ", Loss: " << loss;
            std::cout << ", Accuracy: " << acc;
            std::cout << ", Taken time: " << duration.count() / 1e6
                      << " seconds";
            std::cout << std::endl;
        }
        return;
    }

    /**
     * Function to fit model on data stored in csv file
     * @param file_name csv file name
     * @param last_label true if the label is in the last column, false if it
     * is in the first
     * @param epochs number of epochs
     * @param learning_rate learning rate
     * @param normalize flag for whether to normalize data
     * @param slip_lines number of lines to skip
     * @param batch_size batch size for gradient descent (default = 32)
     * @param shuffle flag for whether to shuffle data (default = true)
     */
    void fit_from_csv(const std::string &file_name, const bool &last_label,
                      const int &epochs, const double &learning_rate,
                      const bool &normalize, const int &slip_lines = 1,
                      const size_t &batch_size = 32,
                      const bool &shuffle = true) {
        // Getting training data from csv file
        auto data =
            this->get_XY_from_csv(file_name, last_label, normalize, slip_lines);
        // Fit the model on training data
        this->fit(data.first, data.second, epochs, learning_rate, batch_size,
                  shuffle);
        return;
    }

    /**
     * Function to evaluate model on supplied data
     * @param X array of feature vectors (input data)
     * @param Y array of target values (label)
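     *
     * Prints the loss (0.5 * squared error, averaged over the samples) and
     * the argmax classification accuracy.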
     */
    void evaluate(const std::vector<std::vector<std::valarray<double>>> &X,
                  const std::vector<std::vector<std::valarray<double>>> &Y) {
        std::cout << "INFO: Evaluation Started" << std::endl;
        double acc = 0, loss = 0;  // Initialize performance metrics with zero
        for (size_t i = 0; i < X.size(); i++) {  // For every sample in input
            // Get predictions
            std::vector<std::valarray<double>> pred =
                this->single_predict(X[i]);
            // If predicted class is correct
            if (argmax(pred) == argmax(Y[i])) {
                acc += 1;  // Increment accuracy
            }
            // Calculating loss - Mean Squared Error
            loss += sum(apply_function((Y[i] - pred),
                                       neural_network::util_functions::square) *
                        0.5);
        }
        acc /= X.size();   // Averaging accuracy
        loss /= X.size();  // Averaging loss
        // Printing performance of the model
        std::cout << "Evaluation: Loss: " << loss;
        std::cout << ", Accuracy: " << acc << std::endl;
        return;
    }

    /**
     * Function to evaluate model on data stored in csv file
     * @param file_name csv file name
     * @param last_label true if the label is in the last column, false if it
     * is in the first
     * @param normalize flag for whether to normalize data
     * @param slip_lines number of lines to skip
     */
    void evaluate_from_csv(const std::string &file_name, const bool &last_label,
                           const bool &normalize, const int &slip_lines = 1) {
        // Getting evaluation data from csv file
        auto data =
            this->get_XY_from_csv(file_name, last_label, normalize, slip_lines);
        // Evaluating model
        this->evaluate(data.first, data.second);
        return;
    }

    /**
     * Function to save current model.
     * @param file_name file name to save model (*.model)
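     *
     * A minimal usage sketch (hypothetical instance and file name):
     * \code
     * model.save_model("iris");  // writes "iris.model"
     * \endcode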
     */
    void save_model(const std::string &_file_name) {
        std::string file_name = _file_name;
        // Adding ".model" extension if it is not already there in name
        if (file_name.find(".model") == file_name.npos) {
            file_name += ".model";
        }
        std::ofstream out_file;  // Ofstream to write in file
        // Open file in out|trunc mode
        out_file.open(file_name.c_str(),
                      std::ofstream::out | std::ofstream::trunc);
        // If there is any problem in opening file
        if (!out_file.is_open()) {
            std::cerr << "ERROR (" << __func__ << ") : ";
            std::cerr << "Unable to open file: " << file_name << std::endl;
            std::exit(EXIT_FAILURE);
        }
        /**
            Format in which model is saved:

            total_layers
            neurons(1st neural_network::layers::DenseLayer) activation_name(1st
           neural_network::layers::DenseLayer) kernal_shape(1st
           neural_network::layers::DenseLayer) kernal_values
            .
            .
            .
            neurons(Nth neural_network::layers::DenseLayer) activation_name(Nth
           neural_network::layers::DenseLayer) kernal_shape(Nth
           neural_network::layers::DenseLayer) kernal_values

            For Example, pretrained model with 3 layers:
            <pre>
            3
            4 none
            4 4
            1 0 0 0
            0 1 0 0
            0 0 1 0
            0 0 0 1
            6 relu
            4 6
            -1.88963 -3.61165 1.30757 -0.443906 -2.41039 -2.69653
            -0.684753 0.0891452 0.795294 -2.39619 2.73377 0.318202
            -2.91451 -4.43249 -0.804187 2.51995 -6.97524 -1.07049
            -0.571531 -1.81689 -1.24485 1.92264 -2.81322 1.01741
            3 sigmoid
            6 3
            0.390267 -0.391703 -0.0989607
            0.499234 -0.564539 -0.28097
            0.553386 -0.153974 -1.92493
            -2.01336 -0.0219682 1.44145
            1.72853 -0.465264 -0.705373
            -0.908409 -0.740547 0.376416
            </pre>
        */
        // Saving model in the same format
        out_file << layers.size();
        out_file << std::endl;
        for (const auto &layer : this->layers) {
            out_file << layer.neurons << ' ' << layer.activation << std::endl;
            const auto shape = get_shape(layer.kernal);
            out_file << shape.first << ' ' << shape.second << std::endl;
            for (const auto &row : layer.kernal) {
                for (const auto &val : row) {
                    out_file << val << ' ';
                }
                out_file << std::endl;
            }
        }
        std::cout << "INFO: Model saved successfully with name : ";
        std::cout << file_name << std::endl;
        out_file.close();  // Closing file
        return;
    }

    /**
     * Function to load earlier saved model.
     * @param file_name file from which model will be loaded (*.model)
     * @return instance of NeuralNetwork class with pretrained weights
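     *
     * A minimal usage sketch (hypothetical file name):
     * \code
     * machine_learning::neural_network::NeuralNetwork net;
     * net = net.load_model("iris.model");
     * \endcode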
     */
    NeuralNetwork load_model(const std::string &file_name) {
        std::ifstream in_file;            // Ifstream to read file
        in_file.open(file_name.c_str());  // Opening file
        // If there is any problem in opening file
        if (!in_file.is_open()) {
            std::cerr << "ERROR (" << __func__ << ") : ";
            std::cerr << "Unable to open file: " << file_name << std::endl;
            std::exit(EXIT_FAILURE);
        }
        std::vector<std::pair<int, std::string>> config;  // To store config
        std::vector<std::vector<std::valarray<double>>>
            kernals;  // To store pretrained kernals
        // Loading model from saved file format
        size_t total_layers = 0;
        in_file >> total_layers;
        for (size_t i = 0; i < total_layers; i++) {
            int neurons = 0;
            std::string activation;
            size_t shape_a = 0, shape_b = 0;
            std::vector<std::valarray<double>> kernal;
            in_file >> neurons >> activation >> shape_a >> shape_b;
            for (size_t r = 0; r < shape_a; r++) {
                std::valarray<double> row(shape_b);
                for (size_t c = 0; c < shape_b; c++) {
                    in_file >> row[c];
                }
                kernal.push_back(row);
            }
            config.emplace_back(make_pair(neurons, activation));
            kernals.emplace_back(kernal);
        }
        std::cout << "INFO: Model loaded successfully" << std::endl;
        in_file.close();  // Closing file
        return NeuralNetwork(
            config, kernals);  // Return instance of NeuralNetwork class
    }

    /**
     * Function to print summary of the network.
     */
    void summary() {
        // Printing Summary
        std::cout
            << "==============================================================="
            << std::endl;
        std::cout << "\t\t+ MODEL SUMMARY +\t\t\n";
        std::cout
            << "==============================================================="
            << std::endl;
        for (size_t i = 1; i <= layers.size(); i++) {  // For every layer
            std::cout << i << ")";
            std::cout << " Neurons : "
                      << layers[i - 1].neurons;  // number of neurons
            std::cout << ", Activation : "
                      << layers[i - 1].activation;  // activation
            std::cout << ", Kernal Shape : "
                      << get_shape(layers[i - 1].kernal);  // kernal shape
            std::cout << std::endl;
        }
        std::cout
            << "==============================================================="
            << std::endl;
        return;
    }
};
}  // namespace neural_network
}  // namespace machine_learning

/**
 * Function to test neural network
 * @returns none
 */
static void test() {
    // Creating network with 3 layers for "iris.csv"
    machine_learning::neural_network::NeuralNetwork myNN =
        machine_learning::neural_network::NeuralNetwork({
            {4, "none"},    // First layer with 4 neurons and "none" as activation
            {6, "relu"},    // Second layer with 6 neurons and "relu" as activation
            {3, "sigmoid"}  // Third layer with 3 neurons and "sigmoid" as activation
        });
    // Printing summary of model
    myNN.summary();
    // Training Model
    myNN.fit_from_csv("iris.csv", true, 100, 0.3, false, 2, 32, true);
    // Testing predictions of model
    assert(machine_learning::argmax(
               myNN.single_predict({{5, 3.4, 1.6, 0.4}})) == 0);
    assert(machine_learning::argmax(
               myNN.single_predict({{6.4, 2.9, 4.3, 1.3}})) == 1);
    assert(machine_learning::argmax(
               myNN.single_predict({{6.2, 3.4, 5.4, 2.3}})) == 2);
    return;
}

/** Driver Code */
int main() {
    // Testing
    test();
    return 0;
}