{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Prediction (out of sample)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "from __future__ import print_function\n",
    "import numpy as np\n",
    "import statsmodels.api as sm"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Artificial data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "nsample = 50\n",
    "sig = 0.25\n",
    "x1 = np.linspace(0, 20, nsample)\n",
    "X = np.column_stack((x1, np.sin(x1), (x1-5)**2))\n",
    "X = sm.add_constant(X)\n",
    "beta = [5., 0.5, 0.5, -0.02]\n",
    "y_true = np.dot(X, beta)\n",
    "y = y_true + sig * np.random.normal(size=nsample)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Estimation "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "olsmod = sm.OLS(y, X)\n",
    "olsres = olsmod.fit()\n",
    "print(olsres.summary())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## In-sample prediction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ypred = olsres.predict(X)\n",
    "print(ypred)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Create a new sample of explanatory variables Xnew, predict and plot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "x1n = np.linspace(20.5,25, 10)\n",
    "Xnew = np.column_stack((x1n, np.sin(x1n), (x1n-5)**2))\n",
    "Xnew = sm.add_constant(Xnew)\n",
    "ynewpred =  olsres.predict(Xnew) # predict out of sample\n",
    "print(ynewpred)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Plot comparison"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "\n",
    "fig, ax = plt.subplots()\n",
    "ax.plot(x1, y, 'o', label=\"Data\")\n",
    "ax.plot(x1, y_true, 'b-', label=\"True\")\n",
    "ax.plot(np.hstack((x1, x1n)), np.hstack((ypred, ynewpred)), 'r', label=\"OLS prediction\")\n",
    "ax.legend(loc=\"best\");"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Predicting with Formulas"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Using formulas can make both estimation and prediction a lot easier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from statsmodels.formula.api import ols\n",
    "\n",
    "data = {\"x1\" : x1, \"y\" : y}\n",
    "\n",
    "res = ols(\"y ~ x1 + np.sin(x1) + I((x1-5)**2)\", data=data).fit()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We use the `I` to indicate use of the Identity transform. Ie., we don't want any expansion magic from using `**2`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "res.params"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now we only have to pass the single variable and we get the transformed right-hand side variables automatically"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "res.predict(exog=dict(x1=x1n))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}