{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Autoregressive Moving Average (ARMA): Sunspots data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This notebook replicates the existing ARMA notebook using the `statsmodels.tsa.statespace.SARIMAX` class rather than the `statsmodels.tsa.ARMA` class." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from __future__ import print_function\n", "import numpy as np\n", "from scipy import stats\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "\n", "import statsmodels.api as sm" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from statsmodels.graphics.api import qqplot" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Sunpots Data" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "print(sm.datasets.sunspots.NOTE)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "dta = sm.datasets.sunspots.load_pandas().data" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "dta.index = pd.Index(sm.tsa.datetools.dates_from_range('1700', '2008'))\n", "del dta[\"YEAR\"]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "dta.plot(figsize=(12,4));" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "fig = plt.figure(figsize=(12,8))\n", "ax1 = fig.add_subplot(211)\n", "fig = sm.graphics.tsa.plot_acf(dta.values.squeeze(), lags=40, ax=ax1)\n", "ax2 = fig.add_subplot(212)\n", "fig = sm.graphics.tsa.plot_pacf(dta, lags=40, ax=ax2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "arma_mod20 = sm.tsa.statespace.SARIMAX(dta, order=(2,0,0), trend='c').fit(disp=False)\n", "print(arma_mod20.params)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "arma_mod30 = sm.tsa.statespace.SARIMAX(dta, order=(3,0,0), trend='c').fit(disp=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "print(arma_mod20.aic, arma_mod20.bic, arma_mod20.hqic)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "print(arma_mod30.params)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "print(arma_mod30.aic, arma_mod30.bic, arma_mod30.hqic)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* Does our model obey the theory?" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "sm.stats.durbin_watson(arma_mod30.resid)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "fig = plt.figure(figsize=(12,4))\n", "ax = fig.add_subplot(111)\n", "ax = plt.plot(arma_mod30.resid)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "resid = arma_mod30.resid" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "stats.normaltest(resid)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "fig = plt.figure(figsize=(12,4))\n", "ax = fig.add_subplot(111)\n", "fig = qqplot(resid, line='q', ax=ax, fit=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "fig = plt.figure(figsize=(12,8))\n", "ax1 = fig.add_subplot(211)\n", "fig = sm.graphics.tsa.plot_acf(resid, lags=40, ax=ax1)\n", "ax2 = fig.add_subplot(212)\n", "fig = sm.graphics.tsa.plot_pacf(resid, lags=40, ax=ax2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "r,q,p = sm.tsa.acf(resid, qstat=True)\n", "data = np.c_[range(1,41), r[1:], q, p]\n", "table = pd.DataFrame(data, columns=['lag', \"AC\", \"Q\", \"Prob(>Q)\"])\n", "print(table.set_index('lag'))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* This indicates a lack of fit." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* In-sample dynamic prediction. How good does our model do?" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "predict_sunspots = arma_mod30.predict(start='1990', end='2012', dynamic=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "fig, ax = plt.subplots(figsize=(12, 8))\n", "dta.loc['1950':].plot(ax=ax)\n", "predict_sunspots.plot(ax=ax, style='r');" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def mean_forecast_err(y, yhat):\n", " return y.sub(yhat).mean()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "mean_forecast_err(dta.SUNACTIVITY, predict_sunspots)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.3" } }, "nbformat": 4, "nbformat_minor": 0 }