"""examples for usage of F-test on linear restrictions in OLS linear restriction is R \beta = 0 R is (nr,nk), beta is (nk,1) (in matrix notation) TODO: clean this up for readability and explain Notes ----- This example was written mostly for cross-checks and refactoring. """ import numpy as np import numpy.testing as npt import statsmodels.api as sm print('\n\n Example 1: Longley Data, high multicollinearity') data = sm.datasets.longley.load() data.exog = sm.add_constant(data.exog, prepend=False) res = sm.OLS(data.endog, data.exog).fit() # test pairwise equality of some coefficients R2 = [[0, 1, -1, 0, 0, 0, 0], [0, 0, 0, 0, 1, -1, 0]] Ftest = res.f_test(R2) print(repr((Ftest.fvalue, Ftest.pvalue))) # use repr to get more digits # 9.740461873303655 0.0056052885317360301 ##Compare to R (after running R_lm.s in the longley folder) ## ##> library(car) ##> linear.hypothesis(m1, c("GNP = UNEMP","POP = YEAR")) ##Linear hypothesis test ## ##Hypothesis: ##GNP - UNEMP = 0 ##POP - YEAR = 0 ## ##Model 1: TOTEMP ~ GNPDEFL + GNP + UNEMP + ARMED + POP + YEAR ##Model 2: restricted model ## ## Res.Df RSS Df Sum of Sq F Pr(>F) ##1 9 836424 ##2 11 2646903 -2 -1810479 9.7405 0.005605 ** print('Regression Results Summary') print(res.summary()) print('\n F-test whether all variables have zero effect') R = np.eye(7)[:-1, :] Ftest0 = res.f_test(R) print(repr((Ftest0.fvalue, Ftest0.pvalue))) print('%r' % res.fvalue) npt.assert_almost_equal(res.fvalue, Ftest0.fvalue, decimal=9) ttest0 = res.t_test(R[0, :]) print(repr((ttest0.tvalue, ttest0.pvalue))) betatval = res.tvalues betatval[0] npt.assert_almost_equal(betatval[0], ttest0.tvalue, decimal=15) """ # several ttests at the same time # currently not checked for this, but it (kind of) works >>> ttest0 = res.t_test(R[:2,:]) >>> print(repr((ttest0.t, ttest0.pvalue)) (array([[ 0.17737603, NaN], [ NaN, -1.06951632]]), array([[ 0.43157042, 1. ], [ 1. , 0.84365947]])) >>> ttest0 = res.t_test(R) >>> ttest0.t array([[ 1.77376028e-01, NaN, NaN, NaN, -1.43660623e-02, 2.15494063e+01], [ NaN, -1.06951632e+00, -1.62440215e+01, -1.78173553e+01, NaN, NaN], [ NaN, -2.88010561e-01, -4.13642736e+00, -4.06097408e+00, NaN, NaN], [ NaN, -6.17679489e-01, -7.94027056e+00, -4.82198531e+00, NaN, NaN], [ 4.23409809e+00, NaN, NaN, NaN, -2.26051145e-01, 2.89324928e+02], [ 1.77445341e-01, NaN, NaN, NaN, -8.08336103e-03, 4.01588981e+00]]) >>> betatval array([ 0.17737603, -1.06951632, -4.13642736, -4.82198531, -0.22605114, 4.01588981, -3.91080292]) >>> ttest0.t array([ 0.17737603, -1.06951632, -4.13642736, -4.82198531, -0.22605114, 4.01588981]) """ print('\nsimultaneous t-tests') ttest0 = res.t_test(R2) t2 = ttest0.tvalue print(ttest0.tvalue) print(t2) t2a = np.r_[res.t_test(np.array(R2)[0, :]).tvalue, res.t_test(np.array(R2)[1, :]).tvalue] print(t2 - t2a) t2pval = ttest0.pvalue print('%r' % t2pval) # reject # array([ 9.33832896e-04, 9.98483623e-01]) print('reject') print('%r' % (t2pval < 0.05)) # f_test needs 2-d currently Ftest2a = res.f_test(np.asarray(R2)[:1, :]) print(repr((Ftest2a.fvalue, Ftest2a.pvalue))) Ftest2b = res.f_test(np.asarray(R2)[1:2, :]) print(repr((Ftest2b.fvalue, Ftest2b.pvalue))) print('\nequality of t-test and F-test') print(t2a**2 - np.array((Ftest2a.fvalue, Ftest2b.fvalue))) npt.assert_almost_equal(t2a**2, np.vstack((Ftest2a.fvalue, Ftest2b.fvalue))) #npt.assert_almost_equal(t2pval, np.array((Ftest2a.pvalue, Ftest2b.pvalue))) npt.assert_almost_equal(t2pval * 2, np.c_[Ftest2a.pvalue, Ftest2b.pvalue].squeeze()) print('\n\n Example 2: Artificial Data') nsample = 100 ncat = 4 sigma = 2 xcat = np.linspace(0, ncat - 1, nsample).round()[:, np.newaxis] dummyvar = (xcat == np.arange(ncat)).astype(float) beta = np.array([0., 2, -2, 1])[:, np.newaxis] ytrue = np.dot(dummyvar, beta) X = sm.tools.add_constant(dummyvar[:, :-1], prepend=False) y = ytrue + sigma * np.random.randn(nsample, 1) mod2 = sm.OLS(y[:, 0], X) res2 = mod2.fit() print(res2.summary()) R3 = np.eye(ncat)[:-1, :] Ftest = res2.f_test(R3) print(repr((Ftest.fvalue, Ftest.pvalue))) R3 = np.atleast_2d([0, 1, -1, 2]) Ftest = res2.f_test(R3) print(repr((Ftest.fvalue, Ftest.pvalue))) print('simultaneous t-test for zero effects') R4 = np.eye(ncat)[:-1, :] ttest = res2.t_test(R4) print(repr((ttest.tvalue, ttest.pvalue))) R5 = np.atleast_2d([0, 1, 1, 2]) np.dot(R5, res2.params) Ftest = res2.f_test(R5) print(repr((Ftest.fvalue, Ftest.pvalue))) ttest = res2.t_test(R5) #print(repr((ttest.t, ttest.pvalue)) print(repr((ttest.tvalue, ttest.pvalue))) R6 = np.atleast_2d([1, -1, 0, 0]) np.dot(R6, res2.params) Ftest = res2.f_test(R6) print(repr((Ftest.fvalue, Ftest.pvalue))) ttest = res2.t_test(R6) #print(repr((ttest.t, ttest.pvalue)) print(repr((ttest.tvalue, ttest.pvalue))) R7 = np.atleast_2d([1, 0, 0, 0]) np.dot(R7, res2.params) Ftest = res2.f_test(R7) print(repr((Ftest.fvalue, Ftest.pvalue))) ttest = res2.t_test(R7) #print(repr((ttest.t, ttest.pvalue)) print(repr((ttest.tvalue, ttest.pvalue))) print('\nExample: 2 categories: replicate stats.glm and stats.ttest_ind') mod2 = sm.OLS(y[xcat.flat < 2][:, 0], X[xcat.flat < 2, :][:, (0, -1)]) res2 = mod2.fit() R8 = np.atleast_2d([1, 0]) np.dot(R8, res2.params) Ftest = res2.f_test(R8) print(repr((Ftest.fvalue, Ftest.pvalue))) print(repr((np.sqrt(Ftest.fvalue), Ftest.pvalue))) ttest = res2.t_test(R8) #print(repr(ttest.t), ttest.pvalue))) print(repr((ttest.tvalue, ttest.pvalue))) from scipy import stats print(stats.glm(y[xcat < 2].ravel(), xcat[xcat < 2].ravel())) print(stats.ttest_ind(y[xcat == 0], y[xcat == 1])) #TODO: compare with f_oneway