From 62466a73f04b0b1c318b0598c68743982fff2905 Mon Sep 17 00:00:00 2001
From: Josef
Date: Fri, 2 Oct 2015 23:18:11 -0400
Subject: [PATCH 1/4] Notebook: GLM as a GMM model, plus score test example

---
 notebooks/ex_gmm_glm.ipynb | 818 +++++++++++++++++++++++++++++++++++++
 notebooks/ex_gmm_glm.py | 245 +++++++++++
 2 files changed, 1063 insertions(+)
 create mode 100644 notebooks/ex_gmm_glm.ipynb
 create mode 100644 notebooks/ex_gmm_glm.py

diff --git a/notebooks/ex_gmm_glm.ipynb b/notebooks/ex_gmm_glm.ipynb
new file mode 100644
index 0000000..dccda9d
--- /dev/null
+++ b/notebooks/ex_gmm_glm.ipynb
@@ -0,0 +1,818 @@
+{
+ "metadata": {
+ "name": "",
+ "signature": "sha256:6b71041c98ca379f22247310c703eeb66dc113589d202bf39f3b4d7e66ef2163"
+ },
+ "nbformat": 3,
+ "nbformat_minor": 0,
+ "worksheets": [
+ {
+ "cells": [
+ {
+ "cell_type": "heading",
+ "level": 1,
+ "metadata": {},
+ "source": [
+ "GLM as a GMM Model"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The purpose of this notebook is to replicate generalized linear models, or generalized estimating equations with an independence correlation structure, as a Generalized Method of Moments (GMM) model.\n",
+ "\n",
+ "The second part compares a score test for variable addition with a test of overidentifying restrictions, which should eventually provide a generic framework for conditional moment tests. \n",
+ "\n",
+ "**Status**: We can replicate GLM by GMM, but the rest is experimental."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "from __future__ import division\n",
+ "import numpy as np\n",
+ "from statsmodels.sandbox.regression.gmm import GMM"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 29
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "class GMMGLM(GMM):\n",
+ "\n",
+ "    def __init__(self, endog, exog, instrument=None, glm_model=None):\n",
+ "        if instrument is None:\n",
+ "            instrument = exog\n",
+ "        super(GMMGLM, self).__init__(endog, exog, instrument)\n",
+ "        self.glm_model = glm_model\n",
+ "    \n",
+ "    def momcond(self, params):\n",
+ "        # moment conditions: GLM score factor times each instrument column\n",
+ "        mom1 = self.glm_model.score_factor(params)[:, None] * self.instrument\n",
+ "        return mom1"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 30
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Set up a Logit model for reference. We use an example from the statsmodels documentation."
+ ]
+ },
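+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Aside on the moment conditions (my reading of `score_factor`, worth verifying against the statsmodels source): `score_factor(params)` returns $\\partial \\ell_i / \\partial \\eta_i$, the derivative of each observation's log-likelihood with respect to its linear predictor. For a canonical link such as logit this is $y_i - \\mu_i$, so `momcond` stacks\n",
+ "\n",
+ "$$g_i(\\beta) = (y_i - \\mu_i(x_i'\\beta)) \\, z_i, \\qquad E[g_i(\\beta_0)] = 0,$$\n",
+ "\n",
+ "and with instruments $z_i = x_i$ the sample moment equations $\\tfrac{1}{n}\\sum_i g_i(\\hat\\beta) = 0$ are exactly the GLM score equations, which is why GMM should reproduce the GLM estimates below."
+ ]
+ },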
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "import statsmodels.api as sm"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "# star98 is binomial count data with 2d endog; GMMGLM doesn't work for it yet.\n",
+ "\n",
+ "data = sm.datasets.star98.load()\n",
+ "data_exog = sm.add_constant(data.exog, prepend=True)[:, :8]"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "dta = sm.datasets.fair.load_pandas().data\n",
+ "dta['affair'] = (dta['affairs'] > 0).astype(float)\n",
+ "\n",
+ "affair_mod = sm.formula.logit(\"affair ~ occupation + educ + occupation_husb\" \n",
+ "                              \"+ rate_marriage + age + yrs_married + children\"\n",
+ "                              \" + religious\", dta).fit()\n",
+ "\n",
+ "# leave formulas and pandas for later\n",
+ "data_endog = affair_mod.model.endog\n",
+ "data_exog = affair_mod.model.exog\n",
+ "\n",
+ "glm_binom = sm.GLM(data_endog, data_exog, family=sm.families.Binomial())\n",
+ "res = glm_binom.fit()\n",
+ "print(res.summary())"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "Optimization terminated successfully.\n",
+ " Current function value: 0.545314\n",
+ " Iterations 6\n",
+ " Generalized Linear Model Regression Results \n",
+ "==============================================================================\n",
+ "Dep. Variable: y No. Observations: 6366\n",
+ "Model: GLM Df Residuals: 6357\n",
+ "Model Family: Binomial Df Model: 8\n",
+ "Link Function: logit Scale: 1.0\n",
+ "Method: IRLS Log-Likelihood: -3471.5\n",
+ "Date: Fri, 02 Oct 2015 Deviance: 6942.9\n",
+ "Time: 22:09:49 Pearson chi2: 6.30e+03\n",
+ "No. 
Iterations: 6 \n", + "==============================================================================\n", + " coef std err z P>|z| [0.025 0.975]\n", + "------------------------------------------------------------------------------\n", + "const 3.7257 0.299 12.470 0.000 3.140 4.311\n", + "x1 0.1602 0.034 4.717 0.000 0.094 0.227\n", + "x2 -0.0392 0.015 -2.533 0.011 -0.070 -0.009\n", + "x3 0.0124 0.023 0.541 0.589 -0.033 0.057\n", + "x4 -0.7161 0.031 -22.784 0.000 -0.778 -0.655\n", + "x5 -0.0605 0.010 -5.885 0.000 -0.081 -0.040\n", + "x6 0.1100 0.011 10.054 0.000 0.089 0.131\n", + "x7 -0.0042 0.032 -0.134 0.893 -0.066 0.058\n", + "x8 -0.3752 0.035 -10.792 0.000 -0.443 -0.307\n", + "==============================================================================\n" + ] + } + ], + "prompt_number": 80 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "res_binom_hc0 = glm_binom.fit(cov_type='HC0')\n", + "print(res_binom_hc0.bse)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "[ 0.29756134 0.03449398 0.01557106 0.02322051 0.03223825 0.01035988\n", + " 0.01096011 0.03237676 0.03444219]\n" + ] + } + ], + "prompt_number": 49 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "mod = GMMGLM(data_endog, data_exog, glm_model=glm_binom)\n", + "res_gmm = mod.fit(res.params*0.5, maxiter=1)#, optim_method='nm')" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "Optimization terminated successfully.\n", + " Current function value: 0.000000\n", + " Iterations: 67\n", + " Function evaluations: 83\n", + " Gradient evaluations: 83\n" + ] + } + ], + "prompt_number": 60 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "print(res_gmm.params)\n" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "[ 3.72566133 0.16023399 -0.03921794 0.01240123 -0.71610457 -0.06048658\n", + " 0.11001699 -0.00423259 -0.37515608]\n" + ] + } + ], + "prompt_number": 61 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "print(res_gmm.summary())" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + " GMMGLM Results \n", + "==============================================================================\n", + "Dep. Variable: y Hansen J: 7.169e-11\n", + "Model: GMMGLM Prob (Hansen J): nan\n", + "Method: GMM \n", + "Date: Fri, 02 Oct 2015 \n", + "Time: 21:36:51 \n", + "No. 
Observations: 6366 \n",
+ "==============================================================================\n",
+ " coef std err z P>|z| [0.025 0.975]\n",
+ "------------------------------------------------------------------------------\n",
+ "const 3.7257 0.298 12.521 0.000 3.142 4.309\n",
+ "x1 0.1602 0.034 4.645 0.000 0.093 0.228\n",
+ "x2 -0.0392 0.016 -2.519 0.012 -0.070 -0.009\n",
+ "x3 0.0124 0.023 0.534 0.593 -0.033 0.058\n",
+ "x4 -0.7161 0.032 -22.213 0.000 -0.779 -0.653\n",
+ "x5 -0.0605 0.010 -5.839 0.000 -0.081 -0.040\n",
+ "x6 0.1100 0.011 10.038 0.000 0.089 0.131\n",
+ "x7 -0.0042 0.032 -0.131 0.896 -0.068 0.059\n",
+ "x8 -0.3752 0.034 -10.892 0.000 -0.443 -0.308\n",
+ "==============================================================================\n"
+ ]
+ }
+ ],
+ "prompt_number": 62
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "mod.momcond(res.params).mean(0)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 63,
+ "text": [
+ "array([ -2.48575029e-15, -6.33966820e-15, -2.96547790e-14,\n",
+ " -1.16476801e-14, -1.07515491e-14, -7.86880597e-14,\n",
+ " -2.72113640e-14, -4.37912439e-15, -4.54604588e-15])"
+ ]
+ }
+ ],
+ "prompt_number": 63
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "res.model.score(res.params)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 64,
+ "text": [
+ "array([ -1.58242863e-11, -4.03583278e-11, -1.88782323e-10,\n",
+ " -7.41491313e-11, -6.84443613e-11, -5.00928188e-10,\n",
+ " -1.73227543e-10, -2.78775059e-11, -2.89401281e-11])"
+ ]
+ }
+ ],
+ "prompt_number": 64
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "GMM produces the same result as GLM-Logit with robust standard errors."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "res_gmm.bse / res_binom_hc0.bse - 1"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "res_gmm.bse - res_binom_hc0.bse"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 67,
+ "text": [
+ "array([ -1.12557086e-06, -3.39554191e-08, -2.53929408e-08,\n",
+ " -2.10870701e-08, -6.18484530e-08, -2.12759985e-08,\n",
+ " -3.32750719e-08, -3.88941921e-08, -3.76187979e-08])"
+ ]
+ }
+ ],
+ "prompt_number": 67
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [],
+ "language": "python",
+ "metadata": {},
+ "outputs": []
+ },
+ {
+ "cell_type": "heading",
+ "level": 2,
+ "metadata": {},
+ "source": [
+ "Application: score test for added variable"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This is experimental and might not work yet.\n",
+ "\n",
+ "The third variable `educ` has a p-value of 0.012 in the Wald t-test based on the asymptotic normal distribution with heteroscedasticity-robust standard errors. Since Wald tests are in many cases liberal, we can compare this with a score test, which in many cases has more accurate p-values. For simplicity we just test the null hypothesis that the effect of education is zero.\n",
+ "\n",
+ "In the following we drop the variable from the estimated model and calculate score and conditional moment tests. In the GMM version we estimate the reduced model with `educ` as an additional instrument, and look at the test for overidentifying restrictions.\n",
+ "\n",
+ "The following will be easier to do with formulas and pandas DataFrames, but I want to minimize possible sources of errors, and I'm more comfortable with numpy."
+ ]
+ },
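+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For orientation, the score (Lagrange multiplier) test in its standard form: with $\\tilde\\theta$ the estimate under the null (the reduced model) and $s$, $I$ the score and information of the full model,\n",
+ "\n",
+ "$$LM = s(\\tilde\\theta)' \\, I(\\tilde\\theta)^{-1} \\, s(\\tilde\\theta) \\sim \\chi^2_k,$$\n",
+ "\n",
+ "where $k$ is the number of restrictions (here $k = 1$). Only the reduced model has to be estimated, which is what makes it a natural match for the GMM setup with extra instruments below."
+ ]
+ },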
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "affair_mod.model.data.xnames"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 70,
+ "text": [
+ "['Intercept',\n",
+ " 'occupation',\n",
+ " 'educ',\n",
+ " 'occupation_husb',\n",
+ " 'rate_marriage',\n",
+ " 'age',\n",
+ " 'yrs_married',\n",
+ " 'children',\n",
+ " 'religious']"
+ ]
+ }
+ ],
+ "prompt_number": 70
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "idx = list(range(len(res.params)))\n",
+ "del idx[2]\n",
+ "print(res.params[idx])  # check that we don't have -0.0392 at index 2 anymore"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "[ 3.72571987 0.16023383 0.01240082 -0.71610711 -0.06048768 0.11001794\n",
+ " -0.00423323 -0.37515765]\n"
+ ]
+ }
+ ],
+ "prompt_number": 73
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "exog_reduced = data_exog[:, idx]  # exog without educ"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 75
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "glm_binom2 = sm.GLM(data_endog, exog_reduced, family=sm.families.Binomial())\n",
+ "res2 = glm_binom2.fit()"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 84
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "res2.model.score_test(res2.params, exog_extra=data_exog[:, 2])"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 85,
+ "text": [
+ "(array([ 6.42724105]), array([ 0.0112383]), 1)"
+ ]
+ }
+ ],
+ "prompt_number": 85
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The p-value of the score test, 0.01123, is close to the p-value of the robust Wald test and almost identical to the p-value of the nonrobust Wald test."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "res.pvalues[2]"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 86,
+ "text": [
+ "0.01129370516727681"
+ ]
+ }
+ ],
+ "prompt_number": 86
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Next we try the test for overidentifying restrictions in GMM."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "mod_red = GMMGLM(data_endog, exog_reduced, data_exog, glm_model=glm_binom2)\n",
+ "res_gmm_red = mod_red.fit(res2.params*0.5, maxiter=1)\n",
+ "print(res_gmm_red.summary())"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "Optimization terminated successfully.\n",
+ " Current function value: 0.000012\n",
+ " Iterations: 93\n",
+ " Function evaluations: 113\n",
+ " Gradient evaluations: 113\n",
+ " GMMGLM Results \n",
+ "==============================================================================\n",
+ "Dep. 
Variable: y Hansen J: 0.07338\n", + "Model: GMMGLM Prob (Hansen J): 0.786\n", + "Method: GMM \n", + "Date: Fri, 02 Oct 2015 \n", + "Time: 22:17:25 \n", + "No. Observations: 6366 \n", + "==============================================================================\n", + " coef std err z P>|z| [0.025 0.975]\n", + "------------------------------------------------------------------------------\n", + "const 1.9039 0.255 7.460 0.000 1.404 2.404\n", + "x1 0.1672 0.032 5.270 0.000 0.105 0.229\n", + "x2 0.0256 0.023 1.133 0.257 -0.019 0.070\n", + "x3 -0.6397 0.031 -20.788 0.000 -0.700 -0.579\n", + "x4 -0.0260 0.009 -2.748 0.006 -0.044 -0.007\n", + "x5 0.0811 0.010 8.045 0.000 0.061 0.101\n", + "x6 0.0146 0.031 0.465 0.642 -0.047 0.076\n", + "x7 -0.3311 0.034 -9.859 0.000 -0.397 -0.265\n", + "==============================================================================\n" + ] + } + ], + "prompt_number": 88 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "res_gmm_red.jtest()" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 89, + "text": [ + "(0.073381567745762516, 0.78647538706118691, 1)" + ] + } + ], + "prompt_number": 89 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The overidentifying restrictions are not rejected. \n", + "Note: This is a bit of a weird test in this case. We have the reduced model in the prediction, but the extra variable as an instrument. \n", + " I'm not sure what we are actually testing, besides some general orthogonality condition.\n", + " \n", + "What we would like to do right now is to use the restricted parameters, so we can calculate the moment conditions in the constrained model. I don't see yet where this is supported or how to support it. \n", + "\n", + "The following are just some additional checks with respect to changes in the number of GMM iterations. The above uses the identity weight matrix which is irrelevant in the exactly identified case that we had before. Now, we are in the overidentified case and need to adjust the weight matrix. As it turns out below, using two or more GMM iterations reduces the p-value for the j-test to 0.011 which is almost exactly the same as the non-robust Wald and the score test. (I need to think.)" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "res2.params" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 90, + "text": [ + "array([ 3.42523458e+00, 1.31822707e-01, 5.06765091e-03,\n", + " -7.18774739e-01, -6.64916925e-02, 1.15877472e-01,\n", + " 7.73938325e-04, -3.76889869e-01])" + ] + } + ], + "prompt_number": 90 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "res_gmm_red0 = mod_red.fit(res2.params*0.5, maxiter=0)\n", + "res_gmm_red2 = mod_red.fit(res2.params*0.5, maxiter=2)\n", + "print(res_gmm_red0.params)\n", + "print(res_gmm_red2.params)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "Optimization terminated successfully.\n", + " Current function value: 0.000012\n", + " Iterations: 93\n", + " Function evaluations: 113\n", + " Gradient evaluations: 113\n", + "Optimization terminated successfully." 
+ ]
+ },
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "\n",
+ " Current function value: 0.000012\n",
+ " Iterations: 93\n",
+ " Function evaluations: 113\n",
+ " Gradient evaluations: 113\n",
+ "Optimization terminated successfully."
+ ]
+ },
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "\n",
+ " Current function value: 0.001004\n",
+ " Iterations: 22\n",
+ " Function evaluations: 25\n",
+ " Gradient evaluations: 25\n",
+ "[ 1.90392843 0.16723188 0.02556783 -0.63974794 -0.02597373 0.08106879\n",
+ " 0.01457806 -0.33111524]\n",
+ "[ 3.45054937e+00 1.30305938e-01 4.82099713e-03 -7.20765741e-01\n",
+ " -6.70891066e-02 1.16593705e-01 1.87696971e-03 -3.78327300e-01]\n"
+ ]
+ }
+ ],
+ "prompt_number": 91
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "res2.params*0.5  # check how much params moved with maxiter=0"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 94,
+ "text": [
+ "array([ 1.71261729e+00, 6.59113534e-02, 2.53382545e-03,\n",
+ " -3.59387370e-01, -3.32458462e-02, 5.79387358e-02,\n",
+ " 3.86969163e-04, -1.88444934e-01])"
+ ]
+ }
+ ],
+ "prompt_number": 94
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "res_gmm_red2.jtest()"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 95,
+ "text": [
+ "(6.3917262195208489, 0.011465347883424048, 1)"
+ ]
+ }
+ ],
+ "prompt_number": 95
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [],
+ "language": "python",
+ "metadata": {},
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "res_gmm_red10 = mod_red.fit(res2.params*0.5, maxiter=10, optim_args=dict(disp=0))  # turn off the display of iteration results\n",
+ "res_gmm_red10.jtest()"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 98,
+ "text": [
+ "(6.4141956667727804, 0.011321159627002643, 1)"
+ ]
+ }
+ ],
+ "prompt_number": 98
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "res_gmm_red10.params"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 99,
+ "text": [
+ "array([ 3.45062431e+00, 1.30390828e-01, 4.65270807e-03,\n",
+ " -7.20710409e-01, -6.71229305e-02, 1.16670805e-01,\n",
+ " 1.88483098e-03, -3.78264590e-01])"
+ ]
+ }
+ ],
+ "prompt_number": 99
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "res2.params"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 100,
+ "text": [
+ "array([ 3.42523458e+00, 1.31822707e-01, 5.06765091e-03,\n",
+ " -7.18774739e-01, -6.64916925e-02, 1.15877472e-01,\n",
+ " 7.73938325e-04, -3.76889869e-01])"
+ ]
+ }
+ ],
+ "prompt_number": 100
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "These look like the expected results after all: after 10 GMM iterations the parameter estimates get close to the constrained GLM estimates.\n",
+ "\n",
+ "We are getting closer, but this still solves a different problem, since we don't keep the parameters fixed at the constrained solution.\n",
+ "\n",
+ "More to come: In another branch I have generic standalone conditional moment tests that are similar to the GMM setup but use fixed parameters estimated in the constrained model."
+ ]
+ },
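+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For reference, the statistic behind `jtest` is Hansen's J,\n",
+ "\n",
+ "$$J = n \\, \\bar{g}(\\hat\\theta)' \\, \\hat{W} \\, \\bar{g}(\\hat\\theta) \\sim \\chi^2_{k_{mom} - k_{params}},$$\n",
+ "\n",
+ "and the chi-square distribution only applies when $\\hat{W}$ is the efficient weight matrix, the inverse of the covariance of the moment conditions. That would explain the pattern above: with `maxiter=1` the weight matrix is not yet updated at the final parameter estimate, while two or more GMM iterations use approximately efficient weights and move the p-value to 0.011."
+ ]
+ },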
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Using another option, we can get the constrained parameter estimate by assigning zero weight to the extra restriction. The parameter estimates are close to the constrained GLM estimates. \n",
+ "But I never tried this before, and I'm not sure whether the weights are inverse weights in this example. Also, since the estimation is not based on the efficient weight matrix, the J-test does not apply, even though the result is still available."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "inv_weights = np.eye(9)\n",
+ "inv_weights[2,2] = 0\n",
+ "res_gmm_red1w = mod_red.fit(res2.params*0.5, maxiter=1, inv_weights=inv_weights, optim_args=dict(disp=0), has_optimal_weights=False)\n",
+ "print(res_gmm_red1w.params)\n",
+ "print(res_gmm_red1w.jtest())"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "[ 3.42521502e+00 1.31823174e-01 5.06794151e-03 -7.18773753e-01\n",
+ " -6.64911831e-02 1.15877026e-01 7.74106909e-04 -3.76889129e-01]\n",
+ "(1.4495974698075029e-11, 0.99999696216785972, 1)\n"
+ ]
+ }
+ ],
+ "prompt_number": 108
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Conclusion** so far is that I don't know yet how to get the equivalent of a score test for a constrained model with GMM."
+ ]
+ }
+ ],
+ "metadata": {}
+ }
+ ]
+}
\ No newline at end of file
diff --git a/notebooks/ex_gmm_glm.py b/notebooks/ex_gmm_glm.py
new file mode 100644
index 0000000..b2368e7
--- /dev/null
+++ b/notebooks/ex_gmm_glm.py
@@ -0,0 +1,245 @@
+
+# coding: utf-8
+
+## GLM as a GMM Model
+
+# The purpose of this notebook is to replicate generalized linear models, or generalized estimating equations with an independence correlation structure, as a Generalized Method of Moments (GMM) model.
+#
+# The second part compares a score test for variable addition with a test of overidentifying restrictions, which should eventually provide a generic framework for conditional moment tests.
+#
+# **Status**: We can replicate GLM by GMM, but the rest is experimental.

+# In[29]:

+from __future__ import division
+import numpy as np
+from statsmodels.sandbox.regression.gmm import GMM


+# In[30]:

+class GMMGLM(GMM):
+
+    def __init__(self, endog, exog, instrument=None, glm_model=None):
+        if instrument is None:
+            instrument = exog
+        super(GMMGLM, self).__init__(endog, exog, instrument)
+        self.glm_model = glm_model
+
+    def momcond(self, params):
+        # moment conditions: GLM score factor times each instrument column
+        mom1 = self.glm_model.score_factor(params)[:, None] * self.instrument
+        return mom1


+# Set up a Logit model for reference. We use an example from the statsmodels documentation.

+# In[ ]:

+import statsmodels.api as sm


+# In[ ]:

+# star98 is binomial count data with 2d endog; GMMGLM doesn't work for it yet.
+
+data = sm.datasets.star98.load()
+data_exog = sm.add_constant(data.exog, prepend=True)[:, :8]


+# In[80]:

+dta = sm.datasets.fair.load_pandas().data
+dta['affair'] = (dta['affairs'] > 0).astype(float)
+
+affair_mod = sm.formula.logit("affair ~ occupation + educ + occupation_husb"
+                              "+ rate_marriage + age + yrs_married + children"
+                              " + religious", dta).fit()
+
+# leave formulas and pandas for later
+data_endog = affair_mod.model.endog
+data_exog = affair_mod.model.exog
+
+glm_binom = sm.GLM(data_endog, data_exog, family=sm.families.Binomial())
+res = glm_binom.fit()
+print(res.summary())


+# In[49]:

+res_binom_hc0 = glm_binom.fit(cov_type='HC0')
+print(res_binom_hc0.bse)


+# In[60]:

+mod = GMMGLM(data_endog, data_exog, glm_model=glm_binom)
+res_gmm = mod.fit(res.params*0.5, maxiter=1)  #, optim_method='nm')


+# In[61]:

+print(res_gmm.params)


+# In[62]:

+print(res_gmm.summary())


+# In[63]:

+mod.momcond(res.params).mean(0)


+# In[64]:

+res.model.score(res.params)


+# GMM produces the same result as GLM-Logit with robust standard errors.

+# In[ ]:

+res_gmm.bse / res_binom_hc0.bse - 1


+# In[67]:

+res_gmm.bse - res_binom_hc0.bse


+# In[ ]:




+### Application: score test for added variable

+# This is experimental and might not work yet.
+#
+# The third variable `educ` has a p-value of 0.012 in the Wald t-test based on the asymptotic normal distribution with heteroscedasticity-robust standard errors. Since Wald tests are in many cases liberal, we can compare this with a score test, which in many cases has more accurate p-values. For simplicity we just test the null hypothesis that the effect of education is zero.
+#
+# In the following we drop the variable from the estimated model and calculate score and conditional moment tests. In the GMM version we estimate the reduced model with `educ` as an additional instrument, and look at the test for overidentifying restrictions.
+#
+# The following will be easier to do with formulas and pandas DataFrames, but I want to minimize possible sources of errors, and I'm more comfortable with numpy.

+# In[70]:

+affair_mod.model.data.xnames


+# In[73]:

+idx = list(range(len(res.params)))
+del idx[2]
+print(res.params[idx])  # check that we don't have -0.0392 at index 2 anymore


+# In[75]:

+exog_reduced = data_exog[:, idx]  # exog without educ


+# In[84]:

+glm_binom2 = sm.GLM(data_endog, exog_reduced, family=sm.families.Binomial())
+res2 = glm_binom2.fit()


+# In[85]:

+res2.model.score_test(res2.params, exog_extra=data_exog[:, 2])


+# The p-value of the score test, 0.01123, is close to the p-value of the robust Wald test and almost identical to the p-value of the nonrobust Wald test.

+# In[86]:

+res.pvalues[2]


+# Next we try the test for overidentifying restrictions in GMM.

+# In[88]:

+mod_red = GMMGLM(data_endog, exog_reduced, data_exog, glm_model=glm_binom2)
+res_gmm_red = mod_red.fit(res2.params*0.5, maxiter=1)
+print(res_gmm_red.summary())


+# In[89]:

+res_gmm_red.jtest()


+# The overidentifying restrictions are not rejected.
+# Note: This is a bit of a weird test in this case. We have the reduced model in the prediction, but the extra variable as an instrument.
+# I'm not sure what we are actually testing, besides some general orthogonality condition.
+#
+# What we would like to do right now is to use the restricted parameters, so we can calculate the moment conditions in the constrained model. 
I don't see yet where this is supported or how to support it.
+#
+# The following are just some additional checks with respect to changes in the number of GMM iterations. The above uses the identity weight matrix, which is irrelevant in the exactly identified case that we had before. Now we are in the overidentified case and need to adjust the weight matrix. As it turns out below, using two or more GMM iterations reduces the p-value for the J-test to 0.011, which is almost exactly the same as the non-robust Wald and the score test. (I need to think.)

+# In[90]:

+res2.params


+# In[91]:

+res_gmm_red0 = mod_red.fit(res2.params*0.5, maxiter=0)
+res_gmm_red2 = mod_red.fit(res2.params*0.5, maxiter=2)
+print(res_gmm_red0.params)
+print(res_gmm_red2.params)


+# In[94]:

+res2.params*0.5  # check how much params moved with maxiter=0


+# In[95]:

+res_gmm_red2.jtest()


+# In[ ]:




+# In[98]:

+res_gmm_red10 = mod_red.fit(res2.params*0.5, maxiter=10, optim_args=dict(disp=0))  # turn off the display of iteration results
+res_gmm_red10.jtest()


+# In[99]:

+res_gmm_red10.params


+# In[100]:

+res2.params


+# These look like the expected results after all: after 10 GMM iterations the parameter estimates get close to the constrained GLM estimates.
+#
+# We are getting closer, but this still solves a different problem, since we don't keep the parameters fixed at the constrained solution.
+#
+# More to come: In another branch I have generic standalone conditional moment tests that are similar to the GMM setup but use fixed parameters estimated in the constrained model.

+# Using another option, we can get the constrained parameter estimate by assigning zero weight to the extra restriction. The parameter estimates are close to the constrained GLM estimates.
+# But I never tried this before, and I'm not sure whether the weights are inverse weights in this example. Also, since the estimation is not based on the efficient weight matrix, the J-test does not apply, even though the result is still available.
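+
+# A quick check of why zero weight on the educ moment should reproduce the
+# constrained estimate (a sketch; this assumes `fit` builds the weighting matrix
+# from `inv_weights` in a way that keeps the zeroed entry at zero): the GMM
+# objective gbar' W gbar then simply ignores the educ moment, which is the only
+# moment that is violated at the constrained GLM solution.
+
+w = np.eye(9)
+w[2, 2] = 0  # drop the educ moment from the criterion
+gbar = mod_red.momcond(res2.params).mean(0)  # average moments at the constrained GLM estimate
+print(gbar.dot(w).dot(gbar))  # ~0, while gbar[2] itself is not zero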
+
+# In[108]:
+
+inv_weights = np.eye(9)
+inv_weights[2,2] = 0
+res_gmm_red1w = mod_red.fit(res2.params*0.5, maxiter=1, inv_weights=inv_weights, optim_args=dict(disp=0), has_optimal_weights=False)
+print(res_gmm_red1w.params)
+print(res_gmm_red1w.jtest())


+# **Conclusion** so far is that I don't know yet how to get the equivalent of a score test for a constrained model with GMM.

From e774e6e51bec682a3e0491d4397502e9059bc704 Mon Sep 17 00:00:00 2001
From: Josef
Date: Thu, 8 Oct 2015 19:17:10 -0400
Subject: [PATCH 2/4] ENH: more on GMMGLM score tests

---
 notebooks/ex_gmm_glm.ipynb | 957 +++++++++++++++++++++++++++++++++++--
 notebooks/ex_gmm_glm.py | 296 ++++++++++--
 2 files changed, 1171 insertions(+), 82 deletions(-)

diff --git a/notebooks/ex_gmm_glm.ipynb b/notebooks/ex_gmm_glm.ipynb
index dccda9d..a49a601 100644
--- a/notebooks/ex_gmm_glm.ipynb
+++ b/notebooks/ex_gmm_glm.ipynb
@@ -1,7 +1,7 @@
 {
  "metadata": {
   "name": "",
-  "signature": "sha256:6b71041c98ca379f22247310c703eeb66dc113589d202bf39f3b4d7e66ef2163"
+  "signature": "sha256:452e9c7db28c092d28c36c232cf5c77753902a09c82865ba7c8d5f41715b0f96"
  },
  "nbformat": 3,
  "nbformat_minor": 0,
@@ -38,7 +38,7 @@
  "language": "python",
  "metadata": {},
  "outputs": [],
- "prompt_number": 29
+ "prompt_number": 1
 },
 {
  "cell_type": "code",
@@ -59,7 +59,7 @@
  "language": "python",
  "metadata": {},
  "outputs": [],
- "prompt_number": 30
+ "prompt_number": 2
 },
 {
  "cell_type": "markdown",
@@ -76,7 +76,8 @@
 ],
 "language": "python",
 "metadata": {},
- "outputs": []
+ "outputs": [],
+ "prompt_number": 3
 },
 {
  "cell_type": "code",
@@ -89,7 +90,17 @@
 ],
 "language": "python",
 "metadata": {},
- "outputs": []
+ "outputs": [],
+ "prompt_number": 4
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Note: The affairs dataset is most likely too large to show the problems with hypothesis testing that we have in small samples. The p-values of the Wald and score tests for testing that education has no effect are very close to each other. Asymptotically they have the same distribution, but in small samples the Wald test is often liberal and overrejects, while score tests can be conservative and underreject. If both agree, then that is a strong indication that using the asymptotic distribution does not cause small-sample problems here.\n",
+ "\n",
+ "I'm planning to redo the analysis with a random subsample of the dataset."
+ ]
 },
 {
  "cell_type": "code",
@@ -127,8 +138,8 @@
 "Model Family: Binomial Df Model: 8\n",
 "Link Function: logit Scale: 1.0\n",
 "Method: IRLS Log-Likelihood: -3471.5\n",
- "Date: Fri, 02 Oct 2015 Deviance: 6942.9\n",
- "Time: 22:09:49 Pearson chi2: 6.30e+03\n",
+ "Date: Thu, 08 Oct 2015 Deviance: 6942.9\n",
+ "Time: 19:15:06 Pearson chi2: 6.30e+03\n",
 "No. Iterations: 6 \n",
 "==============================================================================\n",
 " coef std err z P>|z| [0.025 0.975]\n",
@@ -146,7 +157,7 @@
 ]
 }
 ],
- "prompt_number": 80
+ "prompt_number": 5
 },
 {
  "cell_type": "code",
@@ -167,7 +178,7 @@
 ]
 }
 ],
- "prompt_number": 49
+ "prompt_number": 6
 },
 {
  "cell_type": "code",
@@ -191,7 +202,7 @@
 ]
 }
 ],
- "prompt_number": 60
+ "prompt_number": 7
 },
 {
  "cell_type": "code",
@@ -211,7 +222,7 @@
 ]
 }
 ],
- "prompt_number": 61
+ "prompt_number": 8
 },
 {
  "cell_type": "code",
@@ -231,8 +242,8 @@
 "Dep. Variable: y Hansen J: 7.169e-11\n",
 "Model: GMMGLM Prob (Hansen J): nan\n",
 "Method: GMM \n",
- "Date: Fri, 02 Oct 2015 \n",
- "Time: 21:36:51 \n",
+ "Date: Thu, 08 Oct 2015 \n",
+ "Time: 19:15:09 \n",
 "No. 
Observations: 6366 \n", "==============================================================================\n", " coef std err z P>|z| [0.025 0.975]\n", @@ -250,7 +261,7 @@ ] } ], - "prompt_number": 62 + "prompt_number": 9 }, { "cell_type": "code", @@ -264,7 +275,7 @@ { "metadata": {}, "output_type": "pyout", - "prompt_number": 63, + "prompt_number": 10, "text": [ "array([ -2.48575029e-15, -6.33966820e-15, -2.96547790e-14,\n", " -1.16476801e-14, -1.07515491e-14, -7.86880597e-14,\n", @@ -272,7 +283,7 @@ ] } ], - "prompt_number": 63 + "prompt_number": 10 }, { "cell_type": "code", @@ -286,7 +297,7 @@ { "metadata": {}, "output_type": "pyout", - "prompt_number": 64, + "prompt_number": 11, "text": [ "array([ -1.58242863e-11, -4.03583278e-11, -1.88782323e-10,\n", " -7.41491313e-11, -6.84443613e-11, -5.00928188e-10,\n", @@ -294,7 +305,7 @@ ] } ], - "prompt_number": 64 + "prompt_number": 11 }, { "cell_type": "markdown", @@ -311,7 +322,19 @@ ], "language": "python", "metadata": {}, - "outputs": [] + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 12, + "text": [ + "array([ -3.78265158e-06, -9.84386738e-07, -1.63077784e-06,\n", + " -9.08122702e-07, -1.91848044e-06, -2.05369217e-06,\n", + " -3.03601619e-06, -1.20129965e-06, -1.09223021e-06])" + ] + } + ], + "prompt_number": 12 }, { "cell_type": "code", @@ -325,7 +348,7 @@ { "metadata": {}, "output_type": "pyout", - "prompt_number": 67, + "prompt_number": 13, "text": [ "array([ -1.12557086e-06, -3.39554191e-08, -2.53929408e-08,\n", " -2.10870701e-08, -6.18484530e-08, -2.12759985e-08,\n", @@ -333,7 +356,7 @@ ] } ], - "prompt_number": 67 + "prompt_number": 13 }, { "cell_type": "code", @@ -341,7 +364,8 @@ "input": [], "language": "python", "metadata": {}, - "outputs": [] + "outputs": [], + "prompt_number": 13 }, { "cell_type": "heading", @@ -376,7 +400,7 @@ { "metadata": {}, "output_type": "pyout", - "prompt_number": 70, + "prompt_number": 14, "text": [ "['Intercept',\n", " 'occupation',\n", @@ -390,7 +414,7 @@ ] } ], - "prompt_number": 70 + "prompt_number": 14 }, { "cell_type": "code", @@ -412,7 +436,7 @@ ] } ], - "prompt_number": 73 + "prompt_number": 15 }, { "cell_type": "code", @@ -423,7 +447,7 @@ "language": "python", "metadata": {}, "outputs": [], - "prompt_number": 75 + "prompt_number": 16 }, { "cell_type": "code", @@ -435,7 +459,7 @@ "language": "python", "metadata": {}, "outputs": [], - "prompt_number": 84 + "prompt_number": 17 }, { "cell_type": "code", @@ -449,13 +473,13 @@ { "metadata": {}, "output_type": "pyout", - "prompt_number": 85, + "prompt_number": 18, "text": [ "(array([ 6.42724105]), array([ 0.0112383]), 1)" ] } ], - "prompt_number": 85 + "prompt_number": 18 }, { "cell_type": "markdown", @@ -476,13 +500,13 @@ { "metadata": {}, "output_type": "pyout", - "prompt_number": 86, + "prompt_number": 19, "text": [ "0.01129370516727681" ] } ], - "prompt_number": 86 + "prompt_number": 19 }, { "cell_type": "markdown", @@ -516,8 +540,8 @@ "Dep. Variable: y Hansen J: 0.07338\n", "Model: GMMGLM Prob (Hansen J): 0.786\n", "Method: GMM \n", - "Date: Fri, 02 Oct 2015 \n", - "Time: 22:17:25 \n", + "Date: Thu, 08 Oct 2015 \n", + "Time: 19:15:13 \n", "No. 
Observations: 6366 \n", "==============================================================================\n", " coef std err z P>|z| [0.025 0.975]\n", @@ -534,7 +558,7 @@ ] } ], - "prompt_number": 88 + "prompt_number": 20 }, { "cell_type": "code", @@ -548,13 +572,13 @@ { "metadata": {}, "output_type": "pyout", - "prompt_number": 89, + "prompt_number": 21, "text": [ "(0.073381567745762516, 0.78647538706118691, 1)" ] } ], - "prompt_number": 89 + "prompt_number": 21 }, { "cell_type": "markdown", @@ -581,7 +605,7 @@ { "metadata": {}, "output_type": "pyout", - "prompt_number": 90, + "prompt_number": 22, "text": [ "array([ 3.42523458e+00, 1.31822707e-01, 5.06765091e-03,\n", " -7.18774739e-01, -6.64916925e-02, 1.15877472e-01,\n", @@ -589,7 +613,7 @@ ] } ], - "prompt_number": 90 + "prompt_number": 22 }, { "cell_type": "code", @@ -643,7 +667,7 @@ ] } ], - "prompt_number": 91 + "prompt_number": 23 }, { "cell_type": "code", @@ -657,7 +681,7 @@ { "metadata": {}, "output_type": "pyout", - "prompt_number": 94, + "prompt_number": 24, "text": [ "array([ 1.71261729e+00, 6.59113534e-02, 2.53382545e-03,\n", " -3.59387370e-01, -3.32458462e-02, 5.79387358e-02,\n", @@ -665,7 +689,7 @@ ] } ], - "prompt_number": 94 + "prompt_number": 24 }, { "cell_type": "code", @@ -679,13 +703,13 @@ { "metadata": {}, "output_type": "pyout", - "prompt_number": 95, + "prompt_number": 25, "text": [ "(6.3917262195208489, 0.011465347883424048, 1)" ] } ], - "prompt_number": 95 + "prompt_number": 25 }, { "cell_type": "code", @@ -693,7 +717,8 @@ "input": [], "language": "python", "metadata": {}, - "outputs": [] + "outputs": [], + "prompt_number": 25 }, { "cell_type": "code", @@ -708,13 +733,13 @@ { "metadata": {}, "output_type": "pyout", - "prompt_number": 98, + "prompt_number": 26, "text": [ "(6.4141956667727804, 0.011321159627002643, 1)" ] } ], - "prompt_number": 98 + "prompt_number": 26 }, { "cell_type": "code", @@ -728,7 +753,7 @@ { "metadata": {}, "output_type": "pyout", - "prompt_number": 99, + "prompt_number": 27, "text": [ "array([ 3.45062431e+00, 1.30390828e-01, 4.65270807e-03,\n", " -7.20710409e-01, -6.71229305e-02, 1.16670805e-01,\n", @@ -736,7 +761,7 @@ ] } ], - "prompt_number": 99 + "prompt_number": 27 }, { "cell_type": "code", @@ -750,7 +775,7 @@ { "metadata": {}, "output_type": "pyout", - "prompt_number": 100, + "prompt_number": 28, "text": [ "array([ 3.42523458e+00, 1.31822707e-01, 5.06765091e-03,\n", " -7.18774739e-01, -6.64916925e-02, 1.15877472e-01,\n", @@ -758,7 +783,7 @@ ] } ], - "prompt_number": 100 + "prompt_number": 28 }, { "cell_type": "markdown", @@ -802,7 +827,7 @@ ] } ], - "prompt_number": 108 + "prompt_number": 29 }, { "cell_type": "markdown", @@ -810,6 +835,836 @@ "source": [ "**Conclusion** so far is that I don't know yet how to get the equivalent of an score test for a constrained model with GMM" ] + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Lagrange Multiplier" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "(experimental)\n", + "\n", + "In the following we add a slack parameter to a moment condition. This is a way to estimate the parameters of the constrained model but we have the moment conditions available for the full model. See McFadden or ... (which textbook).\n", + "\n", + "The wald test on the slack parameter provides a version of the Lagrange multiplier or score test. 
The difference from the likelihood-based score test is that this moment test is based on a robust covariance matrix, in this case HC0.\n",
+ "\n",
+ "Note: McFadden, or Newey and McFadden (?), give several definitions of LM or score tests. I'm not sure yet which variant this is.\n",
+ "\n",
+ "In this case we cannot use the test for overidentifying restrictions anymore, because we added artificial parameters that make the problem exactly identified, given that we started out with an exactly identified model. \n",
+ "\n",
+ "The test is the same variable addition test as in the previous section. The second explanatory variable is removed from the mean function of the model but included as an instrument in the moment condition. The null hypothesis is that this variable has a coefficient of zero, and is therefore orthogonal to the weighted residuals or elementary zero function of the model. (\"elementary zero function\" is terminology in Davidson and MacKinnon.)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 29
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "class GMMGLMVADD(GMM):\n",
+ "\n",
+ "    def __init__(self, endog, exog, instrument=None, glm_model=None, slack_idx=None):\n",
+ "        if instrument is None:\n",
+ "            instrument = exog\n",
+ "        super(GMMGLMVADD, self).__init__(endog, exog, instrument)\n",
+ "        self.glm_model = glm_model\n",
+ "        \n",
+ "        if slack_idx is None:\n",
+ "            self.k_slack = 0\n",
+ "        else:\n",
+ "            self.slack_idx = slack_idx\n",
+ "            self.k_slack = len(slack_idx)\n",
+ "        \n",
+ "    def momcond(self, params):\n",
+ "        k_slack = self.k_slack\n",
+ "        # params[:-0] would be empty, so only strip slack parameters if there are any\n",
+ "        params_glm = params[:-k_slack] if k_slack > 0 else params\n",
+ "        mom1 = self.glm_model.score_factor(params_glm)[:, None] * self.instrument\n",
+ "        if k_slack > 0:\n",
+ "            mom1[:, self.slack_idx] += params[-k_slack:]\n",
+ "        return mom1"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 30
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "mod_red3 = GMMGLMVADD(data_endog, exog_reduced, data_exog, glm_model=glm_binom2, slack_idx=[2])\n",
+ "res_gmm_red3 = mod_red3.fit(np.concatenate((res2.params*0.5, [0])), maxiter=2)\n",
+ "xnames = ['const', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'slack2']\n",
+ "print(res_gmm_red3.summary(xname=xnames))"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "Optimization terminated successfully.\n",
+ " Current function value: 0.000000\n",
+ " Iterations: 132\n",
+ " Function evaluations: 157\n",
+ " Gradient evaluations: 157\n",
+ "Optimization terminated successfully."
+ ]
+ },
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "\n",
+ " Current function value: 0.000000\n",
+ " Iterations: 4\n",
+ " Function evaluations: 8\n",
+ " Gradient evaluations: 8\n",
+ " GMMGLMVADD Results \n",
+ "==============================================================================\n",
+ "Dep. Variable: y Hansen J: 1.749e-06\n",
+ "Model: GMMGLMVADD Prob (Hansen J): nan\n",
+ "Method: GMM \n",
+ "Date: Thu, 08 Oct 2015 \n",
+ "Time: 19:15:21 \n",
+ "No. 
Observations: 6366 \n", + "==============================================================================\n", + " coef std err z P>|z| [0.025 0.975]\n", + "------------------------------------------------------------------------------\n", + "const 3.4249 0.274 12.482 0.000 2.887 3.963\n", + "x1 0.1318 0.032 4.073 0.000 0.068 0.195\n", + "x2 0.0051 0.023 0.221 0.825 -0.040 0.050\n", + "x3 -0.7188 0.032 -22.307 0.000 -0.782 -0.656\n", + "x4 -0.0665 0.010 -6.578 0.000 -0.086 -0.047\n", + "x5 0.1159 0.011 10.818 0.000 0.095 0.137\n", + "x6 0.0008 0.032 0.024 0.981 -0.062 0.064\n", + "x7 -0.3769 0.034 -10.965 0.000 -0.444 -0.310\n", + "slack2 0.0259 0.010 2.535 0.011 0.006 0.046\n", + "==============================================================================\n" + ] + } + ], + "prompt_number": 31 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "mod_red3.slack_idx, mod_red3.k_slack\n" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 32, + "text": [ + "([2], 1)" + ] + } + ], + "prompt_number": 32 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "res_gmm_red3.bse\n" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 33, + "text": [ + "array([ 0.27439167, 0.03236501, 0.02290473, 0.03222159, 0.01010725,\n", + " 0.0107106 , 0.03228203, 0.03437142, 0.01021506])" + ] + } + ], + "prompt_number": 33 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "res_gmm_red3.pvalues" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 34, + "text": [ + "array([ 9.39607510e-036, 4.63687648e-005, 8.24724152e-001,\n", + " 3.18839613e-110, 4.78033256e-011, 2.82512202e-027,\n", + " 9.80801929e-001, 5.63042188e-028, 1.12320912e-002])" + ] + } + ], + "prompt_number": 34 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "mod_red3.data.xnames\n" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 35, + "text": [ + "['const', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7']" + ] + } + ], + "prompt_number": 35 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "res2.params" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 36, + "text": [ + "array([ 3.42523458e+00, 1.31822707e-01, 5.06765091e-03,\n", + " -7.18774739e-01, -6.64916925e-02, 1.15877472e-01,\n", + " 7.73938325e-04, -3.76889869e-01])" + ] + } + ], + "prompt_number": 36 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "res2.bse" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 37, + "text": [ + "array([ 0.27311515, 0.03204911, 0.02269253, 0.03141184, 0.01003245,\n", + " 0.01071536, 0.03154603, 0.03470859])" + ] + } + ], + "prompt_number": 37 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "res2.pvalues" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 38, + "text": [ + "array([ 4.43338940e-036, 3.90300395e-005, 8.23287987e-001,\n", + " 6.97441580e-116, 3.41051586e-011, 2.95011174e-027,\n", + " 9.80426966e-001, 1.81326984e-027])" + ] + } + ], + "prompt_number": 38 + }, + { + "cell_type": "code", 
+ "collapsed": false, + "input": [ + "scs, scpvalue, _ = res2.model.score_test(res2.params, exog_extra = data_exog[:, 2])" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 39 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "res_gmm_red3.pvalues[-1], scpvalue, res_gmm_red3.pvalues[-1] - scpvalue, res_gmm_red3.pvalues[-1] / scpvalue - 1" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 40, + "text": [ + "(0.011232091165573164,\n", + " array([ 0.0112383]),\n", + " array([ -6.21036920e-06]),\n", + " array([-0.00055261]))" + ] + } + ], + "prompt_number": 40 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "scs, res_gmm_red3.tvalues[-1]**2, scs / res_gmm_red3.tvalues[-1]**2 - 1\n" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 41, + "text": [ + "(array([ 6.42724105]), 6.4282228003214819, array([-0.00015272]))" + ] + } + ], + "prompt_number": 41 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "res2.model.score_test(res2.params, exog_extra = data_exog[:, 2])\n" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 42, + "text": [ + "(array([ 6.42724105]), array([ 0.0112383]), 1)" + ] + } + ], + "prompt_number": 42 + }, + { + "cell_type": "heading", + "level": 3, + "metadata": {}, + "source": [ + "Explicit Calculation of score tests" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, we can calculate the score or Lagrange Multiplier tests directly based on the matrices that can be calculated with the models.\n", + "\n", + "As starting point we use the full model with all moment conditions and the parameter estimates of the restricted model." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "res_gmm_red3.params[:-1], res2.params" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 43, + "text": [ + "(array([ 3.42487405e+00, 1.31829634e-01, 5.07277912e-03,\n", + " -7.18754106e-01, -6.64816846e-02, 1.15868568e-01,\n", + " 7.76819793e-04, -3.76882429e-01]),\n", + " array([ 3.42523458e+00, 1.31822707e-01, 5.06765091e-03,\n", + " -7.18774739e-01, -6.64916925e-02, 1.15877472e-01,\n", + " 7.73938325e-04, -3.76889869e-01]))" + ] + } + ], + "prompt_number": 43 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "params_restricted = np.insert(res_gmm_red3.params[:-1], 2, 0)\n", + "params_restricted = np.insert(res2.params, 2, 0)\n", + "params_restricted" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 44, + "text": [ + "array([ 3.42523458e+00, 1.31822707e-01, 0.00000000e+00,\n", + " 5.06765091e-03, -7.18774739e-01, -6.64916925e-02,\n", + " 1.15877472e-01, 7.73938325e-04, -3.76889869e-01])" + ] + } + ], + "prompt_number": 44 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "M = mod.momcond_mean(params_restricted)\n", + "D = mod.gradient_momcond(params_restricted)\n", + "moms = mod.momcond(params_restricted)\n", + "#V = mod.calc_weightmatrix(...) 
check this\n", + "V = np.cov(moms.T)\n", + "W = res_gmm.weights\n", + "M, D, W, V" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 45, + "text": [ + "(array([ -2.10543873e-15, -6.17188780e-15, -2.58991722e-02,\n", + " -4.87933079e-15, -5.85380175e-15, -7.20706700e-14,\n", + " -2.38310785e-14, -3.97369286e-15, -5.26707173e-15]),\n", + " array([[ -0.18283202, -0.62931154, -2.58428762, -0.70685725,\n", + " -0.72666576, -5.41078539, -1.8003257 , -0.28018795,\n", + " -0.42789648],\n", + " [ -0.62931154, -2.33007695, -9.03580485, -2.47994857,\n", + " -2.51293754, -18.73930255, -6.24245299, -0.95923235,\n", + " -1.47873372],\n", + " [ -2.58428778, -9.03580541, -37.38268793, -10.09033072,\n", + " -10.29460253, -76.5458288 , -25.14389312, -3.87953041,\n", + " -6.05105514],\n", + " [ -0.70685726, -2.47994857, -10.09033011, -3.05838927,\n", + " -2.82113798, -21.18732083, -7.19004012, -1.11166366,\n", + " -1.65646452],\n", + " [ -0.72666576, -2.51293754, -10.29460202, -2.82113798,\n", + " -3.0492791 , -21.48041489, -7.14981783, -1.10433206,\n", + " -1.70069266],\n", + " [ -5.41078758, -18.7393102 , -76.54585388, -21.18732946,\n", + " -21.48042289, -169.0112392 , -61.79527152, -9.53554966,\n", + " -12.87355868],\n", + " [ -1.80032595, -6.24245386, -25.1438944 , -7.19004109,\n", + " -7.14981882, -61.79524342, -27.70449502, -4.2519359 ,\n", + " -4.45012795],\n", + " [ -0.28018795, -0.95923235, -3.87953009, -1.11166366,\n", + " -1.10433206, -9.53554401, -4.25193516, -0.81335245,\n", + " -0.70232518],\n", + " [ -0.42789648, -1.47873371, -6.05105483, -1.65646452,\n", + " -1.70069266, -12.87355376, -4.45012732, -0.70232518,\n", + " -1.13853604]]),\n", + " array([[ 1., 0., 0., 0., 0., 0., 0., 0., 0.],\n", + " [ 0., 1., 0., 0., 0., 0., 0., 0., 0.],\n", + " [ 0., 0., 1., 0., 0., 0., 0., 0., 0.],\n", + " [ 0., 0., 0., 1., 0., 0., 0., 0., 0.],\n", + " [ 0., 0., 0., 0., 1., 0., 0., 0., 0.],\n", + " [ 0., 0., 0., 0., 0., 1., 0., 0., 0.],\n", + " [ 0., 0., 0., 0., 0., 0., 1., 0., 0.],\n", + " [ 0., 0., 0., 0., 0., 0., 0., 1., 0.],\n", + " [ 0., 0., 0., 0., 0., 0., 0., 0., 1.]]),\n", + " array([[ 0.18342526, 0.62836418, 2.58301434, 0.70789551,\n", + " 0.72262673, 5.49056489, 1.86697968, 0.29238911,\n", + " 0.42691353],\n", + " [ 0.62836418, 2.31968388, 8.98456669, 2.46953305,\n", + " 2.48977564, 18.91204798, 6.4167698 , 0.99177319,\n", + " 1.46574926],\n", + " [ 2.58301434, 8.98456669, 37.19750312, 10.06011201,\n", + " 10.19287649, 77.43856837, 26.07619283, 4.05121205,\n", + " 6.00971088],\n", + " [ 0.70789551, 2.46953305, 10.06011201, 3.06115022,\n", + " 2.80057596, 21.44504654, 7.43070773, 1.1551975 ,\n", + " 1.64978165],\n", + " [ 0.72262673, 2.48977564, 10.19287649, 2.80057596,\n", + " 3.01717045, 21.599835 , 7.34649643, 1.13695951,\n", + " 1.68118051],\n", + " [ 5.49056489, 18.91204798, 77.43856837, 21.44504654,\n", + " 21.599835 , 172.9604553 , 64.06313033, 9.91802978,\n", + " 12.96891675],\n", + " [ 1.86697968, 6.4167698 , 26.07619283, 7.43070773,\n", + " 7.34649643, 64.06313033, 28.60910013, 4.39962125,\n", + " 4.5638303 ],\n", + " [ 0.29238911, 0.99177319, 4.05121205, 1.1551975 ,\n", + " 1.13695951, 9.91802978, 4.39962125, 0.84613688,\n", + " 0.72422916],\n", + " [ 0.42691353, 1.46574926, 6.00971088, 1.64978165,\n", + " 1.68118051, 12.96891675, 4.5638303 , 0.72422916,\n", + " 1.12725452]]))" + ] + } + ], + "prompt_number": 45 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "from scipy import stats\n", + 
"nobs = mod.endog.shape[0]\n", + "Vinv = np.linalg.pinv(V)\n", + "ddf = len(res2.params)\n", + "ss = (nobs - ddf) * M.dot(Vinv.dot(M))\n", + "ss, stats.chi2.sf(ss, 1)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 46, + "text": [ + "(6.4477917682854198, 0.011109033369376293)" + ] + } + ], + "prompt_number": 46 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "M" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 47, + "text": [ + "array([ -2.10543873e-15, -6.17188780e-15, -2.58991722e-02,\n", + " -4.87933079e-15, -5.85380175e-15, -7.20706700e-14,\n", + " -2.38310785e-14, -3.97369286e-15, -5.26707173e-15])" + ] + } + ], + "prompt_number": 47 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "nobs" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 48, + "text": [ + "6366" + ] + } + ], + "prompt_number": 48 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "scs, scpvalue" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 49, + "text": [ + "(array([ 6.42724105]), array([ 0.0112383]))" + ] + } + ], + "prompt_number": 49 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "M.dot(V.dot(M))" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 50, + "text": [ + "0.024950862053233158" + ] + } + ], + "prompt_number": 50 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "Vb = D.dot(Vinv).dot(D)\n", + "Vbinv = np.linalg.pinv(V)\n", + "ss = (nobs - ddf) * M.dot(Vbinv.dot(M))\n", + "ss, stats.chi2.sf(ss, 1)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 51, + "text": [ + "(6.4477917682854198, 0.011109033369376293)" + ] + } + ], + "prompt_number": 51 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "np.sqrt(np.diag(Vbinv))" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 52, + "text": [ + "array([ 23.94470765, 2.67108656, 1.22958727, 1.80686733,\n", + " 2.44629158, 0.81341329, 0.87077882, 2.46498821, 2.79771633])" + ] + } + ], + "prompt_number": 52 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "res2.bse" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 53, + "text": [ + "array([ 0.27311515, 0.03204911, 0.02269253, 0.03141184, 0.01003245,\n", + " 0.01071536, 0.03154603, 0.03470859])" + ] + } + ], + "prompt_number": 53 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "23.94470765 / 0.27311515\n" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 54, + "text": [ + "87.67257199023929" + ] + } + ], + "prompt_number": 54 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "np.sqrt(nobs)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 55, + "text": [ + "79.787217021274785" + ] + } + ], + "prompt_number": 55 + }, + { + "cell_type": "code", + "collapsed": 
false, + "input": [ + "2.67108656 / 0.03204911" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 56, + "text": [ + "83.34354869760814" + ] + } + ], + "prompt_number": 56 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "Vb = D.dot(Vinv).dot(D)\n", + "Vbinv = np.linalg.pinv(V)\n", + "ss = (nobs - 0) * M.dot(Vbinv.dot(M))\n", + "ss, stats.chi2.sf(ss, 1)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 57, + "text": [ + "(6.4559047494345672, 0.011058422030834639)" + ] + } + ], + "prompt_number": 57 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "k_params = len(params_restricted)\n", + "R = np.zeros((1, k_params))\n", + "R[0, 2] = 1\n", + "R.dot(params_restricted)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 58, + "text": [ + "array([ 0.])" + ] + } + ], + "prompt_number": 58 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "ss = (nobs - 0) * M[[2]].dot(R.dot(Vbinv.dot(R.T)).dot(M[[2]]))\n", + "ss, stats.chi2.sf(ss, 1)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 59, + "text": [ + "(6.4559047494491058, 0.011058422030744188)" + ] + } + ], + "prompt_number": 59 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "M[[2]]" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 60, + "text": [ + "array([-0.02589917])" + ] + } + ], + "prompt_number": 60 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 60 } ], "metadata": {} diff --git a/notebooks/ex_gmm_glm.py b/notebooks/ex_gmm_glm.py index b2368e7..7b117d7 100644 --- a/notebooks/ex_gmm_glm.py +++ b/notebooks/ex_gmm_glm.py @@ -9,14 +9,14 @@ # # **Status**: We can replicate GLM by GMM, but the rest is experimental -# In[29]: +# In[1]: from __future__ import division import numpy as np from statsmodels.sandbox.regression.gmm import GMM -# In[30]: +# In[2]: class GMMGLM(GMM): @@ -33,12 +33,12 @@ def momcond(self, params): # Set up a Logit model for reference. We use an example from the statsmodels documentation. -# In[ ]: +# In[3]: import statsmodels.api as sm -# In[ ]: +# In[4]: # stardata is binomial count we 2d endog, Doesn't work yet. @@ -46,7 +46,11 @@ def momcond(self, params): data_exog = sm.add_constant(data.exog, prepend=True)[:, :8] -# In[80]: +# Note: The affairs dataset is most likely to large to show problems with hypothesis testing that we have in small samples. The p-values of wald and score tests for testing that education has no effect are very close to each other. Asymptotically they have the same distribution, but in small samples waldtest is often liberal and overrejects, while score tests can be conservative and underreject. If both agree, then it is a strong indication that we don't have small sample problems by using the asymptotic distribution. +# +# I'm planning to redo the analysis with a random subsample of the dataset. 
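+
+# A sketch of that planned check (hypothetical; subsample size and seed are
+# arbitrary choices, and this would have to run after `dta` is loaded below):
+#
+#   rs = np.random.RandomState(12345)
+#   sub = rs.choice(len(dta), size=500, replace=False)
+#   affair_small = sm.formula.logit("affair ~ occupation + educ + occupation_husb"
+#                                   "+ rate_marriage + age + yrs_married + children"
+#                                   " + religious", dta.iloc[sub]).fit()
+#   # with n=500 the Wald and score p-values should separate more visibly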
+ +# In[5]: dta = sm.datasets.fair.load_pandas().data dta['affair'] = (dta['affairs'] > 0).astype(float) @@ -64,51 +68,51 @@ def momcond(self, params): print(res.summary()) -# In[49]: +# In[6]: res_binom_hc0 = glm_binom.fit(cov_type='HC0') print(res_binom_hc0.bse) -# In[60]: +# In[7]: mod = GMMGLM(data_endog, data_exog, glm_model=glm_binom) res_gmm = mod.fit(res.params*0.5, maxiter=1)#, optim_method='nm') -# In[61]: +# In[8]: print(res_gmm.params) -# In[62]: +# In[9]: print(res_gmm.summary()) -# In[63]: +# In[10]: mod.momcond(res.params).mean(0) -# In[64]: +# In[11]: res.model.score(res.params) # GMM produces the same result as GLM-Logit with robust standard errors -# In[ ]: +# In[12]: res_gmm.bse / res_binom_hc0.bse - 1 -# In[67]: +# In[13]: res_gmm.bse - res_binom_hc0.bse -# In[ ]: +# In[13]: @@ -123,51 +127,51 @@ def momcond(self, params): # # The following will be easier to do with formulas and pandas DataFrames, but I want to minimize possible sources of errors, and I'm more comfortable with numpy. -# In[70]: +# In[14]: affair_mod.model.data.xnames -# In[73]: +# In[15]: idx = list(range(len(res.params))) del idx[2] print(res.params[idx]) # check that we don't have -0.0392 at index 2 anymore -# In[75]: +# In[16]: exog_reduced = data_exog[:, idx] # exog without educ -# In[84]: +# In[17]: glm_binom2 = sm.GLM(data_endog, exog_reduced, family=sm.families.Binomial()) res2 = glm_binom2.fit() -# In[85]: +# In[18]: res2.model.score_test(res2.params, exog_extra = data_exog[:, 2]) # The pvalue of the score test, 0.01123, is close to the pvalue of the robust wald test and almost identical to the pvalue of the nonrobust wald test. -# In[86]: +# In[19]: res.pvalues[2] # Next we try the test for overidentifying restriction in GMM -# In[88]: +# In[20]: mod_red = GMMGLM(data_endog, exog_reduced, data_exog, glm_model=glm_binom2) res_gmm_red = mod_red.fit(res2.params*0.5, maxiter=1) print(res_gmm_red.summary()) -# In[89]: +# In[21]: res_gmm_red.jtest() @@ -180,12 +184,12 @@ def momcond(self, params): # # The following are just some additional checks with respect to changes in the number of GMM iterations. The above uses the identity weight matrix which is irrelevant in the exactly identified case that we had before. Now, we are in the overidentified case and need to adjust the weight matrix. As it turns out below, using two or more GMM iterations reduces the p-value for the j-test to 0.011 which is almost exactly the same as the non-robust Wald and the score test. (I need to think.) -# In[90]: +# In[22]: res2.params -# In[91]: +# In[23]: res_gmm_red0 = mod_red.fit(res2.params*0.5, maxiter=0) res_gmm_red2 = mod_red.fit(res2.params*0.5, maxiter=2) @@ -193,33 +197,33 @@ def momcond(self, params): print(res_gmm_red2.params) -# In[94]: +# In[24]: res2.params*0.5 # check how much params moved with maxiter=0 -# In[95]: +# In[25]: res_gmm_red2.jtest() -# In[ ]: +# In[25]: -# In[98]: +# In[26]: res_gmm_red10 = mod_red.fit(res2.params*0.5, maxiter=10, optim_args=dict(disp=0)) # turn of the display of iteration results res_gmm_red10.jtest() -# In[99]: +# In[27]: res_gmm_red10.params -# In[100]: +# In[28]: res2.params @@ -233,7 +237,7 @@ def momcond(self, params): # Using another option, we can get the constrained parameter estimate by assigning zero weight to the extra restriction. The parameter estimates are close to the constrained GLM estimates. # But I never tried this before and I'm not sure whether the weights are inverse weights in this example. 
Also, since the estimation is not based on the efficient weights matrix, the jtest does not apply, even though the result is still available. -# In[108]: +# In[29]: inv_weights = np.eye(9) inv_weights[2,2] = 0 @@ -243,3 +247,233 @@ def momcond(self, params): # **Conclusion** so far is that I don't know yet how to get the equivalent of an score test for a constrained model with GMM + +### Lagrange Multiplier + +# (experimental) +# +# In the following we add a slack parameter to a moment condition. This is a way to estimate the parameters of the constrained model but we have the moment conditions available for the full model. See McFadden or ... (which textbook). +# +# The wald test on the slack parameter provides a version of the Lagrange multiplier or score test. The difference to the likelihood based score test is that this moment test is based on a robust covariance matrix, in this case HC0. +# +# Note McFadden, or Newey and McFadden (?) have several definitions of LM or score tests. I'm not sure yet which variant this is. +# +# In this case we cannot use the test for overidentifying restrictions anymore because we added artificial parameters that makes the problem exactly identified, given that we started out with an exactly identified model. +# +# The test is the same variable addition test as in the previous section. The second explanatory variable is removed from the mean function of the model but included as instrument in the moment condition. The null hypothesis is that this variable has a coefficient of zero, and is therefore orthogonal to the weighted residuals or elementary zero function of the model. ("elementary zero function" is terminology in Davidson, MacKinnon.) + +# In[29]: + + + + +# In[30]: + +class GMMGLMVADD(GMM): + + def __init__(self, endog, exog, instrument=None, glm_model=None, slack_idx=None): + if instrument is None: + instrument = exog + super(GMMGLMVADD, self).__init__(endog, exog, instrument) + self.glm_model = glm_model + + if slack_idx is None: + self.k_slack = 0 + else: + self.slack_idx = slack_idx + self.k_slack = len(slack_idx) + + def momcond(self, params): + k_slack = self.k_slack + mom1 = self.glm_model.score_factor(params[:-k_slack])[:, None] * self.instrument + if k_slack > 0: + mom1[:, self.slack_idx] += params[-k_slack:] + return mom1 + + +# In[31]: + +mod_red3 = GMMGLMVADD(data_endog, exog_reduced, data_exog, glm_model=glm_binom2, slack_idx=[2]) +res_gmm_red3 = mod_red3.fit(np.concatenate((res2.params*0.5, [0])), maxiter=2) +xnames = ['const', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'slack2'] +print(res_gmm_red3.summary(xname=xnames)) + + +# In[32]: + +mod_red3.slack_idx, mod_red3.k_slack + + +# In[33]: + +res_gmm_red3.bse + + +# In[34]: + +res_gmm_red3.pvalues + + +# In[35]: + +mod_red3.data.xnames + + +# In[36]: + +res2.params + + +# In[37]: + +res2.bse + + +# In[38]: + +res2.pvalues + + +# In[39]: + +scs, scpvalue, _ = res2.model.score_test(res2.params, exog_extra = data_exog[:, 2]) + + +# In[40]: + +res_gmm_red3.pvalues[-1], scpvalue, res_gmm_red3.pvalues[-1] - scpvalue, res_gmm_red3.pvalues[-1] / scpvalue - 1 + + +# In[41]: + +scs, res_gmm_red3.tvalues[-1]**2, scs / res_gmm_red3.tvalues[-1]**2 - 1 + + +# In[42]: + +res2.model.score_test(res2.params, exog_extra = data_exog[:, 2]) + + +#### Explicit Calculation of score tests + +# Finally, we can calculate the score or Lagrange Multiplier tests directly based on the matrices that can be calculated with the models. 
+# +# As starting point we use the full model with all moment conditions and the parameter estimates of the restricted model. + +# In[43]: + +res_gmm_red3.params[:-1], res2.params + + +# In[44]: + +params_restricted = np.insert(res_gmm_red3.params[:-1], 2, 0) +params_restricted = np.insert(res2.params, 2, 0) +params_restricted + + +# In[45]: + +M = mod.momcond_mean(params_restricted) +D = mod.gradient_momcond(params_restricted) +moms = mod.momcond(params_restricted) +#V = mod.calc_weightmatrix(...) check this +V = np.cov(moms.T) +W = res_gmm.weights +M, D, W, V + + +# In[46]: + +from scipy import stats +nobs = mod.endog.shape[0] +Vinv = np.linalg.pinv(V) +ddf = len(res2.params) +ss = (nobs - ddf) * M.dot(Vinv.dot(M)) +ss, stats.chi2.sf(ss, 1) + + +# In[47]: + +M + + +# In[48]: + +nobs + + +# In[49]: + +scs, scpvalue + + +# In[50]: + +M.dot(V.dot(M)) + + +# In[51]: + +Vb = D.dot(Vinv).dot(D) +Vbinv = np.linalg.pinv(V) +ss = (nobs - ddf) * M.dot(Vbinv.dot(M)) +ss, stats.chi2.sf(ss, 1) + + +# In[52]: + +np.sqrt(np.diag(Vbinv)) + + +# In[53]: + +res2.bse + + +# In[54]: + +23.94470765 / 0.27311515 + + +# In[55]: + +np.sqrt(nobs) + + +# In[56]: + +2.67108656 / 0.03204911 + + +# In[57]: + +Vb = D.dot(Vinv).dot(D) +Vbinv = np.linalg.pinv(V) +ss = (nobs - 0) * M.dot(Vbinv.dot(M)) +ss, stats.chi2.sf(ss, 1) + + +# In[58]: + +k_params = len(params_restricted) +R = np.zeros((1, k_params)) +R[0, 2] = 1 +R.dot(params_restricted) + + +# In[59]: + +ss = (nobs - 0) * M[[2]].dot(R.dot(Vbinv.dot(R.T)).dot(M[[2]])) +ss, stats.chi2.sf(ss, 1) + + +# In[60]: + +M[[2]] + + +# In[60]: + + + From 558762b7277dbee914f8bb9f0b2809126d97fd01 Mon Sep 17 00:00:00 2001 From: Josef Date: Sat, 10 Oct 2015 15:58:07 -0400 Subject: [PATCH 3/4] DOC: getting started with GMM notes --- notebooks/gmm_notation.ipynb | 307 +++++++++++++++++++++++++++++++++++ notebooks/gmm_notation.py | 190 ++++++++++++++++++++++ 2 files changed, 497 insertions(+) create mode 100644 notebooks/gmm_notation.ipynb create mode 100644 notebooks/gmm_notation.py diff --git a/notebooks/gmm_notation.ipynb b/notebooks/gmm_notation.ipynb new file mode 100644 index 0000000..84e8b2c --- /dev/null +++ b/notebooks/gmm_notation.ipynb @@ -0,0 +1,307 @@ +{ + "metadata": { + "name": "", + "signature": "sha256:abd31ea631eacb95854c06519fc3cf8869ca7c8602e0b4f1da762e42bc37c379" + }, + "nbformat": 3, + "nbformat_minor": 0, + "worksheets": [ + { + "cells": [ + { + "cell_type": "heading", + "level": 1, + "metadata": {}, + "source": [ + "GMM - Notation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "TODO: check shapes for arrays in math: column or row vectors, and transpose" + ] + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "The general setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following follows the notation for GMM in McFadden's lecture notes. It assumes iid observations, but large part generalize with appropriate redefinition of the covariance of the moment conditions. (One problem with McFaddens notation is that capital letter of a lower case name of a function refers to the derivative, while for us it would be more useful to refer the the array or vector of functions over observations.)\n", + "\n", + "Letters with subscript `n` denote empirical means, i.e. expectation with respect to the empirical distribution. Letters without subscripts refer in most cases to expectation with respect to the true, unknown distribution. 
Since we are not deriving any asymptotic results, we will drop the `n` subscript also for the estimate or empirical value.\n", + "\n", + "Note or Todo: For simplicity we ignore in most of the following the normalization by the number of observations $1 / n$. This is important for the asymptotic analysis, but in most cases the normalization drops out of the quadratic forms." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "objective function: $$Q(\\theta) = 0.5 g_n(\\theta)' W_n (\\tau_n) g_n(\\theta)$$\n", + "\n", + "moment condition $$g_n(\\theta) = \\sum_i{g(z_t, \\theta)}$$\n", + "\n", + "jacobian of moment condition $$G_n(\\theta) = - \\frac{1}{n} \\sum_i{\\Delta_\\theta g(z_t, \\theta)}$$\n", + "\n", + "covariance of moment conditions (i.i.d.) $$S_n(\\theta) = \\Omega_n(\\theta) = \\frac{1}{n} \\sum_i{g(z_t, \\theta) g(z_t, \\theta)'}$$\n", + "\n", + "For this covariance we will use $S$ as an alias in the following.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "From now on we drop subscript $n$ and do not explicitly include the dependence on $theta$ unless we need it. In the following we will have several quadratic forms that are repeatedly used.\n", + "\n", + "\\begin{align}\n", + "B &= G' S G \\\\\n", + "C &= G' W G\\\\\n", + "H &= G' W S W G\n", + "\\end{align}\n", + "\n", + "\n", + "The covariance estimate of the parameters assuming efficient GMM, $W = S$ is $$(G' S G)^{-1}.$$\n", + "For arbitrary weight matrix, the covariance is $$(G' W G)^{-1} G' W S W G (G' W G)^{-1}$$ or $$C^{-1} H C^{-1}.$$\n", + "\n" + ] + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Iteration" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following is a detour to describe briefly the implementation in statsmodels related to the iterative updating of the weight matrix and estimation of the parameter and their covariance.\n", + "\n", + "(TODO: figure out formatting for this section)\n", + "\n", + "This is based on what I remember and needs to be checked with the code, especially for maxiter=0.\n", + "\n", + "Define $W^{(0)}$ as the initial weight matrix, which is the identity matrix by default, and denote by parenthesis in superscript the value after an iteration.\n", + "\n", + "The covariance of the parameters is\n", + "\n", + "maxiter = 0, efficient=True : $$(G(\\theta^{(1)})' \\hspace{3pt} W^{(0)} \\hspace{3pt} G(\\theta^{(1)}))^{-1}$ $\n", + "\n", + "**check:** maxiter=0 needs another review, maybe not correct for the general case, we still need scale in formula for OLS case\n", + "\n", + "maxiter = 1, efficient=True : $$(G(\\theta^{(1)})' \\hspace{3pt} S(\\theta^{(1)}) \\hspace{3pt} G(\\theta^{(1)}))^{-1}$$\n", + "\n", + "Note: this assumes that $E W^{(0)} = E S(\\theta^{(1)}) = \\Omega$ for the asymptotic analysis.\n", + "\n", + "maxiter = 0, efficient=False : $$(G' W G)^{-1} \\hspace{3pt} G' W S W G \\hspace{3pt} (G' W G)^{-1}$$ \n", + "\n", + "where $G$ and $S$ are evaluated at $\\theta^{(1)}$, i.e. 
$W = W^{(0)}$, $S = S(\\theta^{(1)})$ and $G = G(\\theta^{(1)})$\n", + "\n", + "\n", + "For iterative GMM after iteration $i$ we have for efficient GMM : \n", + "$$(G(\\theta^{(i)})' \\hspace{3pt} S(\\theta^{(i)}) \\hspace{3pt} G(\\theta^{(i)}))^{-1}$$\n", + "\n", + "In each case, $\\theta^{(i)}$ at $i=1, 2, 3, ...$ solves\n", + "\n", + "$$\\theta^{(i)} = \\text{argmin}_{\\theta} \\hspace{5pt} 0.5 \\hspace{3pt} g_n(\\theta)\u2032 \\hspace{3pt} W_n(\\theta^{(i-1)}) \\hspace{3pt} g_n(\\theta)$$\n", + "\n", + "GMM with continuous updating treats the weight matrix also as a function of the parameters, i.e.\n", + "\n", + "$$\\theta^{cu} = \\text{argmin}_{\\theta} \\hspace{5pt} 0.5 \\hspace{3pt} g_n(\\theta)\u2032 \\hspace{3pt} W_n(\\theta) \\hspace{3pt} g_n(\\theta)$$" + ] + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Special Case: Exactly Identified Models" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If we have as many moment conditions as unknown parameters, then the parameters are exactly identified, under rank conditions additional to the usual identification assumptions. In this case we can solve the moments exactly for the parameters, all moment conditions will be zero at the optimum, and the solution does not depend on the weight matrix.\n", + "\n", + "It also implies that $G$ is a full rank square matrix and we can split the inverse of quadratic forms into quadratic forms of the inverses, for example \n", + "$$(G' S G)^{-1} = (G')^{-1} S^{-1} G^{-1}$$ and\n", + "$$(G' W G)^{-1} = (G')^{-1} W^{-1} G^{-1}$$.\n", + "\n", + "If we use $W = G^{-1}$ when $G$ is symmetric, then the covariance\n", + "\n", + "$$(G' W G)^{-1} \\hspace{3pt} G' W S W G \\hspace{3pt} (G' W G)^{-1}$$ \n", + "\n", + "becomes\n", + "$$(G')^{-1} G G^{-1} \\hspace{3pt} G' G^{-1} S G^{-1} G \\hspace{3pt} (G')^{-1} G G^{-1}$$ \n", + "\n", + "and\n", + "$$(G')^{-1} S G^{-1}$$\n", + "\n", + "If we use identity weight matrix, then this becomes\n", + "$$(G')^{-1} G^{-1} \\hspace{3pt} G' S G \\hspace{3pt} (G')^{-1} G^{-1}$$ \n", + "\n", + "which also reduces to\n", + "$$(G')^{-1} S G^{-1}$$\n", + "\n", + "\n", + "This last expression is the standard form of a sandwich covariance where in the linear model $G$ is given by $X'X$ for exogenous variables $X$." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 0 + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Special Case: Nonlinear IV-GMM" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the general setup of the GMM estimation, we have not imposed any specific structure on the moment conditions. In a very large class of models we can separate the moment conditions into two multiplicative parts, the instruments and a *essential zero function*:\n", + "$$ g(y, x, z) = z f(y, x, \\theta) $$\n", + "\n", + "In the simple case the essential zero function $f(y, x, \\theta)$ is a scalar function that multiplies each element of the instruments $z$.\n", + "\n", + "This case makes it easier to obtain additional results and it provides some simplification in the definition and implementation of the model and the estimator. 
The linear model, where f is the residual of a linear function, is just a special case of this, where the main advantage for us is that we can use linear algebra to get an explicit solution of the optimization problem instead of using a nonlinear optimization algorithm.\n",
+      "\n",
+      "(Footnote: Instruments and IV in the current general context do not refer specifically to the endogeneity problem, it is just a name for the multiplicative variables in the moment conditions. Instruments can be, for example, the explanatory variables if there are no endogenous variables, they can be nonlinear functions of the explanatory variables, and they can also include other linear transformations of the moment conditions.)\n",
+      "\n",
+      "The following list shows the essential zero function $f(y, x, \\theta)$ for some common models.\n",
+      "\n",
+      "In both linear and nonlinear least squares models it is given by the residuals\n",
+      "\n",
+      "$$f(y, x, \\theta) = y - X \\theta$$\n",
+      "\n",
+      "and\n",
+      "$$f(y, x, \\theta) = y - m(X, \\theta)$$\n",
+      "\n",
+      "Models with specified heteroscedasticity and generalized linear models have weighted residuals\n",
+      "\n",
+      "$$f(y, x, \\theta) = \\frac{y - m(x \\theta)}{V(x, \\theta, ...)}$$\n",
+      "\n",
+      "where $V(x, \\theta, ...)$ can depend on the explanatory variables, on the predicted mean and on variance specific exogenous variables.\n"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 0
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 0
+    },
+    {
+     "cell_type": "heading",
+     "level": 2,
+     "metadata": {},
+     "source": [
+      "Hypothesis testing"
+     ]
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "We have the three main hypothesis tests available for hypotheses on the values of, or restrictions on, the parameters: the Wald test, the score or Lagrange multiplier test, and the J-test, which compares the values of the objective function and is the GMM analog to the likelihood ratio test.\n",
+      "\n",
+      "Wald tests are easy to implement and are generally available in all statistical packages since they only require the asymptotic covariance matrix of the parameters. Wald tests are based on the unrestricted parameters, which additionally allows testing different hypotheses without estimating the model under different restrictions. An additional advantage of Wald tests is that it is relatively easy to obtain robust covariance matrices, i.e. sandwich covariances of the parameters that are robust to unspecified correlation and heteroscedasticity.\n",
+      "The main disadvantage of Wald tests is that they are often liberal in small samples and reject too often, in some cases they can be very liberal.\n",
+      "\n",
+      "The likelihood ratio test requires that the entire likelihood function is correctly specified, with a few exceptions like over- or underdispersion that can be captured by a constant scale factor. Similarly, the J-test only has an asymptotic chisquare distribution if we use efficient GMM, i.e. the weight matrix is (asymptotically) equal to the inverse of the covariance of the moment conditions.\n",
+      "A technical complication is that it is generally recommended that the weight matrix is kept constant when calculating the value of the objective function under the restricted and under the unrestricted model. \n",
+      "Note: Davidson and MacKinnon state that this is a requirement for having a chisquare distribution, but I don't see that yet. It shouldn't matter for the first-order asymptotics under the null which approximation for the covariance of the moment restrictions we use as weight matrix. However, in small samples, it is possible that the difference in the objective function has the wrong sign if we use different weight matrices.\n",
+      "\n",
+      "Finally, the score or Lagrange multiplier tests require that we estimate the restricted model under the null hypothesis. Estimating the restricted models can be easier when we want to compute diagnostic tests to check whether the restricted model is correct, but it requires that we estimate each restricted model separately if we test several restrictions, as in the use case for the Wald test that provides p-values and confidence intervals for each parameter. \n",
+      "\n",
+      "In many cases, score tests are conservative, and often they are closer to having the correct size than Wald tests in small samples. All three tests are asymptotically equivalent and have the same limiting distribution, with the restriction that the J-test or LR test does not apply in all cases for which Wald and score tests are available. Additionally, in many cases there are different ways of calculating the score or Wald statistic that are all asymptotically equivalent. However, the tests and versions of the tests have different accuracy in small samples and differ in general in higher order asymptotics. \n",
+      "\n",
+      "In some cases we have a good indication about small sample properties, which often comes from Monte Carlo studies. In those cases we can focus on implementing better behaved statistics if that is computationally feasible. \n",
+      "\n",
+      "In the following we mainly look at score or Lagrange multiplier tests. They are currently still mostly missing in statsmodels as a general approach to hypothesis testing.\n",
+      "\n"
+     ]
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "The general nonlinear hypothesis that we want to test is $a(\\theta) = 0$ with gradient function $A(\\theta) = \\Delta_{\\theta} a(\\theta)$\n",
+      "\n",
+      "Common special cases are linear and affine restrictions:\n",
+      "$$A \\theta = 0\\\\\n",
+      "A \\theta - b = 0\n",
+      "$$\n",
+      "\n",
+      "and zero restrictions\n",
+      "$$ \\theta_i = 0 $$\n",
+      "\n",
+      "where $\\theta_i$ is an element or a subset of elements of $\\theta$."
+     ]
+    },
+    {
+     "cell_type": "heading",
+     "level": 3,
+     "metadata": {},
+     "source": [
+      "Score or Lagrange Multiplier tests"
+     ]
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "We can compute the constrained model either through reparameterization or through explicitly solving a constrained optimization problem. The former is especially convenient in diagnostic tests where the constrained model is easier to estimate than a fully specified unrestricted problem. As an example, consider testing for heteroscedasticity, where the full model requires the specification and estimation of a model that incorporates the variance function. Testing Poisson against a model that allows over- or underdispersion requires that we specify and estimate a model that allows for both. In both cases it is easy to estimate the restricted model. In most cases only the reparameterized version is implemented in a statistical package.\n",
+      "\n",
+      "The equations for the Lagrange multiplier tests can be confusing because we need to distinguish between moment conditions that are used in a constrained reparameterized model and the model that would be obtained by imposing the restrictions on a fully specified unrestricted model.\n",
+      "\n",
+      "Conditional moment tests and score tests for non GMM models raise a similar issue, see ...."
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 0
+    }
+   ],
+   "metadata": {}
+  }
+ ]
+}
\ No newline at end of file
diff --git a/notebooks/gmm_notation.py b/notebooks/gmm_notation.py
new file mode 100644
index 0000000..a755d2d
--- /dev/null
+++ b/notebooks/gmm_notation.py
@@ -0,0 +1,190 @@
+
+# coding: utf-8
+
+## GMM - Notation
+
+# TODO: check shapes for arrays in math: column or row vectors, and transpose
+
+### The general setup
+
+# The following follows the notation for GMM in McFadden's lecture notes. It assumes iid observations, but large parts generalize with an appropriate redefinition of the covariance of the moment conditions. (One problem with McFadden's notation is that the capital letter of a lower-case function name refers to the derivative, while for us it would be more useful if it referred to the array or vector of functions over observations.)
+#
+# Letters with subscript `n` denote empirical means, i.e. expectation with respect to the empirical distribution. Letters without subscripts refer in most cases to expectation with respect to the true, unknown distribution. Since we are not deriving any asymptotic results, we will drop the `n` subscript also for the estimate or empirical value.
+#
+# Note or Todo: For simplicity we ignore in most of the following the normalization by the number of observations $1 / n$. This is important for the asymptotic analysis, but in most cases the normalization drops out of the quadratic forms.
+
+# objective function: $$Q(\theta) = 0.5 g_n(\theta)' W_n (\tau_n) g_n(\theta)$$
+#
+# moment condition $$g_n(\theta) = \sum_i{g(z_i, \theta)}$$
+#
+# Jacobian of the moment conditions $$G_n(\theta) = - \frac{1}{n} \sum_i{\Delta_\theta g(z_i, \theta)}$$
+#
+# covariance of the moment conditions (i.i.d.) $$S_n(\theta) = \Omega_n(\theta) = \frac{1}{n} \sum_i{g(z_i, \theta) g(z_i, \theta)'}$$
+#
+# For this covariance we will use $S$ as an alias in the following.
+#
+
+# From now on we drop the subscript $n$ and do not explicitly include the dependence on $\theta$ unless we need it. In the following we will have several quadratic forms that are repeatedly used.
+#
+# \begin{align}
+# B &= G' S G \\
+# C &= G' W G\\
+# H &= G' W S W G
+# \end{align}
+#
+#
+# The covariance estimate of the parameters assuming efficient GMM, $W = S^{-1}$, is $$(G' S^{-1} G)^{-1}.$$
+# For an arbitrary weight matrix, the covariance is $$(G' W G)^{-1} G' W S W G (G' W G)^{-1}$$ or $$C^{-1} H C^{-1}.$$
+#
+#
+
+### Iteration
+
+# The following is a detour to describe briefly the implementation in statsmodels related to the iterative updating of the weight matrix and the estimation of the parameters and their covariance.
+#
+# (TODO: figure out formatting for this section)
+#
+# This is based on what I remember and needs to be checked with the code, especially for maxiter=0.
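+#
+# As a rough, illustrative sketch of the calls this section refers to, the
+# following toy example (my own construction with simulated data, not part
+# of the notes) shows how `maxiter` selects the amount of weight-matrix
+# updating in the sandbox `GMM` class:
+
+# In[ ]:
+
+import numpy as np
+from statsmodels.sandbox.regression.gmm import GMM
+
+class GMMOLS(GMM):
+    def momcond(self, params):
+        # linear-model moment conditions: each instrument times the residual
+        resid = self.endog - self.exog.dot(params)
+        return self.instrument * resid[:, None]
+
+np.random.seed(12345)
+x = np.column_stack((np.ones(200), np.random.randn(200)))
+y = x.dot([1.0, 0.5]) + np.random.randn(200)
+mod_toy = GMMOLS(y, x, x)
+res_w0 = mod_toy.fit(np.zeros(2), maxiter=0, optim_args=dict(disp=0))   # initial weight matrix only
+res_w1 = mod_toy.fit(np.zeros(2), maxiter=1, optim_args=dict(disp=0))   # one weight-matrix update
+res_wi = mod_toy.fit(np.zeros(2), maxiter=10, optim_args=dict(disp=0))  # iterated GMM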
+# +# Define $W^{(0)}$ as the initial weight matrix, which is the identity matrix by default, and denote by parenthesis in superscript the value after an iteration. +# +# The covariance of the parameters is +# +# maxiter = 0, efficient=True : $$(G(\theta^{(1)})' \hspace{3pt} W^{(0)} \hspace{3pt} G(\theta^{(1)}))^{-1}$ $ +# +# **check:** maxiter=0 needs another review, maybe not correct for the general case, we still need scale in formula for OLS case +# +# maxiter = 1, efficient=True : $$(G(\theta^{(1)})' \hspace{3pt} S(\theta^{(1)}) \hspace{3pt} G(\theta^{(1)}))^{-1}$$ +# +# Note: this assumes that $E W^{(0)} = E S(\theta^{(1)}) = \Omega$ for the asymptotic analysis. +# +# maxiter = 0, efficient=False : $$(G' W G)^{-1} \hspace{3pt} G' W S W G \hspace{3pt} (G' W G)^{-1}$$ +# +# where $G$ and $S$ are evaluated at $\theta^{(1)}$, i.e. $W = W^{(0)}$, $S = S(\theta^{(1)})$ and $G = G(\theta^{(1)})$ +# +# +# For iterative GMM after iteration $i$ we have for efficient GMM : +# $$(G(\theta^{(i)})' \hspace{3pt} S(\theta^{(i)}) \hspace{3pt} G(\theta^{(i)}))^{-1}$$ +# +# In each case, $\theta^{(i)}$ at $i=1, 2, 3, ...$ solves +# +# $$\theta^{(i)} = \text{argmin}_{\theta} \hspace{5pt} 0.5 \hspace{3pt} g_n(\theta)′ \hspace{3pt} W_n(\theta^{(i-1)}) \hspace{3pt} g_n(\theta)$$ +# +# GMM with continuous updating treats the weight matrix also as a function of the parameters, i.e. +# +# $$\theta^{cu} = \text{argmin}_{\theta} \hspace{5pt} 0.5 \hspace{3pt} g_n(\theta)′ \hspace{3pt} W_n(\theta) \hspace{3pt} g_n(\theta)$$ + +### Special Case: Exactly Identified Models + +# If we have as many moment conditions as unknown parameters, then the parameters are exactly identified, under rank conditions additional to the usual identification assumptions. In this case we can solve the moments exactly for the parameters, all moment conditions will be zero at the optimum, and the solution does not depend on the weight matrix. +# +# It also implies that $G$ is a full rank square matrix and we can split the inverse of quadratic forms into quadratic forms of the inverses, for example +# $$(G' S G)^{-1} = (G')^{-1} S^{-1} G^{-1}$$ and +# $$(G' W G)^{-1} = (G')^{-1} W^{-1} G^{-1}$$. +# +# If we use $W = G^{-1}$ when $G$ is symmetric, then the covariance +# +# $$(G' W G)^{-1} \hspace{3pt} G' W S W G \hspace{3pt} (G' W G)^{-1}$$ +# +# becomes +# $$(G')^{-1} G G^{-1} \hspace{3pt} G' G^{-1} S G^{-1} G \hspace{3pt} (G')^{-1} G G^{-1}$$ +# +# and +# $$(G')^{-1} S G^{-1}$$ +# +# If we use identity weight matrix, then this becomes +# $$(G')^{-1} G^{-1} \hspace{3pt} G' S G \hspace{3pt} (G')^{-1} G^{-1}$$ +# +# which also reduces to +# $$(G')^{-1} S G^{-1}$$ +# +# +# This last expression is the standard form of a sandwich covariance where in the linear model $G$ is given by $X'X$ for exogenous variables $X$. + +# In[ ]: + + + + +### Special Case: Nonlinear IV-GMM + +# In the general setup of the GMM estimation, we have not imposed any specific structure on the moment conditions. In a very large class of models we can separate the moment conditions into two multiplicative parts, the instruments and a *essential zero function*: +# $$ g(y, x, z) = z f(y, x, \theta) $$ +# +# In the simple case the essential zero function $f(y, x, \theta)$ is a scalar function that multiplies each element of the instruments $z$. +# +# This case makes it easier to obtain additional results and it provides some simplification in the definition and implementation of the model and the estimator. 
The linear model, where f is the residual of a linear function, is just a special case of this, where the main advantage for us is that we can use linear algebra to get an explicit solution of the optimization problem instead of using a nonlinear optimization algorithm.
+#
+# (Footnote: Instruments and IV in the current general context do not refer specifically to the endogeneity problem, it is just a name for the multiplicative variables in the moment conditions. Instruments can be, for example, the explanatory variables if there are no endogenous variables, they can be nonlinear functions of the explanatory variables, and they can also include other linear transformations of the moment conditions.)
+#
+# The following list shows the essential zero function $f(y, x, \theta)$ for some common models.
+#
+# In both linear and nonlinear least squares models it is given by the residuals
+#
+# $$f(y, x, \theta) = y - X \theta$$
+#
+# and
+# $$f(y, x, \theta) = y - m(X, \theta)$$
+#
+# Models with specified heteroscedasticity and generalized linear models have weighted residuals
+#
+# $$f(y, x, \theta) = \frac{y - m(x \theta)}{V(x, \theta, ...)}$$
+#
+# where $V(x, \theta, ...)$ can depend on the explanatory variables, on the predicted mean and on variance specific exogenous variables.
+#
+
+# In[ ]:
+
+
+
+
+# In[ ]:
+
+
+
+
+### Hypothesis testing
+
+# We have the three main hypothesis tests available for hypotheses on the values of, or restrictions on, the parameters: the Wald test, the score or Lagrange multiplier test, and the J-test, which compares the values of the objective function and is the GMM analog to the likelihood ratio test.
+#
+# Wald tests are easy to implement and are generally available in all statistical packages since they only require the asymptotic covariance matrix of the parameters. Wald tests are based on the unrestricted parameters, which additionally allows testing different hypotheses without estimating the model under different restrictions. An additional advantage of Wald tests is that it is relatively easy to obtain robust covariance matrices, i.e. sandwich covariances of the parameters that are robust to unspecified correlation and heteroscedasticity.
+# The main disadvantage of Wald tests is that they are often liberal in small samples and reject too often, in some cases they can be very liberal.
+#
+# The likelihood ratio test requires that the entire likelihood function is correctly specified, with a few exceptions like over- or underdispersion that can be captured by a constant scale factor. Similarly, the J-test only has an asymptotic chisquare distribution if we use efficient GMM, i.e. the weight matrix is (asymptotically) equal to the inverse of the covariance of the moment conditions.
+# A technical complication is that it is generally recommended that the weight matrix is kept constant when calculating the value of the objective function under the restricted and under the unrestricted model.
+# Note: Davidson and MacKinnon state that this is a requirement for having a chisquare distribution, but I don't see that yet. It shouldn't matter for the first-order asymptotics under the null which approximation for the covariance of the moment restrictions we use as weight matrix. However, in small samples, it is possible that the difference in the objective function has the wrong sign if we use different weight matrices.
+#
+# Finally, the score or Lagrange multiplier tests require that we estimate the restricted model under the null hypothesis. Estimating the restricted models can be easier when we want to compute diagnostic tests to check whether the restricted model is correct, but it requires that we estimate each restricted model separately if we test several restrictions, as in the use case for the Wald test that provides p-values and confidence intervals for each parameter.
+#
+# In many cases, score tests are conservative, and often they are closer to having the correct size than Wald tests in small samples. All three tests are asymptotically equivalent and have the same limiting distribution, with the restriction that the J-test or LR test does not apply in all cases for which Wald and score tests are available. Additionally, in many cases there are different ways of calculating the score or Wald statistic that are all asymptotically equivalent. However, the tests and versions of the tests have different accuracy in small samples and differ in general in higher order asymptotics.
+#
+# In some cases we have a good indication about small sample properties, which often comes from Monte Carlo studies. In those cases we can focus on implementing better behaved statistics if that is computationally feasible.
+#
+# In the following we mainly look at score or Lagrange multiplier tests. They are currently still mostly missing in statsmodels as a general approach to hypothesis testing.
+#
+#
+
+# The general nonlinear hypothesis that we want to test is $a(\theta) = 0$ with gradient function $A(\theta) = \Delta_{\theta} a(\theta)$
+#
+# Common special cases are linear and affine restrictions:
+# $$A \theta = 0\\
+# A \theta - b = 0
+# $$
+#
+# and zero restrictions
+# $$ \theta_i = 0 $$
+#
+# where $\theta_i$ is an element or a subset of elements of $\theta$ (a small numpy sketch of building such a restriction matrix follows at the end of this section).
+
+#### Score or Lagrange Multiplier tests
+
+# We can compute the constrained model either through reparameterization or through explicitly solving a constrained optimization problem. The former is especially convenient in diagnostic tests where the constrained model is easier to estimate than a fully specified unrestricted problem. As an example, consider testing for heteroscedasticity, where the full model requires the specification and estimation of a model that incorporates the variance function. Testing Poisson against a model that allows over- or underdispersion requires that we specify and estimate a model that allows for both. In both cases it is easy to estimate the restricted model. In most cases only the reparameterized version is implemented in a statistical package.
+#
+# The equations for the Lagrange multiplier tests can be confusing because we need to distinguish between moment conditions that are used in a constrained reparameterized model and the model that would be obtained by imposing the restrictions on a fully specified unrestricted model.
+#
+# Conditional moment tests and score tests for non GMM models raise a similar issue, see ....
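+
+# A small numpy sketch of the restriction matrix for the zero restriction
+# discussed above (the number of parameters and the restricted index are
+# arbitrary choices for illustration):
+
+# In[ ]:
+
+import numpy as np
+
+k_params = 9                 # hypothetical number of parameters
+A = np.zeros((1, k_params))
+A[0, 2] = 1.0                # a(theta) = A theta tests theta_2 = 0
+b = np.zeros(1)              # the affine case would test A theta - b = 0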
+ +# In[ ]: + + + From 14e3ac26d0fd9972d1cfd54aa630d682619f1fee Mon Sep 17 00:00:00 2001 From: Josef Date: Sun, 11 Oct 2015 15:32:11 -0400 Subject: [PATCH 4/4] more on GMM LM tests --- notebooks/gmm_notation.ipynb | 290 ++++++++++++++++++++++++++++++++++- notebooks/gmm_notation.py | 179 ++++++++++++++++++++- 2 files changed, 464 insertions(+), 5 deletions(-) diff --git a/notebooks/gmm_notation.ipynb b/notebooks/gmm_notation.ipynb index 84e8b2c..bb2623a 100644 --- a/notebooks/gmm_notation.ipynb +++ b/notebooks/gmm_notation.ipynb @@ -1,7 +1,7 @@ { "metadata": { "name": "", - "signature": "sha256:abd31ea631eacb95854c06519fc3cf8869ca7c8602e0b4f1da762e42bc37c379" + "signature": "sha256:c7f871b8d98965a894b60e10effb7ebf2d3833a8a4546c63d8e2daee072a1456" }, "nbformat": 3, "nbformat_minor": 0, @@ -16,6 +16,13 @@ "GMM - Notation" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Author: Josef Perktold" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -97,7 +104,7 @@ "\n", "The covariance of the parameters is\n", "\n", - "maxiter = 0, efficient=True : $$(G(\\theta^{(1)})' \\hspace{3pt} W^{(0)} \\hspace{3pt} G(\\theta^{(1)}))^{-1}$ $\n", + "maxiter = 0, efficient=True : $$(G(\\theta^{(1)})' \\hspace{3pt} W^{(0)} \\hspace{3pt} G(\\theta^{(1)}))^{-1}$$\n", "\n", "**check:** maxiter=0 needs another review, maybe not correct for the general case, we still need scale in formula for OLS case\n", "\n", @@ -291,6 +298,171 @@ "Conditional moment tests and score tests for non GMM models raise a similar issue, see ...." ] }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "'''\n", + "todo\n", + "\n", + "When we estimate based on a subset of moment conditions:\n", + "McFadden has a section on overidentifying restriction and reformulation as LM test. I used the idea in the GMM-GLM notebook. \n", + "That trick might be useful as general approach to implementing extra moment conditions. (see notes in issue)\n", + "Also, specification tests as special case, e.g. heteroscedasticity.\n", + "\n", + "On version of LM test in the GMM-GLM notebook only works in exactly identified models, g V g, general version coming up. \n", + "It looks like I'm going to reimplement conditional moment tests directly as a GMM LM test. \n", + "Robust (HC, HAC, cluster, ...) LM test will then be \"for free\".\n", + "\n", + "I'm getting closer to understanding the variations of score or LM tests.\n", + "\n", + "Essentially, I need to implement two version, one with arbitrary restrictions on parameters \n", + "(a general fit_constrained when the full model is available) \n", + "and a version that tests additional moment conditions added to an estimated restricted model.\n", + "\n", + "Variation: Testing individual moment condition when we add several. Robust specification tests. \n", + "papers ??? (I read them during conditional moment test development)\n", + "\n", + "\n", + "'''\n", + "0" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 1, + "text": [ + "0" + ] + } + ], + "prompt_number": 1 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following summarizes the LM test statistics in McFadden. In these formulas, capital letters without subscript refer to asymptotic quantities that can be estimated using the constrained parameters. 
However, under the null hypothesis different estimates all converge to the same values.\n", + "\n", + "The Lagrange multipliers of the constrained GMM optimization are denoted by $\\gamma_{an}$. All other terms are defined above. Superscript minus is used for the generalized inverse, the Moore-Penrose inverse.\n", + "\n", + "The first set of LM test statistics are for efficient GMM, i.e. with $W = S^{-1}$\n", + "\n", + "$LM_{1n}$ $$n \\hspace{3pt} \\gamma_{an} \\hspace{3pt} A B^{-1} A' \\hspace{3pt} \\gamma_{an}$$\n", + "\n", + "$LM_{3n}$ $$n \\hspace{3pt} \\Delta_{\\theta} Q_n(T_{an})' \\hspace{3pt} B^{-1} \\hspace{3pt} \\Delta_{\\theta} Q_n(T_{an})$$\n", + "\n", + "$LM_{2n}2$ $$n \\hspace{3pt} \\Delta_{\\theta} Q_n(T_{an})' \\hspace{3pt} [ A' \\hspace{3pt}(A B^{-1} A')^{-1} \\hspace{3pt} A']^- \\hspace{3pt} \\Delta_{\\theta} Q_n(T_{an})$$\n", + "\n", + "$LM_{2n}1$ $$n \\hspace{3pt} \\Delta_{\\theta} Q_n(T_{an})' \\hspace{3pt} B^{-1} A' \\hspace{3pt}(A B^{-1} A')^{-1} \\hspace{3pt} A B^{-1} \\hspace{3pt} \\Delta_{\\theta} Q_n(T_{an})$$\n", + "\n", + "The score or derivative of the GMM objective function is\n", + "$$\\Delta_{\\theta} Q_n(T_{an}) = G W g(z, T_{an})$$\n", + "\n", + "substituting this and the definiton of $B$ into $LM_{3n}$ and using $W = S^{-1}$, it becomes\n", + "$$n \\hspace{3pt} g(z, T_{an})' S^{-1} G'\\hspace{3pt} (G' S^{-1} G)^{-1} \\hspace{3pt} G S^{-1} g(z, T_{an})$$\n", + "\n", + "In the exactly identified case G is square and invertible and we can factor the inverse matrices to obtain\n", + "$$n \\hspace{3pt} g(z, T_{an})' S^{-1} G' G'^{-1} S G^{-1} G S^{-1} g(z, T_{an})$$\n", + "\n", + "which simplifies to\n", + "$$n \\hspace{3pt} g(z, T_{an})' S^{-1} g(z, T_{an})$$\n", + "\n", + "which is the same as the score statistic for maximum likelihood models where g is the score function, that is the first derivative of the loglikelihood function, and S is ???.\n", + "\n", + "We will continue later with this form of the score test for the special case of specification testing when the constrained model has been estimated with a subset of moment restrictions and we want to test the specification by adding additional moment conditions. If the initial model is exactly identified as in MLE models, then the initial set of moment conditions, the initial subvector of $g$, will be zero. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If we do not use efficient GMM and the weight matrix is not the inverse of the covariance matrix of the moment conditions, then terms in the quadratic forms do not cancel and we need the long version. $LM_{3n}$ is not available in this case because it does not have a asymptotic chisquare distribution.\n", + "\n", + "$LM_{1n}$ $$n \\hspace{3pt} \\gamma_{an} \\hspace{3pt} A C^{-1} A' \\hspace{3pt} [A C^{-1} H C^{-1} A']^{-1} \\hspace{3pt} A C^{-1} A' \\hspace{3pt} \\gamma_{an}$$\n", + "\n", + "$LM_{2n}1$ $$n \\hspace{3pt} \\Delta_{\\theta} Q_n(T_{an})' \\hspace{3pt} [A' (A C^{-1} A')^{-1} \\hspace{3pt} A C^{-1} H C^{-1} A' \\hspace{3pt} (A C^{-1} A')^{-1} A]^{\\mathbf{-}} \\hspace{3pt} \\Delta_{\\theta} Q_n(T_{an})$$\n", + "\n", + "$LM_{2n}2$ $$n \\hspace{3pt} \\Delta_{\\theta} Q_n(T_{an})' \\hspace{3pt} A' \\hspace{3pt} [A C^{-1} H C^{-1} A']^{-1} \\hspace{3pt} A \\hspace{3pt} \\Delta_{\\theta} Q_n(T_{an})$$\n", + "\n", + "\n", + "**Implementation note:** The last version is relatively simple, the center part $C^{-1} H C^{-1}$ is the covariance of the parameter estimate in the unconstrained model. 
However, this is not directly available in the current implementation for the restricted model, where the underlying matrices are evaluated at the constrained parameter estimates.\n",
+      "It would be easy to implement if `maxiter` had an option to not update or re-estimate the parameters. Otherwise we have to evaluate all underlying matrices directly.\n",
+      "\n",
+      "All versions of the LM test statistic are asymptotically equivalent, including variations that use different estimates for the matrices in the quadratic forms that are consistent under the null (and local alternatives). However, small sample or higher order properties will differ across variations, but I do not know in which way.\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n"
+     ]
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": []
+    },
+    {
+     "cell_type": "heading",
+     "level": 3,
+     "metadata": {},
+     "source": [
+      "Specification Testing in exactly identified models"
+     ]
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "This is the part that got me initially interested in using GMM for implementing score tests. \n",
+      "\n",
+      "A large number of specification tests are available for the linear model, where we estimate an OLS model and then test for various deviations from the initial specification, for example we test for omitted variables, heteroscedasticity, correlation and nonlinearity.\n",
+      "Under normality, and in general with all distributions in the linear exponential family, the parameters for the covariance of the error are asymptotically uncorrelated with the mean parameters. This block structure provides a very simple way to test for heteroscedasticity and correlation because we do not need to take the interaction of mean and covariance parameters into account in the Lagrange multiplier tests.\n",
+      "\n",
+      "The following illustrates two examples from the section on overidentifying restrictions in McFadden."
+     ]
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "**Example 1**\n",
+      "\n",
+      "In the homoscedastic linear model $y = x \\beta + u$ with $E(u|x) = 0$ and $E(u^2|x) = \\sigma^2$ the moment conditions are\n",
+      "\n",
+      "\\begin{align}\n",
+      "&x (y - x \\beta) \\\\\n",
+      "&(y - x \\beta)^2 - \\sigma^2\n",
+      "\\end{align}\n",
+      "\n",
+      "If the errors $u$ are conditionally normally distributed, $u \\sim N(0, \\sigma^2)$, then we have additional restrictions on higher order moments. Normality tests like Jarque-Bera test restrictions on the values of skew and kurtosis, i.e. on the third and fourth moments. Those moment conditions are given by\n",
+      "\n",
+      "\\begin{align}\n",
+      "&(y - x \\beta)^3 / \\sigma^2 \\\\\n",
+      "&(y - x \\beta)^4 / \\sigma^4 - 3\n",
+      "\\end{align}\n",
+      "\n",
+      "\n",
+      "Note: If we apply standard two-step or iterated GMM, then we would estimate the weight matrix from the empirical moment conditions without imposing further restrictions. However, GMM estimates can be noisy in small samples, and we can often improve the precision of the estimates by including available information in the calculation of the weight matrix or the covariance of the moment conditions. In this case, we could use the assumption of normality and of no heteroscedasticity also in the updating of the weight matrix (see ??? for an empirical example.) However, tests for variance that are derived under normality are in general very sensitive to deviations from normality, and we are trading off increased precision against robustness to misspecification. 
One example for this is the Bartlett test for the equality of variances. If we want to test the null hypothesis of normality, then imposing normality on the weight matrix would be in the spirit of score or Lagrange multiplier tests, where we want to derive our test statistic under the null. To emphasize again, these versions of test for overidentifying restrictions and LM tests are asymptotically equivalent but will differ in small sample properties.\n", + "\n", + "(Related: articles that show that GMM is not very reliable and precise in small samples, especially when including variance and higher order terms. where and which references???)\n", + "\n", + "**Using parameter restriction**\n", + "\n", + "We can transform the previous example to testing normality as as LM test with parameter restriction. We estimate two new parameters, skew $c_1$ and excess kurtosis $c2$. The null hypothesis of normality is then that both coefficient are zero, i.e. $H0: c_1=0, \\hspace{5pt} c_2=0$\n", + "\n", + "\\begin{align}\n", + "&x (y - x \\beta) \\\\\n", + "&(y - x \\beta)^2 - \\sigma^2 \\\\\n", + "&(y - x \\beta)^3 / \\sigma^2 - c_1 \\\\\n", + "&(y - x \\beta)^4 / \\sigma^4 - 3 - c_2\n", + "\\end{align}\n", + "\n", + "Because we are now in the exactly identified case, we can use the simplified version of score test $LM_{3n}$." + ] + }, { "cell_type": "code", "collapsed": false, @@ -298,7 +470,119 @@ "language": "python", "metadata": {}, "outputs": [], - "prompt_number": 0 + "prompt_number": 1 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 1 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 1 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 1 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The next is completely unrelated, integer chunk iterator for scipy by Evgeni" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import numpy as np\n", + "def _iter_chunked(x0, x1, chunksize=4, inc=1):\n", + " \"\"\"Iterate from x0 to x1 *inclusive* in chunks of chunksize and steps inc.\n", + " x0 must be finite, x1 need not be. 
In the latter case, the iterator is infinite.\n", + " Handles both x0 < x1 and x0 > x1 (in which case, iterates downwards.)\n", + " \"\"\"\n", + " x = x0\n", + " while (x - x1) * inc < 0:\n", + " delta = min(chunksize, abs(x - x1))\n", + " step = delta * inc\n", + " supp = np.arange(x, x + step, inc)\n", + " x += step\n", + " yield supp" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 2 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 2 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "x0 = 0; x1 = 10\n", + "for ii in _iter_chunked(x0, x1, chunksize=3, inc=1): print(ii)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "[0 1 2]\n", + "[3 4 5]\n", + "[6 7 8]\n", + "[9]\n" + ] + } + ], + "prompt_number": 3 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "x0 = 10; x1 = -1\n", + "for ii in _iter_chunked(x0, x1, chunksize=3, inc=-1): print('->', repr(ii))" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "-> array([10, 9, 8])\n", + "-> array([7, 6, 5])\n", + "-> array([4, 3, 2])\n", + "-> array([1, 0])\n" + ] + } + ], + "prompt_number": 4 } ], "metadata": {} diff --git a/notebooks/gmm_notation.py b/notebooks/gmm_notation.py index a755d2d..96fbe83 100644 --- a/notebooks/gmm_notation.py +++ b/notebooks/gmm_notation.py @@ -3,6 +3,8 @@ ## GMM - Notation +# Author: Josef Perktold + # TODO: check shapes for arrays in math: column or row vectors, and transpose ### The general setup @@ -50,7 +52,7 @@ # # The covariance of the parameters is # -# maxiter = 0, efficient=True : $$(G(\theta^{(1)})' \hspace{3pt} W^{(0)} \hspace{3pt} G(\theta^{(1)}))^{-1}$ $ +# maxiter = 0, efficient=True : $$(G(\theta^{(1)})' \hspace{3pt} W^{(0)} \hspace{3pt} G(\theta^{(1)}))^{-1}$$ # # **check:** maxiter=0 needs another review, maybe not correct for the general case, we still need scale in formula for OLS case # @@ -184,7 +186,180 @@ # # Conditional moment tests and score tests for non GMM models raise a similar issue, see .... -# In[ ]: +# In[1]: + +''' +todo + +When we estimate based on a subset of moment conditions: +McFadden has a section on overidentifying restriction and reformulation as LM test. I used the idea in the GMM-GLM notebook. +That trick might be useful as general approach to implementing extra moment conditions. (see notes in issue) +Also, specification tests as special case, e.g. heteroscedasticity. + +On version of LM test in the GMM-GLM notebook only works in exactly identified models, g V g, general version coming up. +It looks like I'm going to reimplement conditional moment tests directly as a GMM LM test. +Robust (HC, HAC, cluster, ...) LM test will then be "for free". + +I'm getting closer to understanding the variations of score or LM tests. + +Essentially, I need to implement two version, one with arbitrary restrictions on parameters +(a general fit_constrained when the full model is available) +and a version that tests additional moment conditions added to an estimated restricted model. + +Variation: Testing individual moment condition when we add several. Robust specification tests. +papers ??? (I read them during conditional moment test development) + + +''' +0 + + +# The following summarizes the LM test statistics in McFadden. 
In these formulas, capital letters without subscript refer to asymptotic quantities that can be estimated using the constrained parameters. However, under the null hypothesis different estimates all converge to the same values.
+#
+# The Lagrange multipliers of the constrained GMM optimization are denoted by $\gamma_{an}$. All other terms are defined above. Superscript minus is used for the generalized inverse, the Moore-Penrose inverse.
+#
+# The first set of LM test statistics is for efficient GMM, i.e. with $W = S^{-1}$
+#
+# $LM_{1n}$ $$n \hspace{3pt} \gamma_{an} \hspace{3pt} A B^{-1} A' \hspace{3pt} \gamma_{an}$$
+#
+# $LM_{3n}$ $$n \hspace{3pt} \Delta_{\theta} Q_n(T_{an})' \hspace{3pt} B^{-1} \hspace{3pt} \Delta_{\theta} Q_n(T_{an})$$
+#
+# $LM_{2n}2$ $$n \hspace{3pt} \Delta_{\theta} Q_n(T_{an})' \hspace{3pt} [ A' \hspace{3pt}(A B^{-1} A')^{-1} \hspace{3pt} A']^- \hspace{3pt} \Delta_{\theta} Q_n(T_{an})$$
+#
+# $LM_{2n}1$ $$n \hspace{3pt} \Delta_{\theta} Q_n(T_{an})' \hspace{3pt} B^{-1} A' \hspace{3pt}(A B^{-1} A')^{-1} \hspace{3pt} A B^{-1} \hspace{3pt} \Delta_{\theta} Q_n(T_{an})$$
+#
+# The score or derivative of the GMM objective function is
+# $$\Delta_{\theta} Q_n(T_{an}) = G W g(z, T_{an})$$
+#
+# substituting this and the definition of $B$ into $LM_{3n}$ and using $W = S^{-1}$, it becomes
+# $$n \hspace{3pt} g(z, T_{an})' S^{-1} G'\hspace{3pt} (G' S^{-1} G)^{-1} \hspace{3pt} G S^{-1} g(z, T_{an})$$
+#
+# In the exactly identified case $G$ is square and invertible and we can factor the inverse matrices to obtain
+# $$n \hspace{3pt} g(z, T_{an})' S^{-1} G' G'^{-1} S G^{-1} G S^{-1} g(z, T_{an})$$
+#
+# which simplifies to
+# $$n \hspace{3pt} g(z, T_{an})' S^{-1} g(z, T_{an})$$
+#
+# which is the same as the score statistic for maximum likelihood models, where $g$ is the score function, that is the first derivative of the loglikelihood function, and $S$ is the covariance of the score, i.e. the information matrix.
+#
+# We will continue later with this form of the score test for the special case of specification testing when the constrained model has been estimated with a subset of moment restrictions and we want to test the specification by adding additional moment conditions. If the initial model is exactly identified as in MLE models, then the initial set of moment conditions, the initial subvector of $g$, will be zero.
+
+# If we do not use efficient GMM and the weight matrix is not the inverse of the covariance matrix of the moment conditions, then terms in the quadratic forms do not cancel and we need the long version. $LM_{3n}$ is not available in this case because it does not have an asymptotic chisquare distribution.
+#
+# $LM_{1n}$ $$n \hspace{3pt} \gamma_{an} \hspace{3pt} A C^{-1} A' \hspace{3pt} [A C^{-1} H C^{-1} A']^{-1} \hspace{3pt} A C^{-1} A' \hspace{3pt} \gamma_{an}$$
+#
+# $LM_{2n}1$ $$n \hspace{3pt} \Delta_{\theta} Q_n(T_{an})' \hspace{3pt} [A' (A C^{-1} A')^{-1} \hspace{3pt} A C^{-1} H C^{-1} A' \hspace{3pt} (A C^{-1} A')^{-1} A]^{\mathbf{-}} \hspace{3pt} \Delta_{\theta} Q_n(T_{an})$$
+#
+# $LM_{2n}2$ $$n \hspace{3pt} \Delta_{\theta} Q_n(T_{an})' \hspace{3pt} A' \hspace{3pt} [A C^{-1} H C^{-1} A']^{-1} \hspace{3pt} A \hspace{3pt} \Delta_{\theta} Q_n(T_{an})$$
+#
+#
+# **Implementation note:** The last version is relatively simple, the center part $C^{-1} H C^{-1}$ is the covariance of the parameter estimate in the unconstrained model.
+# However, this is not directly available in the current implementation for the restricted model, where the underlying matrices are evaluated at the constrained parameter estimates.
+# It would be easy to implement if `maxiter` had an option to not update or re-estimate the parameters. Otherwise we have to evaluate all underlying matrices directly.
+#
+# All versions of the LM test statistic are asymptotically equivalent, including variations that use different estimates for the matrices in the quadratic forms that are consistent under the null (and local alternatives). However, small sample or higher order properties will differ across variations, but I do not know in which way.
+#
+#
+#
+#
+#
+
+#
+
+#### Specification Testing in exactly identified models
+
+# This is the part that got me initially interested in using GMM for implementing score tests.
+#
+# A large number of specification tests are available for the linear model, where we estimate an OLS model and then test for various deviations from the initial specification, for example we test for omitted variables, heteroscedasticity, correlation and nonlinearity.
+# Under normality, and in general with all distributions in the linear exponential family, the parameters for the covariance of the error are asymptotically uncorrelated with the mean parameters. This block structure provides a very simple way to test for heteroscedasticity and correlation because we do not need to take the interaction of mean and covariance parameters into account in the Lagrange multiplier tests.
+#
+# The following illustrates two examples from the section on overidentifying restrictions in McFadden.
+
+# **Example 1**
+#
+# In the homoscedastic linear model $y = x \beta + u$ with $E(u|x) = 0$ and $E(u^2|x) = \sigma^2$ the moment conditions are
+#
+# \begin{align}
+# &x (y - x \beta) \\
+# &(y - x \beta)^2 - \sigma^2
+# \end{align}
+#
+# If the errors $u$ are conditionally normally distributed, $u \sim N(0, \sigma^2)$, then we have additional restrictions on higher order moments. Normality tests like Jarque-Bera test restrictions on the values of skew and kurtosis, i.e. on the third and fourth moments. Those moment conditions are given by
+#
+# \begin{align}
+# &(y - x \beta)^3 / \sigma^2 \\
+# &(y - x \beta)^4 / \sigma^4 - 3
+# \end{align}
+#
+#
+# Note: If we apply standard two-step or iterated GMM, then we would estimate the weight matrix from the empirical moment conditions without imposing further restrictions. However, GMM estimates can be noisy in small samples, and we can often improve the precision of the estimates by including available information in the calculation of the weight matrix or the covariance of the moment conditions. In this case, we could use the assumption of normality and of no heteroscedasticity also in the updating of the weight matrix (see ??? for an empirical example.) However, tests for variance that are derived under normality are in general very sensitive to deviations from normality, and we are trading off increased precision against robustness to misspecification. One example for this is the Bartlett test for the equality of variances. If we want to test the null hypothesis of normality, then imposing normality on the weight matrix would be in the spirit of score or Lagrange multiplier tests, where we want to derive our test statistic under the null. To emphasize again, these versions of the test for overidentifying restrictions and the LM test are asymptotically equivalent but will differ in small sample properties.
+#
+# (Related: articles that show that GMM is not very reliable and precise in small samples, especially when including variance and higher order terms. where and which references???)
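+
+# As a small sketch (my own illustration, not McFadden's code), the moment
+# conditions of Example 1 together with the normality restrictions can be
+# stacked in a `momcond`-style function; the names `y`, `x` and the
+# parameter layout with `sigma2` last are assumptions made for concreteness:
+
+# In[ ]:
+
+import numpy as np
+
+def momcond_normality(params, y, x):
+    # params = (beta_0, ..., beta_k, sigma2)
+    beta, sigma2 = params[:-1], params[-1]
+    u = y - x.dot(beta)                # residuals
+    mom_mean = x * u[:, None]          # x (y - x beta)
+    mom_var = u**2 - sigma2            # (y - x beta)^2 - sigma2
+    mom_skew = u**3 / sigma2           # third moment restriction
+    mom_kurt = u**4 / sigma2**2 - 3    # fourth moment restriction
+    return np.column_stack((mom_mean, mom_var, mom_skew, mom_kurt))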
+#
+# **Using parameter restriction**
+#
+# We can transform the previous example to testing normality as an LM test with parameter restrictions. We estimate two new parameters, skew $c_1$ and excess kurtosis $c_2$. The null hypothesis of normality is then that both coefficients are zero, i.e. $H_0: c_1=0, \hspace{5pt} c_2=0$
+#
+# \begin{align}
+# &x (y - x \beta) \\
+# &(y - x \beta)^2 - \sigma^2 \\
+# &(y - x \beta)^3 / \sigma^2 - c_1 \\
+# &(y - x \beta)^4 / \sigma^4 - 3 - c_2
+# \end{align}
+#
+# Because we are now in the exactly identified case, we can use the simplified version of the score test $LM_{3n}$.
+
+# In[1]:
+
+
+
+
+# In[1]:
+
+
+
+
+# In[1]:
+
+
+
+
+# In[1]:
+
+
+
+
+# The next is completely unrelated: an integer chunk iterator for scipy by Evgeni
+
+# In[2]:
+
+import numpy as np
+def _iter_chunked(x0, x1, chunksize=4, inc=1):
+    """Iterate from x0 to x1 *inclusive* in chunks of chunksize and steps inc.
+    x0 must be finite, x1 need not be. In the latter case, the iterator is infinite.
+    Handles both x0 < x1 and x0 > x1 (in which case, iterates downwards.)
+    """
+    x = x0
+    while (x - x1) * inc < 0:
+        delta = min(chunksize, abs(x - x1))
+        step = delta * inc
+        supp = np.arange(x, x + step, inc)
+        x += step
+        yield supp
+
+
+# In[2]:
+
+
+
+
+# In[3]:
+
+x0 = 0; x1 = 10
+for ii in _iter_chunked(x0, x1, chunksize=3, inc=1): print(ii)
+
+# In[4]:
+x0 = 10; x1 = -1
+for ii in _iter_chunked(x0, x1, chunksize=3, inc=-1): print('->', repr(ii))
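+
+# Note on behavior: judging from the printed chunks above, the endpoint x1
+# itself is not included in either direction (neither 10 nor -1 appears in
+# the output), so x1 is effectively an exclusive bound despite the
+# *inclusive* wording in the docstring.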