pacman::p_load(
  tidyverse,
  kableExtra,
  pracma,
  tictoc
)
5 Risk sharing with limited commitment: estimation
5.1 Estimation method
5.1.1 Model
The main idea of the estimation is to find the parameters that explain the consumption pattern of a household: c_{it} = \widehat{c}_{it}(y_t, x_{t - 1}; \theta), where \widehat{c}_{it} is the consumption predicted by the limited commitment risk-sharing model with a given set of parameters, \theta. To account for the discrepancies between observed and predicted consumption, I introduce multiplicative measurement errors in consumption, as in the full risk-sharing model. This gives the following relationship: \log \left(c_{it}^*\right) = \log \left( \widehat{c}_{it}(y_t, x_{t - 1}; \theta) \right) + \varepsilon_{it}^c, where \varepsilon_{it}^c \sim N(0, \gamma_C^2).
The previous-period relative Pareto weight, x_{t - 1}, is derived from the ratio of the marginal utilities in the previous period: x_{t - 1} = \frac{c_{v, t - 1}^{-\sigma}}{c_{i, t - 1}^{-\sigma}} = \frac{c_{v, t - 1}^{-\sigma}}{\left( c_{i, t - 1}^* / \exp(\varepsilon_{i, t - 1}^c) \right)^{-\sigma}}, where c_{v, t} = \frac{1}{N} \sum_i c_{i, t}.
Note that consumption measurement errors affect the consumption prediction, \widehat{c}, through x_{t - 1}. This effect is non-linear in the measurement errors and has no analytical expression. Therefore, I estimate the parameters by simulated maximum likelihood.
5.1.2 Estimation steps
- Randomly generate measurement errors, and denote the s-th simulated draw for household i at time t by \varepsilon_{s, i, t}^c. Then calculate the “true” consumption of households and of the “village”: c_{s, i, t} = c_{i, t}^* / \exp(\varepsilon_{s, i, t}^c) and c_{s, v, t} = \exp \left( \frac{1}{N} \sum_i \log (c_{s, i, t}) \right). The latter is calculated this way, instead of c_{s, v, t} = \frac{1}{N} \sum_i c_{s, i, t}, to be aligned with how the village consumption is calculated in the full risk-sharing model estimation.
- Calculate the previous-period relative Pareto weight by x_{s, t - 1} = \frac{c_{s, v, t - 1}^{-\sigma}}{c_{s, i, t - 1}^{-\sigma}}.
- Predict consumption given x_{s, t - 1} and y_{s, t}. For this, obtain x_{s, t} from x_{s, t - 1} and the bounds on relative Pareto weights implied by \theta and y_{s, t}. Note that this updating rule approximates the N-household case by the “one-versus-the-other” approach.
- Calculate the likelihood of each observation as f(\log(c_{it}^*) - \log \left( \widehat{c}_{it}(y_{s, t}, x_{s, t - 1}; \theta) \right), \theta), where f(x, \theta) is the density function of N(0, \gamma_C^2).
- Take the average of the likelihood across s: \frac{1}{S} \sum_s f(\log(c_{it}^*) - \log \left( \widehat{c}_{it}(y_{s, t}, x_{s, t - 1}; \theta) \right), \theta).
- Take the logarithm and sum over i and t, which yields the objective formalized below.
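Combining the last three steps, the simulated log-likelihood maximized over \theta is \log L(\theta) = \sum_i \sum_t \log \left[ \frac{1}{S} \sum_s f \left( \log(c_{it}^*) - \log \left( \widehat{c}_{it}(y_{s, t}, x_{s, t - 1}; \theta) \right), \theta \right) \right].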
For standard errors, I do not assume that the model is correctly specified. Assuming no autocorrelation, the standard errors of the parameters are the square roots of the diagonal elements of the sandwich matrix A(\widehat{\theta})^{-1} B(\widehat{\theta}) A(\widehat{\theta})^{-1}. Here, A(\widehat{\theta}) is the Hessian matrix of the log-likelihood function and B(\widehat{\theta}) is the outer product of the scores.
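The estimation code below computes this sandwich per village; the following is a minimal, equivalent sketch (a hypothetical helper, not part of the estimation code), where H is the Hessian of the negative log-likelihood returned by optim and S is the observation-by-parameter matrix of scores:
sandwichSE <- function(H, S) {
  bread <- solve(H)      # A^{-1}; the sign of H cancels in the sandwich
  meat  <- crossprod(S)  # B: sum of outer products of per-observation scores
  sqrt(diag(bread %*% meat %*% bread))
}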
5.2 Estimation code
5.2.1 Load data and functions
load('IntermediateData/allData.RData')
load('IntermediateData/lc_hom_model_functions.RData')
5.2.2 Simulate random measurement errors
set.seed(123)
numSimulations <- 500
# Standard normal draws, generated once here and scaled by sqrt(gammaC2)
# inside the likelihood function, so the same draws are reused at every
# parameter value during the optimization
measurementErrorArray <- array(
  rnorm(hnum * tnum * numSimulations), 
  dim = c(hnum, tnum, numSimulations)
)
5.2.3 Functions to calculate likelihood
interpolateRelativeParetoWeightBound <- function(
    householdIncomeGridPoints,
    villageIncomeGridPoints,
    relativeParetoWeightBoundsVec,
    householdIncomeTrue,
    villageIncomeTrue
) {
  # Bilinearly interpolate the bound on the relative Pareto weight at the
  # true incomes, clamping incomes outside the grid to the nearest grid point
  interp2(
    x = householdIncomeGridPoints,
    y = villageIncomeGridPoints,
    Z = matrix(
      relativeParetoWeightBoundsVec,
      nrow = length(villageIncomeGridPoints),
      byrow = TRUE
      ),
    xp = householdIncomeTrue %>% 
      pmax(min(householdIncomeGridPoints)) %>% 
      pmin(max(householdIncomeGridPoints)),
    yp = villageIncomeTrue %>% 
      pmax(min(villageIncomeGridPoints)) %>% 
      pmin(max(villageIncomeGridPoints)),
    method = "linear"
  )
}
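For illustration (toy numbers, not from the data), an income outside the grid is clamped to the nearest grid point before the bilinear interpolation:
hGrid <- c(0.5, 1.0, 1.5)          # toy household income grid points
vGrid <- c(0.8, 1.2)               # toy village income grid points
boundsVec <- c(0.6, 0.7, 0.8,      # bounds at vGrid[1], across hGrid
               0.9, 1.0, 1.1)      # bounds at vGrid[2], across hGrid
interpolateRelativeParetoWeightBound(
  hGrid, vGrid, boundsVec,
  householdIncomeTrue = 1.25,      # between grid points: interpolated
  villageIncomeTrue = 2.0          # above the grid: clamped to 1.2
)
# ~ 1.05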
updateRelativeParetoWeight <- function(
    previousRelativeParetoWeight,
    relativeParetoWeightLowerBound,
    relativeParetoWeightUpperBound
    ) {
  # Limited-commitment updating rule: keep the previous weight if it lies
  # within the bounds; otherwise move it to the violated bound
  previousRelativeParetoWeight %>% 
    pmax(relativeParetoWeightLowerBound) %>% 
    pmin(relativeParetoWeightUpperBound)
}
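The three cases of the updating rule, on illustrative values:
updateRelativeParetoWeight(c(0.9, 0.3, 2.0), 0.5, 1.5)
# 0.9 0.5 1.5: inside the bounds (unchanged), below (raised), above (lowered)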
calculateCurrentRelativeParetoWeightEachObsBySimLCHom <- function(
    village,
    meanClass,
    CVClass,
    householdAR1EstimationResult,
    villageAR1EstimationResult,
    householdIncomeTrue,
    villageIncomeTrue,
    previousRelativeParetoWeight,
    relativeParetoWeightBoundsArray
) {
  householdIncomeGridPoints <- (
    householdAR1EstimationResult[[village]]$gridPointsArray[meanClass, CVClass, ]
  )
  villageIncomeGridPoints <- villageAR1EstimationResult[[village]]$gridPoints
  
  # Interpolate the lower and upper bounds on the relative Pareto weight
  # at the true household and village incomes
  relativeParetoWeightLowerBound <- interpolateRelativeParetoWeightBound(
    householdIncomeGridPoints,
    villageIncomeGridPoints,
    relativeParetoWeightBoundsArray[meanClass, CVClass, , 1],
    householdIncomeTrue,
    villageIncomeTrue
  )
  
  relativeParetoWeightUpperBound <- interpolateRelativeParetoWeightBound(
    householdIncomeGridPoints,
    villageIncomeGridPoints,
    relativeParetoWeightBoundsArray[meanClass, CVClass, , 2],
    householdIncomeTrue,
    villageIncomeTrue
  )
  
  currentRelativeParetoWeight <- updateRelativeParetoWeight(
    previousRelativeParetoWeight,
    relativeParetoWeightLowerBound,
    relativeParetoWeightUpperBound
  )
  
  return(currentRelativeParetoWeight)
}
calculateLogLikelihoodForEachObsLCHom <- function(
    param,
    village,
    consdat,
    incdatRescaled,
    householdIncMeanClassVec,
    householdIncCVClassVec,
    householdAR1EstimationResult,
    villageAR1EstimationResult,
    measurementErrorArray,
    villageIndicatorMatrix,
    numIncomeStates,
    numSimulations
) {
  
  # Parameter vector: discount factor, coefficient of relative risk aversion,
  # punishment parameter, and variance of consumption measurement errors
  delta <- param[1]
  sigma <- param[2]
  punishment <- param[3]
  gammaC2 <- param[4]
  
  # Solve the model to obtain the bounds on relative Pareto weights
  relativeParetoWeightBoundsArray <- solveLCRiskSharingByVillage(
      village,
      delta,
      sigma,
      punishment,
      householdAR1EstimationResult,
      villageAR1EstimationResult,
      numIncomeStates,
      numRelativeParetoWeights = 2000,
      iterationLimit = 100,
      diffLimit = 1e-8
  )
  # Restrict the data and the simulated errors to this village's households
  consVillage <- consdat[villageIndicatorMatrix[, village], ]
  incdatRescaledVillage <- incdatRescaled[villageIndicatorMatrix[, village], ]
  measurementErrorArrayVillage <- measurementErrorArray[
    villageIndicatorMatrix[, village], ,
    ]
  numHouseholds <- householdAR1EstimationResult[[village]]$numHouseholds
  
  currentRelativeParetoWeightArray <- array(NA, dim = c(numHouseholds, tnum - 1, numSimulations))
  
  for (simulationIndex in seq(1, numSimulations)) {
    
    # "True" consumption: divide out the simulated measurement errors,
    # scaled to have variance gammaC2
    consTrue <- (
      consVillage 
      / exp(measurementErrorArrayVillage[, , simulationIndex] * sqrt(gammaC2))
      )
    
    # Relative Pareto weights implied by the ratio of marginal utilities:
    # MU of village (geometric-mean) consumption over MU of household consumption
    relativeParetoWeightMatrix <- (
      outer(
        rep(1, numHouseholds), 
        calculateMarginalUtility(consTrue, sigma) %>% log %>% colMeans %>% exp
        )
      / calculateMarginalUtility(consTrue, sigma)
    )
    
    for (householdIndex in seq(1, numHouseholds)) {
      for (periodIndex in seq(2, tnum)) {
        
        meanClass <- householdIncMeanClassVec[villageIndicatorMatrix[, village]][householdIndex]
        CVClass <- householdIncCVClassVec[villageIndicatorMatrix[, village]][householdIndex]
          
        householdIncomeTrue <- incdatRescaledVillage[householdIndex, periodIndex]
        villageIncomeTrue <- (incdatRescaledVillage %>% colMeans(na.rm = TRUE))[periodIndex]
        
        previousRelativeParetoWeight <- relativeParetoWeightMatrix[householdIndex, (periodIndex - 1)]
        
        currentRelativeParetoWeightArray[householdIndex, periodIndex - 1, simulationIndex] <- (
          calculateCurrentRelativeParetoWeightEachObsBySimLCHom(
            village,
            meanClass,
            CVClass,
            householdAR1EstimationResult,
            villageAR1EstimationResult,
            householdIncomeTrue,
            villageIncomeTrue,
            previousRelativeParetoWeight,
            relativeParetoWeightBoundsArray
            )
        )
      }
    }
  }
  
  likelihoodArrayPerSim <- array(NA, dim = c(numHouseholds, tnum - 1, numSimulations))
  for (simulationIndex in seq(1, numSimulations)) {
    
    for (householdIndex in seq(1, numHouseholds)) {
      for (periodIndex in seq(2, tnum)) {
        # Predicted log consumption: the household's share of aggregate income
        # is proportional to its relative Pareto weight to the power 1 / sigma
        logConsPredict <- (
          log(
            (incdatRescaledVillage[, periodIndex] %>% sum(na.rm = TRUE)) 
            / (
              (currentRelativeParetoWeightArray[, periodIndex - 1, simulationIndex]^(1 / sigma)) %>% 
                sum(na.rm = TRUE)
              )
            )
          + 1 / sigma * log(
            currentRelativeParetoWeightArray[householdIndex, periodIndex - 1, simulationIndex]
            )
        )
        likelihoodArrayPerSim[householdIndex, periodIndex - 1, simulationIndex] <- dnorm(
          log(consVillage[householdIndex, periodIndex]) 
          - logConsPredict,
          sd = sqrt(gammaC2)
        )
      }
    }
  }
  
  logLikelihoodArray <- array(NA, dim = c(numHouseholds, tnum - 1))
  for (householdIndex in seq(1, numHouseholds)) {
    for (periodIndex in seq(2, tnum)) {
      likelihoodSimMean <- likelihoodArrayPerSim[householdIndex, periodIndex - 1, ] %>% mean(na.rm = TRUE)
      # Floor the simulated mean at 1e-8 to avoid taking the log of zero
      logLikelihoodArray[householdIndex, periodIndex - 1] <- log(likelihoodSimMean %>% pmax(1e-8))
    }
  }
  
  return(logLikelihoodArray)
}
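As a quick sanity check before launching the optimizer, the per-observation log-likelihood can be evaluated at a trial parameter vector (a hypothetical usage sketch; it assumes the objects loaded in Section 5.2.1 are in scope):
logLikelihoodCheck <- calculateLogLikelihoodForEachObsLCHom(
  param = c(0.95, 3.0, 0.3, 0.03),  # (delta, sigma, punishment, gammaC2)
  village = 1,
  consdat, incdatRescaled,
  householdIncMeanClassVec, householdIncCVClassVec,
  householdAR1EstimationResult, villageAR1EstimationResult,
  measurementErrorArray, villageIndicatorMatrix,
  numIncomeStates, numSimulations
)
sum(logLikelihoodCheck)  # total log-likelihood at the trial parameters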
calculateLogLikelihoodLCHom <- function(
    param,
    village,
    consdat,
    incdatRescaled,
    householdIncMeanClassVec,
    householdIncCVClassVec,
    householdAR1EstimationResult,
    villageAR1EstimationResult,
    measurementErrorArray,
    villageIndicatorMatrix,
    numIncomeStates,
    numSimulations
    ) {
  logLikelihoodForEachObs <- calculateLogLikelihoodForEachObsLCHom(
    param,
    village,
    consdat,
    incdatRescaled,
    householdIncMeanClassVec,
    householdIncCVClassVec,
    householdAR1EstimationResult,
    villageAR1EstimationResult,
    measurementErrorArray,
    villageIndicatorMatrix,
    numIncomeStates,
    numSimulations
  )
  
  logLikelihood <- sum(logLikelihoodForEachObs)
  
  # Return the negative log-likelihood since optim() minimizes
  return(- logLikelihood)
}
estimateMLLCHom <- function(
  initialParam,
  lowerBounds,
  upperBounds,
  village,
  consdat,
  incdatRescaled,
  householdIncMeanClassVec,
  householdIncCVClassVec,
  householdAR1EstimationResult,
  villageAR1EstimationResult,
  measurementErrorArray,
  villageIndicatorMatrix,
  numIncomeStates,
  numSimulations
) {
  
  tic()
  optimRes <- optim(
    initialParam,
    calculateLogLikelihoodLCHom,
    village = village,
    consdat = consdat,
    incdatRescaled = incdatRescaled,
    householdIncMeanClassVec = householdIncMeanClassVec,
    householdIncCVClassVec = householdIncCVClassVec,
    householdAR1EstimationResult = householdAR1EstimationResult,
    villageAR1EstimationResult = villageAR1EstimationResult,
    measurementErrorArray = measurementErrorArray,
    villageIndicatorMatrix = villageIndicatorMatrix,
    numIncomeStates = numIncomeStates,
    numSimulations = numSimulations,
    method = "L-BFGS-B",
    lower = lowerBounds,
    upper = upperBounds,
    control = list(trace = 5, maxit = 200),
    hessian = TRUE
    )
  
  
  # Per-observation scores: numerical Jacobian of the log-likelihood
  # contributions with respect to the parameters
  score <- jacobian(
    function(x) calculateLogLikelihoodForEachObsLCHom(
      x,
      village,
      consdat,
      incdatRescaled,
      householdIncMeanClassVec,
      householdIncCVClassVec,
      householdAR1EstimationResult,
      villageAR1EstimationResult,
      measurementErrorArray,
      villageIndicatorMatrix,
      numIncomeStates,
      numSimulations
    ) %>% as.vector,
    optimRes$par
  )
  
  # Sandwich standard errors: A^{-1} B A^{-1}, with A the Hessian and
  # B the sum of outer products of the per-observation scores
  standardErrors <- (
    (optimRes$hessian %>% solve) 
    %*% reduce(
      map(
        seq(nrow(score)),
        ~ outer(score[., ], score[., ])
      ),
      function(x, y) x + y
    )
    %*% (optimRes$hessian %>% solve) 
  ) %>% diag %>% sqrt
  
  toc()
  return(list(optimRes = optimRes, standardErrors = standardErrors))
}
5.3 Estimation result
initialParam <- c(0.95, 3.0, 0.3, 0.03)  # (delta, sigma, punishment, gammaC2)
lowerBounds <- c(0.5, 1.0, 0.0, 1e-3)
upperBounds <- c(0.98, 5.0, 0.99, 1.0)
estimationLCHomRes <- map(
  seq(1, numVillages),
  ~ estimateMLLCHom(
    initialParam,
    lowerBounds,
    upperBounds,
    .,
    consdat,
    incdatRescaled,
    householdIncMeanClassVec,
    householdIncCVClassVec,
    householdAR1EstimationResult,
    villageAR1EstimationResult,
    measurementErrorArray,
    villageIndicatorMatrix,
    numIncomeStates,
    numSimulations
  )
)
createRegressionTable <- function(village) {
  
  estimationLCHomTable <- c()
  for (varIndex in seq_along(estimationLCHomRes[[village]]$optimRes$par)) {
    estimationLCHomTable <- c(
      estimationLCHomTable,
      formatC(estimationLCHomRes[[village]]$optimRes$par[varIndex], digits = 3, format = "f"),
      str_interp(
        '(${formatC(estimationLCHomRes[[village]]$standardErrors[varIndex], digits = 3, format = "f")})'
        )
      )
  }
  estimationLCHomTable <- c(
    estimationLCHomTable,
    formatC(-estimationLCHomRes[[village]]$optimRes$value, digits = 3, format = "f")
  )
  return(estimationLCHomTable)
}
estimationLCHomTable <- do.call(
  cbind,
  map(
    seq(1, numVillages),
    createRegressionTable
  )
)
colnames(estimationLCHomTable) <- c("Aurepalle", "Kanzara", "Shirapur")
rownames(estimationLCHomTable) <- c(
  "Discount factor",
  "",
  "Coef of RRA",
  "",
  "punishment parameter",
  "",
  "Variance of consumption measurement errors",
  "",
  "Log likelihood"
  )
estimationLCHomTable %>% 
  kbl(digits = 3) %>% 
estimationLCHomTable %>% 
  kbl(digits = 3) %>% 
  kable_classic()
|                                            | Aurepalle | Kanzara | Shirapur |
|--------------------------------------------|-----------|---------|----------|
| Discount factor                            | 0.980     | 0.748   | 0.661    |
|                                            | (0.025)   | (0.030) | (0.232)  |
| Coef of RRA                                | 2.998     | 2.905   | 2.756    |
|                                            | (0.078)   | (0.013) | (0.066)  |
| Punishment parameter                       | 0.234     | 0.347   | 0.004    |
|                                            | (0.035)   | (0.032) | (0.004)  |
| Variance of consumption measurement errors | 0.035     | 0.028   | 0.059    |
|                                            | (0.005)   | (0.004) | (0.009)  |
| Log likelihood                             | -3.129    | -9.478  | -18.022  |