pacman::p_load(
  tidyverse,
  kableExtra,
  pracma,
  tictoc
)

5 Risk sharing with limited commitment: estimation
5.1 Estimation method
5.1.1 Model
The main idea of the estimation is to find the parameters that best explain the observed consumption pattern of a household: c_{it} = \widehat{c}_{it}(y_t, x_{t - 1}; \theta), where \widehat{c}_{it} is the consumption predicted by the limited commitment risk-sharing model for a given set of parameters, \theta. To account for discrepancies between observed and predicted consumption, I introduce multiplicative measurement errors in consumption, as in the full risk-sharing model. This yields the following relationship: \log \left(c_{it}^*\right) = \log \left( \widehat{c}_{it}(y_t, x_{t - 1}; \theta) \right) + \varepsilon_{it}^c, where \varepsilon_{it}^c \sim N(0, \gamma_C^2).
The previous-period relative Pareto weight, x_{t - 1}, is derived as the ratio of marginal utilities in the previous period: x_{t - 1} = \frac{c_{v, t - 1}^{-\sigma}}{c_{i, t - 1}^{-\sigma}} = \frac{c_{v, t - 1}^{-\sigma}}{\left( c_{i, t - 1}^{*} / \exp(\varepsilon_{i, t - 1}^c) \right)^{-\sigma}}, where c_{v, t} = \frac{1}{N} \sum_i c_{i, t}.
Note that the consumption measurement errors affect the consumption prediction, \widehat{c}, through x_{t - 1}. This effect is non-linear in the measurement errors and has no analytical expression, so I estimate the parameters by the simulated maximum likelihood method.
5.1.2 Estimation steps
- Randomly generate measurement errors, and denote the s-th simulated draw for household i at time t by \varepsilon_{s, i, t}^c. Then calculate the “true” consumption of the households and the “village”: c_{s, i, t} = c_{i, t}^* / \exp(\varepsilon_{s, i, t}^c) and c_{s, v, t} = \exp \left( \frac{1}{N} \sum_i \log (c_{s, i, t}) \right). The latter is calculated as a geometric mean, instead of c_{s, v, t} = \frac{1}{N} \sum_i c_{s, i, t}, to align with how village consumption is calculated in the full risk-sharing model estimation.
- Calculate the previous-period relative Pareto weight by x_{s, t - 1} = \frac{c_{s, v, t - 1}^{-\sigma}}{c_{s, i, t - 1}^{-\sigma}}.
- Predict consumption given x_{s, t - 1} and y_{s, t}. For this, derive x_{s, t} from x_{s, t - 1} and the bounds on relative Pareto weights implied by \theta and y_{s, t}. Note that this updating rule approximates the N-household case by the “one-versus-the-other” approach.
- Calculate the likelihood by f(\log(c_{it}^*) - \log \left( \widehat{c}_{it}(y_{s, t}, x_{s, t - 1}; \theta) \right), \theta), where f(x, \theta) is the density function of N(0, \gamma_C^2).
- Take the average of the likelihood across s: \frac{1}{S} \sum_s f(\log(c_{it}^*) - \log \left( \widehat{c}_{it}(y_{s, t}, x_{s, t - 1}; \theta) \right), \theta).
- Take the logarithm and sum it over i and t (a minimal numerical sketch of the last two steps follows this list).
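As a sketch of those last two steps, the snippet below averages the normal density of the prediction error across three simulation draws for one household-period observation and then takes the logarithm. All numbers are hypothetical, purely for illustration:

# Hypothetical observed consumption and model predictions under S = 3 draws.
logConsObserved <- log(1.2)
logConsPredicted <- log(c(1.10, 1.25, 1.18))
gammaC2 <- 0.03  # hypothetical variance of measurement errors
simulatedLikelihood <- mean(
  dnorm(logConsObserved - logConsPredicted, sd = sqrt(gammaC2))
)
logLikelihoodContribution <- log(simulatedLikelihood)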
For standard errors, I do not assume that the model is correctly specified. Without autocorrelation, the standard errors of the parameters are the square roots of the diagonal elements of A(\widehat{\theta})^{-1} B(\widehat{\theta}) A(\widehat{\theta})^{-1}. Here, A(\widehat{\theta}) is the Hessian matrix of the log-likelihood function and B(\widehat{\theta}) is the sum of the outer products of the observation-level scores.
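As a minimal sketch of this sandwich formula with hypothetical inputs (a simulated score matrix stands in for the observation-level scores, and an arbitrary diagonal matrix for the Hessian), note that crossprod(scoreMatrix) computes exactly the sum of the outer products of the score vectors:

set.seed(1)
scoreMatrix <- matrix(rnorm(200, sd = 0.5), nrow = 100, ncol = 2)  # hypothetical per-observation scores
hessianNegLogLik <- diag(c(50, 80))  # hypothetical Hessian A(theta_hat)
B <- crossprod(scoreMatrix)          # sum of outer products of the scores
A_inv <- solve(hessianNegLogLik)
standardErrors <- sqrt(diag(A_inv %*% B %*% A_inv))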
5.2 Estimation code
5.2.1 Load data and functions
load('IntermediateData/allData.RData')
load('IntermediateData/lc_hom_model_functions.RData')

5.2.2 Simulate random measurement errors

set.seed(123)
numSimulations <- 500
# Draws are standard normal; they are scaled by sqrt(gammaC2) inside the
# likelihood function so the same draws can be reused across parameter values.
measurementErrorArray <- array(
  rnorm(hnum * tnum * numSimulations),
  dim = c(hnum, tnum, numSimulations)
)

5.2.3 Functions to calculate likelihood
interpolateRelativeParetoWeightBound <- function(
  householdIncomeGridPoints,
  villageIncomeGridPoints,
  relativeParetoWeightBoundsVec,
  householdIncomeTrue,
  villageIncomeTrue
) {
  # Bilinearly interpolate a bound on the relative Pareto weight at the
  # "true" (measurement-error-free) incomes; incomes outside the grid are
  # clamped to the grid edges before interpolating.
  interp2(
    x = householdIncomeGridPoints,
    y = villageIncomeGridPoints,
    Z = matrix(
      relativeParetoWeightBoundsVec,
      nrow = length(villageIncomeGridPoints),
      byrow = TRUE
    ),
    xp = householdIncomeTrue %>%
      pmax(min(householdIncomeGridPoints)) %>%
      pmin(max(householdIncomeGridPoints)),
    yp = villageIncomeTrue %>%
      pmax(min(villageIncomeGridPoints)) %>%
      pmin(max(villageIncomeGridPoints)),
    method = "linear"
  )
}
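As a toy illustration with hypothetical grids and bound values (the bounds vector is laid out row by row over the village-income grid, matching the byrow = TRUE above):

householdGrid <- c(0.5, 1.0, 1.5)
villageGrid <- c(0.8, 1.2)
boundsVec <- c(0.6, 0.7, 0.8, 0.9, 1.0, 1.1)  # hypothetical bound values
interpolateRelativeParetoWeightBound(
  householdGrid,
  villageGrid,
  boundsVec,
  householdIncomeTrue = 1.2,
  villageIncomeTrue = 1.0
)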
updateRelativeParetoWeight <- function(
  previousRelativeParetoWeight,
  relativeParetoWeightLowerBound,
  relativeParetoWeightUpperBound
) {
  # Limited-commitment updating rule: keep the previous weight if it lies
  # within the bounds, otherwise move it to the violated bound.
  previousRelativeParetoWeight %>%
    pmax(relativeParetoWeightLowerBound) %>%
    pmin(relativeParetoWeightUpperBound)
}
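For example, with hypothetical bounds of 0.6 and 1.8, a weight inside the interval is unchanged while weights outside are moved to the nearest bound:

updateRelativeParetoWeight(
  previousRelativeParetoWeight = c(0.4, 1.0, 2.5),
  relativeParetoWeightLowerBound = 0.6,
  relativeParetoWeightUpperBound = 1.8
)
# Returns: 0.6 1.0 1.8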
calculateCurrentRelativeParetoWeightEachObsBySimLCHom <- function(
  village,
  meanClass,
  CVClass,
  householdAR1EstimationResult,
  villageAR1EstimationResult,
  householdIncomeTrue,
  villageIncomeTrue,
  previousRelativeParetoWeight,
  relativeParetoWeightBoundsArray
) {
  householdIncomeGridPoints <- (
    householdAR1EstimationResult[[village]]$gridPointsArray[meanClass, CVClass, ]
  )
  villageIncomeGridPoints <- villageAR1EstimationResult[[village]]$gridPoints
  relativeParetoWeightLowerBound <- interpolateRelativeParetoWeightBound(
    householdIncomeGridPoints,
    villageIncomeGridPoints,
    relativeParetoWeightBoundsArray[meanClass, CVClass, , 1],
    householdIncomeTrue,
    villageIncomeTrue
  )
  relativeParetoWeightUpperBound <- interpolateRelativeParetoWeightBound(
    householdIncomeGridPoints,
    villageIncomeGridPoints,
    relativeParetoWeightBoundsArray[meanClass, CVClass, , 2],
    householdIncomeTrue,
    villageIncomeTrue
  )
  currentRelativeParetoWeight <- updateRelativeParetoWeight(
    previousRelativeParetoWeight,
    relativeParetoWeightLowerBound,
    relativeParetoWeightUpperBound
  )
  return(currentRelativeParetoWeight)
}
calculateLogLikelihoodForEachObsLCHom <- function(
  param,
  village,
  consdat,
  incdatRescaled,
  householdIncMeanClassVec,
  householdIncCVClassVec,
  householdAR1EstimationResult,
  villageAR1EstimationResult,
  measurementErrorArray,
  villageIndicatorMatrix,
  numIncomeStates,
  numSimulations
) {
  delta <- param[1]
  sigma <- param[2]
  punishment <- param[3]
  gammaC2 <- param[4]

  # Solve the limited commitment model to obtain the bounds on relative
  # Pareto weights at each income-state grid point.
  relativeParetoWeightBoundsArray <- solveLCRiskSharingByVillage(
    village,
    delta,
    sigma,
    punishment,
    householdAR1EstimationResult,
    villageAR1EstimationResult,
    numIncomeStates,
    numRelativeParetoWeights = 2000,
    iterationLimit = 100,
    diffLimit = 1e-8
  )

  consVillage <- consdat[villageIndicatorMatrix[, village], ]
  incdatRescaledVillage <- incdatRescaled[villageIndicatorMatrix[, village], ]
  measurementErrorArrayVillage <- measurementErrorArray[
    villageIndicatorMatrix[, village], ,
  ]
  numHouseholds <- householdAR1EstimationResult[[village]]$numHouseholds

  currentRelativeParetoWeightArray <- array(
    NA, dim = c(numHouseholds, tnum - 1, numSimulations)
  )
  for (simulationIndex in seq(1, numSimulations)) {
    # "True" consumption: strip the simulated measurement errors, scaled by
    # the standard deviation sqrt(gammaC2), from observed consumption.
    consTrue <- (
      consVillage
      / exp(measurementErrorArrayVillage[, , simulationIndex] * sqrt(gammaC2))
    )
    # Previous-period relative Pareto weights: ratio of the geometric mean of
    # marginal utilities in the village to each household's marginal utility.
    relativeParetoWeightMatrix <- (
      outer(
        rep(1, numHouseholds),
        calculateMarginalUtility(consTrue, sigma) %>% log %>% colMeans %>% exp
      )
      / calculateMarginalUtility(consTrue, sigma)
    )
    for (householdIndex in seq(1, numHouseholds)) {
      for (periodIndex in seq(2, tnum)) {
        meanClass <- householdIncMeanClassVec[villageIndicatorMatrix[, village]][householdIndex]
        CVClass <- householdIncCVClassVec[villageIndicatorMatrix[, village]][householdIndex]
        householdIncomeTrue <- incdatRescaledVillage[householdIndex, periodIndex]
        villageIncomeTrue <- (incdatRescaledVillage %>% colMeans(na.rm = TRUE))[periodIndex]
        previousRelativeParetoWeight <- relativeParetoWeightMatrix[householdIndex, (periodIndex - 1)]
        currentRelativeParetoWeightArray[householdIndex, periodIndex - 1, simulationIndex] <- (
          calculateCurrentRelativeParetoWeightEachObsBySimLCHom(
            village,
            meanClass,
            CVClass,
            householdAR1EstimationResult,
            villageAR1EstimationResult,
            householdIncomeTrue,
            villageIncomeTrue,
            previousRelativeParetoWeight,
            relativeParetoWeightBoundsArray
          )
        )
      }
    }
  }

  likelihoodArrayPerSim <- array(NA, dim = c(numHouseholds, tnum - 1, numSimulations))
  for (simulationIndex in seq(1, numSimulations)) {
    for (householdIndex in seq(1, numHouseholds)) {
      for (periodIndex in seq(2, tnum)) {
        # Predicted log consumption: aggregate income is split in proportion
        # to the relative Pareto weights raised to the power 1 / sigma.
        logConsPredict <- (
          log(
            (incdatRescaledVillage[, periodIndex] %>% sum(na.rm = TRUE))
            / (
              (currentRelativeParetoWeightArray[, periodIndex - 1, simulationIndex]^(1 / sigma)) %>%
                sum(na.rm = TRUE)
            )
          )
          + 1 / sigma * log(
            currentRelativeParetoWeightArray[householdIndex, periodIndex - 1, simulationIndex]
          )
        )
        likelihoodArrayPerSim[householdIndex, periodIndex - 1, simulationIndex] <- dnorm(
          log(consVillage[householdIndex, periodIndex])
          - logConsPredict,
          sd = sqrt(gammaC2)
        )
      }
    }
  }

  # Average the likelihood across simulations before taking logs; the floor
  # at 1e-8 guards against taking the log of zero.
  logLikelihoodArray <- array(NA, dim = c(numHouseholds, tnum - 1))
  for (householdIndex in seq(1, numHouseholds)) {
    for (periodIndex in seq(2, tnum)) {
      likelihoodSimMean <- likelihoodArrayPerSim[householdIndex, periodIndex - 1, ] %>% mean(na.rm = TRUE)
      logLikelihoodArray[householdIndex, periodIndex - 1] <- log(likelihoodSimMean %>% pmax(1e-8))
    }
  }
  return(logLikelihoodArray)
}
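The prediction inside this function splits aggregate village income in proportion to x_i^{1 / \sigma}, so predicted consumption mechanically sums to aggregate income. A toy check of this allocation rule, with hypothetical numbers:

sigma <- 3
relativeParetoWeights <- c(0.8, 1.0, 1.3)  # hypothetical weights
aggregateIncome <- 6                       # hypothetical aggregate income
consPredicted <- (
  aggregateIncome
  * relativeParetoWeights^(1 / sigma)
  / sum(relativeParetoWeights^(1 / sigma))
)
all.equal(sum(consPredicted), aggregateIncome)  # TRUE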
calculateLogLikelihoodLCHom <- function(
  param,
  village,
  consdat,
  incdatRescaled,
  householdIncMeanClassVec,
  householdIncCVClassVec,
  householdAR1EstimationResult,
  villageAR1EstimationResult,
  measurementErrorArray,
  villageIndicatorMatrix,
  numIncomeStates,
  numSimulations
) {
  logLikelihoodForEachObs <- calculateLogLikelihoodForEachObsLCHom(
    param,
    village,
    consdat,
    incdatRescaled,
    householdIncMeanClassVec,
    householdIncCVClassVec,
    householdAR1EstimationResult,
    villageAR1EstimationResult,
    measurementErrorArray,
    villageIndicatorMatrix,
    numIncomeStates,
    numSimulations
  )
  # Return the negative log-likelihood, since optim minimizes by default.
  logLikelihood <- sum(logLikelihoodForEachObs)
  return(- logLikelihood)
}

estimateMLLCHom <- function(
  initialParam,
  lowerBounds,
  upperBounds,
  village,
  consdat,
  incdatRescaled,
  householdIncMeanClassVec,
  householdIncCVClassVec,
  householdAR1EstimationResult,
  villageAR1EstimationResult,
  measurementErrorArray,
  villageIndicatorMatrix,
  numIncomeStates,
  numSimulations
) {
  tic()
  # Minimize the negative log-likelihood subject to box constraints on the
  # parameters.
  optimRes <- optim(
    initialParam,
    calculateLogLikelihoodLCHom,
    village = village,
    consdat = consdat,
    incdatRescaled = incdatRescaled,
    householdIncMeanClassVec = householdIncMeanClassVec,
    householdIncCVClassVec = householdIncCVClassVec,
    householdAR1EstimationResult = householdAR1EstimationResult,
    villageAR1EstimationResult = villageAR1EstimationResult,
    measurementErrorArray = measurementErrorArray,
    villageIndicatorMatrix = villageIndicatorMatrix,
    numIncomeStates = numIncomeStates,
    numSimulations = numSimulations,
    method = "L-BFGS-B",
    lower = lowerBounds,
    upper = upperBounds,
    control = list(trace = 5, maxit = 200),
    hessian = TRUE
  )
  # Numerical scores of each observation's log-likelihood contribution,
  # evaluated at the estimated parameters.
  score <- jacobian(
    function(x) calculateLogLikelihoodForEachObsLCHom(
      x,
      village,
      consdat,
      incdatRescaled,
      householdIncMeanClassVec,
      householdIncCVClassVec,
      householdAR1EstimationResult,
      villageAR1EstimationResult,
      measurementErrorArray,
      villageIndicatorMatrix,
      numIncomeStates,
      numSimulations
    ) %>% as.vector,
    optimRes$par
  )
  # Sandwich standard errors: A^{-1} B A^{-1}, with A the Hessian from optim
  # and B the sum of the outer products of the observation-level scores.
  standardErrors <- (
    (optimRes$hessian %>% solve)
    %*% reduce(
      map(
        seq(nrow(score)),
        ~ outer(score[., ], score[., ])
      ),
      function(x, y) x + y
    )
    %*% (optimRes$hessian %>% solve)
  ) %>% diag %>% sqrt
  toc()
  return(list(optimRes = optimRes, standardErrors = standardErrors))
}

5.3 Estimation result
# Initial values and box bounds for (delta, sigma, punishment, gammaC2).
initialParam <- c(0.95, 3.0, 0.3, 0.03)
lowerBounds <- c(0.5, 1.0, 0.0, 1e-3)
upperBounds <- c(0.98, 5.0, 0.99, 1.0)
estimationLCHomRes <- map(
  seq(1, numVillages),
  ~ estimateMLLCHom(
    initialParam,
    lowerBounds,
    upperBounds,
    .,
    consdat,
    incdatRescaled,
    householdIncMeanClassVec,
    householdIncCVClassVec,
    householdAR1EstimationResult,
    villageAR1EstimationResult,
    measurementErrorArray,
    villageIndicatorMatrix,
    numIncomeStates,
    numSimulations
  )
)

createRegressionTable <- function(village) {
  estimationLCHomTable <- c()
  for (varIndex in seq_along(estimationLCHomRes[[village]]$optimRes$par)) {
    estimationLCHomTable <- c(
      estimationLCHomTable,
      formatC(estimationLCHomRes[[village]]$optimRes$par[varIndex], digits = 3, format = "f"),
      str_interp(
        '(${formatC(estimationLCHomRes[[village]]$standardErrors[varIndex], digits = 3, format = "f")})'
      )
    )
  }
  estimationLCHomTable <- c(
    estimationLCHomTable,
    formatC(-estimationLCHomRes[[village]]$optimRes$value, digits = 3, format = "f")
  )
  return(estimationLCHomTable)
}
estimationLCHomTable <- do.call(
  cbind,
  map(
    seq(1, numVillages),
    createRegressionTable
  )
)
colnames(estimationLCHomTable) <- c("Aurepalle", "Kanzara", "Shirapur")
rownames(estimationLCHomTable) <- c(
  "Discount factor",
  "",
  "Coef of RRA",
  "",
  "Punishment parameter",
  "",
  "Variance of consumption measurement errors",
  "",
  "Log likelihood"
)
estimationLCHomTable %>%
  kbl(digits = 3) %>%
  kable_classic()

|                                            | Aurepalle | Kanzara | Shirapur |
|--------------------------------------------|-----------|---------|----------|
| Discount factor                            | 0.980     | 0.748   | 0.661    |
|                                            | (0.025)   | (0.030) | (0.232)  |
| Coef of RRA                                | 2.998     | 2.905   | 2.756    |
|                                            | (0.078)   | (0.013) | (0.066)  |
| Punishment parameter                       | 0.234     | 0.347   | 0.004    |
|                                            | (0.035)   | (0.032) | (0.004)  |
| Variance of consumption measurement errors | 0.035     | 0.028   | 0.059    |
|                                            | (0.005)   | (0.004) | (0.009)  |
| Log likelihood                             | -3.129    | -9.478  | -18.022  |