pacman::p_load(
  tidyverse,
  kableExtra,
  pracma,
  tictoc
)
5 Risk sharing with limited commitment: estimation
5.1 Estimation method
5.1.1 Model
The main idea in the estimation is to find the parameters that explain the consumption pattern of a household: c_{it} = \widehat{c}_{it}(y_t, x_{t - 1}; \theta), where \widehat{c}_{it} is consumption predicted by the limited commitment risk sharing model with a given set of parameters, \theta. To explain the discrepancies between the observed consumption and the predicted consumption, I introduce multiplicative measurement errors in consumption, as in the full risk-sharing model. This yields the following relationship: \log \left(c_{it}^*\right) = \log \left( \widehat{c}_{it}(y_t, x_{t - 1}; \theta) \right) + \varepsilon_{it}^c, where \varepsilon_{it}^c \sim N(0, \gamma_C^2).
The previous-period relative Pareto weight, x_{t - 1}, is derived from the ratio of the marginal utilities in the previous period: x_{t - 1} = \frac{c_{v, t - 1}^{-\sigma}}{c_{i, t - 1}^{-\sigma}} = \frac{c_{v, t - 1}^{-\sigma}}{\left( c_{i, t - 1}^{*} / \exp(\varepsilon_{i, t - 1}^c) \right)^{-\sigma}}, where c_{v, t} = \frac{1}{N} \sum_i c_{i, t}.
Note that consumption measurement errors affect the consumption prediction, \widehat{c}, through x_{t - 1}. Since this effect is non-linear in the measurement errors and has no analytical expression, I use the simulated maximum likelihood method for estimation.
5.1.2 Estimation steps
- Randomly generate measurement errors, and denote the s’th simulation for household i at time t by \varepsilon_{s, i, t}^c. Then calculate the “true” consumption of households and of the “village”, c_{s, i, t} = c_{i, t}^* / \exp(\varepsilon_{s, i, t}^c) and c_{s, v, t} = \exp \left( \frac{1}{N} \sum_i \log (c_{s, i, t}) \right). The latter is calculated this way, instead of c_{s, v, t} = \frac{1}{N} \sum_i c_{s, i, t}, to be aligned with how village consumption is calculated in the full risk-sharing model estimation.
- Calculate the previous-period relative Pareto weight by x_{s, t - 1} = \frac{c_{s, v, t - 1}^{-\sigma}}{c_{s, i, t - 1}^{-\sigma}}.
- Predict the consumption given x_{s, t - 1} and y_{s, t}. For this, obtain x_{s, t} based on x_{s, t - 1} and the bounds on relative Pareto weights given \theta and y_{s, t}, then apply the sharing rule shown after this list. Note that this updating rule approximates the N-household case by the “one-versus-the-other” approach.
- Calculate the likelihood by f(\log(c_{it}^*) - \log \left( \widehat{c}_{it}(y_{s, t}, x_{s, t - 1}; \theta) \right), \theta), where f(x, \theta) is the density function of N(0, \gamma_C^2).
- Take the average of the likelihood across s: \frac{1}{S} \sum_s f(\log(c_{it}^*) - \log \left( \widehat{c}_{it}(y_{s, t}, x_{s, t - 1}; \theta) \right), \theta).
- Take a logarithm and sum it over i and t.
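The prediction in the third step uses the within-period efficient allocation under CRRA utility: given the updated relative Pareto weights, household i consumes a share of aggregate income proportional to x_{s, i, t}^{1 / \sigma}, so that \log \left( \widehat{c}_{it} \right) = \log \left( \frac{\sum_j y_{s, j, t}}{\sum_j x_{s, j, t}^{1 / \sigma}} \right) + \frac{1}{\sigma} \log \left( x_{s, i, t} \right). This is the expression computed as logConsPredict in the likelihood code below.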
For standard errors, I do not assume that the model is correctly specified. Assuming no autocorrelation, the standard errors of the parameters are the square roots of the diagonal elements of A(\widehat{\theta})^{-1} B(\widehat{\theta}) A(\widehat{\theta})^{-1}. Here, A(\widehat{\theta}) is the Hessian matrix of the log-likelihood function and B(\widehat{\theta}) is the outer product of the scores.
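As a minimal sketch of this computation (the function sandwichStandardErrors and its inputs are hypothetical names: hessian is the Hessian of the log-likelihood at \widehat{\theta}, and scores is a matrix whose rows are per-observation score vectors):

# Sandwich standard errors: square roots of the diagonal of A^{-1} B A^{-1}
sandwichStandardErrors <- function(hessian, scores) {
  bread <- solve(hessian)    # A(theta_hat)^{-1}
  meat <- crossprod(scores)  # B(theta_hat): sum of outer products of the scores
  sqrt(diag(bread %*% meat %*% bread))
}

The estimation function in Section 5.2.3 performs the same computation with map() and reduce() after obtaining the scores numerically with pracma::jacobian().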
5.2 Estimation code
5.2.1 Load data and functions
load('IntermediateData/allData.RData')
load('IntermediateData/lc_hom_model_functions.RData')
5.2.2 Simulate random measurement errors
set.seed(123)

numSimulations <- 500
measurementErrorArray <- array(
  rnorm(hnum * tnum * numSimulations),
  dim = c(hnum, tnum, numSimulations)
)
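The standard normal draws are generated once here and rescaled by \sqrt{\gamma_C^2} inside the likelihood function below. Reusing the same draws at every parameter value is standard practice in simulated maximum likelihood, as it keeps the simulated likelihood smooth in the parameters.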
5.2.3 Functions to calculate likelihood
interpolateRelativeParetoWeightBound <- function(
  householdIncomeGridPoints,
  villageIncomeGridPoints,
  relativeParetoWeightBoundsVec,
  householdIncomeTrue,
  villageIncomeTrue
) {
  interp2(
    x = householdIncomeGridPoints,
    y = villageIncomeGridPoints,
    Z = matrix(
      relativeParetoWeightBoundsVec,
      nrow = length(villageIncomeGridPoints),
      byrow = TRUE
    ),
    xp = householdIncomeTrue %>%
      pmax(min(householdIncomeGridPoints)) %>%
      pmin(max(householdIncomeGridPoints)),
    yp = villageIncomeTrue %>%
      pmax(min(villageIncomeGridPoints)) %>%
      pmin(max(villageIncomeGridPoints)),
    method = "linear"
  )
}
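For illustration, a hypothetical call on a 2-by-2 income grid (all numbers made up): the query point lies at the midpoint of the grid in both dimensions, so the bilinear interpolation returns the average of the four corner values.

interpolateRelativeParetoWeightBound(
  householdIncomeGridPoints = c(0.5, 1.5),
  villageIncomeGridPoints = c(0.8, 1.2),
  relativeParetoWeightBoundsVec = c(0.4, 0.6, 0.5, 0.7),
  householdIncomeTrue = 1.0,
  villageIncomeTrue = 1.0
)
# returns 0.55, the mean of the four corner values

Query points outside the grid are clamped to the nearest grid boundary by the pmax/pmin calls, so the interpolation never extrapolates.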
updateRelativeParetoWeight <- function(
  previousRelativeParetoWeight,
  relativeParetoWeightLowerBound,
  relativeParetoWeightUpperBound
) {
  previousRelativeParetoWeight %>%
    pmax(relativeParetoWeightLowerBound) %>%
    pmin(relativeParetoWeightUpperBound)
}
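This function implements the updating rule of the limited commitment model: the relative Pareto weight is unchanged when it already lies within the bounds (no participation constraint binds) and is moved to the nearest bound otherwise. For example (made-up numbers):

updateRelativeParetoWeight(1.0, 0.8, 1.2)  # 1.0: within the bounds, unchanged
updateRelativeParetoWeight(1.5, 0.8, 1.2)  # 1.2: clamped to the upper bound
updateRelativeParetoWeight(0.5, 0.8, 1.2)  # 0.8: clamped to the lower bound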
calculateCurrentRelativeParetoWeightEachObsBySimLCHom <- function(
  village,
  meanClass,
  CVClass,
  householdAR1EstimationResult,
  villageAR1EstimationResult,
  householdIncomeTrue,
  villageIncomeTrue,
  previousRelativeParetoWeight,
  relativeParetoWeightBoundsArray
) {

  householdIncomeGridPoints <- (
    householdAR1EstimationResult[[village]]$gridPointsArray[meanClass, CVClass, ]
  )
  villageIncomeGridPoints <- villageAR1EstimationResult[[village]]$gridPoints

  relativeParetoWeightLowerBound <- interpolateRelativeParetoWeightBound(
    householdIncomeGridPoints,
    villageIncomeGridPoints,
    relativeParetoWeightBoundsArray[meanClass, CVClass, , 1],
    householdIncomeTrue,
    villageIncomeTrue
  )

  relativeParetoWeightUpperBound <- interpolateRelativeParetoWeightBound(
    householdIncomeGridPoints,
    villageIncomeGridPoints,
    relativeParetoWeightBoundsArray[meanClass, CVClass, , 2],
    householdIncomeTrue,
    villageIncomeTrue
  )

  currentRelativeParetoWeight <- updateRelativeParetoWeight(
    previousRelativeParetoWeight,
    relativeParetoWeightLowerBound,
    relativeParetoWeightUpperBound
  )

  return(currentRelativeParetoWeight)
}
calculateLogLikelihoodForEachObsLCHom <- function(
  param,
  village,
  consdat,
  incdatRescaled,
  householdIncMeanClassVec,
  householdIncCVClassVec,
  householdAR1EstimationResult,
  villageAR1EstimationResult,
  measurementErrorArray,
  villageIndicatorMatrix,
  numIncomeStates,
  numSimulations
) {

  delta <- param[1]
  sigma <- param[2]
  punishment <- param[3]
  gammaC2 <- param[4]

  # Solve the limited commitment model to obtain the bounds on
  # relative Pareto weights at each income state
  relativeParetoWeightBoundsArray <- solveLCRiskSharingByVillage(
    village,
    delta,
    sigma,
    punishment,
    householdAR1EstimationResult,
    villageAR1EstimationResult,
    numIncomeStates,
    numRelativeParetoWeights = 2000,
    iterationLimit = 100,
    diffLimit = 1e-8
  )

  consVillage <- consdat[villageIndicatorMatrix[, village], ]
  incdatRescaledVillage <- incdatRescaled[villageIndicatorMatrix[, village], ]
  measurementErrorArrayVillage <- measurementErrorArray[
    villageIndicatorMatrix[, village], ,
  ]
  numHouseholds <- householdAR1EstimationResult[[village]]$numHouseholds

  currentRelativeParetoWeightArray <- array(NA, dim = c(numHouseholds, tnum - 1, numSimulations))

  for (simulationIndex in seq(1, numSimulations)) {
    # "True" consumption: remove the simulated measurement errors
    consTrue <- (
      consVillage / exp(measurementErrorArrayVillage[, , simulationIndex] * sqrt(gammaC2))
    )
    # Relative Pareto weights implied by the ratio of marginal utilities
    relativeParetoWeightMatrix <- (
      outer(
        rep(1, numHouseholds),
        calculateMarginalUtility(consTrue, sigma) %>% log %>% colMeans %>% exp
      ) / calculateMarginalUtility(consTrue, sigma)
    )

    for (householdIndex in seq(1, numHouseholds)) {
      for (periodIndex in seq(2, tnum)) {

        meanClass <- householdIncMeanClassVec[villageIndicatorMatrix[, village]][householdIndex]
        CVClass <- householdIncCVClassVec[villageIndicatorMatrix[, village]][householdIndex]

        householdIncomeTrue <- incdatRescaledVillage[householdIndex, periodIndex]
        villageIncomeTrue <- (incdatRescaledVillage %>% colMeans(na.rm = TRUE))[periodIndex]

        householdCons <- consVillage[householdIndex, periodIndex]
        previousRelativeParetoWeight <- relativeParetoWeightMatrix[householdIndex, (periodIndex - 1)]

        currentRelativeParetoWeightArray[householdIndex, periodIndex - 1, simulationIndex] <- (
          calculateCurrentRelativeParetoWeightEachObsBySimLCHom(
            village,
            meanClass,
            CVClass,
            householdAR1EstimationResult,
            villageAR1EstimationResult,
            householdIncomeTrue,
            villageIncomeTrue,
            previousRelativeParetoWeight,
            relativeParetoWeightBoundsArray
          )
        )
      }
    }
  }

  likelihoodArrayPerSim <- array(NA, dim = c(numHouseholds, tnum - 1, numSimulations))
  for (simulationIndex in seq(1, numSimulations)) {
    for (householdIndex in seq(1, numHouseholds)) {
      for (periodIndex in seq(2, tnum)) {
        # Predicted log consumption from the CRRA sharing rule
        logConsPredict <- (
          log(
            (incdatRescaledVillage[, periodIndex] %>% sum(na.rm = TRUE)) / (
              (currentRelativeParetoWeightArray[, periodIndex - 1, simulationIndex]^(1 / sigma)) %>%
                sum(na.rm = TRUE)
            )
          ) + 1 / sigma * log(
            currentRelativeParetoWeightArray[householdIndex, periodIndex - 1, simulationIndex]
          )
        )
        likelihoodArrayPerSim[householdIndex, periodIndex - 1, simulationIndex] <- dnorm(
          log(consVillage[householdIndex, periodIndex]) - logConsPredict,
          sd = sqrt(gammaC2)
        )
      }
    }
  }

  # Average the likelihood across simulations before taking logs
  logLikelihoodArray <- array(NA, dim = c(numHouseholds, tnum - 1))
  for (householdIndex in seq(1, numHouseholds)) {
    for (periodIndex in seq(2, tnum)) {
      likelihoodSimMean <- likelihoodArrayPerSim[householdIndex, periodIndex - 1, ] %>% mean(na.rm = TRUE)
      logLikelihoodArray[householdIndex, periodIndex - 1] <- log(likelihoodSimMean %>% pmax(1e-8))
    }
  }

  return(logLikelihoodArray)
}
calculateLogLikelihoodLCHom <- function(
  param,
  village,
  consdat,
  incdatRescaled,
  householdIncMeanClassVec,
  householdIncCVClassVec,
  householdAR1EstimationResult,
  villageAR1EstimationResult,
  measurementErrorArray,
  villageIndicatorMatrix,
  numIncomeStates,
  numSimulations
) {

  logLikelihoodForEachObs <- calculateLogLikelihoodForEachObsLCHom(
    param,
    village,
    consdat,
    incdatRescaled,
    householdIncMeanClassVec,
    householdIncCVClassVec,
    householdAR1EstimationResult,
    villageAR1EstimationResult,
    measurementErrorArray,
    villageIndicatorMatrix,
    numIncomeStates,
    numSimulations
  )

  logLikelihood <- sum(logLikelihoodForEachObs)

  # optim() minimizes, so return the negative log-likelihood
  return(- logLikelihood)
}
estimateMLLCHom <- function(
  initialParam,
  lowerBounds,
  upperBounds,
  village,
  consdat,
  incdatRescaled,
  householdIncMeanClassVec,
  householdIncCVClassVec,
  householdAR1EstimationResult,
  villageAR1EstimationResult,
  measurementErrorArray,
  villageIndicatorMatrix,
  numIncomeStates,
  numSimulations
) {
  tic()

  optimRes <- optim(
    initialParam,
    calculateLogLikelihoodLCHom,
    village = village,
    consdat = consdat,
    incdatRescaled = incdatRescaled,
    householdIncMeanClassVec = householdIncMeanClassVec,
    householdIncCVClassVec = householdIncCVClassVec,
    householdAR1EstimationResult = householdAR1EstimationResult,
    villageAR1EstimationResult = villageAR1EstimationResult,
    measurementErrorArray = measurementErrorArray,
    villageIndicatorMatrix = villageIndicatorMatrix,
    numIncomeStates = numIncomeStates,
    numSimulations = numSimulations,
    method = "L-BFGS-B",
    lower = lowerBounds,
    upper = upperBounds,
    control = list(trace = 5, maxit = 200),
    hessian = TRUE
  )

  # Per-observation scores, evaluated numerically at the estimated parameters
  score <- jacobian(
    function(x) calculateLogLikelihoodForEachObsLCHom(
      x,
      village,
      consdat,
      incdatRescaled,
      householdIncMeanClassVec,
      householdIncCVClassVec,
      householdAR1EstimationResult,
      villageAR1EstimationResult,
      measurementErrorArray,
      villageIndicatorMatrix,
      numIncomeStates,
      numSimulations
    ) %>% as.vector,
    optimRes$par
  )

  # Sandwich standard errors: sqrt of the diagonal of A^{-1} B A^{-1},
  # where A is the Hessian and B the outer product of the scores
  standardErrors <- (
    (optimRes$hessian %>% solve) %*%
      reduce(
        map(
          seq(nrow(score)),
          ~ outer(score[., ], score[., ])
        ),
        function(x, y) x + y
      ) %*%
      (optimRes$hessian %>% solve)
  ) %>% diag %>% sqrt

  toc()

  return(list(optimRes = optimRes, standardErrors = standardErrors))
}
5.3 Estimation result
initialParam <- c(0.95, 3.0, 0.3, 0.03)  # (delta, sigma, punishment, gammaC2)
lowerBounds <- c(0.5, 1.0, 0.0, 1e-3)
upperBounds <- c(0.98, 5.0, 0.99, 1.0)
estimationLCHomRes <- map(
  seq(1, numVillages),
  ~ estimateMLLCHom(
    initialParam,
    lowerBounds,
    upperBounds,
    .,
    consdat,
    incdatRescaled,
    householdIncMeanClassVec,
    householdIncCVClassVec,
    householdAR1EstimationResult,
    villageAR1EstimationResult,
    measurementErrorArray,
    villageIndicatorMatrix,
    numIncomeStates,
    numSimulations
  )
)
createRegressionTable <- function(village) {

  estimationLCHomTable <- c()
  for (varIndex in seq_along(estimationLCHomRes[[village]]$optimRes$par)) {
    estimationLCHomTable <- c(
      estimationLCHomTable,
      formatC(estimationLCHomRes[[village]]$optimRes$par[varIndex], digits = 3, format = "f"),
      str_interp(
        '(${formatC(estimationLCHomRes[[village]]$standardErrors[varIndex], digits = 3, format = "f")})'
      )
    )
  }
  estimationLCHomTable <- c(
    estimationLCHomTable,
    formatC(- estimationLCHomRes[[village]]$optimRes$value, digits = 3, format = "f")
  )
  return(estimationLCHomTable)
}
estimationLCHomTable <- do.call(
  cbind,
  map(
    seq(1, numVillages),
    createRegressionTable
  )
)

colnames(estimationLCHomTable) <- c("Aurepalle", "Kanzara", "Shirapur")
rownames(estimationLCHomTable) <- c(
  "Discount factor",
  "",
  "Coef of RRA",
  "",
  "Punishment parameter",
  "",
  "Variance of consumption measurement errors",
  "",
  "Log likelihood"
)

estimationLCHomTable %>%
  kbl(digits = 3) %>%
  kable_classic()
|  | Aurepalle | Kanzara | Shirapur |
|---|---|---|---|
| Discount factor | 0.980 | 0.748 | 0.661 |
|  | (0.025) | (0.030) | (0.232) |
| Coef of RRA | 2.998 | 2.905 | 2.756 |
|  | (0.078) | (0.013) | (0.066) |
| Punishment parameter | 0.234 | 0.347 | 0.004 |
|  | (0.035) | (0.032) | (0.004) |
| Variance of consumption measurement errors | 0.035 | 0.028 | 0.059 |
|  | (0.005) | (0.004) | (0.009) |
| Log likelihood | -3.129 | -9.478 | -18.022 |