1 Data preparation
On this page, I show the data preparation process. The main goal of the data preparation is to estimate the discretized income processes of households and of the “village” (= the average of households in a village).
1.1 Load packages
pacman::p_load(
  tidyverse,
  kableExtra
)
1.2 Global settings
set.seed(123)
1.3 Load datasets
load('Laczo2015/allest')
1.3.1 Basic processing of raw data
numVillages <- max(villagedat)
villageIndicatorMatrix <- do.call(
  cbind,
  map(
    seq(1, numVillages),
    ~ villagedat[, 1] == .
  )
)
# Apply `func` to each village's rows and stack the results (one row per village, one column per year)
createVillageAggregateByYear <- function(
  data,
  func,
  numVillages,
  villageIndicatorMatrix
) {
  do.call(
    rbind,
    map(
      seq(1, numVillages),
      ~ data[villageIndicatorMatrix[, .], ] %>% func
    )
  )
}
# Apply `func` to each village's observations and return one value per village
createVillageAggregate <- function(
  data,
  func,
  numVillages,
  villageIndicatorMatrix
) {
  map_vec(
    seq(1, numVillages),
    ~ data[villageIndicatorMatrix[, .], ] %>% func
  )
}
vilMeanIncByYear <- createVillageAggregateByYear(incdat, colMeans, numVillages, villageIndicatorMatrix)
vilMeanConsByYear <- createVillageAggregateByYear(consdat, colMeans, numVillages, villageIndicatorMatrix)
vilMeanLogCons <- createVillageAggregateByYear(log(consdat), colMeans, numVillages, villageIndicatorMatrix)
incLow <- createVillageAggregate(
  incdat, function(x) quantile(x, 0.025, na.rm = TRUE),
  numVillages,
  villageIndicatorMatrix
)
incHigh <- createVillageAggregate(
  incdat, function(x) quantile(x, 0.975, na.rm = TRUE),
  numVillages,
  villageIndicatorMatrix
)
Since there is no saving in the model, consumption and income should coincide in the aggregate. To impose this, I rescale income so that the means of consumption and income are identical within each village and year.
vilConsPerIncByYear <- vilMeanConsByYear / vilMeanIncByYear
incdatRescaled <- incdat * (villageIndicatorMatrix %*% vilConsPerIncByYear)
vilMeanIncByYearRescaled <- createVillageAggregateByYear(
  incdatRescaled, colMeans, numVillages, villageIndicatorMatrix
)
incRescaledLow <- createVillageAggregate(
  incdatRescaled, function(x) quantile(x, 0.025, na.rm = TRUE),
  numVillages,
  villageIndicatorMatrix
)
incRescaledHigh <- createVillageAggregate(
  incdatRescaled, function(x) quantile(x, 0.975, na.rm = TRUE),
  numVillages,
  villageIndicatorMatrix
)
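As a quick check of the rescaling (this is my addition, not part of the original script, and it assumes the missing-value pattern is the same in the income and consumption data), the village-year means of the rescaled income should now coincide with the village-year means of consumption:
# Optional check: rescaled income means should equal consumption means by village and year
all.equal(vilMeanIncByYearRescaled, vilMeanConsByYear)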
1.4 Estimate income processes
The main step of this data processing is to estimate the income process, first of households and second of the “village” (= the average of households in a village). We need the income process of the village because, in estimation, we consider risk-sharing transfers between a household and “the rest of the village,” which lets us work with one-to-one transfers instead of one-to-n transfers.
I follow the estimation process of Laczó (2015), which is detailed in the paper’s online appendix. First, I estimate the income process of households by type, where the types are defined by the mean and the coefficient of variation (CV) of income. In total there are 4 types (high/low mean income × high/low income CV). If the income panel were long enough, we could estimate the income process of each household separately, but with only 6 income observations per household in the ICRISAT data, this is not feasible. Also, to mitigate the effect of outliers, incomes below the 2.5th percentile or above the 97.5th percentile in each village are not used in the income process estimation (but they are used in the estimation of the risk-sharing model).
numIncomeStatesHH <- 8
numIncomeStatesVillage <- 5
numIncomeStates <- numIncomeStatesHH * numIncomeStatesVillage
1.4.1 Household
householdIncMean <- incdatRescaled %>% rowMeans
householdIncSD <- apply(incdatRescaled, 1, sd, na.rm = TRUE)
householdIncCV <- householdIncSD / householdIncMean
vilIncMeanMedian <- createVillageAggregate(
  as.matrix(householdIncMean),
  median,
  numVillages,
  villageIndicatorMatrix
)
vilIncCVMedian <- createVillageAggregate(
  as.matrix(householdIncCV),
  median,
  numVillages,
  villageIndicatorMatrix
)
1.4.1.1 Estimate AR(1) process
First, I estimate the AR(1) process of households’ income. In particular, the following AR(1) process is estimated:

$$
y_{it} = (1 - \rho) \mu + \rho y_{i, t - 1} + u_{it},
$$

where y_{it} is the income of household i in period t and u_{it} is an i.i.d. shock with variance \sigma_u^2 (treated as normal in the Tauchen approximation below).
The parameters are obtained from the sample moments as

$$
\mu = E(y_{it}), \qquad \rho = Cor(y_{it}, y_{i, t - 1}), \qquad \sigma_{u}^2 = (1 - \rho^2) Var(y_{it}),
$$

where the last expression follows from stationarity, since Var(y_{it}) = \rho^2 Var(y_{i, t - 1}) + \sigma_u^2.
# Class 1 = at or below the village median, class 2 = above the village median
householdIncMeanClassVec <- (
  (householdIncMean > (villageIndicatorMatrix %*% vilIncMeanMedian)) + 1
)
householdIncCVClassVec <- (
  (householdIncCV > (villageIndicatorMatrix %*% vilIncCVMedian)) + 1
)
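To see how households are distributed across the four mean/CV types, one can tabulate the two class vectors (this small check is my addition and is not part of the original script):
# Counts of households in each of the 4 (mean class x CV class) cells
table(mean_class = householdIncMeanClassVec, cv_class = householdIncCVClassVec)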
incdatRescaledForAR1Estimation <- incdatRescaled
laggedIncdatRescaledForAR1Estimation <- cbind(
  NA,
  incdatRescaled[, seq(1, tnum - 1)]
)
# Drop outliers (below the 2.5th or above the 97.5th village percentile) from the AR(1) estimation sample
incdatRescaledForAR1Estimation[
  (incdat <= as.vector(villageIndicatorMatrix %*% incLow)) |
    (incdat >= as.vector(villageIndicatorMatrix %*% incHigh))
] <- NA
laggedIncdatRescaledForAR1Estimation[
  (incdat <= as.vector(villageIndicatorMatrix %*% incLow)) |
    (incdat >= as.vector(villageIndicatorMatrix %*% incHigh))
] <- NA
getDataByMeanCVClassByVillage <- function(
  village,
  data,
  meanClass,
  CVClass,
  meanClassVec,
  CVClassVec,
  villageIndicatorMatrix
) {
  data[
    (meanClassVec == meanClass) &
      (CVClassVec == CVClass) &
      (villageIndicatorMatrix[, village])
  ]
}
calculateAR1Parameters <- function(data, laggedData) {
  # mu = E(y), rho = Cor(y_t, y_{t-1}), sigma_u^2 = (1 - rho^2) Var(y)
  mu <- mean(data, na.rm = TRUE)
  rho <- cor(
    data,
    laggedData,
    use = "complete.obs"
  )
  sigmau <- sqrt(var(data, na.rm = TRUE) * (1 - rho^2))
  return(list(mu = mu, rho = rho, sigmau = sigmau))
}
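As an illustration (my addition, not part of the original script), the moment-based estimator above should approximately recover the parameters of a simulated AR(1) series. Note that running this optional check consumes random draws and therefore changes the seed-dependent results further below.
# Hypothetical check on a simulated series with mu = 100, rho = 0.5, sigma_u = 10
simulatedSeries <- as.numeric(
  arima.sim(model = list(ar = 0.5), n = 10000, sd = 10)
) + 100
calculateAR1Parameters(
  data = simulatedSeries[-1],                              # y_t
  laggedData = simulatedSeries[-length(simulatedSeries)]   # y_{t-1}
)
# The returned values should be close to mu = 100, rho = 0.5, sigmau = 10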
1.4.1.2 Approximate the AR(1) process with Markov chain for income of households
Given the estimated parameters, I approximate the AR(1) process with a discrete Markov chain. For this, Laczó (2015) uses Tauchen’s method with a small modification: instead of setting the bounds of the grid with a tuning parameter, each grid point is taken from the quantiles of the income distribution (the quantile at the midpoint of each equal-probability bin). To guarantee that the mean income at the steady state of the estimated process coincides with the actual mean income, the grid points are rescaled at the end.
calculateGridPoints <- function(numStates, data) {
  # Grid points are the quantiles at the midpoints of numStates equal-probability bins
  gridQuantile <- seq(0, 1, by = 1 / numStates)
  map_dbl(
    (gridQuantile[1:(length(gridQuantile) - 1)] + gridQuantile[2:length(gridQuantile)]) / 2,
    ~ quantile(data, ., na.rm = TRUE)
  )
}
approximateAR1Tauchen <- function(numStates, data, mu, rho, sigma) {

  gridPoints <- calculateGridPoints(numStates, data)

  transitionMatrix <- array(NA, c(numStates, numStates))

  # Transition probabilities to the first and last grid points
  for (currentState in 1:numStates) {
    transitionMatrix[currentState, 1] <- pnorm(
      ((gridPoints[2] + gridPoints[1]) / 2
        - (1 - rho) * mu - rho * gridPoints[currentState])
      / sigma
    )
    transitionMatrix[currentState, numStates] <- 1 - pnorm(
      ((gridPoints[numStates] + gridPoints[numStates - 1]) / 2
        - (1 - rho) * mu - rho * gridPoints[currentState])
      / sigma
    )
  }

  # Transition probabilities to the interior grid points
  for (currentState in 1:numStates) {
    for (nextState in 2:(numStates - 1)) {
      transitionMatrix[currentState, nextState] <- (
        pnorm(
          ((gridPoints[nextState + 1] + gridPoints[nextState]) / 2
            - (1 - rho) * mu - rho * gridPoints[currentState])
          / sigma
        )
        - pnorm(
          ((gridPoints[nextState] + gridPoints[nextState - 1]) / 2
            - (1 - rho) * mu - rho * gridPoints[currentState])
          / sigma
        )
      )
    }
  }
  return(list(transitionMatrix = transitionMatrix, gridPoints = gridPoints))
}
calculateSteadyStateProb <- function(transitionMatrix) {
  # The stationary distribution is the eigenvector of t(P) associated with the
  # unit eigenvalue (returned first, since eigenvalues are sorted by modulus),
  # normalized so that the probabilities sum to one.
  (
    eigen(t(transitionMatrix))$vectors[, 1]
      / sum(eigen(t(transitionMatrix))$vectors[, 1])
  )
}
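A quick way to sanity-check this helper (my addition, not part of the original script) is to confirm on a small chain that the returned distribution is invariant under the transition matrix:
# Hypothetical check with a two-state chain
toyTransitionMatrix <- matrix(c(0.9, 0.1, 0.2, 0.8), nrow = 2, byrow = TRUE)
stationaryDist <- calculateSteadyStateProb(toyTransitionMatrix)
all.equal(as.numeric(stationaryDist %*% toyTransitionMatrix), as.numeric(stationaryDist))  # invariance
stationaryDist  # equals (2/3, 1/3) for this chain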
rescaleGridPoints <- function(transitionMatrix, gridPoints, data) {
  # Rescale the grid so that the steady-state mean equals the sample mean
  steadyStateProb <- calculateSteadyStateProb(transitionMatrix)
  rescaleScalar <- as.numeric(
    mean(data, na.rm = TRUE) / (gridPoints %*% steadyStateProb)
  )
  gridPointsRescaled <- gridPoints * rescaleScalar
  return(list(
    gridPointsRescaled = gridPointsRescaled,
    steadyStateProb = steadyStateProb
  ))
}
approximateAR1TauchenWithRescaling <- function(numStates, data, mu, rho, sigma) {

  TauchenResult <- approximateAR1Tauchen(numStates, data, mu, rho, sigma)
  transitionMatrix <- TauchenResult$transitionMatrix
  gridPoints <- TauchenResult$gridPoints
  gridPointsRescaledResult <- rescaleGridPoints(
    transitionMatrix, gridPoints, data
  )
  gridPointsRescaled <- gridPointsRescaledResult$gridPointsRescaled
  steadyStateProb <- gridPointsRescaledResult$steadyStateProb

  return(list(
    transitionMatrix = transitionMatrix,
    gridPointsRescaled = gridPointsRescaled,
    steadyStateProb = steadyStateProb
  ))
}
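To illustrate the approximation (my addition, not part of the original script; it reuses simulatedSeries from the earlier sketch), each row of the transition matrix should sum to one, and the steady-state mean of the rescaled grid should match the sample mean:
# Hypothetical check of the Tauchen approximation with rescaling
toyApprox <- approximateAR1TauchenWithRescaling(
  5, simulatedSeries, mu = 100, rho = 0.5, sigma = 10
)
rowSums(toyApprox$transitionMatrix)  # all close to 1
as.numeric(toyApprox$gridPointsRescaled %*% toyApprox$steadyStateProb)  # equals mean(simulatedSeries) by construction
mean(simulatedSeries)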
estimateHouseholdIncomeTransitionProcessByVillage <- function(
  village,
  numStates,
  data,
  laggedData,
  householdIncMeanClassVec,
  householdIncCVClassVec,
  villageIndicatorMatrix
) {
  gridPointsArray <- array(NA, c(2, 2, numStates))
  transitionMatrixArray <- array(NA, c(2, 2, numStates, numStates))
  steadyStateProbArray <- array(NA, c(2, 2, numStates))
  AR1ParametersArray <- array(NA, c(2, 2, 3))

  for (incomeMeanClass in seq(1, 2)) {
    for (incomeCVClass in seq(1, 2)) {
      incdatRescaledMeanCVClass <- getDataByMeanCVClassByVillage(
        village, data, incomeMeanClass, incomeCVClass,
        householdIncMeanClassVec, householdIncCVClassVec, villageIndicatorMatrix
      )
      laggedIncdatRescaledMeanCVClass <- getDataByMeanCVClassByVillage(
        village, laggedData, incomeMeanClass, incomeCVClass,
        householdIncMeanClassVec, householdIncCVClassVec, villageIndicatorMatrix
      )
      AR1Parameters <- calculateAR1Parameters(
        incdatRescaledMeanCVClass, laggedIncdatRescaledMeanCVClass
      )
      TauchenResult <- approximateAR1TauchenWithRescaling(
        numIncomeStatesHH, incdatRescaledMeanCVClass,
        AR1Parameters$mu, AR1Parameters$rho, AR1Parameters$sigmau
      )
      gridPointsArray[incomeMeanClass, incomeCVClass, ] <- TauchenResult$gridPointsRescaled
      transitionMatrixArray[incomeMeanClass, incomeCVClass, , ] <- TauchenResult$transitionMatrix
      steadyStateProbArray[incomeMeanClass, incomeCVClass, ] <- TauchenResult$steadyStateProb
      AR1ParametersArray[incomeMeanClass, incomeCVClass, ] <- unlist(AR1Parameters)
    }
  }
  return(list(
    gridPointsArray = gridPointsArray,
    transitionMatrixArray = transitionMatrixArray,
    steadyStateProbArray = steadyStateProbArray,
    AR1ParametersArray = AR1ParametersArray,
    numHouseholds = villageIndicatorMatrix[, village] %>% sum
  ))
}
householdAR1EstimationResult <- map(
  seq(1, numVillages),
  ~ estimateHouseholdIncomeTransitionProcessByVillage(
    .,
    numIncomeStatesHH,
    incdatRescaledForAR1Estimation,
    laggedIncdatRescaledForAR1Estimation,
    householdIncMeanClassVec,
    householdIncCVClassVec,
    villageIndicatorMatrix
  )
)
1.4.2 Village
Given the estimated household income processes, I estimate the income process of the “village”. For this, I first simulate the average village income using the parameters estimated above. In particular, I simulate each household’s income over 1000 periods and compute the village mean income in each period. After dropping the first 100 periods so that only income at (approximately) the steady state is used, I estimate the parameters with the same method as for household income.
1.4.2.1 Simulate income process for village
numVillageIncomeSimulations <- 1000
numVillageIncomeSimulationsPeriodDrop <- 100
sample1stPeriodIncomeStateByMeanCVClass <- function(
  meanClass,
  CVClass,
  numStates,
  steadyStateProbArray
) {
  sample(
    seq(1, numStates),
    1,
    prob = steadyStateProbArray[meanClass, CVClass, ]
  )
}
sampleConditionalIncomeStateByMeanCVClass <- function(
  meanClass,
  CVClass,
  numStates,
  transitionMatrixArray,
  previousState
) {
  sample(
    seq(1, numStates),
    1,
    prob = transitionMatrixArray[meanClass, CVClass, previousState, ]
  )
}
sampleAllIncomeStateByMeanCVClass <- function(
  meanClass,
  CVClass,
  numStates,
  steadyStateProbArray,
  transitionMatrixArray,
  numSimulations = numVillageIncomeSimulations
) {
  incomeStateVector <- vector(mode = "integer", length = numSimulations)
  incomeStateVector[1] <- sample1stPeriodIncomeStateByMeanCVClass(
    meanClass,
    CVClass,
    numStates,
    steadyStateProbArray
  )
  for (period in seq(2, numSimulations)) {
    incomeStateVector[period] <- sampleConditionalIncomeStateByMeanCVClass(
      meanClass,
      CVClass,
      numStates,
      transitionMatrixArray,
      incomeStateVector[period - 1]
    )
  }
  return(incomeStateVector)
}
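To illustrate the sampler (a hypothetical check of my own, not in the original script), one can draw a long path from a single two-state chain and compare the empirical state frequencies with its stationary distribution. Like the other optional checks, this consumes random draws.
# Toy arrays for a single mean/CV class and two income states
toySteadyStateArray <- array(c(4 / 7, 3 / 7), c(1, 1, 2))
toyTransitionArray <- array(NA, c(1, 1, 2, 2))
toyTransitionArray[1, 1, , ] <- matrix(c(0.7, 0.3, 0.4, 0.6), nrow = 2, byrow = TRUE)
toyPath <- sampleAllIncomeStateByMeanCVClass(
  1, 1, 2, toySteadyStateArray, toyTransitionArray, numSimulations = 5000
)
table(toyPath) / length(toyPath)  # close to the stationary distribution (4/7, 3/7)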
simulateHouseholdIncomeByMeanCVClass <- function(
  meanClass,
  CVClass,
  numStates,
  steadyStateProbArray,
  transitionMatrixArray,
  gridPointsArray,
  numSimulations = numVillageIncomeSimulations
) {
  incomeStateVec <- sampleAllIncomeStateByMeanCVClass(
    meanClass,
    CVClass,
    numStates,
    steadyStateProbArray,
    transitionMatrixArray
  )
  gridPointsArray[meanClass, CVClass, incomeStateVec]
}
simulateHouseholdIncomeByVillage <- function(
  village,
  meanClassVec,
  CVClassVec,
  numStates,
  householdAR1EstimationResult,
  villageIndicatorMatrix
) {

  meanClassVillage <- meanClassVec[villageIndicatorMatrix[, village]]
  CVClassVillage <- CVClassVec[villageIndicatorMatrix[, village]]
  steadyStateProbArrayVillage <- householdAR1EstimationResult[[village]]$steadyStateProbArray
  transitionMatrixArrayVillage <- householdAR1EstimationResult[[village]]$transitionMatrixArray
  gridPointsArrayVillage <- householdAR1EstimationResult[[village]]$gridPointsArray

  do.call(
    rbind,
    map2(
      meanClassVillage, CVClassVillage,
      ~ simulateHouseholdIncomeByMeanCVClass(
        .x, .y,
        numStates,
        steadyStateProbArrayVillage,
        transitionMatrixArrayVillage,
        gridPointsArrayVillage
      )
    )
  )
}
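As a quick check of the output shape (my addition; running it consumes random draws and thus changes the seed-dependent results below), the simulated matrix for a village should have one row per household and one column per simulated period:
# Hypothetical check for village 1
simulatedVillage1 <- simulateHouseholdIncomeByVillage(
  1,
  householdIncMeanClassVec,
  householdIncCVClassVec,
  numIncomeStatesHH,
  householdAR1EstimationResult,
  villageIndicatorMatrix
)
dim(simulatedVillage1)  # (number of households in village 1) x numVillageIncomeSimulations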
1.4.2.2 Estimate the income process of the village
Given the simulated income data, I estimate the income process parameters and approximate the process with Tauchen’s method.
estimateVillagencomeTransitionProcessByVillage <- function(
  village,
  meanClassVec,
  CVClassVec,
  numStatesHH,
  numStatesVillage,
  householdAR1EstimationResult,
  villageIndicatorMatrix,
  numVillageIncomeSimulationsPeriodDrop,
  numVillageIncomeSimulations
) {
  householdIncomeSimulationResult <- simulateHouseholdIncomeByVillage(
    village,
    meanClassVec,
    CVClassVec,
    numStatesHH,
    householdAR1EstimationResult,
    villageIndicatorMatrix
  )

  villageSimulatedIncMean <- colMeans(
    householdIncomeSimulationResult[
      , numVillageIncomeSimulationsPeriodDrop:numVillageIncomeSimulations
    ]
  )
  villageSimulatedLaggedIncMean <- colMeans(
    householdIncomeSimulationResult[
      , (numVillageIncomeSimulationsPeriodDrop - 1):(numVillageIncomeSimulations - 1)
    ]
  )

  villageAR1Parameters <- calculateAR1Parameters(
    villageSimulatedIncMean,
    villageSimulatedLaggedIncMean
  )

  villageAR1TauchenApproximation <- approximateAR1Tauchen(
    numStatesVillage, villageSimulatedIncMean,
    villageAR1Parameters$mu, villageAR1Parameters$rho, villageAR1Parameters$sigmau
  )

  villageIncomeGridPoints <- calculateGridPoints(numStatesVillage, villageSimulatedIncMean)

  villageIncomeGridPointsRescaled <- rescaleGridPoints(
    villageAR1TauchenApproximation$transitionMatrix,
    villageIncomeGridPoints,
    villageSimulatedIncMean
  )

  return(list(
    transitionMatrix = villageAR1TauchenApproximation$transitionMatrix,
    gridPoints = villageIncomeGridPointsRescaled$gridPointsRescaled
  ))
}
villageAR1EstimationResult <- map(
  seq(1, numVillages),
  ~ estimateVillagencomeTransitionProcessByVillage(
    .,
    householdIncMeanClassVec,
    householdIncCVClassVec,
    numIncomeStatesHH,
    numIncomeStatesVillage,
    householdAR1EstimationResult,
    villageIndicatorMatrix,
    numVillageIncomeSimulationsPeriodDrop,
    numVillageIncomeSimulations
  )
)
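As a final check on the village processes (my addition, not part of the original script), each transition matrix should be row-stochastic and each grid should have numIncomeStatesVillage points:
# Hypothetical inspection of the first village's estimated process
rowSums(villageAR1EstimationResult[[1]]$transitionMatrix)  # all close to 1
length(villageAR1EstimationResult[[1]]$gridPoints)         # equals numIncomeStatesVillage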
1.5 Sanity check: compare against the parameters in Laczó (2015)
Just as a sanity check of my code, I compare the household AR(1) process parameters I derive against the ones provided in the appendix of Laczó (2015). The parameters in the tables below coincide with those in the paper’s appendix, except the ones for “Low mean, high risk” in Shirapur. I reran the original R script and confirmed that the correct parameters are the ones shown in the table below.
Since I refactored the original R script, I cannot reproduce the village parameters. To be clear, the author sets the random seed in the code, and the reason I cannot reproduce the village parameters is merely the difference in code structure.
createParameterTableByVillage <- function(
  village,
  householdAR1EstimationResult
) {

  villageParameters <- array(NA, c(3, 4))
  colIndex <- 0
  for (incomeMeanClass in seq(1, 2)) {
    for (incomeCVClass in seq(1, 2)) {
      colIndex <- colIndex + 1
      villageParameters[, colIndex] <-
        householdAR1EstimationResult[[village]]$AR1ParametersArray[incomeMeanClass, incomeCVClass, ]
    }
  }
  rownames(villageParameters) <- c(
    "mu",
    "rho",
    "sigmau"
  )
  villageParameters %>%
    kbl(digits = 3) %>%
    kable_classic() %>%
    add_header_above(
      c(
        "",
        "Low mean, \n low risk",
        "Low mean, \n high risk",
        "High mean, \n low risk",
        "High mean, \n high risk"
      )
    )
}
createParameterTableByVillage(1, householdAR1EstimationResult)
|        | Low mean, low risk | Low mean, high risk | High mean, low risk | High mean, high risk |
|--------|--------------------|---------------------|---------------------|----------------------|
| mu     | 182.475            | 160.201             | 441.843             | 391.798              |
| rho    | 0.189              | 0.626               | 0.550               | 0.365                |
| sigmau | 49.088             | 68.506              | 128.741             | 188.458              |
createParameterTableByVillage(2, householdAR1EstimationResult)
|        | Low mean, low risk | Low mean, high risk | High mean, low risk | High mean, high risk |
|--------|--------------------|---------------------|---------------------|----------------------|
| mu     | 235.906            | 243.872             | 506.555             | 525.520              |
| rho    | 0.491              | 0.285               | 0.846               | 0.520                |
| sigmau | 45.808             | 79.111              | 122.302             | 237.918              |
createParameterTableByVillage(3, householdAR1EstimationResult)
|        | Low mean, low risk | Low mean, high risk | High mean, low risk | High mean, high risk |
|--------|--------------------|---------------------|---------------------|----------------------|
| mu     | 263.292            | 288.625             | 446.590             | 642.628              |
| rho    | 0.318              | -0.150              | 0.160               | 0.199                |
| sigmau | 78.793             | 126.987             | 129.785             | 271.997              |
1.6 Save data
save.image("IntermediateData/allData.RData")