# Install and load packages ---------------
packages <- c(
  "tidyverse",
  "ggrepel",
  "WDI",
  "modelsummary",
  "forcats",
  "grid",
  "gridExtra",
  "countrycode",
  "lmtest",
  "sandwich",
  "scales",
  "haven",
  "kableExtra",
  "magrittr"
)

# Change to install = TRUE to install the required packages
pacman::p_load(packages, character.only = TRUE)
df <- read_dta("../data/HISP materials/evaluation.dta") 

df_analysis <- df %>% 
  filter(eligible == 1) %>% 
  filter(round %in% c(0, 1)) %>% 
  group_by(locality_identifier, treatment_locality, round) %>% 
  summarise_at(vars(health_expenditures), mean) %>% 
  pivot_wider(names_from = round, values_from = health_expenditures) %>% 
  rename(
    pre_health_exp = `0`,
    post_health_exp = `1`,
  ) %>% 
  ungroup()

Summary Statistics

cbind(
  c(
    "Pre-treatment health exp.",
    "Post-treatment health exp."
  ),
  df_analysis %>% 
    group_by(treatment_locality) %>% 
    summarise_at(vars(pre_health_exp, post_health_exp), list(mean, sd)) %>% 
    pivot_longer(
      cols = -treatment_locality,
      names_to = c(".value", "func"),
      names_pattern = "(.*)(_fn\\d)"
      ) %>%
    mutate(
      across(
        c(pre_health_exp, post_health_exp),
        ~ ifelse(
          func == "_fn1",
          formatC(., digits = 2, format = "f"),
          paste0("(", formatC(., digits = 2, format = "f"), ")")
          )
        ),
      ) %>% 
    select(- c(treatment_locality, func)) %>% 
    t(),
  df_analysis %>% 
    summarise_at(vars(pre_health_exp, post_health_exp), list(min, max)) %>% 
      pivot_longer(
        cols = everything(),
        names_to = c(".value", "func"),
        names_pattern = "(.*)(_fn\\d)"
        ) %>%
    mutate(
      across(
        c(pre_health_exp, post_health_exp),
        ~ formatC(., digits = 2, format = "f"),
        ),
      ) %>% 
    select(- c(func)) %>% 
    t()
  ) %>% 
  set_rownames(NULL) %>% 
  kable("html", booktabs = TRUE, align = c("l", rep("c", 6))) %>% 
  add_header_above(
    c(" ", "Average", "Sample (S.D.)", "Average", "Sample (S.D.)", "Min", "Max")
  ) %>% 
  add_header_above(
    c("Variable", "Control ($N_c$ = 99)" = 2, "Treatment ($N_t$ = 98)" = 2, " ", " ")
  ) %>% 
  kable_styling(position = "center")
Variable
Control (\(N_c\) = 99)
Treatment (\(N_t\) = 98)
Average
Sample (S.D.)
Average
Sample (S.D.)
Min
Max
Pre-treatment health exp. 14.93 (2.14) 14.49 (1.86) 8.27 25.49
Post-treatment health exp. 18.40 (3.21) 7.92 (2.95) 0.04 27.63

Regression estimates for average treatment effects

df_analysis
## # A tibble: 197 × 4
##    locality_identifier treatment_locality pre_health_exp post_health_exp
##                  <dbl>              <dbl>          <dbl>           <dbl>
##  1                   2                  0           14.6           17.9 
##  2                   3                  1           13.6           10.1 
##  3                   4                  0           14.7           17.6 
##  4                   5                  1           14.0            7.63
##  5                   6                  0           13.9           15.2 
##  6                   7                  1           16.1            9.23
##  7                   9                  1           14.8            8.65
##  8                  10                  0           16.2           24.3 
##  9                  11                  1           14.4            8.65
## 10                  12                  0           16.0           16.7 
## # … with 187 more rows
res1 <- lm(post_health_exp ~ treatment_locality, df_analysis)
res2 <- lm(post_health_exp ~ treatment_locality + pre_health_exp, df_analysis)
res3 <- lm(
  post_health_exp ~ treatment_locality + pre_health_exp + treatment_locality:(demean_covar), 
  df_analysis %>% 
    mutate(
      demean_covar = pre_health_exp - mean(pre_health_exp)
      )
  )

bind_cols(
  c("No covariates", "Pre-treatment var", "Pre-treatment var interacted with $W$"),
  map(
    list(res1, res2, res3),
    function(x) x %>% summary %>% coefficients %>% .["treatment_locality", c("Estimate", "Std. Error")]
    ) %>% 
    bind_rows() 
  ) %>% 
  mutate(
    Estimate = formatC(Estimate, digits = 2, format = "f"),
    `Std. Error` = paste0("(", formatC(`Std. Error`, digits = 2, format = "f"), ")")
  ) %>% 
  as.matrix() %>% 
  set_colnames(NULL) %>% 
  kable("html", booktabs = TRUE, align = c("l", rep("c", 2))) %>% 
  add_header_above(
    c(" ", "Estimate", "($\\widehat{\\text{s.e.}}$)")
  ) %>% 
  add_header_above(
    c("Covariates", "EFfect of assginment to treatment on post health expenditures" = 2)
  ) %>% 
  kable_styling(position = "center")
## New names:
## * NA -> ...1
Covariates
EFfect of assginment to treatment on post health expenditures
Estimate
(\(\widehat{\text{s.e.}}\))
No covariates -10.48 (0.44)
Pre-treatment var -10.29 (0.42)
Pre-treatment var interacted with \(W\) -10.28 (0.42)

Regression estimates for average treatment effects

res1 <- lm(
  post_health_exp ~ treatment_locality + pre_health_exp + treatment_locality:(demean_covar), 
  df_analysis %>% 
    mutate(
      demean_covar = pre_health_exp - mean(pre_health_exp)
      )
  )
res2 <- lm(
  log(post_health_exp) ~ treatment_locality + pre_health_exp + treatment_locality:(demean_covar), 
  df_analysis %>% 
    mutate(
      demean_covar = pre_health_exp - mean(pre_health_exp)
      )
  )



bind_cols(
  c("Assignment", "Intercept", "Pre-treatment var.", "Pre-traetment var. $\\times$ Assignment"),
  map(
    list(res1, res2),
    function(x)  x %>% 
      summary %>% 
      coefficients %>% 
      .[, c("Estimate", "Std. Error")] %>% 
      .[c(2,1,3,4),] %>% 
      as_tibble()
    )
  ) %>% 
  mutate(
    across(
      c(Estimate...2, Estimate...4),
      ~ formatC(., digits = 2, format = "f"),
    ),
    across(
      c("Std. Error...3", "Std. Error...5"),
      ~ paste0("(", formatC(., digits = 2, format = "f"), ")")
    )
  ) %>% 
  as.matrix() %>% 
  rbind(
    c(
      "R-squared",
      res1 %>% summary %>% .$r.squared %>% format(digits = 2, format = "f"),
      " ",
      res2 %>% summary %>% .$r.squared %>% format(digits = 2, format = "f"),
      " "
    )
  ) %>% 
  set_colnames(NULL) %>% 
  kable("html", booktabs = TRUE, align = c("l", rep("c", 4))) %>% 
  add_header_above(
    c(" ", rep(c("Est", "($\\widehat{\\text{s.e.}}$)"), 2))
  ) %>% 
  add_header_above(
    c("Covariates", "Model for Levels" = 2, "Model for Logs" = 2)
  ) %>% 
  kable_styling(position = "center")
## New names:
## * NA -> ...1
## * Estimate -> Estimate...2
## * `Std. Error` -> `Std. Error...3`
## * Estimate -> Estimate...4
## * `Std. Error` -> `Std. Error...5`
Covariates
Model for Levels
Model for Logs
Est
(\(\widehat{\text{s.e.}}\))
Est
(\(\widehat{\text{s.e.}}\))
Assignment -10.28 (0.42) -0.90 (0.06)
Intercept 13.92 (2.10) 2.65 (0.31)
Pre-treatment var. 0.30 (0.14) 0.02 (0.02)
Pre-traetment var. \(\times\) Assignment 0.34 (0.21) 0.14 (0.03)
R-squared 0.77 0.59

\(p\)-values for tests for constant and zero treatment effects

res <- lm(
  post_health_exp ~ treatment_locality + pre_health_exp + treatment_locality:(demean_covar), 
  df_analysis %>% 
    mutate(
      demean_covar = pre_health_exp - mean(pre_health_exp)
      )
  )

# Zero treatment effect
coef <- res %>% summary %>% coefficients %>% .[c("treatment_locality", "treatment_locality:demean_covar"), "Estimate"]
varcov <- vcov(res)[
  c("treatment_locality", "treatment_locality:demean_covar"),
  c("treatment_locality", "treatment_locality:demean_covar")
  ]

chi2_test_zero <- 1 - pchisq(
  coef %*% solve(varcov) %*% coef,
  df = 2
) 

# Fisher's exact test

set.seed(123)
treatment_perm <-  map(
  seq(1, 10000),
  ~ sample(nrow(df_analysis), nrow(df_analysis %>% filter(treatment_locality == 1)), replace = FALSE)
  ) %>% 
  bind_cols() %>% 
  as.matrix()
## New names:
## * NA -> ...1
## * NA -> ...2
## * NA -> ...3
## * NA -> ...4
## * NA -> ...5
## * ...
get_perm_res <- function(df_analysis, treatment_perm, tau) {
  
  df_tmp <- df_analysis %>% 
    mutate(
      Y1 = post_health_exp * (treatment_locality == 1) + (post_health_exp + tau) * (treatment_locality == 0),
      Y0 = post_health_exp * (treatment_locality == 0) + (post_health_exp - tau) * (treatment_locality == 1),
      )
  
  output <- map_dbl(
    seq(ncol(treatment_perm)),
    ~ abs(
      mean((df_tmp %>% pull(Y1))[treatment_perm[, .]]) - 
        mean((df_tmp %>% pull(Y0))[-treatment_perm[, .]]) - tau
    )
  )
  
  return(output)
}

perm_test_res <- get_perm_res(df_analysis, treatment_perm %>% as.matrix, 0)

df_tmp <- df_analysis %>% 
  mutate(
    Y1 = post_health_exp * (treatment_locality == 1) + (post_health_exp + tau) * (treatment_locality == 0),
    Y0 = post_health_exp * (treatment_locality == 0) + (post_health_exp - tau) * (treatment_locality == 1),
    )

actual_diff <- abs(
  mean((df_tmp %>% filter(treatment_locality == 1) %>%  pull(Y1))) - 
    mean((df_tmp %>% filter(treatment_locality == 0) %>% pull(Y0)))
  )

# ggplot() +
#   geom_histogram(aes(x = perm_test_res), binwidth = 0.01) +
#   geom_vline(xintercept = quantile(perm_test_res, .90), linetype = 5) +
#   geom_vline(xintercept = quantile(perm_test_res, .95), linetype = 4) +
#   geom_vline(xintercept = quantile(perm_test_res, .99), linetype = 3) +
#   geom_vline(xintercept = actual_diff, color = "red") +
#   theme(
#     panel.background = element_blank(),
#     panel.grid.major = element_blank(),
#     panel.grid.minor = element_blank(),
#     axis.line = element_line(colour = "black")
#     )
   
fisher_test <- mean(perm_test_res > actual_diff)

# Constant treatment effect

coef <- res %>% summary %>% coefficients %>% .["treatment_locality:demean_covar", "Estimate"]
varcov <- vcov(res)["treatment_locality:demean_covar", "treatment_locality:demean_covar"]

chi2_test_const <- 1 - pchisq(
  coef %*% solve(varcov) %*% coef,
  df = 1
) 

cbind(
  c("$\\chi^2(2)$ approximation", "Fisher exact $p$-value", "$\\chi^2(1)$ approximation"),
  c(
    formatC(chi2_test_zero, digits = 3, format = "f"), 
    formatC(fisher_test, digits = 3, format = "f"),
    formatC(chi2_test_const, digits = 3, format = "f")
    )
  ) %>% 
  kable("html", booktabs = TRUE, align = c("l", rep("c", 1))) %>% 
  add_header_above(
    c(" ", "Post health exp.")
  ) %>% 
  kable_styling(position = "center") %>% 
  pack_rows("Zero treatment effect", 1, 2) %>%
  pack_rows("Constant treatment effect", 3, 3)
Post health exp.
Zero treatment effect
\(\chi^2(2)\) approximation 0.000
Fisher exact \(p\)-value 0.000
Constant treatment effect
\(\chi^2(1)\) approximation 0.105