Replication: Fearon and Humphreys (2018)




 

 


This .Rmd file produces all tables reported in the paper Why do women cooperate more in women’s groups? from Fearon and Humphreys (2018)

Table 1: Effects of composition

# ---------------------------------- #
 # Randomization Inference Function
# ---------------------------------- #
  # Y are contributions
  # X is true allocation to treatment
  # BA are analysis blocks
  # PERM is permutation matrix 

ate_ri <- function(Y, Z, BA=rep(1, length(Y)), subset=TRUE, PERM) {
  
  # Exclude village with IRC_CODE == 30
    Y       = Y[subset]
    Z       = Z[subset]
    BA      = BA[subset]
    PERM    = PERM[subset,]
  
  # Strata weights
    BANAMES = sort(unique(BA))
    n = length(Y)       
    w = (sapply(BANAMES, function(i) sum(BA==i)))/n             
  
  # ATE
    f <- function(x){
        t = tapply(Y,list(x, BA), mean,na.rm=TRUE)
        (w%*%(t[2,]-t[1,]))[1,1]
     }
    t = tapply(Y,list(Z, BA), mean,na.rm=TRUE)
    t1 = (w%*%t[2,])[1,1]; t0 =(w%*%t[1,])[1,1]
    # ATE
    est = f(Z)          
    # ATEs for permutated data
    null = apply(PERM,2,f)                              
  
  # p-values from randomization inference 
    # p: Ha diff<0
    pL = mean(null<=est, na.rm=TRUE)
    # p: Ha diff!=0
    p  = mean(null>=abs(est), na.rm=TRUE) + mean(null<=-abs(est), na.rm=TRUE)                   
    # p: Ha diff>0
    pR = mean(null>=est, na.rm=TRUE)                                            
  
  # Neyman standard errors
    
    freq = (table(BA)/n)^2
    var  = tapply(Y, list(BA, Z), var)  
    n_ba =  tapply(Y, list(BA, Z), length)
    neyman.se = sqrt(sum(freq*(var[,1]/n_ba[,1]  +  var[,2]/n_ba[,2])))
    neyman.t = est/neyman.se
    neyman.p = 2*(1- pt(abs(neyman.t), df=n-2))
  
  # produce output
    out = c(t0, t1,est, n, pR, neyman.se);
    names(out) = c('T = 0', 'T = 1', 'ATE', 'n',  'p (pos, ri)', 'Neyman se')
    cbind(out)
  }
# ---------------------------------- #
  # Basic ATE calculation   
# ---------------------------------- #
# Define blocks to condition on CDR and district
  BA <- 10*pvill$treatment + pvill$Voinjama

  ATE_GENDER <- ate_ri(Y = g.eff_df$Y,
                       Z = g.eff_df$Z,
                       subset = ((1:83)!=30),
                       BA = BA,
                       PERM = PERM_WOMEN)
   
  ATE_GENDER_Q <- ate_ri(Y = g.eff_df$Y,
                         Z = g.eff_df$Z,
                         subset = ((1:83)!=30 & pvill$Q==1 ),
                         BA = pvill$treatment,
                        PERM = PERM_WOMEN)
  
  ATE_GENDER_NQ <- ate_ri(Y = g.eff_df$Y,
                          Z = g.eff_df$Z,
                          subset = ((1:83)!=30 & pvill$Q==0 ),
                          BA = pvill$treatment,
                          PERM=PERM_WOMEN)
      

table_gender <- round(cbind(ATE_GENDER, ATE_GENDER_Q, ATE_GENDER_NQ),2)
colnames(table_gender) <- c("All", "In quarters", "Outside quarters")
 
kable(cbind( c("Mixed Villages", "Homogeneous Villages", "Difference (ATE)" ,"N", "$p$ (ri)", "s.e. (Neyman)"), table_gender),row.names = FALSE, caption = "\\label{tab:main} Effects of composition \\\n Source: Authors' own construction ", align = c("l", "c", "c", "c"))
Effects of composition
Source: Authors’ own construction
All In quarters Outside quarters
Mixed Villages 218.55 182.15 240.86
Homogeneous Villages 248.62 232.91 252.61
Difference (ATE) 30.06 50.75 11.75
N 82 28 54
p (ri) 0 0 0.15
s.e. (Neyman) 9.43 13.59 11.73

Table 2: Expectations given different treatments

# ---------------------------------- #
  # Cross tabulation
# ---------------------------------- #
  summus <- function(vars, round = 2){
    x <- sapply(vars, function(i) {
      aggregate(replace(hh[i][,1], hh[i][,1]<0, NA),
                list(type = hh$gen3), mean, na.rm = TRUE)[,2]
    })
    rownames(x) <- conditions
    round(x, round)
  }

exp  <- summus(c("kept300", "kept0", "others", "mfgiving3"))

# Actual avg given in group by others
  # 00 Women-homog
  # 01 Women-mixed
  # 11 Men-mixed

  t <- round(tapply(hh$contrib,list(hh$mixed , hh$gender),mean,na.rm=T),2)
  
  exp_00 <-  t[1,1]
  exp_01 <- mean((t[-1,1]*(11/12) + t[-1,2])/(1 + 11/12), na.rm=T)
  exp_11 <- mean((t[,1] + t[,2]*(11/12))/(1 + 11/12), na.rm=T)

# Produce table
  exp <- round(cbind(exp[,1:3],c(exp_00, exp_01,exp_11),exp[,4]),2)
  exp[,1:2] <-round(exp[,1:2]/23,2)
  exp <- t(exp)
  
  rownames(exp) <- c("Expected share giving 0", "Expected share giving 300", "Expected average amount given by others","Actual avgerage given by others","Predict women give strictly more")
  kable(exp, caption = "\\label{tab:exp} Expectations given different treatments \\\n Source: Authors' own construction " )
Expectations given different treatments
Source: Authors’ own construction
Women in all women Women in mixed Men in mixed
Expected share giving 0 0.11 0.13 0.14
Expected share giving 300 0.85 0.80 0.81
Expected average amount given by others 273.45 258.66 254.52
Actual avgerage given by others 245.93 223.03 222.81
Predict women give strictly more 0.83 0.73 0.48

Table 3: (External) replication of Table 1 (cols 1 and 2) in Greig and Bohnet (2009)

# ------------------- #
 # Cluster Robust 
# ------------------- #

lm_cluster_robust <- function(formula, data, cluster_name, warnings = FALSE){

  # run regression
  model <- lm(as.formula(formula), data = data, na.action="na.exclude" )
 
  # check number of clusters
   not.miss<- !is.na(predict(model))
  cluster<- data[cluster_name][[1]]
  if(length(not.miss)!=length(cluster)){
    stop("check your data: cluster variable has different N than model")
  }
  M <- length(unique(cluster[not.miss]))
  N <- length(cluster[not.miss])
  K <- model$rank
  if(M<50 & warnings){
    warning("Fewer than 50 clusters, variances may be unreliable (could try block bootstrap instead).")
  }
  
  # compute robust varcov matrix
  dfc <- (M/(M - 1)) * ((N - 1)/(N - K))
  uj  <- apply(estfun(model), 2, function(x) tapply(x, cluster, sum, na.rm=TRUE));
  vcovCL <- dfc * sandwich(model, meat = crossprod(uj)/N)
  
  # return
  out <- list()
  out[[1]] <- coeftest(model, vcovCL)
  out[[2]] <- N
  out[[3]] <- vcovCL
  return(out)
}

# --------------------- #
 # Run lm with robust varcov 
# --------------------- #
  SEs  <- lm_cluster_robust("contrib ~  homog*gender", hh, cluster_name = "irccode", warnings = FALSE)
  SEs2 <- lm_cluster_robust("contrib ~  homog*gender+others", hh,cluster_name = "irccode", warnings = FALSE)
  SEs3 <- lm_cluster_robust("contrib ~  homog*gender+others_impute*others_missing", hh, cluster_name = "irccode", warnings = FALSE)
  SEs4 <- lm_cluster_robust("contrib ~  homog*gender*others_impute +others_impute*others_missing", hh, cluster_name= "irccode", warnings = FALSE)

# Create output
  x  <- output_function(SEs, coefrows = 1:3, round = 2)
  x2 <- output_function(SEs2, coefrows = 1:4, round = 2)
  x3 <- output_function(SEs3, coefrows = 1:4, round = 2)
  x4 <- output_function(SEs4, coefrows = c(1:4, 6:7), round = 2)
  X  <- cbind(c(x[-7], rep("",6), x[7]),
            c(x2[-9], rep("",4), x2[9]),
            c(x3[-9], rep("",4), x3[9]),
              x4)

# Define row and colnames
  rownames(X) <- c("constant", "sd (constant)", "Homogeneous", "sd (Homogeneous)",  "Male", "sd (Male)", "Expectations", "sd (Expectations)", "Expectations are imputed", "sd (Expectations are imputed)","Expectations * Expectations are imputed","sd (Exp * Exp are imputed)","N")
  
  r <-  c("constant", "", "Homogeneous", "",  "Male", "", "Expectations", "", "Homog * Expectations", "","Male * Expectations","","N")
  
  
  kable(cbind(r, X), row.names = FALSE, caption = "\\label{tab:rep} (External) Replication of Table 1 (cols 1 and 2)  in Greig et al (2009). Note that all variables are demeaned to simplify interpretation of main terms. \\\n Source: Authors' own construction and  Greig and Bohnet (2009) ", col.names = c("", 1:4), align = c("l","c","c","c"), digits = 2)
(External) Replication of Table 1 (cols 1 and 2) in Greig et al (2009). Note that all variables are demeaned to simplify interpretation of main terms.
Source: Authors’ own construction and Greig and Bohnet (2009)
1 2 3 4
constant 220.44 106.97 109.92 86.71
(8.39)** (20.51)** (20.49)** (32.74)**
Homogeneous 25.5 19.54 15.46 80.63
(9.65)** (10.54) (8.68) (40.24)**
Male 4.96 10.99 6.3 10.84
(6.92) (9.33) (6.88) (35.21)
Expectations 0.38 0.38 0.47
(0.07)** (0.07)** (0.12)**
Homog * Expectations -0.24
(0.15)
Male * Expectations -0.02
(0.13)
N 1979 1093 1979 1979

Table 4: Reports of mobilization activity by condition and CDR treatment status, 0-1 scale

# ------------------------------------------------------------- #
  # Table: Reports of mobilization
# ------------------------------------------------------------- #

# Select mobilization variables
  mobvars <- dplyr::select(hh, contactpr, contactgm,stayhome,  knreps, othmeets)
  mobvars[mobvars< 0] <- NA
# Combined mobilization score  
  mobvars <- apply(mobvars, 1, mean, na.rm = TRUE)
  mob <- tapply( mobvars, list(hh$gen3 ,hh$treat), mean, na.rm = TRUE )
  mob_all <- tapply( mobvars, list(hh$gen3), mean, na.rm = TRUE )
# Produce table
  mob <- cbind( mob_all, mob)
  mob <- t(mob)
  rownames(mob) <-   c("All", "No CDR", "CDR")
  colnames(mob) <- conditions
  kable(mob, digits = 2, caption= "\\label{fig:mob} Reports of mobilization activity by condition and CDR treatment status, 0-1 scale \\\n Source: Authors' own construction" )
Reports of mobilization activity by condition and CDR treatment status, 0-1 scale
Source: Authors’ own construction
Women in all women Women in mixed Men in mixed
All 0.46 0.41 0.47
No CDR 0.47 0.33 0.42
CDR 0.45 0.48 0.51

Table 5: Parameter estimates

# ------------------------------------------------------------- #
  # Multilevel Bayesian model 
# ------------------------------------------------------------- #
 (M <- stan_model("X_only_normal.stan"))
S4 class stanmodel 'X_only_normal' coded as follows:
// Program for Liberia Study Model
// 2017/01/14
// version with no village level effects on non-negative parameters


functions {

  // function makes weights for categorical distribution given parameters
  // key assumption is alpha_i is distributed normal 
  // Then different values of alpha_i determine different choices.
  // requires gamma positive. Choices in 0:3
  vector make_weights(real E, real q, real r,
                      real alpha, real gamma, real rho, real phi, real c) {

    vector[4] w_U;

   // max category: probabilities calculated assuming choice on  0-3 scale for utilty
   // even though options are labelled 1-4

    w_U[1] =
        normal_cdf(  ( gamma * 1 - 2 * gamma * rho * E - r - phi * q ),
                      alpha, c);
    w_U[2] =
        normal_cdf(  ( gamma * 3 - 2 * gamma * rho * E - r - phi * q ),
                      alpha, c ) -
        normal_cdf(  ( gamma * 1 - 2 * gamma * rho * E - r - phi * q ),
                      alpha, c );
     w_U[3] =
        normal_cdf(  ( gamma * 5 - 2 * gamma * rho * E - r - phi * q ),
                      alpha, c ) -
        normal_cdf(  ( gamma * 3 - 2 * gamma * rho * E - r - phi * q ),
                      alpha, c );
    w_U[4] =
        1 -
        normal_cdf(  ( gamma * 5 - 2 * gamma * rho * E - r - phi * q ),
                      alpha, c );

    return w_U;
  }
}

data {
  // counters
  int<lower=0> N;             // number of subjects
  int<lower=0> K;             // number of villages

  // main data: Note some redundancy in this data; but makes the transformations more transparent
  
  int<lower=1,upper=4>              X[N];                  // contributions made on 1-4 scale
  real<lower=0,upper=3>             E[N];                  // expectations  on 0-3 scale, continuous
  int<lower=2,upper=5>              r[N];                  // multipliers
  real<lower=0,upper=1>             q[N];                  // probability of punishment from survey
  int<lower=1,upper=K>              village_id[N];         // village identifier
  int<lower=0,upper=1>              homog[K];              // homog (1) / mixed (0) treatment village identifier
  int<lower=0,upper=1>              mixed[K];              // homog (0) / mixed (1) treatment village identifier
  int<lower=0,upper=1>              male[N];               // male (1) / female (0) individual
  int<lower=0,upper=1>              fhomog[N];             // female in homog (1) other (0) 
  int<lower=0,upper=1>              fmixed[N];             // female in mixed (1) other (0) 
  real<lower=0>                     sigma[7];              // variance for priors

  }
parameters {
 
 // village random intercepts for village varying parameters: alpha,  phi set to 0 mean
 // positive lower bound on sigma
  vector[K] alpha_re;
  real <lower=.1>  sigma_alpha_re;
  vector[K] phi_re;
  real <lower=.1>  sigma_phi_re;

 // treatment condition  parameters  
  real alpha_F;     // condition effect for alpha MALE
  real alpha_M;     // condition effect for alpha MALE
  real alpha_H;     // condition effect for alpha HOMOG
  real phi_F;       // condition effect for phi
  real phi_M;       // condition effect for phi
  real phi_H;       // condition effect for phi
  real<lower=0.01> gamma_F;     // condition effect for gamma
  real<lower=0.01> gamma_M;     // condition effect for gamma
  real<lower=0.01> gamma_H;     // condition effect for gamma
  real<lower=0.01> rho_F;       // condition effect for rho
  real<lower=0.01> rho_M;       // condition effect for rho
  real<lower=0.01> rho_H;       // condition effect for rho
  real<lower=0.01> c_MX;        // condition effect for c (assumed common across genders in mixed communities)
  real<lower=0.01> c_H;         // condition effect for c (assumed common across genders)
 }

transformed parameters {

  // primitives setup on individual level
  real                     alpha[N];         // intrinsic value of public good
  real                     phi[N];           // expected punishment
  real<lower=0>            gamma[N];         // conformity concerns
  real<lower=0>            rho[N];           // conformity reference point
  real<lower=0>            c[N];             // range of intrinsic value of public good
  

  // transform to individual level 
  for (n in 1:N) {
    alpha[n] = alpha_re[village_id[n]]  + alpha_M*male[n]   + alpha_H*fhomog[n] + alpha_F*fmixed[n];
    phi[n]   = phi_re[village_id[n]]    + phi_M*male[n]     + phi_H*fhomog[n]   + phi_F*fmixed[n];
    gamma[n] =                            gamma_M*male[n]   + gamma_H*fhomog[n] + gamma_F*fmixed[n];
    rho[n]   =                            rho_M*male[n]     + rho_H*fhomog[n]   + rho_F*fmixed[n];  
    c[n]     = c_H*(homog[village_id[n]]) + c_MX*(mixed[village_id[n]]);                                                         
    }
}

    
model {
  // weights for categorical distribution
  vector[4] w_U[N];
 
  // generate weights for each individual choice using custom function
  for (n in 1:N) {
      w_U[n] = make_weights( E[n], q[n], r[n],
                             alpha[n], gamma[n], rho[n], phi[n], c[n] );
    }

 
  
  // Likelihood
  for (n in 1:N) {
    X[n] ~ categorical( w_U[n] );
    }

  // Priors
  alpha_re   ~ normal(0, sigma_alpha_re);
  phi_re   ~ normal(0, sigma_phi_re);

  sigma_alpha_re ~ normal(0, 10);
  sigma_phi_re   ~ normal(0, 10);
  
  alpha_F    ~ normal(0, sigma[1]);  # K base levels -- differs across villages
  alpha_M    ~ normal(0, sigma[1]);  # MALE
  alpha_H    ~ normal(0, sigma[1]);  # wHOMOG
  phi_F      ~ normal(0, sigma[4]);  # K base levels -- differs across villages
  phi_M      ~ normal(0, sigma[4]);  # Effect of MALE
  phi_H      ~ normal(0, sigma[4]);  # Effect of HOMOG
  rho_F      ~ normal(0, sigma[3]);
  rho_M      ~ normal(0, sigma[3]);
  rho_H      ~ normal(0, sigma[3]);
  gamma_F    ~ normal(0, sigma[2]);
  gamma_M    ~ normal(0, sigma[2]);
  gamma_H    ~ normal(0, sigma[2]);
  c_H        ~ normal(0, sigma[5]);
  c_MX       ~ normal(0, sigma[5]);
   // Prior on boast implicit uniform over 0, 1
  } 
  hh_f <- dplyr::select(hh, contrib, intst, notanon2, others_impute, irccode, gen3, irccode, mixed, Z, gender)
  hh_f <- dplyr::filter(hh_f, complete.cases(hh_f))
  
  
  hh_stan <- list(N = nrow(hh_f),
                  K = length(unique(hh_f$irccode)),
                  village_id = hh_f$irccode,
                  r = hh_f$intst,
                  q = .01+0.98*hh_f$notanon2,
                  fhomog = ifelse(hh_f$Z == 1, 1, 0),
                  fmixed = ifelse(hh_f$Z == 2, 1, 0),
                  male = ifelse(hh_f$Z == 3, 1, 0),
                  mixed = (dplyr::summarize(group_by(hh_f, irccode), mixed = mean(mixed))$mixed),
                  homog = (1-dplyr::summarize(group_by(hh_f, irccode), mixed = mean(mixed))$mixed),
                  X = 1+hh_f$contrib/100,         # On 1-4 scale
                  E = (hh_f$others_impute)/100,   # On 0-3 scale
                  sigma = rep(5, 7) #,
                  )
# Extract posterior estimates
  posterior <- rstan::extract(X_re, permuted = TRUE)
  posterior_df_small <- dplyr::select(as.data.frame(posterior),
                                      alpha_F, alpha_M, alpha_H,
                                      phi_F,   phi_M,   phi_H,
                                      rho_F,   rho_M,   rho_H,
                                      gamma_F, gamma_M, gamma_H,
                                      c_MX, c_H,
                                      sigma_alpha_re,
                                      sigma_phi_re)
  
# Create table
  out <- cbind(mean = apply(posterior_df_small, 2, mean),
               sd = apply(posterior_df_small, 2, sd))

  
# define row names
  rows <-  c("$\\sigma_\\alpha$", "$\\sigma_\\phi$", 
             "$\\alpha_F$", "$\\alpha_M$", "$\\alpha_H$", 
             "$\\phi_F$", "$\\phi_M$", "$\\phi_H$",
             "$\\gamma_F$", "$\\gamma_M$", "$\\gamma_H$",
             "$\\rho_F$", "$\\rho_M$", "$\\rho_H$",
             "$\\sigma_F / \\sigma_M$", "$\\sigma_H$")
  T <- round(summary(X_re)$summary,2)[c(83+1, (2*83+2):(2*83+16)),c(1,3)]

# reshape
  ptable <- round(
            with(posterior_df_small, 
           {rbind(
              alpha = c(mean(alpha_M), mean(alpha_F), mean(alpha_H), mean(alpha_H-alpha_F), mean(alpha_H>alpha_F)),
              sigma = c(mean(c_MX), mean(c_MX), mean(c_H), mean(c_H-c_MX), mean(c_H>c_MX)),
              phi = c(mean(phi_M), mean(phi_F), mean(phi_H), mean(phi_H-phi_F), mean(phi_H>phi_F)),
              gamma = c(mean(gamma_M), mean(gamma_F), mean(gamma_H), mean(gamma_H-gamma_F), mean(gamma_H>gamma_F)), 
              rho = c(mean(rho_M), mean(rho_F), mean(rho_H), mean(rho_H-rho_F), mean(rho_H>rho_F))
              )}),2)
  
  
 ptable <- rbind(rep("-",5), ptable)
 kable(cbind(motiv, ptable), 
          col.names = c("Parameter", "Motivation/Preference", "Estimation assumptions","Men (mixed)", "Women (mixed)", "Women only", "Composition Effect", "Pr >0"), 
          row.names = FALSE, caption = "\\label{fig:maine} Parameter estimates. Note that $\\sigma$ is constrained to be the same for men and women in the mixed condition. Final column shows the posterior probability that the difference between women only and mixed conditions (for women) is positive.", align = c("cccccc"))
Parameter estimates. Note that \sigma is constrained to be the same for men and women in the mixed condition. Final column shows the posterior probability that the difference between women only and mixed conditions (for women) is positive.
Parameter Motivation/Preference Estimation assumptions Men (mixed) Women (mixed) Women only Composition Effect Pr >0
\alpha_i i’s marginal value for contributing independent of use of funds, matching and sanctioning concerns Varies across individuals within gender groups in villages. \alpha_i is not estimated. - - - - -
\alpha Mean of the distribution from which \alpha_i is drawn Varies by community and potential condition for each gender. 0.57 0.17 5.65 5.49 0.92
\sigma Standard Deviation of the distribution from which \alpha_i is drawn. Varies by potential condition. 12.21 12.21 12.01 -0.2 0.48
\phi Weight on contributing to avoid sanctioning/discomfort if revealed to have given less than 300LD Varies by community and potential condition for each gender 1.54 0.43 2.94 2.51 0.75
\gamma Weight put on matching target contribution \rho\hat{x}_i. Varies by potential condition for each gender. 3.03 2.91 2.09 -0.82 0.04
\rho Share of reported expectation \hat x_i that i would ideally match if no other motivations. Varies by potential condition for each gender. 0.95 0.95 0.77 -0.18 0.26