# sim study Bad controls - Cinelli's paper nreps<-1000 nvec<-c(100,200,500) tau <- 5 # true value of the treatment effect # Model M1 ests.matM1<-array(0,c(length(nvec),2,nreps)) set.seed(12345) for(nval in 1:length(nvec)){ n<-nvec[nval] for(irep in 1:nreps){ Z <- rnorm(n) X <- Z + rnorm(n) Y <- tau * X + Z + rnorm(n) ests.matM1[nval,1,irep] <- coefficients(lm (Y ~ X))[2] ests.matM1[nval,2,irep] <- coefficients(lm (Y ~ X + Z))[2] } } results.M1 <- rbind(apply(ests.matM1[1,,],1,mean), apply(ests.matM1[2,,],1,mean), apply(ests.matM1[3,,],1,mean)) rownames(results.M1) <- c('n=100','n=200','n=500') colnames(results.M1) <- c('lm (Y ~ X)','lm (Y ~ X + Z)') results.M1 # Model M2 ests.matM2<-array(0,c(length(nvec),2,nreps)) set.seed(89345) for(nval in 1:length(nvec)){ n<-nvec[nval] for(irep in 1:nreps){ U <- rnorm(n) Z <- U + rnorm(n) X <- Z + rnorm(n) Y <- tau * X + U + rnorm(n) ests.matM2[nval,1,irep] <- coefficients(lm (Y ~ X))[2] ests.matM2[nval,2,irep] <- coefficients(lm (Y ~ X + Z))[2] } } results.M2 <- rbind(apply(ests.matM2[1,,],1,mean), apply(ests.matM2[2,,],1,mean), apply(ests.matM2[3,,],1,mean)) rownames(results.M2) <- c('n=100','n=200','n=500') colnames(results.M2) <- c('lm (Y ~ X)','lm (Y ~ X + Z)') results.M2 # Model M3 ests.matM3<-array(0,c(length(nvec),2,nreps)) set.seed(90765) for(nval in 1:length(nvec)){ n<-nvec[nval] for(irep in 1:nreps){ U <- rnorm(n) Z <- U + rnorm(n) X <- U + rnorm(n) Y <- tau * X + Z + rnorm(n) ests.matM3[nval,1,irep] <- coefficients(lm (Y ~ X))[2] ests.matM3[nval,2,irep] <- coefficients(lm (Y ~ X + Z))[2] } } results.M3 <- rbind(apply(ests.matM3[1,,],1,mean), apply(ests.matM3[2,,],1,mean), apply(ests.matM3[3,,],1,mean)) rownames(results.M3) <- c('n=100','n=200','n=500') colnames(results.M3) <- c('lm (Y ~ X)','lm (Y ~ X + Z)') results.M3 # Model M4 ests.matM4<-array(0,c(length(nvec),2,nreps)) set.seed(12345) for(nval in 1:length(nvec)){ n<-nvec[nval] for(irep in 1:nreps){ Z <- rnorm(n) X <- Z + rnorm(n) M <- tau * X + Z + rnorm(n) Y <- M + rnorm(n) ests.matM4[nval,1,irep] <- coefficients(lm (Y ~ X))[2] ests.matM4[nval,2,irep] <- coefficients(lm (Y ~ X + Z))[2] } } results.M4 <- rbind(apply(ests.matM4[1,,],1,mean), apply(ests.matM4[2,,],1,mean), apply(ests.matM4[3,,],1,mean)) rownames(results.M4) <- c('n=100','n=200','n=500') colnames(results.M4) <- c('lm (Y ~ X)','lm (Y ~ X + Z)') results.M4 # Model M5 ests.matM5<-array(0,c(length(nvec),2,nreps)) set.seed(89345) for(nval in 1:length(nvec)){ n<-nvec[nval] for(irep in 1:nreps){ U <- rnorm(n) Z <- U + rnorm(n) X <- Z + rnorm(n) M <- tau * X + U + rnorm(n) Y <- M + rnorm(n) ests.matM5[nval,1,irep] <- coefficients(lm (Y ~ X))[2] ests.matM5[nval,2,irep] <- coefficients(lm (Y ~ X + Z))[2] } } results.M5 <- rbind(apply(ests.matM5[1,,],1,mean), apply(ests.matM5[2,,],1,mean), apply(ests.matM5[3,,],1,mean)) rownames(results.M5) <- c('n=100','n=200','n=500') colnames(results.M5) <- c('lm (Y ~ X)','lm (Y ~ X + Z)') results.M5 # Model M6 ests.matM6<-array(0,c(length(nvec),2,nreps)) set.seed(90765) for(nval in 1:length(nvec)){ n<-nvec[nval] for(irep in 1:nreps){ U <- rnorm(n) Z <- U + rnorm(n) X <- U + rnorm(n) M <- tau * X + Z + rnorm(n) Y <- M + rnorm(n) ests.matM6[nval,1,irep] <- coefficients(lm (Y ~ X))[2] ests.matM6[nval,2,irep] <- coefficients(lm (Y ~ X + Z))[2] } } results.M6 <- rbind(apply(ests.matM6[1,,],1,mean), apply(ests.matM6[2,,],1,mean), apply(ests.matM6[3,,],1,mean)) rownames(results.M6) <- c('n=100','n=200','n=500') colnames(results.M6) <- c('lm (Y ~ X)','lm (Y ~ X + Z)') results.M6 # Model M7 ests.matM7<-array(0,c(length(nvec),2,nreps)) set.seed(90765) for(nval in 1:length(nvec)){ n<-nvec[nval] for(irep in 1:nreps){ U1 <- rnorm(n) U2 <- rnorm(n) Z <- U1 + U2 + rnorm(n) X <- U1 + rnorm(n) Y <- tau * X + U2 + rnorm(n) ests.matM7[nval,1,irep] <- coefficients(lm (Y ~ X))[2] ests.matM7[nval,2,irep] <- coefficients(lm (Y ~ X + Z))[2] } } results.M7 <- rbind(apply(ests.matM7[1,,],1,mean), apply(ests.matM7[2,,],1,mean), apply(ests.matM7[3,,],1,mean)) rownames(results.M7) <- c('n=100','n=200','n=500') colnames(results.M7) <- c('lm (Y ~ X)','lm (Y ~ X + Z)') results.M7 # Model M8 ests.matM8<-array(0,c(length(nvec),2,nreps)) set.seed(12345) for(nval in 1:length(nvec)){ n<-nvec[nval] for(irep in 1:nreps){ Z <- rnorm(n) X <- rnorm(n) Y <- tau * X + Z + rnorm(n) ests.matM8[nval,1,irep] <- coefficients(lm (Y ~ X))[2] ests.matM8[nval,2,irep] <- coefficients(lm (Y ~ X + Z))[2] } } results.M8 <- rbind(apply(ests.matM8[1,,],1,mean), apply(ests.matM8[2,,],1,mean), apply(ests.matM8[3,,],1,mean)) rownames(results.M8) <- c('n=100','n=200','n=500') colnames(results.M8) <- c('lm (Y ~ X)','lm (Y ~ X + Z)') results.M8 results.M8_rmse <- sqrt(rbind(apply((ests.matM8[1,,]-tau)^2,1,mean), apply((ests.matM8[2,,]-tau)^2,1,mean), apply((ests.matM8[3,,]-tau)^2,1,mean))) rownames(results.M8_rmse) <- c('n=100','n=200','n=500') colnames(results.M8_rmse) <- c('lm (Y ~ X)','lm (Y ~ X + Z)') results.M8_rmse # Model M9 ests.matM9<-array(0,c(length(nvec),2,nreps)) set.seed(12345) for(nval in 1:length(nvec)){ n<-nvec[nval] for(irep in 1:nreps){ Z <- rnorm(n) X <- Z + rnorm(n) Y <- tau * X + rnorm(n) ests.matM9[nval,1,irep] <- coefficients(lm (Y ~ X))[2] ests.matM9[nval,2,irep] <- coefficients(lm (Y ~ X + Z))[2] } } results.M9 <- rbind(apply(ests.matM9[1,,],1,mean), apply(ests.matM9[2,,],1,mean), apply(ests.matM9[3,,],1,mean)) rownames(results.M9) <- c('n=100','n=200','n=500') colnames(results.M9) <- c('lm (Y ~ X)','lm (Y ~ X + Z)') results.M9 results.M9_rmse <- sqrt(rbind(apply((ests.matM9[1,,]-tau)^2,1,mean), apply((ests.matM9[2,,]-tau)^2,1,mean), apply((ests.matM9[3,,]-tau)^2,1,mean))) rownames(results.M9_rmse) <- c('n=100','n=200','n=500') colnames(results.M9_rmse) <- c('lm (Y ~ X)','lm (Y ~ X + Z)') results.M9_rmse # Model 10 ests.matM10<-array(0,c(length(nvec),2,nreps)) set.seed(12345) for(nval in 1:length(nvec)){ n<-nvec[nval] for(irep in 1:nreps){ U <- rnorm(n) Z <- rnorm(n) X <- U + Z + rnorm(n) Y <- tau * X + U + rnorm(n) ests.matM10[nval,1,irep] <- coefficients(lm (Y ~ X))[2] ests.matM10[nval,2,irep] <- coefficients(lm (Y ~ X + Z))[2] } } results.M10 <- rbind(apply(ests.matM10[1,,],1,mean), apply(ests.matM10[2,,],1,mean), apply(ests.matM10[3,,],1,mean)) rownames(results.M10) <- c('n=100','n=200','n=500') colnames(results.M10) <- c('lm (Y ~ X)','lm (Y ~ X + Z)') results.M10 # Model M11 ests.matM11<-array(0,c(length(nvec),2,nreps)) set.seed(12345) for(nval in 1:length(nvec)){ n<-nvec[nval] for(irep in 1:nreps){ X <- rnorm(n) Z <- X*tau + rnorm(n) Y <- cbind(1,Z)%*%c(1,1) + rnorm(n) ests.matM11[nval,1,irep] <- coefficients(lm (Y ~ X))[2] ests.matM11[nval,2,irep] <- coefficients(lm (Y ~ X + Z))[2] } } results.M11 <- rbind(apply(ests.matM11[1,,],1,mean), apply(ests.matM11[2,,],1,mean), apply(ests.matM11[3,,],1,mean)) rownames(results.M11) <- c('n=100','n=200','n=500') colnames(results.M11) <- c('lm (Y ~ X)','lm (Y ~ X + Z)') results.M11 # Model M12 ests.matM12<-array(0,c(length(nvec),2,nreps)) set.seed(12345) for(nval in 1:length(nvec)){ n<-nvec[nval] for(irep in 1:nreps){ X <- rnorm(n) M <- X*tau + rnorm(n) Z <- M + rnorm(n) Y <- cbind(1,Z)%*%c(1,1) + rnorm(n) ests.matM12[nval,1,irep] <- coefficients(lm (Y ~ X))[2] ests.matM12[nval,2,irep] <- coefficients(lm (Y ~ X + Z))[2] } } results.M12 <- rbind(apply(ests.matM12[1,,],1,mean), apply(ests.matM12[2,,],1,mean), apply(ests.matM12[3,,],1,mean)) rownames(results.M12) <- c('n=100','n=200','n=500') colnames(results.M12) <- c('lm (Y ~ X)','lm (Y ~ X + Z)') results.M12 # Model M13 ests.matM13<-array(0,c(length(nvec),2,nreps)) set.seed(12345) for(nval in 1:length(nvec)){ n<-nvec[nval] for(irep in 1:nreps){ Z <- rnorm(n) X <- rnorm(n) M <- tau * X + Z + rnorm(n) Y <- M + rnorm(n) ests.matM13[nval,1,irep] <- coefficients(lm (Y ~ X))[2] ests.matM13[nval,2,irep] <- coefficients(lm (Y ~ X + Z))[2] } } results.M13 <- rbind(apply(ests.matM13[1,,],1,mean), apply(ests.matM13[2,,],1,mean), apply(ests.matM13[3,,],1,mean)) rownames(results.M13) <- c('n=100','n=200','n=500') colnames(results.M13) <- c('lm (Y ~ X)','lm (Y ~ X + Z)') results.M13 results.M13_rmse <- sqrt(rbind(apply((ests.matM13[1,,]-tau)^2,1,mean), apply((ests.matM13[2,,]-tau)^2,1,mean), apply((ests.matM13[3,,]-tau)^2,1,mean))) rownames(results.M13_rmse) <- c('n=100','n=200','n=500') colnames(results.M13_rmse) <- c('lm (Y ~ X)','lm (Y ~ X + Z)') results.M13_rmse # Model M14 ests.matM14<-array(0,c(length(nvec),2,nreps)) set.seed(12345) for(nval in 1:length(nvec)){ n<-nvec[nval] for(irep in 1:nreps){ X <- rnorm(n) Z <- X + rnorm(n) Y <- X*tau + rnorm(n) ests.matM14[nval,1,irep] <- coefficients(lm (Y ~ X))[2] ests.matM14[nval,2,irep] <- coefficients(lm (Y ~ X + Z))[2] } } results.M14 <- rbind(apply(ests.matM14[1,,],1,mean), apply(ests.matM14[2,,],1,mean), apply(ests.matM14[3,,],1,mean)) rownames(results.M14) <- c('n=100','n=200','n=500') colnames(results.M14) <- c('lm (Y ~ X)','lm (Y ~ X + Z)') results.M14 results.M14_rmse <- sqrt(rbind(apply((ests.matM14[1,,]-tau)^2,1,mean), apply((ests.matM14[2,,]-tau)^2,1,mean), apply((ests.matM14[3,,]-tau)^2,1,mean))) rownames(results.M14_rmse) <- c('n=100','n=200','n=500') colnames(results.M14_rmse) <- c('lm (Y ~ X)','lm (Y ~ X + Z)') results.M14_rmse # Model M15 ests.matM15<-array(0,c(length(nvec),2,nreps)) set.seed(12345) for(nval in 1:length(nvec)){ n<-nvec[nval] for(irep in 1:nreps){ U <- rnorm (n) X <- rnorm(n) Z <- X + rnorm(n) W <- Z + U + rnorm(n) Y <- X*tau + U + rnorm(n) ests.matM15[nval,1,irep] <- coefficients(lm (Y ~ X))[2] ests.matM15[nval,2,irep] <- coefficients(lm (Y ~ X + Z))[2] } } results.M15 <- rbind(apply(ests.matM15[1,,],1,mean), apply(ests.matM15[2,,],1,mean), apply(ests.matM15[3,,],1,mean)) rownames(results.M15) <- c('n=100','n=200','n=500') colnames(results.M15) <- c('lm (Y ~ X)','lm (Y ~ X + Z)') results.M15 results.M15_rmse <- sqrt(rbind(apply((ests.matM15[1,,]-tau)^2,1,mean), apply((ests.matM15[2,,]-tau)^2,1,mean), apply((ests.matM15[3,,]-tau)^2,1,mean))) rownames(results.M15_rmse) <- c('n=100','n=200','n=500') colnames(results.M15_rmse) <- c('lm (Y ~ X)','lm (Y ~ X + Z)') results.M15_rmse # Model M16 ests.matM16<-array(0,c(length(nvec),2,nreps)) set.seed(12345) for(nval in 1:length(nvec)){ n<-nvec[nval] for(irep in 1:nreps){ U <- rnorm (n) X <- rnorm(n) Z <- X + U + rnorm(n) Y <- X*tau + U + rnorm(n) ests.matM16[nval,1,irep] <- coefficients(lm (Y ~ X))[2] ests.matM16[nval,2,irep] <- coefficients(lm (Y ~ X + Z))[2] } } results.M16 <- rbind(apply(ests.matM16[1,,],1,mean), apply(ests.matM16[2,,],1,mean), apply(ests.matM16[3,,],1,mean)) rownames(results.M16) <- c('n=100','n=200','n=500') colnames(results.M16) <- c('lm (Y ~ X)','lm (Y ~ X + Z)') results.M16 # Model M17 ests.matM17<-array(0,c(length(nvec),2,nreps)) set.seed(12345) for(nval in 1:length(nvec)){ n<-nvec[nval] for(irep in 1:nreps){ X <- rnorm(n) Y <- X*tau + rnorm(n) Z <- X + Y + rnorm(n) ests.matM17[nval,1,irep] <- coefficients(lm (Y ~ X))[2] ests.matM17[nval,2,irep] <- coefficients(lm (Y ~ X + Z))[2] } } results.M17 <- rbind(apply(ests.matM17[1,,],1,mean), apply(ests.matM17[2,,],1,mean), apply(ests.matM17[3,,],1,mean)) rownames(results.M17) <- c('n=100','n=200','n=500') colnames(results.M17) <- c('lm (Y ~ X)','lm (Y ~ X + Z)') results.M17 # Model M18 ests.matM18<-array(0,c(length(nvec),2,nreps)) set.seed(12345) for(nval in 1:length(nvec)){ n<-nvec[nval] for(irep in 1:nreps){ X <- rnorm(n) Y <- X*tau + rnorm(n) Z <- Y + rnorm(n) ests.matM18[nval,1,irep] <- coefficients(lm (Y ~ X))[2] ests.matM18[nval,2,irep] <- coefficients(lm (Y ~ X + Z))[2] } } results.M18 <- rbind(apply(ests.matM18[1,,],1,mean), apply(ests.matM18[2,,],1,mean), apply(ests.matM18[3,,],1,mean)) rownames(results.M18) <- c('n=100','n=200','n=500') colnames(results.M18) <- c('lm (Y ~ X)','lm (Y ~ X + Z)') results.M18