data = read.csv("eleicoes-presidenciais-2022.csv") data = data[!is.na(data[,4]),] # Exploratory data analysis lulaSP = data[data[,3]=="SP",5] lulaBA = data[data[,3]=="BA",5] lulaSPBA = c(lulaSP,lulaBA) lulaBR = data[,5] pdf(file="eleicoes-presidenciais-2022.pdf",width=12,height=10) plot(density(lulaSP),xlim=c(0,1),ylim=c(0,6),lwd=2, xlab="% Votos para Lula nos municipios",main="") lines(density(lulaBA),col=2,lwd=2) lines(density(lulaSPBA),col=3,lwd=2) lines(density(lulaBR),col=4,lwd=2) abline(v=0.5,lty=2) legend("topleft",legend=c("SP","BA","SP+BA","BR"),col=1:4,lty=1,bty="n",lwd=2) hist(lulaBR,xlim=c(0,1),prob=TRUE,xlab="% Votos para Lula nos municipios",main="",col=grey(0.8)) theta = seq(0,1,length=1000) lines(density(lulaBR),lwd=3) lines(theta,0.525*dbeta(theta,7.5,10.5)+0.475*dbeta(theta,10,4),lwd=3,col=2) legend("topleft",legend=c("%Lula","Density estimation","Mixture of Betas"),col=c(grey(0.7),1,2),lwd=2,lty=1,bty="n") title("0.525Beta(7.5,10.5)+0.475Beta(10,4)\n Mode=(0.41,0.75) - StDev=(0.113,0.117)") # Preparing covariates for the regression model n = nrow(data) ABS = data[,7] SP = rep(0,n) BA = rep(0,n) POP = rep(0,n) SP[data[,3]=="SP"]=1 BA[data[,3]=="BA"]=1 POP[data[,4]>20000]=1 # Frequentist Gaussian linear regression fit.ols = lm(lulaBR ~ ABS+POP+SP+BA) summary(fit.ols) coef = fit.ols$coef par(mfrow=c(1,1)) plot(ABS,lulaBR,ylim=c(0,1),ylab="Votos Lula",xlab="Abstenções") points(ABS,lm(lulaBR~ABS)$fit,col=8,pch=16) points(ABS,coef[1]+coef[2]*ABS,col=2,pch=16) points(ABS,coef[1]+coef[2]*ABS+coef[3],col=3,pch=16) points(ABS,coef[1]+coef[2]*ABS+coef[4],col=4,pch=16) points(ABS,coef[1]+coef[2]*ABS+coef[3]+coef[4],col=5,pch=16) points(ABS,coef[1]+coef[2]*ABS+coef[3]+coef[5],col=6,pch=16) points(ABS,coef[1]+coef[2]*ABS+coef[5],col=7,pch=16) legend("bottomright",legend=c("Pop<20mil & BR-SP-BA","Pop>20mil & BR-SP-BA","Pop<20mil & SP","Pop>20mil & SP","Pop>20mil & BA","Pop<20mil & BA"),col=2:7,lwd=2,bty="n",pch=16) legend("topright",legend="Irrestrito",col=8,lty=1,pch=16,bty="n",lwd=2) abline(h=0.5,lty=2,lwd=3) abline(h=0.3,lty=2,lwd=3) abline(h=0.65,lty=2,lwd=3) title("Frequentist Gaussian linear regression") # Frequentist Beta Regression # Francisco Cribari-Neto & Achim Zelleis # Beta Regression in R. Journal of Statistical Software, 34(2), 1–24. 2010 # https://www.jstatsoft.org/article/view/v034i02 # install.packages("betareg") library("betareg") fit.beta0 = betareg(lulaBR ~ ABS,link="logit") fit.beta = betareg(lulaBR ~ ABS+POP+SP+BA,link="logit") summary(fit.beta0) summary(fit.beta) coef0 = fit.beta0$coef$mean coef = fit.beta$coef$mean plot(ABS,lulaBR,ylim=c(0,1),ylab="Votos Lula",xlab="Abstenções") eta = coef0[1]+coef0[2]*ABS points(ABS,1/(1+exp(-eta)),col=8) eta = coef[1]+coef[2]*ABS points(ABS,1/(1+exp(-eta)),col=2) eta = coef[1]+coef[2]*ABS+coef[3] points(ABS,1/(1+exp(-eta)),col=3) eta = coef[1]+coef[2]*ABS+coef[4] points(ABS,1/(1+exp(-eta)),col=4) eta = coef[1]+coef[2]*ABS+coef[3]+coef[4] points(ABS,1/(1+exp(-eta)),col=5) eta = coef[1]+coef[2]*ABS+coef[3]+coef[5] points(ABS,1/(1+exp(-eta)),col=6) eta = coef[1]+coef[2]*ABS+coef[5] points(ABS,1/(1+exp(-eta)),col=7) legend("bottomright",legend=c("Pop<20mil & BR-SP-BA","Pop>20mil & BR-SP-BA", "Pop<20mil & SP","Pop>20mil & SP","Pop>20mil & BA","Pop<20mil & BA", "Irrestrito"),col=2:8,lwd=2,bty="n",pch=16) abline(h=0.5,lty=2,lwd=3) abline(h=0.3,lty=2,lwd=3) abline(h=0.65,lty=2,lwd=3) title("Frequentist Beta regression") # Bayesian Beta Regression # betaBayes: Bayesian Beta Regression # Zhou and Huang (2022) . #install.packages("betaBayes") library("betaBayes") fit.bbeta0 = beta4reg(lulaBR ~ ABS,link="logit",model="mean", mcmc=list(nburn=5000, nsave=5000, nskip=0, ndisplay=1000)) coef0 = fit.bbeta0$coef[1:2] eta0 = coef0[1]+coef0[2]*ABS eta0 = 1/(1+exp(-eta0)) fit.bbeta = beta4reg(lulaBR ~ ABS+POP+SP+BA,link="logit",model="mean", mcmc=list(nburn=5000, nsave=5000, nskip=0, ndisplay=1000)) coef = fit.bbeta$coef[1:5] par(mfrow=c(1,1)) plot(ABS,lulaBR,ylim=c(0,1),ylab="Votos Lula",xlab="Abstenções") points(ABS,eta0,col=8) eta = coef[1]+coef[2]*ABS points(ABS,1/(1+exp(-eta)),col=2) eta = coef[1]+coef[2]*ABS+coef[3] points(ABS,1/(1+exp(-eta)),col=3) eta = coef[1]+coef[2]*ABS+coef[4] points(ABS,1/(1+exp(-eta)),col=4) eta = coef[1]+coef[2]*ABS+coef[3]+coef[4] points(ABS,1/(1+exp(-eta)),col=5) eta = coef[1]+coef[2]*ABS+coef[3]+coef[5] points(ABS,1/(1+exp(-eta)),col=6) eta = coef[1]+coef[2]*ABS+coef[5] points(ABS,1/(1+exp(-eta)),col=7) legend("bottomright",legend=c("Pop<20mil & BR-SP-BA","Pop>20mil & BR-SP-BA", "Pop<20mil & SP","Pop>20mil & SP","Pop>20mil & BA","Pop<20mil & BA", "Irrestrito"),col=2:8,lwd=2,bty="n",pch=16) abline(h=0.5,lty=2,lwd=3) abline(h=0.3,lty=2,lwd=3) abline(h=0.65,lty=2,lwd=3) title("Bayesian Beta regression") betas = fit.bbeta$beta namecoef = c("Intercepto","Abstenções","POP>20mil","SP","BA") par(mfrow=c(3,5)) for (i in 1:5) ts.plot(betas[i,],xlab="Iteration",ylab="",main=namecoef[i]) for (i in 1:5) acf(betas[i,],main="") for (i in 1:5) hist(betas[i,],prob=TRUE,xlab="",main="") par(mfrow=c(1,1)) plot(density(1/(1+exp(-betas[1,]))),col=2,xlim=c(0.2,0.7),ylim=c(0,40),lwd=2, main="Intercepto para os diferentes grupos") lines(density(1/(1+exp(-betas[1,]-betas[3,]))),col=3,lwd=2) lines(density(1/(1+exp(-betas[1,]-betas[4,]))),col=4,lwd=2) lines(density(1/(1+exp(-betas[1,]-betas[3,]-betas[4,]))),col=5,lwd=2) lines(density(1/(1+exp(-betas[1,]-betas[3,]-betas[5,]))),col=6,lwd=2) lines(density(1/(1+exp(-betas[1,]-betas[5,]))),col=7,lwd=2) legend("topright",legend=c("Pop<20mil & BR-SP-BA","Pop>20mil & BR-SP-BA", "Pop<20mil & SP","Pop>20mil & SP","Pop>20mil & BA","Pop<20mil & BA"), col=2:7,lwd=2,bty="n",pch=16) abline(v=0.5,lty=2) abline(v=0.3,lty=2) abline(v=0.65,lty=2) dev.off()