data = read.csv("https://hedibert.org/wp-content/uploads/2024/05/eleicoes-presidenciais-2022.csv")
data = data[!is.na(data[,4]),]
head(data)
## code_muni name_muni abbrev_state eleitores prop_Lula prop_B
## 1 1100015 Alta Floresta D'oeste RO 19397 0.2898382 0.6585508
## 2 1100023 Ariquemes RO 69761 0.2097480 0.7352028
## 3 1100031 Cabixi RO 4781 0.3231863 0.6275960
## 4 1100049 Cacoal RO 66497 0.2403642 0.6954839
## 5 1100056 Cerejeiras RO 12930 0.1993061 0.7473212
## 6 1100064 Colorado Do Oeste RO 13741 0.2610487 0.6819821
## prop_abstencoes
## 1 0.2739083
## 2 0.2623816
## 3 0.2486927
## 4 0.2317849
## 5 0.2240526
## 6 0.2735609
lulaSP = data[data[,3]=="SP",5]
lulaBA = data[data[,3]=="BA",5]
lulaSPBA = c(lulaSP,lulaBA)
lulaBR = data[,5]
plot(density(lulaSP),xlim=c(0,1),ylim=c(0,6),lwd=2,xlab="Prop. votos para Lula nos municipios",main="")
lines(density(lulaBA),col=2,lwd=2)
lines(density(lulaSPBA),col=3,lwd=2)
lines(density(lulaBR),col=4,lwd=2)
abline(v=0.5,lty=2)
legend("topleft",legend=c("SP","BA","SP+BA","BR"),col=1:4,lty=1,bty="n",lwd=2)
hist(lulaBR,xlim=c(0,1),prob=TRUE,xlab="Prop. votos para Lula nos municipios",main="",col=grey(0.8),ylab="Densidade")
theta = seq(0,1,length=1000)
lines(density(lulaBR),lwd=3)
lines(theta,0.525*dbeta(theta,7.5,10.5)+0.475*dbeta(theta,10,4),lwd=3,col=2)
legend("topleft",legend=c("Prop. Lula","Estimação de densidade","Mixtura of Betas"),col=c(grey(0.7),1,2),lwd=2,lty=1,bty="n")
title("0.525Beta(7.5,10.5)+0.475Beta(10,4)\n Mode=(0.41,0.75) - StDev=(0.113,0.117)")
n = nrow(data)
ABS = data[,7]
SP = rep(0,n)
BA = rep(0,n)
POP = rep(0,n)
SP[data[,3]=="SP"]=1
BA[data[,3]=="BA"]=1
POP[data[,4]>20000]=1
fit.ols = lm(lulaBR ~ ABS+POP+SP+BA)
summary(fit.ols)
##
## Call:
## lm(formula = lulaBR ~ ABS + POP + SP + BA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.42608 -0.11898 -0.00976 0.12673 0.38034
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.461867 0.010182 45.360 <2e-16 ***
## ABS 0.458619 0.047353 9.685 <2e-16 ***
## POP -0.044056 0.005170 -8.521 <2e-16 ***
## SP -0.162875 0.006969 -23.372 <2e-16 ***
## BA 0.183394 0.008472 21.648 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1635 on 5560 degrees of freedom
## Multiple R-squared: 0.1944, Adjusted R-squared: 0.1938
## F-statistic: 335.3 on 4 and 5560 DF, p-value: < 2.2e-16
coef = fit.ols$coef
par(mfrow=c(1,1))
plot(ABS,lulaBR,ylim=c(0,1),xlim=c(0,0.6),ylab="Prop. votos para Lula nos municipios",xlab="Prop. Abstenções")
points(ABS,lm(lulaBR~ABS)$fit,col=8,pch=16)
points(ABS,coef[1]+coef[2]*ABS,col=2,pch=16)
points(ABS,coef[1]+coef[2]*ABS+coef[3],col=3,pch=16)
points(ABS,coef[1]+coef[2]*ABS+coef[4],col=4,pch=16)
points(ABS,coef[1]+coef[2]*ABS+coef[3]+coef[4],col=5,pch=16)
points(ABS,coef[1]+coef[2]*ABS+coef[3]+coef[5],col=6,pch=16)
points(ABS,coef[1]+coef[2]*ABS+coef[5],col=7,pch=16)
legend("bottomright",legend=c("Pop<20mil & BR-SP-BA","Pop>20mil & BR-SP-BA","Pop<20mil & SP","Pop>20mil & SP","Pop>20mil & BA","Pop<20mil & BA"),col=2:7,lwd=2,bty="n",pch=16)
legend("topright",legend="Irrestrito",col=8,lty=1,pch=16,bty="n",lwd=2)
abline(h=0.5,lty=2,lwd=3)
abline(h=0.3,lty=2,lwd=3)
abline(h=0.65,lty=2,lwd=3)
title("Regressão linear Gaussiana")
Francisco Cribari-Neto & Achim Zelleis, Beta Regression in R. Journal of Statistical Software, 34(2), 1–24. 2010. https://www.jstatsoft.org/article/view/v034i02
# install.packages("betareg")
library("betareg")
fit.beta0 = betareg(lulaBR ~ ABS,link="logit")
fit.beta = betareg(lulaBR ~ ABS+POP+SP+BA,link="logit")
summary(fit.beta0)
##
## Call:
## betareg(formula = lulaBR ~ ABS, link = "logit")
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -2.7407 -0.7812 -0.1275 0.8108 2.7350
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.17593 0.04391 -4.007 6.16e-05 ***
## ABS 1.68106 0.20180 8.330 < 2e-16 ***
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 6.925 0.123 56.32 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 1848 on 3 Df
## Pseudo R-squared: 0.01281
## Number of iterations: 10 (BFGS) + 2 (Fisher scoring)
summary(fit.beta)
##
## Call:
## betareg(formula = lulaBR ~ ABS + POP + SP + BA, link = "logit")
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -3.1300 -0.6946 -0.0876 0.7005 2.9513
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.13875 0.04096 -3.388 0.000705 ***
## ABS 1.81096 0.19100 9.481 < 2e-16 ***
## POP -0.17760 0.02079 -8.541 < 2e-16 ***
## SP -0.62518 0.02818 -22.183 < 2e-16 ***
## BA 0.71528 0.03624 19.735 < 2e-16 ***
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 8.3691 0.1504 55.66 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 2371 on 6 Df
## Pseudo R-squared: 0.1875
## Number of iterations: 13 (BFGS) + 2 (Fisher scoring)
coef0 = fit.beta0$coef$mean
coef = fit.beta$coef$mean
plot(ABS,lulaBR,ylim=c(0,1),xlim=c(0,0.6),ylab="Prop. votos para Lula nos municipios",xlab="Prop. Abstenções")
eta = coef0[1]+coef0[2]*ABS
points(ABS,1/(1+exp(-eta)),col=8)
eta = coef[1]+coef[2]*ABS
points(ABS,1/(1+exp(-eta)),col=2)
eta = coef[1]+coef[2]*ABS+coef[3]
points(ABS,1/(1+exp(-eta)),col=3)
eta = coef[1]+coef[2]*ABS+coef[4]
points(ABS,1/(1+exp(-eta)),col=4)
eta = coef[1]+coef[2]*ABS+coef[3]+coef[4]
points(ABS,1/(1+exp(-eta)),col=5)
eta = coef[1]+coef[2]*ABS+coef[3]+coef[5]
points(ABS,1/(1+exp(-eta)),col=6)
eta = coef[1]+coef[2]*ABS+coef[5]
points(ABS,1/(1+exp(-eta)),col=7)
legend("bottomright",legend=c("Pop<20mil & BR-SP-BA","Pop>20mil & BR-SP-BA",
"Pop<20mil & SP","Pop>20mil & SP","Pop>20mil & BA","Pop<20mil & BA",
"Irrestrito"),col=2:8,lwd=2,bty="n",pch=16)
abline(h=0.5,lty=2,lwd=3)
abline(h=0.3,lty=2,lwd=3)
abline(h=0.65,lty=2,lwd=3)
title("Regressão Beta")
Abaixo usamos o pacote betaBayes: Bayesian Beta Regression, Zhou and Huang (2022) doi:10.1016/j.csda.2021.107345
#install.packages("betaBayes")
library("betaBayes")
fit.bbeta0 = beta4reg(lulaBR ~ ABS,link="logit",model="mean",mcmc=list(nburn=5000, nsave=5000, nskip=0, ndisplay=1000))
## scan = 1000
## scan = 2000
## scan = 3000
## scan = 4000
## scan = 5000
coef0 = fit.bbeta0$coef[1:2]
eta0 = coef0[1]+coef0[2]*ABS
eta0 = 1/(1+exp(-eta0))
fit.bbeta = beta4reg(lulaBR ~ ABS+POP+SP+BA,link="logit",model="mean",mcmc=list(nburn=5000, nsave=5000, nskip=0, ndisplay=1000))
## scan = 1000
## scan = 2000
## scan = 3000
## scan = 4000
## scan = 5000
coef = fit.bbeta$coef[1:5]
betas = fit.bbeta$beta
namecoef = c("Intercepto","Abstenções","POP>20mil","SP","BA")
par(mfrow=c(3,5))
for (i in 1:5)
ts.plot(betas[i,],xlab="Iteration",ylab="",main=namecoef[i])
for (i in 1:5)
acf(betas[i,],main="")
for (i in 1:5)
hist(betas[i,],prob=TRUE,xlab="",main="")
par(mfrow=c(1,1))
plot(ABS,lulaBR,ylim=c(0,1),xlim=c(0,0.6),ylab="Prop. votos para Lula nos municipios",xlab="Prop. Abstenções")
points(ABS,eta0,col=8)
eta = coef[1]+coef[2]*ABS
points(ABS,1/(1+exp(-eta)),col=2)
eta = coef[1]+coef[2]*ABS+coef[3]
points(ABS,1/(1+exp(-eta)),col=3)
eta = coef[1]+coef[2]*ABS+coef[4]
points(ABS,1/(1+exp(-eta)),col=4)
eta = coef[1]+coef[2]*ABS+coef[3]+coef[4]
points(ABS,1/(1+exp(-eta)),col=5)
eta = coef[1]+coef[2]*ABS+coef[3]+coef[5]
points(ABS,1/(1+exp(-eta)),col=6)
eta = coef[1]+coef[2]*ABS+coef[5]
points(ABS,1/(1+exp(-eta)),col=7)
legend("bottomright",legend=c("Pop<20mil & BR-SP-BA","Pop>20mil & BR-SP-BA",
"Pop<20mil & SP","Pop>20mil & SP","Pop>20mil & BA","Pop<20mil & BA",
"Irrestrito"),col=2:8,lwd=2,bty="n",pch=16)
abline(h=0.5,lty=2,lwd=3)
abline(h=0.3,lty=2,lwd=3)
abline(h=0.65,lty=2,lwd=3)
title("Regressão Beta Bayesiana")
par(mfrow=c(1,1))
plot(density(1/(1+exp(-betas[1,]))),col=2,xlim=c(0,1),ylim=c(0,40),lwd=2,ylab="Densidade",
main="Intercepto para os diferentes grupos",xlab="Prop. votos para Lula nos municipios")
lines(density(1/(1+exp(-betas[1,]-betas[3,]))),col=3,lwd=2)
lines(density(1/(1+exp(-betas[1,]-betas[4,]))),col=4,lwd=2)
lines(density(1/(1+exp(-betas[1,]-betas[3,]-betas[4,]))),col=5,lwd=2)
lines(density(1/(1+exp(-betas[1,]-betas[3,]-betas[5,]))),col=6,lwd=2)
lines(density(1/(1+exp(-betas[1,]-betas[5,]))),col=7,lwd=2)
legend("topright",legend=c("Pop<20mil & BR-SP-BA","Pop>20mil & BR-SP-BA",
"Pop<20mil & SP","Pop>20mil & SP","Pop>20mil & BA","Pop<20mil & BA"),
col=2:7,lwd=2,bty="n",pch=16)
abline(v=0.5,lty=2)
abline(v=0.3,lty=2)
abline(v=0.65,lty=2)