############################################################################################# # # Cribari-Neto and Zeiles (2010) Beta Regression in R # Journal of Statistical Software, Volume 34, Issue 2. # https://www.jstatsoft.org/article/view/v034i02 # # Section 5.1. Dyslexia and IQ predicting reading accuracy # We consider an application that analyzes reading accuracy data for # nondyslexic and dyslexic Australian children (Smithson and Verkuilen 2006). # The variable of interest is accuracy providing the scores on a test of reading # accuracy taken by 44 children, which is predicted by the two regressors dyslexia # (a factor with sum contrasts separating a dyslexic and a control group) and # intelligent quotient (iq, converted to z scores), see Figure 6 for a visualization. # The sample includes 19 dyslexics and 25 controls who were recruited from # primary schools in the Australian Capital Territory. The children’s ages ranged # from eight years five months to twelve years three months; mean reading # accuracy was 0.606 for dyslexic readers and 0.900 for controls. 
#############################################################################################
#
# Additional reference:
# Ferrari and Cribari-Neto (2004) Beta Regression for Modelling Rates and Proportions,
# Journal of Applied Statistics, 31:7, 799-815, DOI: 10.1080/0266476042000214501
#
#############################################################################################

# Install betareg only when missing: avoids a network call on every run
if (!requireNamespace("betareg", quietly = TRUE)) {
  install.packages("betareg")
}
library("betareg")
library("lmtest")

pdf("betaregression.pdf", width = 15, height = 10)

############################################################
# Reading the data and creating dependent and independent variables
############################################################
data("ReadingSkills", package = "betareg")
y <- ReadingSkills$accuracy      # reading accuracy score, in (0, 1)
x <- ReadingSkills$iq            # IQ converted to z-scores
dys <- ReadingSkills$dyslexia    # factor with levels "no"/"yes"
d <- as.numeric(dys == "yes")    # 0/1 dummy: 1 = dyslexic

par(mfrow = c(1, 1))
plot(x, y, col = d + 1, xlab = "IQ score", ylab = "Reading accuracy", pch = 16)
legend("topleft", legend = c("control", "dyslexic"), col = 1:2, pch = 16)

#################################################################
# Maximum likelihood estimation (approximation based on logit transformation)
#################################################################
# OLS on the logit of y approximates the beta-regression mean equation
y1 <- log(y / (1 - y))
ols <- lm(y1 ~ x + d + x * d)
summary(ols)

# Fitted mean curves on a 100-point IQ grid, one per group
xxx <- seq(min(x), max(x), length = 100)
fit1 <- ols$coef[1] + ols$coef[2] * xxx                                # control
fit2 <- ols$coef[1] + ols$coef[3] + (ols$coef[2] + ols$coef[4]) * xxx  # dyslexic
# Back-transform from the logit scale to (0, 1)
fit1 <- exp(fit1) / (1 + exp(fit1))
fit2 <- exp(fit2) / (1 + exp(fit2))

par(mfrow = c(1, 1))
plot(x, y, col = d + 1, xlab = "IQ score", ylab = "Reading accuracy", pch = 16)
lines(xxx, fit1, col = 1)
lines(xxx, fit2, col = 2)
legend("topleft", legend = c("control", "dyslexic"), col = 1:2, pch = 16)

#################################################################
# Beta regression: maximum likelihood estimation
#################################################################
# Mean submodel: logit(mu) ~ x * d; precision submodel: log(phi) ~ x + d
beta.fit <- betareg(y ~ x * d | x + d, hessian = TRUE)
coeftest(beta.fit)

beta1 <- beta.fit$coef$mean[1] + beta.fit$coef$mean[2] * xxx
beta2 <- beta.fit$coef$mean[1] + beta.fit$coef$mean[3] +
  (beta.fit$coef$mean[2] + beta.fit$coef$mean[4]) * xxx
beta1 <- exp(beta1) / (1 + exp(beta1))
beta2 <- exp(beta2) / (1 + exp(beta2))

par(mfrow = c(1, 1))
plot(x, y, col = d + 1, xlab = "IQ score", ylab = "Reading accuracy", pch = 1 + d)
lines(xxx, fit1, col = 1, lty = 2)
lines(xxx, fit2, col = 2, lty = 2)
lines(xxx, beta1, col = 1)
lines(xxx, beta2, col = 2)
# FIX: removed a stray empty argument ("col = 1:2,, pch = ...") that silently
# matched legend()'s unused `y` slot in the original call
legend("bottomright", legend = c("control", "dyslexic"), col = 1:2, pch = 1:2)
legend("topleft", legend = c("betareg", "lm"), lty = 1:2)

#################################################################
# Beta regression: Bayesian inference
#################################################################

# Log-likelihood of the beta regression at parameter vector `beta`:
# beta[1:4] parameterize logit(mu) (intercept, x, d, x:d),
# beta[5:7] parameterize log(phi) (intercept, x, d).
# Uses the (mu, phi) parameterization: shape1 = mu*phi, shape2 = (1-mu)*phi.
like <- function(y, x, d, beta) {
  mu <- 1 / (1 + exp(-beta[1] - beta[2] * x - beta[3] * d - beta[4] * x * d))
  phi <- exp(beta[5] + beta[6] * x + beta[7] * d)
  sum(dbeta(y, phi * mu, phi * (1 - mu), log = TRUE))
}

# Draws from the prior: independent N(beta0[i], sd = 0.5) components
beta0 <- c(2, 1, -1.5, -1, 1.5, 1.5, 3.5)
M <- 1000000
M1 <- M / 100
betas <- matrix(0, M, 7)
for (i in seq_len(7)) betas[, i] <- rnorm(M, beta0[i], 0.5)

# Draws from the posterior via SIR (sampling importance resampling);
# row-wise apply replaces the original scalar loop over 1:M
w <- apply(betas, 1, function(b) like(y, x, d, b))
w <- exp(w - max(w))   # subtract max before exp() for numerical stability
ind <- sample(seq_len(M), size = M1, prob = w, replace = TRUE)
betas1 <- betas[ind, ]

# Posterior histograms with the MLE overlaid in red
par(mfrow = c(2, 4))
for (i in 1:4) {
  hist(betas1[, i], main = paste("beta", i, sep = ""), xlab = "", prob = TRUE)
  abline(v = beta.fit$coef$mean[i], col = 2)
}
for (i in 1:3) {
  hist(betas1[, 4 + i], main = paste("beta", 4 + i, sep = ""), xlab = "", prob = TRUE)
  abline(v = beta.fit$coef$precision[i], col = 2)
}
# FIX: spelled out `labels` (original relied on partial matching of `label`)
pairs(betas1[, 1:4],
      labels = c(expression(beta[1]), expression(beta[2]),
                 expression(beta[3]), expression(beta[4])))
pairs(betas1[, 5:7],
      labels = c(expression(beta[5]), expression(beta[6]), expression(beta[7])))

# Posterior predictive analysis along the IQ grid `xxx`
y.draw0 <- matrix(0, M1, 100)  # control group draws
y.draw1 <- matrix(0, M1, 100)  # dyslexic group draws
for (i in seq_len(100)) {
  xx <- xxx[i]
  mu0 <- 1 / (1 + exp(-betas1[, 1] - betas1[, 2] * xx))
  phi0 <- exp(betas1[, 5] + betas1[, 6] * xx)
  mu1 <- 1 / (1 + exp(-(betas1[, 1] + betas1[, 3]) - (betas1[, 2] + betas1[, 4]) * xx))
  phi1 <- exp(betas1[, 5] + betas1[, 7] + betas1[, 6] * xx)
  y.draw0[, i] <- rbeta(M1, mu0 * phi0, (1 - mu0) * phi0)
  y.draw1[, i] <- rbeta(M1, mu1 * phi1, (1 - mu1) * phi1)
}
# Pointwise posterior predictive quartiles for each grid point
qy0 <- t(apply(y.draw0, 2, quantile, c(0.25, 0.5, 0.75)))
qy1 <- t(apply(y.draw1, 2, quantile, c(0.25, 0.5, 0.75)))
# Final comparison plot: data, MLE fits, and posterior predictive summaries
par(mfrow = c(1, 1))
plot(x, y, col = d + 1, xlab = "IQ score", ylab = "Reading accuracy", pch = 1 + d)
# Posterior predictive quartiles (dashed): red = dyslexic, black = control
lines(xxx, qy1[, 1], col = 2, lty = 2)
lines(xxx, qy1[, 2], col = 2, lty = 2)
lines(xxx, qy1[, 3], col = 2, lty = 2)
lines(xxx, qy0[, 1], lty = 2)
lines(xxx, qy0[, 2], lty = 2)
lines(xxx, qy0[, 3], lty = 2)
# Maximum likelihood mean curves (solid, thick)
lines(xxx, beta1, col = 1, lwd = 2)
lines(xxx, beta2, col = 2, lwd = 2)
# Posterior predictive means (green, dashed)
lines(xxx, apply(y.draw0, 2, mean), col = 3, lty = 2)
lines(xxx, apply(y.draw1, 2, mean), col = 3, lty = 2)
# FIX: removed a stray empty argument ("col = 1:2,, pch = ...") that silently
# matched legend()'s unused `y` slot in the original call
legend("bottomright", legend = c("control", "dyslexic"), col = 1:2, pch = 1:2)
legend("right", legend = c("MLE", "Bayes (quartiles)", "Bayes (mean)"),
       lty = c(1, 2, 2), col = c(1, 1, 3))
dev.off()