#########################
# PART ONE: Bernoulli trials
#########################

# Observed binary outcomes and their sufficient statistics.
y <- c(1, 1, 1, 1, 0, 0, 0, 1, 1, 1)
n <- length(y)
sumy <- sum(y)

# A) Maximum likelihood estimation ----

# MLE of the success probability; under the plug-in rule this is also
# Pr(y11 = 1 | y1, ..., y10).
theta.mle <- mean(y)
theta.mle
# 0.7

# Wald confidence intervals from the asymptotic normal approximation
# theta.mle +/- z * sqrt(theta.mle * (1 - theta.mle) / n).
theta.mle + qnorm(c(0.05, 0.95)) * sqrt(theta.mle * (1 - theta.mle) / n)
theta.mle + qnorm(c(0.025, 0.975)) * sqrt(theta.mle * (1 - theta.mle) / n)
theta.mle + qnorm(c(0.005, 0.995)) * sqrt(theta.mle * (1 - theta.mle) / n)
# 90% confidence interval - (0.462 ; 0.938)
# 95% confidence interval - (0.416 ; 0.984)
# 99% confidence interval - (0.327 ; 1.073)
# NOTE: the 99% interval exceeds 1 because the normal approximation
# ignores the [0, 1] support of theta.

# B) Bayesian estimation ----

# Prior knowledge: theta ~ Beta(a, b) with a = b = 3.
a <- 3
b <- 3
thetas <- seq(0, 1, length = 1000)
par(mfrow = c(1, 1))
plot(thetas, dbeta(thetas, a, b), lwd = 2, type = "l")

# Prior mean a / (a + b) and standard deviation of a Beta(a, b).
a / (a + b)
sqrt(a * b / ((a + b)^2 * (a + b + 1)))
# 0.5
# 0.189

# Prior credibility intervals (central Beta quantiles).
qbeta(c(0.05, 0.95), a, b)
qbeta(c(0.025, 0.975), a, b)
qbeta(c(0.005, 0.995), a, b)
# 90% prior credibility interval - (0.189 ; 0.811)
# 95% prior credibility interval - (0.147 ; 0.853)
# 99% prior credibility interval - (0.083 ; 0.917)

# Posterior inference (conjugate analysis): a Beta prior with a Bernoulli
# likelihood yields a Beta(a + sum(y), b + n - sum(y)) posterior.
a1 <- sumy + a
b1 <- n - sumy + b

# Posterior mean and standard deviation.
a1 / (a1 + b1)
sqrt(a1 * b1 / ((a1 + b1)^2 * (a1 + b1 + 1)))
# Posterior mean = 0.625
# Posterior standard deviation = 0.117

# Posterior credibility intervals.
qbeta(c(0.05, 0.95), a1, b1)
qbeta(c(0.025, 0.975), a1, b1)
qbeta(c(0.005, 0.995), a1, b1)
# 90% posterior credibility interval - (0.423 ; 0.809)
# 95% posterior credibility interval - (0.384 ; 0.837)
# 99% posterior credibility interval - (0.312 ; 0.883)

# Posterior predictive probability:
# Pr(y11 = 1 | y1, ..., y10) = E(theta | y1, ..., y10) = 0.625

###########################
# PART TWO: Logistic regression
###########################

# Regressor values paired with the binary responses y above.
x <- c(-0.028, 0.222, 0.022, 0.272, -0.378, -0.328, -0.128, -0.178, 0.242, 0.282)
plot(x, y, xlab = "Regressor (x)", ylab = "Response (y)", pch = 16)

# A) Maximum likelihood estimation - GLM
# (restored '#' that was lost in the original; bare text would not parse)
# Fit the logistic regression by maximum likelihood and keep the fit so the
# coefficients can be extracted programmatically instead of hand-transcribed.
fit.mle <- glm(y ~ x, family = binomial)
summary(fit.mle)

# Fitted probabilities on a grid over the regressor range.
# Previously the rounded values 2.877 and 17.995 were copied by hand from the
# summary output; coef() keeps them exact and in sync with the data.
xx <- seq(-0.6, max(x), length = 1000)
eta <- coef(fit.mle)[1] + coef(fit.mle)[2] * xx
prob.mle <- 1 / (1 + exp(-eta))
plot(x, y, xlab = "Regressor (x)", ylab = "Probability response=one")
lines(xx, prob.mle, col = 2)

# B) Bayesian estimation (via sampling-importance-resampling)
# Ignore for now the computation involved in obtaining the
# posterior distribution of (alpha,beta) given {(y1,x1),...,(yn,xn)}

# Number of proposal draws.
M <- 10000

# Independent normal proposals for the intercept and slope.
alpha <- rnorm(M, 3, 3)
beta <- rnorm(M, 18, 14)

# Success probability implied by each proposal draw at each observed x
# (inverse-logit of the linear predictor).
prob <- matrix(0, M, n)
for (i in seq_len(n)) {
  eta <- alpha + beta * x[i]
  prob[, i] <- 1 / (1 + exp(-eta))
}

# Importance weights: Bernoulli likelihood of the data under each draw,
# normalized to sum to one.
w <- rep(0, M)
for (i in seq_len(M)) {
  w[i] <- prod(dbinom(y, 1, prob[i, ]))
}
w <- w / sum(w)

# Resample draws proportionally to the weights (the "R" in SIR); the
# resampled pairs approximate the joint posterior of (alpha, beta).
ind <- sample(seq_len(M), size = M, replace = TRUE, prob = w)
alphas <- alpha[ind]
betas <- beta[ind]

# Posterior predictive probability curve on the grid, with 90% pointwise
# bands (5%, 50%, 95% quantiles across the posterior draws).
xx <- seq(-0.6, max(x), length = 1000)
prob <- matrix(0, M, 1000)
for (i in seq_len(1000)) {
  eta <- alphas + betas * xx[i]
  prob[, i] <- 1 / (1 + exp(-eta))
}
qprob <- t(apply(prob, 2, quantile, c(0.05, 0.5, 0.95)))

# Joint posterior scatter plot.
par(mfrow = c(1, 1))
plot(alphas, betas, xlab = "alpha", ylab = "beta", main = "Joint posterior")

# Marginal posteriors; red vertical lines mark the MLEs (again taken from
# the fit rather than hard-coded).
par(mfrow = c(1, 2))
hist(alphas, prob = TRUE, xlab = "alpha", ylab = "Marginal posterior density", main = "")
abline(v = coef(fit.mle)[1], col = 2, lwd = 2)
hist(betas, prob = TRUE, xlab = "beta", ylab = "Marginal posterior density", main = "")
abline(v = coef(fit.mle)[2], col = 2, lwd = 2)

# Overlay: data, Bayesian posterior median and 90% bands (magenta),
# and the ML fitted curve (blue).
par(mfrow = c(1, 1))
plot(x, y, xlim = range(xx), xlab = "Regressor (x)",
     ylab = "Probability response=one", pch = 16)
lines(xx, qprob[, 1], col = 6, lwd = 2, lty = 2)
lines(xx, qprob[, 2], col = 6, lwd = 2)
lines(xx, qprob[, 3], col = 6, lwd = 2, lty = 2)
lines(xx, prob.mle, col = 4, lwd = 2)
legend("topleft", legend = c("mle", "bayes"), col = c(4, 6), lwd = 2, lty = 1, bty = "n")
abline(h = 0.5, lty = 2)