############################################################################
#
# Midterm take-home exam - SOLUTION
# Bayesian linear hierarchical model
#
# Advanced Bayesian Econometrics
# PhD in Business Economics at INSPER
# Hedibert Freitas Lopes
# February 20th 2021
#
############################################################################
#
# Revisiting Example 5.6 (pages 169-60) of Gamerman and Lopes (2006) 
# on simple linear hierarchical regression on time Souza (1997) entertain 
# several hierarchical and dynamic models to describe the nutritional 
# pattern of pregnant women.  She entertains the following linear 
# hierarchical model
#
# yij     ~ N(alphai+betai*xij,1/tau2)
# alphai  ~ N(alpha.c,1/tau.a)
# betai   ~ N(beta.c,1/tau.b)
# alpha.c ~ N(0,1000)
# beta.c  ~ N(0,1000)
# tau.a   ~ G(0.001,0.001)
# tau.a   ~ G(0.001,0.001)
# tau2    ~ G(0.001,0.001)
#
# for i=1,...,I=68, j=1,...,J=7.
#
############################################################################

I  = 68
J  =  7
yo = matrix(yvec,I,J,byrow = TRUE)
x  = matrix(xvec,I,J,byrow = TRUE)


# Missing imputation via the extrapolation of the fitted Patient regressions
bhat = lm(yvec~xvec)$coef
y = yo
nm = rep(0,I)
for(i in 1:I){
  nm[i] = sum(is.na(yo[i,]))
  if(nm[i]>0){
    bhat = lm(y[i,1:(J-nm[i])]~x[i,1:(J-nm[i])])$coef
    for(j in 1:nm[i])
      y[i,(J-nm[i])+j] = sum(bhat*c(1,x[i,(J-nm[i])+j]))
  }
}
y1  =  matrix(t(y),I*J,1)[,1]   #y and y1 have imputed values


# mat = t(cbind(matrix(t(matrix(1:I,I,J)),I*J,1),
# rep(rep(1:J),I),xvec,round(y1,1)))
# write(mat,"2021-midterm-takehome-exam.txt",ncol=4)
# 

data=read.table("2021-bayesianeconometrics-midterm-takehome-exam.txt",header=TRUE)
xvec = data[,3]
yvec = data[,4]
I    = 68
J    =  7
x = matrix(xvec,I,J,byrow=TRUE)
y = matrix(yvec,I,J,byrow=TRUE)

# Pooled regression
X              = cbind(1,xvec)
iXtX           = solve(t(X)%*%X)
coef.pooled    = iXtX%*%t(X)%*%yvec
sig.pooled     = sqrt(sum((yvec-X%*%coef.pooled)^2)/(I*J-2))
se.coef.pooled = sig.pooled*sqrt(diag(iXtX))


# Patient regressions
coef     = matrix(0,I,2)
sig      = rep(0,I)
se.coef  = matrix(0,I,2)
int.coef = array(0,c(2,I,2))
qs = qt(c(0.025,0.975),df = J-2)
for(i in 1:I){
  xx       = cbind(1,x[i,])
  yy       = y[i,]
  ixtx     = solve(t(xx)%*%xx)
  coef[i,] = ixtx%*%t(xx)%*%yy
  sig[i]   = sqrt(sum((yy-xx%*%coef[i,])^2)/(J-2))
  se.coef[i,]    = sig[i]*sqrt(diag(ixtx))
  int.coef[1,i,] = coef[i,1]+qs*se.coef[i,1]
  int.coef[2,i,] = coef[i,2]+qs*se.coef[i,2]
}


# Some plots
par(mfrow = c(1,1))
plot(xvec,yvec,xlab="Gestational age (in weeks)",ylab="Weight gain (in kilograms",ylim=c(-12,30),cex=0.5,pch = 16)
for(i in 1:I)
  abline(coef[i,],col=grey(0.85))
points(xvec,yvec,cex = 0.5,pch=16)
ind = sort(order(coef[,1])[seq(1,I,length = 7)])
for(i in 1:7){
  abline(lm(y[ind[i],]~x[ind[i],])$coef,col=8-i)
  points(x[ind[i],],y[ind[i],],pch=16,col=8-i)
}
abline(coef.pooled,lwd=5)
legend("topleft",legend=ind,col=7:1,bty="n",pch=16)


par(mfrow = c(1,5))
boxplot(sig,ylim=range(sig,sig.pooled),main="Model standard deviation")
abline(h=sig.pooled,col=2)
boxplot(coef[,1],main="Intercepts")
abline(h=coef.pooled[1],col=2,lwd=2)
boxplot(se.coef[,1],ylim=range(se.coef[,1],se.coef.pooled[1]),main="Standard error\nIntercepts")
abline(h=se.coef.pooled[1],col=2,lwd=2)
boxplot(coef[,2],main="Slopes")
abline(h=coef.pooled[2],col=2,lwd=2)
boxplot(se.coef[,2],ylim=range(se.coef[,2],se.coef.pooled[2]),main="Standard error\nSlopes")
abline(h=se.coef.pooled[2],col=2,lwd=2)


names = c("Intercept","Slope")
par(mfrow = c(1,2))
for(i in 1:2){
  range = range(int.coef[i,,],coef.pooled[i])
  plot(1:I,coef[,i],ylim=range,xlim = c(0,I),xlab="Patient",ylab = names[i],pch = 16)
  segments(1:I,int.coef[i,,1],1:I,int.coef[i,,2])
  abline(h=coef.pooled[i]+qt(0.025,df = I*J-2)*se.coef.pooled[i],col=2,lty = 2)
  abline(h=coef.pooled[i]+qt(0.975,df = I*J-2)*se.coef.pooled[i],col=2,lty = 2)
  abline(h=coef.pooled[i],col=2)
}

# Bayesian hierarchical model
# ---------------------------

# Prior hyperparameters
m.alphac = 0
m.betac  = 0
v.alphac = 100
v.betac  = 100
a.sig2a  = 0.001
b.sig2a  = 0.001
a.sig2b  = 0.001
b.sig2b  = 0.001
a.sig2   = 0.001
b.sig2   = 0.001


# Initial values
alpha  = coef[,1]
beta   = coef[,2]
sig2   = sig.pooled^2
alphac = mean(alpha)
betac  = mean(beta)
sig2a  = var(alpha)
sig2b  = var(beta)

# MCMC setup
burnin = 10000
M      = 10000
niter  = burnin+M
alphas = matrix(0,niter,I)
betas  = matrix(0,niter,I)
params = matrix(0,niter,5)

# Gibbs sampler for linear Gaussian hierarchical model in action!
for(iter in 1:niter){
  # Sampling (alphai,betai) jointly
  iA = diag(1/c(sig2a,sig2b))
  iAa = iA%*%c(alphac,betac)
  for(i in 1:I){
    xx       = cbind(1,x[i,])
    yy       = y[i,]
    var      = solve(t(xx)%*%xx/sig2+iA)
    mean     = var%*%(t(xx)%*%yy/sig2+iAa)
    theta    = mean+t(chol(var))%*%rnorm(2)
    alpha[i] = theta[1]
    beta[i]  = theta[2]
  }

  # Sampling alphac
  var    = 1/(1/v.alphac+I/sig2a)
  mean   = var*(m.alphac/v.alphac+sum(alpha)/sig2a)
  alphac = rnorm(1,mean,sqrt(var))

  # Sampling betac
  var   = 1/(1/v.betac+I/sig2b)
  mean  = var*(m.betac/v.betac+sum(beta)/sig2b)
  betac = rnorm(1,mean,sqrt(var))

  # Sampling sigma2alpha
  sig2a = 1/rgamma(1,a.sig2a+I/2,b.sig2a+sum((alpha-alphac)^2)/2)

  # Sampling sigma2beta
  sig2b = 1/rgamma(1,a.sig2b+I/2,b.sig2b+sum((beta-betac)^2)/2)

  # Sampling sigma2
  error = y-matrix(alpha,I,J)-matrix(beta,I,J)*x
  sig2  = 1/rgamma(1,a.sig2+I*J/2,b.sig2+sum(error^2)/2)

  # Storing the output
  alphas[iter,] = alpha
  betas[iter,]  = beta
  params[iter,] = c(sig2,alphac,betac,sig2a,sig2b)
}
alphas = alphas[(burnin+1):niter,]
betas  = betas[(burnin+1):niter,]
params = params[(burnin+1):niter,]


par(mfrow = c(1,1))
range = range(betas,int.coef[1,,])
boxplot(alphas,outline = FALSE,xlab="Patient",ylab = "Intercept",ylim=range)
points(coef[,1],col=2,pch = 16)
for(i in 1:I)
  segments(i,int.coef[1,i,1],i,int.coef[1,i,2],col=2)
abline(h=quantile(params[,2],0.025),col=4,lty = 2)
abline(h=quantile(params[,2],0.500),col=4)
abline(h=quantile(params[,2],0.975),col=4,lty = 2)
abline(h=coef.pooled[1]+qt(0.025,df = I*J-2)*se.coef.pooled[1],col=6,lty = 2)
abline(h=coef.pooled[1]+qt(0.975,df = I*J-2)*se.coef.pooled[1],col=6,lty = 2)
abline(h=coef.pooled[1],col=6)
title("Red: Individual OLS regression and pooled regression\n Blue: common intercept")

par(mfrow = c(1,1))
range = range(betas,int.coef[2,,])
boxplot(betas,outline = FALSE,xlab="Patient",ylab = "Slope",ylim=range)
points(coef[,2],col=2,pch = 16)
for(i in 1:I)
  segments(i,int.coef[2,i,1],i,int.coef[2,i,2],col=2)
abline(h=quantile(params[,3],0.025),col=4,lty = 2)
abline(h=quantile(params[,3],0.500),col=4)
abline(h=quantile(params[,3],0.975),col=4,lty = 2)
abline(h=coef.pooled[2]+qt(0.025,df = I*J-2)*se.coef.pooled[2],col=6,lty = 2)
abline(h=coef.pooled[2]+qt(0.975,df = I*J-2)*se.coef.pooled[2],col=6,lty = 2)
abline(h=coef.pooled[2],col=6)
title("Red: Individual OLS regression and pooled regression\n Blue: common slope")


med.alphas = apply(alphas,2,median)
med.betas  = apply(betas,2,median)
med.alphac = median(params[,2])
med.betac  = median(params[,3])

par(mfrow=c(2,3))
plot(coef[,1],med.alphas,ylab="Hierarchical linear model",xlab="OLS Patient regressions")
title(expression(alpha))
abline(0,1)
points(coef.pooled[1],med.alphac,col=2,pch=16)
hist(params[,2],xlab="",prob=TRUE,main=expression(alpha[c]))
hist(sqrt(params[,4]),xlab="",prob=TRUE,main=expression(sigma[alpha]))

plot(coef[,2],med.betas,ylab="Hierarchical linear model",xlab="OLS Patient regressions")
title(expression(beta))
abline(0,1)
points(coef.pooled[2],med.betac,col=2,pch=16)
hist(params[,3],xlab="",prob=TRUE,main=expression(beta[c]))
hist(sqrt(params[,5]),xlab="",prob=TRUE,main=expression(sigma[beta]))

par(mfrow=c(1,1))
hist(sqrt(params[,1]),xlab="",prob=TRUE,main=expression(sigma))