##################################################################################################
# 
# Simple linear regression with Student-t errors
#
#         y(i) = alpha + beta*x(i) + sig*e(i)      
#
# where e(1),...,e(n) are iid t(v)
#
# Prior distribution:
#
# alpha ~ N(0,Va)
# beta  ~ N(0,Vb)
#
# (sig,v) are fixed
#
##################################################################################################
#
# Hedibert Freitas Lopes
# Booth School of Business 
# University of Chicago
#
##################################################################################################
logpost = function(alpha,beta){
-0.5*(alpha-mua)^2/Va-0.5*(beta-mub)^2/Vb-0.5*(v+1)*sum(log(1+(y-alpha-beta*x)^2/(v*sig2)))
}

pdf(file="hw1-2013-solution.pdf",width=12,height=10)

# Reading the data
# ----------------
data = read.table("school-spending.txt",header=TRUE)
y = data[,2]
x = data[,3]/10000

# OLS estimation
# --------------
par(mfrow=c(1,1))
plot(x,y,main="OLS fit")
abline(lm(y~x),col=2,lwd=2)

# Fixed parameters and prior hyperparameters
# ------------------------------------------
v      = 4.46
sig2   = 46.7^2
mua    = 0
mub    = 0
Va     = 100000^2
Vb     = 100000^2

# Kernel of the posterior density
# -------------------------------
N      = 200
alphas = seq(-300,200,length=N)
betas  = seq(200,900,length=N)
lpost  = matrix(0,N,N)
for (i in 1:N)
  for (j in 1:N)
    lpost[i,j] = logpost(alphas[i],betas[j])
post = exp(lpost-max(lpost))
ind = 1:N
alpham = alphas[sum(ind*apply(lpost==max(lpost),1,sum))]
betam  = betas[sum(ind*apply(lpost==max(lpost),2,sum))]

par(mfrow=c(1,1))
contour(alphas,betas,post,drawlabels=FALSE,xlab=expression(alpha),ylab=expression(beta))

# Proposal density: Not a wonderful proposal but it will work
# -----------------------------------------------------------
Sa = 100
Sb = 100
lprop  = matrix(0,N,N)
for (i in 1:N)
  for (j in 1:N)
    lprop[i,j]  = dnorm(alphas[i],alpham,Sa,log=TRUE)+dnorm(betas[j],betam,Sb,log=TRUE)
prop = exp(lprop-max(lprop))

par(mfrow=c(1,1))
image(alphas,betas,prop,xlab=expression(alpha),ylab=expression(beta))
contour(alphas,betas,post,add=TRUE,drawlabels=FALSE)
      
# a) SIR for posterior simulation
# -------------------------------
set.seed(34548)
M       = 100000
M1      = 10000
alphas1 = rnorm(M,alpham,Sa)
betas1  = rnorm(M,betam,Sb)
w       = rep(0,M)
for (i in 1:M)
  w[i] = logpost(alphas1[i],betas1[i])-
         dnorm(alphas1[i],alpham,Sa,log=TRUE)-
         dnorm(betas1[i],betam,Sb,log=TRUE)
w = exp(w-max(w))
w = w/sum(w)
ind = sample(1:M,size=M1,replace=TRUE,prob=w)

alphas2 = alphas1[ind]
betas2  = betas1[ind]

par(mfrow=c(1,1))
plot(alphas2,betas2,pch=16,xlab=expression(alpha),ylab=expression(beta),col=grey(0.7))
contour(alphas,betas,post,add=TRUE,drawlabels=FALSE,col=4)

par(mfrow=c(1,2))
hist(alphas2,xlab="",prob=TRUE,main=expression(alpha),breaks=seq(min(alphas2),max(alphas2),length=30))
box()
hist(betas2,xlab="",prob=TRUE,main=expression(beta),breaks=seq(min(betas2),max(betas2),length=30))
box()

# b) Posterior predictive
# -----------------------
K = 200
yy = seq(0,1000,length=K)
xx = 4:12/10
dens = matrix(0,K,9)
for (j in 1:9)
  for (i in 1:K)
    dens[i,j] = mean(dt((yy[i]-alphas2-betas2*xx[j])/sqrt(sig2),v)/sqrt(sig2))
    
par(mfrow=c(1,1))
plot(yy,dens[,1],xlab="Spending",ylab="Density",type="l",ylim=range(dens),lwd=2)
for (i in 2:9)  
  lines(yy,dens[,i],col=i,lwd=2)
legend(700,0.008,legend=10000*xx,col=1:9,lty=1,lwd=2,bty="n")
text(750,0.0081,"Income")
title("Posterior predictive")

# c) Simulating ypred
# -------------------
set.seed(12164)
xx = c(4:12)/10
draws = matrix(0,M1,9)
for (j in 1:9)
draws[,j] = alphas2+betas2*xx[j]+sqrt(sig2)*rt(M1,v)

par(mfrow=c(1,1))
plot(10000*x,y,xlim=c(4000,12000),ylim=c(0,900),xlab="INCOME",ylab="SPENDING",pch=16)
lines(10000*xx,apply(draws,2,quantile,0.025),col=2)
lines(10000*xx,apply(draws,2,quantile,0.5),col=2)
lines(10000*xx,apply(draws,2,quantile,0.975),col=2)

par(mfrow=c(3,3))
for (j in 1:9){
	hist(draws[,j],prob=TRUE,breaks=seq(min(draws[,j]),max(draws[,j]),length=30),main="",xlab="")
  lines(yy,dens[,j],col=2,lwd=2)
  title(paste("Income = ",10000*xx[j],sep=""))
}

# d) Raoblackwallization (when x=7500)
# ------------------------------------
set.seed(1216546)
xxx   = 7500/10000
K     = 1000
yy    = seq(-100,1000,length=K)
dens1 = rep(0,K)
for (i in 1:K)
    dens1[i] = mean(dt((yy[i]-alphas2-betas2*xxx)/sqrt(sig2),v)/sqrt(sig2))

par(mfrow=c(1,1))
plot(yy,dens1,type="l",xlim=c(-100,1000),ylab="Density",main="",xlab="SPENDING")
for (i in 1:100){
  y1 = alphas2+betas2*xxx+sqrt(sig2)*rt(M1,v)
  lines(density(y1),col=3)
}
lines(yy,dens1)
title(paste("Raoblackwellization \n when income = ",10000*xxx,sep=""))
dev.off()