######################################################################
#
# Bayesian statistics: Comparing 6 model/prior combinations
#
# Model 1: y1,...,yn iid N(theta,sig2)          E(y)=theta,V(y)=sig2
# Model 2: y1,...,yn iid t(theta,tau2,nu)       E(y)=theta,V(y)=tau2*nu/(nu-2)
#
# Prior 1: theta ~ N(0,16)
# Prior 2: theta ~ t(0,9,3)
# Prior 3: theta ~ 0.6*N(0,16)+0.4*N(10,25)
#
# Fixed quantities: n=10, sig2=100, nu=3, tau2=100/3, 
#                             such that V(y)=100 for both models.
#
# By Hedibert Freitas Lopes
# 
# This version: May 12th 2017.
#
######################################################################
like.norm      = function(theta){prod(dnorm(y,theta,sig))}
like.t             = function(theta){prod(dt((y-theta)/tau,nu)/tau)}
prior.norm    = function(theta){dnorm(theta,0,4)}
prior.t            = function(theta){dt(theta/3,3)/3}
prior.mixture = function(theta){0.6*dnorm(theta,0,4)+0.4*dnorm(theta,10,5)}
neglike.t        = function(theta){-sum(dt((y-theta)/tau,nu,log=TRUE)-log(tau))}

# Data
n      = 10 
y      = c(-7.78,-3.51,0.81,8.42,9.91,11.57,13.21,17.31,19.10,23.74)
ybar = 9.278

# Fixed parameters from the normal and Student's t models
sig2 = 100
nu    = 3
tau2 = (nu-2)/nu*sig2
sig   = sqrt(sig2)
tau   = sqrt(tau2)


# ML estimation
thetahat.mle1 = ybar
thetahat.mle2 = nlm(neglike.t,ybar)$estimate
c(thetahat.mle1,thetahat.mle2)
# 9.27800 10.89806

pdf(file="severalmodels-SIR.pdf",width=10,height=6)

# Two models & three prior distributions
xxx  = seq(-20,40,length=10000)
dtt   = rep(0,10000)
dnn = rep(0,10000)
for (i in 1:10000){
  dnn[i] = like.norm(xxx[i])
  dtt[i]   = like.t(xxx[i])
}
h       = xxx[2]-xxx[1]  
dnn   = dnn/sum(dnn)/h
dtt     = dtt/sum(dtt)/h
post1 = dnn*prior.norm(xxx)
post1 = post1/sum(post1)/h
post2 = dnn*prior.t(xxx)
post2 = post2/sum(post2)/h
post3 = dnn*prior.mixture(xxx)
post3 = post3/sum(post3)/h
post4 = dtt*prior.norm(xxx)
post4 = post4/sum(post4)/h
post5 = dtt*prior.t(xxx)
post5 = post5/sum(post5)/h
post6 = dtt*prior.mixture(xxx)
post6 = post6/sum(post6)/h


par(mfrow=c(1,1))
plot(xxx,prior.norm(xxx),type="l",ylim=c(0,0.15),xlab=expression(theta),ylab="Prior density")
lines(xxx,prior.t(xxx),col=2)
lines(xxx,prior.mixture(xxx),col=3)
lines(xxx,dnn,col=4,lwd=2)
lines(xxx,dtt,col=6,lwd=2)
title("Priors and likelihoods")
legend("topright",legend=c("Normal","Student's t","Mix. of normals","Model 1: Normal","Model 2: Student's t"),col=c(1:4,6),lty=1,lwd=c(1,1,1,2,2))

par(mfrow=c(1,1))
plot(xxx,post1,type="l",xlab=expression(theta),xlim=c(-15,25),ylab="Posterior density")
title("Posterior distributions")
lines(xxx,post2,col=2)
lines(xxx,post3,col=3)
lines(xxx,post4,col=4)
lines(xxx,post5,col=5)
lines(xxx,post6,col=6)
legend("topright",legend=c("NN","NT","NM","TN","TT","TM"),col=1:6,lty=1)


# Proposal U(-5,20)
set.seed(1234)
M       = 1000000
thetas = runif(M,-5,20)

likenn = rep(0,M)
likett = rep(0,M)
for (i in 1:M){
  likenn[i] = like.norm(thetas[i])
  likett[i] = like.t(thetas[i])
}
priorn = prior.norm(thetas)
priort = prior.t(thetas)
priorm = prior.mixture(thetas)


par(mfrow=c(2,3))
w1 = likenn*priorn/(1/50)
thetas1 = sample(thetas,size=M,replace=TRUE,prob=w1)
xx = seq(min(thetas1),max(thetas1),length=50)
hist(thetas1,prob=TRUE,xlab=expression(theta),main="",breaks=xx,
       xlim=c(-5,20),ylim=c(0,0.17),ylab="Posterior density")
lines(xxx,post1,col=2)
title("Normal likelihood\nnormal prior")

w2 = likenn*priort/(1/50)
thetas2 = sample(thetas,size=M,replace=TRUE,prob=w2)
xx = seq(min(thetas2),max(thetas2),length=50)
hist(thetas2,prob=TRUE,xlab=expression(theta),main="",breaks=xx,
       xlim=c(-5,20),,ylim=c(0,0.17),ylab="Posterior density")
lines(xxx,post2,col=2)
title("Normal likelihood\nStudent's t prior")

w3 = likenn*priorm/(1/50)
thetas3 = sample(thetas,size=M,replace=TRUE,prob=w3)
xx = seq(min(thetas3),max(thetas3),length=50)
hist(thetas3,prob=TRUE,xlab=expression(theta),main="",breaks=xx,
        xlim=c(-5,20),,ylim=c(0,0.17),ylab="Posterior density")
lines(xxx,post3,col=2)
title("Normal likelihood\nmixture prior")

w4 = likett*priorn/(1/50)
thetas4 = sample(thetas,size=M,replace=TRUE,prob=w4)
xx = seq(min(thetas4),max(thetas4),length=50)
hist(thetas4,prob=TRUE,xlab=expression(theta),main="",breaks=xx,
       xlim=c(-5,20),,ylim=c(0,0.17),ylab="Posterior density")
lines(xxx,post4,col=2)
title("Student's t likelihood\nnormal prior")

w5 = likett*priort/(1/50)
thetas5 = sample(thetas,size=M,replace=TRUE,prob=w5)
xx = seq(min(thetas5),max(thetas5),length=50)
hist(thetas5,prob=TRUE,xlab=expression(theta),main="",breaks=xx,
       xlim=c(-5,20),,ylim=c(0,0.17),ylab="Posterior density")
lines(xxx,post5,col=2)
title("Student's t likelihood\nStudent's t prior")

w6 = likett*priorm/(1/50)
thetas6 = sample(thetas,size=M,replace=TRUE,prob=w6)
xx = seq(min(thetas6),max(thetas6),length=50)
hist(thetas6,prob=TRUE,xlab=expression(theta),main="",breaks=xx,
       xlim=c(-5,20),ylim=c(0,0.17),ylab="Posterior density")
lines(xxx,post6,col=2)
title("Student's t likelihood\nmixture prior")


py = c(mean(w1),mean(w2),mean(w3),mean(w4),mean(w5),mean(w6))

py
# 2.451390e-17 2.140104e-17 5.857985e-17 2.645262e-18 2.614443e-18 1.193807e-17

round(py/sum(py),3)
# 0.201 0.176 0.481 0.022 0.021 0.098

M = 1000000
tt = seq(-5,20,length=M)
h = tt[2]-tt[1]
py.exact = matrix(0,M,6)
for (i in 1:M){
  py.exact[i,1] = like.norm(tt[i])*prior.norm(tt[i])
  py.exact[i,2] = like.norm(tt[i])*prior.t(tt[i])
  py.exact[i,3] = like.norm(tt[i])*prior.mixture(tt[i])
  py.exact[i,4] = like.t(tt[i])*prior.norm(tt[i])
  py.exact[i,5] = like.t(tt[i])*prior.t(tt[i])
  py.exact[i,6] = like.t(tt[i])*prior.mixture(tt[i])
}

pyex = apply(py.exact*h,2,sum)

log(cbind(pyex,py))
#                pyex              py
# [1,] -38.93940 -38.24729
# [2,] -39.07513 -38.38309
# [3,] -38.06751 -37.37614
# [4,] -41.16516 -40.47376
# [5,] -41.17687 -40.48548
# [6,] -39.65804 -38.96680


# Predictive densities evaluated at y(1), y(2),...,y(10)
like.norm1      = function(theta,y){dnorm(y,theta,sig)}
like.t1             = function(theta,y){dt((y-theta)/tau,nu)/tau}

M = 1000000
tt = seq(-20,40,length=M)
h = tt[2]-tt[1]
py.exact = array(0,c(M,6,n))
for (j in 1:n){
for (i in 1:M){
  py.exact[i,1,j] = like.norm1(tt[i],y[j])*prior.norm(tt[i])
  py.exact[i,2,j] = like.norm1(tt[i],y[j])*prior.t(tt[i])
  py.exact[i,3,j] = like.norm1(tt[i],y[j])*prior.mixture(tt[i])
  py.exact[i,4,j] = like.t1(tt[i],y[j])*prior.norm(tt[i])
  py.exact[i,5,j] = like.t1(tt[i],y[j])*prior.t(tt[i])
  py.exact[i,6,j] = like.t1(tt[i],y[j])*prior.mixture(tt[i])
}}

pyex = matrix(0,n,6)
for (i in 1:n)
pyex[i,] = apply(py.exact[,,i]*h,2,sum)

cbind(y,log(pyex))
                                                              
#              y                                                            
 # [1,] -7.78 -3.556632 -3.568219 -3.856058 -3.565414 -3.601347 -3.949718
 # [2,] -3.51 -3.348838 -3.359940 -3.577237 -3.103273 -3.111705 -3.447224
 # [3,]  0.81 -3.298562 -3.309324 -3.431359 -2.983943 -2.977500 -3.219727
 # [4,]  8.42 -3.601322 -3.611765 -3.489905 -3.658445 -3.694671 -3.386840
 # [5,]  9.91 -3.719044 -3.728900 -3.546449 -3.893179 -3.925961 -3.474854
 # [6,] 11.57 -3.872738 -3.881377 -3.626327 -4.178092 -4.198404 -3.591633
 # [7,] 13.21 -4.047906 -4.054507 -3.722560 -4.475197 -4.475628 -3.728395
 # [8,] 17.31 -4.587269 -4.582751 -4.037892 -5.234513 -5.169562 -4.175700
 # [9,] 19.10 -4.868191 -4.854638 -4.209026 -5.555877 -5.462992 -4.418908
# [10,] 23.74 -5.724991 -5.667243 -4.748408 -6.325528 -6.177872 -5.153919


par(mfrow=c(2,5))
for (i in 1:10){
  plot(log(pyex)[i,],xlab="Model/Prior",ylab="Log-predictive",pch=16,ylim=range(log(pyex)))
  title(paste("y(",i,")=",y[i],sep=""))
  abline(v=3.5,lty=2)
}


prob = round(py/sum(py),3)

models = c("Normal","Student's t")
priors   = c("Normal","Student's t","Mixture")
par(mfrow=c(2,3))
i = 0
for (k in 1:2)
for (l in 1:3){
  i = i + 1
  plot(y,log(pyex[,i]),type="h",ylim=c(min(log(pyex)),0),xlab="y",ylab="Log predictive")
  for (j in 1:n)
    points(y[j],0,pch=16,col=2)
  title(paste(models[k],"/",priors[l],sep=""))
  legend("bottomleft",legend=c(paste("Prob.Mod=",prob[i],sep="")))
}


# Learning the number of degrees of freedom nu 
# in the Student's t model, normal prior
like.t = function(theta,nu){
  tau = sqrt((nu-2)/nu*sig2)
  return(prod(dt((y-theta)/tau,nu)/tau))
}
set.seed(1234)
M       = 1000000
thetas = runif(M,-5,20)
priorn = prior.norm(thetas)
nus = c(3:19,seq(20,100,by=5),seq(200,1000,by=100))
nnu = length(nus)
likett = matrix(0,M,nnu)
for (j in 1:nnu)
  for (i in 1:M)
    likett[i,j] = like.t(thetas[i],nus[j])
    
w = likett*matrix(priorn,M,nnu)/(1/25)
py = apply(w,2,sum)

par(mfrow=c(1,1))
plot(log10(nus),log(py),xlab=expression(nu),ylab="Log predictive",type="b",pch=16,cex=0.5,axes=FALSE)
title("Student's t model & normal prior")
axis(2);box()
axis(1,at=log10(c(3,10,50,100,500,1000)),lab=c(3,10,50,100,500,1000))
abline(h=log(py[nnu]),lty=2)

dev.off()