---
title: "Bayesian model comparison"
author: "Hedibert Freitas Lopes"
date: "5/8/2018"
output:
#  word_document: default
  pdf_document: default
#  html_document: default
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

# Simulating the data

Observations $x_1,\ldots,x_n$ are simulated either from a $N(0,\tau^2)$ or from a $t_{\nu}(0,\tau^2)$. Here we set a flag indicating the data-generating process (the true model):

```{r}
true.model="gaussian"
#true.model="student"
true.model
```

```{r fig.width=12,fig.height=6}
#install.packages("invgamma")
library(invgamma)

set.seed(235)
n         = 100
tau2.true = 3
nu.true   = 3
if (true.model=="gaussian"){
  x = rnorm(n,0,sqrt(tau2.true))
}else{
  x = sqrt(tau2.true)*rt(n,df=nu.true)
}
sumx2 = sum(x^2)

par(mfrow=c(1,2))
if (true.model=="gaussian"){
  ts.plot(x,main=paste("Data: N(0,",tau2.true,")",sep=""))
}else{
  ts.plot(x,main=paste("Data: Student's t(0,",tau2.true,",",nu.true,")",sep=""))
}
hist(x,main="",prob=TRUE)
```

# Model 1: Gaussian model

In this case, $x_1,\ldots,x_n$ are iid $N(0,\tau^2)$ with $\tau^2 \sim IG(c_0,d_0)$. We will use $d_0=c_0+1$, so the prior mode is 1, and $c_0=4$, which leads to a relatively uninformative prior with a standard deviation of about $1.2$. Recall that $E(\tau^2)=d_0/(c_0-1)$, $\mbox{Mode}(\tau^2)=d_0/(c_0+1)$ and $V(\tau^2)=d_0^2/((c_0-1)^2(c_0-2))$. In addition, it is easy to see that ${\hat \tau}^2_{mle}=\sum_{i=1}^n x_i^2/n$.

```{r fig.width=12,fig.height=4}
tau2.mle = sumx2/n
tau2.mle

# Grid over tau2 for numerical integration
N     = 1000
tau2s = seq(0.001,20,length=N)
like  = (2*pi*tau2s)^(-n/2)*exp(-0.5*sumx2/tau2s)

# Prior and log prior predictive (Riemann sum over the tau2 grid)
c0 = 4
d0 = c0+1
prior = dinvgamma(tau2s,c0,d0)
logpred.model1 = log(sum(like*prior*(tau2s[2]-tau2s[1])))
logpred.model1

# Conjugate posterior and its mode
c1 = c0+n/2
d1 = d0+sumx2/2
posterior  = dinvgamma(tau2s,c1,d1)
tau2.mode1 = d1/(c1+1)
tau2.mode1

par(mfrow=c(1,3))
plot(tau2s,like/max(like),xlab=expression(tau^2),ylab="Likelihood",type="l")
title("Model 1: Gaussian")
plot(tau2s,prior,xlab=expression(tau^2),ylab="Prior",type="l")
plot(tau2s,posterior,xlab=expression(tau^2),ylab="Posterior",type="l",lwd=3)
lines(tau2s,prior,col=2,lwd=3)
legend("topright",legend=c("prior","posterior"),col=2:1,lty=1,lwd=3)
```
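Since the Gaussian model is conjugate, the prior predictive density is also available in closed form,
$$
p(x_1,\ldots,x_n) = (2\pi)^{-n/2}\,\frac{d_0^{c_0}}{\Gamma(c_0)}\,\frac{\Gamma(c_1)}{d_1^{c_1}},
$$
with $c_1=c_0+n/2$ and $d_1=d_0+\sum_{i=1}^n x_i^2/2$ as above. A minimal sanity check (not part of the original script) comparing the grid approximation to the exact value:

```{r}
# Exact log prior predictive under the conjugate Gaussian-IG setup
logpred.exact = -0.5*n*log(2*pi)+c0*log(d0)-lgamma(c0)+lgamma(c1)-c1*log(d1)
c(grid=logpred.model1,exact=logpred.exact)
```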
# Model 2: Student's $t$ model

In this case, $x_1,\ldots,x_n$ are iid $t_{\nu}(0,\tau^2)$ with $\tau^2 \sim IG(e_0,f_0)$. We will use $f_0=e_0+1$, so the prior mode is 1, and $e_0=4$, which leads to a relatively uninformative prior with a standard deviation of about $1.2$. For $\nu$, we will use a discretized version of Juárez and Steel's (2010) hierarchical prior with $k=10$, i.e. $p(\nu) \propto 2k\nu/(\nu+k)^3$ for $\nu \in \{1,\ldots,50\}$.

```{r fig.width=12,fig.height=4}
e0    = 4
f0    = e0+1
numax = 50
N     = 100
tau2s = seq(0.001,20,length=N)
nus   = 1:numax

# Discretized Juárez and Steel (2010) hierarchical prior on nu, with k=10
k      = 10
normnu = sum(2*k*nus/((nus+k)^3))

like  = matrix(0,N,numax)
prior = matrix(0,N,numax)
for (i in 1:N)
  for (j in 1:numax){
    like[i,j]  = prod(dt(x/sqrt(tau2s[i]),nus[j])/sqrt(tau2s[i]))
    prior[i,j] = dinvgamma(tau2s[i],e0,f0)*2*k*nus[j]/((nus[j]+k)^3)/normnu
  }

# nu is discrete (its prior masses already sum to one), so only the
# tau2 grid spacing enters the Riemann sum
logpred.model2 = log(sum(like*prior*(tau2s[2]-tau2s[1])))
logpred.model2

like = like/max(like)
post = prior*like

par(mfrow=c(1,3))
contour(tau2s,nus,like,xlab=expression(tau^2),ylab=expression(nu),drawlabels=FALSE)
title("Likelihood of Model 2: Student's t")
contour(tau2s,nus,prior,xlab=expression(tau^2),ylab=expression(nu),drawlabels=FALSE)
title("Prior")
contour(tau2s,nus,post,xlab=expression(tau^2),ylab=expression(nu),drawlabels=FALSE)
title("Posterior")

# Joint posterior mode over the (tau2,nu) grid
ij = which(post==max(post),arr.ind=TRUE)[1,]
tau2.mode2 = tau2s[ij[1]]
nu.mode2   = nus[ij[2]]
c(tau2.mode2,nu.mode2)
```

# Model 3: Mixture of Gaussians model

In this case, $x_1,\ldots,x_n$ are iid $w\,N(0,1)+(1-w)\,N(0,\tau^2)$ with $\tau^2 \sim IG(a_0,b_0)$. We will use $b_0=a_0+1$, so the prior mode is 1, and $a_0=4$, which leads to a relatively uninformative prior with a standard deviation of about $1.2$. We assume that, a priori, $w$ is uniform over a grid of $N=100$ equally spaced points in $[0,1]$, each with mass $1/N$.

```{r fig.width=12,fig.height=4}
a0 = 4
b0 = a0+1
N  = 100
tau2s = seq(0.001,20,length=N)
ws    = seq(0,1,length=N)

like  = matrix(0,N,N)
prior = matrix(0,N,N)
for (i in 1:N)
  for (j in 1:N){
    like[i,j]  = prod(ws[j]*dnorm(x)+(1-ws[j])*dnorm(x,0,sqrt(tau2s[i])))
    prior[i,j] = dinvgamma(tau2s[i],a0,b0)*(1/N)
  }

# w is discrete with mass 1/N (already included in the prior), so only the
# tau2 grid spacing enters the Riemann sum; multiplying also by ws[2]-ws[1]
# would double count the prior on w
logpred.model3 = log(sum(like*prior*(tau2s[2]-tau2s[1])))
logpred.model3

like = like/max(like)
post = prior*like

par(mfrow=c(1,3))
contour(tau2s,ws,like,xlab=expression(tau^2),ylab=expression(w),drawlabels=FALSE)
title("Likelihood of Model 3: Mixture of Gaussians")
contour(tau2s,ws,prior,xlab=expression(tau^2),ylab=expression(w),drawlabels=FALSE)
title("Prior")
contour(tau2s,ws,post,xlab=expression(tau^2),ylab=expression(w),drawlabels=FALSE)
title("Posterior")

# Joint posterior mode over the (tau2,w) grid
ij = which(post==max(post),arr.ind=TRUE)[1,]
tau2.mode3 = tau2s[ij[1]]
w.mode3    = ws[ij[2]]
c(tau2.mode3,w.mode3)
```

# Models at the posterior mode of the parameters

Now that we have fitted the three models to the data, we can compare them via their prior predictive densities (or their log versions), via Bayes factors, or via posterior model probabilities.
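For instance, pairwise log Bayes factors follow directly from the log predictive densities computed above; a short sketch (not part of the original script):

```{r}
# Pairwise log Bayes factors: positive values favor the first model
logBF12 = logpred.model1-logpred.model2  # Gaussian vs Student's t
logBF13 = logpred.model1-logpred.model3  # Gaussian vs mixture
logBF23 = logpred.model2-logpred.model3  # Student's t vs mixture
c(logBF12,logBF13,logBF23)
```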
```{r fig.width=10,fig.height=6}
logpreds = c(logpred.model1,logpred.model2,logpred.model3)
# Subtract the maximum before exponentiating for numerical stability
post.model = round(exp(logpreds-max(logpreds))/sum(exp(logpreds-max(logpreds))),5)

xxx = seq(-10,10,length=1000)
if (true.model=="gaussian"){
  den.true = dnorm(xxx,0,sqrt(tau2.true))
}else{
  den.true = dt(xxx/sqrt(tau2.true),df=nu.true)/sqrt(tau2.true)
}

# Plug-in densities evaluated at the posterior mode of each model
den1 = dnorm(xxx,0,sqrt(tau2.mode1))
den2 = dt(xxx/sqrt(tau2.mode2),df=nu.mode2)/sqrt(tau2.mode2)
den3 = w.mode3*dnorm(xxx)+(1-w.mode3)*dnorm(xxx,0,sqrt(tau2.mode3))
ltrue = log(den.true)
lden1 = log(den1)
lden2 = log(den2)
lden3 = log(den3)

par(mfrow=c(1,1))
plot(xxx,den.true,type="l",ylab="Density",xlab="x",ylim=range(den.true,den1,den2,den3),lwd=3)
lines(xxx,den1,col=2,lwd=3)
lines(xxx,den2,col=3,lwd=3)
lines(xxx,den3,col=4,lwd=3)
if(true.model=="gaussian"){
  legend("topleft",legend=c(
    paste("Data: N(0,",tau2.true,")",sep=""),
    paste("Model 1: Gaussian (",round(logpred.model1,3),")",sep=""),
    paste("Model 2: Student's t (",round(logpred.model2,3),")",sep=""),
    paste("Model 3: Mixture of Gaussians (",round(logpred.model3,3),")",sep="")),col=1:4,lty=1,lwd=3)
}else{
  legend("topleft",legend=c(
    paste("Data: Student's t(0,",tau2.true,",",nu.true,")",sep=""),
    paste("Model 1: Gaussian (",round(logpred.model1,3),")",sep=""),
    paste("Model 2: Student's t (",round(logpred.model2,3),")",sep=""),
    paste("Model 3: Mixture of Gaussians (",round(logpred.model3,3),")",sep="")),col=1:4,lty=1,lwd=3)
}
legend("topright",legend=c(
  paste("Prob1 = ",post.model[1],sep=""),
  paste("Prob2 = ",post.model[2],sep=""),
  paste("Prob3 = ",post.model[3],sep="")),col=2:4,lty=1,lwd=3)

# Log densities highlight the tail behavior of each fitted model
par(mfrow=c(1,1))
plot(xxx,ltrue,type="l",ylab="Log density",xlab="x",ylim=range(ltrue,lden1,lden2,lden3),lwd=3)
lines(xxx,lden1,col=2,lwd=3)
lines(xxx,lden2,col=3,lwd=3)
lines(xxx,lden3,col=4,lwd=3)
```
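The chunk above implicitly assigns equal prior probability to the three models. The same log predictives yield posterior model probabilities under any prior weights; a brief sketch, with purely illustrative weights (not from the original analysis):

```{r}
# Posterior model probabilities under hypothetical non-uniform prior weights
prior.model = c(1/2,1/4,1/4)  # illustrative prior model probabilities
lw = logpreds+log(prior.model)
round(exp(lw-max(lw))/sum(exp(lw-max(lw))),5)
```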