---
title: "Bayesian model comparison"
author: "Hedibert Freitas Lopes"
date: "5/8/2018"
output:
#  word_document: default
  pdf_document: default
#  html_document: default
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

# Simulating the data

Observations $x_1,\ldots,x_n$ are simulated either from a $N(0,\tau^2)$ or from a $t_{\nu}(0,\tau^2)$. Here we set a flag indicating the data-generating process (the true model):

```{r}
true.model="gaussian"
#true.model="student"
true.model
```

```{r fig.width=12,fig.height=6}
#install.packages("invgamma")
library(invgamma)

set.seed(235)
n         = 100
tau2.true = 3
nu.true   = 3
if (true.model=="gaussian"){
  x = rnorm(n,0,sqrt(tau2.true))
}else{
  x = sqrt(tau2.true)*rt(n,df=nu.true)
}
sumx2 = sum(x^2)

par(mfrow=c(1,2))
if (true.model=="gaussian"){
  ts.plot(x,main=paste("Data: N(0,",tau2.true,")",sep=""))
}else{
  ts.plot(x,main=paste("Data: Student's t(0,",tau2.true,",",nu.true,")",sep=""))
}
hist(x,main="",prob=TRUE)
```

# Model 1: Gaussian model

In this case, $x_1,\ldots,x_n$ are iid $N(0,\tau^2)$ with $\tau^2 \sim IG(c_0,d_0)$. We will use $d_0=c_0+1$, so the prior mode is 1, and $c_0=4$, which leads to a relatively uninformative prior with a standard deviation of about $1.2$. Recall that $E(\tau^2)=d_0/(c_0-1)$, $\mbox{Mode}(\tau^2)=d_0/(c_0+1)$ and $V(\tau^2)=d_0^2/((c_0-1)^2(c_0-2))$. In addition, it is easy to see that ${\hat \tau}^2_{mle}=\sum_{i=1}^n x_i^2/n$.

```{r fig.width=12,fig.height=4}
tau2.mle = sumx2/n
tau2.mle

# Grid over tau2 for numerical integration
N     = 1000
tau2s = seq(0.001,20,length=N)
like  = (2*pi*tau2s)^(-n/2)*exp(-0.5*sumx2/tau2s)

# Prior and log prior predictive (Riemann sum over the tau2 grid)
c0 = 4
d0 = c0+1
prior = dinvgamma(tau2s,c0,d0)
logpred.model1 = log(sum(like*prior*(tau2s[2]-tau2s[1])))
logpred.model1

# Conjugate posterior and its mode
c1 = c0+n/2
d1 = d0+sumx2/2
posterior  = dinvgamma(tau2s,c1,d1)
tau2.mode1 = d1/(c1+1)
tau2.mode1

par(mfrow=c(1,3))
plot(tau2s,like/max(like),xlab=expression(tau^2),ylab="Likelihood",type="l")
title("Model 1: Gaussian")
plot(tau2s,prior,xlab=expression(tau^2),ylab="Prior",type="l")
plot(tau2s,posterior,xlab=expression(tau^2),ylab="Posterior",type="l",lwd=3)
lines(tau2s,prior,col=2,lwd=3)
legend("topright",legend=c("prior","posterior"),col=2:1,lty=1,lwd=3)
```
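Since the Gaussian model is conjugate, the prior predictive density is also available in closed form,
$$
p(x_1,\ldots,x_n) = (2\pi)^{-n/2}\,\frac{d_0^{c_0}}{\Gamma(c_0)}\,\frac{\Gamma(c_1)}{d_1^{c_1}},
$$
with $c_1=c_0+n/2$ and $d_1=d_0+\sum_{i=1}^n x_i^2/2$ as above. A minimal sanity check (not part of the original script) comparing the grid approximation to the exact value:

```{r}
# Exact log prior predictive under the conjugate Gaussian-IG setup
logpred.exact = -0.5*n*log(2*pi)+c0*log(d0)-lgamma(c0)+lgamma(c1)-c1*log(d1)
c(grid=logpred.model1,exact=logpred.exact)
```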
# Model 2: Student's $t$ model

In this case, $x_1,\ldots,x_n$ are iid $t_{\nu}(0,\tau^2)$ with $\tau^2 \sim IG(e_0,f_0)$. We will use $f_0=e_0+1$, so the prior mode is 1, and $e_0=4$, which leads to a relatively uninformative prior with a standard deviation of about $1.2$. For $\nu$, we will use a discretized version of Juárez and Steel's (2010) hierarchical prior with $k=10$, i.e. $p(\nu) \propto 2k\nu/(\nu+k)^3$ for $\nu \in \{1,\ldots,50\}$.

```{r fig.width=12,fig.height=4}
e0    = 4
f0    = e0+1
numax = 50
N     = 100
tau2s = seq(0.001,20,length=N)
nus   = 1:numax

# Discretized Juárez and Steel (2010) hierarchical prior on nu, with k=10
k      = 10
normnu = sum(2*k*nus/((nus+k)^3))

like  = matrix(0,N,numax)
prior = matrix(0,N,numax)
for (i in 1:N)
  for (j in 1:numax){
    like[i,j]  = prod(dt(x/sqrt(tau2s[i]),nus[j])/sqrt(tau2s[i]))
    prior[i,j] = dinvgamma(tau2s[i],e0,f0)*2*k*nus[j]/((nus[j]+k)^3)/normnu
  }

# nu is discrete (its prior masses already sum to one), so only the
# tau2 grid spacing enters the Riemann sum
logpred.model2 = log(sum(like*prior*(tau2s[2]-tau2s[1])))
logpred.model2

like = like/max(like)
post = prior*like

par(mfrow=c(1,3))
contour(tau2s,nus,like,xlab=expression(tau^2),ylab=expression(nu),drawlabels=FALSE)
title("Likelihood of Model 2: Student's t")
contour(tau2s,nus,prior,xlab=expression(tau^2),ylab=expression(nu),drawlabels=FALSE)
title("Prior")
contour(tau2s,nus,post,xlab=expression(tau^2),ylab=expression(nu),drawlabels=FALSE)
title("Posterior")

# Joint posterior mode over the (tau2,nu) grid
ij = which(post==max(post),arr.ind=TRUE)[1,]
tau2.mode2 = tau2s[ij[1]]
nu.mode2   = nus[ij[2]]
c(tau2.mode2,nu.mode2)
```

# Model 3: Mixture of Gaussians model

In this case, $x_1,\ldots,x_n$ are iid $w\,N(0,1)+(1-w)\,N(0,\tau^2)$ with $\tau^2 \sim IG(a_0,b_0)$. We will use $b_0=a_0+1$, so the prior mode is 1, and $a_0=4$, which leads to a relatively uninformative prior with a standard deviation of about $1.2$. We assume that, a priori, $w$ is uniform over a grid of $N=100$ equally spaced points in $[0,1]$, each with mass $1/N$.

```{r fig.width=12,fig.height=4}
a0 = 4
b0 = a0+1
N  = 100
tau2s = seq(0.001,20,length=N)
ws    = seq(0,1,length=N)

like  = matrix(0,N,N)
prior = matrix(0,N,N)
for (i in 1:N)
  for (j in 1:N){
    like[i,j]  = prod(ws[j]*dnorm(x)+(1-ws[j])*dnorm(x,0,sqrt(tau2s[i])))
    prior[i,j] = dinvgamma(tau2s[i],a0,b0)*(1/N)
  }

# w is discrete with mass 1/N (already included in the prior), so only the
# tau2 grid spacing enters the Riemann sum; multiplying also by ws[2]-ws[1]
# would double count the prior on w
logpred.model3 = log(sum(like*prior*(tau2s[2]-tau2s[1])))
logpred.model3

like = like/max(like)
post = prior*like

par(mfrow=c(1,3))
contour(tau2s,ws,like,xlab=expression(tau^2),ylab=expression(w),drawlabels=FALSE)
title("Likelihood of Model 3: Mixture of Gaussians")
contour(tau2s,ws,prior,xlab=expression(tau^2),ylab=expression(w),drawlabels=FALSE)
title("Prior")
contour(tau2s,ws,post,xlab=expression(tau^2),ylab=expression(w),drawlabels=FALSE)
title("Posterior")

# Joint posterior mode over the (tau2,w) grid
ij = which(post==max(post),arr.ind=TRUE)[1,]
tau2.mode3 = tau2s[ij[1]]
w.mode3    = ws[ij[2]]
c(tau2.mode3,w.mode3)
```

# Models at the posterior mode of the parameters

Now that we have fitted the three models to the data, we can compare them via their prior predictive densities (or their log versions), via Bayes factors, or via posterior model probabilities.
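For instance, pairwise log Bayes factors follow directly from the log predictive densities computed above; a short sketch (not part of the original script):

```{r}
# Pairwise log Bayes factors: positive values favor the first model
logBF12 = logpred.model1-logpred.model2  # Gaussian vs Student's t
logBF13 = logpred.model1-logpred.model3  # Gaussian vs mixture
logBF23 = logpred.model2-logpred.model3  # Student's t vs mixture
c(logBF12,logBF13,logBF23)
```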
```{r fig.width=10,fig.height=6}
logpreds = c(logpred.model1,logpred.model2,logpred.model3)
# Subtract the maximum before exponentiating for numerical stability
post.model = round(exp(logpreds-max(logpreds))/sum(exp(logpreds-max(logpreds))),5)

xxx = seq(-10,10,length=1000)
if (true.model=="gaussian"){
  den.true = dnorm(xxx,0,sqrt(tau2.true))
}else{
  den.true = dt(xxx/sqrt(tau2.true),df=nu.true)/sqrt(tau2.true)
}

# Plug-in densities evaluated at the posterior mode of each model
den1 = dnorm(xxx,0,sqrt(tau2.mode1))
den2 = dt(xxx/sqrt(tau2.mode2),df=nu.mode2)/sqrt(tau2.mode2)
den3 = w.mode3*dnorm(xxx)+(1-w.mode3)*dnorm(xxx,0,sqrt(tau2.mode3))
ltrue = log(den.true)
lden1 = log(den1)
lden2 = log(den2)
lden3 = log(den3)

par(mfrow=c(1,1))
plot(xxx,den.true,type="l",ylab="Density",xlab="x",ylim=range(den.true,den1,den2,den3),lwd=3)
lines(xxx,den1,col=2,lwd=3)
lines(xxx,den2,col=3,lwd=3)
lines(xxx,den3,col=4,lwd=3)
if(true.model=="gaussian"){
  legend("topleft",legend=c(
    paste("Data: N(0,",tau2.true,")",sep=""),
    paste("Model 1: Gaussian (",round(logpred.model1,3),")",sep=""),
    paste("Model 2: Student's t (",round(logpred.model2,3),")",sep=""),
    paste("Model 3: Mixture of Gaussians (",round(logpred.model3,3),")",sep="")),col=1:4,lty=1,lwd=3)
}else{
  legend("topleft",legend=c(
    paste("Data: Student's t(0,",tau2.true,",",nu.true,")",sep=""),
    paste("Model 1: Gaussian (",round(logpred.model1,3),")",sep=""),
    paste("Model 2: Student's t (",round(logpred.model2,3),")",sep=""),
    paste("Model 3: Mixture of Gaussians (",round(logpred.model3,3),")",sep="")),col=1:4,lty=1,lwd=3)
}
legend("topright",legend=c(
  paste("Prob1 = ",post.model[1],sep=""),
  paste("Prob2 = ",post.model[2],sep=""),
  paste("Prob3 = ",post.model[3],sep="")),col=2:4,lty=1,lwd=3)

# Log densities highlight the tail behavior of each fitted model
par(mfrow=c(1,1))
plot(xxx,ltrue,type="l",ylab="Log density",xlab="x",ylim=range(ltrue,lden1,lden2,lden3),lwd=3)
lines(xxx,lden1,col=2,lwd=3)
lines(xxx,lden2,col=3,lwd=3)
lines(xxx,lden3,col=4,lwd=3)
```
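The chunk above implicitly assigns equal prior probability to the three models. The same log predictives yield posterior model probabilities under any prior weights; a brief sketch, with purely illustrative weights (not from the original analysis):

```{r}
# Posterior model probabilities under hypothetical non-uniform prior weights
prior.model = c(1/2,1/4,1/4)  # illustrative prior model probabilities
lw = logpreds+log(prior.model)
round(exp(lw-max(lw))/sum(exp(lw-max(lw))),5)
```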