#################################################################
#
# Prior predictive, Bayes factor and Posterior model probability
#
# Normal vs Student-t
#
#################################################################

# ------------------------------------------------------------------
# Simulate data: n observations from a Gaussian location model and n
# from a Student-t location model (same centrality mu, scale sqrt(sig2)).
# ------------------------------------------------------------------
set.seed(22345)
n    = 30
mu   = 0
sig2 = 1
nu   = 15
y1 = mu + sqrt(sig2)*rnorm(n)
y2 = mu + sqrt(sig2)*rt(n, df=nu)

par(mfrow=c(1,3))
boxplot(y1)
boxplot(y2)
boxplot(y1, y2, names=c("Gaussian data","Student-t data"), outline=FALSE)

# All posterior / marginal-likelihood computations below treat the
# Student-t sample as the observed data.
y = y2

##################################################################
#
# Posterior inference about centrality parameter
#
##################################################################
# -----------------
# model 1: Gaussian
# -----------------
# Conjugate update: y_i ~ N(mu, sig2) iid, mu ~ N(mu0, tau20)
# => mu | y ~ N(mu1, tau21).
mu0   = 0
tau20 = 1
tau21 = 1/(n/sig2 + 1/tau20)
mu1   = tau21*(sum(y)/sig2 + mu0/tau20)

# ------------------
# model 2: Student-t
# ------------------
# Log-likelihood of the Student-t location model evaluated at a scalar
# mu; the -0.5*n*log(sig2) term is the Jacobian of the standardization
# (y-mu)/sqrt(sig2).
loglikelihood = function(mu){
  sum(dt((y-mu)/sqrt(sig2), df=nu, log=TRUE)) - 0.5*n*log(sig2)
}
# Unnormalized log-posterior kernel for a scalar mu:
# log-likelihood + Gaussian N(mu0, tau20) log-prior.
logposterior = function(mu){
  loglikelihood(mu) + dnorm(mu, mu0, sqrt(tau20), log=TRUE)
}
mu0   = 0
tau20 = 1

# Sampling importance resampling (SIR) with proposal N(mean(y), var(y))
set.seed(154321)
M    = 50000
m    = mean(y)
sd.y = sqrt(var(y))   # renamed from 'sd' to avoid masking base::sd
mu.d = rnorm(M, m, sd.y)
# Log importance weights: target kernel minus proposal log-density.
# (logposterior is not vectorized in mu, hence vapply.)
w = vapply(mu.d, logposterior, numeric(1)) - dnorm(mu.d, m, sd.y, log=TRUE)
w = exp(w - max(w))   # subtract max before exponentiating for stability
mu.d = sample(mu.d, size=M/2, replace=TRUE, prob=w)

# Overlay: prior, exact Gaussian-model posterior, and SIR-based
# Student-t-model posterior.
par(mfrow=c(1,1))
mus = seq(-1, 1, length=1000)
plot(mus, dnorm(mus, mu1, sqrt(tau21)), ylab="Density", main="",
     xlim=range(mu.d), type="l", lwd=2)
lines(mus, dnorm(mus, mu0, sqrt(tau20)), col=2, lwd=2)
lines(density(mu.d), col=3, lwd=2)
legend("topright",
       legend=c("Prior","Posterior (Gaussian)","Posterior (Student-t)"),
       lty=1, col=c(2,1,3), lwd=2)

##################################################################
#
# Computing marginal likelihood
# aka prior predictive density
# aka normalizing constant
#
##################################################################
# -----------------
# model 1: Gaussian
# -----------------
# Closed form: integrating mu out gives y ~ N(mu0*1, tau20*J + sig2*I),
# with J the n x n matrix of ones.
# install.packages("mvtnorm")
library("mvtnorm")
V = matrix(tau20, n, n) + diag(sig2, n)
pred.M1 = dmvnorm(y, rep(mu0, n), V)   # mu0 = 0 here, so equals rep(0,n)

# ------------------
# model 2: Student-t
# ------------------
# "Exact" value by deterministic (Riemann-sum) integration of the
# posterior kernel over a fine grid on [-1, 1].
N = 1000000
grid = seq(-1, 1, length=N)
kernel = vapply(grid, logposterior, numeric(1))
pred.M2.exact = sum(exp(kernel)*(grid[2]-grid[1]))
BF.exact  = pred.M1/pred.M2.exact
PMP.exact = BF.exact/(1+BF.exact)

# Naive Monte Carlo (sampling from the prior, usually pretty vague!)
set.seed(2468)
M = 100000
draws = rnorm(M, mu0, sqrt(tau20))
loglike = vapply(draws, loglikelihood, numeric(1))
pred.M2a = mean(exp(loglike))
BF.a  = pred.M1/pred.M2a
PMP.a = BF.a/(1+BF.a)

# Not so naive Monte Carlo: importance sampling with the Gaussian-model
# posterior N(mu1, tau21) as proposal (much closer to the target).
logproposal = function(mu){
  dnorm(mu, mu1, sqrt(tau21), log=TRUE)
}
draws = rnorm(M, mu1, sqrt(tau21))
# logproposal (dnorm) is vectorized; logposterior is not.
kernel = vapply(draws, logposterior, numeric(1)) - logproposal(draws)
pred.M2b = mean(exp(kernel))
BF.b  = pred.M1/pred.M2b
PMP.b = BF.b/(1+BF.b)

# Summary table. NOTE: 'table' masks base::table for the rest of the
# session; the name is kept for backward compatibility.
table = cbind(c(log(pred.M1), log(pred.M2.exact), BF.exact, PMP.exact),
              c(log(pred.M1), log(pred.M2a),      BF.a,     PMP.a),
              c(log(pred.M1), log(pred.M2b),      BF.b,     PMP.b))
colnames(table) = c("Exact","Naive MC","Clever MC")
rownames(table) = c("Log pred M1","Log pred M2","BF12","PMP")
table

##########################################################################################
#
# Monte Carlo exercise to compare both strategies to approximate the
# computation of the normalizing constant.
#
###########################################################################################
# Repeat each marginal-likelihood estimator nsim times with M draws each,
# tracking the running (cumulative-mean) estimate, the implied Bayes
# factor and the posterior model probability as the sample size grows.
nsim = 10
M    = 5000
preds = array(0, c(2, nsim, M))  # [estimator, replication, running estimate]
BF    = array(0, c(2, nsim, M))
PMP   = array(0, c(2, nsim, M))
for (i in 1:nsim){
  # Naive Monte Carlo: average the likelihood over draws from the prior.
  draws = rnorm(M, mu0, sqrt(tau20))
  loglike = vapply(draws, loglikelihood, numeric(1))
  preds[1,i,] = cumsum(exp(loglike))/(1:M)
  BF[1,i,]  = pred.M1/preds[1,i,]
  PMP[1,i,] = BF[1,i,]/(1+BF[1,i,])
  # Not so naive Monte Carlo: importance sampling from N(mu1, tau21).
  draws = rnorm(M, mu1, sqrt(tau21))
  kernel = vapply(draws, logposterior, numeric(1)) - logproposal(draws)
  preds[2,i,] = cumsum(exp(kernel))/(1:M)
  BF[2,i,]  = pred.M1/preds[2,i,]
  PMP[2,i,] = BF[2,i,]/(1+BF[2,i,])
}

# BUG FIX: the original code rescaled 'preds' by its maximum for plotting
# but then drew the reference line at the UNrescaled pred.M2.exact, so the
# green line sat on the wrong scale. Rescale the reference by the same
# factor so estimates and reference are comparable.
scale.max = max(preds)
preds = preds/scale.max

# Convergence of the marginal-likelihood estimates (black: naive MC,
# red: importance sampling, green: grid-based "exact" value).
par(mfrow=c(1,1))
plot(preds[1,1,], ylim=range(preds[,,1000:M]), type="l",
     xlab="Number of draws", ylab="Marginal likelihood")
for (i in 2:nsim) lines(preds[1,i,])
for (i in 1:nsim) lines(preds[2,i,], col=2)
abline(h=pred.M2.exact/scale.max, col=3, lwd=2)

# Convergence of the Bayes factor (unaffected by the rescaling above,
# since BF was computed from the raw running estimates).
plot(BF[1,1,], ylim=range(BF[,,1000:M]), type="l",
     xlab="Number of draws", ylab="Bayes factor")
for (i in 2:nsim) lines(BF[1,i,])
for (i in 1:nsim) lines(BF[2,i,], col=2)
abline(h=BF.exact, col=3, lwd=2)

# Convergence of the posterior model probability.
plot(PMP[1,1,], ylim=range(PMP[,,1000:M]), type="l",
     xlab="Number of draws", ylab="Posterior model probability")
for (i in 2:nsim) lines(PMP[1,i,])
for (i in 1:nsim) lines(PMP[2,i,], col=2)
abline(h=PMP.exact, col=3, lwd=2)