################################################################################ # Simple linear regression with Gaussian, Student's t and Laplace errors ################################################################################ # # e ~ N(0,sig2) - var(e) = sig2 # e ~ t(0,tau2,nu) - var(e) = nu/(nu-2)*tau2 => tau2 = (nu-2)*sig2/nu # e ~ L(0,w2) - var(2) = 2*w2 => w2 = sig2/2 # # p(e|laplace) = exp(-|e|/w2)/(2*w2) # ################################################################################ # Let us first look at the behavior of these three distributions # -------------------------------------------------------------- nu = 4 sig2 = 1 sig = sqrt(sig2) tau2 = (nu-2)/2*sig2 tau = sqrt(tau2) w2 = sig2/2 par(mfrow=c(1,2)) e = seq(-5,5,length=1000) plot(e,dnorm(e,0,sig),xlab="error",ylab="Density",main="",type="l",ylim=c(0,1),lwd=2) lines(e,dt(e/sig,df=nu)/sig,col=2,lwd=2) lines(e,exp(-abs(e)/w2)/(2*w2),col=3,lwd=2) legend("topleft",legend=c("Gaussian","Student's t","Laplace"),col=1:3,lty=1,lwd=2,bty="n") e = seq(-5,0,length=1000) plot(e,dnorm(e,0,sig,log=TRUE),xlab="error",ylab="Log density",main="",type="l",ylim=c(-15,0),lwd=2) lines(e,dt(e/sig,df=nu,log=TRUE)-log(sig),col=2,lwd=2) lines(e,log(exp(-abs(e)/w2)/(2*w2)),col=3,lwd=2) # Let us now simulate a simple linear regression data # --------------------------------------------------- set.seed(235) n = 100 alpha = 0.5 beta = 1.2 sig = 1 nu = 3 #e = sig*rt(n,df=nu) #e = sig*rnorm(n) e = rgamma(n,1,1/w2)*sample(c(-1,1),replace=TRUE,size=n) x = rnorm(n,0.5,0.25) y = alpha+beta*x+e par(mfrow=c(1,3)) hist(e) plot(x,y) hist(lm(y~x)$res) alpha.mle = lm(y~x)$coef[1] beta.mle = lm(y~x)$coef[2] c(alpha.mle,beta.mle) # (Intercept) x # 0.4680836 1.2286479 # Let us now plot the likelihood of (alpha,beta) under each distribution # ---------------------------------------------------------------------- loglike.g = function(alpha,beta,sig){ sum(dnorm(y,alpha+beta*x,sig,log=TRUE)) } loglike.t = function(alpha,beta,sig){ tau = sqrt((nu-2)*sig^2/nu) sum(dt((y-alpha-beta*x)/tau,df=nu,log=TRUE))-n*log(tau) } loglike.l = function(alpha,beta,sig){ w2 = sig^2/2 sum(-abs(y-alpha-beta*x)/w2)-n*log(2*w2) } N = 200 alphas = seq(0,2,length=N) betas = seq(0,2.5,length=N) loglikes = array(0,c(3,N,N)) for (i in 1:N) for (j in 1:N){ loglikes[1,i,j] = loglike.g(alphas[i],betas[j],sig) loglikes[2,i,j] = loglike.t(alphas[i],betas[j],sig) loglikes[3,i,j] = loglike.l(alphas[i],betas[j],sig) } par(mfrow=c(1,3)) contour(alphas,betas,exp(loglikes[1,,]),xlab=expression(alpha),ylab=expression(beta),drawlabels=FALSE) title("Gaussian model") points(alpha,beta,col=2,pch=16) contour(alphas,betas,exp(loglikes[2,,]),xlab=expression(alpha),ylab=expression(beta),drawlabels=FALSE) title("Student's t model") points(alpha,beta,col=2,pch=16) contour(alphas,betas,exp(loglikes[3,,]),xlab=expression(alpha),ylab=expression(beta),drawlabels=FALSE) title("Laplace model") points(alpha,beta,col=2,pch=16) # Let us now consider Bayesian inference via SIR # ---------------------------------------------- M = 100000 alpha.d = runif(M,-0.5,2) beta.d = runif(M,-0.5,2.5) w = matrix(0,M,3) for (i in 1:M){ w[i,1] = loglike.g(alpha.d[i],beta.d[i],sig) w[i,2] = loglike.t(alpha.d[i],beta.d[i],sig) w[i,3] = loglike.l(alpha.d[i],beta.d[i],sig) } ww = exp(w) w[,1] = exp(w[,1]-max(w[,1])) w[,2] = exp(w[,2]-max(w[,2])) w[,3] = exp(w[,3]-max(w[,3])) ind1 = sample(1:M,size=M,replace=TRUE,prob=w[,1]) ind2 = sample(1:M,size=M,replace=TRUE,prob=w[,2]) ind3 = sample(1:M,size=M,replace=TRUE,prob=w[,3]) # Posterior inference for each model pars = array(0,c(3,M,2)) pars[1,,] = cbind(alpha.d[ind1],beta.d[ind1]) pars[2,,] = cbind(alpha.d[ind2],beta.d[ind2]) pars[3,,] = cbind(alpha.d[ind3],beta.d[ind3]) par(mfrow=c(1,3)) plot(pars[1,,],xlab=expression(alpha),ylab=expression(beta),xlim=c(-0.5,2),ylim=c(-0.5,2.5)) contour(alphas,betas,exp(loglikes[1,,]),col=2,add=TRUE,drawlabels=FALSE,lwd=2) plot(pars[2,,],xlab=expression(alpha),ylab=expression(beta),xlim=c(-0.5,2),ylim=c(-0.5,2.5)) contour(alphas,betas,exp(loglikes[2,,]),col=2,add=TRUE,drawlabels=FALSE,lwd=2) plot(pars[3,,],xlab=expression(alpha),ylab=expression(beta),xlim=c(-0.5,2),ylim=c(-0.5,2.5)) contour(alphas,betas,exp(loglikes[3,,]),col=2,add=TRUE,drawlabels=FALSE,lwd=2) par(mfrow=c(2,3)) hist(pars[1,,1],prob=TRUE,main=expression(alpha),xlab="");abline(v=alpha.mle,col=2,lwd=2) hist(pars[2,,1],prob=TRUE,main=expression(alpha),xlab="");abline(v=alpha.mle,col=2,lwd=2) hist(pars[3,,1],prob=TRUE,main=expression(alpha),xlab="");abline(v=alpha.mle,col=2,lwd=2) hist(pars[1,,2],prob=TRUE,main=expression(beta),xlab="");abline(v=beta.mle,col=2,lwd=2) hist(pars[2,,2],prob=TRUE,main=expression(beta),xlab="");abline(v=beta.mle,col=2,lwd=2) hist(pars[3,,2],prob=TRUE,main=expression(beta),xlab="");abline(v=beta.mle,col=2,lwd=2) par(mfrow=c(1,2)) boxplot(t(pars[,,1]),outline=FALSE,main=expression(alpha),names=c("Gaussian","Student's t","Laplace")) abline(h=alpha.mle,col=2,lwd=2) boxplot(t(pars[,,2]),outline=FALSE,main=expression(beta),names=c("Gaussian","Student's t","Laplace")) abline(h=beta.mle,col=2,lwd=2) # Bayesian model comparison via prior predictive, Bayes factors and posterior model probabilities # ----------------------------------------------------------------------------------------------- pred = apply(ww,2,mean) pmp = pred/sum(pred) B21 = pred[2]/pred[1] B23 = pred[2]/pred[3] B31 = pred[3]/pred[1] c(B21,B23,B31) # 3.536969e+03 6.919982e-02 5.111240e+04 cbind(pred,pmp) # pred pmp # 1.228194e-52 1.829814e-05 # 4.344085e-49 6.471995e-02 # 6.277596e-48 9.352618e-01 # Comparing posterior model probabilities for the three scenarios # --------------------------------------------------------------- set.seed(1235) n = 100 alpha = 0.5 beta = 1.2 sig = 1 nu = 3 x = rnorm(n,0.5,0.25) M = 100000 alpha.d = runif(M,-0.5,2) beta.d = runif(M,-0.5,2.5) w = matrix(0,M,3) pred = matrix(0,3,3) pmp = matrix(0,3,3) e = sig*rnorm(n) y = alpha+beta*x+e for (i in 1:M){ w[i,1] = loglike.g(alpha.d[i],beta.d[i],sig) w[i,2] = loglike.t(alpha.d[i],beta.d[i],sig) w[i,3] = loglike.l(alpha.d[i],beta.d[i],sig) } ww = exp(w) pred = apply(ww,2,mean) pmp[1,] = pred/sum(pred) e = sig*rt(n,df=nu) y = alpha+beta*x+e for (i in 1:M){ w[i,1] = loglike.g(alpha.d[i],beta.d[i],sig) w[i,2] = loglike.t(alpha.d[i],beta.d[i],sig) w[i,3] = loglike.l(alpha.d[i],beta.d[i],sig) } ww = exp(w) pred = apply(ww,2,mean) pmp[2,] = pred/sum(pred) e = rgamma(n,1,1/w2)*sample(c(-1,1),replace=TRUE,size=n) y = alpha+beta*x+e for (i in 1:M){ w[i,1] = loglike.g(alpha.d[i],beta.d[i],sig) w[i,2] = loglike.t(alpha.d[i],beta.d[i],sig) w[i,3] = loglike.l(alpha.d[i],beta.d[i],sig) } ww = exp(w) pred = apply(ww,2,mean) pmp[3,] = pred/sum(pred) tab = round(pmp,3) rownames(tab)=c("True Gaussian","True Student's t","True Laplace") colnames(tab)=c("Fit Gaussian","Fit Student's t","Fit Laplace") tab # Fit Gaussian Fit Student's t Fit Laplace # True Gaussian 0.997 0.003 0.000 # True Student's t 0.000 1.000 0.000 # True Laplace 0.000 0.598 0.402 # Replication the comparison for the three scenarios # -------------------------------------------------- set.seed(1235) n = 200 alpha = 0.5 beta = 1.2 sig = 1 nu = 3 M = 100000 alpha.d = runif(M,-0.5,2) beta.d = runif(M,-0.5,2.5) w = matrix(0,M,3) Rep = 100 pmp = array(0,c(Rep,3,3)) for (r in 1:Rep){ x = rnorm(n,0.5,0.25) e = sig*rnorm(n) y = alpha+beta*x+e for (i in 1:M){ w[i,1] = loglike.g(alpha.d[i],beta.d[i],sig) w[i,2] = loglike.t(alpha.d[i],beta.d[i],sig) w[i,3] = loglike.l(alpha.d[i],beta.d[i],sig) } ww = exp(w) pred = apply(ww,2,mean) pmp[r,1,] = pred/sum(pred) e = sig*rt(n,df=nu) y = alpha+beta*x+e for (i in 1:M){ w[i,1] = loglike.g(alpha.d[i],beta.d[i],sig) w[i,2] = loglike.t(alpha.d[i],beta.d[i],sig) w[i,3] = loglike.l(alpha.d[i],beta.d[i],sig) } ww = exp(w) pred = apply(ww,2,mean) pmp[r,2,] = pred/sum(pred) e = rgamma(n,1,1/w2)*sample(c(-1,1),replace=TRUE,size=n) y = alpha+beta*x+e for (i in 1:M){ w[i,1] = loglike.g(alpha.d[i],beta.d[i],sig) w[i,2] = loglike.t(alpha.d[i],beta.d[i],sig) w[i,3] = loglike.l(alpha.d[i],beta.d[i],sig) } ww = exp(w) pred = apply(ww,2,mean) pmp[r,3,] = pred/sum(pred) } tab = rbind(apply(pmp[,1,]==apply(pmp[,1,],1,max),2,sum)/Rep, apply(pmp[,2,]==apply(pmp[,2,],1,max),2,sum)/Rep, apply(pmp[,3,]==apply(pmp[,3,],1,max),2,sum)/Rep) rownames(tab)=c("True Gaussian","True Student's t","True Laplace") colnames(tab)=c("Fit Gaussian","Fit Student's t","Fit Laplace") tab # n=100 Fit Gaussian Fit Student's t Fit Laplace # True Gaussian 1.0 0.00 0.00 # True Student's t 0.1 0.90 0.00 # True Laplace 0.0 0.08 0.92 # n=50 Fit Gaussian Fit Student's t Fit Laplace # True Gaussian 0.9 0.09 0.01 # True Student's t 0.2 0.80 0.00 # True Laplace 0.0 0.16 0.84 # n=25 Fit Gaussian Fit Student's t Fit Laplace # True Gaussian 0.85 0.11 0.04 # True Student's t 0.45 0.55 0.00 # True Laplace 0.04 0.13 0.83