################################################################################
# Simple linear regression with Gaussian, Student's t and Laplace errors
################################################################################
#
# e ~ N(0,sig2)    - var(e) = sig2
# e ~ t(0,tau2,nu) - var(e) = nu/(nu-2)*tau2   => tau2 = (nu-2)*sig2/nu
# e ~ L(0,w2)      - var(2) = 2*w2             => w2 = sig2/2
#
# p(e|laplace) = exp(-|e|/w2)/(2*w2)
#
################################################################################

# Let us first look at the behavior of these three distributions
# --------------------------------------------------------------
nu   = 4
sig2 = 1
sig  = sqrt(sig2)
tau2 = (nu-2)/2*sig2
tau  = sqrt(tau2)
w2   = sig2/2

par(mfrow=c(1,2))
e   = seq(-5,5,length=1000)
plot(e,dnorm(e,0,sig),xlab="error",ylab="Density",main="",type="l",ylim=c(0,1),lwd=2)
lines(e,dt(e/sig,df=nu)/sig,col=2,lwd=2)
lines(e,exp(-abs(e)/w2)/(2*w2),col=3,lwd=2)
legend("topleft",legend=c("Gaussian","Student's t","Laplace"),col=1:3,lty=1,lwd=2,bty="n")

e   = seq(-5,0,length=1000)
plot(e,dnorm(e,0,sig,log=TRUE),xlab="error",ylab="Log density",main="",type="l",ylim=c(-15,0),lwd=2)
lines(e,dt(e/sig,df=nu,log=TRUE)-log(sig),col=2,lwd=2)
lines(e,log(exp(-abs(e)/w2)/(2*w2)),col=3,lwd=2)


# Let us now simulate a simple linear regression data
# ---------------------------------------------------
set.seed(235)
n     = 100
alpha = 0.5
beta  = 1.2
sig   = 1
nu    = 3
#e    = sig*rt(n,df=nu)
#e    = sig*rnorm(n)
e     = rgamma(n,1,1/w2)*sample(c(-1,1),replace=TRUE,size=n)
x     = rnorm(n,0.5,0.25)
y     = alpha+beta*x+e

par(mfrow=c(1,3))
hist(e)
plot(x,y)
hist(lm(y~x)$res)

alpha.mle = lm(y~x)$coef[1]
beta.mle = lm(y~x)$coef[2]
c(alpha.mle,beta.mle)

# (Intercept)           x 
#   0.4680836   1.2286479 
  
  
# Let us now plot the likelihood of (alpha,beta) under each distribution
# ----------------------------------------------------------------------
loglike.g = function(alpha,beta,sig){
  sum(dnorm(y,alpha+beta*x,sig,log=TRUE))
}
loglike.t = function(alpha,beta,sig){
	tau = sqrt((nu-2)*sig^2/nu)
    sum(dt((y-alpha-beta*x)/tau,df=nu,log=TRUE))-n*log(tau)
}
loglike.l = function(alpha,beta,sig){
  w2 = sig^2/2
  sum(-abs(y-alpha-beta*x)/w2)-n*log(2*w2)
}


N      = 200
alphas = seq(0,2,length=N)
betas  = seq(0,2.5,length=N)
loglikes = array(0,c(3,N,N))
for (i in 1:N)
  for (j in 1:N){
    loglikes[1,i,j] = loglike.g(alphas[i],betas[j],sig)
    loglikes[2,i,j] = loglike.t(alphas[i],betas[j],sig)
    loglikes[3,i,j] = loglike.l(alphas[i],betas[j],sig)
}


par(mfrow=c(1,3))
contour(alphas,betas,exp(loglikes[1,,]),xlab=expression(alpha),ylab=expression(beta),drawlabels=FALSE)
title("Gaussian model")
points(alpha,beta,col=2,pch=16)
contour(alphas,betas,exp(loglikes[2,,]),xlab=expression(alpha),ylab=expression(beta),drawlabels=FALSE)
title("Student's t model")
points(alpha,beta,col=2,pch=16)
contour(alphas,betas,exp(loglikes[3,,]),xlab=expression(alpha),ylab=expression(beta),drawlabels=FALSE)
title("Laplace model")
points(alpha,beta,col=2,pch=16)


# Let us now consider Bayesian inference via SIR
# ----------------------------------------------
M       = 100000
alpha.d = runif(M,-0.5,2)
beta.d  = runif(M,-0.5,2.5)
w       = matrix(0,M,3)
for (i in 1:M){
  w[i,1] = loglike.g(alpha.d[i],beta.d[i],sig)
  w[i,2] = loglike.t(alpha.d[i],beta.d[i],sig)
  w[i,3] = loglike.l(alpha.d[i],beta.d[i],sig)
}
ww = exp(w)
w[,1] = exp(w[,1]-max(w[,1]))
w[,2] = exp(w[,2]-max(w[,2]))
w[,3] = exp(w[,3]-max(w[,3]))
ind1  = sample(1:M,size=M,replace=TRUE,prob=w[,1])
ind2  = sample(1:M,size=M,replace=TRUE,prob=w[,2])
ind3  = sample(1:M,size=M,replace=TRUE,prob=w[,3])

# Posterior inference for each model
pars = array(0,c(3,M,2))
pars[1,,] = cbind(alpha.d[ind1],beta.d[ind1])
pars[2,,] = cbind(alpha.d[ind2],beta.d[ind2])
pars[3,,] = cbind(alpha.d[ind3],beta.d[ind3])

par(mfrow=c(1,3))
plot(pars[1,,],xlab=expression(alpha),ylab=expression(beta),xlim=c(-0.5,2),ylim=c(-0.5,2.5))
contour(alphas,betas,exp(loglikes[1,,]),col=2,add=TRUE,drawlabels=FALSE,lwd=2)
plot(pars[2,,],xlab=expression(alpha),ylab=expression(beta),xlim=c(-0.5,2),ylim=c(-0.5,2.5))
contour(alphas,betas,exp(loglikes[2,,]),col=2,add=TRUE,drawlabels=FALSE,lwd=2)
plot(pars[3,,],xlab=expression(alpha),ylab=expression(beta),xlim=c(-0.5,2),ylim=c(-0.5,2.5))
contour(alphas,betas,exp(loglikes[3,,]),col=2,add=TRUE,drawlabels=FALSE,lwd=2)


par(mfrow=c(2,3))
hist(pars[1,,1],prob=TRUE,main=expression(alpha),xlab="");abline(v=alpha.mle,col=2,lwd=2)
hist(pars[2,,1],prob=TRUE,main=expression(alpha),xlab="");abline(v=alpha.mle,col=2,lwd=2)
hist(pars[3,,1],prob=TRUE,main=expression(alpha),xlab="");abline(v=alpha.mle,col=2,lwd=2)
hist(pars[1,,2],prob=TRUE,main=expression(beta),xlab="");abline(v=beta.mle,col=2,lwd=2)
hist(pars[2,,2],prob=TRUE,main=expression(beta),xlab="");abline(v=beta.mle,col=2,lwd=2)
hist(pars[3,,2],prob=TRUE,main=expression(beta),xlab="");abline(v=beta.mle,col=2,lwd=2)


par(mfrow=c(1,2))
boxplot(t(pars[,,1]),outline=FALSE,main=expression(alpha),names=c("Gaussian","Student's t","Laplace"))
abline(h=alpha.mle,col=2,lwd=2)
boxplot(t(pars[,,2]),outline=FALSE,main=expression(beta),names=c("Gaussian","Student's t","Laplace"))
abline(h=beta.mle,col=2,lwd=2)


# Bayesian model comparison via prior predictive, Bayes factors and posterior model probabilities
# -----------------------------------------------------------------------------------------------
pred = apply(ww,2,mean)
pmp = pred/sum(pred)
B21 = pred[2]/pred[1]
B23 = pred[2]/pred[3]
B31 = pred[3]/pred[1]

c(B21,B23,B31)
# 3.536969e+03 6.919982e-02 5.111240e+04

cbind(pred,pmp)
#         pred          pmp
# 1.228194e-52 1.829814e-05
# 4.344085e-49 6.471995e-02
# 6.277596e-48 9.352618e-01


# Comparing posterior model probabilities for the three scenarios
# ---------------------------------------------------------------
set.seed(1235)
n     = 100
alpha = 0.5
beta  = 1.2
sig   = 1
nu    = 3
x     = rnorm(n,0.5,0.25)

M       = 100000
alpha.d = runif(M,-0.5,2)
beta.d  = runif(M,-0.5,2.5)
w       = matrix(0,M,3)

pred = matrix(0,3,3)
pmp  = matrix(0,3,3)

e = sig*rnorm(n)
y = alpha+beta*x+e
for (i in 1:M){
  w[i,1] = loglike.g(alpha.d[i],beta.d[i],sig)
  w[i,2] = loglike.t(alpha.d[i],beta.d[i],sig)
  w[i,3] = loglike.l(alpha.d[i],beta.d[i],sig)
}
ww = exp(w)
pred = apply(ww,2,mean)
pmp[1,]  = pred/sum(pred)

e = sig*rt(n,df=nu)
y = alpha+beta*x+e
for (i in 1:M){
  w[i,1] = loglike.g(alpha.d[i],beta.d[i],sig)
  w[i,2] = loglike.t(alpha.d[i],beta.d[i],sig)
  w[i,3] = loglike.l(alpha.d[i],beta.d[i],sig)
}
ww = exp(w)
pred = apply(ww,2,mean)
pmp[2,]  = pred/sum(pred)


e     = rgamma(n,1,1/w2)*sample(c(-1,1),replace=TRUE,size=n)
y     = alpha+beta*x+e
for (i in 1:M){
  w[i,1] = loglike.g(alpha.d[i],beta.d[i],sig)
  w[i,2] = loglike.t(alpha.d[i],beta.d[i],sig)
  w[i,3] = loglike.l(alpha.d[i],beta.d[i],sig)
}
ww = exp(w)
pred = apply(ww,2,mean)
pmp[3,]  = pred/sum(pred)

tab = round(pmp,3)
rownames(tab)=c("True Gaussian","True Student's t","True Laplace")
colnames(tab)=c("Fit Gaussian","Fit Student's t","Fit Laplace")
tab

#                  Fit Gaussian Fit Student's t Fit Laplace
# True Gaussian           0.997           0.003       0.000
# True Student's t        0.000           1.000       0.000
# True Laplace            0.000           0.598       0.402


# Replication the comparison for the three scenarios
# --------------------------------------------------
set.seed(1235)
n     = 200
alpha = 0.5
beta  = 1.2
sig   = 1
nu    = 3
M       = 100000
alpha.d = runif(M,-0.5,2)
beta.d  = runif(M,-0.5,2.5)
w       = matrix(0,M,3)

Rep = 100
pmp  = array(0,c(Rep,3,3))

for (r in 1:Rep){
x = rnorm(n,0.5,0.25)
e = sig*rnorm(n)
y = alpha+beta*x+e
for (i in 1:M){
  w[i,1] = loglike.g(alpha.d[i],beta.d[i],sig)
  w[i,2] = loglike.t(alpha.d[i],beta.d[i],sig)
  w[i,3] = loglike.l(alpha.d[i],beta.d[i],sig)
}
ww = exp(w)
pred = apply(ww,2,mean)
pmp[r,1,]  = pred/sum(pred)

e = sig*rt(n,df=nu)
y = alpha+beta*x+e
for (i in 1:M){
  w[i,1] = loglike.g(alpha.d[i],beta.d[i],sig)
  w[i,2] = loglike.t(alpha.d[i],beta.d[i],sig)
  w[i,3] = loglike.l(alpha.d[i],beta.d[i],sig)
}
ww = exp(w)
pred = apply(ww,2,mean)
pmp[r,2,]  = pred/sum(pred)


e     = rgamma(n,1,1/w2)*sample(c(-1,1),replace=TRUE,size=n)
y     = alpha+beta*x+e
for (i in 1:M){
  w[i,1] = loglike.g(alpha.d[i],beta.d[i],sig)
  w[i,2] = loglike.t(alpha.d[i],beta.d[i],sig)
  w[i,3] = loglike.l(alpha.d[i],beta.d[i],sig)
}
ww = exp(w)
pred = apply(ww,2,mean)
pmp[r,3,]  = pred/sum(pred)
}

tab = rbind(apply(pmp[,1,]==apply(pmp[,1,],1,max),2,sum)/Rep,
apply(pmp[,2,]==apply(pmp[,2,],1,max),2,sum)/Rep,
apply(pmp[,3,]==apply(pmp[,3,],1,max),2,sum)/Rep)
rownames(tab)=c("True Gaussian","True Student's t","True Laplace")
colnames(tab)=c("Fit Gaussian","Fit Student's t","Fit Laplace")
tab


# n=100            Fit Gaussian Fit Student's t Fit Laplace
# True Gaussian             1.0            0.00        0.00
# True Student's t          0.1            0.90        0.00
# True Laplace              0.0            0.08        0.92

# n=50             Fit Gaussian Fit Student's t Fit Laplace
# True Gaussian             0.9            0.09        0.01
# True Student's t          0.2            0.80        0.00
# True Laplace              0.0            0.16        0.84

# n=25             Fit Gaussian Fit Student's t Fit Laplace
# True Gaussian            0.85            0.11        0.04
# True Student's t         0.45            0.55        0.00
# True Laplace             0.04            0.13        0.83