Gaussian and Student’s \(t\) simple linear regression

We assume that the (continuous and real-valued) observations \(y_1,\ldots,y_n\) are conditionally independent with \[ y_i = \alpha + \beta x_i + \varepsilon_i, \] where the errors are i.i.d. Student’s \(t\) with \(\nu\) degrees of freedom, i.e. \[ \varepsilon_i \sim t_\nu(0,\sigma^2), \qquad i=1,\ldots,n. \] The degrees of freedom \(\nu\) will be treated as known for now (we return to learning about it at the end of this section), so the unknown quantities (parameters) are \[ \theta = (\alpha,\beta,\sigma^2). \]
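
Under this model the likelihood, which will drive the importance weights below, is a product of location-scale Student’s \(t\) densities, \[ p(y_{1:n}\mid\theta) = \prod_{i=1}^{n} \frac{1}{\sigma}\, f_\nu\!\left(\frac{y_i-\alpha-\beta x_i}{\sigma}\right), \] where \(f_\nu\) denotes the standard \(t_\nu\) density; this is exactly what the function reg.t defined further down evaluates on the log scale. We start with data simulated from the Gaussian special case.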

set.seed(2353)
# Simulate n observations from a Gaussian simple linear regression
n          = 40
alpha.true = 1
beta.true  = 3
sig.true   = 1
x = rnorm(n)
y = alpha.true+beta.true*x+rnorm(n,0,sig.true)

# Scatterplot with the ordinary least-squares fit
par(mfrow=c(1,1))
plot(x,y)
ols = lm(y~x)
abline(ols$coef,lwd=2)

ols
## 
## Call:
## lm(formula = y ~ x)
## 
## Coefficients:
## (Intercept)            x  
##      0.9633       2.6044

Let us now contaminate the data with some outliers: the responses of the three observations with the smallest values of \(x\) are replaced by the current maximum of \(y\).

par(mfrow=c(1,1))
plot(x,y)
ols = lm(y~x)
abline(ols$coef,lwd=2)
# Indices of the three smallest values of x
obs1 = order(x)[1]
obs2 = order(x)[2]
obs3 = order(x)[3]
# Replace their responses by the current maximum of y
y[obs1]=max(y)
y[obs2]=max(y)
y[obs3]=max(y)
# Highlight the contaminated observations in red
points(x[obs1],y[obs1],pch=16,col=2)
points(x[obs2],y[obs2],pch=16,col=2)
points(x[obs3],y[obs3],pch=16,col=2)
ols1 = lm(y~x)   # refit least squares on the contaminated data
ols1
## 
## Call:
## lm(formula = y ~ x)
## 
## Coefficients:
## (Intercept)            x  
##       1.771        1.574
abline(coef(ols1),lwd=2,col=2)
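
The three contaminated points pull the least-squares slope from about 2.6 down to about 1.6 and shift the intercept upwards, illustrating how sensitive the Gaussian/least-squares fit is to outliers and motivating the heavier-tailed Student’s \(t\) error model.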

SIR-based Bayesian posterior inference
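
Posterior inference is carried out by sampling importance resampling (SIR): draw \(M\) parameter vectors from the prior, weight each draw by its likelihood, and resample with replacement proportionally to those weights. In the code below the prior is a mean-zero normal with standard deviation sd.alpha for \(\alpha\), a mean-zero normal with standard deviation sd.beta for \(\beta\), and an inverse-gamma \(IG(\texttt{nu}/2,\texttt{nu}\cdot\texttt{sig0}^2/2)\) for \(\sigma^2\), where nu = 2 and sig0 = 1 are prior hyperparameters (not to be confused with the Student’s \(t\) degrees of freedom df, held fixed at 30). The unnormalized weights are \[ w^{(m)} \propto p\big(y_{1:n}\mid\alpha^{(m)},\beta^{(m)},\sigma^{(m)}\big), \qquad m=1,\ldots,M. \]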

# Log-density of a location-scale Student's t: y ~ t_df(m, s^2)
logdt = function(y,m,s,df){
  dt((y-m)/s,df=df,log=TRUE)-log(s)
}

# Log-likelihood of the regression; note that x and y are read
# from the global environment
reg.t = function(alpha,beta,sigma,df){
  sum(logdt(y,alpha+beta*x,sigma,df=df))
}
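
As a quick sanity check (not part of the original analysis), note that with m = 0 and s = 1 the location-scale construction must reduce to the standard \(t\) log-density, and that the log-likelihood can be evaluated at the data-generating values:

logdt(0.7,0,1,df=5)       # should match the line below
dt(0.7,df=5,log=TRUE)
reg.t(alpha.true,beta.true,sig.true,df=30)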

set.seed(12135)
M        = 10000   # number of prior draws
df       = 30      # Student's t degrees of freedom (held fixed here)
sd.alpha = 1       # prior sd of alpha
sd.beta  = 3       # prior sd of beta
nu       = 2       # IG(nu/2, nu*sig0^2/2) prior hyperparameters for sigma^2
sig0     = 1
# Draws from the prior
alpha = rnorm(M,0,sd.alpha)
beta  = rnorm(M,0,sd.beta)
sig   = sqrt(1/rgamma(M,nu/2,nu*sig0^2/2))
# Importance weights: likelihood of the data at each prior draw
w     = rep(0,M)
for (i in 1:M)
  w[i] = reg.t(alpha[i],beta[i],sig[i],df)
w = exp(w) 
log.predictive = log(mean(w))   # Monte Carlo estimate of the log prior predictive
# Resample proportionally to the weights to obtain approximate posterior draws
ind    = sample(1:M,size=M,replace=TRUE,prob=w)
alpha1 = alpha[ind]
beta1  = beta[ind]
sig1   = sig[ind]
sig11  = sqrt((df-2)/df)*sig1   # rescaled sigma used for comparison with sig.true in the plots
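
With \(n=40\) observations the summed log-densities can be quite negative, so exponentiating them directly risks numerical underflow. A minimal sketch of a more stable variant of the weight computation above (same estimator, with the largest log-weight factored out; logw and wmax are names introduced here for illustration):

logw = rep(0,M)
for (i in 1:M)
  logw[i] = reg.t(alpha[i],beta[i],sig[i],df)
wmax = max(logw)
w    = exp(logw-wmax)              # weights only matter up to a constant
log.predictive = wmax+log(mean(w)) # add the constant back
ind  = sample(1:M,size=M,replace=TRUE,prob=w)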

# Prior (red curves) versus posterior (histograms) of alpha, beta, sigma, plus joint (alpha, beta) draws
par(mfrow=c(2,2))
hist(alpha1,xlim=range(alpha,alpha1),prob=TRUE,
     xlab=expression(alpha),ylab="Density",main="")
lines(density(alpha),col=2)
points(alpha.true,0,col=4,pch=16)
legend("topleft",legend=c("Prior","Posterior"),col=2:1,lty=1)
hist(beta1,xlim=range(beta,beta1),prob=TRUE,
     xlab=expression(beta),ylab="Density",main="")
lines(density(beta),col=2)
points(beta.true,0,col=4,pch=16)
hist(sig11,xlim=range(sig,sig11),prob=TRUE,
     xlab=expression(sigma),ylab="Density",main="")
lines(density(sig),col=2)
points(sig.true,0,col=4,pch=16)
plot(alpha,beta,xlab=expression(alpha),ylab=expression(beta))
points(alpha1,beta1,col=2)
points(alpha.true,beta.true,pch=16,col=3)
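
Numerical posterior summaries can be read directly off the resampled draws; for example (a small sketch using base R quantiles):

round(apply(cbind(alpha=alpha1,beta=beta1,sigma=sig11),2,quantile,
            probs=c(0.025,0.5,0.975)),3)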

Learning about \(\nu\) via prior predictive densities
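
For each candidate value of \(\nu\) on a grid, the prior predictive (marginal likelihood) \[ p(y_{1:n}\mid\nu) = \int p(y_{1:n}\mid\theta,\nu)\,p(\theta)\,d\theta \approx \frac{1}{M}\sum_{m=1}^{M} p\big(y_{1:n}\mid\theta^{(m)},\nu\big), \qquad \theta^{(m)} \sim p(\theta), \] is estimated by reusing the same prior draws. Normalizing these values over the grid gives a discrete posterior for \(\nu\) under a uniform prior on \(\{1,\ldots,N\}\).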

set.seed(12135)
M        = 10000
sd.alpha = 1
sd.beta  = 3
nu       = 2       # IG prior hyperparameters for sigma^2, as before
sig0     = 1
# Prior draws, shared across all candidate degrees of freedom
alpha = rnorm(M,0,sd.alpha)
beta  = rnorm(M,0,sd.beta)
sig   = sqrt(1/rgamma(M,nu/2,nu*sig0^2/2))
# Grid of candidate degrees of freedom
N = 30
df = 1:N
log.predictive = rep(0,N)
w     = rep(0,M)
for (j in 1:N){
  for (i in 1:M)
    w[i] = reg.t(alpha[i],beta[i],sig[i],df[j])
  w = exp(w) 
  log.predictive[j] = log(mean(w))
}
pred = exp(log.predictive)
pred = pred/sum(pred)   # normalize over the grid: posterior for df under a uniform prior

par(mfrow=c(1,1))
plot(df,pred,type="h",xlab="Degrees of freedom",ylab="Prior predictive")
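
The mode of this discrete distribution indicates the most plausible degrees of freedom given the contaminated data; for instance:

df[which.max(pred)]   # value of df with the highest prior predictive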