We assume that the (continuous and real-valued) observations \(y_1,\ldots,y_n\) are conditionally independent with \[ y_i = \alpha + \beta x_i + \varepsilon_i \] where the errors are i.i.d. Student’s \(t\) with \(\nu\) degrees of freedom, i.e. \[ \varepsilon_i \sim t_\nu(0,\sigma^2), \qquad i=1,\ldots,n. \] The quantity \(\nu\) will be considered known in this study, so the unknown quantities (parameters) are \[ \theta = (\alpha,\beta,\sigma^2). \]
## Simulate n observations from a Gaussian linear regression
## y = alpha + beta * x + N(0, sig^2) noise, then fit and plot OLS.
## The fixed seed makes the simulated data reproducible.
set.seed(2353)
n <- 40
alpha.true <- 1
beta.true <- 3
sig.true <- 1

## Covariate and response draws.
x <- rnorm(n)
y <- alpha.true + beta.true * x + rnorm(n, 0, sig.true)

## Scatterplot with the least-squares line superimposed.
par(mfrow = c(1, 1))
plot(x, y)
ols <- lm(y ~ x)
abline(coef(ols), lwd = 2)
ols
##
## Call:
## lm(formula = y ~ x)
##
## Coefficients:
## (Intercept) x
## 0.9633 2.6044
## Contaminate the data and refit: the three observations with the
## smallest covariate values get their responses replaced by the sample
## maximum, creating influential outliers. Compare the refitted OLS
## line (red) against the original fit (black).
par(mfrow = c(1, 1))
plot(x, y)
ols <- lm(y ~ x)
abline(coef(ols), lwd = 2)

## Indices of the three smallest x values.
out.idx <- order(x)[1:3]
## max(y) is unchanged by these assignments (each value is set to the
## current maximum), so the vectorized form matches the sequential one.
y[out.idx] <- max(y)
points(x[out.idx], y[out.idx], pch = 16, col = 2)

ols1 <- lm(y ~ x)
ols1
##
## Call:
## lm(formula = y ~ x)
##
## Coefficients:
## (Intercept) x
## 1.771 1.574
abline(coef(ols1), lwd = 2, col = 2)
## Log-density of the location-scale Student-t distribution:
## if z = (y - m)/s follows a standard t with df degrees of freedom,
## the density of y is dt(z, df)/s, so the log picks up a -log(s)
## Jacobian term. Vectorized over y (and m/s by recycling).
logdt <- function(y, m, s, df) {
  z <- (y - m) / s
  dt(z, df = df, log = TRUE) - log(s)
}
## Log-likelihood of the t regression model
## y_i ~ t_df(alpha + beta * x_i, sigma^2), summed over all observations.
## NOTE(review): reads the data vectors x and y from the global
## environment rather than taking them as arguments.
reg.t <- function(alpha, beta, sigma, df) {
  mu <- alpha + beta * x
  sum(logdt(y, mu, sigma, df = df))
}
## Sampling-importance-resampling (SIR) from the prior for the t
## regression with df = 30 degrees of freedom.
## Prior: alpha ~ N(0, sd.alpha^2), beta ~ N(0, sd.beta^2),
## sigma^2 ~ Inverse-Gamma(nu/2, nu*sig0^2/2).
set.seed(12135)
M <- 10000
df <- 30
sd.alpha <- 1
sd.beta <- 3
nu <- 2
sig0 <- 1

## M independent draws from the prior.
alpha <- rnorm(M, 0, sd.alpha)
beta <- rnorm(M, 0, sd.beta)
sig <- sqrt(1 / rgamma(M, nu / 2, nu * sig0^2 / 2))

## Log importance weights = log-likelihood at each prior draw.
w <- rep(0, M)
for (i in 1:M)
w[i] <- reg.t(alpha[i], beta[i], sig[i], df)

## FIX: stabilize with the log-sum-exp trick. Exponentiating raw
## log-likelihoods (sums of n = 40 log densities) underflows to 0 for
## poor draws; if all weights underflow, mean(w) is 0, the log predictive
## becomes -Inf, and sample(prob = w) errors out.
w.max <- max(w)
w <- exp(w - w.max)
log.predictive <- w.max + log(mean(w))

## Resample with probability proportional to the weights. sample()
## normalizes prob internally, so the constant exp(-w.max) factor does
## not change the resampling distribution.
ind <- sample(1:M, size = M, replace = TRUE, prob = w)
alpha1 <- alpha[ind]
beta1 <- beta[ind]
sig1 <- sig[ind]
## Rescale so sig11 estimates the error s.d.: a t_df scale-sigma variable
## has variance sigma^2 * df/(df - 2).
sig11 <- sqrt((df - 2) / df) * sig1

## Prior (red density curve) vs posterior (histogram) for each parameter;
## true values marked as filled blue dots.
par(mfrow = c(2, 2))
hist(alpha1, xlim = range(alpha, alpha1), prob = TRUE,
xlab = expression(alpha), ylab = "Density", main = "")
lines(density(alpha), col = 2)
points(alpha.true, 0, col = 4, pch = 16)
legend("topleft", legend = c("Prior", "Posterior"), col = 2:1, lty = 1)
hist(beta1, xlim = range(beta, beta1), prob = TRUE,
xlab = expression(beta), ylab = "Density", main = "")
lines(density(beta), col = 2)
points(beta.true, 0, col = 4, pch = 16)
hist(sig11, xlim = range(sig, sig11), prob = TRUE,
xlab = expression(sigma), ylab = "Density", main = "")
lines(density(sig), col = 2)
points(sig.true, 0, col = 4, pch = 16)
plot(alpha, beta, xlab = expression(alpha), ylab = expression(beta))
points(alpha1, beta1, col = 2)
points(alpha.true, beta.true, pch = 16, col = 3)
## Prior predictive p(y | df) over a grid of degrees of freedom
## df = 1, ..., N, each approximated by importance sampling with the
## same set of prior draws.
set.seed(12135)
M <- 10000
sd.alpha <- 1
sd.beta <- 3
nu <- 2   # FIX: define locally; previously inherited silently from the block above
sig0 <- 1

## M draws from the prior (same prior as before).
alpha <- rnorm(M, 0, sd.alpha)
beta <- rnorm(M, 0, sd.beta)
sig <- sqrt(1 / rgamma(M, nu / 2, nu * sig0^2 / 2))

N <- 30
df <- 1:N
log.predictive <- rep(0, N)
w <- rep(0, M)
for (j in 1:N) {
  for (i in 1:M)
    w[i] <- reg.t(alpha[i], beta[i], sig[i], df[j])
  ## FIX: log-sum-exp — exp(w) alone underflows to 0 for poor draws,
  ## which would make log(mean(exp(w))) equal -Inf.
  w.max <- max(w)
  log.predictive[j] <- w.max + log(mean(exp(w - w.max)))
}
## FIX: normalize on the log scale before exponentiating, for the same
## numerical reason; subtracting the max leaves pred/sum(pred) unchanged.
pred <- exp(log.predictive - max(log.predictive))
pred <- pred / sum(pred)
par(mfrow = c(1, 1))
plot(df, pred, type = "h", xlab = "Degrees of freedom", ylab = "Prior predictive")