---
title: "Bayesian Student t linear regression"
author: "Hedibert Freitas Lopes"
date: "1/20/2020"
output:
  pdf_document: default
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

# Gaussian and Student's $t$ simple linear regression

We assume that the (continuous and real-valued) observations $y_1,\ldots,y_n$ are conditionally independent with
$$
y_i = \alpha + \beta x_i + \varepsilon_i,
$$
where the errors are i.i.d. Student's $t$ with $\nu$ degrees of freedom, i.e.
$$
\varepsilon_i \sim t_\nu(0,\sigma^2), \qquad i=1,\ldots,n.
$$
The degrees of freedom $\nu$ will be considered known for now (the last section compares fixed values of $\nu$ via their prior predictive densities), so the unknown quantities (parameters) are
$$
\theta = (\alpha,\beta,\sigma^2).
$$
We first simulate data with Gaussian errors and then contaminate them with a few outliers to illustrate the behavior of the Student's $t$ error model relative to ordinary least squares.

```{r fig.width=12, fig.height=7}
# Simulate n = 40 observations from a Gaussian simple linear regression
set.seed(2353)
n = 40
alpha.true = 1
beta.true = 3
sig.true = 1
x = rnorm(n)
y = alpha.true+beta.true*x+rnorm(n,0,sig.true)
par(mfrow=c(1,1))
plot(x,y)
ols = lm(y~x)
abline(ols$coef,lwd=2)
ols
```

# Let us add some outliers

The three observations with the smallest values of $x$ are moved up to the maximum of $y$, and OLS is refit; the new line (in red) is pulled toward the contaminated points.

```{r fig.width=12, fig.height=7}
par(mfrow=c(1,1))
plot(x,y)
ols = lm(y~x)
abline(ols$coef,lwd=2)
# Turn the three observations with the smallest x into outliers
obs1 = order(x)[1]
obs2 = order(x)[2]
obs3 = order(x)[3]
y[obs1]=max(y)
y[obs2]=max(y)
y[obs3]=max(y)
points(x[obs1],y[obs1],pch=16,col=2)
points(x[obs2],y[obs2],pch=16,col=2)
points(x[obs3],y[obs3],pch=16,col=2)
# Refit OLS on the contaminated data (red line)
ols1 = lm(y~x)
ols1
abline(coef(ols1),lwd=2,col=2)
```

# SIR-based Bayesian posterior inference

Posterior inference is based on sampling importance resampling (SIR): $M$ draws of $\theta=(\alpha,\beta,\sigma)$ are simulated from the prior, each draw is weighted by the likelihood of the observed data, and the draws are then resampled with replacement in proportion to these weights. The resampled draws approximate the posterior distribution, and the average of the unnormalized weights is a Monte Carlo estimate of the prior predictive density $p(y_1,\ldots,y_n)$. In the code below, `df` is the Student's $t$ degrees of freedom, while `nu` and `sig0` are hyperparameters of the inverse-gamma prior for $\sigma^2$.

```{r fig.width=12, fig.height=10}
# Log density of a Student's t with location m, scale s and df degrees of freedom
logdt = function(y,m,s,df){
  dt((y-m)/s,df=df,log=TRUE)-log(s)
}
# Log-likelihood of the Student's t regression
reg.t = function(alpha,beta,sigma,df){
  sum(logdt(y,alpha+beta*x,sigma,df=df))
}

set.seed(12135)
M = 10000
df = 30
# Prior: alpha ~ N(0,sd.alpha^2), beta ~ N(0,sd.beta^2), sigma^2 ~ IG(nu/2,nu*sig0^2/2)
sd.alpha = 1
sd.beta = 3
nu = 2
sig0 = 1
alpha = rnorm(M,0,sd.alpha)
beta = rnorm(M,0,sd.beta)
sig = sqrt(1/rgamma(M,nu/2,nu*sig0^2/2))
# Importance weights: likelihood evaluated at each prior draw
w = rep(0,M)
for (i in 1:M) w[i] = reg.t(alpha[i],beta[i],sig[i],df)
w = exp(w)
log.predictive = log(mean(w))
# Resample with replacement, with probabilities proportional to the weights
ind = sample(1:M,size=M,replace=TRUE,prob=w)
alpha1 = alpha[ind]
beta1 = beta[ind]
sig1 = sig[ind]
# Rescale the sigma draws by sqrt((df-2)/df) before comparing with sig.true
sig11 = sqrt((df-2)/df)*sig1

# Prior (red curve) versus posterior (histogram) for each parameter
par(mfrow=c(2,2))
hist(alpha1,xlim=range(alpha,alpha1),prob=TRUE,
     xlab=expression(alpha),ylab="Density",main="")
lines(density(alpha),col=2)
points(alpha.true,0,col=4,pch=16)
legend("topleft",legend=c("Prior","Posterior"),col=2:1,lty=1)
hist(beta1,xlim=range(beta,beta1),prob=TRUE,
     xlab=expression(beta),ylab="Density",main="")
lines(density(beta),col=2)
points(beta.true,0,col=4,pch=16)
hist(sig11,xlim=range(sig,sig11),prob=TRUE,
     xlab=expression(sigma),ylab="Density",main="")
lines(density(sig),col=2)
points(sig.true,0,col=4,pch=16)
plot(alpha,beta,xlab=expression(alpha),ylab=expression(beta))
points(alpha1,beta1,col=2)
points(alpha.true,beta.true,pch=16,col=3)
```

# Learning about $\nu$ via prior predictive densities

The previous step is repeated over the grid $\nu = 1,\ldots,30$. For each value, the Monte Carlo estimate of the prior predictive density $p(y_1,\ldots,y_n \mid \nu)$ is computed from the same prior draws; the estimates are then normalized over the grid, so that, under a uniform prior on the grid, the plotted values are proportional to the posterior probabilities of each $\nu$.

```{r fig.width=12, fig.height=7}
set.seed(12135)
M = 10000
sd.alpha = 1
sd.beta = 3
nu = 2
sig0 = 1
alpha = rnorm(M,0,sd.alpha)
beta = rnorm(M,0,sd.beta)
sig = sqrt(1/rgamma(M,nu/2,nu*sig0^2/2))
# Prior predictive density for each value of the degrees of freedom
N = 30
df = 1:N
log.predictive = rep(0,N)
w = rep(0,M)
for (j in 1:N){
  for (i in 1:M) w[i] = reg.t(alpha[i],beta[i],sig[i],df[j])
  w = exp(w)
  log.predictive[j] = log(mean(w))
}
# Normalize over the grid of degrees of freedom
pred = exp(log.predictive)
pred = pred/sum(pred)
par(mfrow=c(1,1))
plot(df,pred,type="h",xlab="Degrees of freedom",ylab="Prior predictive")
```
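
# Refitting at the modal degrees of freedom

As a minimal sketch of a possible follow-up, one can fix the degrees of freedom at the value with the largest prior predictive and redo the SIR step, comparing the resulting fit with OLS on the contaminated data. The object `df.hat` is introduced here only for illustration; the weights are exponentiated after subtracting their maximum purely for numerical stability, which does not change the resampling probabilities.

```{r fig.width=12, fig.height=7}
# Sketch (illustration only): redo the SIR step with the degrees of freedom
# fixed at the modal value of the normalized prior predictive
df.hat = df[which.max(pred)]
for (i in 1:M) w[i] = reg.t(alpha[i],beta[i],sig[i],df.hat)
w = exp(w-max(w))   # subtract the maximum before exponentiating (numerical stability)
ind = sample(1:M,size=M,replace=TRUE,prob=w)
par(mfrow=c(1,1))
plot(x,y)
abline(coef(lm(y~x)),lwd=2,col=2)                     # OLS on the contaminated data
abline(mean(alpha[ind]),mean(beta[ind]),lwd=2,col=4)  # posterior-mean Student's t fit
legend("topleft",legend=c("OLS","Student's t (posterior mean)"),col=c(2,4),lty=1,lwd=2)
```

The heavy-tailed likelihood downweights the three contaminated observations, so the two lines are expected to differ, with the Student's $t$ fit staying closer to the bulk of the data.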