############################################################################################ # # Bayesian linear regression with Student t errors # ############################################################################################ # # HEDIBERT FREITAS LOPES # Associate Professor of Econometrics and Statistics # The University of Chicago Booth School of Business # 5807 South Woodlawn Avenue # Chicago, Illinois, 60637 # Email : hlopes@ChicagoBooth.edu # URL: http://faculty.chicagobooth.edu/hedibert.lopes/research/ # ############################################################################################ rm(list=ls()) sampleL = function(x){sample(1:length(x),size=1,replace=T,prob=x)} dtt = function(x,df,sd){dt(x/sd,df)/sd} ldig = function(x,a,b){dgamma(1/x,a,b,log=TRUE)-2*log(x)} quant05 = function(x){quantile(x,0.05)} quant95 = function(x){quantile(x,0.95)} #------------------------------------------------------- # y = X*beta + u u ~ N(0,sig2*I_n) # beta|sig2 ~ N(b,sig2*inv(A)) # sig2 ~ IG(v/2,v*lam/2) #------------------------------------------------------- fixedpar = function(y,X,A,b,v,lam){ n = length(y) k = ncol(X) par1 = (v+n)/2 var = solve(crossprod(X,X)+A) mean = matrix(var%*%(crossprod(X,y)+crossprod(A,b)),k,1) par2 = v*lam + sum((y-crossprod(t(X),mean))^2) par2 = (par2 + crossprod(t(crossprod(mean-b,A)),mean-b))/2 sig2 = 1/rgamma(1,par1,par2) var = var*sig2 mean = mean + crossprod(t(chol(var)),rnorm(2)) return(c(mean,sig2)) } data = matrix(scan("logwages-yearseducation.txt"),byrow=TRUE,ncol=2) n = nrow(data) y = data[,1] x = data[,2] X = cbind(1,x) bhat = lm(y~x)$coef reshat = lm(y~x)$res s2hat = var(reshat) pdf(file="ols-regression.pdf",width=10,height=10) par(mfrow=c(2,2)) plot(x,y,xlab="Years of education completed",ylab="Log wages") abline(bhat,col=2) plot(reshat,xlab="Observation",ylab="OLS residuals",pch=16) abline(h=0,lty=2) abline(h=-3*sqrt(s2hat),lty=2) abline(h=3*sqrt(s2hat),lty=2) mean(abs(reshat)<2*sqrt(s2hat)) xxx = sort(reshat) plot(xxx,1:n/n,xlab="Residuals",ylab="Cumulative distribution",type="l",lwd=3) lines(xxx,pnorm(xxx,0,sqrt(s2hat)),col=2,lwd=2) qqplot(reshat,rnorm(100000,0,sqrt(s2hat)),xlab="Empirical quantiles",ylab="Theoretical quantiles") abline(0,1) dev.off() # Prior b = c(0,0) A = diag(c(0.1,0.1)) v = 6 lam = 0.1333 nu0 = 25 sd = 1.0 # Posterior inference under normal model In = diag(1,n) iC = t(X)%*%X+A C = solve(iC) mu = C%*%(t(X)%*%y+A%*%b) par1 = (nu0+n)/2 par2 = (nu0*lam+t(y-X%*%mu)%*%y+t(b-mu)%*%A%*%b)[1,1]/2 sig2s = seq(0.2,0.35,length=1000) fsig2 = dgamma(1/sig2s,par1/2,par2/2)/sig2s^2 sds = sqrt(diag(par2/par1*C)) b0s = seq(mu[1]-4*sds[1],mu[1]+4*sds[1],length=1000) b1s = seq(mu[2]-4*sds[2],mu[2]+4*sds[2],length=1000) fb0 = dnorm(b0s,mu[1],sds[1]) fb1 = dnorm(b1s,mu[2],sds[2]) pdf(file="normal-regression.pdf",width=10,height=5) par(mfrow=c(1,3)) plot(b0s,fb0,main=expression(beta[0]),ylab="Density",xlab="",type="l") plot(b1s,fb1,main=expression(beta[1]),ylab="Density",xlab="",type="l") plot(sig2s,fsig2,main=expression(sigma^2),ylab="Density",xlab="",type="l") dev.off() # MCMC for Student's t model set.seed(121645) step = 100 M0 = 1000 M = 1000 niter = M0+step*M pars = matrix(0,M,4) l = 0 draw = c(bhat,s2hat) nu = 1.0 for (iter in 1:niter){ res2 = (y-draw[1]-draw[2]*x)^2/draw[3] lt = 1/rgamma(n,(nu+1)/2,(nu+res2)/2) y1 = y/sqrt(lt) draw = fixedpar(y1,X,A,b,v,lam) nu1 = rnorm(1,nu,sd) if (nu1>0){ num = sum(ldig(lt,nu1/2,nu1/2))+dgamma(nu1,1,1/nu0,log=TRUE) den = sum(ldig(lt,nu/2,nu/2))+dgamma(nu,1,1/nu0,log=TRUE) lacc = num-den if (log(runif(1))M0)&((iter%%step)==0)){ l = l + 1 print(c(round(l,0),round(draw,3),round(nu,1))) pars[l,] = c(draw,nu) } } pdf(file="trace1-regression-t.pdf",width=10,height=10) par(mfrow=c(3,2)) ts.plot(pars[,1],xlab="Iteration",ylab="",main=expression(beta[0])) hist(pars[,1],xlab="",ylab="Density",prob=TRUE,main=expression(beta[0]),ylim=c(0,5),xlim=range(pars[,1],b0s)) lines(b0s,fb0,lwd=2,col=grey(0.5)) ts.plot(pars[,2],xlab="Iteration",ylab="",main=expression(beta[1])) hist(pars[,2],xlab="",ylab="Density",prob=TRUE,main=expression(beta[1]),xlim=range(pars[,2],b1s)) lines(b1s,fb1,lwd=2,col=grey(0.5)) ts.plot(pars[,3],xlab="Iteration",ylab="",main=expression(sigma^2)) hist(pars[,3],xlab="",ylab="Density",prob=TRUE,main=expression(sigma^2),ylim=c(0,30),xlim=range(pars[,3],sig2s)) lines(sig2s,fsig2,lwd=2,col=grey(0.5)) dev.off() pdf(file="trace2-regression-t.pdf",width=10,height=5) par(mfrow=c(1,2)) ts.plot(pars[,4],xlab="Iteration",ylab="",main=expression(nu)) hist(pars[,4],xlab="",prob=TRUE,main="") dev.off() pdf(file="post-regression-t.pdf",width=10,height=7) par(mfrow=c(1,2)) plot(pars[,1:2],xlab=expression(beta[0]),ylab=expression(beta[1]),pch=16,main="") plot(pars[,3:4],xlab=expression(sigma^2),ylab=expression(nu),pch=16,main="") dev.off() pgamma(quantile(pars[,4],0.975),1,1/nu0)- pgamma(quantile(pars[,4],0.025),1,1/nu0)