################################################################################################## # # Simple linear regression with Student-t errors # # y(i) = alpha + beta*x(i) + sig*e(i) # # where e(1),...,e(n) are iid t(v) # # Prior distribution: # # alpha ~ N(0,Va) # beta ~ N(0,Vb) # # (sig,v) are fixed # ################################################################################################## # # Hedibert Freitas Lopes # Booth School of Business # University of Chicago # ################################################################################################## logpost = function(alpha,beta){ -0.5*(alpha-mua)^2/Va-0.5*(beta-mub)^2/Vb-0.5*(v+1)*sum(log(1+(y-alpha-beta*x)^2/(v*sig2))) } pdf(file="hw1-2013-solution.pdf",width=12,height=10) # Reading the data # ---------------- data = read.table("school-spending.txt",header=TRUE) y = data[,2] x = data[,3]/10000 # OLS estimation # -------------- par(mfrow=c(1,1)) plot(x,y,main="OLS fit") abline(lm(y~x),col=2,lwd=2) # Fixed parameters and prior hyperparameters # ------------------------------------------ v = 4.46 sig2 = 46.7^2 mua = 0 mub = 0 Va = 100000^2 Vb = 100000^2 # Kernel of the posterior density # ------------------------------- N = 200 alphas = seq(-300,200,length=N) betas = seq(200,900,length=N) lpost = matrix(0,N,N) for (i in 1:N) for (j in 1:N) lpost[i,j] = logpost(alphas[i],betas[j]) post = exp(lpost-max(lpost)) ind = 1:N alpham = alphas[sum(ind*apply(lpost==max(lpost),1,sum))] betam = betas[sum(ind*apply(lpost==max(lpost),2,sum))] par(mfrow=c(1,1)) contour(alphas,betas,post,drawlabels=FALSE,xlab=expression(alpha),ylab=expression(beta)) # Proposal density: Not a wonderful proposal but it will work # ----------------------------------------------------------- Sa = 100 Sb = 100 lprop = matrix(0,N,N) for (i in 1:N) for (j in 1:N) lprop[i,j] = dnorm(alphas[i],alpham,Sa,log=TRUE)+dnorm(betas[j],betam,Sb,log=TRUE) prop = exp(lprop-max(lprop)) par(mfrow=c(1,1)) image(alphas,betas,prop,xlab=expression(alpha),ylab=expression(beta)) contour(alphas,betas,post,add=TRUE,drawlabels=FALSE) # a) SIR for posterior simulation # ------------------------------- set.seed(34548) M = 100000 M1 = 10000 alphas1 = rnorm(M,alpham,Sa) betas1 = rnorm(M,betam,Sb) w = rep(0,M) for (i in 1:M) w[i] = logpost(alphas1[i],betas1[i])- dnorm(alphas1[i],alpham,Sa,log=TRUE)- dnorm(betas1[i],betam,Sb,log=TRUE) w = exp(w-max(w)) w = w/sum(w) ind = sample(1:M,size=M1,replace=TRUE,prob=w) alphas2 = alphas1[ind] betas2 = betas1[ind] par(mfrow=c(1,1)) plot(alphas2,betas2,pch=16,xlab=expression(alpha),ylab=expression(beta),col=grey(0.7)) contour(alphas,betas,post,add=TRUE,drawlabels=FALSE,col=4) par(mfrow=c(1,2)) hist(alphas2,xlab="",prob=TRUE,main=expression(alpha),breaks=seq(min(alphas2),max(alphas2),length=30)) box() hist(betas2,xlab="",prob=TRUE,main=expression(beta),breaks=seq(min(betas2),max(betas2),length=30)) box() # b) Posterior predictive # ----------------------- K = 200 yy = seq(0,1000,length=K) xx = 4:12/10 dens = matrix(0,K,9) for (j in 1:9) for (i in 1:K) dens[i,j] = mean(dt((yy[i]-alphas2-betas2*xx[j])/sqrt(sig2),v)/sqrt(sig2)) par(mfrow=c(1,1)) plot(yy,dens[,1],xlab="Spending",ylab="Density",type="l",ylim=range(dens),lwd=2) for (i in 2:9) lines(yy,dens[,i],col=i,lwd=2) legend(700,0.008,legend=10000*xx,col=1:9,lty=1,lwd=2,bty="n") text(750,0.0081,"Income") title("Posterior predictive") # c) Simulating ypred # ------------------- set.seed(12164) xx = c(4:12)/10 draws = matrix(0,M1,9) for (j in 1:9) draws[,j] = alphas2+betas2*xx[j]+sqrt(sig2)*rt(M1,v) par(mfrow=c(1,1)) plot(10000*x,y,xlim=c(4000,12000),ylim=c(0,900),xlab="INCOME",ylab="SPENDING",pch=16) lines(10000*xx,apply(draws,2,quantile,0.025),col=2) lines(10000*xx,apply(draws,2,quantile,0.5),col=2) lines(10000*xx,apply(draws,2,quantile,0.975),col=2) par(mfrow=c(3,3)) for (j in 1:9){ hist(draws[,j],prob=TRUE,breaks=seq(min(draws[,j]),max(draws[,j]),length=30),main="",xlab="") lines(yy,dens[,j],col=2,lwd=2) title(paste("Income = ",10000*xx[j],sep="")) } # d) Raoblackwallization (when x=7500) # ------------------------------------ set.seed(1216546) xxx = 7500/10000 K = 1000 yy = seq(-100,1000,length=K) dens1 = rep(0,K) for (i in 1:K) dens1[i] = mean(dt((yy[i]-alphas2-betas2*xxx)/sqrt(sig2),v)/sqrt(sig2)) par(mfrow=c(1,1)) plot(yy,dens1,type="l",xlim=c(-100,1000),ylab="Density",main="",xlab="SPENDING") for (i in 1:100){ y1 = alphas2+betas2*xxx+sqrt(sig2)*rt(M1,v) lines(density(y1),col=3) } lines(yy,dens1) title(paste("Raoblackwellization \n when income = ",10000*xxx,sep="")) dev.off()