# Linear regression with a few x values missing — Gibbs sampler
#
# This version: February 28th 2022
#
# By Hedibert Lopes, PhD
# hedibert.org

set.seed(3141593)

# Simulate n observations from y = alpha + beta*x + N(0, sig^2), then pick d
# of them to treat as "missing x": those observations are moved to the end of
# the x and y vectors, so positions (n-d+1):n always hold the missing-x cases.
n <- 10
d <- 5
x1 <- rnorm(n)
alpha <- 0
beta <- 1
sig <- 1
true <- c(alpha, beta, sig)
y1 <- alpha + beta * x1 + rnorm(n, 0, sig)
missing <- sort(sample(1:n, size = d, replace = FALSE))
y <- c(y1[-missing], y1[missing])
x <- c(x1[-missing], x1[missing])

# Plot the data and compare the OLS fit using all n observations (blue)
# against the OLS fit using only the n-d complete cases (black)
par(mfrow = c(1, 1))
plot(x, y, pch = 16)
points(x[(n - d + 1):n], y[(n - d + 1):n], col = 4, pch = 16, cex = 2)
abline(lm(y ~ x)$coef, col = 4, lwd = 2)
abline(lm(y[1:(n - d)] ~ x[1:(n - d)])$coef, col = 1, lwd = 2)
legend("topleft", legend = c("OLS fit n-d obs", "OLS fit n obs"),
       col = c(1, 4), lwd = 2)

# OLS estimation based on the complete cases only (used to initialize the chain)
ols <- lm(y[1:(n - d)] ~ x[1:(n - d)])$coef

# Prior hyperparameters:
#   alpha ~ N(a0, A0),  beta ~ N(b0, B0),  sig2 ~ IG(c0, d0)
a0 <- 0
A0 <- 1
b0 <- 0
B0 <- 1
c0 <- 3
d0 <- 4

# Initial values: missing x's start at their stored values, (alpha, beta) at OLS
xmiss <- x[(n - d + 1):n]
alpha <- ols[1]
beta <- ols[2]

# Gibbs sampler: M0 burn-in iterations followed by M retained iterations.
# Each row of draws stores (alpha, beta, sigma, xmiss[1], ..., xmiss[d]).
M0 <- 10000
M <- 1000
niter <- M0 + M
draws <- matrix(0, niter, 3 + d)
for (iter in seq_len(niter)) {
  # Current completed covariate vector: observed x's followed by imputed x's
  xx <- c(x[1:(n - d)], xmiss)

  # Sampling sig2 from its inverse-gamma full conditional
  sig2 <- 1 / rgamma(1, c0 + n / 2, d0 + sum((y - alpha - beta * xx)^2) / 2)

  # Sampling alpha from its Gaussian full conditional
  A1 <- 1 / (1 / A0 + n / sig2)
  a1 <- A1 * (a0 / A0 + sum(y - beta * xx) / sig2)
  alpha <- rnorm(1, a1, sqrt(A1))

  # Sampling beta from its Gaussian full conditional
  B1 <- 1 / (1 / B0 + sum(xx^2) / sig2)
  b1 <- B1 * (b0 / B0 + sum((y - alpha) * xx) / sig2)
  beta <- rnorm(1, b1, sqrt(B1))

  # Sampling the missing x's: standard-normal prior (x1 was drawn from N(0,1))
  # combined with the regression likelihood gives a Gaussian full conditional
  vx <- 1 / (1 + beta^2 / sig2)
  mx <- vx * (beta * (y[(n - d + 1):n] - alpha) / sig2)
  xmiss <- rnorm(d, mx, sqrt(vx))

  draws[iter, ] <- c(alpha, beta, sqrt(sig2), xmiss)
}

# Discard the burn-in draws; only the final M iterations are kept
draws <- draws[(M0 + 1):niter, ]

# Posterior summaries: trace plot, ACF and histogram for alpha, beta, sigma.
# NOTE(fix): the original code called abline(v = M0) on each trace plot, but
# burn-in is already discarded, so a vertical line at x = 10000 fell outside
# the plotted range 1..M and marked nothing; those calls were removed.
par(mfrow = c(3, 3))
ts.plot(draws[, 1], xlab = "Iterations", ylab = "", main = expression(alpha))
acf(draws[, 1], main = "")
hist(draws[, 1], prob = TRUE, main = "", xlab = expression(alpha))
abline(v = true[1], col = 2, lwd = 3)
ts.plot(draws[, 2], xlab = "Iterations", ylab = "", main = expression(beta))
acf(draws[, 2], main = "")
hist(draws[, 2], prob = TRUE, main = "", xlab = expression(beta))
abline(v = true[2], col = 2, lwd = 3)
ts.plot(draws[, 3], xlab = "Iterations", ylab = "", main = expression(sigma))
acf(draws[, 3], main = "")
# Final panel of the 3x3 layout started above: histogram of sigma draws
hist(draws[, 3], prob = TRUE, main = "", xlab = expression(sigma))
abline(v = true[3], col = 2, lwd = 3)

# Pairwise scatterplots of the posterior draws of (alpha, beta, sigma).
# NOTE(fix): the first label was expression(mu) in the original, but the first
# column of draws holds alpha.
pairs(draws[, 1:3],
      labels = c(expression(alpha), expression(beta), expression(sigma)))

# Posterior quartiles (25%, 50%, 75%) of each of the d missing x's:
# qx is a d x 3 matrix, one row per missing covariate
qx <- t(apply(draws[, 4:(3 + d)], 2, quantile, c(0.25, 0.5, 0.75)))

# Data with posterior medians (magenta) and interquartile segments for the
# missing x's; the true (hidden) x's are shown in blue
par(mfrow = c(1, 1))
plot(x, y, pch = 16, xlim = range(x, qx))
points(x[(n - d + 1):n], y[(n - d + 1):n], col = 4, pch = 16, cex = 1.2)
points(qx[, 2], y[(n - d + 1):n], col = 6, pch = 16, cex = 1.2)
for (i in 1:d) {
  segments(qx[i, 1], y[n - d + i], qx[i, 3], y[n - d + i], col = 6)
}

# Posterior of the regression line alpha + beta*x evaluated on a grid:
# pointwise quartiles of the fitted line across the retained draws
par(mfrow = c(1, 1))
plot(x, y, pch = 16, xlim = range(x, qx))
points(x[(n - d + 1):n], y[(n - d + 1):n], col = 4, pch = 16, cex = 1.2)
xxx <- seq(min(x), max(x), length = 100)
fx <- matrix(0, M, 100)
for (i in 1:100) {
  fx[, i] <- draws[, 1] + draws[, 2] * xxx[i]
}
qfx <- t(apply(fx, 2, quantile, c(0.25, 0.5, 0.75)))
lines(xxx, qfx[, 1], col = 2, lty = 2, lwd = 2)
lines(xxx, qfx[, 2], col = 2, lwd = 2)
lines(xxx, qfx[, 3], col = 2, lty = 2, lwd = 2)
abline(lm(y[1:(n - d)] ~ x[1:(n - d)])$coef, lwd = 2)
abline(lm(y ~ x)$coef, col = 4, lwd = 2)
legend("topleft",
       legend = c("OLS fit n-d obs", "OLS fit n obs", "Bayes"),
       col = c(1, 4, 2), lwd = 2)