First we simulate \(n\) pairs of observations \((y_i,x_i)\) as follows: \[ x_i \sim N(0,1) \ \ \ \mbox{and} \ \ \ y_i|x_i \sim N(\alpha+\beta x_i,\sigma^2), \] and randomly select \(d\) of the regressors \(x_i\) to be missing. For convenience, we rearrange the pairs \((y_i,x_i)\) so that the last \(d\) are the missing cases.
set.seed(12345)
n = 10
d = 5
x1 = rnorm(n)
alpha = 0
beta = 1
sig = 1
true = c(alpha,beta,sig)
y1 = alpha+beta*x1+rnorm(n,0,sig)
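# Pick d of the n regressors at random to be missing and move those pairs to the end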
missing = sort(sample(1:n,size=d,replace=FALSE))
y = c(y1[-missing],y1[missing])
x = c(x1[-missing],x1[missing])
par(mfrow=c(1,1))
plot(x,y,pch=16)
points(x[(n-d+1):n],y[(n-d+1):n],col=4,pch=16,cex=2)
abline(lm(y~x)$coef,col=4,lwd=2)
abline(lm(y[1:(n-d)]~x[1:(n-d)])$coef,col=1,lwd=2)
legend("topleft",legend=c("OLS fit n-ds obs",
"OLS fit n obs"),col=c(1,4),lwd=2)
ols = lm(y~x)
summary(ols)
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.95785 -0.54544 -0.07149  0.13093  1.76813
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)   0.2486     0.2757   0.902   0.3936  
## x             0.7189     0.3520   2.042   0.0754 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8593 on 8 degrees of freedom
## Multiple R-squared: 0.3427, Adjusted R-squared: 0.2605
## F-statistic: 4.171 on 1 and 8 DF, p-value: 0.07541
ols = lm(y[1:(n-d)]~x[1:(n-d)])
summary(ols)
##
## Call:
## lm(formula = y[1:(n - d)] ~ x[1:(n - d)])
##
## Residuals:
##        1        2        3        4        5 
##  0.07834 -0.01394 -0.04091 -0.10670  0.08322
##
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  -0.41399    0.04482  -9.237  0.00268 **
## x[1:(n - d)]  0.31526    0.04486   7.027  0.00592 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09366 on 3 degrees of freedom
## Multiple R-squared: 0.9427, Adjusted R-squared: 0.9236
## F-statistic: 49.39 on 1 and 3 DF, p-value: 0.005919
We will assume the values are missing completely at random (MCAR) and perform Bayesian inference where the unknowns are \[ \theta = (\alpha,\beta,\sigma^2,x_{n-d+1},\ldots,x_n), \] with independent priors \[ p(\theta) = p(\alpha)p(\beta)p(\sigma^2)\prod_{i=1}^{d} p(x_{n-d+i}), \] where \(\alpha \sim N(a_0,A_0)\), \(\beta \sim N(b_0,B_0)\), \(\sigma^2 \sim IG(c_0,d_0)\) and \(x_{n-d+i} \sim N(0,1)\), for \(i=1,\ldots,d\).
a0 = 0
A0 = 1
b0 = 0
B0 = 1
c0 = 3
d0 = 4
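As a quick aside (not part of the original specification), the \(IG(c_0,d_0)\) prior with \(c_0=3\) and \(d_0=4\) implies a prior mean of \(d_0/(c_0-1)=2\) and a prior variance of \(d_0^2/\{(c_0-1)^2(c_0-2)\}=4\) for \(\sigma^2\). A minimal simulation check:
# Illustrative check (not in the original analysis): draws from the
# IG(c0,d0) prior of sigma^2; the sample mean should be near d0/(c0-1) = 2.
sig2.prior = 1/rgamma(100000,c0,d0)
mean(sig2.prior)
quantile(sig2.prior,c(0.025,0.975))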
The MCMC scheme cycles through the following \(d+3\) full conditionals: \[\begin{eqnarray*} p(\alpha|y_{1:n},x_{1:n},\beta,\sigma^2)\\ p(\beta|y_{1:n},x_{1:n},\alpha,\sigma^2)\\ p(\sigma^2|y_{1:n},x_{1:n},\alpha,\beta)\\ p(x_i|y_{1:n},x_{-i},\alpha,\beta,\sigma^2), \end{eqnarray*}\] for \(i=n-d+1,\ldots,n\). Show that these full conditionals are as follows. \[\begin{eqnarray*} [\alpha] &\sim& N\left\{\left(\frac{1}{A_0}+\frac{n}{\sigma^2}\right)^{-1}\left( \frac{a_0}{A_0}+\frac{\sum_{i=1}^n (y_i-\beta x_i)}{\sigma^2} \right) ;\left(\frac{1}{A_0}+\frac{n}{\sigma^2}\right)^{-1}\right\}\\ [\beta] &\sim& N\left\{\left(\frac{1}{B_0}+\frac{\sum_{i=1}^n x_i^2}{\sigma^2}\right)^{-1}\left( \frac{b_0}{B_0}+\frac{\sum_{i=1}^n (y_i-\alpha)x_i}{\sigma^2} \right) ;\left(\frac{1}{B_0}+\frac{\sum_{i=1}^n x_i^2}{\sigma^2}\right)^{-1}\right\}\\ [\sigma^2] &\sim& IG\left\{c_0+\frac{n}{2};d_0+\frac{\sum_{i=1}^n (y_i-\alpha-\beta x_i)^2}{2}\right\}\\ [x_i] &\sim& N\left\{ \left(1+\frac{\beta^2}{\sigma^2}\right)^{-1}\frac{\beta(y_i-\alpha)}{\sigma^2}; \left(1+\frac{\beta^2}{\sigma^2}\right)^{-1}\right\}, \qquad i=n-d+1,\ldots,n. \end{eqnarray*}\]
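As a sanity check of the first result (an illustrative sketch; the helper check.alpha below is hypothetical and not part of the original code), the closed-form \(N(a_1,A_1)\) full conditional of \(\alpha\) should coincide with the normalized product of likelihood and prior evaluated on a grid, here at fixed \(\beta\), \(\sigma^2\) and the complete simulated data:
# Hypothetical helper: grid check of the full conditional of alpha
check.alpha = function(y,x,beta,sig2,a0,A0){
  A1 = 1/(1/A0+length(y)/sig2)
  a1 = A1*(a0/A0+sum(y-beta*x)/sig2)
  grid = seq(a1-4*sqrt(A1),a1+4*sqrt(A1),length=200)
  # log of likelihood x prior, up to an additive constant
  logpost = sapply(grid,function(a)
    sum(dnorm(y,a+beta*x,sqrt(sig2),log=TRUE))+dnorm(a,a0,sqrt(A0),log=TRUE))
  dens = exp(logpost-max(logpost))
  dens = dens/(sum(dens)*(grid[2]-grid[1]))
  # the two curves should be indistinguishable
  plot(grid,dens,type="l",xlab=expression(alpha),ylab="Density")
  lines(grid,dnorm(grid,a1,sqrt(A1)),col=2,lty=2)
}
check.alpha(y1,x1,beta=1,sig2=1,a0=a0,A0=A0)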
# Initial values (missing x's start at the mean of the observed x's)
xmiss = rep(mean(x[1:(n-d)]),d)
alpha = 0
beta = 0
# Markov chain parameters
M0 = 10000
M = 1000
niter = M0+M
draws = matrix(0,niter,3+d)
# Gibbs sampler
for (iter in 1:niter){
  xx = c(x[1:(n-d)],xmiss)
  # Sampling sig2
  sig2 = 1/rgamma(1,c0+n/2,d0+sum((y-alpha-beta*xx)^2)/2)
  # Sampling alpha
  A1 = 1/(1/A0+n/sig2)
  a1 = A1*(a0/A0+sum(y-beta*xx)/sig2)
  alpha = rnorm(1,a1,sqrt(A1))
  # Sampling beta
  B1 = 1/(1/B0+sum(xx^2)/sig2)
  b1 = B1*(b0/B0+sum((y-alpha)*xx)/sig2)
  beta = rnorm(1,b1,sqrt(B1))
  # Sampling missing x's
  vx = 1/(1+beta^2/sig2)
  mx = vx*beta*(y[(n-d+1):n]-alpha)/sig2
  xmiss = rnorm(d,mx,sqrt(vx))
  # Storing the Monte Carlo chains
  draws[iter,] = c(alpha,beta,sqrt(sig2),xmiss)
}
draws = draws[(M0+1):niter,]
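As an optional diagnostic (assuming the coda package is installed; this step is not in the original code), effective sample sizes summarize how well the retained chains mix:
# Optional: effective sample sizes of (alpha, beta, sigma) via coda
if (requireNamespace("coda",quietly=TRUE))
  print(coda::effectiveSize(coda::as.mcmc(draws[,1:3])))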
par(mfrow=c(3,3))
ts.plot(draws[,1],xlab="Iterations",ylab="",main=expression(alpha))
acf(draws[,1],main="")
hist(draws[,1],prob=TRUE,main="",xlab=expression(alpha))
abline(v=true[1],col=2,lwd=3)
ts.plot(draws[,2],xlab="Iterations",ylab="",main=expression(beta))
acf(draws[,2],main="")
hist(draws[,2],prob=TRUE,main="",xlab=expression(beta))
abline(v=true[2],col=2,lwd=3)
ts.plot(draws[,3],xlab="Iterations",ylab="",main=expression(sigma))
acf(draws[,3],main="")
hist(draws[,3],prob=TRUE,main="",xlab=expression(sigma))
abline(v=true[3],col=2,lwd=3)
pairs(draws[,1:3],labels=c(expression(alpha),expression(beta),expression(sigma)))
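For reference (an added summary, not in the original code), posterior means and 95% credible intervals of \((\alpha,\beta,\sigma)\) can be compared with the true values used in the simulation:
# Posterior means and 95% credible intervals versus the true values
post = cbind(mean=colMeans(draws[,1:3]),
             t(apply(draws[,1:3],2,quantile,c(0.025,0.975))),true=true)
rownames(post) = c("alpha","beta","sigma")
round(post,3)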
qx = t(apply(draws[,4:(3+d)],2,quantile,c(0.25,0.5,0.75)))
par(mfrow=c(1,1))
plot(x,y,pch=16,xlim=range(x,qx))
points(x[(n-d+1):n],y[(n-d+1):n],col=4,pch=16,cex=1.2)
points(qx[,2],y[(n-d+1):n],col=6,pch=16,cex=1.2)
for (i in 1:d)
  segments(qx[i,1],y[n-d+i],qx[i,3],y[n-d+i],col=6)
xxx = seq(min(x),max(x),length=100)
fx = matrix(0,M,100)
for (i in 1:100)
  fx[,i] = draws[,1]+draws[,2]*xxx[i]
qfx = t(apply(fx,2,quantile,c(0.25,0.5,0.75)))
par(mfrow=c(1,1))
plot(x,y,pch=16,xlim=range(x,qx))
points(x[(n-d+1):n],y[(n-d+1):n],col=4,pch=16,cex=1.2)
lines(xxx,qfx[,1],col=2,lty=2,lwd=2)
lines(xxx,qfx[,2],col=2,lwd=2)
lines(xxx,qfx[,3],col=2,lty=2,lwd=2)
abline(lm(y[1:(n-d)]~x[1:(n-d)])$coef,lwd=2)
abline(lm(y~x)$coef,col=4,lwd=2)
legend("topleft",legend=c("OLS fit n-ds obs","OLS fit n obs","Bayes"),col=c(1,4,2),lwd=2)