This is Example 5.6 (pages 159-60) of Gamerman and Lopes (2006), on Bayesian inference on hierarchical Gaussian linear regression. Souza (1997) entertains several hierarchical and dynamic models to describe the nutritional pattern of pregnant women. The data record the weight gains of \(I = 68\) pregnant women at 5 to 7 visits to the Instituto de Puericultura e Pediatria Martagão Gesteira from the Universidade Federal do Rio de Janeiro (UFRJ).
# Weight gains (kg) for I = 68 pregnant women.  Each group of 7 values below is
# one woman's gains over up to 7 visits (NA = visit not made); stored as a
# 7 x 68 matrix, so column i holds woman i's trajectory.
y <- matrix(c( 3.3, 6.1, 8.5, 7.9, 11.8, 14.8, NA,
2.0, 4.3, 8.0, 11.0, 12.2, 15.6, NA,
4.5, 6.8, 8.1, 13.5, 15.0, NA, NA,
5.5, 5.5, 8.0, 13.2, 13.9, 15.1, NA,
2.0, 9.5, 12.2, 18.2, 19.3, 22.0, NA,
1.8, 2.0, 4.5, 7.1, 9.5, 12.6, 13.5,
0.2, 1.4, 3.6, 5.8, 8.3, 9.0, NA,
3.8, 3.4, 5.4, 8.8, 10.4, 12.3, 15.3,
1.9, 5.9, 6.0, 10.6, 14.0, 21.1, NA,
0.0, 1.3, 3.0, 6.4, 8.3, 10.8, 11.2,
1.6, 1.9, 3.4, 7.5, 8.6, 10.0, 11.3,
1.2, 3.0, 4.0, 8.0, 9.0, 12.0, 14.5,
5.4, 6.3, 10.8, 11.2, 13.3, 15.0, NA,
9.5, 10.3, 14.1, 15.4, 17.3, 17.6, NA,
-1.7, 1.7, 5.4, 7.2, 8.9, NA, NA,
2.0, 2.4, 4.0, 4.8, 9.8, 12.1, NA,
-2.8, -2.0, 1.5, 4.2, 5.7, 6.7, NA,
-3.0, -0.5, 1.7, 6.0, 8.5, 11.0, 11.0,
0.9, 1.2, 2.3, 5.0, 8.0, 9.3, 10.9,
2.0, 3.2, 4.4, 6.5, 7.5, 11.5, 14.6,
3.6, 5.8, 7.3, 9.0, 12.5, 13.4, NA,
1.1, 1.1, 6.7, 9.6, 13.1, 17.9, 18.2,
9.0, 11.3, 12.9, 16.2, 17.5, 19.0, 21.3,
0.6, 0.8, 3.0, 3.5, 4.5, 6.5, 8.0,
3.2, 5.6, 8.1, 11.5, 14.4, 17.0, 20.0,
3.0, 3.1, 3.5, 6.0, 7.2, 10.0, 16.5,
-0.2, -0.6, 0.0, 2.0, 8.7, 11.0, 13.5,
0.5, -1.2, 1.6, 3.2, 3.8, 6.0, 8.4,
0.9, 3.5, 7.2, 9.5, 11.6, 14.4, NA,
3.2, 4.0, 5.5, 8.0, 9.8, 10.7, 14.5,
1.6, 4.0, 6.5, 7.7, 10.5, 12.0, NA,
5.2, 7.0, 9.7, 13.8, 15.4, NA, NA,
2.8, 6.6, 11.0, 11.5, 18.0, 20.5, 26.3,
3.7, 3.9, 6.8, 12.3, 15.0, NA, NA,
-2.6, -3.5, -1.5, 1.2, 4.0, 6.0, NA,
7.0, 8.1, 10.5, 14.5, 17.4, NA, NA,
-1.5, 0.3, 4.0, 8.4, 10.3, 14.5, NA,
-0.2, 0.0, 4.2, 5.8, 7.5, 9.2, NA,
2.8, 2.5, 5.2, 8.5, 10.5, 13.2, NA,
-1.4, -0.2, 0.5, 2.5, 4.5, 6.7, 11.0,
1.0, 1.0, 0.0, 4.9, 9.2, 11.5, NA,
0.7, 0.1, 4.5, 5.7, 8.0, 12.5, NA,
4.0, 3.9, 6.0, 8.7, 10.0, 13.0, NA,
4.0, 4.5, 6.8, 12.0, 14.0, 15.9, NA,
2.5, 5.4, 10.3, 13.2, 17.4, 19.2, 24.0,
1.7, 2.0, 1.5, 3.0, 4.7, 6.0, 11.4,
1.6, 2.0, 4.0, 6.5, 6.5, 8.5, 9.5,
0.3, 6.0, 10.0, 13.0, 18.7, 20.9, NA,
1.9, -0.5, 1.5, 3.8, 3.8, 7.5, NA,
3.5, 2.5, 3.5, 5.0, 8.1, 12.0, 15.0,
4.6, 5.1, 6.4, 9.5, 10.6, 14.5, NA,
-4.6, -4.7, -0.9, 3.7, 3.0, 10.0, 10.5,
2.0, 2.2, 3.7, 6.5, 9.0, 8.5, NA,
-1.5, -1.4, -1.5, 2.8, 5.7, 6.2, NA,
0.2, 1.2, 3.0, 4.5, 7.0, 5.7, NA,
-1.0, -0.7, 0.0, 4.0, 7.5, 8.0, 10.5,
2.0, 5.4, 11.5, 14.2, 20.5, NA, NA,
3.0, 3.8, 6.5, 8.8, 11.0, NA, NA,
7.0, 10.4, 13.0, 15.0, 16.0, 18.0, NA,
3.1, 4.5, 7.5, 8.2, 9.5, 11.7, NA,
-1.0, 0.6, 2.8, 5.5, 7.9, 9.2, NA,
-2.0, -0.7, 2.4, 5.7, 9.8, 11.1, 11.5,
0.5, 0.5, -2.9, -1.0, 1.5, 3.4, 4.8,
3.5, 6.9, 9.0, 16.0, 15.7, NA, NA,
0.3, 1.7, 3.4, 3.1, 4.3, 3.4, NA,
-5.5, -5.0, -4.8, -1.0, 2.5, 5.0, 14.5,
-3.0, 2.0, 2.5, 8.0, 9.8, 12.2, NA,
-2.6, -3.2, -2.0, 1.0, 3.0, 4.4, NA),7,68)
# Gestation times (weeks) at which each woman was measured; same layout as y:
# a 7 x 68 matrix whose column i holds woman i's visit weeks.  Unlike y, no
# entry is NA -- rows beyond nobs[i] are simply unused for woman i.
x <- matrix(c(8.571, 13.000, 17.571, 20.857, 24.571, 35.429, 40.143,
14.429, 18.714, 23.000, 27.000, 31.000, 37.429, 37.857,
13.429, 18.857, 23.143, 34.143, 37.000, 38.000, 39.286,
12.571, 17.000, 21.429, 32.429, 34.286, 38.571, 40.571,
13.714, 23.571, 27.714, 32.429, 35.286, 39.571, 41.857,
11.286, 14.857, 21.286, 25.000, 30.000, 34.429, 39.000,
13.143, 17.143, 23.000, 28.000, 31.429, 36.000, 38.000,
10.429, 14.429, 17.857, 21.857, 25.714, 30.429, 36.000,
8.857, 14.429, 17.714, 23.429, 28.000, 37.714, 38.429,
12.000, 15.000, 19.857, 24.286, 26.714, 31.000, 37.286,
11.429, 15.143, 19.429, 23.571, 27.857, 32.143, 34.571,
9.143, 14.857, 19.143, 26.000, 28.571, 34.000, 39.000,
10.143, 15.571, 26.286, 29.286, 33.286, 37.286, 37.571,
9.143, 15.000, 21.000, 23.857, 28.000, 31.000, 35.857,
12.000, 22.429, 26.429, 32.286, 35.571, 38.000, 41.429,
6.857, 11.000, 15.571, 19.857, 24.000, 28.714, 36.000,
11.429, 15.857, 20.857, 25.857, 30.429, 34.571, 39.286,
8.571, 14.143, 18.571, 23.286, 29.143, 33.286, 37.286,
11.857, 16.857, 20.714, 24.857, 28.857, 33.286, 37.000,
13.143, 17.429, 21.857, 26.286, 30.286, 35.429, 39.143,
14.000, 18.286, 22.857, 27.000, 31.286, 36.714, 41.200,
13.286, 17.143, 21.571, 26.143, 30.429, 35.714, 39.857,
11.857, 16.000, 23.000, 27.000, 29.000, 33.000, 40.000,
13.429, 18.571, 22.286, 26.143, 33.286, 35.143, 39.714,
15.143, 20.143, 25.286, 29.429, 33.857, 37.286, 39.286,
8.571, 13.286, 16.429, 20.286, 24.286, 29.286, 42.429,
9.571, 14.000, 18.429, 22.429, 34.571, 37.000, 39.000,
11.571, 16.857, 20.143, 24.143, 28.857, 33.714, 38.000,
14.714, 19.000, 23.286, 29.000, 33.286, 39.286, 39.571,
14.714, 19.000, 23.714, 28.571, 31.143, 35.143, 37.571,
10.143, 17.286, 20.000, 24.500, 30.714, 35.000, 39.857,
11.857, 19.714, 23.714, 28.000, 33.000, 35.000, 40.286,
10.857, 15.286, 21.857, 24.857, 31.143, 34.000, 40.000,
11.429, 15.429, 20.429, 31.000, 36.429, 38.000, 40.000,
11.429, 16.000, 19.286, 24.857, 29.429, 34.000, 40.571,
14.857, 19.571, 24.286, 31.571, 42.143, 43.000, 44.000,
14.429, 20.286, 25.000, 30.143, 33.143, 37.286, 40.000,
8.000, 15.000, 22.714, 24.571, 30.429, 34.857, 39.000,
13.000, 17.286, 21.857, 25.571, 32.571, 35.714, 39.000,
8.429, 13.429, 17.286, 23.429, 27.429, 31.429, 39.429,
6.714, 12.571, 16.571, 21.286, 26.571, 34.571, 38.571,
11.286, 16.143, 22.429, 27.286, 30.714, 37.571, 39.571,
11.714, 15.857, 20.286, 25.143, 28.857, 37.714, 39.143,
8.857, 11.571, 16.000, 23.857, 28.286, 35.714, 39.714,
13.500, 19.857, 25.000, 29.143, 33.857, 36.143, 42.000,
8.429, 11.571, 16.571, 20.857, 25.429, 29.714, 39.714,
8.286, 13.143, 18.286, 22.571, 27.143, 31.571, 41.429,
15.000, 20.714, 25.000, 29.143, 36.857, 41.000, 42.000,
9.429, 15.857, 20.429, 25.286, 29.857, 35.714, 41.286,
7.429, 11.429, 16.571, 20.471, 27.429, 35.857, 39.571,
12.286, 16.857, 22.000, 27.571, 32.000, 37.286, 41.429,
12.286, 16.571, 21.286, 25.571, 30.429, 34.571, 36.143,
11.429, 17.571, 22.143, 27.143, 32.429, 37.143, 41.571,
12.143, 16.429, 20.857, 25.143, 29.286, 33.429, 39.143,
14.714, 19.143, 23.571, 27.714, 32.143, 38.143, 40.714,
8.143, 12.857, 16.286, 21.857, 25.143, 30.857, 40.714,
8.286, 15.714, 26.286, 30.571, 37.714, 39.000, 40.143,
13.571, 18.571, 23.143, 31.429, 34.429, 37.000, 39.143,
14.857, 18.857, 23.571, 30.571, 32.286, 37.714, 38.286,
12.857, 17.143, 21.857, 26.571, 31.000, 35.571, 38.714,
6.857, 13.714, 19.714, 23.857, 30.000, 34.286, 41.286,
10.143, 15.143, 19.429, 24.143, 29.571, 33.000, 39.143,
8.571, 13.000, 18.143, 23.000, 28.143, 35.286, 40.857,
12.286, 16.571, 21.571, 30.857, 33.429, 35.000, 37.714,
7.000, 9.000, 13.857, 16.857, 25.857, 30.000, 37.000,
8.857, 11.000, 15.857, 20.000, 26.857, 32.571, 41.286,
10.000, 12.000, 14.714, 21.714, 26.571, 29.714, 40.714,
9.429, 13.714, 17.714, 22.000, 26.286, 30.571, 36.714),7,68)
# Number of women (columns of y and x).
I = 68
# Spaghetti plot: one weight-gain trajectory per woman.  lines() silently
# drops the NA tail of each column, so shorter trajectories plot correctly.
par(mfrow=c(1,1))
plot(x[,1],y[,1],type="l",xlab="week",ylab="weight gain (kg)",xlim=c(6,44),ylim=c(-8,28),axes=F)
axis(1)
axis(2)
for(i in 2:I){
lines(x[,i],y[,i])
}
Below we run \(I\) individual regressions: \[ y_{ij} \sim N(\alpha_i+\beta_i x_{ij},\sigma^2_i), \] for \(i=1,\ldots,I\) and \(j=1,\ldots,n_i\).
# Fit a separate OLS line to each woman's trajectory.  lm() drops the NA
# visits automatically (default na.action = na.omit), so the full 7-row
# columns can be passed as-is.
fits    = lapply(1:I, function(i) lm(y[, i] ~ x[, i]))
alphas  = sapply(fits, function(f) coef(f)[1])           # per-woman intercepts
betas   = sapply(fits, function(f) coef(f)[2])           # per-woman slopes
sigma2s = sapply(fits, function(f) summary(f)$sigma^2)   # per-woman residual variances
# Grid of 1000 week values spanning the observed range, for drawing fitted lines.
# BUG FIX: the original used seq(min(x), max(x), length(1000)); length(1000)
# evaluates to 1, so it was taken as the positional `by` argument (step size 1)
# rather than a 1000-point grid.  Use length.out explicitly.
xx = seq(min(x), max(x), length.out = 1000)
par(mfrow=c(2,2))
plot(x[,1],y[,1],xlab="week",ylab="weight gain (kg)",xlim=c(6,44),ylim=c(-8,28),axes=F)
axis(1);axis(2)
lines(xx,alphas[1]+betas[1]*xx)
for(i in 2:I){  # use I instead of the hard-coded 68, consistent with the rest of the script
points(x[,i],y[,i])
lines(xx,alphas[i]+betas[i]*xx)
}
# Sampling distributions of the individual OLS estimates.
hist(alphas,main=expression(alpha[i]),xlab="",prob=TRUE)
hist(betas,main=expression(beta[i]),xlab="",prob=TRUE)
hist(sqrt(sigma2s),main=expression(sigma[i]),xlab="",prob=TRUE)
Respectively, 8, 33 and 27 women are observed over 5, 6 and 7 visits, for a total of \(n=427\) pairs \((x,y)\).
# Basic settings
# nobs[i] = number of observed (non-missing) visits for woman i; n = total
# number of (x, y) pairs across all women.
nobs = colSums(!is.na(y))
n = sum(nobs)
n
## [1] 427
table(nobs)
## nobs
## 5 6 7
## 8 33 27
# Pooled regression: stack every woman's observed (x, y) pairs into one long
# response vector and design matrix (explicit intercept column, hence `- 1`
# in the formula), then fit a single common line.
yy = unlist(lapply(1:I, function(i) y[1:nobs[i], i]))
xx = do.call(rbind, lapply(1:I, function(i) cbind(1, x[1:nobs[i], i])))
fit = lm(yy ~ xx - 1)
summary(fit)
##
## Call:
## lm(formula = yy ~ xx - 1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.1352 -2.4042 0.0026 2.1768 11.7730
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## xx1 -4.52639 0.48680 -9.298 <2e-16 ***
## xx2 0.47633 0.01897 25.113 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.612 on 425 degrees of freedom
## Multiple R-squared: 0.837, Adjusted R-squared: 0.8362
## F-statistic: 1091 on 2 and 425 DF, p-value: < 2.2e-16
# Pooled OLS estimates, used later as references and starting values.
alpha.ols = fit$coef[1]
beta.ols = fit$coef[2]
sigma.ols = summary(fit)$sigma
c(alpha.ols, beta.ols, sigma.ols)
## xx1 xx2
## -4.526386 0.476334 3.611520
# Compare the I individual OLS estimates (histograms) with the single pooled
# OLS estimate (red vertical line): pooling hides the between-women variation
# that the hierarchical model below is designed to capture.
par(mfrow=c(1,3))
hist(alphas,main=expression(alpha[i]),xlab="",prob=TRUE)
abline(v=alpha.ols,col=2,lwd=3)
hist(betas,main=expression(beta[i]),xlab="",prob=TRUE)
abline(v=beta.ols,col=2,lwd=3)
hist(sqrt(sigma2s),main=expression(sigma[i]),xlab="",prob=TRUE,xlim=c(0,4))
abline(v=sigma.ols,col=2,lwd=3)
Let \(y_{ij}\) be the weight gain for woman \(i\) at time \(x_{ij}\), then the hierarchical model we consider is the following:
Observation level: \(y_{ij} \sim N(\alpha_i+\beta_i x_{ij},\sigma^2)\),
Random effects level: \((\alpha_i,\beta_i) \sim N(\alpha,1/\tau_\alpha)N(\beta,1/\tau_\beta)\),
Parameters level: \((\alpha,\tau_\alpha) \sim N(0,P_\alpha)G(a,b), \ \ (\beta,\tau_\beta) \sim N(0,P_\beta)G(a,b) \ \ \mbox{and} \ \ \sigma^2 \sim IG(a,b)\),
for \(i = 1,\ldots,I=68\), \(n = n_1 +\cdots+ n_I = 427\), and hyperparameters \(P_\alpha=P_\beta=a=b=1\).
The unknowns of the hierarchical model are \(\theta=(\sigma^2,\alpha,\beta,\tau_\alpha,\tau_\beta,\alpha_1,\ldots,\alpha_I,\beta_1,\ldots,\beta_I)\), ie a \(p\)-dimensional vector where \(p=5+2I=141\). The joint posterior is \[ p(\theta|\mbox{data}) \propto \left\{\prod_{i=1}^I \left[\prod_{j=1}^{n_i} p_N(y_{ij};\alpha_i+\beta_i x_{ij},\sigma^2)\right] p_N(\alpha_i;\alpha,1/\tau_\alpha)p_N(\beta_i;\beta,1/\tau_\beta) \right\}p(\alpha)p(\beta)p(\tau_\alpha)p(\tau_\beta)p(\sigma^2). \] Exact posterior inference is unavailable. Here we show that all full conditional distributions are readily available, so the Gibbs sampler becomes the standard solution for approximate Bayesian inference.
It is worth removing all conditioning quantities from the above
posterior to highlight what components are important to obtain the full
conditional of \((\alpha_i,\beta_i)\):
\[
p(\alpha_i,\beta_i|\mbox{others, data}) \propto \left[\prod_{j=1}^{n_i}
p_N(y_{ij};\alpha_i+\beta_i
x_{ij},\sigma^2)\right]p_N(\alpha_i;\alpha,1/\tau_\alpha)p_N(\beta_i;\beta,1/\tau_\beta),
\] where others are all other parameters of the
model.
It is straightforward to see that, for \(i=1,\ldots,I\), \[
(\alpha_i,\beta_i)|\mbox{others,data} \sim N(m_i,V_i),
\] where \(V_i=(A^{-1} +
X_i'X_i/\sigma^2)^{-1}\) and \(m_i=V_i(A^{-1}(\alpha,\beta)'+X_i'y_i/\sigma^2)\),
for \(A^{-1}=
\mbox{diag}(\tau_\alpha,\tau_\beta)\) the prior precision matrix
for \((\alpha_i,\beta_i)\).
Similarly, \[ p(\alpha|\mbox{others,data}) \propto p_N(\alpha;0,1/P_\alpha)\prod_{i=1}^I p_N(\alpha_i;\alpha,1/\tau_\alpha), \] which is Gaussian with precision \(V_\alpha^{-1}=\tau_\alpha I + P_\alpha\) and mean \(m_\alpha=V_\alpha(\tau_\alpha\sum_{i=1}^I \alpha_i)\). Similarly for \(\beta\), whose full conditional is Gaussian with precision \(V_\beta^{-1}=\tau_\beta I + P_\beta\) and mean \(m_\beta=V_\beta(\tau_\beta\sum_{i=1}^I \beta_i)\).
It is easy to see that \[\begin{eqnarray*} \tau_\alpha|\mbox{others,data} &\sim& G\left(a+I/2;b+\sum_{i=1}^I (\alpha_i-\alpha)^2/2\right)\\ \tau_\beta|\mbox{others,data} &\sim& G\left(a+I/2;b+\sum_{i=1}^I (\beta_i-\beta)^2/2\right)\\ \end{eqnarray*}\]
Finally, the full conditional of \(\sigma^2\) is also inverse gamma with parameters \[ a+n/2 \ \ \ \mbox{and} \ \ \ b + \sum_{i=1}^I \sum_{j=1}^{n_i} (y_{ij}-\alpha_i-\beta_ix_{ij})^2/2. \]
# Gibbs sampler for the hierarchical Gaussian linear regression model.
#
# Arguments:
#   alpha, beta         - starting values for the population intercept/slope
#   sigma2              - starting value for the observational variance
#   tau.alpha, tau.beta - starting values for the random-effects precisions
#   M                   - number of iterations to run
#
# Reads the globals y, x, I, nobs, n (data) and Pa, Pb, a, b (hyperparameters).
# Returns an M x (5+2I) matrix; each row stores
#   (alpha, beta, sigma, tau.alpha, tau.beta, alpha_1..alpha_I, beta_1..beta_I).
run.gibbs = function(alpha,beta,sigma2,tau.alpha,tau.beta,M){
  # Precompute the per-woman sufficient statistics X_i'X_i and X_i'y_i once.
  XtX = array(0, c(I, 2, 2))
  Xty = matrix(0, 2, I)
  for (i in seq_len(I)) {
    yi = y[seq_len(nobs[i]), i]
    Xi = cbind(1, x[seq_len(nobs[i]), i])
    XtX[i,,] = t(Xi) %*% Xi
    Xty[,i]  = t(Xi) %*% yi
  }
  alphas = rep(0, I)
  betas  = rep(0, I)
  chain  = matrix(0, M, 5 + 2*I)
  for (iter in seq_len(M)) {
    prior.prec = diag(c(tau.alpha, tau.beta))   # A^{-1} = diag(tau.alpha, tau.beta)
    # (alpha_i, beta_i) | rest ~ N(m_i, V_i), drawn jointly for each woman.
    for (i in seq_len(I)) {
      Vi = solve(prior.prec + XtX[i,,]/sigma2)
      mi = Vi %*% (prior.prec %*% c(alpha, beta) + Xty[,i]/sigma2)
      ab = mi + t(chol(Vi)) %*% rnorm(2)
      alphas[i] = ab[1]
      betas[i]  = ab[2]
    }
    # alpha | rest: conjugate normal update with precision tau.alpha*I + Pa.
    v = 1/(tau.alpha*I + Pa)
    alpha = rnorm(1, v*(tau.alpha*sum(alphas)), sqrt(v))
    # beta | rest: same form with precision tau.beta*I + Pb.
    v = 1/(tau.beta*I + Pb)
    beta = rnorm(1, v*(tau.beta*sum(betas)), sqrt(v))
    # Random-effects precisions: conjugate gamma updates.
    tau.alpha = rgamma(1, a + I/2, b + sum((alphas - alpha)^2)/2)
    tau.beta  = rgamma(1, a + I/2, b + sum((betas - beta)^2)/2)
    # sigma2 | rest ~ IG(a + n/2, b + RSS/2), sampled via 1/gamma.
    rss = 0.0
    for (i in seq_len(I)) {
      idx = seq_len(nobs[i])
      rss = rss + sum((y[idx,i] - alphas[i] - betas[i]*x[idx,i])^2)
    }
    sigma2 = 1/rgamma(1, a + n/2, b + rss/2)
    # Store sigma (sd scale) rather than sigma2, matching the summaries below.
    chain[iter,] = c(alpha, beta, sqrt(sigma2), tau.alpha, tau.beta, alphas, betas)
  }
  return(chain)
}
# Priors:
# alpha ~ N(0,1/Pa)
# beta ~ N(0,1/Pb)
# 1/sigma2,tau.alpha and tau.beta ~ G(a,b)
Pa = 1
Pb = 1
a = 1
b = 1
M0 = 1000 # Burn-in
M = M0+5000 # Total draws
# Initial values, taken from the individual and pooled OLS fits above.
alpha = mean(alphas)
beta = mean(betas)
# BUG FIX: sigma2 is a variance, but the original assigned the OLS residual
# standard deviation sigma.ols; square it so the chain starts at the variance.
sigma2 = sigma.ols^2
tau.alpha = 1/var(alphas)
tau.beta = 1/var(betas)
draws = run.gibbs(alpha,beta,sigma2,tau.alpha,tau.beta,M)
draws = draws[(M0+1):M,] # discard the burn-in draws
# Posterior summaries: 90% credible intervals and medians for alpha and beta
# (computed before the columns of draws are rescaled/reordered below).
qalpha = quantile(draws[,1],c(0.05,0.5,0.95))
qbeta = quantile(draws[,2],c(0.05,0.5,0.95))
names = c("alpha","sqrt(1/tau.alpha)","beta","sqrt(1/tau.beta)","sigma")
# Report the random-effects precisions on the standard-deviation scale, then
# reorder the first five columns to match `names`.
draws[,4:5] = sqrt(1/draws[,4:5])
draws[,1:5] = draws[,c(1,4,2,5,3)]
# MCMC diagnostics: trace plots, autocorrelations, marginal posteriors.
par(mfrow=c(3,5))
for (i in 1:5) ts.plot(draws[,i],xlab="iteration",ylab="",main=names[i])
for (i in 1:5) acf(draws[,i],main="")
for (i in 1:5) hist(draws[,i],prob=TRUE,main="",xlab="")
# Per-woman random effects and their posterior medians.
alphas.p = draws[,6:(5+I)]
betas.p = draws[,(6+I):ncol(draws)]
malpha = apply(alphas.p,2,median)
mbeta = apply(betas.p,2,median)
par(mfrow=c(2,2))
boxplot(alphas.p,xlab="Women",outline=FALSE,axes=FALSE,main="Intercepts",ylab="Posterior densities")
axis(2);box()
plot(alphas,malpha,xlim=range(alphas,malpha),ylim=range(alphas,malpha),
xlab="Intercepts (ols)",ylab="Intercepts (HBM)")
abline(0,1,lty=2)
abline(h=qalpha[2],col=4,lwd=2)  # posterior median of alpha (blue)
abline(h=alpha.ols,col=6,lwd=2)  # pooled OLS intercept (magenta)
# BUG FIX: the original legend labeled col 4 as "OLS" and col 6 as "Posterior
# mean", but col 4 is the posterior MEDIAN line (qalpha[2]) and col 6 is the
# OLS line; labels are swapped and named to match what is actually plotted.
legend("topleft",legend=c("Posterior median","OLS"),lty=1,col=c(4,6),bty="n",lwd=3)
boxplot(betas.p,xlab="Women",outline=FALSE,axes=FALSE,main="Slopes",ylab="Posterior densities")
axis(2);box()
plot(betas,mbeta,xlim=range(betas,mbeta),ylim=range(betas,mbeta),
xlab="Slopes (ols)",ylab="Slopes (HBM)")
abline(0,1,lty=2)
abline(h=qbeta[2],col=4,lwd=2)   # posterior median of beta (blue)
abline(h=beta.ols,col=6,lwd=2)   # pooled OLS slope (magenta)