The data used here is from Souza (1999) Approximate Methods in Bayesian Dynamic Hierarchical Models, unpublished Ph.D. Thesis, COPPE-UFRJ (in Portuguese).
She considers a number of hierarchical and dynamic models to describe the nutritional pattern of pregnant women (data below).
The data depicted in Figure 5.5 (page 161) of Gamerman and Lopes (2006) consist of the weight gains (\(y_{ij}\)s) of \(I=68\) pregnant women at 5 to 7 visits (\(x_{ij}\)s) to the Instituto de Puericultura e Pediatria Martagão Gesteira from the Universidade Federal do Rio de Janeiro.
I = 68
k = 7
y = t(matrix(c(
3.3, 6.1, 8.5, 7.9, 11.8, 14.8, NA,
2.0, 4.3, 8.0, 11.0, 12.2, 15.6, NA,
4.5, 6.8, 8.1, 13.5, 15.0, NA, NA,
5.5, 5.5, 8.0, 13.2, 13.9, 15.1, NA,
2.0, 9.5, 12.2, 18.2, 19.3, 22.0, NA,
1.8, 2.0, 4.5, 7.1, 9.5, 12.6, 13.5,
0.2, 1.4, 3.6, 5.8, 8.3, 9.0, NA,
3.8, 3.4, 5.4, 8.8, 10.4, 12.3, 15.3,
1.9, 5.9, 6.0, 10.6, 14.0, 21.1, NA,
0.0, 1.3, 3.0, 6.4, 8.3, 10.8, 11.2,
1.6, 1.9, 3.4, 7.5, 8.6, 10.0, 11.3,
1.2, 3.0, 4.0, 8.0, 9.0, 12.0, 14.5,
5.4, 6.3, 10.8, 11.2, 13.3, 15.0, NA,
9.5, 10.3, 14.1, 15.4, 17.3, 17.6, NA,
-1.7, 1.7, 5.4, 7.2, 8.9, NA, NA,
2.0, 2.4, 4.0, 4.8, 9.8, 12.1, NA,
-2.8, -2.0, 1.5, 4.2, 5.7, 6.7, NA,
-3.0, -0.5, 1.7, 6.0, 8.5, 11.0, 11.0,
0.9, 1.2, 2.3, 5.0, 8.0, 9.3, 10.9,
2.0, 3.2, 4.4, 6.5, 7.5, 11.5, 14.6,
3.6, 5.8, 7.3, 9.0, 12.5, 13.4, NA,
1.1, 1.1, 6.7, 9.6, 13.1, 17.9, 18.2,
9.0, 11.3, 12.9, 16.2, 17.5, 19.0, 21.3,
0.6, 0.8, 3.0, 3.5, 4.5, 6.5, 8.0,
3.2, 5.6, 8.1, 11.5, 14.4, 17.0, 20.0,
3.0, 3.1, 3.5, 6.0, 7.2, 10.0, 16.5,
-0.2, -0.6, 0.0, 2.0, 8.7, 11.0, 13.5,
0.5, -1.2, 1.6, 3.2, 3.8, 6.0, 8.4,
0.9, 3.5, 7.2, 9.5, 11.6, 14.4, NA,
3.2, 4.0, 5.5, 8.0, 9.8, 10.7, 14.5,
1.6, 4.0, 6.5, 7.7, 10.5, 12.0, NA,
5.2, 7.0, 9.7, 13.8, 15.4, NA, NA,
2.8, 6.6, 11.0, 11.5, 18.0, 20.5, 26.3,
3.7, 3.9, 6.8, 12.3, 15.0, NA, NA,
-2.6, -3.5, -1.5, 1.2, 4.0, 6.0, NA,
7.0, 8.1, 10.5, 14.5, 17.4, NA, NA,
-1.5, 0.3, 4.0, 8.4, 10.3, 14.5, NA,
-0.2, 0.0, 4.2, 5.8, 7.5, 9.2, NA,
2.8, 2.5, 5.2, 8.5, 10.5, 13.2, NA,
-1.4, -0.2, 0.5, 2.5, 4.5, 6.7, 11.0,
1.0, 1.0, 0.0, 4.9, 9.2, 11.5, NA,
0.7, 0.1, 4.5, 5.7, 8.0, 12.5, NA,
4.0, 3.9, 6.0, 8.7, 10.0, 13.0, NA,
4.0, 4.5, 6.8, 12.0, 14.0, 15.9, NA,
2.5, 5.4, 10.3, 13.2, 17.4, 19.2, 24.0,
1.7, 2.0, 1.5, 3.0, 4.7, 6.0, 11.4,
1.6, 2.0, 4.0, 6.5, 6.5, 8.5, 9.5,
0.3, 6.0, 10.0, 13.0, 18.7, 20.9, NA,
1.9, -0.5, 1.5, 3.8, 3.8, 7.5, NA,
3.5, 2.5, 3.5, 5.0, 8.1, 12.0, 15.0,
4.6, 5.1, 6.4, 9.5, 10.6, 14.5, NA,
-4.6, -4.7, -0.9, 3.7, 3.0, 10.0, 10.5,
2.0, 2.2, 3.7, 6.5, 9.0, 8.5, NA,
-1.5, -1.4, -1.5, 2.8, 5.7, 6.2, NA,
0.2, 1.2, 3.0, 4.5, 7.0, 5.7, NA,
-1.0, -0.7, 0.0, 4.0, 7.5, 8.0, 10.5,
2.0, 5.4, 11.5, 14.2, 20.5, NA, NA,
3.0, 3.8, 6.5, 8.8, 11.0, NA, NA,
7.0, 10.4, 13.0, 15.0, 16.0, 18.0, NA,
3.1, 4.5, 7.5, 8.2, 9.5, 11.7, NA,
-1.0, 0.6, 2.8, 5.5, 7.9, 9.2, NA,
-2.0, -0.7, 2.4, 5.7, 9.8, 11.1, 11.5,
0.5, 0.5, -2.9, -1.0, 1.5, 3.4, 4.8,
3.5, 6.9, 9.0, 16.0, 15.7, NA, NA,
0.3, 1.7, 3.4, 3.1, 4.3, 3.4, NA,
-5.5, -5.0, -4.8, -1.0, 2.5, 5.0, 14.5,
-3.0, 2.0, 2.5, 8.0, 9.8, 12.2, NA,
-2.6, -3.2, -2.0, 1.0, 3.0, 4.4, NA),k,I))
X = t(matrix(c(
8.571, 13.000, 17.571, 20.857, 24.571, 35.429, 40.143,
14.429, 18.714, 23.000, 27.000, 31.000, 37.429, 37.857,
13.429, 18.857, 23.143, 34.143, 37.000, 38.000, 39.286,
12.571, 17.000, 21.429, 32.429, 34.286, 38.571, 40.571,
13.714, 23.571, 27.714, 32.429, 35.286, 39.571, 41.857,
11.286, 14.857, 21.286, 25.000, 30.000, 34.429, 39.000,
13.143, 17.143, 23.000, 28.000, 31.429, 36.000, 38.000,
10.429, 14.429, 17.857, 21.857, 25.714, 30.429, 36.000,
8.857, 14.429, 17.714, 23.429, 28.000, 37.714, 38.429,
12.000, 15.000, 19.857, 24.286, 26.714, 31.000, 37.286,
11.429, 15.143, 19.429, 23.571, 27.857, 32.143, 34.571,
9.143, 14.857, 19.143, 26.000, 28.571, 34.000, 39.000,
10.143, 15.571, 26.286, 29.286, 33.286, 37.286, 37.571,
9.143, 15.000, 21.000, 23.857, 28.000, 31.000, 35.857,
12.000, 22.429, 26.429, 32.286, 35.571, 38.000, 41.429,
6.857, 11.000, 15.571, 19.857, 24.000, 28.714, 36.000,
11.429, 15.857, 20.857, 25.857, 30.429, 34.571, 39.286,
8.571, 14.143, 18.571, 23.286, 29.143, 33.286, 37.286,
11.857, 16.857, 20.714, 24.857, 28.857, 33.286, 37.000,
13.143, 17.429, 21.857, 26.286, 30.286, 35.429, 39.143,
14.000, 18.286, 22.857, 27.000, 31.286, 36.714, 41.200,
13.286, 17.143, 21.571, 26.143, 30.429, 35.714, 39.857,
11.857, 16.000, 23.000, 27.000, 29.000, 33.000, 40.000,
13.429, 18.571, 22.286, 26.143, 33.286, 35.143, 39.714,
15.143, 20.143, 25.286, 29.429, 33.857, 37.286, 39.286,
8.571, 13.286, 16.429, 20.286, 24.286, 29.286, 42.429,
9.571, 14.000, 18.429, 22.429, 34.571, 37.000, 39.000,
11.571, 16.857, 20.143, 24.143, 28.857, 33.714, 38.000,
14.714, 19.000, 23.286, 29.000, 33.286, 39.286, 39.571,
14.714, 19.000, 23.714, 28.571, 31.143, 35.143, 37.571,
10.143, 17.286, 20.000, 24.500, 30.714, 35.000, 39.857,
11.857, 19.714, 23.714, 28.000, 33.000, 35.000, 40.286,
10.857, 15.286, 21.857, 24.857, 31.143, 34.000, 40.000,
11.429, 15.429, 20.429, 31.000, 36.429, 38.000, 40.000,
11.429, 16.000, 19.286, 24.857, 29.429, 34.000, 40.571,
14.857, 19.571, 24.286, 31.571, 42.143, 43.000, 44.000,
14.429, 20.286, 25.000, 30.143, 33.143, 37.286, 40.000,
8.000, 15.000, 22.714, 24.571, 30.429, 34.857, 39.000,
13.000, 17.286, 21.857, 25.571, 32.571, 35.714, 39.000,
8.429, 13.429, 17.286, 23.429, 27.429, 31.429, 39.429,
6.714, 12.571, 16.571, 21.286, 26.571, 34.571, 38.571,
11.286, 16.143, 22.429, 27.286, 30.714, 37.571, 39.571,
11.714, 15.857, 20.286, 25.143, 28.857, 37.714, 39.143,
8.857, 11.571, 16.000, 23.857, 28.286, 35.714, 39.714,
13.500, 19.857, 25.000, 29.143, 33.857, 36.143, 42.000,
8.429, 11.571, 16.571, 20.857, 25.429, 29.714, 39.714,
8.286, 13.143, 18.286, 22.571, 27.143, 31.571, 41.429,
15.000, 20.714, 25.000, 29.143, 36.857, 41.000, 42.000,
9.429, 15.857, 20.429, 25.286, 29.857, 35.714, 41.286,
7.429, 11.429, 16.571, 20.471, 27.429, 35.857, 39.571,
12.286, 16.857, 22.000, 27.571, 32.000, 37.286, 41.429,
12.286, 16.571, 21.286, 25.571, 30.429, 34.571, 36.143,
11.429, 17.571, 22.143, 27.143, 32.429, 37.143, 41.571,
12.143, 16.429, 20.857, 25.143, 29.286, 33.429, 39.143,
14.714, 19.143, 23.571, 27.714, 32.143, 38.143, 40.714,
8.143, 12.857, 16.286, 21.857, 25.143, 30.857, 40.714,
8.286, 15.714, 26.286, 30.571, 37.714, 39.000, 40.143,
13.571, 18.571, 23.143, 31.429, 34.429, 37.000, 39.143,
14.857, 18.857, 23.571, 30.571, 32.286, 37.714, 38.286,
12.857, 17.143, 21.857, 26.571, 31.000, 35.571, 38.714,
6.857, 13.714, 19.714, 23.857, 30.000, 34.286, 41.286,
10.143, 15.143, 19.429, 24.143, 29.571, 33.000, 39.143,
8.571, 13.000, 18.143, 23.000, 28.143, 35.286, 40.857,
12.286, 16.571, 21.571, 30.857, 33.429, 35.000, 37.714,
7.000, 9.000, 13.857, 16.857, 25.857, 30.000, 37.000,
8.857, 11.000, 15.857, 20.000, 26.857, 32.571, 41.286,
10.000, 12.000, 14.714, 21.714, 26.571, 29.714, 40.714,
9.429, 13.714, 17.714, 22.000, 26.286, 30.571, 36.714),k,I))
ni = c(6,6,5,6,6,7,6,7,6,7,7,7,6,6,5,6,6,7,7,7,6,7,7,7,7,7,7,7,
6,7,6,5,7,5,6,5,6,6,6,7,6,6,6,6,7,7,7,6,6,7,6,7,6,6,
6,7,5,5,6,6,6,7,7,5,6,7,6,6)
n = sum(ni)
The simplest model ignores the heterogeneity of the different patients and pool all observations an a single linear regression model \[ y_{ij} = \alpha + \beta x_{ij} + \epsilon_{ij} \qquad \epsilon_{ij} \ \ \mbox{iid} \ \ N(0,\sigma^2), \] for \(i=1,\ldots,I\) and \(j=1,\ldots,n_i\), for a total of \(n=427\) observations.
x1 = matrix(t(X),I*k)
y1 = matrix(t(y),I*k)
plot(x1,y1,xlab="Visit",ylab="Weight gains")
abline(lm(y1~x1)$coef,lwd=5)
ii = sort(sample(1:I,size=8))
for (i in 1:8)
lines(X[ii[i],],y[ii[i],],col=i,pch=16,lwd=2,type="b")
legend("topleft",legend=ii,col=1:8,bty="b",pch=16)
fit.pooled = lm(y1~x1)
summary(fit.pooled)
##
## Call:
## lm(formula = y1 ~ x1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.1352 -2.4042 0.0026 2.1768 11.7730
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.52639 0.48680 -9.298 <2e-16 ***
## x1 0.47633 0.01897 25.113 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.612 on 425 degrees of freedom
## (49 observations deleted due to missingness)
## Multiple R-squared: 0.5974, Adjusted R-squared: 0.5965
## F-statistic: 630.7 on 1 and 425 DF, p-value: < 2.2e-16
beta.pool = fit.pooled$coef
sigma.pool = summary(fit.pooled)$sigma
coefs = matrix(0,I,2)
sig = rep(0,I)
for (i in 1:I){
fit = lm(y[i,1:ni[i]]~X[i,1:ni[i]])
coefs[i,] = fit$coef
sig[i] = summary(fit)$sigma
}
round(apply(coefs,2,summary),3)
## [,1] [,2]
## Min. -14.432 0.126
## 1st Qu. -6.744 0.395
## Median -4.260 0.450
## Mean -4.412 0.470
## 3rd Qu. -2.330 0.525
## Max. 5.157 0.801
summary(sig)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.3484 0.7347 0.9551 1.0133 1.1571 2.1118
par(mfrow=c(2,2))
hist(coefs[,1],prob=TRUE,xlab="",main=expression(alpha))
abline(v=beta.pool[1],col=2,lwd=2)
hist(coefs[,2],prob=TRUE,xlab="",main=expression(beta))
abline(v=beta.pool[2],col=2,lwd=2)
plot(coefs,xlab=expression(alpha),ylab=expression(beta))
points(beta.pool[1],beta.pool[2],col=2,pch=16,cex=2)
hist(sig,prob=TRUE,xlab="",main=expression(sigma[y]),xlim=range(sig,sigma.pool))
abline(v=sigma.pool,col=2,lwd=2)
plot(x1,y1,pch=16,xlab="Visit",ylab="Weight gains")
for (i in 1:I)
abline(coefs[i,1],coefs[i,2],col=grey(0.85))
One of the simplest models she adopted was the simple hierarchical regression on time where \[\begin{eqnarray*} y_{ij} | \alpha_i,\beta_i,\sigma^2 &\sim& N(\alpha_i + \beta_i x_{ij}, \sigma_y^2)\\ (\alpha_i,\beta_i)'|\alpha,\beta &\sim& N((\alpha,\beta)',\mbox{diag}(\sigma_\alpha^2,\sigma_\beta^2))\\ (\alpha,\beta) &\sim& N(0_2,1000I_2), \end{eqnarray*}\] for \(i=1,\ldots,I=68\), \(n=n_1+\cdots n_I=427\). The variance parameters \(\sigma_y^2,\sigma_\alpha^2\) and \(\sigma_\beta^2\) are all a priori independent \(IG(0.001,0.001)\).
A0 = diag(1000,2)
a0 = 0.001
b0 = 0.001
c0 = 0.001
d0 = 0.001
e0 = 0.001
f0 = 0.001
iA0 = diag(1/diag(A0))
thetas = coefs
alpha = mean(thetas[,1])
beta = mean(thetas[,2])
sig2.alpha = var(thetas[,1])
sig2.beta = var(thetas[,2])
iSig = diag(c(1/sig2.alpha,1/sig2.beta))
M0 = 1000
M = 2000
niter = M0+M
draws1 = matrix(0,niter,5)
draws2 = array(0,c(niter,I,2))
for (iter in 1:niter){
# [alpha,beta]
V = diag(1/diag(iA0+I*iSig))
mean = V%*%iSig%*%apply(thetas,2,sum)
alpha = rnorm(1,mean[1],sqrt(V[1,1]))
beta = rnorm(1,mean[2],sqrt(V[2,2]))
theta = c(alpha,beta)
# [sig2.y]
par1 = a0+n/2
par2 = b0
for (i in 1:I)
for (j in 1:ni[i])
par2 = par2 + (y[i,j]-thetas[i,1]-thetas[i,2]*X[i,j])^2/2
sig2.y = 1/rgamma(1,par1,par2)
# [sig2.alpha]
par1 = c0+I/2
par2 = d0+sum((thetas[,1]-alpha)^2)/2
sig2.alpha = 1/rgamma(1,par1,par2)
# [sig2.beta]
par1 = e0+I/2
par2 = f0+sum((thetas[,2]-beta)^2)/2
sig2.beta = 1/rgamma(1,par1,par2)
iSig = diag(c(1/sig2.alpha,1/sig2.beta))
# [thetas]
for (i in 1:I){
yy = y[i,1:ni[i]]
xx = cbind(1,X[i,1:ni[i]])
var = solve(iSig+t(xx)%*%xx/sig2.y)
mean = var%*%(iSig%*%theta+t(xx)%*%yy/sig2.y)
thetas[i,] = mean+t(chol(var))%*%rnorm(2)
}
draws1[iter,] = c(alpha,beta,sqrt(c(sig2.y,sig2.alpha,sig2.beta)))
draws2[iter,,] = thetas
}
draws1 = draws1[(M0+1):niter,]
draws2 = draws2[(M0+1):niter,,]
names = c("alpha","beta","sig.y","sig.alpha","sig.beta")
par(mfrow=c(2,5))
for (i in 1:5)
ts.plot(draws1[,i],xlab="iterations",ylab="draws",main=names[i])
#for (i in 1:5)
# acf(draws1[,i],main="")
for (i in 1:5)
hist(draws1[,i],main="",xlab="",prob=TRUE)
par(mfrow=c(2,1))
boxplot(draws2[,,1],outline=FALSE,main=expression(alpha))
points(coefs[,1],col=3,pch=16)
boxplot(draws2[,,2],outline=FALSE,main=expression(beta))
points(coefs[,2],col=3,pch=16)
y[1:4,]
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 3.3 6.1 8.5 7.9 11.8 14.8 NA
## [2,] 2.0 4.3 8.0 11.0 12.2 15.6 NA
## [3,] 4.5 6.8 8.1 13.5 15.0 NA NA
## [4,] 5.5 5.5 8.0 13.2 13.9 15.1 NA
X[1:4,]
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 8.571 13.000 17.571 20.857 24.571 35.429 40.143
## [2,] 14.429 18.714 23.000 27.000 31.000 37.429 37.857
## [3,] 13.429 18.857 23.143 34.143 37.000 38.000 39.286
## [4,] 12.571 17.000 21.429 32.429 34.286 38.571 40.571
par(mfrow=c(2,2))
for (i in 1:2){
yy = draws2[,i,1]+draws2[,i,2]*X[i,7]
qyy = quantile(yy,c(0.025,0.5,0.975))
plot(X[i,],y[i,],ylim=range(y[1,1:ni[i]],qyy),
ylab="Weight gain",xlab="Time of visit")
points(X[i,7],qyy[2],pch=16)
segments(X[i,7],qyy[1],X[i,7],qyy[3],lwd=3)
xx = seq(5,40,by=1)
title(paste("Observation ",i,sep=""))
}
yy = draws2[,3,1]+draws2[,3,2]*X[3,7]
qyy = quantile(yy,c(0.025,0.5,0.975))
plot(X[3,],y[3,],ylim=range(y[1,1:ni[3]],qyy),
ylab="Weight gain",xlab="Time of visit")
points(X[3,7],qyy[2],pch=16)
segments(X[3,7],qyy[1],X[3,7],qyy[3],lwd=3)
yy = draws2[,3,1]+draws2[,3,2]*X[3,6]
qyy = quantile(yy,c(0.025,0.5,0.975))
points(X[3,6],qyy[2],pch=16)
segments(X[3,6],qyy[1],X[3,6],qyy[3],lwd=3)
title("Observation 3")
yy = draws2[,4,1]+draws2[,4,2]*X[4,7]
qyy = quantile(yy,c(0.025,0.5,0.975))
plot(X[4,],y[4,],ylim=range(y[4,1:ni[4]],qyy),
ylab="Weight gain",xlab="Time of visit")
points(X[4,7],qyy[2],pch=16)
segments(X[4,7],qyy[1],X[4,7],qyy[3],lwd=3)
title("Observation 4")