The variable to predict is the monthly growth rate of US industrial production, and the dataset consists of 130 candidate predictors, all monthly macroeconomic indicators (detailed in the summary below). The sample ranges from February 1960 to December 2014, and all series have been transformed to obtain stationarity, as in the work of Stock and Watson; a stylized sketch of this kind of transformation follows the data summary.
\(y\): the monthly growth rate of US industrial production.
\(X\): monthly macroeconomic indicators, such as measures of output, income, consumption, orders, surveys, labor market variables, house prices, consumer and producer prices, money, credit and asset prices.
Period: February 1960 to December 2014 (about 660 observations)
Full description of X: See appendix B of Stock and Watson (2002b), pages 157-161.
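The file used below already contains the transformed series, so no transformation is applied in this note. Purely as an illustration of the kind of stationarity transformation involved, a growth rate such as that of industrial production is commonly computed as a (log) first difference; a minimal sketch with a hypothetical raw index ip_raw (the actual transformation codes are in Appendix B of Stock and Watson, 2002b):
# Hypothetical raw industrial production index (monthly levels)
ip_raw = c(100.0,100.4,100.9,100.7,101.3)
# Monthly growth rate as 100 x log first difference (one common convention)
ip_growth = 100*diff(log(ip_raw))
ip_growth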
References:
Stock and Watson (2002a) Forecasting Using Principal Components from a Large Number of Predictors, JASA, 97, 1167–1179. https://scholar.harvard.edu/files/stock/files/forecasting_using_principal_components_from_a_large_number_of_predictors.pdf
Stock and Watson (2002b) Macroeconomic Forecasting Using Diffusion Indexes, JBES, 20, 147–162. https://scholar.harvard.edu/files/stock/files/macroeconomic_forecasting_using_diffusion_indexes.pdf
Giannone, Lenza and Primiceri (2020) Economic predictions with big data: the illusion of sparsity. https://faculty.wcas.northwestern.edu/~gep575/illusion4-2.pdf
Fava and Lopes (2020) The illusion of the illusion of sparsity: an exercise in prior sensitivity. https://arxiv.org/abs/2009.14296
#install.packages("bayeslm")
#install.packages("leaps")
library("bayeslm")
library("leaps")
macrodata = read.table("stockwatson2002-data.txt",header=FALSE)
k = ncol(macrodata)-1
y = macrodata[,1]
X = as.matrix(macrodata[,2:(k+1)])
par(mfrow=c(1,1))
boxplot(cbind(y,X),outline=FALSE,col=c(2,rep(grey(0.8),k)))
cors = cor(cbind(y,X))[1,2:(1+k)]
ind = 1:k
ord = ind[order(cors,decreasing=TRUE)]
par(mfrow=c(1,2))
plot(cors,xlab="Predictor",ylab="Correlation with Y")
abline(h=0,col=2)
plot(cors[ord],xlab="Predictor",ylab="Correlation with Y")
abline(h=0,col=2)
par(mfrow=c(1,1))
svdX = svd(X)$d   # singular values of X
plot(100*svdX/sum(svdX),xlab="Principal Component",ylab="% variability")
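The plot above shows each singular value as a percentage of their sum. If instead one wants the usual percentage of variance explained by each principal component, the squared singular values of the column-centered matrix are what matter; a minimal alternative sketch:
# Alternative: % of variance explained, via squared singular values of centered X
svdXc = svd(scale(X,center=TRUE,scale=FALSE))$d
plot(100*svdXc^2/sum(svdXc^2),xlab="Principal Component",ylab="% variance explained")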
ols = lm(y~X-1)
sighat = round(summary(ols)$sigma,3)
se.beta = sighat*sqrt(diag(solve(t(X)%*%X)))
L = ols$coef-se.beta   # lower end of the +/- 1 standard error band
U = ols$coef+se.beta   # upper end of the +/- 1 standard error band
par(mfrow=c(1,1))
plot(ols$coef,xlab="Predictor",ylab="OLS coefficient",ylim=range(L,U),cex=0.5,col=2,pch=16)
for (i in 1:k){
segments(i,L[i],i,U[i],col=2)
if ((L[i]>0)|(U[i]<0)){
text(i,max(U),i,cex=0.5)
}
abline(h=0,col=2)
}
ind = 1:k
cond = ((L>0)|(U<0))
pred1 = ind[cond]
k1 = length(pred1)
X1 = X[,pred1]
pred1
## [1] 2 4 5 9 10 11 16 20 26 27 28 32 35 37 38 39 44 48 49
## [20] 50 51 52 53 54 55 56 57 58 59 63 65 66 67 72 73 75 76 77
## [39] 80 83 84 85 89 90 91 92 95 97 99 106 109 110 113 125 126 127
ols1 = lm(y~X1-1)
sighat1 = round(summary(ols1)$sigma,3)
se.beta1 = sighat1*sqrt(diag(solve(t(X1)%*%X1)))
L1 = ols1$coef-2*se.beta1
U1 = ols1$coef+2*se.beta1
par(mfrow=c(1,1))
plot(ols1$coef,xlab="Predictor",ylab="OLS coefficient",ylim=range(L1,U1),cex=0.5,col=2,pch=16)
for (i in 1:k1){
segments(i,L1[i],i,U1[i],col=2)
if ((L1[i]>0)|(U1[i]<0)){
text(i,max(U1),pred1[i],cex=0.5)
}
abline(h=0,col=2)
}
cond1 = ((L1>0)|(U1<0))
pred2 = pred1[cond1]
k2 = length(pred2)
X2 = X[,pred2]
pred2
## [1] 4 9 10 11 16 20 26 32 35 48 49 54 55 56 57 58 59 65 72
## [20] 76 77 83 89 90 91 92 110 125 127
ols2 = lm(y~X2-1)
sighat2 = round(summary(ols2)$sigma,3)
se.beta2 = sighat2*sqrt(diag(solve(t(X2)%*%X2)))
L2 = ols2$coef-2*se.beta2
U2 = ols2$coef+2*se.beta2
par(mfrow=c(1,1))
plot(ols2$coef,xlab="Predictor",ylab="OLS coefficient",ylim=range(L2,U2),cex=0.5,col=2,pch=16)
for (i in 1:k2){
segments(i,L2[i],i,U2[i],col=2)
if ((L2[i]>0)|(U2[i]<0)){
text(i,max(U2),pred2[i],cex=0.5)
}
abline(h=0,col=2)
}
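The same pattern is applied three times above: fit OLS without an intercept, form an interval around each coefficient (plus or minus one standard error in the first pass, plus or minus two afterwards), and keep the predictors whose interval excludes zero. As a sketch only, the repeated pass can be wrapped in a small helper (hypothetical name screen_ols):
# One screening pass: OLS on the columns listed in 'keep', then retain the
# predictors whose interval coef +/- mult*se excludes zero
screen_ols = function(y,X,keep,mult=2){
  Xs  = X[,keep,drop=FALSE]
  fit = lm(y~Xs-1)
  se  = summary(fit)$sigma*sqrt(diag(solve(t(Xs)%*%Xs)))
  lo  = fit$coef-mult*se
  hi  = fit$coef+mult*se
  keep[(lo>0)|(hi<0)]
}
# e.g. screen_ols(y,X,pred1,mult=2) should essentially reproduce pred2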
Here we use regsubsets from the leaps package, with a sequential replacement search, to select among the initial variables in \(X\); we consider models with up to 30 predictors and retain the one with the lowest BIC.
nvmax = 30
data = data.frame(y=y,X=X)
regs = regsubsets(y~.,data=data,method="seq",nvmax=nvmax,intercept=FALSE)
regs.summary = summary(regs)
par(mfrow=c(1,2))
plot(regs.summary$bic,pch=16,xlab="Number of predictors",ylab="BIC")
plot(regs,scale="bic")
bic = regs.summary$bic
ii = 1:nvmax
coef(regs,ii[bic==min(bic)])
## X.V10 X.V27 X.V33 X.V36 X.V40 X.V58
## -0.09533095 0.08425772 -0.11968020 0.10282605 0.25107332 0.12459957
## X.V62 X.V90 X.V91 X.V110 X.V126
## 0.33563777 0.36601094 -0.29652451 -0.17303238 0.11128935
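The coefficient names refer to the columns of the raw data file, where V1 is \(y\); hence X.Vj is predictor j-1 in \(X\). The indices collected in pred4 below can therefore be read off the BIC-optimal model programmatically; a small sketch:
# Map the names of the BIC-optimal model back to column indices of X
best = coef(regs,which.min(regs.summary$bic))
as.numeric(gsub("X.V","",names(best),fixed=TRUE))-1
# i.e. 9 26 32 35 39 57 61 89 90 109 125, collected in pred4 below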
pred4 = c(9,26,32,35,39,57,61,89,90,109,125)   # predictors in the BIC-optimal model
pred1
## [1] 2 4 5 9 10 11 16 20 26 27 28 32 35 37 38 39 44 48 49
## [20] 50 51 52 53 54 55 56 57 58 59 63 65 66 67 72 73 75 76 77
## [39] 80 83 84 85 89 90 91 92 95 97 99 106 109 110 113 125 126 127
pred2
## [1] 4 9 10 11 16 20 26 32 35 48 49 54 55 56 57 58 59 65 72
## [20] 76 77 83 89 90 91 92 110 125 127
pred4
## [1] 9 26 32 35 39 57 61 89 90 109 125
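Next, the same regression is fit under three Bayesian shrinkage priors using the bayeslm package: ridge (normal), Laplace (double exponential) and horseshoe. For intuition only, the first two prior densities at unit scale are plotted below; the horseshoe prior has no closed-form density, but it places a pole at zero and has heavier tails than both:
# Illustrative prior shapes on a single coefficient (unit scale)
par(mfrow=c(1,1))
curve(dnorm(x),-4,4,xlab=expression(beta),ylab="Density")
curve(0.5*exp(-abs(x)),-4,4,add=TRUE,lty=2)   # Laplace density with scale 1
legend("topright",c("Normal (ridge)","Laplace"),lty=1:2,bty="n")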
M0 = 10000       # burn-in draws
M = 1000         # posterior draws to keep
skip = 10        # thinning interval
bvec = rep(1,k)  # block sizes for the sampler (each coefficient its own block)
fit.r = bayeslm(y~X-1,prior="ridge",block_vec=bvec,N=M,burnin=M0,thinning=skip)
## ridge prior
## fixed running time 0.0673346
## sampling time 7.49415
fit.l = bayeslm(y~X-1,prior="laplace",block_vec=bvec,N=M,burnin=M0,thinning=skip)
## laplace prior
## fixed running time 0.0630364
## sampling time 6.53361
fit.h = bayeslm(y~X-1,prior="horseshoe",block_vec=bvec,N=M,burnin=M0,thinning=skip)
## horseshoe prior
## fixed running time 0.056811
## sampling time 7.8979
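Before summarizing the draws it is worth a quick look at the chains. The fitted objects store the retained posterior draws of the coefficients in $beta (used for the quantiles below); a minimal traceplot sketch, with predictor 89 picked arbitrarily:
# Traceplots of the retained draws for one coefficient under each prior
par(mfrow=c(1,3))
plot(fit.r$beta[,89],type="l",xlab="Draw",ylab="Coefficient",main="Ridge, predictor 89")
plot(fit.l$beta[,89],type="l",xlab="Draw",ylab="Coefficient",main="Laplace, predictor 89")
plot(fit.h$beta[,89],type="l",xlab="Draw",ylab="Coefficient",main="Horseshoe, predictor 89")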
# 70% posterior credible intervals: 15th, 50th and 85th percentiles
q.r = t(apply(fit.r$beta,2,quantile,c(0.15,0.5,0.85)))
q.l = t(apply(fit.l$beta,2,quantile,c(0.15,0.5,0.85)))
q.h = t(apply(fit.h$beta,2,quantile,c(0.15,0.5,0.85)))
# a predictor is "selected" when its 70% credible interval excludes zero
cond.r = ((q.r[,1]>0)|(q.r[,3]<0))
cond.l = ((q.l[,1]>0)|(q.l[,3]<0))
cond.h = ((q.h[,1]>0)|(q.h[,3]<0))
pred5.r = ind[cond.r]
pred5.l = ind[cond.l]
pred5.h = ind[cond.h]
pred5.r
## [1] 5 11 19 26 28 32 35 39 43 48 60 61 62 63 65 72 75 76 77
## [20] 89 90 92 99 102 109 123 124 125 127
pred5.l
## [1] 11 26 32 35 39 41 43 48 61 63 72 76 77 89 90 92 109 125
pred5.h
## [1] 32 39 61 72 89 92 109 125
k.r = sum(cond.r)
k.l = sum(cond.l)
k.h = sum(cond.h)
c(k.r,k.l,k.h)
## [1] 29 18 8
par(mfrow=c(2,2),mai=c(0.5,0.5,0.5,0.5))
plot(ols$coef,xlab="Predictor",ylab="Coefficient",main="",ylim=range(L,U),pch=16,cex=0.5)
for (i in 1:k) segments(i,L[i],i,U[i])
abline(h=0,col=2)
title(paste("OLS\n k=",k1,sep=""))
plot(q.r[,2],xlab="Predictor",ylab="Coefficient",main="",ylim=range(q.r),pch=16,cex=0.5)
for (i in 1:k) segments(i,q.r[i,1],i,q.r[i,3])
abline(h=0,col=2)
title(paste("Ridge prior\n k=",k.r,sep=""))
plot(q.l[,2],xlab="Predictor",ylab="Coefficient",main="",ylim=range(q.l),pch=16,cex=0.5)
for (i in 1:k) segments(i,q.l[i,1],i,q.l[i,3])
abline(h=0,col=2)
title(paste("Laplace prior\n k=",k.l,sep=""))
plot(q.h[,2],xlab="Predictor",ylab="Coefficient",main="",ylim=range(q.h),pch=16,cex=0.5)
for (i in 1:k) segments(i,q.h[i,1],i,q.h[i,3])
abline(h=0,col=2)
title(paste("Horseshoe prior\n k=",k.h,sep=""))
Reduce(intersect, list(pred2,pred4))
## [1] 9 26 32 35 57 89 90 125
Reduce(intersect, list(pred5.r,pred5.l,pred5.h))
## [1] 32 39 61 72 89 92 109 125
Reduce(intersect, list(pred2,pred5.r,pred5.l,pred5.h))
## [1] 32 72 89 92 125
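A compact way to see the same comparison is an indicator table showing which predictors each approach selects, using the index vectors computed above (OLS here refers to the two-pass screening that produced pred2):
# Indicator matrix: rows = predictors selected by at least one approach,
# columns = selection method
sel = list(OLS=pred2,BIC=pred4,ridge=pred5.r,laplace=pred5.l,horseshoe=pred5.h)
allsel = sort(unique(unlist(sel)))
memb = sapply(sel,function(s) allsel %in% s)
rownames(memb) = allsel
memb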