The variable to predict is the monthly growth rate of US industrial production, and the dataset consists of 130 candidate predictors, all monthly macroeconomic indicators (detailed in the summary below). The sample ranges from February 1960 to December 2014, and all series have been transformed to obtain stationarity, as in the work of Stock and Watson; a stylized sketch of this kind of transformation follows the data summary.
\(y\): the monthly growth rate of US industrial production.
\(X\): monthly macroeconomic indicators, such as measures of output, income, consumption, orders, surveys, labor market variables, house prices, consumer and producer prices, money, credit and asset prices.
Period: February 1960 to December 2014 (about 660 observations)
Full description of X: See appendix B of Stock and Watson (2002b), pages 157-161.
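The file used below already contains the transformed series, so no transformation is applied in this note. Purely as an illustration of the kind of stationarity transformation involved, a growth rate such as that of industrial production is commonly computed as a (log) first difference; a minimal sketch with a hypothetical raw index ip_raw (the actual transformation codes are in Appendix B of Stock and Watson, 2002b):
# Hypothetical raw industrial production index (monthly levels)
ip_raw = c(100.0,100.4,100.9,100.7,101.3)
# Monthly growth rate as 100 x log first difference (one common convention)
ip_growth = 100*diff(log(ip_raw))
ip_growth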
References:
Stock and Watson (2002a) Forecasting Using Principal Components from a Large Number of Predictors, JASA, 97, 1167–1179. https://scholar.harvard.edu/files/stock/files/forecasting_using_principal_components_from_a_large_number_of_predictors.pdf
Stock and Watson (2002b) Macroeconomic Forecasting Using Diffusion Indexes, JBES, 20, 147–162. https://scholar.harvard.edu/files/stock/files/macroeconomic_forecasting_using_diffusion_indexes.pdf
Giannone, Lenza and Primiceri (2020) Economic predictions with big data: the illusion of sparsity. https://faculty.wcas.northwestern.edu/~gep575/illusion4-2.pdf
Fava and Lopes (2020) The illusion of the illusion of sparsity: an exercise in prior sensitivity. https://arxiv.org/abs/2009.14296
#install.packages("bayeslm")
#install.packages("leaps")
library("bayeslm")
library("leaps")
macrodata = read.table("stockwatson2002-data.txt",header=FALSE)
k = ncol(macrodata)-1
y = macrodata[,1]
X = as.matrix(macrodata[,2:(k+1)])
par(mfrow=c(1,1))
boxplot(cbind(y,X),outline=FALSE,col=c(2,rep(grey(0.8),k)))
cors = cor(cbind(y,X))[1,2:(1+k)]
ind = 1:k
ord = ind[order(cors,decreasing=TRUE)]
par(mfrow=c(1,2))
plot(cors,xlab="Predictor",ylab="Correlation with Y")
abline(h=0,col=2)
plot(cors[ord],xlab="Predictor",ylab="Correlation with Y")
abline(h=0,col=2)
par(mfrow=c(1,1))
svdX = svd(X)$d   # singular values of X
plot(100*svdX/sum(svdX),xlab="Principal Component",ylab="% variability")
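The plot above shows each singular value as a percentage of their sum. If instead one wants the usual percentage of variance explained by each principal component, the squared singular values of the column-centered matrix are what matter; a minimal alternative sketch:
# Alternative: % of variance explained, via squared singular values of centered X
svdXc = svd(scale(X,center=TRUE,scale=FALSE))$d
plot(100*svdXc^2/sum(svdXc^2),xlab="Principal Component",ylab="% variance explained")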
ols = lm(y~X-1)
sighat = round(summary(ols)$sigma,3)
se.beta = sighat*sqrt(diag(solve(t(X)%*%X)))
L = ols$coef-se.beta   # lower end of the +/- 1 standard error band
U = ols$coef+se.beta   # upper end of the +/- 1 standard error band
par(mfrow=c(1,1))
plot(ols$coef,xlab="Predictor",ylab="OLS coefficient",ylim=range(L,U),cex=0.5,col=2,pch=16)
for (i in 1:k){
segments(i,L[i],i,U[i],col=2)
if ((L[i]>0)|(U[i]<0)){
text(i,max(U),i,cex=0.5)
}
abline(h=0,col=2)
}
ind = 1:k
cond = ((L>0)|(U<0))
pred1 = ind[cond]
k1 = length(pred1)
X1 = X[,pred1]
pred1
## [1] 2 4 5 9 10 11 16 20 26 27 28 32 35 37 38 39 44 48 49
## [20] 50 51 52 53 54 55 56 57 58 59 63 65 66 67 72 73 75 76 77
## [39] 80 83 84 85 89 90 91 92 95 97 99 106 109 110 113 125 126 127
ols1 = lm(y~X1-1)
sighat1 = round(summary(ols1)$sigma,3)
se.beta1 = sighat1*sqrt(diag(solve(t(X1)%*%X1)))
L1 = ols1$coef-2*se.beta1
U1 = ols1$coef+2*se.beta1
par(mfrow=c(1,1))
plot(ols1$coef,xlab="Predictor",ylab="OLS coefficient",ylim=range(L1,U1),cex=0.5,col=2,pch=16)
for (i in 1:k1){
segments(i,L1[i],i,U1[i],col=2)
if ((L1[i]>0)|(U1[i]<0)){
text(i,max(U1),pred1[i],cex=0.5)
}
abline(h=0,col=2)
}
cond1 = ((L1>0)|(U1<0))
pred2 = pred1[cond1]
k2 = length(pred2)
X2 = X[,pred2]
pred2
## [1] 4 9 10 11 16 20 26 32 35 48 49 54 55 56 57 58 59 65 72
## [20] 76 77 83 89 90 91 92 110 125 127
ols2 = lm(y~X2-1)
sighat2 = round(summary(ols2)$sigma,3)
se.beta2 = sighat2*sqrt(diag(solve(t(X2)%*%X2)))
L2 = ols2$coef-2*se.beta2
U2 = ols2$coef+2*se.beta2
par(mfrow=c(1,1))
plot(ols2$coef,xlab="Predictor",ylab="OLS coefficient",ylim=range(L2,U2),cex=0.5,col=2,pch=16)
for (i in 1:k2){
segments(i,L2[i],i,U2[i],col=2)
if ((L2[i]>0)|(U2[i]<0)){
text(i,max(U2),pred2[i],cex=0.5)
}
abline(h=0,col=2)
}
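The same pattern is applied three times above: fit OLS without an intercept, form an interval around each coefficient (plus or minus one standard error in the first pass, plus or minus two afterwards), and keep the predictors whose interval excludes zero. As a sketch only, the repeated pass can be wrapped in a small helper (hypothetical name screen_ols):
# One screening pass: OLS on the columns listed in 'keep', then retain the
# predictors whose interval coef +/- mult*se excludes zero
screen_ols = function(y,X,keep,mult=2){
  Xs  = X[,keep,drop=FALSE]
  fit = lm(y~Xs-1)
  se  = summary(fit)$sigma*sqrt(diag(solve(t(Xs)%*%Xs)))
  lo  = fit$coef-mult*se
  hi  = fit$coef+mult*se
  keep[(lo>0)|(hi<0)]
}
# e.g. screen_ols(y,X,pred1,mult=2) should essentially reproduce pred2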
Here we use regsubsets from the leaps package, with a sequential replacement search, to select among the initial variables in \(X\); we consider models with up to 30 predictors and retain the one with the lowest BIC.
nvmax = 30
data = data.frame(y=y,X=X)
regs = regsubsets(y~.,data=data,method="seq",nvmax=nvmax,intercept=FALSE)
regs.summary = summary(regs)
par(mfrow=c(1,2))
plot(regs.summary$bic,pch=16,xlab="Number of predictors",ylab="BIC")
plot(regs,scale="bic")
bic = regs.summary$bic
ii = 1:nvmax
coef(regs,ii[bic==min(bic)])
## X.V10 X.V27 X.V33 X.V36 X.V40 X.V58
## -0.09533095 0.08425772 -0.11968020 0.10282605 0.25107332 0.12459957
## X.V62 X.V90 X.V91 X.V110 X.V126
## 0.33563777 0.36601094 -0.29652451 -0.17303238 0.11128935
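The coefficient names refer to the columns of the raw data file, where V1 is \(y\); hence X.Vj is predictor j-1 in \(X\). The indices collected in pred4 below can therefore be read off the BIC-optimal model programmatically; a small sketch:
# Map the names of the BIC-optimal model back to column indices of X
best = coef(regs,which.min(regs.summary$bic))
as.numeric(gsub("X.V","",names(best),fixed=TRUE))-1
# i.e. 9 26 32 35 39 57 61 89 90 109 125, collected in pred4 below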
pred4 = c(9,26,32,35,39,57,61,89,90,109,125)   # predictors in the BIC-optimal model
pred1
## [1] 2 4 5 9 10 11 16 20 26 27 28 32 35 37 38 39 44 48 49
## [20] 50 51 52 53 54 55 56 57 58 59 63 65 66 67 72 73 75 76 77
## [39] 80 83 84 85 89 90 91 92 95 97 99 106 109 110 113 125 126 127
pred2
## [1] 4 9 10 11 16 20 26 32 35 48 49 54 55 56 57 58 59 65 72
## [20] 76 77 83 89 90 91 92 110 125 127
pred4
## [1] 9 26 32 35 39 57 61 89 90 109 125
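Next, the same regression is fit under three Bayesian shrinkage priors using the bayeslm package: ridge (normal), Laplace (double exponential) and horseshoe. For intuition only, the first two prior densities at unit scale are plotted below; the horseshoe prior has no closed-form density, but it places a pole at zero and has heavier tails than both:
# Illustrative prior shapes on a single coefficient (unit scale)
par(mfrow=c(1,1))
curve(dnorm(x),-4,4,xlab=expression(beta),ylab="Density")
curve(0.5*exp(-abs(x)),-4,4,add=TRUE,lty=2)   # Laplace density with scale 1
legend("topright",c("Normal (ridge)","Laplace"),lty=1:2,bty="n")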
M0 = 10000       # burn-in draws
M = 1000         # posterior draws to keep
skip = 10        # thinning interval
bvec = rep(1,k)  # block sizes for the sampler (each coefficient its own block)
fit.r = bayeslm(y~X-1,prior="ridge",block_vec=bvec,N=M,burnin=M0,thinning=skip)
## ridge prior
## fixed running time 0.0673346
## sampling time 7.49415
fit.l = bayeslm(y~X-1,prior="laplace",block_vec=bvec,N=M,burnin=M0,thinning=skip)
## laplace prior
## fixed running time 0.0630364
## sampling time 6.53361
fit.h = bayeslm(y~X-1,prior="horseshoe",block_vec=bvec,N=M,burnin=M0,thinning=skip)
## horseshoe prior
## fixed running time 0.056811
## sampling time 7.8979
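Before summarizing the draws it is worth a quick look at the chains. The fitted objects store the retained posterior draws of the coefficients in $beta (used for the quantiles below); a minimal traceplot sketch, with predictor 89 picked arbitrarily:
# Traceplots of the retained draws for one coefficient under each prior
par(mfrow=c(1,3))
plot(fit.r$beta[,89],type="l",xlab="Draw",ylab="Coefficient",main="Ridge, predictor 89")
plot(fit.l$beta[,89],type="l",xlab="Draw",ylab="Coefficient",main="Laplace, predictor 89")
plot(fit.h$beta[,89],type="l",xlab="Draw",ylab="Coefficient",main="Horseshoe, predictor 89")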
# 70% posterior credible intervals: 15th, 50th and 85th percentiles
q.r = t(apply(fit.r$beta,2,quantile,c(0.15,0.5,0.85)))
q.l = t(apply(fit.l$beta,2,quantile,c(0.15,0.5,0.85)))
q.h = t(apply(fit.h$beta,2,quantile,c(0.15,0.5,0.85)))
# a predictor is "selected" when its 70% credible interval excludes zero
cond.r = ((q.r[,1]>0)|(q.r[,3]<0))
cond.l = ((q.l[,1]>0)|(q.l[,3]<0))
cond.h = ((q.h[,1]>0)|(q.h[,3]<0))
pred5.r = ind[cond.r]
pred5.l = ind[cond.l]
pred5.h = ind[cond.h]
pred5.r
## [1] 5 11 19 26 28 32 35 39 43 48 60 61 62 63 65 72 75 76 77
## [20] 89 90 92 99 102 109 123 124 125 127
pred5.l
## [1] 11 26 32 35 39 41 43 48 61 63 72 76 77 89 90 92 109 125
pred5.h
## [1] 32 39 61 72 89 92 109 125
k.r = sum(cond.r)
k.l = sum(cond.l)
k.h = sum(cond.h)
c(k.r,k.l,k.h)
## [1] 29 18 8
par(mfrow=c(2,2),mai=c(0.5,0.5,0.5,0.5))
plot(ols$coef,xlab="Predictor",ylab="Coefficient",main="",ylim=range(L,U),pch=16,cex=0.5)
for (i in 1:k) segments(i,L[i],i,U[i])
abline(h=0,col=2)
title(paste("OLS\n k=",k1,sep=""))
plot(q.r[,2],xlab="Predictor",ylab="Coefficient",main="",ylim=range(q.r),pch=16,cex=0.5)
for (i in 1:k) segments(i,q.r[i,1],i,q.r[i,3])
abline(h=0,col=2)
title(paste("Ridge prior\n k=",k.r,sep=""))
plot(q.l[,2],xlab="Predictor",ylab="Coefficient",main="",ylim=range(q.l),pch=16,cex=0.5)
for (i in 1:k) segments(i,q.l[i,1],i,q.l[i,3])
abline(h=0,col=2)
title(paste("Laplace prior\n k=",k.l,sep=""))
plot(q.h[,2],xlab="Predictor",ylab="Coefficient",main="",ylim=range(q.h),pch=16,cex=0.5)
for (i in 1:k) segments(i,q.h[i,1],i,q.h[i,3])
abline(h=0,col=2)
title(paste("Horseshoe prior\n k=",k.h,sep=""))
Reduce(intersect, list(pred2,pred4))
## [1] 9 26 32 35 57 89 90 125
Reduce(intersect, list(pred5.r,pred5.l,pred5.h))
## [1] 32 39 61 72 89 92 109 125
Reduce(intersect, list(pred2,pred5.r,pred5.l,pred5.h))
## [1] 32 72 89 92 125
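A compact way to see the same comparison is an indicator table showing which predictors each approach selects, using the index vectors computed above (OLS here refers to the two-pass screening that produced pred2):
# Indicator matrix: rows = predictors selected by at least one approach,
# columns = selection method
sel = list(OLS=pred2,BIC=pred4,ridge=pred5.r,laplace=pred5.l,horseshoe=pred5.h)
allsel = sort(unique(unlist(sel)))
memb = sapply(sel,function(s) allsel %in% s)
rownames(memb) = allsel
memb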