install.packages("leaps") install.packages("bayesreg") library("leaps") library("bayesreg") macrodata = read.table("https://hedibert.org/wp-content/uploads/2021/03/stockwatson2002-data.txt",header=FALSE) k = ncol(macrodata)-1 y = macrodata[,1] X = as.matrix(macrodata[,2:(k+1)]) # Subset selection nvmax = 10 data = data.frame(y=y,X=X) regs = regsubsets(y~.,data=data,method="seq",nvmax=nvmax,intercept=FALSE) regs.summary = summary(regs) par(mfrow=c(1,2)) plot(regs.summary$bic,pch=16,xlab="Number of predictors",ylab="BIC") plot(regs,scale="bic") bic = regs.summary$bic ii = 1:nvmax beta.ols = coef(regs,ii[bic==min(bic)]) coef(regs,ii[bic==min(bic)]) pred = c(11,32,35,39,57,61,89,90,109,125) # Laplace and horseshoe priors ind = 1:ncol(X) M0 = 10000 M = 10000 skip = 1 data1 = data.frame(y=y,X=X) fit.r = bayesreg(y~X,data=data1,model="normal",prior="ridge",n.samples=M,burnin=M0,thin=skip) q.r = t(apply(fit.r$beta,1,quantile,c(0.025,0.5,0.975))) cond.r = ((q.r[,1]>0)|(q.r[,3]<0)) pred.r = ind[cond.r] k.r = sum(cond.r) pred.r fit.l = bayesreg(y~X,data=data1,model="normal",prior="lasso",n.samples=M,burnin=M0,thin=skip) q.l = t(apply(fit.l$beta,1,quantile,c(0.025,0.5,0.975))) cond.l = ((q.l[,1]>0)|(q.l[,3]<0)) pred.l = ind[cond.l] k.l = sum(cond.l) pred.l fit.h = bayesreg(y~X,data=data1,model="normal",prior="horseshoe",n.samples=M,burnin=M0,thin=skip) q.h = t(apply(fit.h$beta,1,quantile,c(0.025,0.5,0.975))) cond.h = ((q.h[,1]>0)|(q.h[,3]<0)) pred.h = ind[cond.h] k.h = sum(cond.h) pred.h pred pred.r pred.l pred.h e = y-X[,pred]%*%beta.ols e.r = y-X[,pred.r]%*%fit.r$mu.beta[pred.r] e.l = y-X[,pred.l]%*%fit.l$mu.beta[pred.l] e.h = y-X[,pred.h]%*%fit.h$mu.beta[pred.h] c(mean(e^2),mean(e.r^2),mean(e.l^2),mean(e.h^2)) # Let us see what happens out-of-sample set.seed(12345) nvmax = 10 n = nrow(X) alpha = 0.8 n1 = trunc(alpha*n) n2 = n-n1 M0 = 1000 M = 2000 skip = 1 Rep = 20 stats = array(0,c(2,Rep,6)) for (i in 1:Rep){ obs1 = sort(sample(1:n,size=n1,replace=FALSE)) obs2 = setdiff(1:n,obs1) y1 = y[obs1] y2 = y[obs2] X1 = X[obs1,] X2 = X[obs2,] ind = 1:ncol(X1) data1 = data.frame(y=y1,X=X1) fit.r = bayesreg(y~.,data=data1,model="normal",prior="ridge",n.samples=M,burnin=M0,thin=skip) q.r = t(apply(fit.r$beta,1,quantile,c(0.025,0.5,0.975))) pred.r = ind[((q.r[,1]>0)|(q.r[,3]<0))] if (length(pred.h)>1){ e.r = y2-X2[,pred.r]%*%fit.r$mu.beta[pred.r] }else{ e.r = y2-X2[,pred.r]*fit.r$mu.beta[pred.r] } e.r1 = y2-X2%*%fit.r$mu.beta fit.l = bayesreg(y~.,data=data1,model="normal",prior="lasso",n.samples=M,burnin=M0,thin=skip) q.l = t(apply(fit.l$beta,1,quantile,c(0.025,0.5,0.975))) pred.l = ind[((q.l[,1]>0)|(q.l[,3]<0))] if (length(pred.h)>1){ e.l = y2-X2[,pred.l]%*%fit.l$mu.beta[pred.l] }else{ e.l = y2-X2[,pred.l]*fit.l$mu.beta[pred.l] } e.l1 = y2-X2%*%fit.l$mu.beta fit.h = bayesreg(y~.,data=data1,model="normal",prior="horseshoe",n.samples=M,burnin=M0,thin=skip) q.h = t(apply(fit.h$beta,1,quantile,c(0.025,0.5,0.975))) pred.h = ind[((q.h[,1]>0)|(q.h[,3]<0))] if (length(pred.h)>1){ e.h = y2-X2[,pred.h]%*%fit.h$mu.beta[pred.h] }else{ e.h = y2-X2[,pred.h]*fit.h$mu.beta[pred.h] } e.h1 = y2-X2%*%fit.h$mu.beta stats[1,i,] = c(mean(e.r^2),mean(e.l^2),mean(e.h^2),mean(abs(e.r)),mean(abs(e.l)),mean(abs(e.h))) stats[2,i,] = c(mean(e.r1^2),mean(e.l1^2),mean(e.h1^2),mean(abs(e.r1)),mean(abs(e.l1)),mean(abs(e.h1))) } range = c(min(stats[!is.na(stats)]),max(stats[!is.na(stats)])) par(mfrow=c(1,2)) boxplot(stats[1,,],names=c("mse(r)","mse(l)","mse(h)","mse(r)","mse(l)","mse(h)"),ylim=range) title(paste("n=",n," - n1=",n1," (",100*alpha,"%)\nR=",Rep," replications",sep="")) legend("topright",legend="Shrinkage and selecting") boxplot(stats[2,,],names=c("mse(r)","mse(l)","mse(h)","mse(r)","mse(l)","mse(h)"),ylim=range) title(paste("n=",n," - n1=",n1," (",100*alpha,"%)\nR=",Rep," replications",sep="")) legend("topright",legend="Shrinkage only")