####################################################################### # # Revisiting Stock and Watson macroeconomic data # # CART, BART ad random forests # # X is a 659 by 130 matrix # # Hedibert F. Lopes # March 25th 2021 # ####################################################################### #install.packages("aplore3") #install.packages("glmnet") #install.packages("arm") #install.packages("tree") #install.packages("BART") library("aplore3") library("glmnet") library("arm") library("tree") library("BART") library("randomForest") library("monomvn") # Reading the data macro2 = read.table("stockwatson2002-data.txt",header=FALSE) k = ncol(macro2)-1 y = macro2[,1] X = as.matrix(macro2[,2:(k+1)]) macrodata = data.frame(y,X) msemae = matrix(0,3,2) tree.fit = tree(y~.,data=macrodata) summary(tree.fit) tree.fit plot(tree.fit,type="uniform") text(tree.fit,pretty=0) pred.cart = predict(tree.fit,data=macrodata) err = y-pred.cart msemae[1,] = c(mean(err^2),mean(abs(err))) # BART for classification bart = wbart(X,y) pred.bart = apply(bart$yhat.train,2,mean) err = y-pred.bart msemae[2,] = c(mean(err^2),mean(abs(err))) # Random forest rf = randomForest(y~.,data=macrodata,mtry=50,importance=TRUE) pred.rf = rf$predicted err = y-pred.rf msemae[3,] = c(mean(err^2),mean(abs(err))) rownames(msemae) = c("CART","BART","RF") colnames(msemae) = c("MSE","MAE") msemae cor(cbind(y,pred.cart,pred.bart,pred.rf)) pairs(cbind(y,pred.cart,pred.bart,pred.rf)) set.seed(1243253) Rep = 20 n = nrow(X) gsize = trunc(n/3) n2 = n-gsize mse = array(0,c(Rep,5)) mae = array(0,c(Rep,5)) for (rep in 1:Rep){ ind = sample(1:n) y1 = y[ind] X1 = X[ind,] test = sort(sample(1:n,size=gsize,replace=FALSE)) train = setdiff(1:n,test) ytrain = y1[train] Xtrain = X1[train,] ytest = y1[test] Xtest = X1[test,] traindata = data.frame(ytrain,Xtrain) testdata = data.frame(ytest,Xtest) tree = tree(ytrain~.,data=traindata) pred.cart = predict(tree,newdata=testdata) err = ytest-pred.cart mse[rep,1] = mean(err^2) mae[rep,1] = mean(abs(err)) bart = wbart(x.train=Xtrain,y.train=ytrain,x.test=Xtest) pred.bart = apply(bart$yhat.test,2,mean) err = ytest-pred.bart mse[rep,2] = mean(err^2) mae[rep,2] = mean(abs(err)) rf = randomForest(ytrain~.,data=traindata,mtry=50,importance=TRUE) pred.rf = predict(rf,newdata=testdata) err = ytest-pred.rf mse[rep,3] = mean(err^2) mae[rep,3] = mean(abs(err)) fit.hs = blasso(Xtrain,ytrain,T=10000,thin=1,RJ=FALSE,case="hs",icept=FALSE,verb=0) fit.ng = blasso(Xtrain,ytrain,T=10000,thin=1,RJ=FALSE,case="ng",icept=FALSE,verb=0) beta.hs = apply(fit.hs$beta,2,mean) beta.ng = apply(fit.ng$beta,2,mean) err.hs = ytest-Xtest%*%beta.hs err.ng = ytest-Xtest%*%beta.ng mse[rep,4] = mean(err.hs^2) mae[rep,4] = mean(abs(err.hs)) mse[rep,5] = mean(err.hs^2) mae[rep,5] = mean(abs(err.hs)) } par(mfrow=c(1,2)) boxplot(sqrt(mse),names=c("CART","BART","RF","HS","NG"),ylab="Root mean square error") boxplot(mae,names=c("CART","BART","RF","HS","NG"),ylab="Mean absolute error")