# Reference: http://www.jstor.com/stable/223257 install.packages("wooldridge") library("wooldridge") data('wagepan') names(wagepan) dim(wagepan) ?wagepan attach(wagepan) n = nrow(wagepan) occupation = c("Professional, Technical and kindred","Managers, Officials and Proprietors","Sales Workers","Clerical and kindred", "Craftsmen, Foremen and kindred","Operatives and kindred", "Laborers and farmers","Farm Laborers and Foreman","Service Workers") industry = c("Agricultural","Mining","Construction","Trade", "Transportation","Finance","Business & Repair Service", "Personal Service","Entertainment","Manufacturing", "Professional & Related Service","Public Administration") race = rep(1,n) race[black==1]=2 race[hisp==1]=3 location = rep(1,n) location[nrthcen==1]=2 location[nrtheast==1]=3 location[south==1]=4 occup = rep(1,n) occup[occ2==1]=2 occup[occ3==1]=3 occup[occ4==1]=4 occup[occ5==1]=5 occup[occ6==1]=6 occup[occ7==1]=7 occup[occ8==1]=8 occup[occ9==1]=9 industry = rep(1,n) industry[min==1]=2 industry[construc==1]=3 industry[trad==1]=4 industry[tad==1]=5 industry[fin==1]=6 industry[bus==1]=7 industry[per==1]=8 industry[ent==1]=9 industry[manuf==1]=10 industry[pro==1]=11 industry[pub==1]=12 par(mfrow=c(2,2)) plot(educ,lwage) plot(exper,lwage) plot(hours,lwage) plot(year-1979,lwage) par(mfrow=c(2,4)) boxplot(lwage~race,names=c("white","black","hisp")) boxplot(lwage~married) boxplot(lwage~union) boxplot(lwage~location) boxplot(lwage~race,names=c("white","black","hisp"),outline=FALSE) boxplot(lwage~married,outline=FALSE) boxplot(lwage~union,outline=FALSE) boxplot(lwage~location,outline=FALSE) par(mfrow=c(2,2)) boxplot(lwage~occup,xlab="Occupation") boxplot(lwage~occup,outline=FALSE,xlab="Occupation") boxplot(lwage~industry,xlab="Industry") boxplot(lwage~industry,outline=FALSE,xlab="Industry") # OLS y = scale(lwage) X1 = cbind(educ,exper,hours,year-1979) X2 = cbind(black,hisp,married,union,nrthcen,nrtheast,south) X3 = cbind(occ1,occ2,occ3,occ4,occ5,occ6,occ7,occ8,occ9) X4 = cbind(agric,min,construc,trad,tra,fin,per,ent,manuf,pro,pub) X = cbind(scale(X1),X2,X3,X4) p = ncol(X) summary(lm(y~X-1)) install.packages("bayesreg") library("bayesreg") M0 = 1000 M = 1000 skip = 10 data1 = data.frame(y=y,X=X) fit.r = bayesreg(y~X,data=data1,model="normal",prior="ridge",n.samples=M,burnin=M0,thin=skip) fit.l = bayesreg(y~X,data=data1,model="normal",prior="lasso",n.samples=M,burnin=M0,thin=skip) fit.h = bayesreg(y~X,data=data1,model="normal",prior="horseshoe",n.samples=M,burnin=M0,thin=skip) quant.r = t(apply(fit.r$beta,1,quantile,c(0.05,0.5,0.95))) quant.l = t(apply(fit.l$beta,1,quantile,c(0.05,0.5,0.95))) quant.h = t(apply(fit.h$beta,1,quantile,c(0.05,0.5,0.95))) range = range(quant.r,quant.l,quant.h) par(mfrow=c(2,2)) plot(quant.r[,2],lwd=2,pch=16,ylim=range,ylab="beta") title("Ridge regression") for (i in 1:p) segments(i,quant.r[i,1],i,quant.r[i,3]) abline(h=0,lty=2) plot(quant.l[,2],lwd=2,pch=16,ylim=range,ylab="beta") title("Lasso regression") for (i in 1:p) segments(i,quant.l[i,1],i,quant.l[i,3]) abline(h=0,lty=2) plot(quant.r[,2],lwd=2,pch=16,ylim=range,ylab="beta") title("Horseshoe regression") for (i in 1:p) segments(i,quant.h[i,1],i,quant.h[i,3]) abline(h=0,lty=2) range = sqrt(range(fit.r$sigma2,fit.l$sigma2,fit.h$sigma2)) plot(density(sqrt(fit.r$sigma2)),xlab="",main="Sigma") lines(density(sqrt(fit.l$sigma2)),col=2) lines(density(sqrt(fit.h$sigma2)),col=3) range = range(quant.r,quant.l,quant.h) par(mfrow=c(1,1)) plot(quant.r[,2],pch=16,ylim=range,ylab="beta",cex=0.5,col=0) for (i in 1:p){ segments(i,quant.r[i,1],i,quant.r[i,3]) segments(i+0.2,quant.l[i,1],i+0.2,quant.l[i,3],col=2) segments(i+0.4,quant.h[i,1],i+0.4,quant.h[i,3],col=3) } abline(h=0,lty=2)