library("mlbench") library("mvtnorm") library("leaps") standardize = function(x){ (x-mean(x))/sd(x) } logpredictive = function(y,X,b,B,nu,sig2){ In = diag(1,nrow(X)) return(dmvt(y,X%*%b,sig2*(In+X%*%B%*%t(X)),df=nu,log=TRUE)) } Xnames = c("lstat","rm","b","ptratio","lon","lat","indus","rad","nox","chas") Xnames1 = c("% of lower status of the population", "average number of rooms per dwelling", "1000(B-0.63)^2 - B=proportion of blacks by town", "pupil-teacher ratio by town", "longitude of census tract", "latitude of census tract", "proportion of non-retail business acres per town", "index of accessibility to radial highways", "nitric oxides concentration (parts per 10 million)", "Charles River dummy variable") yname = "median value of homes" ############################################################ # BOSTON HOUSE DATA ############################################################ data(BostonHousing2) attach(BostonHousing2) par(mfrow=c(1,1)) plot(lat,lon) y = cmedv[cmedv<50] X = cbind(lstat,rm,b,ptratio,lon,lat,indus,rad,nox,chas) X = X[cmedv<50,] n = nrow(X) p = ncol(X) y = (y-mean(y))/sqrt(var(y)) for (i in 1:p) X[,i] = (X[,i]-mean(X[,i]))/sqrt(var(X[,i])) colnames(X) = Xnames par(mfrow=c(2,5)) for (i in 1:p){ plot(X[,i],y,xlab="",ylab="") mtext(side=1,line=2,text=Xnames1[i],cex=0.55) mtext(side=2,line=2,text=yname,cex=0.55) } fit = lm(y~X-1) summary(fit) par(mfrow=c(1,1)) hist(y,breaks=20,xlim=c(-3,3),prob=TRUE,ylim=c(0,1.1)) lines(density(fit$residuals),col=2,lwd=3) ############################################################ # OLS estimation of the complete/full model # All covariates/regressors included ############################################################ iXtX = solve(t(X)%*%X) betahat = iXtX%*%t(X)%*%y sigma2hat = sum((y-X%*%betahat)^2)/(n-p) se.betahat = sqrt(sigma2hat*diag(iXtX)) L = betahat+qt(0.025,df=n-p) U = betahat+qt(0.975,df=n-p) tvalue = abs(betahat)/se.betahat pvalue = 2*(1-pt(tvalue,df=n-p)) tab = cbind(round(betahat,5),round(se.betahat,5), round(tvalue,3),round(pvalue,5)) colnames(tab) = c("Estimate","Std.Error","t-value","Pr(>|t|)") tab summary(lm(y~X-1)) ###### Running all models nvmax = 10 data = data.frame(y=y,X=X) regs = regsubsets(y~.,data=data,method="seq",nvmax=nvmax,intercept=FALSE) regs.summary = summary(regs) par(mfrow=c(1,2)) plot(regs.summary$bic,pch=16,xlab="Number of predictors",ylab="BIC") plot(regs,scale="bic") ############################################################ # Bayesian inference estimation of the complete/full model # All covariates/regressors included ############################################################ b0 = rep(0,p) B0 = diag(9,p) nu0 = 5 sig20 = 1.0 nu0sig20 = nu0*sig20 iB0 = solve(B0) iB0b0 = iB0%*%b0 B1 = solve(iB0+t(X)%*%X) tcB1 = t(chol(B1)) b1 = B1%*%(iB0b0+t(X)%*%y) nu1 = nu0 + (n-p) sig21 = ((nu0sig20+sum((y-X%*%b1)*y)+t(b0-b1)%*%iB0b0)/nu1)[1] se1 = sqrt(sig21*diag(B1)) logpredy = logpredictive(y,X,b0,B0,nu0,sig20) L = b1+qt(0.025,df=nu1) U = b1+qt(0.975,df=nu1) tvalue = abs(b1)/se1 pvalue = 2*(1-pt(tvalue,df=nu1)) tab.bayes = cbind(round(b1,5),round(se1,5), round(tvalue,3),round(pvalue,5)) colnames(tab.bayes) = c("Estimate","Std.Error","t-value","Pr(>|t|)") tab.bayes ############################################################ # Bayesian inference for each one of the 2^(p-1) models # p-1 since we will include the intercept in all models ############################################################ k = 2^p model = matrix(0,k-1,p) ind = 1:p logpred = rep(0,k-1) nu0 = 5 sig20 = 1.0 l = 0 for (i1 in 1:0) for (i2 in 1:0) for (i3 in 1:0) for (i4 in 1:0) for (i5 in 1:0) for (i6 in 1:0) for (i7 in 1:0) for (i8 in 1:0) for (i9 in 1:0) for (i10 in 1:0){ l = l + 1 if (l