library("mlbench")
library("mvtnorm")
library("leaps")
# Center and scale a numeric vector to mean 0 and standard deviation 1.
standardize = function(x){
  centered = x - mean(x)
  centered / sd(x)
}
# Log predictive density of the response vector y under the conjugate
# normal linear model: marginalizing the coefficients (prior/posterior
# mean b, scale matrix B) yields a multivariate t with location X b,
# scale sig2 * (I + X B X') and nu degrees of freedom (mvtnorm::dmvt).
logpredictive = function(y,X,b,B,nu,sig2){
  n_obs = nrow(X)
  scale_mat = sig2 * (diag(1, n_obs) + X %*% B %*% t(X))
  dmvt(y, X %*% b, scale_mat, df = nu, log = TRUE)
}
# Load the corrected Boston housing data (mlbench): 506 census tracts.
data(BostonHousing2)
# attach() exposes the columns as bare names (cmedv, lstat, ...) below.
# NOTE(review): attach() is generally discouraged (search-path masking);
# kept here because the whole script relies on the bare names.
attach(BostonHousing2)
# Short predictor names and matching long descriptions for plot labels.
Xnames = c("lstat","rm","b","ptratio","lon","lat","indus","rad","nox","chas")
Xnames1 = c("% of lower status of the population","average number of rooms per dwelling",
"1000(B-0.63)^2 - B=proportion of blacks by town","pupil-teacher ratio by town","longitude of census tract",
"latitude of census tract","proportion of non-retail business acres per town","index of accessibility to radial highways",
"nitric oxides concentration (parts per 10 million)","Charles River dummy variable")
yname = "median value of homes"
# Map of the census-tract locations.
par(mfrow=c(1,1))
plot(lon,lat)
# Drop tracts where the response equals 50 (cmedv is top-coded at 50).
y = cmedv[cmedv<50]
X = cbind(lstat,rm,b,ptratio,lon,lat,indus,rad,nox,chas)
X = X[cmedv<50,]
n = nrow(X)
p = ncol(X)
# Standardize response and every predictor to mean 0 / sd 1, so the
# regressions below need no intercept and coefficients are comparable.
y = (y-mean(y))/sqrt(var(y))
for (i in 1:p)
X[,i] = (X[,i]-mean(X[,i]))/sqrt(var(X[,i]))
colnames(X) = Xnames
# One scatterplot of each standardized predictor against the response.
par(mfrow=c(2,5))
for (i in 1:p){
plot(X[,i],y,xlab="",ylab="")
mtext(side=1,line=2,text=Xnames1[i],cex=0.55)
mtext(side=2,line=2,text=yname,cex=0.55)
}
# OLS fit without intercept (data already centered).
fit = lm(y~X-1)
summary(fit)
##
## Call:
## lm(formula = y ~ X - 1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.36495 -0.28402 -0.06474 0.23647 2.11424
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## Xlstat -0.35350 0.03694 -9.570 < 2e-16 ***
## Xrm 0.32673 0.03006 10.870 < 2e-16 ***
## Xb 0.10777 0.02611 4.128 4.31e-05 ***
## Xptratio -0.18398 0.02994 -6.144 1.69e-09 ***
## Xlon -0.13123 0.02626 -4.997 8.17e-07 ***
## Xlat 0.04839 0.02383 2.031 0.0428 *
## Xindus -0.09371 0.04110 -2.280 0.0230 *
## Xrad -0.01962 0.03556 -0.552 0.5814
## Xnox -0.02379 0.04227 -0.563 0.5737
## Xchas 0.01481 0.02349 0.631 0.5286
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5014 on 480 degrees of freedom
## Multiple R-squared: 0.7533, Adjusted R-squared: 0.7481
## F-statistic: 146.5 on 10 and 480 DF, p-value: < 2.2e-16
# Overlay the density of the OLS residuals on a histogram of the
# standardized response (informal check of the normal error assumption).
par(mfrow=c(1,1))
hist(y,breaks=20,xlim=c(-3,3),prob=TRUE,ylim=c(0,1.1))
lines(density(fit$residuals),col=2,lwd=3)
# Here we replicate the derivations from the R function lm by hand.
# Replicate lm()'s coefficient table by hand from the normal equations.
iXtX = solve(t(X)%*%X)
betahat = iXtX%*%t(X)%*%y
# Unbiased residual variance estimate (n - p degrees of freedom).
sigma2hat = sum((y-X%*%betahat)^2)/(n-p)
se.betahat = sqrt(sigma2hat*diag(iXtX))
# 95% confidence bounds: the t quantile must be scaled by the standard
# error (the original code added the unscaled quantile — a bug).
L = betahat+qt(0.025,df=n-p)*se.betahat
U = betahat+qt(0.975,df=n-p)*se.betahat
tvalue = abs(betahat)/se.betahat
# Two-sided p-values from the t distribution.
pvalue = 2*(1-pt(tvalue,df=n-p))
tab = cbind(round(betahat,5),round(se.betahat,5),
round(tvalue,3),round(pvalue,5))
colnames(tab) = c("Estimate","Std.Error","t-value","Pr(>|t|)")
# Let us compare the results. They should be identical!
# Hand-computed table (t-values shown in absolute value).
tab
## Estimate Std.Error t-value Pr(>|t|)
## lstat -0.35350 0.03694 9.570 0.00000
## rm 0.32673 0.03006 10.870 0.00000
## b 0.10777 0.02611 4.128 0.00004
## ptratio -0.18398 0.02994 6.144 0.00000
## lon -0.13123 0.02626 4.997 0.00000
## lat 0.04839 0.02383 2.031 0.04282
## indus -0.09371 0.04110 2.280 0.02305
## rad -0.01962 0.03556 0.552 0.58144
## nox -0.02379 0.04227 0.563 0.57371
## chas 0.01481 0.02349 0.631 0.52861
# The lm() summary for the same no-intercept model, for comparison.
summary(lm(y~X-1))
##
## Call:
## lm(formula = y ~ X - 1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.36495 -0.28402 -0.06474 0.23647 2.11424
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## Xlstat -0.35350 0.03694 -9.570 < 2e-16 ***
## Xrm 0.32673 0.03006 10.870 < 2e-16 ***
## Xb 0.10777 0.02611 4.128 4.31e-05 ***
## Xptratio -0.18398 0.02994 -6.144 1.69e-09 ***
## Xlon -0.13123 0.02626 -4.997 8.17e-07 ***
## Xlat 0.04839 0.02383 2.031 0.0428 *
## Xindus -0.09371 0.04110 -2.280 0.0230 *
## Xrad -0.01962 0.03556 -0.552 0.5814
## Xnox -0.02379 0.04227 -0.563 0.5737
## Xchas 0.01481 0.02349 0.631 0.5286
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5014 on 480 degrees of freedom
## Multiple R-squared: 0.7533, Adjusted R-squared: 0.7481
## F-statistic: 146.5 on 10 and 480 DF, p-value: < 2.2e-16
# Classical subset selection with leaps::regsubsets, up to all 10
# predictors, no intercept (data are standardized).
nvmax = 10
data = data.frame(y=y,X=X)
# Spell out method="seqrep" (sequential replacement) in full; the
# original "seq" relied on partial argument matching.
regs = regsubsets(y~.,data=data,method="seqrep",nvmax=nvmax,intercept=FALSE)
regs.summary = summary(regs)
# BIC of the best model of each size, and the regsubsets BIC panel.
par(mfrow=c(1,2))
plot(regs.summary$bic,pch=16,xlab="Number of predictors",ylab="BIC")
plot(regs,scale="bic")
# Conjugate Bayesian linear regression for the full model:
# b | sig2 ~ N(b0, sig2*B0) and nu0*sig20/sig2 ~ chi^2_nu0.
b0 = rep(0,p)
B0 = diag(9,p)
nu0 = 5
sig20 = 1.0
nu0sig20 = nu0*sig20
iB0 = solve(B0)
iB0b0 = iB0%*%b0
# Standard conjugate posterior updates.
B1 = solve(iB0+t(X)%*%X)
b1 = B1%*%(iB0b0+t(X)%*%y)
nu1 = nu0 + (n-p)
sig21 = ((nu0sig20+sum((y-X%*%b1)*y)+t(b0-b1)%*%iB0b0)/nu1)[1]
se1 = sqrt(sig21*diag(B1))
# Log prior predictive of y under the full model.
logpredy = logpredictive(y,X,b0,B0,nu0,sig20)
# 95% credible bounds: the t quantile must be scaled by the posterior
# standard error (the original added the unscaled quantile — a bug).
L = b1+qt(0.025,df=nu1)*se1
U = b1+qt(0.975,df=nu1)*se1
tvalue = abs(b1)/se1
pvalue = 2*(1-pt(tvalue,df=nu1))
tab.bayes = cbind(round(b1,5),round(se1,5),
round(tvalue,3),round(pvalue,5))
colnames(tab.bayes) = c("Estimate","Std.Error","t-value","Pr(>|t|)")
tab.bayes
## Estimate Std.Error t-value Pr(>|t|)
## lstat -0.35339 0.03749 9.426 0.00000
## rm 0.32670 0.03051 10.708 0.00000
## b 0.10775 0.02650 4.066 0.00006
## ptratio -0.18396 0.03039 6.053 0.00000
## lon -0.13122 0.02666 4.922 0.00000
## lat 0.04836 0.02419 1.999 0.04616
## indus -0.09372 0.04171 2.247 0.02509
## rad -0.01966 0.03610 0.545 0.58619
## nox -0.02384 0.04289 0.556 0.57853
## chas 0.01482 0.02385 0.621 0.53465
# Here we enumerate all \(2^p - 1\) non-empty subsets of the \(p\) predictors; no intercept is needed since the data are standardized.
# Enumerate all 2^p - 1 = 1023 non-empty subsets of the p = 10
# predictors. The nested loop order fixes the model indexing (row l of
# `model`) that later code refers to by number — do not reorder.
k = 2^p
model = matrix(0,k-1,p)
ind = 1:p
logpred = rep(0,k-1)
nu0 = 5
sig20 = 1.0
l = 0
# Each index runs 1 then 0 (include/exclude); the all-zero combination
# is the k-th and last one, skipped by the l<k guard below.
for (i1 in 1:0)
for (i2 in 1:0)
for (i3 in 1:0)
for (i4 in 1:0)
for (i5 in 1:0)
for (i6 in 1:0)
for (i7 in 1:0)
for (i8 in 1:0)
for (i9 in 1:0)
for (i10 in 1:0){
l = l + 1
if (l<k){
model[l,] = c(i1,i2,i3,i4,i5,i6,i7,i8,i9,i10)
p1 = sum(model[l,])
# Design matrix restricted to the predictors included in model l.
X1 = matrix(X[,ind[model[l,]==1]],n,p1)
b0 = rep(0,p1)
B0 = diag(9,p1)
# Log prior predictive (marginal likelihood) of model l.
logpred[l] = logpredictive(y,X1,b0,B0,nu0,sig20)
}
}
# Number of included regressors per model, tabulated alongside each
# model's log predictive score.
nreg = apply(model,1,sum)
tab.bayes = cbind(model,nreg,logpred)
tab.bayes[1:20,]
## nreg logpred
## [1,] 1 1 1 1 1 1 1 1 1 1 10 -398.4742
## [2,] 1 1 1 1 1 1 1 1 1 0 9 -394.5118
## [3,] 1 1 1 1 1 1 1 1 0 1 9 -395.0592
## [4,] 1 1 1 1 1 1 1 1 0 0 8 -391.0438
## [5,] 1 1 1 1 1 1 1 0 1 1 9 -394.8806
## [6,] 1 1 1 1 1 1 1 0 1 0 8 -390.9475
## [7,] 1 1 1 1 1 1 1 0 0 1 8 -391.5660
## [8,] 1 1 1 1 1 1 1 0 0 0 7 -387.5690
## [9,] 1 1 1 1 1 1 0 1 1 1 9 -397.4370
## [10,] 1 1 1 1 1 1 0 1 1 0 8 -393.5044
## [11,] 1 1 1 1 1 1 0 1 0 1 8 -396.4628
## [12,] 1 1 1 1 1 1 0 1 0 0 7 -392.3354
## [13,] 1 1 1 1 1 1 0 0 1 1 8 -393.8890
## [14,] 1 1 1 1 1 1 0 0 1 0 7 -389.9935
## [15,] 1 1 1 1 1 1 0 0 0 1 7 -394.4709
## [16,] 1 1 1 1 1 1 0 0 0 0 6 -390.3542
## [17,] 1 1 1 1 1 0 1 1 1 1 9 -396.3601
## [18,] 1 1 1 1 1 0 1 1 1 0 8 -392.3603
## [19,] 1 1 1 1 1 0 1 1 0 1 8 -392.9659
## [20,] 1 1 1 1 1 0 1 1 0 0 7 -388.9145
# Log predictive by model size, labelling the best model's predictors.
plot(nreg,logpred,xlab="Number of regressors",ylab="Log predictive")
legend("bottomright",legend=paste(Xnames[tab.bayes[logpred==max(logpred),1:p]==1]))
# Posterior model probabilities under a uniform model prior.
# Subtract the maximum before exponentiating for numerical stability;
# the constant cancels in the normalization, so the PMPs are unchanged.
PMP = exp(logpred - max(logpred))
PMP = PMP/sum(PMP)
plot(nreg,PMP)
# Append PMP, sort models by PMP, and append the cumulative PMP.
tab.bayes = cbind(tab.bayes,PMP)
ord = order(PMP,decreasing=TRUE)
tab.bayes = tab.bayes[ord,]
tab.bayes = cbind(tab.bayes,cumsum(tab.bayes[,p+3]))
# Show the models accounting for 99% of the posterior mass.
tab.bayes[tab.bayes[,p+4]<0.99,]
## nreg logpred PMP
## [1,] 1 1 1 1 1 0 1 0 0 0 6 -385.9380 0.644334765 0.6443348
## [2,] 1 1 1 1 1 1 1 0 0 0 7 -387.5690 0.126112726 0.7704475
## [3,] 1 1 1 1 1 0 0 0 1 0 6 -388.0284 0.079661435 0.8501089
## [4,] 1 1 1 1 1 0 1 1 0 0 7 -388.9145 0.032840131 0.8829491
## [5,] 1 1 1 1 1 0 0 0 0 0 5 -389.1183 0.026785070 0.9097341
## [6,] 1 1 1 1 1 0 1 0 1 0 7 -389.1333 0.026387458 0.9361216
## [7,] 1 1 1 1 1 0 1 0 0 1 7 -389.9666 0.011468109 0.9475897
## [8,] 1 1 1 1 1 1 0 0 1 0 7 -389.9935 0.011163289 0.9587530
## [9,] 1 1 1 1 1 0 0 1 0 0 6 -390.0158 0.010917114 0.9696701
## [10,] 1 1 1 1 1 1 0 0 0 0 6 -390.3542 0.007783398 0.9774535
## [11,] 1 1 1 1 1 1 1 0 1 0 8 -390.9475 0.004300264 0.9817538
## [12,] 1 1 1 1 1 1 1 1 0 0 8 -391.0438 0.003905589 0.9856593
## [13,] 1 1 1 1 1 0 0 1 1 0 7 -391.1861 0.003387563 0.9890469
# Label the columns and print the ten highest-PMP models with their
# cumulative posterior model probability.
colnames(tab.bayes) = c(Xnames,"nreg","logpred","PMP","cum PMP")
round(tab.bayes[1:10,],3)
## lstat rm b ptratio lon lat indus rad nox chas nreg logpred PMP cum PMP
## [1,] 1 1 1 1 1 0 1 0 0 0 6 -385.938 0.644 0.644
## [2,] 1 1 1 1 1 1 1 0 0 0 7 -387.569 0.126 0.770
## [3,] 1 1 1 1 1 0 0 0 1 0 6 -388.028 0.080 0.850
## [4,] 1 1 1 1 1 0 1 1 0 0 7 -388.915 0.033 0.883
## [5,] 1 1 1 1 1 0 0 0 0 0 5 -389.118 0.027 0.910
## [6,] 1 1 1 1 1 0 1 0 1 0 7 -389.133 0.026 0.936
## [7,] 1 1 1 1 1 0 1 0 0 1 7 -389.967 0.011 0.948
## [8,] 1 1 1 1 1 1 0 0 1 0 7 -389.994 0.011 0.959
## [9,] 1 1 1 1 1 0 0 1 0 0 6 -390.016 0.011 0.970
## [10,] 1 1 1 1 1 1 0 0 0 0 6 -390.354 0.008 0.977
# We repeat the 75/25 train/test splitting 100 times.
# Repeated random 75/25 train/test splits: compare model choice by
# prior predictive on the training data vs posterior predictive on the
# held-out test data.
set.seed(12353)
Rep = 100
n1 = trunc(0.75*n)
n2 = n-n1
k = 2^p
model = matrix(0,k-1,p)
ind = 1:p
nu0 = 5
sig20 = 1.0
# Row r holds the k-1 model scores for replication r.
logpred1 = matrix(0,Rep,k-1)
logpostpred1 = matrix(0,Rep,k-1)
# NOTE(review): the loop variable `rep` shadows base::rep inside the
# loop body; harmless here since rep() is not called within it.
for (rep in 1:Rep){
frac = sample(1:n,size=n1,replace=FALSE)
Xtrain = X[frac,]
ytrain = y[frac]
Xtest = X[-frac,]
ytest = y[-frac]
l = 0
# Same include/exclude enumeration as above; the loop order defines
# the model index l referenced in the summaries further down.
for (i1 in 1:0)
for (i2 in 1:0)
for (i3 in 1:0)
for (i4 in 1:0)
for (i5 in 1:0)
for (i6 in 1:0)
for (i7 in 1:0)
for (i8 in 1:0)
for (i9 in 1:0)
for (i10 in 1:0){
l = l + 1
if (l<k){
model[l,] = c(i1,i2,i3,i4,i5,i6,i7,i8,i9,i10)
p1 = sum(model[l,])
X1 = matrix(Xtrain[,ind[model[l,]==1]],n1,p1)
b0 = rep(0,p1)
B0 = diag(9,p1)
nu0sig20 = nu0*sig20
iB0 = solve(B0)
iB0b0 = iB0%*%b0
# Conjugate posterior updates from the training split.
B1 = solve(iB0+t(X1)%*%X1)
# NOTE(review): tcB1 is computed but never used.
tcB1 = t(chol(B1))
b1 = B1%*%(iB0b0+t(X1)%*%ytrain)
nu1 = nu0 + (n1-p1)
sig21 = ((nu0sig20+sum((ytrain-X1%*%b1)*ytrain)+t(b0-b1)%*%iB0b0)/nu1)[1]
Xtest1 = matrix(Xtest[,ind[model[l,]==1]],n2,p1)
# Prior predictive on the training data ...
logpred1[rep,l] = logpredictive(ytrain,X1,b0,B0,nu0,sig20)
# ... and posterior predictive on the held-out test data.
logpostpred1[rep,l] = logpredictive(ytest,Xtest1,b1,B1,nu1,sig21)
}
}
}
ind = 1:(k-1)
predmodels = rep(0,Rep)
postpredmodels = rep(0,Rep)
# Record which model index maximizes each criterion per replication.
# which.max returns the first maximizer, so a tie cannot break the
# scalar assignment (ind[x==max(x)] would error when ties occur).
for (i in 1:Rep){
predmodels[i] = which.max(logpred1[i,])
postpredmodels[i] = which.max(logpostpred1[i,])
}
# Frequency of each selected model index across the 100 replications.
table(predmodels)/Rep
## predmodels
## 8 16 24 28 30 32 62 152
## 0.01 0.01 0.71 0.01 0.05 0.17 0.02 0.02
table(postpredmodels)/Rep
## postpredmodels
## 3 4 5 6 7 8 12 13 14 16 20 23 24 26 28 29
## 0.01 0.09 0.02 0.01 0.03 0.10 0.04 0.01 0.05 0.02 0.09 0.02 0.06 0.07 0.01 0.01
## 37 38 39 40 64 66 84 131 132 133 136 142 143 144 149 150
## 0.01 0.01 0.02 0.01 0.01 0.01 0.01 0.01 0.03 0.01 0.01 0.01 0.01 0.01 0.01 0.01
## 151 158 160 165 166 260 263 392 530 532 538 641 644 645 658
## 0.01 0.01 0.01 0.01 0.01 0.01 0.02 0.01 0.01 0.01 0.02 0.01 0.01 0.01 0.01
plot(table(predmodels)/Rep)
plot(table(postpredmodels)/Rep)
plot(predmodels)
plot(postpredmodels)
# Decode the most frequently selected model indices back into predictor
# names, together with each model's selection frequency.
c(Xnames[model[24,]==1],sum(predmodels==24)/Rep)
## [1] "lstat" "rm" "b" "ptratio" "lon" "indus" "0.71"
c(Xnames[model[32,]==1],sum(predmodels==32)/Rep)
## [1] "lstat" "rm" "b" "ptratio" "lon" "0.17"
c(Xnames[model[8,]==1],sum(postpredmodels==8)/Rep)
## [1] "lstat" "rm" "b" "ptratio" "lon" "lat" "indus"
## [8] "0.1"
c(Xnames[model[20,]==1],sum(postpredmodels==20)/Rep)
## [1] "lstat" "rm" "b" "ptratio" "lon" "indus" "rad"
## [8] "0.09"
c(Xnames[model[24,]==1],sum(postpredmodels==24)/Rep)
## [1] "lstat" "rm" "b" "ptratio" "lon" "indus" "0.06"
c(Xnames[model[26,]==1],sum(postpredmodels==26)/Rep)
## [1] "lstat" "rm" "b" "ptratio" "lon" "rad" "nox"
## [8] "0.07"