library("mlbench")
library("mvtnorm")
library("leaps")


standardize = function(x){
  (x-mean(x))/sd(x)
}

logpredictive = function(y,X,b,B,nu,sig2){
  In = diag(1,nrow(X))
  return(dmvt(y,X%*%b,sig2*(In+X%*%B%*%t(X)),df=nu,log=TRUE))
}

Xnames = c("lstat","rm","b","ptratio","lon","lat","indus","rad","nox","chas")

Xnames1 = c("% of lower status of the population",
"average number of rooms per dwelling",
"1000(B-0.63)^2 - B=proportion of blacks by town",
"pupil-teacher ratio by town",
"longitude of census tract",
"latitude of census tract",
"proportion of non-retail business acres per town",
"index of accessibility to radial highways",
"nitric oxides concentration (parts per 10 million)",
"Charles River dummy variable")
yname = "median value of homes"


############################################################
# BOSTON HOUSE DATA
############################################################
data(BostonHousing2)
attach(BostonHousing2)

par(mfrow=c(1,1))
plot(lat,lon)

y = cmedv[cmedv<50]  
X = cbind(lstat,rm,b,ptratio,lon,lat,indus,rad,nox,chas)
X = X[cmedv<50,]
n = nrow(X)
p = ncol(X)
y = (y-mean(y))/sqrt(var(y))
for (i in 1:p)
  X[,i] = (X[,i]-mean(X[,i]))/sqrt(var(X[,i]))
colnames(X) = Xnames

par(mfrow=c(2,5))
for (i in 1:p){
  plot(X[,i],y,xlab="",ylab="")
  mtext(side=1,line=2,text=Xnames1[i],cex=0.55)
  mtext(side=2,line=2,text=yname,cex=0.55)
} 

  
fit = lm(y~X-1)
summary(fit)

par(mfrow=c(1,1))
hist(y,breaks=20,xlim=c(-3,3),prob=TRUE,ylim=c(0,1.1))
lines(density(fit$residuals),col=2,lwd=3)


############################################################
# OLS estimation of the complete/full model 
# All covariates/regressors included
############################################################
iXtX    = solve(t(X)%*%X)
betahat = iXtX%*%t(X)%*%y
sigma2hat = sum((y-X%*%betahat)^2)/(n-p)
se.betahat = sqrt(sigma2hat*diag(iXtX))
L = betahat+qt(0.025,df=n-p)
U = betahat+qt(0.975,df=n-p)
tvalue = abs(betahat)/se.betahat
pvalue = 2*(1-pt(tvalue,df=n-p))

tab = cbind(round(betahat,5),round(se.betahat,5),
round(tvalue,3),round(pvalue,5))
colnames(tab) = c("Estimate","Std.Error","t-value","Pr(>|t|)")

tab

summary(lm(y~X-1))


###### Running all models
nvmax = 10
data  = data.frame(y=y,X=X)
regs  = regsubsets(y~.,data=data,method="seq",nvmax=nvmax,intercept=FALSE)
regs.summary = summary(regs)

par(mfrow=c(1,2))
plot(regs.summary$bic,pch=16,xlab="Number of predictors",ylab="BIC")
plot(regs,scale="bic")


############################################################
# Bayesian inference estimation of the complete/full model 
# All covariates/regressors included
############################################################
b0       = rep(0,p)
B0       = diag(9,p)
nu0      = 5
sig20    = 1.0
nu0sig20 = nu0*sig20
iB0      = solve(B0)
iB0b0    = iB0%*%b0
B1       = solve(iB0+t(X)%*%X)
tcB1     = t(chol(B1))
b1       = B1%*%(iB0b0+t(X)%*%y)
nu1      = nu0 + (n-p)
sig21    = ((nu0sig20+sum((y-X%*%b1)*y)+t(b0-b1)%*%iB0b0)/nu1)[1]
se1      = sqrt(sig21*diag(B1))
logpredy = logpredictive(y,X,b0,B0,nu0,sig20)
L = b1+qt(0.025,df=nu1)
U = b1+qt(0.975,df=nu1)
tvalue = abs(b1)/se1
pvalue = 2*(1-pt(tvalue,df=nu1))

tab.bayes = cbind(round(b1,5),round(se1,5),
round(tvalue,3),round(pvalue,5))
colnames(tab.bayes) = c("Estimate","Std.Error","t-value","Pr(>|t|)")

tab.bayes


############################################################
# Bayesian inference for each one of the 2^(p-1) models
# p-1 since we will include the intercept in all models
############################################################
k       = 2^p
model   = matrix(0,k-1,p)
ind     = 1:p
logpred = rep(0,k-1)
nu0     = 5
sig20   = 1.0
l       = 0
for (i1 in 1:0)
for (i2 in 1:0)
for (i3 in 1:0)
for (i4 in 1:0)
for (i5 in 1:0)
for (i6 in 1:0)
for (i7 in 1:0)
for (i8 in 1:0)
for (i9 in 1:0)
for (i10 in 1:0){
  l = l + 1 
  if (l<k){
   model[l,] = c(i1,i2,i3,i4,i5,i6,i7,i8,i9,i10)
   p1 = sum(model[l,])
   X1 = matrix(X[,ind[model[l,]==1]],n,p1)
   b0 = rep(0,p1)
   B0 = diag(9,p1)
   logpred[l] = logpredictive(y,X1,b0,B0,nu0,sig20)
  }
}
nreg = apply(model,1,sum)
tab.bayes = cbind(model,nreg,logpred)

plot(nreg,logpred,xlab="Number of regressors",ylab="Log predictive")
legend("bottomright",legend=paste(Xnames[tab.bayes[logpred==max(logpred),1:p]==1]))

PMP = exp(logpred)
PMP = PMP/sum(PMP)

plot(nreg,PMP)

tab.bayes = cbind(tab.bayes,PMP)

ord = order(PMP,decreasing=TRUE)

tab.bayes = tab.bayes[ord,]

tab.bayes = cbind(tab.bayes,cumsum(tab.bayes[,p+3]))

tab.bayes[tab.bayes[,p+4]<0.99,]

colnames(tab.bayes) = c(Xnames,"nreg","logpred","PMP","cum PMP")
round(tab.bayes[1:10,],3)

############################################################
# Splitting training and testing samples
# REPEATING 100 TIMES
############################################################
set.seed(12353)
Rep     = 100
n1      = trunc(0.75*n)
n2      = n-n1
k       = 2^p
model   = matrix(0,k-1,p)
ind     = 1:p
nu0     = 5
sig20   = 1.0
logpred1 = matrix(0,Rep,k-1)
logpostpred1 = matrix(0,Rep,k-1)
for (rep in 1:Rep){
  frac    = sample(1:n,size=n1,replace=FALSE)
  Xtrain  = X[frac,]
  ytrain  = y[frac]
  Xtest   = X[-frac,]
  ytest   = y[-frac]
  l       = 0
  for (i1 in 1:0)
  for (i2 in 1:0)
  for (i3 in 1:0)
  for (i4 in 1:0)
  for (i5 in 1:0)
  for (i6 in 1:0)
  for (i7 in 1:0)
  for (i8 in 1:0)
  for (i9 in 1:0)
  for (i10 in 1:0){
    l = l + 1 
    if (l<k){
      model[l,] = c(i1,i2,i3,i4,i5,i6,i7,i8,i9,i10)
      p1 = sum(model[l,])
      X1 = matrix(Xtrain[,ind[model[l,]==1]],n1,p1)
      b0 = rep(0,p1)
      B0 = diag(9,p1)
      nu0sig20 = nu0*sig20
      iB0      = solve(B0)
      iB0b0    = iB0%*%b0
      B1       = solve(iB0+t(X1)%*%X1)
      tcB1     = t(chol(B1))
      b1       = B1%*%(iB0b0+t(X1)%*%ytrain)
      nu1      = nu0 + (n1-p1)
      sig21    = ((nu0sig20+sum((ytrain-X1%*%b1)*ytrain)+t(b0-b1)%*%iB0b0)/nu1)[1]
      Xtest1   = matrix(Xtest[,ind[model[l,]==1]],n2,p1)
      logpred1[rep,l] = logpredictive(ytrain,X1,b0,B0,nu0,sig20)
      logpostpred1[rep,l] = logpredictive(ytest,Xtest1,b1,B1,nu1,sig21)
    }
  }
}

ind            = 1:(k-1)
predmodels     = rep(0,Rep)
postpredmodels = rep(0,Rep)
for (i in 1:Rep){
  predmodels[i]     = ind[logpred1[i,]==max(logpred1[i,])]
  postpredmodels[i] = ind[logpostpred1[i,]==max(logpostpred1[i,])]
}

table(predmodels)/Rep

table(postpredmodels)/Rep

plot(table(predmodels)/Rep)

plot(table(postpredmodels)/Rep)

plot(predmodels)

plot(postpredmodels)

c(Xnames[model[24,]==1],sum(predmodels==24)/Rep)
c(Xnames[model[32,]==1],sum(predmodels==32)/Rep)

c(Xnames[model[8,]==1],sum(postpredmodels==8)/Rep)
c(Xnames[model[20,]==1],sum(postpredmodels==20)/Rep)
c(Xnames[model[24,]==1],sum(postpredmodels==24)/Rep)
c(Xnames[model[26,]==1],sum(postpredmodels==26)/Rep)