Reading the data

library("arm")
## Loading required package: MASS
## Loading required package: Matrix
## Loading required package: lme4
## 
## arm (Version 1.11-2, built: 2020-7-27)
## Working directory is /Users/hedibert/Dropbox/Cursos/Cursos2021/BayesianLearning/Classes/Class7
nba     = read.csv("http://hedibert.org/wp-content/uploads/2016/03/height-weight-nba.csv")
wnba    = read.csv("http://hedibert.org/wp-content/uploads/2016/03/height-weight-wnba.csv")

n1 = nrow(wnba)
n2 = nrow(nba)
n = n1+n2

# Creating the response variable (0=WNBA and 1=NBA)
y = rep(0,n)
y[(n1+1):n] = 1

# Converging heights from inches to centimeters
# and weights from pounds to kilograms
height = rep(0,n)
weight = rep(0,n)
height[1:n1]=wnba[,4]*2.54
height[(n1+1):n]=nba[,3]*2.54
weight[1:n1]=wnba[,5]*0.453592
weight[(n1+1):n]=nba[,4]*0.453592

# For the WNBA, we grouped "F" and "GF" positions 
# under "Forward" position and grouped "FC" and "C" 
# positions under the "Center" position

forward.wnba = rep(0,n1)
forward.wnba[wnba[,3]=="F"]=1
forward.wnba[wnba[,3]=="GF"]=1
forward.nba = rep(0,n2)
forward.nba[nba[,2]=="F"]=1
center.wnba = rep(0,n1)
center.wnba[wnba[,3]=="FC"]=1
center.wnba[wnba[,3]=="C"]=1
center.nba = rep(0,n2)
center.nba[nba[,2]=="C"]=1
f = c(forward.wnba,forward.nba)
c = c(center.wnba,center.nba)

Bayesian modeling

Covariates: height, weight, forward and center plus the interactions between height and weight and forward center Overall, there are p=8 predictors and therefore \(2^p=256\) sub-models. Below we fit non-Bayesian logistic regressions, i.e.  generalized linar models with logit link, for all 255 models (excluding the model without any predictor). We compute the Akaike’s information criteria (AIC) and the misclassification errors, i.e. wnba players classified as nba players, nba players classified as# wnba players and the sum of both misclassification rates.

X     = cbind(height,weight,f,c,height*f,height*c,weight*f,weight*c)

bayesfit = bayesglm.fit(cbind(1,X),y,family=binomial(link = "logit"))

p     = ncol(X)
ind   = 1:p
k     = 2^p-1
model = matrix(0,k,p)
error = matrix(0,k,3)
aic   = rep(0,k)
l     = 0
for (i1 in 1:0)
for (i2 in 1:0)
for (i3 in 1:0)
for (i4 in 1:0)
for (i5 in 1:0)
for (i6 in 1:0)
for (i7 in 1:0)
for (i8 in 1:0){
  l   = l + 1
  if (l<=k){
    model[l,] = c(i1,i2,i3,i4,i5,i6,i7,i8)
    p1  = sum(model[l,])
    X1  = matrix(X[,ind[model[l,]==1]],n,p1)
    bayesfit    = bayesglm.fit(cbind(1,X1),y,family=binomial(link = "logit"))
    error[l,1] = 1-mean(round(bayesfit$fitted[1:n1])==y[1:n1])
    error[l,2] = 1-mean(round(bayesfit$fitted[(n1+1):n])==y[(n1+1):n])    
    error[l,3] = 1-mean(round(bayesfit$fitted)==y)
    aic[l] = bayesfit$aic
  }
}

tab = cbind(model,aic,error)
colnames(tab) = c("h","w","f","c","hf","hw","wf","wc","aic","misc.wnba","misc.nba","misc.all")
par(mfrow=c(1,1))
plot(aic,error[,1],ylim=c(0,1))
points(aic,error[,2],col=2)
points(aic,error[,3],col=3)

ord = order(aic)
round(tab[ord,],3)[1:30,]
##       h w f c hf hw wf wc    aic misc.wnba misc.nba misc.all
##  [1,] 1 1 1 1  0  0  0  0 67.252     0.043    0.010    0.017
##  [2,] 1 1 0 1  0  0  1  0 67.855     0.043    0.010    0.017
##  [3,] 1 1 0 1  1  0  0  0 67.997     0.043    0.010    0.017
##  [4,] 1 1 1 0  0  1  0  0 68.306     0.043    0.012    0.019
##  [5,] 1 1 1 0  0  0  0  1 68.662     0.050    0.010    0.019
##  [6,] 1 1 0 0  0  0  1  1 68.770     0.058    0.010    0.020
##  [7,] 1 1 0 0  1  1  0  0 68.980     0.043    0.012    0.019
##  [8,] 1 1 0 0  0  1  1  0 69.065     0.043    0.010    0.017
##  [9,] 1 1 1 1  0  0  1  0 69.254     0.043    0.010    0.017
## [10,] 1 1 1 1  0  0  0  1 69.265     0.043    0.010    0.017
## [11,] 1 1 1 1  0  1  0  0 69.297     0.043    0.010    0.017
## [12,] 1 1 1 1  1  0  0  0 69.309     0.043    0.010    0.017
## [13,] 1 1 0 0  1  0  0  1 69.527     0.050    0.010    0.019
## [14,] 1 1 0 1  0  0  1  1 69.851     0.043    0.010    0.017
## [15,] 1 1 0 1  0  1  1  0 69.897     0.043    0.010    0.017
## [16,] 1 1 0 1  1  0  1  0 69.947     0.043    0.010    0.017
## [17,] 1 1 0 1  1  0  0  1 70.013     0.043    0.010    0.017
## [18,] 1 1 0 1  1  1  0  0 70.040     0.043    0.010    0.017
## [19,] 1 1 1 0  0  1  0  1 70.230     0.036    0.012    0.017
## [20,] 1 1 1 0  0  1  1  0 70.313     0.043    0.012    0.019
## [21,] 1 1 1 0  1  1  0  0 70.360     0.043    0.012    0.019
## [22,] 1 1 1 0  0  0  1  1 70.643     0.050    0.010    0.019
## [23,] 1 1 1 0  1  0  0  1 70.720     0.050    0.010    0.019
## [24,] 1 1 0 0  1  1  0  1 70.920     0.043    0.012    0.019
## [25,] 1 1 0 0  0  1  1  1 70.925     0.043    0.010    0.017
## [26,] 1 1 0 0  1  1  1  0 70.949     0.043    0.012    0.019
## [27,] 1 1 1 1  0  0  1  1 71.267     0.043    0.010    0.017
## [28,] 1 1 1 1  0  1  1  0 71.300     0.043    0.010    0.017
## [29,] 1 1 1 1  0  1  0  1 71.314     0.043    0.010    0.017
## [30,] 1 1 1 1  1  0  1  0 71.318     0.043    0.010    0.017
ord = order(error[,3])
tab1 = round(tab[ord,],3)[1:35,]

ord = order(tab1[,9])
round(tab1[ord,],3)
##       h w f c hf hw wf wc    aic misc.wnba misc.nba misc.all
##  [1,] 1 1 1 1  0  0  0  0 67.252     0.043    0.010    0.017
##  [2,] 1 1 0 1  0  0  1  0 67.855     0.043    0.010    0.017
##  [3,] 1 1 0 1  1  0  0  0 67.997     0.043    0.010    0.017
##  [4,] 1 1 0 0  0  1  1  0 69.065     0.043    0.010    0.017
##  [5,] 1 1 1 1  0  0  1  0 69.254     0.043    0.010    0.017
##  [6,] 1 1 1 1  0  0  0  1 69.265     0.043    0.010    0.017
##  [7,] 1 1 1 1  0  1  0  0 69.297     0.043    0.010    0.017
##  [8,] 1 1 1 1  1  0  0  0 69.309     0.043    0.010    0.017
##  [9,] 1 1 0 1  0  0  1  1 69.851     0.043    0.010    0.017
## [10,] 1 1 0 1  0  1  1  0 69.897     0.043    0.010    0.017
## [11,] 1 1 0 1  1  0  1  0 69.947     0.043    0.010    0.017
## [12,] 1 1 0 1  1  0  0  1 70.013     0.043    0.010    0.017
## [13,] 1 1 0 1  1  1  0  0 70.040     0.043    0.010    0.017
## [14,] 1 1 1 0  0  1  0  1 70.230     0.036    0.012    0.017
## [15,] 1 1 0 0  0  1  1  1 70.925     0.043    0.010    0.017
## [16,] 1 1 1 1  0  0  1  1 71.267     0.043    0.010    0.017
## [17,] 1 1 1 1  0  1  1  0 71.300     0.043    0.010    0.017
## [18,] 1 1 1 1  0  1  0  1 71.314     0.043    0.010    0.017
## [19,] 1 1 1 1  1  0  1  0 71.318     0.043    0.010    0.017
## [20,] 1 1 1 1  1  0  0  1 71.322     0.043    0.010    0.017
## [21,] 1 1 1 1  1  1  0  0 71.355     0.043    0.010    0.017
## [22,] 1 1 0 1  0  1  1  1 71.896     0.043    0.010    0.017
## [23,] 1 1 0 1  1  0  1  1 71.962     0.043    0.010    0.017
## [24,] 1 1 0 1  1  1  1  0 71.990     0.043    0.010    0.017
## [25,] 1 1 0 1  1  1  0  1 72.059     0.043    0.010    0.017
## [26,] 1 1 1 0  0  1  1  1 72.237     0.036    0.012    0.017
## [27,] 1 1 1 0  1  1  0  1 72.285     0.036    0.012    0.017
## [28,] 1 1 0 0  1  1  1  1 72.883     0.036    0.012    0.017
## [29,] 1 1 1 1  0  1  1  1 73.316     0.043    0.010    0.017
## [30,] 1 1 1 1  1  0  1  1 73.331     0.043    0.010    0.017
## [31,] 1 1 1 1  1  1  1  0 73.364     0.043    0.010    0.017
## [32,] 1 1 1 1  1  1  0  1 73.372     0.043    0.010    0.017
## [33,] 1 1 0 1  1  1  1  1 74.008     0.043    0.010    0.017
## [34,] 1 1 1 0  1  1  1  1 74.298     0.036    0.012    0.017
## [35,] 1 1 1 1  1  1  1  1 75.380     0.043    0.010    0.017

Top model

bayesfit    = bayesglm.fit(cbind(1,X[,1:4]),y,family=binomial(link = "logit"))

h = seq(150,230,length=1000)

par(mfrow=c(2,3))

eta = cbind(1,h,quantile(X[y==0,2],0.10),0,0)%*%bayesfit$coef
plot(h,1/(1+exp(-eta)),xlab="Height (cm)",ylab="Prob. class. as NBA",type="l",lwd=2)
eta = cbind(1,h,quantile(X[y==0,2],0.50),0,0)%*%bayesfit$coef
lines(h,1/(1+exp(-eta)),col=2,lwd=2)
eta = cbind(1,h,quantile(X[y==0,2],0.90),0,0)%*%bayesfit$coef
lines(h,1/(1+exp(-eta)),col=3,lwd=2)
abline(h=0.5,lty=2)
legend("bottomright",legend=c("10-perc weight","50-perc weight","90-perc weight"),col=1:3,lty=1,lwd=2,bty="n")
abline(v=190,lty=2)
hh = height[(y==0)&(f==0)&(c==0)]
qhh = quantile(hh,c(0.1,0.5,0.9))
segments(qhh[1],0.5,qhh[3],0.5,col=6,lwd=2)
points(qhh[2],0.5,col=6,pch=16,cex=1.5)
title(paste("WNBA players - guard \n",length(hh)," players",sep=""))


eta = cbind(1,h,quantile(X[y==0,2],0.10),1,0)%*%bayesfit$coef
plot(h,1/(1+exp(-eta)),xlab="Height (cm)",ylab="Prob. class. as NBA",type="l",lwd=2)
eta = cbind(1,h,quantile(X[y==0,2],0.50),1,0)%*%bayesfit$coef
lines(h,1/(1+exp(-eta)),col=2,lwd=2)
eta = cbind(1,h,quantile(X[y==0,2],0.90),1,0)%*%bayesfit$coef
lines(h,1/(1+exp(-eta)),col=3,lwd=2)
abline(h=0.5,lty=2)
abline(v=190,lty=2)
hh = height[(y==0)&(f==1)&(c==0)]
qhh = quantile(hh,c(0.1,0.5,0.9))
segments(qhh[1],0.5,qhh[3],0.5,col=6,lwd=2)
points(qhh[2],0.5,col=6,pch=16,cex=1.5)
title(paste("WNBA players - forward \n",length(hh)," players",sep=""))


eta = cbind(1,h,quantile(X[y==0,2],0.10),0,1)%*%bayesfit$coef
plot(h,1/(1+exp(-eta)),xlab="Height (cm)",ylab="Prob. class. as NBA",type="l",lwd=2)
eta = cbind(1,h,quantile(X[y==0,2],0.50),0,1)%*%bayesfit$coef
lines(h,1/(1+exp(-eta)),col=2,lwd=2)
eta = cbind(1,h,quantile(X[y==0,2],0.90),0,1)%*%bayesfit$coef
lines(h,1/(1+exp(-eta)),col=3,lwd=2)
abline(h=0.5,lty=2)
abline(v=190,lty=2)
hh = height[(y==0)&(f==0)&(c==1)]
qhh = quantile(hh,c(0.1,0.5,0.9))
segments(qhh[1],0.5,qhh[3],0.5,col=6,lwd=2)
points(qhh[2],0.5,col=6,pch=16,cex=1.5)
title(paste("WNBA players - center \n",length(hh)," players",sep=""))


eta = cbind(1,h,quantile(X[y==1,2],0.10),0,0)%*%bayesfit$coef
plot(h,1/(1+exp(-eta)),xlab="Height (cm)",ylab="Prob. class. as NBA",type="l",lwd=2)
eta = cbind(1,h,quantile(X[y==1,2],0.50),0,0)%*%bayesfit$coef
lines(h,1/(1+exp(-eta)),col=2,lwd=2)
eta = cbind(1,h,quantile(X[y==1,2],0.90),0,0)%*%bayesfit$coef
lines(h,1/(1+exp(-eta)),col=3,lwd=2)
abline(h=0.5,lty=2)
abline(v=190,lty=2)
hh = height[(y==1)&(f==0)&(c==0)]
qhh = quantile(hh,c(0.1,0.5,0.9))
segments(qhh[1],0.5,qhh[3],0.5,col=6,lwd=2)
points(qhh[2],0.5,col=6,pch=16,cex=1.5)
title(paste("NBA players - guard \n",length(hh)," players",sep=""))


eta = cbind(1,h,quantile(X[y==1,2],0.10),1,0)%*%bayesfit$coef
plot(h,1/(1+exp(-eta)),xlab="Height (cm)",ylab="Prob. class. as NBA",type="l",lwd=2)
eta = cbind(1,h,quantile(X[y==1,2],0.50),1,0)%*%bayesfit$coef
lines(h,1/(1+exp(-eta)),col=2,lwd=2)
eta = cbind(1,h,quantile(X[y==1,2],0.90),1,0)%*%bayesfit$coef
lines(h,1/(1+exp(-eta)),col=3,lwd=2)
abline(h=0.5,lty=2)
abline(v=190,lty=2)
hh = height[(y==1)&(f==1)&(c==0)]
qhh = quantile(hh,c(0.1,0.5,0.9))
segments(qhh[1],0.5,qhh[3],0.5,col=6,lwd=2)
points(qhh[2],0.5,col=6,pch=16,cex=1.5)
title(paste("NBA players - forward \n",length(hh)," players",sep=""))



eta = cbind(1,h,quantile(X[y==1,2],0.10),0,1)%*%bayesfit$coef
plot(h,1/(1+exp(-eta)),xlab="Height (cm)",ylab="Prob. class. as NBA",type="l",lwd=2)
eta = cbind(1,h,quantile(X[y==1,2],0.50),0,1)%*%bayesfit$coef
lines(h,1/(1+exp(-eta)),col=2,lwd=2)
eta = cbind(1,h,quantile(X[y==1,2],0.90),0,1)%*%bayesfit$coef
lines(h,1/(1+exp(-eta)),col=3,lwd=2)
abline(h=0.5,lty=2)
abline(v=190,lty=2)
hh = height[(y==1)&(f==0)&(c==1)]
qhh = quantile(hh,c(0.1,0.5,0.9))
segments(qhh[1],0.5,qhh[3],0.5,col=6,lwd=2)
points(qhh[2],0.5,col=6,pch=16,cex=1.5)
title(paste("NBA players - center \n",length(hh)," players",sep=""))