# Problem 8, page 480-481 of Jim Albert and Jingchen Hu's (2020)
# Probability and Bayesian Modeling, CRC Press, Taylor & Francis Group.
#
# What factors determine admission to graduate school? In a study, data on
# 400 graduate school admission cases was collected. Admission is a binary
# response, with 0 indicating not admitted, and 1 indicating admitted.
# Moreover, the applicant's GRE (Graduate Record Examination) score and
# undergraduate grade point average (GPA) are available.

# Location of the dataset
filename <- "https://raw.githubusercontent.com/monika76five/ProbBayes/master/R%20Code/chapter%2012/data/GradSchoolAdmission.csv"

# Reading the data (columns: Admission, GRE, GPA)
data <- read.csv(file = filename)

# Sample size
n <- nrow(data)

# Extract the columns as plain vectors instead of attach()-ing the data
# frame (attach masks names in the search path and is error-prone).
# Later sections of this script use these objects directly.
Admission <- data$Admission
GRE <- data$GRE
GPA <- data$GPA

# Tabulate the admission outcomes (0 = not admitted, 1 = admitted)
table(Admission)

# All figures are written to a single PDF file
pdf(file = "graduate-school-admission.pdf", width = 18, height = 8)

# Marginal distributions of GPA (rounded to one decimal) and GRE;
# the red vertical line marks the GPA = 3.5 threshold used below.
par(mfrow = c(1, 2))
plot(table(round(GPA, 1)), xlab = "GPA (rounded)", ylab = "")
abline(v = 3.45, col = 2, lwd = 3)
plot(table(GRE), xlab = "GRE")

# Dichotomize GPA at 3.5 (1 if GPA >= 3.5, 0 otherwise)
GPA35 <- as.numeric(GPA >= 3.5)
table(GPA35, Admission)

# GRE distribution within each GPA group
par(mfrow = c(1, 1))
boxplot(GRE ~ GPA35, names = c("GPA<3.5", "GPA>=3.5"), xlab = "GPA")

# Linear model with Gaussian errors (for comparison; the response is
# binary, so the Gaussian model is not really appropriate)
fit.lm <- lm(Admission ~ GRE + GPA35)

# Logistic regression: binomial family with logit link
fit.glm <- glm(Admission ~ GRE + GPA35, family = binomial(link = "logit"))

# Compare the fitted probabilities from the two models against GRE
par(mfrow = c(1, 1))
plot(GRE, fit.glm$fitted, xlab = "GRE", ylab = "Fitted",
     ylim = c(0, 0.6), col = GPA35 + 1, pch = 16)
abline(h = mean(Admission), col = 4)  # overall admission rate
points(GRE, fit.lm$fitted, col = GPA35 + 1)
legend("topleft",
       legend = c("Binomial", "LM (GPA<3.5)", "LM (GPA>=3.5)",
                  "GLM (GPA<3.5)", "GLM (GPA>=3.5)"),
       col = c(4, 1, 2, 1, 2), pch = c(16, 1, 1, 16, 16))

# Monte Carlo draws from the prior:
#   beta0 ~ N(-2, 1), beta1 ~ N(0, 1), beta2 ~ N(0, 1)
M <- 1000000
beta0 <- rnorm(M, -2, 1)
beta1 <- rnorm(M, 0, 1)
beta2 <- rnorm(M, 0, 1)

# Prior draws of the linear predictor for every observed case:
# eta[m, i] corresponds to prior draw m evaluated at case i.
eta <- matrix(0, M, n)
for (i in seq_len(n)) {
  eta[, i] <- beta0 + beta1 * GRE[i] + beta2 * GPA35[i]
}
# Inverse-logit transform to prior success probabilities
fit.prior <- 1 / (1 + exp(-eta))

# 5%, 50% and 95% prior quantiles of the fitted probability per case
qfit.prior <- t(apply(fit.prior, 2, quantile, c(0.05, 0.5, 0.95)))
par(mfrow = c(1, 1))
# Prior predictive fits: median with 5%/95% bands against GRE.
# BUG FIX: the original used ylim = range(qfit), but no object named
# `qfit` exists anywhere in the script; the intended object is qfit.prior.
plot(GRE, qfit.prior[, 2], ylim = range(qfit.prior), main = "Prior", ylab = "Fit")
points(GRE, qfit.prior[, 1], col = 2)
points(GRE, qfit.prior[, 3], col = 2)

# Log-likelihood of each prior draw, plus its fitted probabilities.
# (The original also allocated an unused vector `weight`; removed.)
like <- rep(0, M)
fit.post <- matrix(0, M, n)
for (i in seq_len(M)) {
  prob <- beta0[i] + beta1[i] * GRE + beta2[i] * GPA35
  prob <- 1 / (1 + exp(-prob))  # inverse logit
  like[i] <- sum(dbinom(Admission, 1, prob, log = TRUE))
  fit.post[i, ] <- prob
}
# Stabilize numerically: subtract the maximum log-likelihood before
# exponentiating, so the largest weight is exactly 1.
like <- exp(like - max(like))

# Sampling importance resampling (SIR): resample M/100 prior draws with
# probability proportional to the likelihood to approximate the posterior.
M1 <- M / 100
ind <- sample(seq_len(M), size = M1, replace = TRUE, prob = like)
qfit.post <- t(apply(fit.post[ind, ], 2, quantile, c(0.05, 0.5, 0.95)))
beta0p <- beta0[ind]
beta1p <- beta1[ind]
beta2p <- beta2[ind]

# Histograms of the coefficients: priors (top row) and posteriors
# (bottom row); red line at 0, blue dot marks the glm point estimate.
par(mfrow = c(2, 3))
hist(beta0, prob = TRUE, xlab = "", main = expression(beta[0]), ylab = "Prior density")
hist(beta1, prob = TRUE, xlab = "", main = expression(beta[1]), ylab = "Prior density")
hist(beta2, prob = TRUE, xlab = "", main = expression(beta[2]), ylab = "Prior density")
hist(beta0p, xlim = range(beta0), prob = TRUE, xlab = "",
     main = expression(beta[0]), ylab = "Posterior density")
abline(v = 0, col = 2)
points(fit.glm$coef[1], 0, pch = 16, cex = 2, col = 4)
hist(beta1p, prob = TRUE, xlab = "", main = expression(beta[1]),
     ylab = "Posterior density")
abline(v = 0, col = 2)
points(fit.glm$coef[2], 0, pch = 16, cex = 2, col = 4)
hist(beta2p, xlim = range(beta2), prob = TRUE, xlab = "",
     main = expression(beta[2]), ylab = "Posterior density")
abline(v = 0, col = 2)
points(fit.glm$coef[3], 0, pch = 16, cex = 2, col = 4)

# Pairwise scatterplots: top row overlays posterior (red) on a prior
# subsample (black); bottom row shows the posterior alone with the glm
# estimate as a big blue dot.
ind1 <- sample(seq_len(M), size = M1, replace = TRUE)
par(mfrow = c(2, 3))
plot(beta0[ind1], beta1[ind1])
points(beta0p, beta1p, col = 2)
plot(beta0[ind1], beta2[ind1])
points(beta0p, beta2p, col = 2)
plot(beta1[ind1], beta2[ind1])
points(beta1p, beta2p, col = 2)
plot(beta0p, beta1p, col = 2)
points(fit.glm$coef[1], fit.glm$coef[2], col = 4, pch = 16, cex = 2)
plot(beta0p, beta2p, col = 2)
points(fit.glm$coef[1], fit.glm$coef[3], col = 4, pch = 16, cex = 2)
plot(beta1p, beta2p, col = 2)
points(fit.glm$coef[2], fit.glm$coef[3], col = 4, pch = 16, cex = 2)

# Prior vs posterior fitted probabilities as functions of GRE
par(mfrow = c(1, 2))
plot(GRE, qfit.prior[, 2], ylim = c(0, 1), main = "Prior", ylab = "Fit")
points(GRE, qfit.prior[, 1], col = 2)
points(GRE, qfit.prior[, 3], col = 2)
plot(GRE, qfit.post[, 2], ylim = c(0, 1), main = "Posterior", ylab = "Fit",
     col = GPA35 + 1, pch = 16)
# Overlay the 5% and 95% posterior quantile bands on the posterior fit
# panel, colored by GPA group, add the legend, and close the PDF device.
band.cols <- GPA35 + 1
for (q.col in c(1, 3)) {
  points(GRE, qfit.post[, q.col], col = band.cols)
}
legend("topleft",
       legend = c("Bayes GLM (GPA<3.5)", "Bayes GLM (GPA>=3.5)"),
       col = 1:2, pch = 16)
dev.off()