# Jim Albert's probit example # Source: Bayesian computation with R # Section 10.3 Binary Response Regression with a Probit Link # https://bayesball.github.io/bcwr/notebooks2013/probit.regression.html install.packages("LearnBayes") library(LearnBayes) grade = c(0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1) sat = c(525, 533, 545, 582, 581, 576, 572, 609, 559, 543, 576, 525, 574, 582, 574, 471, 595, 557, 557, 584, 599, 517, 649, 584, 463, 591, 488, 563, 553, 549) n = length(sat) plot(sat,grade+rnorm(n,0,0.04),xlab="SAT score",ylab="1=passed a statistics class") # GLM fit = glm(grade~sat,family=binomial("probit")) xxx = seq(min(sat),max(sat),by=1) prob = fit$fit fit = pnorm(fit$coef[1]+fit$coef[2]*xxx) # Bayes GLM - Gibbs sampler M = 10000 fitbayes = bayes.probit(grade, cbind(1, sat),M) names(fitbayes) dim(fitbayes$beta) nx = length(xxx) prob = matrix(0,M,nx) prob1 = matrix(0,M,n) for (i in 1:nx) prob[,i] = pnorm(fitbayes$beta[,1]+fitbayes$beta[,2]*xxx[i]) q.prob = apply(prob,2,quantile,c(0.05,0.5,0.975)) for (i in 1:n) prob1[,i] = pnorm(fitbayes$beta[,1]+fitbayes$beta[,2]*sat[i]) par(mfrow=c(1,3)) plot(fitbayes$beta,xlab=expression(beta[0]),ylab=expression(beta[1])) hist(fitbayes$beta[,1],prob=TRUE,xlab="",main=expression(beta[0])) hist(fitbayes$beta[,2],prob=TRUE,xlab="",main=expression(beta[1])) par(mfrow=c(1,2)) plot(sat,grade+rnorm(n,0,0.04),xlab="SAT score",ylab="1=passed a statistics class") lines(xxx,q.prob[1,],col=3) lines(xxx,q.prob[2,],col=2) lines(xxx,q.prob[3,],col=3) abline(h=0.5,lty=2) lines(xxx,fit,col=4) plot(density(prob[,xxx==500]),xlim=c(0,1),ylim=c(0,7),main="",xlab="Prob passed a statistics class") lines(density(prob[,xxx==530]),lty=2) lines(density(prob[,xxx==560]),lty=3) abline(v=mean(grade),col=grey(0.7)) legend("topright",legend=c("SAT=500","SAT=530","SAT=560"),lty=1:3,bty="n") title(paste("Passed = ",100*mean(grade),"%",sep="")) abline(v=fit[xxx==500]) abline(v=fit[xxx==530],lty=2) abline(v=fit[xxx==560],lty=3) cutoffs = seq(0.1,0.9,by=0.01) nc = length(cutoffs) tpr = matrix(0,M,nc) fpr = matrix(0,M,nc) tpr.glm = rep(0,nc) fpr.glm = rep(0,nc) for (j in 1:nc){ cutoff = cutoffs[j] for (i in 1:M){ tpr[i,j] = sum(prob1[i,]>cutoff & grade==1)/sum(grade==1) fpr[i,j] = sum(prob1[i,]>cutoff & grade==0)/sum(grade==0) } tpr.glm[j] = sum(prob>cutoff & grade==1)/sum(grade==1) fpr.glm[j] = sum(prob>cutoff & grade==0)/sum(grade==0) } q.tpr = apply(tpr,2,quantile,c(0.25,0.5,0.75)) q.fpr = apply(fpr,2,quantile,c(0.25,0.5,0.75)) par(mfrow=c(1,1)) plot(q.fpr[2,],q.tpr[2,],xlim=c(0,1),ylim=c(0,1),type="l",lwd=2, xlab="False positive rate (1-Specificity)",ylab="True positive rate (Sensitivity)") lines(fpr.glm,tpr.glm,col=4) abline(0,1,lty=2) lines(q.fpr[1,],q.tpr[1,],col=2) lines(q.fpr[3,],q.tpr[3,],col=2)