install.packages("catdata") install.packages("UPG") install.packages("rstanarm") library("catdata") library("UPG") library("rstanarm") pdf(file="BayesianLogisticRegression-UPG-vs-STAN.pdf",height=8,width=12) # x = age of the person in years (16 to 61) # y = unemployment status - 0:short-term, 1:long-term # n = 982 observations data(unemployment) x = unemployment$age y = unemployment$durbin-1 n = length(y) plot(x,y+rnorm(n,0,0.1),pch=16,cex=0.75,xlab="Age (in years)", ylab="Unemployment type",axes=FALSE) axis(2,at=0:1);box() age = 16:61 m = length(age) counts = matrix(0,m,2) for (i in 1:m){ counts[i,1] = sum(y[x==age[i]]==0) counts[i,2] = sum(y[x==age[i]]==1) } plot(age,counts[,1],ylim=range(counts),type="h",lwd=3, ylab="Counts",xlab="Age (in years)") lines(age+0.25,counts[,2],col=2,type="h",lwd=3) legend("topright",legend=c("short-term","long-term"),col=1:2,lwd=3,lty=1,bty="n") boxplot(x[y==0],x[y==1],horizontal=TRUE,names=c("0","1"),ylab="Unemployment type", xlab="Age (in years)") # GLM: Maximum likelihood estimation fit = glm(y~x,family=binomial) eta = cbind(1,age)%*%fit$coef theta = 1/(1+exp(-eta)) plot(x,y+rnorm(n,0,0.01),pch=16,cex=0.75,xlab="Age (in years)", ylab="Probability of long-term unemployment") lines(age,theta,col=2,lwd=2) abline(lm(y~x)$coef,col=4,lwd=2) legend("left",legend=c("LM","GLM"),col=c(2,4),lwd=2,lty=1,bty="n") # UPG: Efficient Bayesian Algorithms for Binary and # Categorical Data Regression Models # https://cran.r-project.org/web/packages/UPG/index.html time.upg = system.time({fit.upg = UPG(y, X, model = "logit",draws=4000,burnin=2000)}) betas.upg = fit.upg$posterior$beta names = c("intercept","slope") par(mfrow=c(2,3)) for (i in 1:2){ ts.plot(betas.upg[,i],main="R package UPG",ylab="") acf(betas.upg[,i],main=paste("Runtime=",round(time.upg[3],3),"sec",sep="")) hist(betas.upg[,i],prob=TRUE,xlab="",main=names[i]) } eta.upg = cbind(1,age)%*%t(betas.upg) theta.upg = 1/(1+exp(-eta.upg)) q.upg = t(apply(theta.upg,1,quantile,c(0.05,0.5,0.95))) # Estimating Generalized Linear Models for Binary and Binomial # Data with rstanarm # https://mc-stan.org/rstanarm/articles/binomial.html time.stan = system.time({fit.stan = stan_glm(y~x,family = binomial(link = "logit"))}) posterior = as.array(fit.stan) betas.stan = rbind(posterior[,1,],posterior[,2,],posterior[,3,],posterior[,4,]) par(mfrow=c(2,3)) for (i in 1:2){ ts.plot(betas.stan[,i],main="R package stan_glm",ylab="") acf(betas.upg[,i],main=paste("Runtime=",round(time.stan[3],3),"sec",sep="")) hist(betas.stan[,i],prob=TRUE,xlab="",main=names[i]) } eta.stan = cbind(1,age)%*%t(betas.stan) theta.stan = 1/(1+exp(-eta.stan)) q.stan = t(apply(theta.stan,1,quantile,c(0.05,0.5,0.95))) par(mfrow=c(1,2)) plot(density(betas.upg[,1]),xlab="",main="Intercept",lwd=2) lines(density(betas.stan[,1]),col=2,lwd=2) plot(density(betas.upg[,2]),xlab="",main="Slope",lwd=2) lines(density(betas.stan[,2]),col=2,lwd=2) legend("topright",legend=c("UPG","stan_glm"),col=1:2,lwd=2,lty=1,bty="n",cex=0.75) par(mfrow=c(1,1)) plot(x,y+rnorm(n,0,0.01),pch=16,cex=0.75,xlab="Age (in years)", ylab="Probability of long-term unemployment") lines(age,theta,col=2,lwd=2) lines(age,q.upg[,1],col=4,lty=2,lwd=2) lines(age,q.upg[,2],col=4) lines(age,q.upg[,3],col=4,lty=2,lwd=2) legend("left",legend=c("Logit","Bayes logit"),col=c(2,4),lwd=2,lty=1,bty="n") plot(density(theta.upg[5,]),xlim=c(0.2,0.75),ylim=c(0,30),lwd=2, xlab="Probability of long-term unemployment",main="") lines(density(theta.upg[25,]),col=2,lwd=2) lines(density(theta.upg[45,]),col=3,lwd=2) lines(density(theta.upg[15,]),col=4,lwd=2) lines(density(theta.upg[35,]),col=5,lwd=2) legend("topright",legend=c("age=20","age=30","age=40","age=50","age=60"), col=1:5,lwd=2,lty=1,bty="n") dev.off()