library("bayesrules") #Data on the number of LGBTQ+ equality laws (as of 2019) # and demographics in each U.S. state. data("equality_index") ?equality_index attach(equality_index) par(mfrow=c(2,2)) plot(gop_2016,laws,ylim=c(0,50)) plot(percent_urban,laws,ylim=c(0,50)) plot(historical,laws,ylim=c(0,50)) plot(region,laws,ylim=c(0,50)) # y = number of LGBTQ+ rights laws (as of 2019) # x = percent of the 2016 presidential election vote earned by the Republican ("GOP") candidate y = laws[-5] x = gop_2016[-5]-50 z = percent_urban[-5]-70 n = length(y) par(mfrow=c(1,2)) plot(x,y) plot(z,y) # Model 1: IID Poisson data a0 = 50 b0 = 5 N = 1000 lambdas = seq(5,16,length=N) plot(lambdas,dgamma(lambdas,a0,b0),type="l",xlab=expression(lambda),ylab="Prior density") loglike = rep(0,N) for (i in 1:N) loglike[i] = dpois(y,lambdas[i],log=TRUE) like = exp(loglike) plot(lambdas,like,type="l",xlab=expression(lambda),ylab="Likelihood") sumy = sum(y) a1 = a0 + sumy b1 = b0 + n plot(lambdas,dgamma(lambdas,a1,b1),type="l",xlab=expression(lambda),ylab="Density",lwd=2) lines(lambdas,dgamma(lambdas,a0,b0),col=2,lwd=2) lines(lambdas,dgamma(lambdas,sumy+1,n),col=4,lwd=2) legend("topleft",legend=c("Prior","Likelihood","Posterior"),col=c(2,4,1),lwd=2) # prior predictive (log) logpriorpred.M1 = a0*log(b0)-lgamma(a0) - sum(lfactorial(y))+lgamma(a0+sumy)-(a0+sumy)*log(b0+n) # posterior predictive (log) ynew = 0:max(y) logpostpred.M1 = -lfactorial(ynew) + a1*log(b1)-lgamma(a1) + lgamma(a1+ynew)-(a1+ynew)*log(1+b1) plot(ynew,exp(logpostpred.M1),type="h") plot(table(y)/n,lwd=3,ylab="Density") lines(ynew,exp(logpostpred.M1),type="h",col=2,lwd=1.5) legend("topright",legend=c("Data","Posterior predictive"),col=1:2,lwd=2) # MODEL 2: Poisson regression with one regressor par(mfrow=c(1,2)) plot(x,y) plot(x,log(y)) theta.guess = lm(log(y)~x)$coef theta0 = c(2,-0.075) V0 = diag(c(0.02,0.001)) N = 200 alphas = seq(1.7,2.4,length=N) betas = seq(-0.15,0,length=N) loglike = matrix(0,N,N) prior = matrix(0,N,N) for (i in 1:N) for (j in 1:N){ lambda = exp(alphas[i]+betas[j]*x) loglike[i,j] = sum(dpois(y,lambda,log=TRUE)) prior[i,j] = dnorm(alphas[i],theta0[1],sqrt(V0[1,1]))*dnorm(betas[j],theta0[2],sqrt(V0[2,2])) } like = exp(loglike) post = like*prior par(mfrow=c(1,1)) contour(alphas,betas,like,xlab=expression(alpha),ylab=expression(beta),drawlabels=FALSE) contour(alphas,betas,prior,add=TRUE,col=2,drawlabels=FALSE) contour(alphas,betas,post,add=TRUE,col=3,drawlabels=FALSE) legend("topright",legend=c("Likelihood","Prior","Posterior"),col=1:3,lty=1,lwd=2) # SIR M = 10000 draws.alpha = rnorm(M,theta0[1],sqrt(V0[1,1])) draws.beta = rnorm(M,theta0[2],sqrt(V0[2,2])) w = rep(0,M) for (i in 1:M) w[i] = sum(dpois(y,exp(draws.alpha[i]+draws.beta[i]*x),log=TRUE)) w1 = exp(w) logpriorpred.M2 = log(sum(w1)) w = exp(w-max(w)) ind = sample(1:M,size=M,replace=TRUE,prob=w) alpha.post = draws.alpha[ind] beta.post = draws.beta[ind] plot(draws.alpha,draws.beta,xlab=expression(alpha),ylab=expression(beta),pch=16) points(alpha.post,beta.post,col=3,pch=16) contour(alphas,betas,prior,add=TRUE,col=4,drawlabels=FALSE) contour(alphas,betas,post,add=TRUE,col=6,drawlabels=FALSE) par(mfrow=c(1,2)) hist(alpha.post,prob=TRUE,xlab="",main=expression(alpha)) hist(beta.post,prob=TRUE,xlab="",main=expression(beta)) xxx = seq(min(x),max(x),by=0.5) nx = length(xxx) pred = matrix(0,nx,M) for (i in 1:nx) pred[i,] = exp(alpha.post+beta.post*xxx[i]) q.pred = t(apply(pred,1,quantile,c(0.05,0.5,0.95))) par(mfrow=c(1,1)) plot(xxx+50,q.pred[,2],ylim=range(q.pred,y),xlab="x",ylab="Prediction",type="l",col=2) lines(xxx+50,q.pred[,1],col=3) lines(xxx+50,q.pred[,3],col=3) points(x+50,y) abline(h=a1/b1,col=4) #a = 0 #abline(h=(a0+sum(y[xa]))/(b0+sum(x>a)),col=4,lty=3) abline(v=50,lty=2) # Log Bayes factor log(exp(logpriorpred.M2)/exp(logpriorpred.M1)) # MODEL 3: Poisson regression with tw regressors par(mfrow=c(1,2)) plot(x,log(y)) plot(z,log(y)) theta.guess = lm(log(y)~x+z)$coef theta0 = c(1.7,-0.075,0.01) V0 = diag(c(0.1,0.0002,0.0001)) # SIR M = 10000 draws.alpha = rnorm(M,theta0[1],sqrt(V0[1,1])) draws.beta = rnorm(M,theta0[2],sqrt(V0[2,2])) draws.gamma = rnorm(M,theta0[3],sqrt(V0[3,3])) w = rep(0,M) for (i in 1:M) w[i] = sum(dpois(y,exp(draws.alpha[i]+draws.beta[i]*x+draws.gamma[i]*z),log=TRUE)) w1 = exp(w) logpriorpred.M3 = log(sum(w1)) w = exp(w-max(w)) ind = sample(1:M,size=M,replace=TRUE,prob=w) alpha.post = draws.alpha[ind] beta.post = draws.beta[ind] gamma.post = draws.gamma[ind] par(mfrow=c(1,3)) plot(draws.alpha,draws.beta,xlab=expression(alpha),ylab=expression(beta),pch=16) points(alpha.post,beta.post,col=3,pch=16) plot(draws.alpha,draws.gamma,xlab=expression(alpha),ylab=expression(gamma),pch=16) points(alpha.post,gamma.post,col=3,pch=16) plot(draws.beta,draws.gamma,xlab=expression(beta),ylab=expression(gamma),pch=16) points(beta.post,gamma.post,col=3,pch=16) hist(alpha.post,prob=TRUE,xlab="",main=expression(alpha)) hist(beta.post,prob=TRUE,xlab="",main=expression(beta)) hist(gamma.post,prob=TRUE,xlab="",main=expression(gamma)) nx = 20 xxx = seq(min(x),max(x),length=nx) zzz = seq(min(z),max(z),length=nx) pred = matrix(0,nx,nx) for (i in 1:nx) for (j in 1:nx) pred[i,j] = median(exp(alpha.post+beta.post*xxx[i]+gamma.post*zzz[j])) par(mfrow=c(1,1)) contour(xxx+50,zzz+70,pred) persp(xxx+50,zzz+70,pred,theta = 30, phi = 30, expand = 0.5, col = "lightblue", ltheta = 120, shade = 0.75, ticktype = "detailed", xlab = "X", ylab = "Z", zlab = "Pred") plot(xxx+50,pred[,2],ylim=range(pred),type="l",xlab="% 2016 GOP votes",ylab="Predictive of law counts") lines(xxx+50,pred[,6],col=2) lines(xxx+50,pred[,11],col=3) lines(xxx+50,pred[,16],col=4) lines(xxx+50,pred[,19],col=5) legend("topright",legend=c("Z=42%","Z=53%","Z=68%","Z=83%","Z=92%"),col=1:5,lty=1) title("Z = % living in urban areas (2010)") # Log Bayes factor log(exp(logpriorpred.M2)/exp(logpriorpred.M1)) log(exp(logpriorpred.M3)/exp(logpriorpred.M1)) log(exp(logpriorpred.M3)/exp(logpriorpred.M2))