library("bayesrules")

#Data on the number of LGBTQ+ equality laws (as of 2019) 
# and demographics in each U.S. state.
data("equality_index")

?equality_index

attach(equality_index)

par(mfrow=c(2,2))
plot(gop_2016,laws,ylim=c(0,50))
plot(percent_urban,laws,ylim=c(0,50))
plot(historical,laws,ylim=c(0,50))
plot(region,laws,ylim=c(0,50))


# y = number of LGBTQ+ rights laws (as of 2019)
# x = percent of the 2016 presidential election vote earned by the Republican ("GOP") candidate

y = laws[-5]
x = gop_2016[-5]-50
z = percent_urban[-5]-70
n = length(y)
par(mfrow=c(1,2))
plot(x,y)
plot(z,y)


# Model 1: IID Poisson data
a0 = 50
b0 = 5
N  = 1000
lambdas = seq(5,16,length=N)
plot(lambdas,dgamma(lambdas,a0,b0),type="l",xlab=expression(lambda),ylab="Prior density")

loglike = rep(0,N)
for (i in 1:N)
  loglike[i] = dpois(y,lambdas[i],log=TRUE)
like = exp(loglike)
plot(lambdas,like,type="l",xlab=expression(lambda),ylab="Likelihood")

sumy = sum(y)
a1   = a0 + sumy
b1   = b0 + n

plot(lambdas,dgamma(lambdas,a1,b1),type="l",xlab=expression(lambda),ylab="Density",lwd=2)
lines(lambdas,dgamma(lambdas,a0,b0),col=2,lwd=2)
lines(lambdas,dgamma(lambdas,sumy+1,n),col=4,lwd=2)
legend("topleft",legend=c("Prior","Likelihood","Posterior"),col=c(2,4,1),lwd=2)

# prior predictive (log)
logpriorpred.M1 = a0*log(b0)-lgamma(a0) - sum(lfactorial(y))+lgamma(a0+sumy)-(a0+sumy)*log(b0+n)

# posterior predictive (log)
ynew = 0:max(y)
logpostpred.M1 = -lfactorial(ynew) + a1*log(b1)-lgamma(a1) + lgamma(a1+ynew)-(a1+ynew)*log(1+b1)
plot(ynew,exp(logpostpred.M1),type="h")

plot(table(y)/n,lwd=3,ylab="Density")
lines(ynew,exp(logpostpred.M1),type="h",col=2,lwd=1.5)
legend("topright",legend=c("Data","Posterior predictive"),col=1:2,lwd=2)


# MODEL 2: Poisson regression with one regressor
par(mfrow=c(1,2))
plot(x,y)
plot(x,log(y))

theta.guess = lm(log(y)~x)$coef

theta0 = c(2,-0.075)
V0 = diag(c(0.02,0.001))

N = 200
alphas = seq(1.7,2.4,length=N)
betas = seq(-0.15,0,length=N)
loglike = matrix(0,N,N)
prior = matrix(0,N,N)
for (i in 1:N)
  for (j in 1:N){
    lambda = exp(alphas[i]+betas[j]*x)    
    loglike[i,j] = sum(dpois(y,lambda,log=TRUE))
    prior[i,j] = dnorm(alphas[i],theta0[1],sqrt(V0[1,1]))*dnorm(betas[j],theta0[2],sqrt(V0[2,2]))
  }
like = exp(loglike)

post = like*prior

par(mfrow=c(1,1))
contour(alphas,betas,like,xlab=expression(alpha),ylab=expression(beta),drawlabels=FALSE)
contour(alphas,betas,prior,add=TRUE,col=2,drawlabels=FALSE)
contour(alphas,betas,post,add=TRUE,col=3,drawlabels=FALSE)
legend("topright",legend=c("Likelihood","Prior","Posterior"),col=1:3,lty=1,lwd=2)


# SIR
M = 10000
draws.alpha = rnorm(M,theta0[1],sqrt(V0[1,1]))
draws.beta  = rnorm(M,theta0[2],sqrt(V0[2,2]))
w = rep(0,M)
for (i in 1:M)
  w[i] = sum(dpois(y,exp(draws.alpha[i]+draws.beta[i]*x),log=TRUE))
w1 = exp(w)
logpriorpred.M2 = log(sum(w1))
w = exp(w-max(w))
ind = sample(1:M,size=M,replace=TRUE,prob=w)
alpha.post = draws.alpha[ind]
beta.post = draws.beta[ind]

plot(draws.alpha,draws.beta,xlab=expression(alpha),ylab=expression(beta),pch=16)
points(alpha.post,beta.post,col=3,pch=16)
contour(alphas,betas,prior,add=TRUE,col=4,drawlabels=FALSE)
contour(alphas,betas,post,add=TRUE,col=6,drawlabels=FALSE)


par(mfrow=c(1,2))
hist(alpha.post,prob=TRUE,xlab="",main=expression(alpha))
hist(beta.post,prob=TRUE,xlab="",main=expression(beta))


xxx = seq(min(x),max(x),by=0.5)
nx = length(xxx)
pred = matrix(0,nx,M)
for (i in 1:nx)
  pred[i,] = exp(alpha.post+beta.post*xxx[i])
q.pred = t(apply(pred,1,quantile,c(0.05,0.5,0.95)))

par(mfrow=c(1,1))
plot(xxx+50,q.pred[,2],ylim=range(q.pred,y),xlab="x",ylab="Prediction",type="l",col=2)
lines(xxx+50,q.pred[,1],col=3)
lines(xxx+50,q.pred[,3],col=3)
points(x+50,y)
abline(h=a1/b1,col=4)
#a = 0
#abline(h=(a0+sum(y[x<a]))/(b0+sum(x<a)),col=4,lty=2)
#abline(h=(a0+sum(y[x>a]))/(b0+sum(x>a)),col=4,lty=3)
abline(v=50,lty=2)


# Log Bayes factor
log(exp(logpriorpred.M2)/exp(logpriorpred.M1))


# MODEL 3: Poisson regression with tw regressors
par(mfrow=c(1,2))
plot(x,log(y))
plot(z,log(y))

theta.guess = lm(log(y)~x+z)$coef

theta0 = c(1.7,-0.075,0.01)
V0 = diag(c(0.1,0.0002,0.0001))

# SIR
M = 10000
draws.alpha = rnorm(M,theta0[1],sqrt(V0[1,1]))
draws.beta  = rnorm(M,theta0[2],sqrt(V0[2,2]))
draws.gamma = rnorm(M,theta0[3],sqrt(V0[3,3]))
w = rep(0,M)
for (i in 1:M)
  w[i] = sum(dpois(y,exp(draws.alpha[i]+draws.beta[i]*x+draws.gamma[i]*z),log=TRUE))
w1 = exp(w)
logpriorpred.M3 = log(sum(w1))
w = exp(w-max(w))
ind = sample(1:M,size=M,replace=TRUE,prob=w)
alpha.post = draws.alpha[ind]
beta.post = draws.beta[ind]
gamma.post = draws.gamma[ind]


par(mfrow=c(1,3))
plot(draws.alpha,draws.beta,xlab=expression(alpha),ylab=expression(beta),pch=16)
points(alpha.post,beta.post,col=3,pch=16)
plot(draws.alpha,draws.gamma,xlab=expression(alpha),ylab=expression(gamma),pch=16)
points(alpha.post,gamma.post,col=3,pch=16)
plot(draws.beta,draws.gamma,xlab=expression(beta),ylab=expression(gamma),pch=16)
points(beta.post,gamma.post,col=3,pch=16)

hist(alpha.post,prob=TRUE,xlab="",main=expression(alpha))
hist(beta.post,prob=TRUE,xlab="",main=expression(beta))
hist(gamma.post,prob=TRUE,xlab="",main=expression(gamma))


nx = 20
xxx = seq(min(x),max(x),length=nx)
zzz = seq(min(z),max(z),length=nx)
pred = matrix(0,nx,nx)
for (i in 1:nx)
  for (j in 1:nx)
    pred[i,j] = median(exp(alpha.post+beta.post*xxx[i]+gamma.post*zzz[j]))

par(mfrow=c(1,1))
contour(xxx+50,zzz+70,pred)

persp(xxx+50,zzz+70,pred,theta = 30, phi = 30, expand = 0.5, col = "lightblue",
      ltheta = 120, shade = 0.75, ticktype = "detailed",
      xlab = "X", ylab = "Z", zlab = "Pred")

plot(xxx+50,pred[,2],ylim=range(pred),type="l",xlab="% 2016 GOP votes",ylab="Predictive of law counts")
lines(xxx+50,pred[,6],col=2)
lines(xxx+50,pred[,11],col=3)
lines(xxx+50,pred[,16],col=4)
lines(xxx+50,pred[,19],col=5)
legend("topright",legend=c("Z=42%","Z=53%","Z=68%","Z=83%","Z=92%"),col=1:5,lty=1)
title("Z = % living in urban areas (2010)")

# Log Bayes factor
log(exp(logpriorpred.M2)/exp(logpriorpred.M1))
log(exp(logpriorpred.M3)/exp(logpriorpred.M1))
log(exp(logpriorpred.M3)/exp(logpriorpred.M2))