We will continue in the context of the midterm take-home exam (http://hedibert.org/wp-content/uploads/2021/05/midterm-bayesianlearning-2021.pdf), where \(y_1,\ldots,y_n\) are counts (number of major derogatory reports). We now bring a covariate into the analysis: \(x_i\), the number of active credit accounts.
#install.packages("AER")
library("AER")
## Loading required package: car
## Loading required package: carData
## Loading required package: lmtest
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
## Loading required package: survival
data(CreditCard)
attach(CreditCard)
y = reports
x = active
n = length(x)
plot(x,y,ylab="Number of major derogatory reports",xlab="Number of active credit accounts")
Now each observation \(y_i\) will have its own rate \(\lambda_i\), which is a non-linear function of \(x_i\), for \(i=1,\ldots,n\), \[\begin{eqnarray*} y_i|\lambda_i &\sim& Poi(\lambda_i)\\ \log \lambda_i &=& \alpha + \beta x_i. \end{eqnarray*}\] Notice that now the likelihood is a function of two parameters, \((\alpha,\beta)\), \[ L(\alpha,\beta|y_{1:n},x_{1:n}) = \frac{1}{\prod_{i=1}^n y_i!} \left(\prod_{i=1}^n \exp\{(\alpha+\beta x_i)y_i\}\right)\exp\left\{-\sum_{i=1}^n e^{\alpha+\beta x_i}\right\}. \] It is highly recommended to work with the logarithm of the likelihood function, \[ {\cal L}(\alpha,\beta|y_{1:n},x_{1:n}) = -\sum_{i=1}^n \log{y_i!} + \sum_{i=1}^n (\alpha+\beta x_i)y_i - \sum_{i=1}^n \exp\{\alpha+\beta x_i\} \ . \]
loglikelihood = function(alpha,beta){
  # Poisson log-likelihood under the log link: lambda_i = exp(alpha + beta*x_i)
  lambda = exp(alpha+beta*x)
  return(sum(dpois(y,lambda,log=TRUE)))
}
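As a quick sanity check (not in the original notes; the test values below are arbitrary points inside the grid used next), the dpois-based implementation should reproduce the explicit log-likelihood expression derived above:
alpha.test = -1.5; beta.test = 0.08   # arbitrary illustrative values
direct = -sum(lfactorial(y)) + sum((alpha.test+beta.test*x)*y) - sum(exp(alpha.test+beta.test*x))
c(loglikelihood(alpha.test,beta.test), direct)   # the two numbers should coincide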
N = 200
alphas = seq(-1.8,-1.1,length=N)
betas = seq(0.05,0.1,length=N)
loglike = matrix(0,N,N)
for (i in 1:N)
  for (j in 1:N)
    loglike[i,j] = loglikelihood(alphas[i],betas[j])
like = exp(loglike-max(loglike))   # rescale so the maximum is 1 (avoids underflow)
contour(alphas,betas,like,drawlabels=FALSE,main="Likelihood function",
        xlab=expression(alpha),ylab=expression(beta))
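As a cross-check, not part of the original analysis, the maximum-likelihood estimate from a standard Poisson GLM should sit near the mode of this surface; the call below marks it on the contour plot just drawn (fit.mle is an illustrative name):
fit.mle = glm(y ~ x, family = poisson)   # classical Poisson regression fit
coef(fit.mle)                            # MLE of (alpha, beta)
points(coef(fit.mle)[1],coef(fit.mle)[2],pch=4,lwd=2)   # mark the MLE on the contour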
To keep things simple, we will use independent uniform priors for \(\alpha\) and \(\beta\): \[ \alpha \sim U(-1.8,-1.1) \qquad \beta \sim U(0.05,0.1), \] so \(p(\alpha,\beta) = 1/(0.7 \times 0.05) = 28.57143\) for \((\alpha,\beta) \in (-1.8,-1.1) \times (0.05,0.1)\), and zero otherwise.

## \(p(\alpha,\beta|y_{1:n},x_{1:n})\)
M = 20000
# SIR step 1: candidate draws from the uniform prior
alpha.draw = runif(M,-1.8,-1.1)
beta.draw = runif(M,0.05,0.1)
# SIR step 2: log-likelihood of each candidate (log of its unnormalized weight)
w = rep(0,M)
for (i in 1:M)
  w[i] = loglikelihood(alpha.draw[i],beta.draw[i])
A = max(w)
w = exp(w-A)                  # stabilized weights: likelihood divided by exp(A)
logpred1 = log(mean(w))+A     # log p(y_{1:n}|x_{1:n},M1): log of the average likelihood over prior draws
# SIR step 3: resample indices with probability proportional to the weights
k = sample(1:M,size=M,replace=TRUE,prob=w)
alphas.post = alpha.draw[k]
betas.post = beta.draw[k]
par(mfrow=c(1,3))
plot(alphas.post,betas.post,xlab=expression(alpha),ylab=expression(beta),pch=16)
contour(alphas,betas,like,drawlabels=FALSE,add=TRUE,col=2,lwd=2)
hist(alphas.post,prob=T,xlab=expression(alpha),ylab="Posterior density",main="")
hist(betas.post,prob=T,xlab=expression(beta),ylab="Posterior density",main="")
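A few numerical summaries of the SIR draws complement the plots; the last line is the usual effective-sample-size diagnostic \((\sum_i w_i)^2/\sum_i w_i^2\) for importance weights (an added check, not computed in the original notes, and invariant to the exp(A) rescaling above):
quantile(alphas.post,c(0.05,0.5,0.95))   # posterior median and 90% interval for alpha
quantile(betas.post,c(0.05,0.5,0.95))    # posterior median and 90% interval for beta
sum(w)^2/sum(w^2)                        # effective sample size; values far below M signal weight degeneracy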
The mean of the posterior predictive density for a new value \(x_{new}\) is given by \[\begin{eqnarray*} E(y_{new}|x_{new},x_{1:n},y_{1:n}) &=& \int E(y_{new}|x_{new},\alpha,\beta)p(\alpha,\beta|x_{1:n},y_{1:n})d\alpha d\beta\\ &=& \int \exp\{\alpha+\beta x_{new}\} p(\alpha,\beta|x_{1:n},y_{1:n}) d\alpha d\beta, \end{eqnarray*}\] which can be approximated by \[ {\hat E}_{mc}(y_{new}|x_{new},x_{1:n},y_{1:n}) = \frac{1}{M} \sum_{i=1}^M \exp\{\alpha^{(i)} + \beta^{(i)}x_{new}\}, \] for \(\{(\alpha,\beta)^{(1)},\ldots,(\alpha,\beta)^{(M)}\}\) our SIR-based sample from \(p(\alpha,\beta|y_{1:n},x_{1:n})\).
xx = seq(min(x),max(x),length=100)
pred = matrix(0,100,3)
# 5%, 50% and 95% posterior quantiles of lambda(x) = exp(alpha+beta*x) over a grid of x values
for (i in 1:100)
  pred[i,] = quantile(exp(alphas.post+betas.post*xx[i]),c(0.05,0.5,0.95))
par(mfrow=c(1,1))
plot(x,y+rnorm(n,0,0.1),pch=16,cex=0.5,   # small vertical jitter separates overlapping counts
     xlab="Active credit accounts",ylab="Major derogatory reports")
lines(xx,pred[,1],col=3,lwd=2)
lines(xx,pred[,2],col=4,lwd=2)
lines(xx,pred[,3],col=3,lwd=2)
abline(h=mean(y),lwd=2,lty=2)
# For comparison, the i.i.d. Poisson model M0 with lambda ~ Gamma(a0,b0),
# whose posterior is lambda|y ~ Gamma(a0+sum(y), b0+n)
a0 = 1.5
b0 = 1.0
a1 = a0 + sum(y)
b1 = b0 + n
abline(h=qgamma(0.05,a1,b1),lwd=2,col=2,lty=2)
abline(h=qgamma(0.5,a1,b1),lwd=2,col=2)
abline(h=qgamma(0.95,a1,b1),lwd=2,col=2,lty=2)
abline(h=1.5,col=grey(0.8))   # grey guides mark the region
abline(v=30,col=grey(0.8))    # enlarged in the next plot
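The bands above are quantiles of the Poisson mean \(\lambda_{new}=\exp\{\alpha+\beta x_{new}\}\), not of \(y_{new}\) itself. A minimal sketch of the full posterior predictive distribution of \(y_{new}\), at the illustrative value \(x_{new}=30\) marked by the grey guide (the extra Poisson draw adds the observation-level noise the bands omit):
x.new = 30                                        # illustrative covariate value
lambda.new = exp(alphas.post+betas.post*x.new)    # posterior draws of the Poisson mean
y.new = rpois(M,lambda.new)                       # one predictive draw per posterior draw
table(y.new)/M                                    # approximate posterior predictive pmf
quantile(y.new,c(0.05,0.5,0.95))                  # integer-valued predictive interval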
A closer look.
xx = seq(min(x),max(x),length=100)
pred = matrix(0,100,3)
for (i in 1:100)
  pred[i,] = quantile(exp(alphas.post+betas.post*xx[i]),c(0.05,0.5,0.95))
par(mfrow=c(1,1))
plot(x,y+rnorm(n,0,0.1),pch=16,cex=0.5,
xlab="Active credit accounts",ylab="Major derogatory reports",xlim=c(0,30),ylim=c(0,1.5))
lines(xx,pred[,1],col=3,lwd=2)
lines(xx,pred[,2],col=4,lwd=2)
lines(xx,pred[,3],col=3,lwd=2)
abline(h=mean(y),lwd=2,lty=2)
a0 = 1.5
b0 = 1.0
a1 = a0 + sum(y)
b1 = b0 + n
abline(h=qgamma(0.05,a1,b1),lwd=2,col=2,lty=2)
abline(h=qgamma(0.5,a1,b1),lwd=2,col=2)
abline(h=qgamma(0.95,a1,b1),lwd=2,col=2,lty=2)
Here we compare \(p(y_{1:n}|{\cal M}_0)\) and \(p(y_{1:n}|x_{1:n},{\cal M}_1)\), where \({\cal M}_0\) is the i.i.d. Poisson model and \({\cal M}_1\) is the Poisson regression model. As in item d) of the midterm exam, it is easy to show that \[ p(y_{1:n}|{\cal M}_0) = \frac{1}{\prod_{i=1}^n y_i!} \times \frac{\beta_0^{\alpha_0}}{\Gamma(\alpha_0)} \times \frac{\Gamma(\alpha_0+n{\bar y}_n)}{(\beta_0+n)^{\alpha_0+n{\bar y}_n}}. \] A by-product of the SIR algorithm is that a Monte Carlo estimate of the prior predictive is the average of the unnormalized weights, as long as the likelihood is fully evaluated (including normalizing constants) and the candidate distribution is the prior distribution: for prior draws \((\alpha^{(j)},\beta^{(j)})\), \[ \frac{1}{M}\sum_{j=1}^M L(\alpha^{(j)},\beta^{(j)}|y_{1:n},x_{1:n}) \longrightarrow p(y_{1:n}|x_{1:n},{\cal M}_1). \] If either of these two conditions fails, then the average of the weights is not an estimate of the prior predictive.
Finally, assuming both models, \({\cal M}_0\) and \({\cal M}_1\), have equal prior probability, it is easy to see that \[ Pr({\cal M}_0|y_{1:n}) = \frac{B_{01}}{1+B_{01}}, \] where \(B_{01}\) is the Bayes factor \[ B_{01} = \frac{p(y_{1:n}|{\cal M}_0)}{p(y_{1:n}|x_{1:n},{\cal M}_1)}. \]
# log p(y_{1:n}|M0) from the closed-form expression above (lfactorial avoids overflowing factorial)
logpred0 = a0*log(b0)-lgamma(a0)-sum(lfactorial(y))+lgamma(a0+sum(y))-(a0+sum(y))*log(b0+n)
c(logpred0,logpred1)
## [1] -1502.277 -1411.483
BF = exp(logpred0-logpred1)
BF
## [1] 3.703966e-40
PMP = c(BF/(1+BF),1/(1+BF))
PMP
## [1] 3.703966e-40 1.000000e+00
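A final numerical note: when the two log predictives are far apart, exp(logpred0-logpred1) can underflow to zero. An equivalent, underflow-safe computation of the posterior model probabilities uses base R's logistic CDF plogis, since plogis(z) = exp(z)/(1+exp(z)) equals \(B_{01}/(1+B_{01})\) when \(z = \log p(y_{1:n}|{\cal M}_0) - \log p(y_{1:n}|x_{1:n},{\cal M}_1)\):
PMP.safe = c(plogis(logpred0-logpred1),plogis(logpred1-logpred0))   # Pr(M0|y), Pr(M1|y)
PMP.safe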