The data
The state wildlife biologists want to model how many fish are being
caught by fishermen at a state park. Visitors are asked how long they
stayed, how many people were in the group, were there children in the
group and how many fish were caught. Some visitors do not fish, but
there is no data on whether a person fished or not. Some visitors who
did fish did not catch any fish so there are excess zeros in the data
because of the people that did not fish.
We have data on 250 groups that went to a park. Each group was
questioned about how many fish they caught (count), how many children
were in the group (child), how many people were in the group (persons),
and whether or not they brought a camper to the park (camper).
# Data
data <- read.csv("https://stats.idre.ucla.edu/stat/data/fish.csv")
head(data)
## nofish livebait camper persons child xb zg count
## 1 1 0 0 1 0 -0.8963146 3.0504048 0
## 2 0 1 1 1 0 -0.5583450 1.7461489 0
## 3 0 1 0 1 0 -0.4017310 0.2799389 0
## 4 0 1 1 2 1 -0.9562981 -0.6015257 0
## 5 0 1 0 1 0 0.4368910 0.5277091 1
## 6 0 1 1 4 2 1.3944855 -0.7075348 0
y = data$count[data$camper==1 & data$child==1]
par(mfrow=c(1,1))
plot(table(y),xlab="Number of fishes",ylab="Frequency",axes=FALSE,ylim=c(0,25))
axis(2);axis(1,at=min(y):max(y))
for (i in min(y):max(y))
text(i,25,sum(y==i))

The model
Let \(\{y_1,\ldots,y_n\}\) be the
number of fishes caught by \(n=42\)
individuals/groups, who brought campers, children and live baits to the
park. Some of them do not fish, but there is no data on whether a person
fished or not. Some visitors who did fish did not catch any fish
(Poisson zeros), so there are excess zeros in the data because of the
people that did not fish (inflated zeros).
The data \(\{y_1,\ldots,y_n\}\) will
be modeled as \(Poisson(\lambda,\pi)\),
where \(\lambda\) is the usual rate of
fish catching and \(\pi\) the
proportion of visitor who do not fish. Therefore,
\[
Pr(y_i|\lambda,\pi) = \left\{
\begin{array}{ll}
\pi + (1-\pi)\exp\{-\lambda\} & \mbox{if} \ \ y_i=0\\
(1-\pi) \frac{\lambda^{y_i}\exp\{-\lambda\}}{k!} & \mbox{if} \ \
y_i=1,2,\ldots \\
\end{array}
\right.
\] It follows that \[
E(y_i|\lambda,\pi) = (1-\pi)\lambda \ \ \ \mbox{and} \ \ \
V(y_i|\lambda,\pi) = (1-\pi)\lambda(1+\pi\lambda).
\]
Maximum likelihood
estimators
The likelihood function is \[\begin{eqnarray*}
L(\lambda,\pi;\mbox{data}) &=& \prod_{i=1}^n
Pr(y_i|\lambda,\pi)\\
&\propto& \left[\pi +
(1-\pi)\exp\{-\lambda\}\right]^{n_0}[(1-\pi)\exp\{-\lambda\}]^{n-n_0}
\lambda^{n{\bar y}},
\end{eqnarray*}\] where \(n_0\)
is the observed number of zeros (inflated or otherwise) and \(n{\bar y}=\sum_{i=1}^n y_i\).
The maximum likelihood estimator (MLE) of \((\lambda,\pi)\) are \[
{\widehat \lambda}_{mle} = W_0(-{\bar z} e^{-{\bar z}})+{\bar z} \ \ \
\mbox{and} \ \ \
{\widehat \pi}_{mle} = 1-\frac{{\bar y}}{{\widehat \lambda}_{mle}},
\] where \(W_0\) being the main
branch of Lambert’s W-function and \[
{\bar z} = \frac{\sum_{i=1}^n y_i}{n-n_0}.
\]
#install.packages("LambertW")
library("LambertW")
## Loading required package: MASS
## Loading required package: ggplot2
## This is 'LambertW' v0.6.9.2. See NEWS and citation("LambertW").
##
## R: https://github.com/gmgeorg/LambertW
## Python: https://github.com/gmgeorg/pylambertw
## ---------------------------------------------
likelihood = function(y,lambda,pi){
ifelse(y==0,pi+(1-pi)*exp(-lambda),(1-pi)*dpois(y,lambda))
}
n = length(y)
n0 = sum(y==0)
ybar = mean(y)
s2 = var(y)
zbar = sum(y)/(n-n0)
lambda.mle = W(-zbar*exp(-zbar))+zbar
pi.mle = 1-ybar/lambda.mle
N = 30
lambdas = seq(3,6,length=N)
pis = seq(0.3,0.7,length=N)
like = matrix(0,N,N)
for (i in 1:N)
for (j in 1:N)
like[i,j] = prod(likelihood(y,lambdas[i],pis[j]))
par(mfrow=c(1,2))
persp(x=lambdas,y=pis,z=like,
theta = 40, # rotation angle
phi = 25, # elevation angle
col = "lightblue", # color of surface
shade = 0.5, # shading intensity
expand = 0.7, # scaling factor for z-axis
ticktype = "detailed",
xlab = expression(lambda),
ylab = expression(pi),
zlab = "Likelihood",
main = "Likelihood Surface over λ and π"
)
contour(lambdas,pis,like,drawlabels=FALSE,xlab=expression(lambda),
ylab=expression(pi),main="Likelihood Contours over λ and π")
points(lambda.mle,pi.mle,col=2,pch=16,cex=2)

Method of moments
estimators
By matching the sample mean, \({\bar
y}\) and the sample variance, \(s^2\), to the population moments \(E(y|\lambda,\pi)\) and \(V(y|\lambda,\pi)\), respectively, it can be
shown that the MM estimator of \((\lambda,\pi)\) are \[
\lambda_{mm} = \frac{s^2+{\bar y}^2}{{\bar y}}-1\ \ \ \mbox{and} \ \ \
\pi_{mm} = \frac{s^2-{\bar y}}{s^2+{\bar y}^2-{\bar y}}
\]
lambda.mm = (s2+ybar^2)/ybar-1
pi.mm = (s2-ybar)/(s2+ybar^2-ybar)
tab = rbind(c(lambda.mm,pi.mm),c(lambda.mle,pi.mle))
colnames(tab) = c("lambda","pi")
rownames(tab) = c("Method of moments","Maximum likelihood")
round(tab,3)
## lambda pi
## Method of moments 7.159 0.721
## Maximum likelihood 4.223 0.526
Comparing fitted
models
yy = sort(unique(y))
nn = length(yy)
freq = rep(0,nn)
for (i in 1:nn)
freq[i] = sum(y==yy[i])/n
fit.mle = likelihood(yy,lambda.mle,pi.mle)
fit.mm = likelihood(yy,lambda.mm,pi.mm)
rmse.mle = sqrt(mean((freq-fit.mle)^2))
rmse.mm = sqrt(mean((freq-fit.mm)^2))
par(mfrow=c(1,1))
plot(yy+0.1,freq,ylim=c(0,1),xlab="Counts",ylab="Probability",pch=16,axes=FALSE)
axis(2)
axis(1,at=0:max(yy))
points(yy-0.1,fit.mle,col=2,pch=16)
points(yy,fit.mm,col=4,pch=16)
legend("topright",legend=c("Observed count","Method of Moments","Maximum likelihood"),col=c(1,4,2),bty="n",pch=16)
legend("topleft",legend=c(paste("RMSE(MLE) = ",round(rmse.mle,3),sep=""),
paste("RMSE(MM) = ",round(rmse.mm,3),sep="")))

tab = round(cbind(yy,n*freq,n*fit.mle,n*fit.mm))
colnames(tab) = c("counts","Frequency","MLE","MM")
tab
## counts Frequency MLE MM
## [1,] 0 24 24 32
## [2,] 1 6 1 0
## [3,] 2 2 3 0
## [4,] 3 4 4 1
## [5,] 4 2 4 1
## [6,] 5 3 3 2
## [7,] 7 1 1 2
## [8,] 8 1 1 2
## [9,] 14 1 0 0
## [10,] 16 1 0 0
Posterior inference via
SIR
M = 10000
lambda.d = runif(M,1,8)
pi.d = runif(M,0.1,0.9)
w = rep(0,M)
for (i in 1:M)
w[i] = prod(likelihood(y,lambda.d[i],pi.d[i]))
ind = sample(1:M,size=M,prob=w,replace=TRUE)
lambda.p = lambda.d[ind]
pi.p = pi.d[ind]
par(mfrow=c(2,2))
plot(lambda.d,pi.d,xlab=expression(lambda),ylab=expression(pi))
contour(lambdas,pis,like,col=2,add=TRUE,drawlabels=FALSE,lwd=2)
title("Prior & likelihood ")
plot(lambda.p,pi.p,xlim=range(lambda.d),ylim=range(pi.d),xlab=expression(lambda),ylab=expression(pi))
contour(lambdas,pis,like,col=2,add=TRUE,drawlabels=FALSE,lwd=2)
title("Posterior vs likelihood")
hist(lambda.p,prob=TRUE,main="",xlab=expression(lambda))
hist(pi.p,prob=TRUE,main="",xlab=expression(pi))

Posterior predictive
inference via SIR
pred = matrix(0,M,nn)
for (i in 1:M)
for (j in 1:nn)
pred[i,j] = likelihood(yy[j],lambda.p[i],pi.p[i])
qpred = t(apply(pred,2,quantile,c(0.025,0.5,0.925)))
plot(yy,qpred[,2],ylim=range(qpred),pch=16,xlab="Counts",ylab="Predictive probability",col=6)
segments(yy,qpred[,1],yy,qpred[,3],col=6)
points(yy,freq,pch=16)
legend("topright",legend=c("Observed count","95% predictive interval"),col=c(1,6),bty="n",pch=16)

---
title:    "Zero-Inflated Poisson modeling"
subtitle: "Method of moments, ML and Bayesian estimation"
author:   "Hedibert Freitas Lopes"
date: "04/11/2025"
output:
  html_document:
    toc: true
    toc_depth: 3
    toc_collapsed: true
    code_download: true
    number_sections: true
  pdf_document:
    toc: true
    toc_depth: '3'

---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

# The data

The state wildlife biologists want to model how many fish are being caught by fishermen at a state park. Visitors are asked how long they stayed, how many people were in the group, were there children in the group and how many fish were caught. Some visitors do not fish, but there is no data on whether a person fished or not. Some visitors who did fish did not catch any fish so there are excess zeros in the data because of the people that did not fish.

We have data on 250 groups that went to a park. Each group was questioned about how many fish they caught (count), how many children were in the group (child), how many people were in the group (persons), and whether or not they brought a camper to the park (camper).

* Source: https://en.wikipedia.org/wiki/Zero-inflated_model

* Source: https://stats.oarc.ucla.edu/r/dae/zip/


```{r fig.align='center', fig.width=8, fig.height=6}
# Data
data <- read.csv("https://stats.idre.ucla.edu/stat/data/fish.csv")

head(data)

y = data$count[data$camper==1 & data$child==1]

par(mfrow=c(1,1))
plot(table(y),xlab="Number of fishes",ylab="Frequency",axes=FALSE,ylim=c(0,25))
axis(2);axis(1,at=min(y):max(y))
for (i in min(y):max(y))
  text(i,25,sum(y==i))
```

# The model
Let $\{y_1,\ldots,y_n\}$ be the number of fishes caught by $n=42$ individuals/groups, who brought 
campers, children and live baits to the park.  Some of them do not fish, but there is no data on whether a person fished or not. Some visitors who did fish did not catch any fish (Poisson zeros), so there are excess zeros in the data because of the people that did not fish (inflated zeros).

The data $\{y_1,\ldots,y_n\}$ will be modeled as $Poisson(\lambda,\pi)$, where $\lambda$ is the usual rate of fish catching and $\pi$ the proportion of visitor who do not fish.  Therefore,

$$
Pr(y_i|\lambda,\pi) = \left\{
\begin{array}{ll}
\pi + (1-\pi)\exp\{-\lambda\} & \mbox{if} \ \ y_i=0\\
(1-\pi) \frac{\lambda^{y_i}\exp\{-\lambda\}}{k!} & \mbox{if} \ \ y_i=1,2,\ldots \\
\end{array}
\right.
$$
It follows that 
$$
E(y_i|\lambda,\pi) = (1-\pi)\lambda \ \ \ \mbox{and} \ \ \ 
V(y_i|\lambda,\pi) = (1-\pi)\lambda(1+\pi\lambda).
$$

# Maximum likelihood estimators

The likelihood function is
\begin{eqnarray*}
L(\lambda,\pi;\mbox{data}) &=& \prod_{i=1}^n Pr(y_i|\lambda,\pi)\\
&\propto&  \left[\pi + (1-\pi)\exp\{-\lambda\}\right]^{n_0}[(1-\pi)\exp\{-\lambda\}]^{n-n_0} \lambda^{n{\bar y}},
\end{eqnarray*}
where $n_0$ is the observed number of zeros (inflated or otherwise) and $n{\bar y}=\sum_{i=1}^n y_i$. 

The maximum likelihood estimator (MLE) of $(\lambda,\pi)$ are
$$
{\widehat \lambda}_{mle} = W_0(-{\bar z} e^{-{\bar z}})+{\bar z} \ \ \ \mbox{and} \ \ \ 
{\widehat \pi}_{mle} = 1-\frac{{\bar y}}{{\widehat \lambda}_{mle}},
$$
where $W_0$ being the main branch of Lambert's W-function and 
$$
{\bar z} = \frac{\sum_{i=1}^n y_i}{n-n_0}.
$$



```{r fig.align='center', fig.width=15, fig.height=8}
#install.packages("LambertW")
library("LambertW")
likelihood = function(y,lambda,pi){
  ifelse(y==0,pi+(1-pi)*exp(-lambda),(1-pi)*dpois(y,lambda))
}

n          = length(y)
n0         = sum(y==0)
ybar       = mean(y)
s2         = var(y)
zbar       = sum(y)/(n-n0)
lambda.mle = W(-zbar*exp(-zbar))+zbar
pi.mle     = 1-ybar/lambda.mle


N = 30
lambdas = seq(3,6,length=N)
pis = seq(0.3,0.7,length=N)
like = matrix(0,N,N)
for (i in 1:N)
  for (j in 1:N)
     like[i,j] = prod(likelihood(y,lambdas[i],pis[j]))

par(mfrow=c(1,2))
persp(x=lambdas,y=pis,z=like,
  theta = 40,          # rotation angle
  phi = 25,            # elevation angle
  col = "lightblue",   # color of surface
  shade = 0.5,         # shading intensity
  expand = 0.7,        # scaling factor for z-axis
  ticktype = "detailed", 
  xlab = expression(lambda), 
  ylab = expression(pi), 
  zlab = "Likelihood", 
  main = "Likelihood Surface over λ and π"
)
contour(lambdas,pis,like,drawlabels=FALSE,xlab=expression(lambda),
        ylab=expression(pi),main="Likelihood Contours over λ and π")
points(lambda.mle,pi.mle,col=2,pch=16,cex=2)
```


# Method of moments estimators
By matching the sample mean, ${\bar y}$ and the sample variance, $s^2$, to the population moments $E(y|\lambda,\pi)$ and $V(y|\lambda,\pi)$, respectively, it can be shown that the MM estimator of $(\lambda,\pi)$ are
$$
\lambda_{mm} = \frac{s^2+{\bar y}^2}{{\bar y}}-1\ \ \ \mbox{and} \ \ \ 
\pi_{mm} = \frac{s^2-{\bar y}}{s^2+{\bar y}^2-{\bar y}}
$$

```{r}
lambda.mm  = (s2+ybar^2)/ybar-1
pi.mm      = (s2-ybar)/(s2+ybar^2-ybar)

tab = rbind(c(lambda.mm,pi.mm),c(lambda.mle,pi.mle))
colnames(tab) = c("lambda","pi")
rownames(tab) = c("Method of moments","Maximum likelihood")
round(tab,3)
```

# Comparing fitted models

```{r fig.align='center', fig.width=8, fig.height=6}
yy = sort(unique(y))
nn = length(yy)
freq = rep(0,nn)
for (i in 1:nn)
  freq[i] = sum(y==yy[i])/n
fit.mle = likelihood(yy,lambda.mle,pi.mle)
fit.mm  = likelihood(yy,lambda.mm,pi.mm)
rmse.mle = sqrt(mean((freq-fit.mle)^2))
rmse.mm = sqrt(mean((freq-fit.mm)^2))

par(mfrow=c(1,1))
plot(yy+0.1,freq,ylim=c(0,1),xlab="Counts",ylab="Probability",pch=16,axes=FALSE)
axis(2)
axis(1,at=0:max(yy))
points(yy-0.1,fit.mle,col=2,pch=16)
points(yy,fit.mm,col=4,pch=16)
legend("topright",legend=c("Observed count","Method of Moments","Maximum likelihood"),col=c(1,4,2),bty="n",pch=16)
legend("topleft",legend=c(paste("RMSE(MLE) = ",round(rmse.mle,3),sep=""),
                          paste("RMSE(MM)  = ",round(rmse.mm,3),sep="")))

tab = round(cbind(yy,n*freq,n*fit.mle,n*fit.mm))
colnames(tab) = c("counts","Frequency","MLE","MM")
tab

```


# Posterior inference via SIR
```{r fig.align='center', fig.width=8, fig.height=6}
M = 10000
lambda.d = runif(M,1,8)
pi.d = runif(M,0.1,0.9)
w = rep(0,M)
for (i in 1:M)
 w[i] = prod(likelihood(y,lambda.d[i],pi.d[i]))

ind = sample(1:M,size=M,prob=w,replace=TRUE)
lambda.p = lambda.d[ind]
pi.p = pi.d[ind]

par(mfrow=c(2,2))
plot(lambda.d,pi.d,xlab=expression(lambda),ylab=expression(pi))
contour(lambdas,pis,like,col=2,add=TRUE,drawlabels=FALSE,lwd=2)
title("Prior & likelihood ")
plot(lambda.p,pi.p,xlim=range(lambda.d),ylim=range(pi.d),xlab=expression(lambda),ylab=expression(pi))
contour(lambdas,pis,like,col=2,add=TRUE,drawlabels=FALSE,lwd=2)
title("Posterior vs likelihood")

hist(lambda.p,prob=TRUE,main="",xlab=expression(lambda))
hist(pi.p,prob=TRUE,main="",xlab=expression(pi))
```

# Posterior predictive inference via SIR

```{r fig.align='center', fig.width=8, fig.height=6}
pred = matrix(0,M,nn)
for (i in 1:M)
  for (j in 1:nn)
    pred[i,j] = likelihood(yy[j],lambda.p[i],pi.p[i])

qpred = t(apply(pred,2,quantile,c(0.025,0.5,0.925)))
plot(yy,qpred[,2],ylim=range(qpred),pch=16,xlab="Counts",ylab="Predictive probability",col=6)
segments(yy,qpred[,1],yy,qpred[,3],col=6)
points(yy,freq,pch=16)
legend("topright",legend=c("Observed count","95% predictive interval"),col=c(1,6),bty="n",pch=16)

```



