######################################################################## # # Modeling i.i.d. count data with Binomial and Poisson models # # M0: y1,...,yn iid Binomial(n0,theta) # M1: y1,...,yn iid Poisson(lambda) # # We compute prior predictives, posteriors, posterior # predictives Bayes factor, posterior model probabilities # and perform Bayesian model averaging (BMA). All quantities # are approximated by Monte Carlo integration and the # Sampling importance resampling (SIR) algorithm. # At the bottom of this script you will see that, in fact, # all quantities could have been computed in closed form. # # Hedibert Lopes # www.hedibert.org # January 28th 2021 # ######################################################################## set.seed(12345) Binomial = TRUE n = 100 # Sample size n0 = 10 # Number of trials in the Binomial model theta0 = 0.2 # Probability of success in the Binomial model lambda0 = 2 # Rate of occurrence in the Poisson model # Simulating the data if(Binomial){ y = rbinom(n,n0,theta0) truemodel = paste("M0: Binomial(",n0,",",theta0,")",sep="") }else{ y = rpois(n,lambda0) truemodel = paste("M1: Poisson(",lambda0,")",sep="") } par(mfrow=c(2,2)) plot(y,type="b",xlab=paste("Days - n=",n,sep=""),ylab="Counts",main=truemodel) #plot(100*table(y)/n,xlab="Counts",ylab="Frequency (%)") # Binomial inference via SIR M = 1000000 a0 = 1 b0 = 1 theta = rbeta(M,a0,b0) w = rep(0,M) for (i in 1:M) w[i] = sum(dbinom(y,n0,theta[i],log=TRUE)) pred.bin = mean(exp(w)) w = exp(w-max(w)) theta1 = sample(theta,replace=TRUE,size=M,prob=w) hist(theta1,prob=TRUE,xlab=expression(theta),main="") title(paste("Log predictive = ",round(log(pred.bin),3),"\n Binomial model (M0)",sep="")) # Poisson inference via SIR c0 = 2 d0 = 2 lambda = rgamma(M,c0,d0) w = rep(0,M) for (i in 1:M) w[i] = sum(dpois(y,lambda[i],log=TRUE)) pred.poi = mean(exp(w)) w = exp(w-max(w)) lambda1 = sample(lambda,replace=TRUE,size=M,prob=w) hist(lambda1,prob=TRUE,xlab=expression(lambda),main="") title(paste("Log predictive = ",round(log(pred.poi),3),"\n Poisson model (M1)",sep="")) BF10 = pred.poi/pred.bin legend("topright",legend=paste("BF10 = ",round(BF10,3),sep=""),bty="n") # Posterior predictive Pr{y(n+1)=2|y1,...,yn} ynew = 2 PriorM0 = 0.5 PriorM1 = 1-PriorM0 PostM1 = 1/(1+PriorM0/PriorM1/BF10) PostM0 = 1-PostM1 postpredM0 = mean(dbinom(ynew,10,theta1)) postpredM1 = mean(dpois(ynew,lambda1)) tab = rbind(c(PriorM0,pred.bin,PostM0,postpredM0), c(PriorM1,pred.poi,PostM1,postpredM1)) rownames(tab) = c("Binomial model (M0)","Poisson model (M1)") colnames(tab) = c("Prior","Predictive","Posterior", paste("Pr(y(n+1)=",ynew,"|y1,...,yn)",sep="")) tab postpred = PostM0*postpredM0+PostM1*postpredM1 postpred # Posterior preditve Pr{y(n+1)|y1,...,yn} for y(n+1)=0,1,...,10. ynews = 0:n0 postpredM0=NULL postpredM1=NULL for (ynew in ynews){ postpredM0 = c(postpredM0,mean(dbinom(ynew,10,theta1))) postpredM1 = c(postpredM1,mean(dpois(ynew,lambda1))) } postpred = PostM0*postpredM0+PostM1*postpredM1 range = range(postpred,postpredM0,postpredM1) plot(ynews,postpred,type="h",xlab=expression(y[n+1]),lwd=3, ylab="Probability",ylim=range,col=2) title("Posterior predictive") lines(ynews-0.15,postpredM0,col=1,type="h",lwd=2) lines(ynews+0.15,postpredM1,col=4,type="h",lwd=2) legend("topright",legend=c("Binomial model (M0)","Poisson model (M1)", "Bayesian model averaging (BMA)"),col=c(1,4,2),lty=1,lwd=3,bty="n") ########################################################################### # All quantities are actually available in closed form ########################################################################### sumy = sum(y) a1 = a0+sumy b1 = b0+n*n0-sumy c1 = c0+sumy d1 = d0+n E.bin.exact = a1/(a1+b1) SD.bin.exact = sqrt(a1*b1/((a1+b1)^2*(a1+b1+1))) tab.bin = cbind(c(E.bin.exact,SD.bin.exact), c(mean(theta1),sqrt(var(theta1)))) rownames(tab.bin) = c("Posterior mean","Posterior StDev") colnames(tab.bin) = c("MC","Exact") tab.bin E.poi.exact = c1/d1 SD.poi.exact = sqrt(c1/d1^2) tab.poi=cbind(c(E.poi.exact,SD.poi.exact), c(mean(lambda1),sqrt(var(lambda1)))) rownames(tab.poi) = c("Posterior mean","Posterior StDev") colnames(tab.poi) = c("MC","Exact") tab.poi lpred.bin.exact = lgamma(a0+b0)-lgamma(a0)-lgamma(b0)+ sum(log(choose(n0,y)))+ lgamma(a1)+lgamma(b1)-lgamma(a1+b1) lpred.poi.exact = c0*log(d0)-lgamma(c0)- sum(log(factorial(y)))+ lgamma(c1)-c1*log(d1) lpostpred.bin.exact = log(choose(n0,ynews))+ lgamma(a1+b1)-lgamma(a1)-lgamma(b1)+ lgamma(a1+ynews)+lgamma(b1+n0-ynews)-lgamma(a1+b1+n0) lpostpred.poi.exact = -log(factorial(ynews))+ c1*log(d1)-lgamma(c1)+ lgamma(c1+ynews)-(c1+ynews)*log(d1+1) lBF10.exact = lpred.poi.exact-lpred.bin.exact tab = rbind(c(log(pred.bin),log(pred.poi),BF10), c(lpred.bin.exact,lpred.poi.exact,exp(lBF10.exact))) rownames(tab) = c("MC","Exact") colnames(tab) = c("logpred M0","logpred M1","BF10") tab tab = round(cbind(ynews,log(postpredM0),lpostpred.bin.exact,log(postpredM1),lpostpred.poi.exact),4) colnames(tab) = c("y(n+1)","lpost M0 (MC)","lpost M0 (exact)","lpost M1(MC)","lpost M1(exact)") tab