################################################################################
# NSW Government - Bureau of Crime Statistics and Research
# www.bocsar.nsw.gov.au
#
# Recorded crime by offence - Monthly data on all criminal
# incidents recorded by police (1995-2017)
# LGA: Local Government Areas
#
# LGA       Offence category            Subcategory
# Nambucca  Against justice procedures  Breach bail conditions
# Nambucca  Sexual offences             Indecent assault, act of indecency
#                                       and other sexual offences
# Nambucca  Theft                       Steal from person
# Sydney    Abduction and kidnapping
# Sydney    Pornography offences
# Sydney    Blackmail and extortion
# Sydney    Homicide                    Murder
#
# Reference:
# Cathy W.S. Chen and Sangyeol Lee (2016)
# Generalized Poisson autoregressive models for time series of counts
# Computational Statistics and Data Analysis 99 (2016) 51-67
################################################################################

# All plots go into one PDF device (closed by dev.off() at the end of script).
pdf(file = "poissonmodels-iid-autoregressive.pdf", width = 12, height = 8)

# countdata.txt: one row per month; column 1 = month index, column 5 is
# presumably the Sydney "Abduction and kidnapping" series (it matches the
# plot titles below) -- TODO confirm against the data file header.
data <- read.table("countdata.txt", header = TRUE)
n <- nrow(data)
month <- data[, 1]
y <- data[, 5]

# Raw time series of monthly counts
par(mfrow = c(1, 1))
plot(month, y, xlab = "Months", ylab = "Counts",
     main = "Sydney - Abduction and kidnapping")

# Empirical distribution of the counts
par(mfrow = c(1, 1))
plot(table(y) / n, ylab = "Relative frequency", xlab = "Counts")
title("Sydney Abduction and kidnapping")

##########################################################
# Model 1: Poisson model (all computations done in closed form)
#
#   y1,...,yn ~ iid Poisson(lambda),  lambda ~ Gamma(alpha0, beta0)
#   => lambda | y ~ Gamma(alpha0 + sum(y), beta0 + n)
##########################################################
lambda.mle <- mean(y)  # MLE of a Poisson rate is the sample mean

# Prior hyperparameters
alpha0 <- 2
beta0 <- 2

# Posterior hyperparameters (note n * lambda.mle = sum(y))
alpha1 <- alpha0 + n * lambda.mle
beta1 <- beta0 + n

# Prior, (normalized) likelihood and posterior on a grid of lambda values
N <- 1000
lambdas <- seq(0, 3, length = N)
h <- lambdas[2] - lambdas[1]  # grid spacing, used to normalize the likelihood
prior <- dgamma(lambdas, alpha0, beta0)
loglike <- -n * lambdas + n * lambda.mle * log(lambdas)
like <- exp(loglike - max(loglike))  # subtract max for numerical stability
like <- like / sum(like * h)         # normalize so it integrates to ~1
post <- dgamma(lambdas, alpha1, beta1)

par(mfrow = c(1, 1))
plot(lambdas, prior, xlab = expression(lambda), ylab = "Prior density",
     type = "l", lwd = 2, ylim = range(prior, like, post))
lines(lambdas, like, col = 2, lwd = 2)
lines(lambdas, post, col = 3, lwd = 2)
legend("topright", legend = c("prior", "likelihood", "posterior"),
       lty = 1, col = 1:3, lwd = 2)
title("y1,...,yn iid Poisson(lambda)\nlambda ~ Gamma(alpha0,beta0)")

# MLE fit and prior/posterior predictive pmfs over the observed count range
ys <- 0:max(y)
fit.mle <- dpois(ys, lambda.mle)

# The prior predictive is negative binomial.  Compute it in log space with
# lgamma() rather than with raw gamma() ratios, which overflow for moderate
# counts; this also matches how the posterior predictive is computed below.
log.prior.pred <- lgamma(alpha0 + ys) + alpha0 * log(beta0) - lgamma(ys + 1) -
  lgamma(alpha0) - (alpha0 + ys) * log(beta0 + 1)
prior.pred <- exp(log.prior.pred)

log.post.pred <- lgamma(alpha1 + ys) + alpha1 * log(beta1) - lgamma(ys + 1) -
  lgamma(alpha1) - (alpha1 + ys) * log(beta1 + 1)
post.pred <- exp(log.post.pred - max(log.post.pred))
post.pred <- post.pred / sum(post.pred)  # renormalize over the truncated range

par(mfrow = c(1, 1))
plot(table(y) / n, ylab = "Relative frequency", xlab = "Counts", lwd = 5,
     ylim = range(table(y) / n, fit.mle, prior.pred, post.pred))
lines(ys + 0.1, fit.mle, type = "h", col = 2, lwd = 5)
lines(ys + 0.2, prior.pred, type = "h", col = 3, lwd = 5)
lines(ys + 0.3, post.pred, type = "h", col = 4, lwd = 5)
legend("topright",
       legend = c("data", "MLE fit", "prior predictive", "posterior predictive"),
       lty = 1, col = 1:4, lwd = 5)

# Pearson's residuals under the posterior mean rate
mean.lambda <- alpha1 / beta1
plot(month, (y - mean.lambda) / sqrt(mean.lambda),
     ylab = "Pearson's residuals", xlab = "Months")
abline(h = 0, lty = 2, col = 2)
abline(h = 2, lty = 2, col = 2)
abline(h = -2, lty = 2, col = 2)

########################################################
# Model 2: Poisson autoregression (SIR-based posterior inference)
#
#   y(t) ~ Poisson(lambda(t)),  lambda(t) = alpha*lambda(t-1) + beta*y(t-1)
########################################################
par(mfrow = c(1, 1))

# Exploratory estimate of lambda(t): centered 21-month rolling mean,
# padded at both ends with the first/last computed value.
means <- rep(0, n - 20)
for (t in 11:(n - 10)) {
  means[t - 10] <- mean(y[(t - 10):(t + 10)])
}
means <- c(rep(means[1], 10), means, rep(means[n - 20], 10))

# OLS (no intercept) of means[t] on means[t-1] and y[t-1] gives rough
# values for (alpha, beta).  (Renamed from `coef`, which shadows stats::coef.)
ar.coef <- lm(means[2:n] ~ means[1:(n - 1)] + y[1:(n - 1)] - 1)$coef

plot(month, y, xlab = "Months", ylab = "Counts", main = "")
lines(month, means, col = 2)
lines(month[2:n], ar.coef[1] * means[1:(n - 1)] + ar.coef[2] * y[1:(n - 1)],
      col = 4)
legend("topright",
       legend = c("lambda(t) (rolling window)",
                  "lambda(t)=0.99*lambda(t-1)+0.01*y[t-1] (OLS)"),
       col = c(2, 4), lty = 1)
title("y(t) ~ Poisson(lambda(t))\nlambda(t)=alpha*lambda(t-1)+beta*y(t-1)")

# Grid over (alpha, beta) for the likelihood surface (used in the next section)
N <- 100
alphas <- seq(0.8, 1, length = N)
# Likelihood surface for (alpha, beta) on the grid.
# lambda(1) is fixed at 1.  The recursion is vectorized across the whole grid
# (one pass over time instead of a triple scalar loop), which produces the
# same log-likelihoods far faster.
betas <- seq(0, 0.2, length = N)
A <- matrix(alphas, N, N)               # alpha varies over rows
B <- matrix(betas, N, N, byrow = TRUE)  # beta varies over columns
lam <- matrix(1, N, N)                  # lambda(1) = 1 at every grid point
loglike <- matrix(dpois(y[1], 1, log = TRUE), N, N)  # t = 1 contribution
for (t in 2:n) {
  lam <- A * lam + B * y[t - 1]
  loglike <- loglike + dpois(y[t], lam, log = TRUE)
}
like <- exp(loglike - max(loglike))
contour(alphas, betas, like, xlab = expression(alpha), ylab = expression(beta),
        drawlabels = FALSE, main = "Likelihood function")

# Sampling from the posterior of alpha and beta via SIR:
# proposal = uniform on (0.8,1) x (0,0.2); importance weights = likelihood.
set.seed(1234)
M <- 100000
M1 <- M / 10
alphas1 <- runif(M, 0.8, 1)
betas1 <- runif(M, 0, 0.2)

# Same vectorized recursion, now across the M proposal draws at once
# (the original looped over all M draws individually).
lam <- rep(1, M)
loglike <- rep(dpois(y[1], 1, log = TRUE), M)
for (t in 2:n) {
  lam <- alphas1 * lam + betas1 * y[t - 1]
  loglike <- loglike + dpois(y[t], lam, log = TRUE)
}
w <- exp(loglike - max(loglike))
ind <- sample(seq_len(M), size = M1, prob = w, replace = TRUE)
alphas2 <- alphas1[ind]
betas2 <- betas1[ind]

plot(alphas2, betas2, xlim = range(alphas), ylim = range(betas),
     xlab = expression(alpha), ylab = expression(beta), pch = 16)
contour(alphas, betas, like, drawlabels = FALSE, add = TRUE, col = 2)
# (typo fixed: "U()0,0.2)" -> "U(0,0.2)")
title("SIR based on 100,000 draws from U(0.8,1.0)xU(0,0.2)\n10,000 resampled draws")

par(mfrow = c(1, 2))
hist(alphas2, prob = TRUE, main = "", ylab = "Posterior density",
     xlab = expression(alpha))
hist(betas2, prob = TRUE, main = "", ylab = "Posterior density",
     xlab = expression(beta))

# Posterior draws of lambda(t) and posterior predictive draws of y(t).
# lambda(., 1) is initialized at 1, consistent with the likelihood recursion
# above (the original left it at 0, contradicting the fitted model).
lambdas <- matrix(0, M1, n)
ydraws <- matrix(0, M1, n)
lambdas[, 1] <- 1
ydraws[, 1] <- rpois(M1, lambdas[, 1])
for (t in 2:n) {
  lambdas[, t] <- alphas2 * lambdas[, t - 1] + betas2 * y[t - 1]
  ydraws[, t] <- rpois(M1, lambdas[, t])
}
qlambdas <- t(apply(lambdas, 2, quantile, c(0.025, 0.5, 0.975)))
qys <- t(apply(ydraws, 2, quantile, c(0.25, 0.5, 0.75)))

par(mfrow = c(1, 1))
plot(month, y, xlab = "Months", ylab = "Counts",
     main = "Posterior quantiles of lambda(t)")
# segments() is vectorized: one vertical credible bar per month.  (The
# original looped over t and passed the whole `month` vector as x0, so
# recycling drew n spurious segments per iteration.)
segments(month, qlambdas[, 1], month, qlambdas[, 3], col = 3)
points(month, qlambdas[, 2], pch = 16, col = 2, cex = 0.5)
legend("topright", legend = c("Median", "95% CI"), col = 2:3, lty = 1, lwd = 2)

par(mfrow = c(1, 1))
plot(data[, 1], y, xlab = "Months", ylab = "Counts",
     main = "Posterior quantiles of y(t)")
segments(month, qys[, 1], month, qys[, 3], col = 3)
points(month, qys[, 2], pch = 16, col = 2, cex = 0.5)
# Legend for the posterior-quantiles-of-y(t) plot drawn just above
legend("topright", legend = c("Median", "50% CI"), col = 2:3, lty = 1, lwd = 2)

# Pearson's residuals, standardized by the posterior median of y(t).
# NOTE(review): qys[, 2] can be 0 for low-count months, giving Inf/NaN
# residuals -- confirm whether a floor on the denominator is wanted.
pearson.res <- (y - qys[, 2]) / sqrt(qys[, 2])
plot(data[, 1], pearson.res, ylab = "Pearson's residuals", xlab = "Months")
abline(h = c(0, 2, -2), lty = 2, col = 2)  # reference lines at 0 and +/-2

# Close the PDF device opened at the top of the script
dev.off()