################################################################################################
#
# NSW Government - Bureau of Crime Statistics and Research
# www.bocsar.nsw.gov.au
#
# Recorded crime by offence - Monthly data on all criminal
# incidents recorded by police (1995-2017)
# LGA: Local Government Areas
#
# LGA        Offence category              Subcategory
# Nambucca   Against justice procedures    Breach bail conditions
# Nambucca   Sexual offences               Indecent assault, act of indecency and other sexual offences
# Nambucca   Theft                         Steal from person
# Sydney     Abduction and kidnapping
# Sydney     Pornography offences
# Sydney     Blackmail and extortion
# Sydney     Homicide                      Murder
#
# Reference:
# Cathy W.S. Chen and Sangyeol Lee (2016)
# Generalized Poisson autoregressive models for time series of counts
# Computational Statistics and Data Analysis 99 (2016), 51–67
#
################################################################################################

data  = read.table("countdata.txt", header=TRUE)
n     = nrow(data)
month = data[,1]
y     = data[,5]   # Sydney - Abduction and kidnapping

par(mfrow=c(1,2))
plot(month, y, xlab="Months", ylab="Counts", main="Sydney - Abduction and kidnapping", cex=0.5)
plot(table(y)/n, ylab="Relative frequency", xlab="Counts")
title("Sydney - Abduction and kidnapping")

#######################
# Poisson model
#######################
lambda.mle = mean(y)

# Prior hyperparameters
alpha0 = 2
beta0  = 2

# Posterior hyperparameters: the Gamma(alpha0, beta0) prior is conjugate to the
# Poisson likelihood, so the posterior is Gamma(alpha0 + sum(y), beta0 + n),
# where sum(y) = n*lambda.mle.
alpha1 = alpha0 + n*lambda.mle
beta1  = beta0 + n

# Prior, (normalised) likelihood and posterior evaluated on a grid of lambda values
N       = 1000
lambdas = seq(0, 3, length=N)
h       = lambdas[2] - lambdas[1]
prior   = dgamma(lambdas, alpha0, beta0)
loglike = -n*lambdas + n*lambda.mle*log(lambdas)
like    = exp(loglike - max(loglike))
like    = like/sum(like*h)
post    = dgamma(lambdas, alpha1, beta1)

par(mfrow=c(1,1))
plot(lambdas, prior, xlab=expression(lambda), ylab="Prior density", type="l", lwd=2,
     ylim=range(prior, like, post))
lines(lambdas, like, col=2, lwd=2)
lines(lambdas, post, col=3, lwd=2)
legend("topright", legend=c("prior", "likelihood", "posterior"), lty=1, col=1:3, lwd=2)

# MLE fit and (negative binomial) prior/posterior predictive distributions;
# the posterior predictive is evaluated on the log scale to avoid overflow in gamma()
ys         = 0:max(y)
fit.mle    = dpois(ys, lambda.mle)
prior.pred = gamma(alpha0+ys)*beta0^alpha0/gamma(ys+1)/gamma(alpha0)/((beta0+1)^(alpha0+ys))
# post.pred = gamma(alpha1+ys)*beta1^alpha1/gamma(ys+1)/gamma(alpha1)/((beta1+1)^(alpha1+ys))
log.post.pred = lgamma(alpha1+ys) + alpha1*log(beta1) - lgamma(ys+1) - lgamma(alpha1) -
                (alpha1+ys)*log(beta1+1)
post.pred = exp(log.post.pred - max(log.post.pred))
post.pred = post.pred/sum(post.pred)

par(mfrow=c(1,1))
plot(table(y)/n, ylab="Relative frequency", xlab="Counts", lwd=5,
     ylim=range(table(y)/n, fit.mle, prior.pred, post.pred))
lines(ys+0.1, fit.mle,    type="h", col=2, lwd=5)
lines(ys+0.2, prior.pred, type="h", col=3, lwd=5)
lines(ys+0.3, post.pred,  type="h", col=4, lwd=5)
legend("topright", legend=c("data", "MLE fit", "prior predictive", "posterior predictive"),
       lty=1, col=1:4, lwd=5)

###########################
# Monte Carlo Integration
###########################
# Posterior mean E[lambda | y] by self-normalised importance sampling with three
# proposals: the Gamma(alpha0, beta0) prior, a uniform, and a normal approximation
# to the posterior.
set.seed(1235)
N = 20000

# Proposal 1: the Gamma(alpha0, beta0) prior (weights = likelihood)
lambdas = rgamma(N, alpha0, beta0)
loglike = -n*lambdas + n*lambda.mle*log(lambdas)
like    = exp(loglike - max(loglike))

# Proposal 2: U(lower, upper) (weights = prior x likelihood)
lower    = 1.25
upper    = 2.25
lambdas1 = runif(N, lower, upper)
prior1   = dgamma(lambdas1, alpha0, beta0)
loglike  = -n*lambdas1 + n*lambda.mle*log(lambdas1)
like1    = exp(loglike - max(loglike))

# Proposal 3: normal approximation matching the posterior mean and standard deviation
# (weights = prior x likelihood / proposal density)
mean     = alpha1/beta1
sd       = sqrt(mean/beta1)
lambdas2 = rnorm(N, mean, sd)
prior2   = dgamma(lambdas2, alpha0, beta0)
loglike  = -n*lambdas2 + n*lambda.mle*log(lambdas2)
like2    = exp(loglike - max(loglike))

# Exact posterior mean followed by the three Monte Carlo estimates
alpha1/beta1
mean(lambdas*like)/mean(like)
mean(lambdas1*prior1*like1)/mean(prior1*like1)
mean(lambdas2*prior2*like2/dnorm(lambdas2, mean, sd))/mean(prior2*like2/dnorm(lambdas2, mean, sd))
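# Optional diagnostic, a minimal sketch: the effective sample size of the self-normalised
# importance weights, ESS = (sum w)^2 / sum(w^2), gives a quick read on how well each
# proposal matches the posterior (values far below N signal a poor proposal).
# ess() is an ad hoc helper defined here for illustration; it is not a base R function.
ess = function(w) sum(w)^2/sum(w^2)
c(prior.proposal   = ess(like),
  uniform.proposal = ess(prior1*like1),
  normal.proposal  = ess(prior2*like2/dnorm(lambdas2, mean, sd)))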
N",ylab="Expectation",type="l",ylim=c(1.81,1.85),lwd=2) abline(h=alpha1/beta1,col=4,lwd=2) lines(cumsum(lambdas1*like1*prior1)/cumsum(like1*prior1),col=2,lwd=2) lines(cumsum(lambdas2*prior2*like2/dnorm(lambdas2,mean,sd))/cumsum(prior2*like2/dnorm(lambdas2,mean,sd)),col=3,lwd=2) legend("bottomright",legend=c( paste("Proposal: G(",alpha0,",",beta0,")",sep=""), paste("Proposal: U(",lower,",",upper,")",sep=""), paste("Proposal: N(",round(mean,3),",",round(sd^2,3),")",sep=""),"True"),col=1:4,lty=1,lwd=2)