########################################################
#
#
# EXAMPLE 1: WEIGHTS OF MALE PHD STUDENTS
#
#
########################################################
#
# Model : y1,...,yn iid N(mu,sig2)
#
# We will assume, for the sake of the example, that sig=6
y = c(68,70,65,70,75,60,80,75,66,72,65)
n = length(x)
sig = 6
sig2 = sig^2

# OLS estimates
mu.mle = mean(y)
c(mu.mle,sqrt(sig2/n))

# Gaussian prior for mu with mean mu0=70 and variance tau02=100
mu0 = 70
tau02 = 100

# The posterior of mu is also Gaussian with mean mu1 and  variance tau12
tau12 = 1/(1/tau02+n/sig2)
mu1 = tau12*(mu0/tau02+sum(y)/sig2)
c(mu1,sqrt(tau12))

# Posterior 95% credibility interval for mu
l = qnorm(0.025,mu1,sqrt(tau12))
u = qnorm(0.975,mu1,sqrt(tau12))
c(l,u)


# Let us now consider a Student t prior for mu with nu=5 degrees of fredom, 
# location mu0=70 and scale tau02=60
# Prior mean is mu0 since nu>1
# Prior variance is tau02*nu/(nu-2)=100
# Notice that both priors have the same means and the same variances
# Unfortunately p(mu|data) is not Gaussian or any known distribution

N = 1000
mus = seq(30,110,length=N)
par(mfrow=c(1,2))
plot(mus,dt((mus-70)/sqrt(60),df=5)/sqrt(60),type="l",lwd=2,ylab="Density",xlab=expression(mu))
lines(mus,dnorm(mus,70,sqrt(100)),col=2,lwd=2)
legend("topright",legend=c("Normal","Student's t"),col=2:1,lty=1,lwd=2)
title("Prior densities")

plot(mus,dnorm(mus,70,sqrt(100),log=TRUE),type="l",lwd=2,col=2,xlab=expression(mu),ylab="Log density")
lines(mus,dt((mus-70)/sqrt(60),df=5,log=TRUE)-log(sqrt(60)),lwd=2)
title("Prior log densities")

# Computing E(mu|data) via Riemann's integration approximation
h = function(mu){prod(dnorm(y,mu,sqrt(sig2)))*dt((mu-70)/sqrt(60),df=5)/sqrt(60)}

N = 10000
mus1 = seq(0,160,length=N)
int = mus1[2]-mus1[1]
hs = rep(0,N)
for (i in 1:N)
  hs[i] = h(mus1[i])
post = hs/sum(hs*int)

Emu.riemann = sum(mus1*hs*int)/sum(hs*int)
Emu2.riemann = sum(mus1^2*hs*int)/sum(hs*int)
Vmu.riemann = Emu2.riemann-(Emu.riemann)^2

c(Emu.riemann,Vmu.riemann,sqrt(Vmu.riemann))


par(mfrow=c(1,1))
plot(mus1,post,type="l",ylab="Density",xlab=expression(mu),lwd=2)
lines(mus1,dnorm(mus1,mu1,sqrt(tau12)),col=2,lwd=2)
lines(mus,dt((mus-70)/sqrt(60),df=5)/sqrt(60),lwd=2,col=3)
lines(mus,dnorm(mus,70,sqrt(100)),col=4,lwd=2)
legend("topright",legend=c("Normal  prior","Student's t prior","Posterior for normal prior","Posterior for Student's t prior"),col=4:1,lty=1,lwd=2)


# Computing E(mu|data) via Monte Carlo integration (MCI)
L = function(mu){prod(dnorm(y,mu,sqrt(sig2)))}
M = 1000
mu.draw = 70+sqrt(60)*rt(M,df=5)
Ls = rep(0,M)
for (i in 1:M)
  Ls[i] = L(mu.draw[i])

Emu = mean(mu.draw*Ls)/mean(Ls)
Emu2 = mean(mu.draw^2*Ls)/mean(Ls)
Vmu = Emu2-Emu^2

c(Emu,Vmu,sqrt(Vmu))


#################################################################
#
#
# EXAMPLE 2: WEIGHTS AND HEIGHTS OF MALE PHD STUDENTS
#
#
#################################################################
y     = c(68,70,65,70,75,60,80,75,66,72,65)
x     = c(175,170,173,183,175,170,175,183,171,169,170)
n     = length(x)

# Likelihood function
L = function(alpha,beta,sig2){prod(dnorm(y,alpha+beta*x,sqrt(sig2)))}

# Prior hyperparameters 
a0 = -20
A0 = 10000
b0 = 0.5
B0 = 5
c0 = 1
d0 = 25
M = 10000
alphas = rnorm(M,a0,sqrt(A0))
betas = rnorm(M,b0,sqrt(B0))
sig2s = runif(M,c0,d0)
Ls = rep(0,M)
for (i in 1:M)
  Ls[i] = L(alphas[i],betas[i],sig2s[i])

Ealpha = mean(alphas*Ls)/mean(Ls)
Ealpha2 = mean(alphas^2*Ls)/mean(Ls)
Ebeta = mean(betas*Ls)/mean(Ls)
Ebeta2 = mean(betas^2*Ls)/mean(Ls)
Esigma2 = mean(sig2s*Ls)/mean(Ls)
Valpha = Ealpha2-Ealpha^2
Vbeta = Ebeta2-Ebeta^2

c(Ealpha,Ebeta,sqrt(Esigma2))
c(Ealpha,sqrt(Valpha))
c(Ebeta,sqrt(Vbeta))


#################################################################
#
# EXAMPLE 3: Counts of Abduction and kidnapping in Sydney
#
#################################################################
#
# NSW Government - Bureau of Crime Statistics and Research
# www.bocsar.nsw.gov.au
# 
# Recorded crime by offence - Monthly data on all criminal
# incidents recorded by police (1995-2017)
# LGA: Local Government Areas
#
# LGA			Offence category			
# Nambucca		Against justice procedures	
# Nambucca		Sexual offences			
# Nambucca		Theft					
# Sydney			Abduction and kidnapping	
# Sydney			Pornography offences	
# Sydney			Blackmail and extortion	
# Sydney			Homicide				
#
# Reference:
# Cathy W.S. Chen and Sangyeol Lee (2016) 
# Generalized Poisson autoregressive models for time series of counts
# Computational Statistics and Data Analysis 99 (2016) 51â€“67
#
################################################################################################
data = read.table("countdata.txt",header=TRUE)
n = nrow(data)
month = data[,1]
y = data[,5]

par(mfrow=c(1,1))
plot(month,y,xlab="Months",ylab="Counts",main="Sydney - Abduction and kidnapping")

par(mfrow=c(1,1))
plot(table(y)/n,ylab="Relative frequency",xlab="Counts")
title("Sydney Abduction and kidnapping")

#######################################
# Model 1: iid Poisson data
#######################################

# Likelihood function
L = function(lambda){prod(dpois(y,lambda))}

# Posterior expectation via MC integration
M = 1000
a = 1
b = 1
lambdas = rgamma(M,a,b)
Ls = rep(0,M)
for (i in 1:M)
  Ls[i] = L(lambdas[i])

Elambda.exact = (a+sum(y))/(b+n)
Elambda = mean(lambdas*Ls)/mean(Ls)
Elambda2 = mean(lambdas^2*Ls)/mean(Ls)
Vlambda = Elambda2-Elambda^2

c(Elambda.exact,Elambda,sqrt(Vlambda))


#######################################
# Model 2: iid Poisson time series data
#######################################
L = function(theta){
  alpha= theta[1]
  beta = theta[2]
  lambda = rep(1,n)
  loglike = dpois(y[1],alpha+beta,log=TRUE)
  for (t in 2:n){
    lambda[t] = alpha*lambda[t-1]+beta*y[t-1]
    loglike = loglike+dpois(y[t],lambda[t],log=TRUE)
  }
  return(loglike)
}

# Likelihood inference
##################
minusL = function(theta){
  -L(theta)
}

init = c(1,0)
theta.mle = nlm(minusL,init)$estimate
alpha.mle = theta.mle[1]
beta.mle = theta.mle[2]
lambda.mle = rep(1,n)
for (t in 2:n)
  lambda.mle[t] = alpha.mle*lambda.mle[t-1]+beta.mle*y[t-1]


# Bayesian inference
##################
M = 1000
alphas = rnorm(M,1,0.1)^2
betas = rnorm(M,0,0.1)^2
lambdas = matrix(0,M,n)
lambdas[,1] = rpois(M,2)
for (t in 2:n)
  lambdas[,t] = alphas*lambdas[,t-1]+betas*y[t-1]

ts.plot(t(lambdas),ylim=c(0,10))
points(y,pch=16,col=2)
title("Prior of (alpha,beta) induces prior lambda(t) trajectories")

Ls = rep(0,M)
for (i in 1:M)
  Ls[i] = L(c(alphas[i],betas[i]))
Ls = exp(Ls-max(Ls))

Ealpha = mean(Ls*alphas)/mean(Ls)
Ebeta = mean(Ls*betas)/mean(Ls)

lambda.hat = rep(1,n)
for (t in 2:n)
    lambda.hat[t] = Ealpha*lambda.hat[t-1]+Ebeta*y[t-1]

ts.plot(t(lambdas),ylim=c(0,10))
points(y,pch=16,col=2)
points(lambda.hat,pch=16,col=3)
points(lambda.mle,pch=15,col=4)