#################################################################################
#
#  Unit 5 - Example v.
#
#   Computing Deviance Information Criteria (DIC)                                                          #       
#   100 cycles-to-failure times for samples of yarn airplanes.  For each 
#   individual airplane, it has been suggested that an exponential model 
#   fits the data well.                      
#                                                                    
#################################################################################
#
# Author : Hedibert Freitas Lopes
#          Graduate School of Business
#          University of Chicago
#          5807 South Woodlawn Avenue
#          Chicago, Illinois, 60637
#          Email : hlopes@ChicagoGSB.edu
#
#################################################################################
dnorm2 = function(theta,mu,sig2){-0.5*sum((theta-mu)^2/sig2)}
f1  = function(theta){sum(dgamma(y,exp(theta[1]),exp(theta[2]),log=T))}
f2  = function(theta){sum(dlnorm(y,theta[1],exp(theta[2]),log=T))}
f3  = function(theta){sum(dweibull(y,exp(theta[1]),exp(theta[2]),log=T))}
ff1 = function(theta){-f1(theta)}
ff2 = function(theta){-f2(theta)}
ff3 = function(theta){-f3(theta)}
fi1 = function(y,theta){dgamma(y,exp(theta[1]),exp(theta[2]),log=T)}
fi2 = function(y,theta){dlnorm(y,theta[1],exp(theta[2]),log=T)}
fi3 = function(y,theta){dweibull(y,exp(theta[1]),exp(theta[2]),log=T)}

y = c(86, 146, 251, 653,  98, 249, 400, 292, 131, 169, 175, 176,  76, 264,  15,
364, 195, 262,  88, 264, 157, 220,  42, 321, 180, 198,  38,  20,  61, 121,
282, 224, 149, 180, 325, 250, 196,  90, 229, 166,  38, 337,  65, 151, 341,
 40,  40, 135, 597, 246, 211, 180,  93, 315, 353, 571, 124, 279,  81, 186,
497, 182, 423, 185, 229, 400, 338, 290, 398,  71, 246, 185, 188, 568,  55,
 55,  61, 244,  20, 284, 393, 396, 203, 829, 239, 236, 286, 194, 277, 143, 
198, 264, 105, 203, 124, 137, 135, 350, 193, 188) 
n  = length(y)

# Model 1: Gamma
# ---------------
alpha = seq(1.645,3.0,length=30)
beta  = seq(0.00674,0.015,length=30)
alpha = log(alpha)
beta  = log(beta)
like1 = matrix(0,30,30)
for (i in 1:30)
  for (j in 1:30)
    like1[i,j] = f1(c(alpha[i],beta[j]))

# Model 2: Log-normal
# --------------------
mu    = seq(5.0,5.35,length=30)
sig2  = seq(0.64,0.9,length=30) 
sig2  = log(sig2)
like2 = matrix(0,30,30)
for (i in 1:30)
  for (j in 1:30)
    like2[i,j] = f2(c(mu[i],sig2[j]))

# Model 3: Weibull
# -----------------
gamma = seq(1.35,1.91,length=30)
delta = seq(200,299,length=30)
gamma = log(gamma)
delta = log(delta)
like3 = matrix(0,30,30)
for (i in 1:30)
  for (j in 1:30)
    like3[i,j] = f3(c(gamma[i],delta[j]))

# Maximum likelihood estimation
# -----------------------------
theta1.mle = nlm(ff1,c(0.8,-4.6))$estimate
theta2.mle = nlm(ff2,c(5.15,-0.25))$estimate
theta3.mle = nlm(ff3,c(0.47,5.5))$estimate

# AIC and BIC
# -----------
AIC1 = -2*f1(theta1.mle)+4
AIC2 = -2*f2(theta2.mle)+4
AIC3 = -2*f3(theta3.mle)+4
BIC1 = -2*f1(theta1.mle)+2*log(n)
BIC2 = -2*f2(theta2.mle)+2*log(n)
BIC3 = -2*f3(theta3.mle)+2*log(n)
AICs = c(AIC1,AIC2,AIC3)
BICs = c(BIC1,BIC2,BIC3)

# Bayesian inference through weighted resampling
# ----------------------------------------------
mus = matrix(0,3,2)
sdtheta = matrix(0,3,2)
ind = range(alpha);sdalpha = (ind[2]-ind[1])/4
ind = range(beta); sdbeta  = (ind[2]-ind[1])/4
sdtheta[1,] = c(sdalpha,sdbeta)
ind = range(mu);   sdmu    = (ind[2]-ind[1])/4
ind = range(sig2); sdsig2  = (ind[2]-ind[1])/4
sdtheta[2,] = c(sdmu,sdsig2)
ind = range(gamma);sdgamma = (ind[2]-ind[1])/4
ind = range(delta);sddelta = (ind[2]-ind[1])/4
sdtheta[3,] = c(sdgamma,sddelta)
mus[1,] = c(0.8058902,-4.5966971)
mus[2,] = c(5.1629177,-0.2642575)
mus[3,] = c(0.4727277,5.5130855)

M = 10000
theta = array(0,c(3,M,2))
w = matrix(0,M,3)
for (i in 1:3)
  theta[i,,] = matrix(rnorm(2*M,mus[i,],sdtheta[i,]),M,2,byrow=T)

for (i in 1:M){
  w[i,1] = f1(theta[1,i,])-dnorm2(theta[1,i,],mus[1,],sdtheta[1,])
  w[i,2] = f2(theta[2,i,])-dnorm2(theta[2,i,],mus[2,],sdtheta[2,])
  w[i,3] = f3(theta[3,i,])-dnorm2(theta[3,i,],mus[3,],sdtheta[3,])
}
ind    = matrix(0,M,3)
theta1 = theta
for (i in 1:3){
  ind[,i]     = sample(1:M,replace=T,prob=exp(w[,i]),size=M)
  theta1[i,,] = theta[i,ind[,i],]
}

# This if Figure 7.3
# ------------------
par(mfrow=c(2,2))
plot(exp(theta1[1,,1]),exp(theta1[1,,2]),xlab=expression(alpha),ylab=expression(beta),pch=".")
contour(exp(alpha),exp(beta),exp(like1),drawlabels=F,add=T)
title("(a)")
plot(theta1[2,,1],exp(theta1[2,,2]),xlab=expression(mu),ylab=expression(sigma^2),pch=".")
contour(mu,exp(sig2),exp(like2),drawlabels=F,add=T)
title("(b)")
plot(exp(theta1[3,,]),xlab=expression(gamma),ylab=expression(delta),pch=".")
contour(exp(gamma),exp(delta),exp(like3),drawlabels=F,add=T)
title("(c)")
x = seq(0.01,max(y))
y1 = seq(min(y),max(y),length=20)
hist(y,prob=T,breaks=y1,xlab="",ylab="",main="",ylim=c(0,0.0045))
lines(x,dgamma(x,mean(exp(theta1[1,,1])),mean(exp(theta1[1,,2]))))
lines(x,dlnorm(x,mean(theta1[2,,1]),sqrt(mean(exp(theta1[2,,2])))),lty=2)
lines(x,dweibull(x,mean(exp(theta1[3,,1])),mean(exp(theta1[3,,2]))),lty=3)
title("(d)")

parameters = cbind(exp(theta1[1,,1]),exp(theta1[1,,2]),theta1[2,,1],
exp(theta1[2,,2]),exp(theta1[3,,1]),exp(theta1[3,,2]))
quant025 = function(x){quantile(x,0.025)}
quant975 = function(x){quantile(x,0.975)}
summary = cbind(apply(parameters,2,mean),sqrt(apply(parameters,2,var)),
apply(parameters,2,quant025),apply(parameters,2,quant975))
round(summary,2)


# Computing pseudo Bayes factors
# ------------------------------
pseudoBF = matrix(0,n,3)
for (i in 1:M){
  pseudoBF[,1] = pseudoBF[,1] + 1/exp(fi1(y,theta1[1,i,]))
  pseudoBF[,2] = pseudoBF[,2] + 1/exp(fi2(y,theta1[2,i,]))
  pseudoBF[,3] = pseudoBF[,3] + 1/exp(fi3(y,theta1[3,i,]))
}
pseudoBF = pseudoBF/M
pseudoBF = 1/pseudoBF

par(mfrow=c(1,1))
plot(pseudoBF[,1],xlab="",ylab="",col=0,ylim=range(pseudoBF))
for (i in 1:n){
  text(i,pseudoBF[i,1],"1",col=1)
  text(i,pseudoBF[i,2],"2",col=2)
  text(i,pseudoBF[i,3],"3",col=3)
}
pseudoBFs = apply(pseudoBF,2,sum)


# Computing posterior predictive criteria
# ---------------------------------------
ypred = seq(0,1000)

par1 = exp(theta1[1,,1])
par2 = exp(theta1[1,,2])
E1  = mean(par1/par2)
V1  = mean(par1/par2^2)
G1G = sum((y-E1)^2)
P1G = n*V1
D1G = G1G+P1G

par1 = theta1[2,,1]
par2 = exp(theta1[2,,2])
E2  = mean(exp(par1+0.5*par2))
V2  = mean(exp(2*par1+par2)*exp(par2-1))
G2G = sum((y-E2)^2)
P2G = n*V2
D2G = G2G+P2G

par1 = exp(theta1[3,,1])
par2 = exp(theta1[3,,2])
E3   = mean(par2/par1*gamma(1/par1))
V3   = mean(par2^2/par1*(2*gamma(2/par1)-(gamma(1/par1))^2/par1))
G3G = sum((y-E3)^2)
P3G = n*V3
D3G = G3G+P3G

DGs = c(D1G,D2G,D3G)

# Computing deviance information criteria
# ---------------------------------------
means1 = apply(theta1[1,,],2,mean)
means2 = apply(theta1[2,,],2,mean)
means3 = apply(theta1[3,,],2,mean)
mu11   = -2*f1(means1)
mu22   = -2*f2(means2)
mu33   = -2*f3(means3)
mu1    = 0.0
mu2    = 0.0
mu3    = 0.0
for (i in 1:M){
  mu1 = mu1 -2*f1(theta1[1,i,])
  mu2 = mu2 -2*f2(theta1[2,i,])
  mu3 = mu3 -2*f3(theta1[3,i,])
}
pD1  = mu1/M-mu11  
pD2  = mu2/M-mu22  
pD3  = mu3/M-mu33  
DIC1 = mu11 + 2*pD1  
DIC2 = mu22 + 2*pD2  
DIC3 = mu33 + 2*pD3  

pDs = c(pD1,pD2,pD3)
DICs = c(DIC1,DIC2,DIC3)

# Table 7.7
# ---------
criteria = cbind(AICs,BICs,pseudoBFs,DGs,DICs)      
round(criteria,2)