###############################################################
#
# Computing posterior of the efficacy of the coronavac vacine 
# in Brazilian individuais based on a GLM model in the binomial 
# family and logit, probit and cloglog link funtions
#
# Hedibert Freitas Lopes
# hedibert.org
# January 14th 2021
#
###############################################################

# Data
nt = 4653   # number of vaccinated individuals
np = 4599   # number of placebo individuals
ct = 85     # number of covid-19 cases in vaccinated group
cp = 167    # number of covid-19 cases in placebo group
x = rep(0,nt+np)
x[1:nt]=1
y = rep(0,nt+np)
y[1:ct]=1
y[(nt+1):(nt+cp)]=1

# Picking the link function of the binomial logit model
# 1: logit - log(theta/(1-theta)) = alpha+beta*x
# 2: probit - qnorm(theta) = alpha+beta*x
# 3: cloglog - log(-log(1-theta)) = alpha+beta*x
link=3


# ML inference
# ------------
if (link==1){
   fit = glm(y~x,family=binomial(link="logit"))
   num  = 1/(1+exp(-fit$coef[1]-fit$coef[2]))
   den = 1/(1+exp(-fit$coef[1]))
}
if (link==2){
   fit = glm(y~x,family=binomial(link="probit"))
   num=pnorm(fit$coef[1]+fit$coef[2])
   den=pnorm(fit$coef[1])
}
if (link==3){
   fit = glm(y~x,family=binomial(link="cloglog"))
   num=1-exp(-exp(fit$coef[1]+fit$coef[2]))
   den=1-exp(-exp(fit$coef[1]))
}

summary(fit)
efficacy.mle = 1-num/den
efficacy.mle

# Bayesian inference (via sampling importance resampling-SIR)
# -----------------------------------------------------------


# Step 1: Drawing from the prior
N     = 10000
sd    = summary(fit)$coef[,2]
alpha = rnorm(N,fit$coef[1],10*sd[1])
beta  = rnorm(N,fit$coef[2],10*sd[2])


# Step 2: Computing weights
like  = rep(0,N)
if (link==1){
  for (i in 1:N){
    prob = 1/(1+exp(-alpha[i]-beta[i]*x))
    like[i] = sum(dbinom(y,1,prob,log=TRUE))
  }
}
if (link==2){
  for (i in 1:N){
    prob = pnorm(alpha[i]+beta[i]*x)
    like[i] = sum(dbinom(y,1,prob,log=TRUE))
  }
}
if (link==3){
  for (i in 1:N){
    prob = 1-exp(-exp(alpha[i]+beta[i]*x))
    like[i] = sum(dbinom(y,1,prob,log=TRUE))
  }
}
w = exp(like-max(like))

# Step 3: resampling
ii = sample(1:N,size=N,replace=TRUE,prob=w)
alpha1 = alpha[ii]
beta1 = beta[ii]

# Prior vs posterior draws
plot(alpha,beta,pch=16,xlab=expression(alpha),ylab=expression(beta))
points(alpha1,beta1,col=2,pch=16)
legend("topleft",legend=c("Prior","Posterior"),col=1:2,pch=16)

# Efficacy posterior draws
if (link==1){
  prob.t  = 1/(1+exp(-alpha1-beta1))
  prob.p = 1/(1+exp(-alpha1))
}
if (link==2){
  prob.t  = pnorm(alpha1+beta1)
  prob.p = pnorm(alpha1)
}
if (link==3){
  prob.t = 1-exp(-exp(alpha1+beta1))
  prob.p = 1-exp(-exp(alpha1))
}
efficacy=1-prob.t/prob.p

hist(efficacy,xlim=c(0,1),prob=TRUE,xlab="Coronavac efficacy",
main="",ylab="Density")
abline(v=efficacy.mle,col=2,lwd=3)
legend("topleft",legend=c(
paste("Mean efficacy=",round(mean(efficacy),3),sep=""),
paste("95% CI=(",round(quantile(efficacy,0.025),3),",",
round(quantile(efficacy,0.975),3),")",sep="")),bty="n")