#####################################################################
# PROBLEM 1: Sampling data from a given distribution 
#            via sampling impor- tance resampling (SIR) 
#####################################################################


# Sampling from known distributions in R
# --------------------------------------
n = 1000
par(mfrow=c(2,2))
x=rnorm(n);hist(x);mean(x);var(x)
x=rt(n,df=4);hist(x);mean(x);var(x)
x=rgamma(n,3,1);hist(x);mean(x);var(x)
x=rbinom(n,10,0.25);hist(x);mean(x);var(x)


# SIR in action: p=N(0,1) - q=T(0,1,df=4)
# ---------------------------------------
M = 100000
N = 10000
xtilde = rt(M,df=4)
w = dnorm(xtilde)/dt(xtilde,df=4)
x = sample(xtilde,size=N,replace=TRUE,prob=w)
par(mfrow=c(1,2))
hist(xtilde,prob=TRUE,xlab="draws",ylab="Density",main="Proposal",breaks=100)
curve(dt(x,df=4),from=-10,to=10,n=100,col=2,add=TRUE,lwd=2)
hist(x,prob=TRUE,xlab="draws",ylab="Density",main="Target",breaks=40)
curve(dnorm,from=-3,to=3,n=100,col=2,add=TRUE,lwd=2)


# SIR for problem 1
# -----------------
p1 = function(x){2*dnorm(x)*pnorm(3*x)}
p2 = function(x){2/(pi*(1+x^2))}
p3 = function(x){1/(x*sqrt(2*pi))*exp(-0.5*(log(x))^2)}
p4 = function(x){30*x*(1-x)^4}

par(mfrow=c(2,2))
curve(p1,from=-2,to=10,n=1000,ylab="Density")
curve(p2,from=0,to=10,n=1000,ylab="Density")
curve(p3,from=0,to=10,n=1000,ylab="Density")
curve(p4,from=0,to=1,n=1000,ylab="Density")

set.seed(123546)
M = 100000
draw1 = rnorm(M,0,sqrt(10))
draw2 = rgamma(M,1,1)
draw3 = rgamma(M,1,1)
draw4 = runif(M,0,1)

w1=p1(draw1)/dnorm(draw1,0,sqrt(10))
w2=p2(draw2)/dgamma(draw2,1,1)
w3=p3(draw3)/dgamma(draw3,1,1)
w4=p4(draw4)/1

draw1.post = sample(draw1,size=N,replace=TRUE,prob=w1)
draw2.post = sample(draw2,size=N,replace=TRUE,prob=w2)
draw3.post = sample(draw3,size=N,replace=TRUE,prob=w3)
draw4.post = sample(draw4,size=N,replace=TRUE,prob=w4)


par(mfrow=c(2,2))
hist(draw1.post,breaks=30,prob=TRUE)
curve(p1,from=-2,to=10,n=1000,ylab="Density",add=TRUE,col=2)
hist(draw2.post,breaks=30,prob=TRUE)
curve(p2,from=0,to=10,n=1000,ylab="Density",add=TRUE,col=2)
hist(draw3.post,breaks=30,prob=TRUE)
curve(p3,from=0,to=10,n=1000,ylab="Density",add=TRUE,col=2)
hist(draw4.post,breaks=30,prob=TRUE)
curve(p4,from=0,to=1,n=1000,ylab="Density",add=TRUE,col=2)


##############################################
# PROBLEM 2: Using SIR for Bayesian inference
##############################################
post = function(theta){
  (1+theta)^(125)*(1-theta)^(38)*theta^(34)
}
curve(post,from=0,to=1,n=100)

summary = function(theta){
theta = sort(theta)
N = length(theta)
E = mean(theta)
V = var(theta)
P = mean(theta<0.6)
L = theta[trunc(0.025*N)][1]
U = theta[trunc(0.975*N)][1]
return(c(E,V,P,L,U))
}


# Numerical integration (on a fine grid)
thetas = seq(0,1,length=10000)
h = thetas[2]-thetas[1]
pred = sum(post(thetas)*h)
E = sum(thetas*post(thetas)*h)/pred
V = sum((thetas-E)^2*post(thetas)*h)/pred
P = sum(post(thetas[thetas<=0.6])*h)/pred
cdf = cumsum(post(thetas)*h)/pred
L = thetas[cdf>0.025][1]
U = thetas[cdf>0.975][1]
true = c(E,V,P,L,U)


set.seed(12345)
M = c(10000,100000,1000000)
N = 10000
thetas.q = cbind(runif(M[3]),runif(M[3],0.4,0.9))
thetas.p = array(0,c(N,2,3))
summaries = matrix(0,5,6)
l = 0
par(mfrow=c(2,3))
for (i in 1:2){
  for (j in 1:3){
    w = post(thetas.q[1:M[j],i])
    thetas.p[,i,j] = sample(thetas.q[1:M[j],i],size=N,replace=TRUE,prob=w)
    l = l + 1 
    summaries[,l] = summary(thetas.p[,i,j])
    hist(thetas.p[,i,j],prob=TRUE,xlab="",main="",xlim=c(0,1))
    legend("topleft",legend=paste("q",i," - M=",M[j],sep=""),bty="n")
  }
}
names = c("q1,M1","q2,M1","q1,M2","q2,M2","q1,M3","q2,M3")
summ = c("Mean","Var","Tail prob","Lower percentile","Upper percentile")

tab = round(summaries,4)
colnames(tab) = c("q1,M1","q2,M1","q1,M2","q2,M2","q1,M3","q2,M3")
rownames(tab) = c("Mean","Var","Tail prob","Lower percentile","Upper percentile")
tab


############ FROM HERE ON WASN'T IN THE HW2 ############
############ I DECIDED TO INCLUDE AN EXERCISE ##########
############ BASED ON MONTE CARLO REPLICATIONS #########
# Monte Carlo error
set.seed(12345)
M   = c(1000,5000,10000)
N   = 500
Rep = 100
summaries = array(0,c(Rep,5,6))
for (rep in 1:Rep){
  thetas.q = cbind(runif(M[3]),runif(M[3],0.4,0.9))
  thetas.p = array(0,c(N,2,3))
  l = 0
  for (i in 1:2){
    for (j in 1:3){
      l = l + 1 
      w = post(thetas.q[1:M[j],i])
      thetas.p[,i,j] = sample(thetas.q[1:M[j],i],size=N,replace=TRUE,prob=w)
      summaries[rep,,l] = summary(thetas.p[,i,j])
    }
  }
}

names = c("q1,M1","q2,M1","q1,M2","q2,M2","q1,M3","q2,M3")
summ = c("Mean","Var","Tail prob","Lower percentile","Upper percentile")
par(mfrow=c(2,3))
for (i in 1:5){
boxplot(summaries[,i,c(1,4,2,5,3,6)],names=names,main=summ[i])
abline(v=2.5,lty=2)
abline(v=4.5,lty=2)
abline(h=true[i],col=2)
}