##################################################################### # PROBLEM 1: Sampling data from a given distribution # via sampling impor- tance resampling (SIR) ##################################################################### # Sampling from known distributions in R # -------------------------------------- n = 1000 par(mfrow=c(2,2)) x=rnorm(n);hist(x);mean(x);var(x) x=rt(n,df=4);hist(x);mean(x);var(x) x=rgamma(n,3,1);hist(x);mean(x);var(x) x=rbinom(n,10,0.25);hist(x);mean(x);var(x) # SIR in action: p=N(0,1) - q=T(0,1,df=4) # --------------------------------------- M = 100000 N = 10000 xtilde = rt(M,df=4) w = dnorm(xtilde)/dt(xtilde,df=4) x = sample(xtilde,size=N,replace=TRUE,prob=w) par(mfrow=c(1,2)) hist(xtilde,prob=TRUE,xlab="draws",ylab="Density",main="Proposal",breaks=100) curve(dt(x,df=4),from=-10,to=10,n=100,col=2,add=TRUE,lwd=2) hist(x,prob=TRUE,xlab="draws",ylab="Density",main="Target",breaks=40) curve(dnorm,from=-3,to=3,n=100,col=2,add=TRUE,lwd=2) # SIR for problem 1 # ----------------- p1 = function(x){2*dnorm(x)*pnorm(3*x)} p2 = function(x){2/(pi*(1+x^2))} p3 = function(x){1/(x*sqrt(2*pi))*exp(-0.5*(log(x))^2)} p4 = function(x){30*x*(1-x)^4} par(mfrow=c(2,2)) curve(p1,from=-2,to=10,n=1000,ylab="Density") curve(p2,from=0,to=10,n=1000,ylab="Density") curve(p3,from=0,to=10,n=1000,ylab="Density") curve(p4,from=0,to=1,n=1000,ylab="Density") set.seed(123546) M = 100000 draw1 = rnorm(M,0,sqrt(10)) draw2 = rgamma(M,1,1) draw3 = rgamma(M,1,1) draw4 = runif(M,0,1) w1=p1(draw1)/dnorm(draw1,0,sqrt(10)) w2=p2(draw2)/dgamma(draw2,1,1) w3=p3(draw3)/dgamma(draw3,1,1) w4=p4(draw4)/1 draw1.post = sample(draw1,size=N,replace=TRUE,prob=w1) draw2.post = sample(draw2,size=N,replace=TRUE,prob=w2) draw3.post = sample(draw3,size=N,replace=TRUE,prob=w3) draw4.post = sample(draw4,size=N,replace=TRUE,prob=w4) par(mfrow=c(2,2)) hist(draw1.post,breaks=30,prob=TRUE) curve(p1,from=-2,to=10,n=1000,ylab="Density",add=TRUE,col=2) hist(draw2.post,breaks=30,prob=TRUE) curve(p2,from=0,to=10,n=1000,ylab="Density",add=TRUE,col=2) hist(draw3.post,breaks=30,prob=TRUE) curve(p3,from=0,to=10,n=1000,ylab="Density",add=TRUE,col=2) hist(draw4.post,breaks=30,prob=TRUE) curve(p4,from=0,to=1,n=1000,ylab="Density",add=TRUE,col=2) ############################################## # PROBLEM 2: Using SIR for Bayesian inference ############################################## post = function(theta){ (1+theta)^(125)*(1-theta)^(38)*theta^(34) } curve(post,from=0,to=1,n=100) summary = function(theta){ theta = sort(theta) N = length(theta) E = mean(theta) V = var(theta) P = mean(theta<0.6) L = theta[trunc(0.025*N)][1] U = theta[trunc(0.975*N)][1] return(c(E,V,P,L,U)) } # Numerical integration (on a fine grid) thetas = seq(0,1,length=10000) h = thetas[2]-thetas[1] pred = sum(post(thetas)*h) E = sum(thetas*post(thetas)*h)/pred V = sum((thetas-E)^2*post(thetas)*h)/pred P = sum(post(thetas[thetas<=0.6])*h)/pred cdf = cumsum(post(thetas)*h)/pred L = thetas[cdf>0.025][1] U = thetas[cdf>0.975][1] true = c(E,V,P,L,U) set.seed(12345) M = c(10000,100000,1000000) N = 10000 thetas.q = cbind(runif(M[3]),runif(M[3],0.4,0.9)) thetas.p = array(0,c(N,2,3)) summaries = matrix(0,5,6) l = 0 par(mfrow=c(2,3)) for (i in 1:2){ for (j in 1:3){ w = post(thetas.q[1:M[j],i]) thetas.p[,i,j] = sample(thetas.q[1:M[j],i],size=N,replace=TRUE,prob=w) l = l + 1 summaries[,l] = summary(thetas.p[,i,j]) hist(thetas.p[,i,j],prob=TRUE,xlab="",main="",xlim=c(0,1)) legend("topleft",legend=paste("q",i," - M=",M[j],sep=""),bty="n") } } names = c("q1,M1","q2,M1","q1,M2","q2,M2","q1,M3","q2,M3") summ = c("Mean","Var","Tail prob","Lower percentile","Upper percentile") tab = round(summaries,4) colnames(tab) = c("q1,M1","q2,M1","q1,M2","q2,M2","q1,M3","q2,M3") rownames(tab) = c("Mean","Var","Tail prob","Lower percentile","Upper percentile") tab ############ FROM HERE ON WASN'T IN THE HW2 ############ ############ I DECIDED TO INCLUDE AN EXERCISE ########## ############ BASED ON MONTE CARLO REPLICATIONS ######### # Monte Carlo error set.seed(12345) M = c(1000,5000,10000) N = 500 Rep = 100 summaries = array(0,c(Rep,5,6)) for (rep in 1:Rep){ thetas.q = cbind(runif(M[3]),runif(M[3],0.4,0.9)) thetas.p = array(0,c(N,2,3)) l = 0 for (i in 1:2){ for (j in 1:3){ l = l + 1 w = post(thetas.q[1:M[j],i]) thetas.p[,i,j] = sample(thetas.q[1:M[j],i],size=N,replace=TRUE,prob=w) summaries[rep,,l] = summary(thetas.p[,i,j]) } } } names = c("q1,M1","q2,M1","q1,M2","q2,M2","q1,M3","q2,M3") summ = c("Mean","Var","Tail prob","Lower percentile","Upper percentile") par(mfrow=c(2,3)) for (i in 1:5){ boxplot(summaries[,i,c(1,4,2,5,3,6)],names=names,main=summ[i]) abline(v=2.5,lty=2) abline(v=4.5,lty=2) abline(h=true[i],col=2) }