###################################################### # # Unit 2 - Example v. # # O-ring failures by temperature # # Christensen (1997) and Congdon (2001) analyze 23 # binary observations of O-ring failures y[i] (1=failure) # in relation to temperature t[t]$ (Fahrenheit).} # # y = (1,1,1,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0) # t = (53,57,58,63,66,67,67,67,68,69,70,70,70,70,72, # 73,75,75,76,76,78,79,81) # ###################################################### # # Author : Hedibert Freitas Lopes # Graduate School of Business # University of Chicago # 5807 South Woodlawn Avenue # Chicago, Illinois, 60637 # Email : hlopes@ChicagoGSB.edu # ###################################################### prior = function(b){dnorm(b,b0,Sb)} like = function(b){prod(exp((a+b*x)*y)/(1+exp(a+b*x)))} fun = function(b){prior(b)*like(b)} post = function(b){prior(b)*like(b)/py} # Logit link like1 = function(b){ theta = 1/(1+exp(-a-b*x)) prod((theta^y)*((1-theta)^(1-y))) } # Probit link like2 = function(b){ theta = pnorm(a+b*x) prod((theta^y)*((1-theta)^(1-y))) } # Complementary log-log link like3 = function(b){ theta = 1-exp(-exp(a+b*x)) prod((theta^y)*((1-theta)^(1-y))) } # O-ring data # ----------- y = c(1,1,1,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0) t = c(53,57,58,63,66,67,67,67,68,69,70,70,70,70,72,73,75,75,76,76,78,79,81) x = t - mean(t) a = -1.26 # regression intercept b0 = 0.0 # Prior mean Vb = 10 # Prior variance Sb = sqrt(Vb) # Plotting the data # ----------------- par(mfrow=c(1,1)) plot(x,y,xlab="Centered temperature (in Fahrenheit, mean=69.6)",ylab="O-ring failure") # Plotting prior density and likelihood function # ---------------------------------------------- par(mfrow=c(1,2)) bs = seq(-15,15,length=200) bs1 = seq(-1,0.1,length=200) p = NULL l = NULL for (i in 1:200){ p = c(p,prior(bs[i])) l = c(l,like(bs1[i])) } plot(bs,p,type="l",xlab=expression(beta),ylab="",main=expression(p(beta))) plot(bs1,l,type="l",xlab=expression(beta),ylab="",main=expression(l(beta,y))) # Plotting prior*likelihood # ------------------------- bs = seq(-1,0.1,length=200) f = NULL for (i in 1:200){ f = c(f,fun(bs1[i])) } par(mfrow=c(1,1)) plot(bs,f,type="l",xlab=expression(beta),ylab="",main="") # computing p(y) # -------------- bs = seq(-1,0.1,by=0.0001) f = NULL for (i in 1:length(bs)) f = c(f,fun(bs[i])) py = (bs[2]-bs[1])*sum(f) # posterior # --------- bs = seq(-1,0.1,length=200) p = NULL for (i in 1:200) p = c(p,post(bs[i])) par(mfrow=c(1,1)) plot(bs,p,type="l",xlab=expression(beta),ylab="",main="") # computing E(beta|y) and V(beta|y) - simple integration rule # ----------------------------------------------------------- h = 0.0001 bs1= seq(-1,0.1,by=h) m1 = 0.0 m2 = 0.0 for (i in 1:length(bs1)){ f = fun(bs1[i]) m1 = m1 + bs1[i]*f m2 = m2 + bs1[i]^2*f } m1 = m1*h/py m2 = m2*h/py V = m2-m1^2 # computing p(y) by Monte Carlo integration # ----------------------------------------- M = 50000 betas = rnorm(M,b0,sqrt(Vb)) l = NULL for (i in 1:M) l = c(l,like(betas[i])) py.mc = mean(l) mcerror = sqrt(mean((l-py.mc)^2)/M^2) # Sampling from p(beta|y) by SIR # ------------------------------ w = l/sum(l) ind = sample(1:M,size=M,replace=T,prob=w) betas1 = betas[ind] breaks = seq(min(betas1),max(betas1),length=40) par(mfrow=c(1,1)) hist(betas1,breaks=breaks,prob=T,xlab=expression(beta),ylab="", main="",ylim=c(0,max(p))) lines(bs,p,col=2) # Posterior predictive # -------------------- xs = seq(31,81,by=2)-mean(t) ps = NULL for (i in 1:length(xs)){ A = exp(a+betas1*xs[i]) ps = c(ps,mean(A/(1+A))) } z = xs[(ps<0.51)&(ps>0.49)]+mean(t) plot(t,y,xlim=range(xs+mean(t)),ylab="O-ring failure", xlab="Centered temperature (in Fahrenheit)",axes=F) axis(2) axis(1,at=sort(c(seq(30,80,by=10),z))) lines(xs+mean(t),ps,col=2,lwd=3) segments(z,0,z,0.5,col=3,lwd=3) segments(0,0.5,z,0.5,col=3,lwd=3) ########################################################################### # # Several link functions # ########################################################################### # Sampling from p(beta|y) by SIR where q(beta)=prior # -------------------------------------------------- M = 50000 betas = rnorm(M,b0,sqrt(Vb)) l = matrix(0,M,3) for (i in 1:M){ l[i,1] = like1(betas[i]) l[i,2] = like2(betas[i]) l[i,3] = like3(betas[i]) } # Predictive densities py1 = mean(l[,1]) py2 = mean(l[,2]) py3 = mean(l[,3]) py = c(py1,py2,py3) # Posterior model probabilities (assuming prior Pr(M)=1/3) pm = py/sum(py) # sampling from p(beta|y) w = matrix(0,M,3) betas1 = matrix(0,M,3) for (i in 1:3){ w[,i] = l[,i]/sum(l[,i]) betas1[,i] = sample(betas,size=M,replace=T,prob=w[,i]) } # Plotting the posterior distributions of beta plot(density(betas1[,2],bw=0.02),xlim=c(-1,0.1),xlab="beta",ylab="",main="") lines(density(betas1[,1],bw=0.02),col=1,lwd=2) lines(density(betas1[,2],bw=0.02),col=2,lwd=2) lines(density(betas1[,3],bw=0.02),col=3,lwd=2) legend(-0.8,5,legend=c("Logit model","Probit Model","Complementary log-log model"),lty=c(1,1,1),col=1:3,bty="n",lwd=c(2,2,2)) # Computing posterior predictive, p(ynew|xnew,yold,xold) xs = seq(min(t),max(t),by=0.25)-mean(t) ps1 = rep(0,length(xs)) ps2 = rep(0,length(xs)) ps3 = rep(0,length(xs)) for (i in 1:length(xs)){ ps1[i] = mean(1/(1+exp(-a-betas1[,1]*xs[i]))) ps2[i] = mean(pnorm(a+betas1[,2]*xs[i])) ps3[i] = mean(1-exp(-exp(a+betas1[,3]*xs[i]))) } # Average across competing models ps = cbind(ps1,ps2,ps3)%*%pm # Graphical summary par(mfrow=c(1,1)) plot(t,y,xlim=range(xs+mean(t)),ylab="O-ring failure", xlab="Temperature (in Fahrenheit)",axes=F) points(t,y,pch=16) axis(2) axis(1,at=seq(min(t),max(t),by=2)) lines(xs+mean(t),ps,lty=1,col=1,lwd=2) lines(xs+mean(t),ps1,lty=2,col=2,lwd=2) lines(xs+mean(t),ps2,lty=3,col=3,lwd=2) lines(xs+mean(t),ps3,lty=4,col=4,lwd=2) legend(53,0.5,legend=c("Model averaging", "Logit model","Probit Model","Complementary log-log model"), lty=1:4,col=1:4,lwd=rep(2,4),bty="n")