rm(list=ls()) gibbs = function(y,x,mutheta,Vtheta,mubeta,Vbeta,a1,b1,a2,b2,M0,M,L,lambda,seed){ T = length(x) iVtheta = solve(Vtheta) iVthetamutheta = iVtheta%*%mutheta iVbeta = solve(Vbeta) iVbetamubeta = iVbeta%*%mubeta reg1 = lm(y[1:lambda]~x[1:lambda]) reg2 = lm(y[(lambda+1):T]~x[(lambda+1):T]) sig2 = mean(reg1$res^2) tau2 = mean(reg2$res^2) set.seed(seed) niter = M0+L*M draws = matrix(0,niter,7) X = cbind(1,x) for (iter in 1:niter){ # Step 1: sampling theta X1 = X[1:lambda,] y1 = y[1:lambda] Vtheta1 = solve(iVtheta + t(X1)%*%X1/sig2) mutheta1 = Vtheta1%*%(iVthetamutheta + t(X1)%*%y1/sig2) theta = mutheta1 + t(chol(Vtheta1))%*%rnorm(2) # Step 2: sampling sig2 e2 = sum((y1-X1%*%theta)^2) sig2 = 1/rgamma(1,a1+lambda/2,b1+e2/2) # Step 3: sampling beta X1 = X[(lambda+1):T,] y1 = y[(lambda+1):T] Vbeta1 = solve(iVbeta + t(X1)%*%X1/tau2) mubeta1 = Vbeta1%*%(iVbetamubeta + t(X1)%*%y1/tau2) beta = mubeta1 + t(chol(Vbeta1))%*%rnorm(2) # Step 4: sampling tau2 e2 = sum((y1-X1%*%beta)^2) tau2 = 1/rgamma(1,a2+(T-lambda)/2,b2+e2/2) # Step 5: sampling lambda w = rep(0,T-1) for (t in 1:(T-1)) w[t] = sum(dnorm(y[1:t],X[1:t,]%*%theta,sqrt(sig2),log=TRUE))+ sum(dnorm(y[(t+1):T],X[(t+1):T,]%*%beta,sqrt(tau2),log=TRUE)) w = exp(w-max(w)) lambda = sample(1:(T-1),size=1,prob=w) draws[iter,] = c(theta,sig2,beta,tau2,lambda) } return(draws) } summary = function(x){ c(mean(x),sqrt(var(x)),median(x),quantile(x,0.025),quantile(x,0.975)) } ########################################################################### # # Generate T=500 observations from the changepoint model # # y_t|x_t ~ N(0.0+2.0x_t,0.2), for t <= 85 # y_t|x_t ~ N(1.5+0.8x_t,0.5), for t > 85 # # where x_t ~ N(0,1) # ########################################################################### set.seed(21054) T = 500 lambda = 85 theta = c(0.0,2.0) sig2 = 0.2 beta = c(1.5,0.8) tau2 = 0.5 true = c(theta,sig2,beta,tau2,lambda) x = rnorm(T) y = rep(0,T) X = cbind(1,x) y[1:lambda] = theta[1]+theta[2]*x[1:lambda] + rnorm(lambda,sqrt(sig2)) y[(lambda+1):T] = beta[1]+beta[2]*x[(lambda+1):T]+ rnorm(T-lambda,sqrt(tau2)) pdf(file="finalexam-takehome-simulation-R.pdf",width=12,height=9) par(mfrow=c(1,1)) plot(x,y,xlab="X",ylab="Y",pch=16) ############################################################################ # Maximum likelihood estimation (MLE) ############################################################################ ts = 4:(T-3) nts = length(ts) llike = rep(0,nts) for (t in ts){ reg1 = lm(y[1:t]~x[1:t]) reg2 = lm(y[(t+1):T]~x[(t+1):T]) llike[t-ts[1]-1] = sum(dnorm(reg1$res,log=TRUE))+sum(dnorm(reg2$res,log=TRUE)) } like = exp(llike-max(llike)) like = like/sum(like) lambdahat = ts[like==max(like)] par(mfrow=c(1,1)) plot(ts,llike,type="l",xlab=expression(lambda),ylab="Log-likelihood") abline(v=lambdahat,lty=2) points(lambdahat,min(llike),pch=16,cex=2) text(300,-750,paste("lambda-hat=",lambdahat,sep="")) par(mfrow=c(1,1)) plot(ts[78:90],like[78:90],type="h",xlab=expression(lambda), ylab="Normalized likelihood",lwd=3) text(88,0.3,paste("lambda-hat=",lambdahat,sep="")) text(88,0.28,paste("true lambda=",lambda,sep="")) reg1 = lm(y[1:lambdahat]~x[1:lambdahat]) reg2 = lm(y[(lambdahat+1):T]~x[(lambdahat+1):T]) mle = round(c(reg1$coef,mean(reg1$res^2),reg2$coef,mean(reg2$res^2),lambdahat),2) par(mfrow=c(1,1)) plot(x,y,col=c(rep(1,lambda),rep(2,T-lambda)),xlab="X",ylab="Y",pch=16) abline(true[1:2],lwd=3) abline(true[4:5],lwd=3,col=2) abline(reg1$coef,lwd=3,lty=2) abline(reg2$coef,col=2,lwd=3,lty=2) legend(-3,6.25,legend=c(paste("y=",mle[1],"+",mle[2],"*x",sep=""), paste("y=",mle[4],"+",mle[5],"*x",sep="")), col=1:2,lwd=3,lty=2,bty="n") text(-2,6.5,"MLE estimates") legend(1,-2,legend=c(paste("y=",true[1],"+",true[2],"*x",sep=""), paste("y=",true[4],"+",true[5],"*x",sep="")), col=1:2,lwd=3,bty="n") text(1.8,-1.75,"TRUE values") ############################################################################ # Bayesian estimation ############################################################################ # Prior hyperparameters # --------------------- mutheta = matrix(0,2,1) Vtheta = diag(100,2) mubeta = matrix(0,2,1) Vbeta = diag(100,2) a1 = 3 b1 = 3 a2 = 0.5 b2 = 0.5 # MCMC set up & initial value # --------------------------- seed = 230543 M0 = 0 M = 1000 L = 1 lambda = lambdahat run = gibbs(y,x,mutheta,Vtheta,mubeta,Vbeta,a1,b1,a2,b2,M0,M,L,lambda,seed) parname = c("theta1","theta2","sig2","beta1","beta2","tau2","lambda") par(mfrow=c(3,3)) for (i in 1:3){ ts.plot(run[,i],xlab="iteration",ylab=parname[i]) abline(h=mle[i],lwd=2,col=3) } for (i in 1:3) acf(run[,i],main="") for (i in 1:3) hist(run[,i],xlab="",prob=TRUE,main="") par(mfrow=c(3,3)) for (i in 4:6){ ts.plot(run[,i],xlab="iteration",ylab=parname[i]) abline(h=mle[i],lwd=2,col=3) } for (i in 4:6) acf(run[,i],main="") for (i in 4:6) hist(run[,i],xlab="",prob=TRUE,main="") par(mfrow=c(1,3)) ts.plot(run[,7],xlab="iteration",ylab=parname[7]) abline(h=mle[7],lwd=2,col=3) acf(run[,7],main="") plot(table(run[,7])/M,type="h",ylab="",xlab="",main="") fx = matrix(0,M,T) for (i in 1:M){ fx[i,1:run[i,7]] = run[i,1]+run[i,2]*x[1:run[i,7]] fx[i,(run[i,7]+1):T] = run[i,4]+run[i,5]*x[(run[i,7]+1):T] } qfx = t(apply(fx,2,quantile,c(0.025,0.5,0.975))) type = rep(0,T) type[1:true[7]]=0 type[(true[7]+1):T]=1 par(mfrow=c(1,1)) plot(x,y,pch=15*type+1,xlab="X",ylab="Y") points(x,qfx[,2],col=2,pch=16) abline(reg1$coef,lwd=3,col=4) abline(reg2$coef,lwd=3,col=4) legend(2,-2,legend=c("t<=85","t>85"),pch=c(1,16),bty="n",cex=1.5) legend(2,1,legend=c("MLE","Bayes"),col=c(4,2),pch=16,bty="n",cex=1.5) dev.off() ############################################################################### # Data on yearly homicide rates, y_t, of young men between 15 and 29 years # of age (in 100,000 inhabitants) in the city of Sao Paulo from 1980 to 2009 # (T=30). ############################################################################### homicide = c(67.013164,85.89454101,82.77238719,119.7349466,160.4026286, 161.6597468,166.899996,177.1043736,161.5218547,197.6797453,205.0404588, 210.7597362,195.8328623,180.3019394,198.0452656,223.1396617,223.8610683, 220.0628459,238.4131361,273.1180898,267.0710926,257.7604174,214.2931845, 227.3388931,168.5799798,110.0643213,83.79087271,63.4692651,51.43414089, 56.25928242) y = homicide x = 1980:2009-1979 T = length(y) pdf(file="finalexam-takehome-realdata-R.pdf",width=12,height=9) par(mfrow=c(1,1)) plot(x,y,xlab="Year",ylab="Homicide rate",pch=16,axes=FALSE) axis(2);box();axis(1,at=x,lab=1979+x) ############################################################################ # Maximum likelihood estimation (MLE) ############################################################################ ts = 4:(T-3) nts = length(ts) llike = rep(0,nts) for (t in ts){ reg1 = lm(y[1:t]~x[1:t]) reg2 = lm(y[(t+1):T]~x[(t+1):T]) llike[t-ts[1]-1] = sum(dnorm(reg1$res,log=TRUE))+sum(dnorm(reg2$res,log=TRUE)) } like = exp(llike-max(llike)) like = like/sum(like) lambdahat = ts[like==max(like)] par(mfrow=c(1,1)) plot(1979+ts,llike,type="l",xlab=expression(lambda),ylab="Log-likelihood") abline(v=1979+lambdahat,lty=2) points(lambdahat,min(llike),pch=16,cex=2) text(2001,-30000,paste("lambda-hat=",1979+lambdahat,sep="")) par(mfrow=c(1,1)) plot(ts,like,type="h",xlab=expression(lambda), ylab="Normalized likelihood",lwd=3) reg1 = lm(y[1:lambdahat]~x[1:lambdahat]) reg2 = lm(y[(lambdahat+1):T]~x[(lambdahat+1):T]) mle = round(c(reg1$coef,mean(reg1$res^2),reg2$coef,mean(reg2$res^2),lambdahat),2) par(mfrow=c(1,1)) plot(x,y,xlab="Year",ylab="Homicide rate",pch=16,axes=FALSE) axis(2);box();axis(1,at=x,lab=1979+x) abline(reg1$coef,col=2,lwd=3) abline(reg2$coef,col=3,lwd=3) legend(15,100,legend=c(paste("homicide=",mle[1],"+",mle[2],"*(year-1979)",sep=""), paste("homicide=",mle[4],mle[5],"*(year-1979)",sep="")), col=2:3,lwd=3,bty="n") text(20,110,"MLE estimates") ############################################################################ # Bayesian estimation ############################################################################ # Prior hyperparameters # --------------------- mutheta = matrix(0,2,1) Vtheta = diag(1000^2,2) mubeta = matrix(0,2,1) Vbeta = diag(1000^2,2) a1 = 3 b1 = 3 a2 = 0.5 b2 = 0.5 # MCMC set up & initial value # --------------------------- seed = 230543 M0 = 0 M = 1000 L = 1 lambda = lambdahat run = gibbs(y,x,mutheta,Vtheta,mubeta,Vbeta,a1,b1,a2,b2,M0,M,L,lambda,seed) run1 = run run1[,3] = sqrt(run[,3]) run1[,6] = sqrt(run[,6]) run1[,7] = 1979+run[,7] t(round(apply(run1,2,summary),1)) parname = c("theta1","theta2","sig2","beta1","beta2","tau2","lambda") par(mfrow=c(3,3)) for (i in 1:3){ ts.plot(run[,i],xlab="iteration",ylab=parname[i]) abline(h=mle[i],lwd=2,col=3) } for (i in 1:3) acf(run[,i],main="") for (i in 1:3) hist(run[,i],xlab="",prob=TRUE,main="") par(mfrow=c(3,3)) for (i in 4:6){ ts.plot(run[,i],xlab="iteration",ylab=parname[i]) abline(h=mle[i],lwd=2,col=3) } for (i in 4:6) acf(run[,i],main="") for (i in 4:6) hist(run[,i],xlab="",prob=TRUE,main="") par(mfrow=c(1,3)) ts.plot(run[,7],xlab="iteration",ylab=parname[7]) abline(h=mle[7],lwd=2,col=3) acf(run[,7],main="") plot(table(run[,7])/M,type="h",ylab="",xlab="",main="") fx = matrix(0,M,T) for (i in 1:M){ fx[i,1:run[i,7]] = run[i,1]+run[i,2]*x[1:run[i,7]] fx[i,(run[i,7]+1):T] = run[i,4]+run[i,5]*x[(run[i,7]+1):T] } qfx = t(apply(fx,2,quantile,c(0.025,0.5,0.975))) par(mfrow=c(1,1)) plot(x,y,pch=16,xlab="Year",ylab="Homicide rate") points(x,qfx[,2],col=2,pch=16) abline(reg1$coef,lwd=3,col=4) abline(reg2$coef,lwd=3,col=4) legend(15,100,legend=c("MLE","Bayes"),col=c(4,2),pch=16,bty="n",cex=1.5) dev.off()