############################################################################################ # # Piece-wise linear regression # ############################################################################################ # # HEDIBERT FREITAS LOPES # Associate Professor of Econometrics and Statistics # The University of Chicago Booth School of Business # 5807 South Woodlawn Avenue # Chicago, Illinois, 60637 # Email : hlopes@ChicagoBooth.edu # URL: http://faculty.chicagobooth.edu/hedibert.lopes/research/ # ############################################################################################ rm(list=ls()) pred = function(y,X,mu,iVb,lambda,nu){ n = nrow(X) In = diag(1,n) P = (In-X%*%solve(iVb+t(X)%*%X)%*%t(X))/lambda logdet = n*log(lambda)+log(det(iVb+t(X)%*%X))-log(det(iVb)) lpred = -0.5*logdet-0.5*(nu+n)*log(1+t(y-X%*%mu)%*%P%*%(y-X%*%mu)/nu) return(lpred) } data = read.csv("SA-healtheducation-household.csv",header=TRUE) n = nrow(data) x = data[,1] y = data[,2]/data[,3] pdf(file="results.pdf",width=15,height=10) #pdf(file="data.pdf",width=15,height=10) par(mfrow=c(1,2)) plot(data[,1],log(data[,2]),type="b",pch=16,ylim=range(log(data[,2:3])), xlab="Year",ylab="Expenditure by household") lines(data[,1],log(data[,3]),type="b",col=2,pch=16) plot(x,y,ylim=c(0.5,4),xlab="Year",ylab="health/education ratio",pch=16) #dev.off() #pdf(file="ols.pdf",width=15,height=10) par(mfrow=c(2,2)) plot(x,y,ylim=c(0.5,4),xlab="Year",ylab="health/education ratio") reg = lm(y~x) abline(reg$coef,col=2,lwd=3) title(paste("s2=",round(mean(reg$res^2),4),sep="")) plot(x,y,ylim=c(0.5,4),xlab="Year",ylab="health/education ratio") cut = 20 x1 = x[1:cut] x2 = x[(cut+1):n] y1 = y[1:cut] y2 = y[(cut+1):n] reg1 = lm(y1~x1) reg2 = lm(y2~x2) lines(x1,reg1$fit,lwd=3,col=2) lines(x2,reg2$fit,lwd=3,col=2) title(paste("s2=",round((mean(reg1$res^2)+mean(reg2$res^2))/2,4),sep="")) plot(x,y,ylim=c(0.5,4),xlab="Year",ylab="health/education ratio") cut1 = 20 cut2 = 52 x1 = x[1:cut1] x2 = x[(cut1+1):cut2] x3 = x[(cut2+1):n] y1 = y[1:cut1] y2 = y[(cut1+1):cut2] y3 = y[(cut2+1):n] reg1 = lm(y1~x1) reg2 = lm(y2~x2) reg3 = lm(y3~x3) lines(x1,reg1$fit,lwd=3,col=2) lines(x2,reg2$fit,lwd=3,col=2) lines(x3,reg3$fit,lwd=3,col=2) title(paste("s2=",round((mean(reg1$res^2)+mean(reg2$res^2)+mean(reg3$res^2))/3,4),sep="")) plot(x,y,ylim=c(0.5,4),xlab="Year",ylab="health/education ratio") cut1 = 20 cut2 = 40 cut3 = 52 x1 = x[1:cut1] x2 = x[(cut1+1):cut2] x3 = x[(cut2+1):cut3] x4 = x[(cut3+1):n] y1 = y[1:cut1] y2 = y[(cut1+1):cut2] y3 = y[(cut2+1):cut3] y4 = y[(cut3+1):n] reg1 = lm(y1~x1) reg2 = lm(y2~x2) reg3 = lm(y3~x3) reg4 = lm(y4~x4) lines(x1,reg1$fit,lwd=3,col=2) lines(x2,reg2$fit,lwd=3,col=2) lines(x3,reg3$fit,lwd=3,col=2) lines(x4,reg4$fit,lwd=3,col=2) title(paste("s2=",round((mean(reg1$res^2)+mean(reg2$res^2)+mean(reg3$res^2)+mean(reg4$res^2))/4,4),sep="")) #dev.off() ####################################### # Bayesian inference ####################################### # Prior hyperparameters d = 2 mu = rep(0,d) Vb = diag(1000^2,d) lambda = 0.01333 nu = 10 iVb = solve(Vb) # Model search set.seed(12321645) niter = 10000 end0 = c(20,40,52,n) end = c(10,30,55,n) nn = length(end) beg = c(1,end[1:(nn-1)]+1) prior = matrix(0,n,nn-1) for (i in 1:(nn-1)) prior[,i] = dnorm(1:n,end0[i],2) draws = matrix(0,niter,nn-1) beg1 = beg end1 = end for (iter in 1:niter){ print(iter) for (i in 1:(nn-1)) end1[i] = sample(1:n,size=1,prob=prior[,i]) if (min(diff(end1))>0){ beg1 = c(1,end1[1:(nn-1)]+1) num = 0 den = 0 for (i in 1:nn){ ind = beg[i]:end[i] ind1 = beg1[i]:end1[i] num = num + pred(y[ind1],cbind(1,x[ind1]),mu,iVb,lambda,n) den = den + pred(y[ind],cbind(1,x[ind]),mu,iVb,lambda,n) } if (log(runif(1))<(num-den)){ beg = beg1 end = end1 } } draws[iter,] = end[1:(nn-1)] } freq = matrix(0,n,nn-1) for (i in 1:n) for (j in 1:(nn-1)) freq[i,j] = mean(draws[,j]==i) par(mfrow=c(nn-1,2)) for (i in 1:(nn-1)){ ts.plot(draws[,i],xlab="Iteration",ylab="",main=paste("t",i,sep="")) plot(1:n,freq[,i],type="h",xlab="",main="") lines(1:n,prior[,i],col=2) } fits = matrix(0,niter,n) for (iter in 1:niter){ end = c(draws[iter,],n) beg = c(1,end[1:(nn-1)]+1) xxx = NULL fit = NULL for (i in 1:nn){ ind = beg[i]:end[i] X = cbind(1,x[ind]) y1 = y[ind] b = solve(iVb+t(X)%*%X)%*%(iVb%*%mu+t(X)%*%y1) xx = x[beg[i]]:x[end[i]] ff = b[1]+b[2]*xx fit = c(fit,ff) xxx = c(xxx,xx) } fits[iter,] = fit } means = apply(fits,2,mean) #pdf(file="fits1.pdf",width=15,height=10) par(mfrow=c(1,1)) plot(x,y,ylim=c(0.5,4),xlab="Year",ylab="health/education ratio",pch=16) cut1 = 20 cut2 = 40 cut3 = 52 x1 = x[1:cut1] x2 = x[(cut1+1):cut2] x3 = x[(cut2+1):cut3] x4 = x[(cut3+1):n] y1 = y[1:cut1] y2 = y[(cut1+1):cut2] y3 = y[(cut2+1):cut3] y4 = y[(cut3+1):n] reg1 = lm(y1~x1) reg2 = lm(y2~x2) reg3 = lm(y3~x3) reg4 = lm(y4~x4) lines(x1,reg1$fit,lwd=3,col=2) lines(x2,reg2$fit,lwd=3,col=2) lines(x3,reg3$fit,lwd=3,col=2) lines(x4,reg4$fit,lwd=3,col=2) points(x,means,col=4,pch=16,type="b") segments(x[freq[,1]>0.0001],0.5,x[freq[,1]>0.0001],0.5+freq[freq[,1]>0.0001,1],col=4,lwd=2) segments(x[freq[,2]>0.0001],0.5,x[freq[,2]>0.0001],0.5+freq[freq[,2]>0.0001,2],col=4,lwd=2) segments(x[freq[,3]>0.0001],0.5,x[freq[,3]>0.0001],0.5+freq[freq[,3]>0.0001,3],col=4,lwd=2) text(1960,4,paste("s2 ols=", round((mean(reg1$res^2)+mean(reg2$res^2)+mean(reg3$res^2)+mean(reg4$res^2))/4,4),sep=""),cex=2) text(1960,3.75,paste("s2 Bayes=", round(mean(apply((fits-matrix(y,niter,n,byrow=TRUE))^2,1,mean)),4),sep=""),cex=2) #dev.off() #pdf(file="fits2.pdf",width=15,height=10) par(mfrow=c(1,3)) for (i in 1:(nn-1)){ xx = x[freq[,i]>0]-x[1]+1 bp = boxplot.matrix(fits[,xx],names=xx+x[1],outline=FALSE) for (j in 1:length(xx)) text(xx[j]-xx[1]+1,bp$stats[5,j]+0.01,round(100*freq[xx[j],i],1)) } #dev.off() #pdf(file="joint-t1t2.pdf",width=15,height=10) par(mfrow=c(1,1)) x11 = x[freq[,1]>0.0001] x12 = x[freq[,2]>0.0001] n1 = length(x11) n2 = length(x12) plot(c(x11[1],x11[n1]),c(x12[1],x12[n2]),xlab="t1",ylab="t2",col=0,axes=FALSE) axis(2,at=x12);axis(1,at=x11);box() for (i in 1:n1){ for (j in 1:n2){ m = mean((draws[,1]==(x11[i]-x[1]))&(draws[,2]==(x12[j]-x[1]))) if (m>0.00001){ text(x11[i],x12[j],round(100*m,2),cex=round(20*m)) } } } dev.off()