############################################################################# # # LINEAR TOBIT REGRESSION # ############################################################################# # # Married Women’s Annual Labor Supply # # Wooldridge (5th edition, Chapter 7, Example 17.2) # # inlf (“in the labor force”) be a binary variable indicating # labor force participation by a married woman during 1975: # inlf 1 if the woman reports working for a wage outside the # home at some point during the year, and zero otherwise. # # It is assumed that labor force participation depends on other # sources of income, including husband’s earnings (nwifeinc , # measured in thousands of dollars), years of education (educ ), # past years of labor market experience (exper ), age , # number of children less than six years old (kidslt6 ), # and number of kids between 6 and 18 years of age (kidsge6 ). # Using the data in MROZ.RAW from Mroz (1987) 428 of the 753 # women in the sample report being in the labor force at some # point during 1975. # # The file MROZ.RAW includes data on hours worked for 753 married women, # 428 of whom worked for a wage outside the home during the year; 325 of # the women worked zero hours. For the women who worked positive hours, # the range is fairly broad, extending from 12 to 4,950. Thus, annual # hours worked is a good candidate for a Tobit model. # # Obs: 753 # # 1. inlf =1 if in labor force, 1975 # 2. hours hours worked, 1975 # 3. kidslt6 # kids < 6 years # 4. kidsge6 # kids 6-18 # 5. age woman's age in yrs # 6. educ years of schooling # 7. wage estimated wage from earns., hours # 8. repwage reported wage at interview in 1976 # 9. hushrs hours worked by husband, 1975 # 10. husage husband's age # 11. huseduc husband's years of schooling # 12. huswage husband's hourly wage, 1975 # 13. faminc family income, 1975 # 14. mtr fed. marginal tax rate facing woman # 15. motheduc mother's years of schooling # 16. fatheduc father's years of schooling # 17. unem unem. rate in county of resid. # 18. city =1 if live in SMSA # 19. exper actual labor mkt exper # 20. nwifeinc (faminc - wage*hours)/1000 # 21. lwage log(wage) # 22. expersq exper^2 # # Mroz (1987) The Sensitivity of an Empirical Model of Married # Women’s Hours of Work to Economic and Statistical Assumptions, # Econometrica, 55, 765–799. # ############################################################################# rm(list=ls()) bayesreg = function(y,x,M0,M,L){ X = cbind(1,x) tciXtX = t(chol(solve(t(X)%*%X))) reg = lm(y~x) bhat = reg$coef s2hat = sqrt(mean(reg$res^2)) s2 = s2hat niter = M0+L*M pars = matrix(0,niter,3) for (iter in 1:niter){ b = bhat + sqrt(s2)*tciXtX%*%rnorm(2) s2 = 1/rgamma(1,n/2,sum((y-b[1]-b[2]*x)^2)/2) pars[iter,] = c(b,s2) } return(pars) } ########################################################################### pdf(file="Tobit-model.pdf",width=10,height=8) data = read.table("MROZ.txt",header=TRUE) attach(data) n = nrow(data) L = min(hours[hours>0]) U = max(hours) par(mfrow=c(1,2)) hist(hours[hours>0],breaks=seq(L,U,length=30), xlab="hours worked, 1975",main="Excluding 0's") hist(hours[hours>0],breaks=seq(L,U,length=30),ylim=c(0,350), xlab="hours worked, 1975",main="Including 0's") points(0,sum(hours==0),pch=16,cex=2) segments(0,0,0,sum(hours==0),lwd=3) above0 = rep(0,n) above0[hours>0]=1 par(mfrow=c(2,3)) plot(nwifeinc,hours,col=2-above0,pch=16,ylab="hours worked, 1975", xlab="(faminc - wage*hours)/1000") abline(lm(hours~nwifeinc),col=2,lwd=2) abline(lm(hours[above0==1]~nwifeinc[above0==1]),lwd=2) plot(educ,hours,col=2-above0,pch=16,ylab="hours worked, 1975", xlab="Years of Schooling") abline(lm(hours~educ),col=2,lwd=2) abline(lm(hours[above0==1]~educ[above0==1]),lwd=2) plot(exper,hours,col=2-above0,pch=16,ylab="hours worked, 1975", xlab="actual labor mkt exper") abline(lm(hours~exper),col=2,lwd=2) abline(lm(hours[above0==1]~exper[above0==1]),lwd=2) plot(age,hours,col=2-above0,pch=16,ylab="hours worked, 1975", xlab="woman's age in yrs") abline(lm(hours~age),col=2,lwd=2) abline(lm(hours[above0==1]~age[above0==1]),lwd=2) plot(kidslt6,hours,col=2-above0,pch=16,ylab="hours worked, 1975", xlab="kids < 6 years") abline(lm(hours~kidslt6),lwd=2,col=2) abline(lm(hours[above0==1]~kidslt6[above0==1]),lwd=2) plot(kidsge6,hours,col=2-above0,pch=16,ylab="hours worked, 1975", xlab="kids 6-18") abline(lm(hours~kidsge6),lwd=2,col=2) abline(lm(hours[above0==1]~kidsge6[above0==1]),lwd=2) #################################################### # Bayesian inference when y:hours and x:exper #################################################### x = exper y = hours # MCMC setup M0 = 10000 M = 10000 L = 1 niter = M0+L*M # Bayesian linear regression: # Ignoring that y=0 represents missing data set.seed(726972) pars1 = bayesreg(y,x,M0,M,L) # Bayesian linear regression: # Discarding the observations where y=0 set.seed(726972) pars2 = bayesreg(y[y>0],x[y>0],M0,M,L) # Bayesian Tobit regression # Acknowledging the missing data X = cbind(1,x) XtX = t(X)%*%X iXtX = solve(XtX) Xty = t(X)%*%y tciXtX= t(chol(iXtX)) A = iXtX%*%t(X) nzero = sum(y==0) x1 = x[y==0] # Initial values reg = lm(y[y>0]~x[y>0]) b = reg$coef s = sqrt(mean(reg$res^2)) y11 = y pars3 = matrix(0,niter,3) for (iter in 1:niter){ b = A%*%y11 + s*tciXtX%*%rnorm(2) s2 = 1/rgamma(1,n/2,sum((y11-b[1]-b[2]*x)^2)/2) s = sqrt(s2) u = runif(nzero) xxx = u*pnorm(0,b[1]+b[2]*x1,s) y11[y==0] = qnorm(xxx,b[1]+b[2]*x1,s) pars3[iter,] = c(b,s2) } pars3 = pars3[seq(M0+1,niter,by=L),] par(mfrow=c(3,3)) ts.plot(pars3[,1],xlab="iteration",ylab="",main=expression(beta[0])) ts.plot(pars3[,2],xlab="iteration",ylab="",main=expression(beta[1])) ts.plot(sqrt(pars3[,3]),xlab="iteration",ylab="",main=expression(sigma)) acf(pars3[,1],main="") acf(pars3[,2],main="") acf(sqrt(pars3[,3]),main="") hist(pars3[,1],xlab="",main="",prob=TRUE) hist(pars3[,2],xlab="",main="",prob=TRUE) hist(sqrt(pars3[,3]),xlab="",main="",prob=TRUE) coef1 = round(apply(pars1,2,mean),2) coef2 = round(apply(pars2,2,mean),2) coef3 = round(apply(pars3,2,mean),2) par(mfrow=c(1,1)) plot(exper,hours,col=2-above0,pch=16,ylab="hours worked, 1975", xlab="woman's age in yrs",ylim=c(-500,5000)) abline(coef1[1],coef1[2],col=2,lwd=2) abline(coef2[1],coef2[2],col=1,lwd=2) abline(coef3[1],coef3[2],col=4,lwd=2) text(30,5000,paste("hours = ",coef1[1]," + ",coef1[2],"*exper",sep=""),col=2) text(30,4750,paste("hours = ",coef2[1]," + ",coef2[2],"*exper",sep=""),col=1) text(30,4500,paste("hours = ",coef3[1]," + ",coef3[2],"*exper",sep=""),col=4) par(mfrow=c(1,1)) plot(density(pars1[,1]),xlim=range(pars1[,1],pars2[,1],pars3[,1]), ylim=c(0,0.01),col=2,main=expression(beta[0]),xlab="") lines(density(pars2[,1]),col=1) lines(density(pars3[,1]),col=4) par(mfrow=c(1,1)) plot(density(pars1[,2]),xlim=range(pars1[,2],pars2[,2],pars3[,2]), ylim=c(0,0.12),col=2,main=expression(beta[1]),xlab="") lines(density(pars2[,2]),col=1) lines(density(pars3[,2]),col=4) dev.off()