############################################################################# # # BERNOULLI REGRESSION: LOGIT & PROBIT LINK FUNCTIONS # ############################################################################# # # Married Women’s Annual Labor Supply # # Wooldridge (5th edition, Chapter 7, Example 17.2) # # The file MROZ.RAW includes data on hours worked for 753 married women, # 428 of whom worked for a wage outside the home during the year; 325 of # the women worked zero hours. For the women who worked positive hours, # the range is fairly broad, extending from 12 to 4,950. Thus, annual # hours worked is a good candidate for a Tobit model. # # Obs: 753 # # 1. inlf =1 if in labor force, 1975 # 2. hours hours worked, 1975 # 3. kidslt6 # kids < 6 years # 4. kidsge6 # kids 6-18 # 5. age woman's age in yrs # 6. educ years of schooling # 7. wage estimated wage from earns., hours # 8. repwage reported wage at interview in 1976 # 9. hushrs hours worked by husband, 1975 # 10. husage husband's age # 11. huseduc husband's years of schooling # 12. huswage husband's hourly wage, 1975 # 13. faminc family income, 1975 # 14. mtr fed. marginal tax rate facing woman # 15. motheduc mother's years of schooling # 16. fatheduc father's years of schooling # 17. unem unem. rate in county of resid. # 18. city =1 if live in SMSA # 19. exper actual labor mkt exper # 20. nwifeinc (faminc - wage*hours)/1000 # 21. lwage log(wage) # 22. expersq exper^2 # # Mroz (1987) The Sensitivity of an Empirical Model of Married # Women’s Hours of Work to Economic and Statistical Assumptions, # Econometrica, 55, 765–799. # ############################################################################# rm(list=ls()) logpost = function(y,x,beta){ theta = 1/(1+exp(beta[1]+beta[2]*x)) sum(y*log(theta)+(1-y)*log(1-theta)) } logpost1 = function(y,x,beta){ theta = pnorm(beta[1]+beta[2]*x) sum(y*log(theta)+(1-y)*log(1-theta)) } pdf(file="BernoulliRegression-logit-probit-links.pdf",width=12,height=8) data = read.table("MROZ.txt",header=TRUE) attach(data) par(mfrow=c(2,3)) boxplot(nwifeinc[inlf==0],nwifeinc[inlf==1],horizontal=TRUE,names=c("0","1"), xlab="(faminc - wage*hours)/1000",ylab="In labor force in 1975?",axes=FALSE) axis(1);box();axis(2,at=1:2,lab=c("NO","YES")) ueduc = sort(unique(educ[inlf==0])) plot(table(educ[inlf==0])/sum(inlf==0),xlab="Years of Schooling", ylab="Relative Frequency") ueduc = sort(unique(educ[inlf==0])) lines(ueduc+0.25,table(educ[inlf==1])/sum(inlf==1),type="h",col=2,lwd=2) uexper = sort(unique(exper[inlf==0])) plot(table(exper[inlf==0])/sum(inlf==0),xlab="actual labor mkt exper", ylab="Relative Frequency",type="b") uexper = sort(unique(exper[inlf==1])) lines(uexper+0.25,table(exper[inlf==1])/sum(inlf==1),type="b",col=2,lwd=2) uage = sort(unique(age[inlf==0])) plot(table(age[inlf==0])/sum(inlf==0),xlab="woman's age in yrs", ylab="Relative Frequency",type="b") uage = sort(unique(age[inlf==1])) lines(uage+0.25,table(age[inlf==1])/sum(inlf==1),type="b",col=2,lwd=2) uk1 = sort(unique(kidslt6[inlf==0])) plot(table(kidslt6[inlf==0])/sum(inlf==0),xlab="kids < 6 years", ylab="Relative Frequency",type="b",ylim=c(0,1)) uk1 = sort(unique(kidslt6[inlf==1])) lines(uk1,table(kidslt6[inlf==1])/sum(inlf==1),type="b",col=2,lwd=2) uk2 = sort(unique(kidsge6[inlf==1])) plot(table(kidsge6[inlf==1])/sum(inlf==1),xlab="kids 6-18", ylab="Relative Frequency",type="b",ylim=c(0,0.4),xlim=c(0,8),col=2) uk2 = sort(unique(kidsge6[inlf==0])) lines(uk2,table(kidsge6[inlf==0])/sum(inlf==0),type="b",lwd=2) #################################### # BERNOULLI REGRESSIONS #################################### y = inlf x = exper xs = sort(unique(x)) nx = length(xs) count = matrix(0,nx,2) for (i in 1:nx){ count[i,1] = sum(x[y==0]==xs[i]) count[i,2] = sum(x[y==1]==xs[i]) } #################################### # LOGIT MODEL #################################### # Posterior density N = 100 b1 = seq(0.2,1.4,length=N) b2 = seq(-0.16,-0.05,length=N) lpost = matrix(0,N,N) for (i in 1:N) for (j in 1:N) lpost[i,j] = logpost(y,x,c(b1[i],b2[j])) post = exp(lpost-max(lpost)) par(mfrow=c(2,3)) contour(b1,b2,post,xlab=expression(beta[0]),ylab=expression(beta[1]), drawlabels=FALSE,main="Posterior distribution") plot(b1,apply(post,1,sum),type="l",ylab="Density",main="",xlab="") title(expression(beta[0])) plot(b2,apply(post,2,sum),type="l",ylab="Density",main="",xlab="") title(expression(beta[1])) # Proposal: uniform in the square set.seed(2357397) M = 1000000 M1 = 0.01*M b1s = runif(M,0.2,1.4) b2s = runif(M,-0.16,-0.05) w = rep(0,M) for (i in 1:M) w[i] = logpost(y,x,c(b1s[i],b2s[i])) w = exp(w-max(w)) k = sample(1:M,size=M1,replace=TRUE,prob=w) b1s = b1s[k] b2s = b2s[k] plot(b1s,b2s,xlab=expression(beta[0]),ylab=expression(beta[1]),main="",pch=16) title("Posterior distribution") contour(b1,b2,post,drawlabels=FALSE,add=TRUE,col=2,lwd=2) xxx = seq(min(x),max(x),by=1) N1 = length(xxx) thetas = matrix(0,N1,M1) for (i in 1:N1){ mu = b1s+b2s*xxx[i] thetas[i,] = 1/(1+exp(mu)) } qthetas = apply(thetas,1,quantile,c(0.025,0.5,0.975)) plot(xxx,qthetas[2,],type="l",lwd=2,col=2,ylim=c(-0.1,1.1), xlab="actual labor mkt exper", ylab="Prob of being in labor force, 1975") lines(xxx,qthetas[1,],col=2,lty=2) lines(xxx,qthetas[3,],col=2,lty=2) abline(h=0.5,lty=3) abline(h=0.8,lty=3) plot(density(thetas[6,]),xlim=c(0.2,1),ylim=c(0,20),main="", xlab="Prob of being in labor force, 1975") lines(density(thetas[11,]),col=2) lines(density(thetas[16,]),col=3) lines(density(thetas[21,]),col=4) lines(density(thetas[26,]),col=5) lines(density(thetas[31,]),col=6) legend(0.2,20,legend=c("5y","10y","15y","20y","25y","30y"), col=1:6,lwd=2,bty="n") text(0.275,20,"actual labor mkt exper") draw.logit = cbind(b1s,b2s) theta.logit = thetas qtheta.logit = qthetas #################################### # PROBIT MODEL #################################### # Posterior density N = 100 b1 = seq(-0.8,0,length=N) b2 = seq(0.03,0.1,length=N) lpost = matrix(0,N,N) for (i in 1:N) for (j in 1:N) lpost[i,j] = logpost1(y,x,c(b1[i],b2[j])) lpost[1:10,1:10] post = exp(lpost-max(lpost)) par(mfrow=c(2,3)) contour(b1,b2,post,xlab=expression(beta[0]),ylab=expression(beta[1]), drawlabels=FALSE,main="Posterior distribution") plot(b1,apply(post,1,sum),type="l",ylab="Density",main="",xlab="") title(expression(beta[0])) plot(b2,apply(post,2,sum),type="l",ylab="Density",main="",xlab="") title(expression(beta[1])) # Proposal: uniform in the square set.seed(2357397) M = 1000000 M1 = 0.01*M b1s = runif(M,-0.8,0) b2s = runif(M,0.03,0.1) w = rep(0,M) for (i in 1:M) w[i] = logpost1(y,x,c(b1s[i],b2s[i])) w = exp(w-max(w)) k = sample(1:M,size=M1,replace=TRUE,prob=w) b1s = b1s[k] b2s = b2s[k] plot(b1s,b2s,xlab=expression(beta[0]),ylab=expression(beta[1]),main="",pch=16) title("Posterior distribution") contour(b1,b2,post,drawlabels=FALSE,add=TRUE,col=2,lwd=2) xxx = seq(min(x),max(x),by=1) N1 = length(xxx) thetas = matrix(0,N1,M1) for (i in 1:N1) thetas[i,] = pnorm(b1s+b2s*xxx[i]) qthetas = apply(thetas,1,quantile,c(0.025,0.5,0.975)) plot(xxx,qthetas[2,],type="l",lwd=2,col=2,ylim=c(-0.1,1.1), xlab="actual labor mkt exper", ylab="Prob of being in labor force, 1975") lines(xxx,qthetas[1,],col=2,lty=2) lines(xxx,qthetas[3,],col=2,lty=2) abline(h=0.5,lty=3) abline(h=0.8,lty=3) plot(density(thetas[6,]),xlim=c(0.2,1),ylim=c(0,21),main="", xlab="Prob of being in labor force, 1975") lines(density(thetas[11,]),col=2) lines(density(thetas[16,]),col=3) lines(density(thetas[21,]),col=4) lines(density(thetas[26,]),col=5) lines(density(thetas[31,]),col=6) legend(0.2,20,legend=c("5y","10y","15y","20y","25y","30y"), col=1:6,lwd=2,bty="n") text(0.275,20,"actual labor mkt exper") draw.probit = cbind(b1s,b2s) theta.probit = thetas qtheta.probit = qthetas ############################# # COMPARING THE RESULTS ############################# par(mfrow=c(1,1)) plot(xxx,qtheta.logit[2,],type="l",lwd=2,col=2,ylim=c(0.2,1.1), xlab="actual labor mkt exper", ylab="Prob of being in labor force, 1975",axes=FALSE) axis(2);box();axis(1,at=xs) lines(xxx,qtheta.logit[1,],col=2,lty=2,lwd=2) lines(xxx,qtheta.logit[3,],col=2,lty=2,lwd=2) lines(xxx,qtheta.probit[1,],col=4,lty=2,lwd=2) lines(xxx,qtheta.probit[2,],col=4,lwd=2) lines(xxx,qtheta.probit[3,],col=4,lty=2,lwd=2) for (i in 1:nx){ text(xs[i],0.2,count[i,1],cex=0.95) text(xs[i], 1.1,count[i,2],cex=0.95) } par(mfrow=c(1,1)) plot(density(theta.probit[6,]),xlim=c(0.2,1),ylim=c(0,21),main="", xlab="Prob of being in labor force, 1975",lwd=2) lines(density(theta.probit[11,]),col=2,lwd=2) lines(density(theta.probit[16,]),col=3,lwd=2) lines(density(theta.probit[21,]),col=4,lwd=2) lines(density(theta.probit[26,]),col=5,lwd=2) lines(density(theta.probit[31,]),col=6,lwd=2) lines(density(theta.logit[6,]),col=1,lty=2,lwd=2) lines(density(theta.logit[11,]),col=2,lty=2,lwd=2) lines(density(theta.logit[16,]),col=3,lty=2,lwd=2) lines(density(theta.logit[21,]),col=4,lty=2,lwd=2) lines(density(theta.logit[26,]),col=5,lty=2,lwd=2) lines(density(theta.logit[31,]),col=6,lty=2,lwd=2) legend(0.2,20,legend=c("5y","10y","15y","20y","25y","30y"), col=1:6,lwd=2,bty="n") legend(0.2,10,legend=c("Logit","Probit"),lwd=2,lty=1:2,bty="n") text(0.275,20,"actual labor mkt exper") dev.off()