# Gary Koop and Justin L. Tobias
# "Learning about Heterogeneity in Returns to Schooling"
# Journal of Applied Econometrics, Vol. 19, No. 7, 2004, pp. 827-849.
#
# This panel data set consists of N=2,178 individuals.
# The data are taken from the National Longitudinal Survey of Youth.
# The variables are:
# Column 1: Education (educ)
# Column 2: Log Hourly Wage (logwage)
# Column 3: Potential Experience (exper)
# Column 4: Ability (ability)
# Column 5: Mother's Education (momeduc)
# Column 6: Father's Education (dadeduc)
# Column 7: Dummy for Residence in Broken Home (broken)
# Column 8: Number of Siblings (siblings)

data = read.table("kooptobias.txt", header=TRUE)
n = nrow(data)
attach(data)

# Pairwise correlations, with logwage listed first
round(cor(data[,c(2,1,3:8)]), 2)

# Scatterplots of logwage against each regressor (points coloured by broken),
# with the simple regression line superimposed
par(mfrow=c(2,3))
plot(educ, logwage, col=broken+1)
abline(lm(logwage~educ), col=3, lwd=3)
plot(exper, logwage, col=broken+1)
abline(lm(logwage~exper), col=3, lwd=3)
plot(ability, logwage, col=broken+1)
abline(lm(logwage~ability), col=3, lwd=3)
plot(momeduc, logwage, col=broken+1)
abline(lm(logwage~momeduc), col=3, lwd=3)
plot(dadeduc, logwage, col=broken+1)
abline(lm(logwage~dadeduc), col=3, lwd=3)
plot(siblings, logwage, col=broken+1)
abline(lm(logwage~siblings), col=3, lwd=3)

# All regressors are marginally significant
summary(lm(logwage~educ))
summary(lm(logwage~exper))
summary(lm(logwage~ability))
summary(lm(logwage~momeduc))
summary(lm(logwage~dadeduc))
summary(lm(logwage~broken))
summary(lm(logwage~siblings))

# Siblings becomes irrelevant in the presence of education
summary(lm(logwage~educ+exper+broken))
summary(lm(logwage~educ+ability+broken))
summary(lm(logwage~educ+momeduc+broken))
summary(lm(logwage~educ+dadeduc+broken))
summary(lm(logwage~educ+siblings+broken))

# Ability and mom's and dad's education are still relevant;
# broken is significant at the 5% level
summary(lm(logwage~educ+exper+ability+broken))
summary(lm(logwage~educ+exper+momeduc+broken))
summary(lm(logwage~educ+exper+dadeduc+broken))

# Mom's and dad's education become irrelevant at the
# 5% and 1% levels respectively; broken becomes irrelevant
# at the 1% level
summary(lm(logwage~educ+exper+ability+momeduc+broken))
summary(lm(logwage~educ+exper+ability+dadeduc+broken))

# Final competing models for our exercise
ability1 = ability*broken   # interaction term (defined here but not used below)
summary(lm(logwage~educ+exper+ability))
summary(lm(logwage~educ+exper+ability+broken))

# Full regression
summary(lm(logwage~educ+exper+ability+momeduc+dadeduc+broken+siblings))

# Let us then consider two competing models:
# Model 1 (M1): X1 = c(1,educ,exper,ability)
# Model 2 (M2): X2 = c(1,educ,exper,ability,broken)
# M1: Adjusted R-squared: 0.1799
# M2: Adjusted R-squared: 0.1814
# M1: Residual standard error: 0.5290
# M2: Residual standard error: 0.5286
# M2: p-value for H0: broken=0 is 0.0283
#     (reject H0 at the 5% level, but not at the 1% level)

# Standardize the residuals from M1 so they have unit sample variance
y = residuals(lm(logwage~educ+exper+ability))
y = y/sqrt(var(y))
summary(lm(y~broken-1))

# Log marginal likelihood under M1: the standardized residuals are i.i.d. N(0,1)
pred1 = sum(dnorm(y, log=TRUE))

if (!requireNamespace("emdbook", quietly=TRUE)) install.packages("emdbook")
library("emdbook")

# Log marginal likelihood under M2: with a N(0,1) prior on the broken
# coefficient, marginally y ~ N(0, I + broken broken')
zero = rep(0, n)
Sigma = broken%*%t(broken) + diag(1, n)
pred2 = dmvnorm(y, zero, Sigma, log=TRUE)

# Bayes factor in favour of M1 and its posterior probability
# under equal prior model probabilities
B12 = exp(pred1-pred2)
Prob1 = B12/(1+B12)
c(pred1, pred2)
# -3091.890 -3092.876
c(B12, Prob1)
# 2.6817338 0.7283888
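
# A cross-check (not part of the original exercise, just a sketch): the M2
# log marginal likelihood can be evaluated analytically without building the
# n x n covariance matrix. With Sigma = I + b b' (b = broken), the matrix
# determinant lemma gives det(Sigma) = 1 + b'b, and Sherman-Morrison gives
# Sigma^{-1} = I - b b'/(1 + b'b).
btb = sum(broken^2)
bty = sum(broken*y)
pred2.check = -0.5*n*log(2*pi) - 0.5*log(1+btb) -
  0.5*(sum(y^2) - bty^2/(1+btb))
c(pred2, pred2.check)   # the two numbers should agree up to rounding error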
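
# A second sketch (our own check, not from the paper; the draw count S and the
# seed are arbitrary choices): M2 is equivalent to y = broken*beta + e with
# beta ~ N(0,1) and e ~ N(0,I), so the marginal likelihood can also be
# approximated by averaging the conditional likelihood over prior draws of
# beta, using a log-sum-exp step for numerical stability.
set.seed(12345)
S = 10000
beta.draws = rnorm(S)
loglik = sapply(beta.draws, function(b) sum(dnorm(y - broken*b, log=TRUE)))
m = max(loglik)
pred2.mc = m + log(mean(exp(loglik - m)))
c(pred2, pred2.mc)   # the Monte Carlo estimate should be close to the exact value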