####################################################################### # # Dataset: More on wages # ####################################################################### # # Basic reference: # Blackburn and Newmark (1992) Unobserved ability, eciency wages and # interindustry wage", Quarterly Journal of Economics, 107, 1421-36. # # Summary: Data on monthly earnings, education, several demographic # variables, and IQ scores for 935 men in 1980. # # 1. wage monthly earnings # 2. hours average weekly hours # 3. iq IQ score # 4. kww knowledge of world work score # 5. educ years of education # 6. exper years of work experience # 7. tenure years with current employer # 8. age age in years # 9. married =1 if married # 10. black =1 if black # 11. south =1 if live in south # 12. urban =1 if live in SMSA # 13. sibs number of siblings # 14. brthord birth order # 15. meduc mother's education # 16. feduc father's education # 17. lwage natural log of wage # ####################################################################### # # Source: Wooldridge (2012) # Introductory Econometrics: A Modern Approach (5th edition) # South-Western, Cengage Learning # ####################################################################### # # Copyright of R code by: # # Hedibert Freitas Lopes # Professor of Statistics and Econometrics # Insper - Institute for Education and Research # ####################################################################### rm(list=ls()) pdf(file="wage2-wooldridge.pdf",width=14,height=10) data = read.table("wage2-wooldridge.txt",header=TRUE) n = nrow(data) attach(data) # Univariate regressions # ---------------------- par(mfrow=c(2,5)) plot(educ,lwage,pch=16,xlab="Years of education",ylab="Log monthly earnings") abline(lm(lwage~educ),col=2,lwd=2) title(paste("Correlation=",round(cor(lwage,educ),3),sep="")) plot(age,lwage,pch=16,xlab="Age in years",ylab="Log monthly earnings") abline(lm(lwage~age),col=2,lwd=2) title(paste("Correlation=",round(cor(lwage,age),3),sep="")) plot(tenure,lwage,pch=16,xlab="Years with current employer",ylab="Log monthly earnings") abline(lm(lwage~tenure),col=2,lwd=2) title(paste("Correlation=",round(cor(lwage,tenure),3),sep="")) plot(iq,lwage,pch=16,xlab="IQ score",ylab="Log monthly earnings") abline(lm(lwage~iq),col=2,lwd=2) title(paste("Correlation=",round(cor(lwage,iq),3),sep="")) plot(kww,lwage,pch=16,xlab="knowledge of world work score",ylab="Log monthly earnings") abline(lm(lwage~kww),col=2,lwd=2) title(paste("Correlation=",round(cor(lwage,kww),3),sep="")) plot(iq,kww,pch=16,xlab="IQ score",ylab="knowledge of world work score") title(paste("Correlation=",round(cor(iq,kww),3),sep="")) boxplot(lwage[married==0],lwage[married==1],ylab="Log monthly earnings",xlab="Married", names=c("No","Yes")) boxplot(lwage[black==0],lwage[black==1],ylab="Log monthly earnings",xlab="Black", names=c("No","Yes")) boxplot(lwage[south==0],lwage[south==1],ylab="Log monthly earnings",xlab="South", names=c("No","Yes")) boxplot(lwage[urban==0],lwage[urban==1],ylab="Log monthly earnings",xlab="Urban", names=c("No","Yes")) # Regressors pairs(cbind(educ,age,tenure,iq,kww)) # Comparing regression models X = cbind(educ,age,tenure,iq,kww,married,black,south,urban) p = ncol(X) nm = 2^p-1 mod = matrix(0,nm+1,p) m = 0 lab = rep(0,nm+1) for (i in 0:1) for (j in 0:1) for (k in 0:1) for (l in 0:1) for (o in 0:1) for (q in 0:1) for (r in 0:1) for (s in 0:1) for (t in 0:1){ m = m+1 mod[m,] = c(i,j,k,l,o,q,r,s,t) } mod = mod[-1,] lab = lab[-1] SST = sum((lwage-mean(lwage))^2) se = rep(0,nm) R2a = rep(0,nm) res = matrix(0,n,nm) ks = rep(0,m) for (m in 1:nm){ k = sum(mod[m,]) X1 = X[,mod[m,]==TRUE] reg = lm(lwage~X1) SSR = sum(reg$res^2) R2 = 1-SSR/SST R2a[m] = 1-(1-R2)*(n-1)/(n-k) se[m] = sqrt(SSR/(n-k+1)) res[,m] = reg$res ks[m] = k } oR2a = c(R2a[ks==1],R2a[ks==2],R2a[ks==3],R2a[ks==4],R2a[ks==5], R2a[ks==6],R2a[ks==7],R2a[ks==8],R2a[ks==9]) ocols = NULL nmodels = NULL for (i in 1:9){ ocols = c(ocols,rep(i,sum(ks==i))) nmodels = c(nmodels,sum(ks==i)) } par(mfrow=c(1,1)) plot(100*oR2a,xlab="Model",ylab="Adjusted R2 (%)",pch=16,col=ocols) for (i in 1:9) text(450,11-(i-1),paste(i," regressors: ",nmodels[i]," models",sep=""),col=i) # Best model reg = lm(lwage~educ+age+tenure+iq+married+black+south+urban) summary(reg) se = sqrt(sum(reg$res^2)/(n-9)) res = reg$res/se L = -max(abs(res)) U = max(abs(res)) cov1 = round(100*mean(abs(res)