##########################################################################################
#
# Dataset: More on wages
#
##########################################################################################
#
# Summary: Data on monthly earnings, education, several demographic
#          variables, and IQ scores for 935 men in 1980.
#
#  1. wage      monthly earnings
#  2. hours     average weekly hours
#  3. iq        IQ score
#  4. kww       knowledge of world work score
#  5. educ      years of education
#  6. exper     years of work experience
#  7. tenure    years with current employer
#  8. age       age in years
#  9. married   =1 if married
# 10. black     =1 if black
# 11. south     =1 if live in south
# 12. urban     =1 if live in SMSA
# 13. sibs      number of siblings
# 14. brthord   birth order
# 15. meduc     mother's education
# 16. feduc     father's education
# 17. lwage     natural log of wage
#
##########################################################################################
#
# Source: Wooldridge (2012)
#         Introductory Econometrics: A Modern Approach (5th edition)
#         South-Western, Cengage Learning
#
# Basic reference: Blackburn and Neumark (1992) "Unobserved ability,
#                  efficiency wages, and interindustry wage differentials",
#                  Quarterly Journal of Economics, 107, 1421-36.
#
##########################################################################################
#
# Copyright of R code by:
#    Hedibert Freitas Lopes
#    Professor of Statistics and Econometrics
#    Insper - Institute for Education and Research
#
##########################################################################################
#
# datafile = "https://hedibert.org/wp-content/uploads/2014/02/wage2-wooldridge.txt"
# data = read.table(datafile,header=TRUE)
# n = nrow(data)
#
#########################################################################################

# Regression trees (package `tree`) versus a linear model for log monthly
# earnings.  The script
#   1. downloads the data and plots the pairwise relations between years of
#      education, IQ score and log earnings;
#   2. draws a small random subsample and shows the grid of candidate split
#      points for a tree on (education, IQ);
#   3. fits a tree on the subsample, a tree on the full sample, and finally a
#      linear model and a tree with more predictors.
#
# The workspace is intentionally NOT cleared here: every object used below is
# assigned before it is read, so the script does not depend on a clean session.

library(tree)

# Data ----------------------------------------------------------------------

datafile <- "https://hedibert.org/wp-content/uploads/2014/02/wage2-wooldridge.txt"

# NOTE(review): the commented snippet in the header reads the file with
# header=TRUE, whereas this call relies on read.table()'s default header
# handling.  All columns are addressed by position below, so either works as
# long as the columns come back numeric -- confirm which one matches the file.
data <- read.table(datafile)
n <- nrow(data)
head(data)

# Columns are addressed by position: 1 = wage, 3 = iq, 5 = educ.
salary <- log(data[, 1])   # log of monthly earnings
educ <- data[, 5]
iq <- data[, 3]

# Pairwise scatterplots, each annotated with the sample correlation.
par(mfrow = c(1, 3))
plot(educ, iq, xlab = "Years of education", ylab = "IQ score")
title(paste0(n, " observations"))
legend("topleft", legend = paste0("cor=", round(cor(educ, iq), 3)))
plot(educ, salary, xlab = "Years of education", ylab = "Monthly earnings (log)")
legend("topleft", legend = paste0("cor=", round(cor(educ, salary), 3)))
plot(iq, salary, xlab = "IQ score", ylab = "Monthly earnings (log)")
legend("topleft", legend = paste0("cor=", round(cor(iq, salary), 3)))

# Let us start with a sample of size 20
# -------------------------------------
set.seed(2718282)
n1 <- 20
ind <- sort(sample(seq_len(n), size = n1, replace = FALSE))
salary1 <- salary[ind]
educ1 <- educ[ind]
iq1 <- iq[ind]

# Candidate split points: midpoints between consecutive distinct values.
# With k distinct values there are k - 1 midpoints; x[-nx] drops the last
# value so the two operands of `+` always have the same length.
x <- sort(unique(educ1))
nx <- length(x)
cutoff.x <- x[-nx] + diff(x) / 2
y <- sort(unique(iq1))
ny <- length(y)
cutoff.y <- y[-ny] + diff(y) / 2

par(mfrow = c(1, 1))
plot(educ1, iq1, ylab = "IQ score", xlab = "Years of education",
     cex = 0.75, pch = 16, col = 2)
title(paste0(n1, " observations"))
# abline() is vectorised over v/h, so one call draws every cutoff (the former
# loops ran to nx/ny and indexed one element past the end of the cutoffs).
abline(v = cutoff.x, lty = 2)
abline(h = cutoff.y, lty = 2)

# Hard-coded partition into regions A, B and C.  These coordinates only make
# sense for the particular subsample drawn above -- presumably they mirror the
# splits of the tree fitted next; re-check them if the seed or n1 changes.
segments(15.5, 70, 15.5, 130, col = 6, lwd = 3)
segments(10, 102, 15.5, 102, col = 6, lwd = 3)
text(13, 85, "A", col = 6, cex = 2)
text(13, 108, "B", col = 6, cex = 2)
text(17, 95, "C", col = 6, cex = 2)

# Tree on the subsample -------------------------------------------------------

fitted <- tree(salary1 ~ educ1 + iq1)
summary(fitted)
fitted

par(mfrow = c(1, 1))
plot(fitted, type = "uniform")
text(fitted, pretty = 0, all = TRUE, cex = 0.75)

# Tree on the full sample -----------------------------------------------------

fitted <- tree(salary ~ educ + iq)
summary(fitted)
fitted

par(mfrow = c(1, 1))
plot(fitted, type = "uniform")
text(fitted, pretty = 0, all = TRUE, cex = 0.75)

# Linear model versus tree with more predictors -------------------------------

# `wage` is the LOG of monthly earnings (identical to `salary`); `iq` and
# `educ` are the full-sample vectors already defined above.
wage <- salary
exper <- data[, 6]
tenure <- data[, 7]
age <- data[, 8]
married <- data[, 9]
black <- data[, 10]
meduc <- data[, 15]

lm.fit <- lm(wage ~ iq + educ + exper + tenure + age + married + black + meduc)
cart.fit <- tree(wage ~ iq + educ + exper + tenure + age + married + black + meduc)

summary(lm.fit)
summary(cart.fit)
cart.fit

par(mfrow = c(1, 1))
plot(cart.fit, type = "uniform")
text(cart.fit, pretty = 0, all = TRUE, cex = 0.75)