################################################################################################
# The cross-sectional data used for econometric analysis can be represented and
# stored in computers. This is a cross-sectional data set on 526 working
# individuals for the year 1976.
#
# The variables include:
#   wage    (in dollars per hour)
#   educ    (years of education)
#   exper   (years of potential labor force experience)
#   female  (an indicator for gender)
#   married (marital status)
#
# These last two variables are binary (zero-one) in nature and serve to indicate
# qualitative features of the individual (the person is female or not; the person
# is married or not).
################################################################################################

# Reading the data from a CSV file
data <- read.csv("http://hedibert.org/wp-content/uploads/2016/02/wage.csv", header = TRUE)

# Reading the data from a TXT file
samedata <- read.table("http://hedibert.org/wp-content/uploads/2016/02/wage.txt", header = TRUE)

# Confirming both datasets are the same.
# all() collapses the element-wise comparison into a single TRUE/FALSE instead
# of printing one logical value per cell.
all(data == samedata)

# Making variables in a file directly available for manipulation.
# NOTE(review): attach() is kept because the rest of this script refers to the
# columns by bare name; prefer data$col or with(data, ...) in new code.
attach(data)

# Creating additional dummy variables (logical masks, one per group)
singleman    <- (female == 0) & (married == 0)
marriedman   <- (female == 0) & (married == 1)
singlewoman  <- (female == 1) & (married == 0)
marriedwoman <- (female == 1) & (married == 1)

# Proportion of observations in each group
mean(singleman)
mean(marriedman)
mean(singlewoman)
mean(marriedwoman)

###########################
# UNIVARIATE STATISTICS
###########################

# Boxplot help
?boxplot

# This also works
help(boxplot)

# Boxplot of wage by gender
boxplot(wage[female == 0], wage[female == 1],
        names = c("Men", "Women"),
        ylab = "wage (in dollars per hour)")

# Boxplot of wage by marital status
boxplot(wage[married == 0], wage[married == 1],
        names = c("Single", "Married"),
        ylab = "wage (in dollars per hour)")

# Boxplot of wage by gender and marital status
boxplot(wage[marriedman], wage[singleman], wage[marriedwoman], wage[singlewoman],
        names = c("Married man", "Single man", "Married woman", "Single woman"),
        ylab = "wage (in dollars per hour)")

# Boxplot of wage by years of experience (excluding outliers).
# cut() builds the ten right-closed 5-year bins: (-Inf,5], (5,10], ..., (45,Inf).
par(mfrow = c(1, 1))
exper_labels <- c("<=5", "(5,10]", "(10,15]", "(15,20]", "(20,25]",
                  "(25,30]", "(30,35]", "(35,40]", "(40,45]", ">45")
exper_bin <- cut(exper, breaks = c(-Inf, seq(5, 45, by = 5), Inf),
                 labels = exper_labels)
boxplot(split(wage, exper_bin),
        xlab = "Years of experience",
        ylab = "wage (in dollars per hour)",
        outline = FALSE)

###########################
# BIVARIATE STATISTICS
###########################

# Common axis limits so that panels drawn side by side are comparable
educ_range <- range(educ)
wage_range <- range(wage)

# Scatter plot with black dots instead of hollow dots
plot(educ, wage, pch = 16)

# Scatter plots by gender
par(mfrow = c(1, 2))
plot(educ[female == 0], wage[female == 0], pch = 16)
plot(educ[female == 1], wage[female == 1], pch = 16)

# Scatter plots by gender (same x and y scales)
par(mfrow = c(1, 2))
plot(educ[female == 0], wage[female == 0], pch = 16, xlim = educ_range, ylim = wage_range)
plot(educ[female == 1], wage[female == 1], pch = 16, xlim = educ_range, ylim = wage_range)

# Scatter plots by marital status (same x and y scales)
par(mfrow = c(1, 2))
plot(educ[married == 0], wage[married == 0], pch = 16, xlim = educ_range, ylim = wage_range)
plot(educ[married == 1], wage[married == 1], pch = 16, xlim = educ_range, ylim = wage_range)

# Scatter plots by gender & marital status (same x and y scales)
par(mfrow = c(2, 2))
plot(educ[singleman], wage[singleman], pch = 16, xlim = educ_range, ylim = wage_range)
plot(educ[marriedman], wage[marriedman], pch = 16, xlim = educ_range, ylim = wage_range)
plot(educ[singlewoman], wage[singlewoman], pch = 16, xlim = educ_range, ylim = wage_range)
plot(educ[marriedwoman], wage[marriedwoman], pch = 16, xlim = educ_range, ylim = wage_range)

# Scatter plot coloured by marital status, with one fitted line per group
par(mfrow = c(1, 1))
plot(educ, wage, col = married + 1, pch = 16)
abline(lm(wage ~ educ, subset = married == 0), col = 1, lwd = 3)
abline(lm(wage ~ educ, subset = married == 1), col = 2, lwd = 3)
legend("topleft", legend = c("single", "married"), col = 1:2, pch = 16)

# Scatter plot coloured by gender, with one fitted line per group
plot(educ, wage, col = female + 1, pch = 16)
abline(lm(wage ~ educ, subset = female == 0), col = 1, lwd = 3)
abline(lm(wage ~ educ, subset = female == 1), col = 2, lwd = 3)
legend("topleft", legend = c("male", "female"), col = 1:2, pch = 16)

# All observations in grey, with one fitted line per gender/marital-status group
par(mfrow = c(1, 1))
plot(educ, wage, pch = 16, col = grey(0.8))
abline(lm(wage ~ educ, subset = singleman), col = 1, lwd = 3)
abline(lm(wage ~ educ, subset = singlewoman), col = 2, lwd = 3)
abline(lm(wage ~ educ, subset = marriedman), col = 3, lwd = 3)
abline(lm(wage ~ educ, subset = marriedwoman), col = 4, lwd = 3)
legend("topleft",
       legend = c("Single male", "Single female", "Married male", "Married female"),
       col = 1:4, lty = 1, lwd = 3)