##################################################################
# The cross-sectional data used for econometric analysis can be represented and
# stored in computers. This is a cross-sectional data set on 526 working
# individuals for the year 1976.
#
# The variables include:
#   wage    (in dollars per hour)
#   educ    (years of education)
#   exper   (years of potential labor force experience)
#   female  (an indicator for gender)
#   married (marital status)
#
# These last two variables are binary (zero-one) in nature and serve to indicate
# qualitative features of the individual (the person is female or not; the person
# is married or not).
#
##################################################################

# Reading the data from a CSV file
data <- read.csv("wage.csv", header = TRUE)

# Reading the data from a TXT file
samedata <- read.table("wage.txt", header = TRUE)

# Confirming both datasets are the same. all.equal() answers with a single TRUE
# (or a description of what differs, up to numerical tolerance) instead of
# printing one logical value per cell.
all.equal(data, samedata)

# Making the variables directly available for manipulation. Each column is
# copied into a vector of the same name instead of attach()-ing the data frame,
# because attach() puts one more copy of `data` on the search path every time
# the script is re-run. Fail early if an expected column is absent.
stopifnot(
  all(c("wage", "educ", "exper", "female", "married") %in% names(data))
)
wage <- data[["wage"]]
educ <- data[["educ"]]
exper <- data[["exper"]]
female <- data[["female"]]
married <- data[["married"]]

# Creating additional dummy variables: one logical indicator per
# gender-by-marital-status group
singleman <- (female == 0) & (married == 0)
marriedman <- (female == 0) & (married == 1)
singlewoman <- (female == 1) & (married == 0)
marriedwoman <- (female == 1) & (married == 1)

# Proportion of observations in each group
mean(singleman)
mean(marriedman)
mean(singlewoman)
mean(marriedwoman)

# Axis labels shared by the plots below
wage_label <- "wage (in dollars per hour)"
educ_label <- "educ (years of education)"

###########################
# UNIVARIATE STATISTICS
###########################

# Boxplot help
?boxplot
# This also works
help(boxplot)

# Boxplot of wage by gender
boxplot(wage[female == 0], wage[female == 1],
        names = c("Men", "Women"), ylab = wage_label)

# Boxplot of wage by marital status
boxplot(wage[married == 0], wage[married == 1],
        names = c("Single", "Married"), ylab = wage_label)

# Boxplot of wage by gender and marital status
boxplot(wage[marriedman], wage[singleman],
        wage[marriedwoman], wage[singlewoman],
        names = c("Married man", "Single man",
                  "Married woman", "Single woman"),
        ylab = wage_label)

# Boxplot of wage by years of experience (excluding outliers).
# par() returns the previous setting when it changes one, so keep it in order
# to put the plotting layout back at the end of the script.
old_par <- par(mfrow = c(1, 1))

# Five-year experience bands, each closed on the right: (-Inf, 5], (5, 10],
# ..., (40, 45], (45, Inf).
exper_group <- cut(
  exper,
  breaks = c(-Inf, seq(5, 45, by = 5), Inf),
  right = TRUE,
  labels = c("<=5", "(5,10]", "(10,15]", "(15,20]", "(20,25]",
             "(25,30]", "(30,35]", "(35,40]", "(40,45]", ">45")
)
# split() keeps one (possibly empty) group per band, in band order, and
# boxplot() takes the box names from the list names.
boxplot(split(wage, exper_group),
        xlab = "Years of experience", ylab = wage_label, outline = FALSE)

###########################
# BIVARIATE STATISTICS
###########################

# Full-sample axis limits, used below so that panels showing different subsets
# are drawn on the same scales. na.rm = TRUE keeps the limits usable even if a
# value is missing.
educ_range <- range(educ, na.rm = TRUE)
wage_range <- range(wage, na.rm = TRUE)

# Scatter plot with black dots instead of hollow dots
plot(educ, wage, pch = 16, xlab = educ_label, ylab = wage_label)

# Scatter plots by gender
par(mfrow = c(1, 2))
plot(educ[female == 0], wage[female == 0], pch = 16, main = "Men",
     xlab = educ_label, ylab = wage_label)
plot(educ[female == 1], wage[female == 1], pch = 16, main = "Women",
     xlab = educ_label, ylab = wage_label)

# Scatter plots by gender (same x and y scales)
par(mfrow = c(1, 2))
plot(educ[female == 0], wage[female == 0], pch = 16, main = "Men",
     xlab = educ_label, ylab = wage_label,
     xlim = educ_range, ylim = wage_range)
plot(educ[female == 1], wage[female == 1], pch = 16, main = "Women",
     xlab = educ_label, ylab = wage_label,
     xlim = educ_range, ylim = wage_range)

# Scatter plots by marital status (same x and y scales)
par(mfrow = c(1, 2))
plot(educ[married == 0], wage[married == 0], pch = 16, main = "Single",
     xlab = educ_label, ylab = wage_label,
     xlim = educ_range, ylim = wage_range)
plot(educ[married == 1], wage[married == 1], pch = 16, main = "Married",
     xlab = educ_label, ylab = wage_label,
     xlim = educ_range, ylim = wage_range)

# Scatter plots by gender & marital status (same x and y scales)
par(mfrow = c(2, 2))
plot(educ[singleman], wage[singleman], pch = 16, main = "Single men",
     xlab = educ_label, ylab = wage_label,
     xlim = educ_range, ylim = wage_range)
plot(educ[marriedman], wage[marriedman], pch = 16, main = "Married men",
     xlab = educ_label, ylab = wage_label,
     xlim = educ_range, ylim = wage_range)
plot(educ[singlewoman], wage[singlewoman], pch = 16, main = "Single women",
     xlab = educ_label, ylab = wage_label,
     xlim = educ_range, ylim = wage_range)
plot(educ[marriedwoman], wage[marriedwoman], pch = 16, main = "Married women",
     xlab = educ_label, ylab = wage_label,
     xlim = educ_range, ylim = wage_range)

# Put the plotting layout back the way it was before this script changed it
par(old_par)