#######################################################################
#
# Dataset: Bank Wages
#
#######################################################################
#
# Summary: 474 observations on education (in terms of finished years
# of education) and salary (in (natural) logarithms of the yearly
# salary S in dollars). The salaries are measured in logarithms. The
# data are taken from one of the standard data files of the
# statistical software package SPSS and concern the employees of a
# US bank.
#
#######################################################################
#
# Source: Heij, Boer, Franses and van Dijk (2004)
#         Econometric Methods with Applications in Business and
#         Economics. Oxford University Press.
#
#######################################################################
#
# Copyright of R code by:
#
# Hedibert Freitas Lopes
# Professor of Statistics and Econometrics
# Insper - Institute for Education and Research
#
#######################################################################
#
# What this script does:
#   Fits LOGSAL by least squares on every non-empty subset of
#   EDUC, LOGSALBEGIN, GENDER and MINORITY (15 models) and writes the
#   diagnostic plots to "bankwages.pdf":
#     page 1      LOGSAL vs. each predictor, with the fitted line
#     page 2      residuals of each one-predictor model vs. its predictor
#     pages 3-13  LOGSAL vs. fitted values for the multi-predictor models
#     page 14     residuals vs. fitted values for all 15 models
#     page 15     boxplots of the residuals, annotated with R2,
#                 adjusted R2 and the residual root mean square
#
#######################################################################

data = read.table("bankwages.txt", header = TRUE)

response   = "LOGSAL"
predictors = c("EDUC", "LOGSALBEGIN", "GENDER", "MINORITY")
# One-letter codes used to label the models on the boxplot page.
codes      = c(EDUC = "E", LOGSALBEGIN = "B", GENDER = "G", MINORITY = "M")

# Fail early (instead of deep inside lm/plot) if a column is missing.
# Columns are read with [[ ]] below because `$` partially matches names
# (data$LOGSAL would silently return LOGSALBEGIN if LOGSAL were absent).
stopifnot(all(c(response, predictors) %in% names(data)))

y   = data[[response]]
n   = nrow(data)
SST = sum((y - mean(y))^2)

# All non-empty predictor subsets, smallest first. combn() enumerates
# in lexicographic order of position, which gives
#   E, B, G, M, EB, EG, EM, BG, BM, GM, EBG, EBM, EGM, BGM, EBGM
model_vars = unlist(
  lapply(seq_along(predictors),
         function(m) combn(predictors, m, simplify = FALSE)),
  recursive = FALSE
)
model_names = vapply(model_vars,
                     function(v) paste0(codes[v], collapse = ""),
                     character(1))
k = lengths(model_vars)  # number of regressors in each model

# Passing data= keeps the fits independent of the search path, so no
# attach() is needed.
fits = lapply(model_vars,
              function(v) lm(reformulate(v, response = response),
                             data = data))

# R2 and adjusted R2 (kept at full precision; rounded only for display)
R2  = vapply(fits, function(f) 1 - sum(f$residuals^2) / SST, numeric(1))
R2a = 1 - (1 - R2) * (n - 1) / (n - k - 1)
# Root mean square of the residuals (divides by n, not n - k - 1).
s   = vapply(fits, function(f) sqrt(mean(f$residuals^2)), numeric(1))

# Build the plot title "LOGSAL = b0 + b1X1 + ... \n R2=..." for a fit.
#   fit  : an lm object
#   vars : regressor names, in the order of the fitted coefficients
#   r2   : R2 of the fit (rounded to 4 decimals in the title)
#   gap  : text inserted before the line break
model_title = function(fit, vars, r2, gap = "") {
  b = round(fit$coefficients, 3)
  paste0(response, " = ", b[1], " + ",
         paste0(b[-1], vars, collapse = " + "),
         gap, "\n R2=", round(r2, 4))
}

pdf(file = "bankwages.pdf", width = 15, height = 10)
# finally= closes the PDF device even if a plotting step fails.
invisible(tryCatch({

  # Page 1: response against each predictor, with the fitted line.
  # Models 1-4 are the one-predictor fits, in the order of `predictors`.
  par(mfrow = c(2, 2))
  for (i in seq_along(predictors)) {
    x = predictors[i]
    plot(data[[x]], y, pch = 16, xlab = x, ylab = response)
    abline(reg = fits[[i]], col = 2, lwd = 2)
    title(model_title(fits[[i]], x, R2[i], gap = " "))
  }

  # Page 2: residuals of each one-predictor model against that model's
  # own predictor.
  par(mfrow = c(2, 2))
  for (i in seq_along(predictors)) {
    x = predictors[i]
    plot(data[[x]], fits[[i]]$residuals, xlab = x, ylab = "RESIDUALS")
    abline(h = 0, lty = 2)
  }

  # Pages 3-13: observed against fitted values, one model per page;
  # the red line is the 45-degree line.
  par(mfrow = c(1, 1))
  for (i in which(k > 1)) {
    plot(fits[[i]]$fitted.values, y, pch = 16,
         xlab = "FITTED", ylab = response)
    abline(0, 1, col = 2, lwd = 2)
    title(model_title(fits[[i]], model_vars[[i]], R2[i]))
  }

  # Page 14: residuals against fitted values for all 15 models.
  par(mfrow = c(3, 5))
  for (fit in fits) {
    plot(fit$fitted.values, fit$residuals,
         xlab = "FITTED", ylab = "RESIDUAL")
    abline(h = 0, lty = 2)
  }

  # Page 15: residual boxplots, one per model, with R2, adjusted R2 and
  # s printed above each box.
  res = do.call(cbind, lapply(fits, function(f) f$residuals))
  par(mfrow = c(1, 1))
  boxplot(res, ylim = c(-1, 1.7), names = model_names,
          xlab = "LINEAR MODELS")
  abline(h = 0, lty = 2)
  at = seq_along(fits)
  text(at, 1.7, round(R2, 2))
  text(at, 1.6, round(R2a, 2))
  text(at, 1.5, round(s, 2))
  text(0.3, 1.7, "R2")
  text(0.3, 1.6, "R2a")
  text(0.3, 1.5, "s")
  title(paste0("ST. DEV. LOGSAL = ",
               round(sqrt(mean((y - mean(y))^2)), 3)))

}, finally = dev.off()))