#######################################################################
#
# Dataset: Bank Wages
#
# Dummies and interactions
#
#######################################################################
#
# Summary: 474 observations on education (in terms of finished years of
# education) and salary (in (natural) logarithms of the yearly salary S
# in dollars). The salaries are measured in logarithms. The data are
# taken from one of the standard data files of the statistical software
# package SPSS and concern the employees of a US bank.
#
#######################################################################
#
# Source: Heij, Boer, Franses and van Dijk (2004)
#         Econometric Methods with Applications in Business and Economics.
#         Oxford University Press.
#
#######################################################################
#
# Copyright of R code by:
#
# Hedibert Freitas Lopes
# Professor of Statistics and Econometrics
# Insper - Institute for Education and Research
#
#######################################################################
#
# What the script does:
#   1. Reads bankwages.txt and builds 0/1 dummies for JOBCAT == 2 and
#      JOBCAT == 3.
#   2. Regresses LOGSAL on LOGSALBEGIN alone, then with GENDER, MINORITY
#      or the job-category dummies, first as intercept shifts and then
#      with slope interactions (seven models in total).
#   3. Writes one scatter plot per model (fitted lines, 45-degree line,
#      R2) and a final residual boxplot to
#      bankwages-dummies-interactions.pdf.
#
#######################################################################

# Helpers ----

# R2 of `fit` relative to the total sum of squares `sst`, rounded to four
# decimals (the precision shown on the plots).
r_squared <- function(fit, sst) {
  round(1 - sum(residuals(fit)^2) / sst, 4)
}

# Build the plot title for `fit`, e.g. "LS=1.2 + 0.9LSB - 0.05G".
# `labels` names every coefficient after the intercept, in model order.
# The sign of each estimate is rendered explicitly so that the title is
# correct whether the estimate is positive or negative.
format_equation <- function(fit, labels) {
  est <- round(unname(coef(fit)), 4)
  stopifnot(length(labels) == length(est) - 1)
  slopes <- est[-1]
  negative <- !is.na(slopes) & slopes < 0
  signs <- ifelse(negative, " - ", " + ")
  paste0("LS=", est[1], paste0(signs, abs(slopes), labels, collapse = ""))
}

# Intercept/slope of the fitted line for the baseline group and for each
# of the `n_dummies` dummy groups. Coefficients are expected in the order
# (intercept, LOGSALBEGIN, dummies..., LOGSALBEGIN:dummies...), which is
# the order produced by the formulas used below.
group_lines <- function(fit, n_dummies = 0, interaction = FALSE) {
  b <- unname(coef(fit))
  shift <- b[2 + seq_len(n_dummies)]
  tilt <- if (interaction) {
    b[2 + n_dummies + seq_len(n_dummies)]
  } else {
    rep(0, n_dummies)
  }
  cbind(intercept = b[1] + c(0, shift), slope = b[2] + c(0, tilt))
}

# Scatter plot of LOGSAL against LOGSALBEGIN with the fitted line(s), the
# 45-degree line, the R2 annotation and an optional legend.
plot_fit <- function(bank, lim, eq, r2, lines, point_col = 1,
                     line_col = seq_len(nrow(lines)), legend_text = NULL) {
  plot(bank$LOGSALBEGIN, bank$LOGSAL, col = point_col, pch = 16, main = "",
       xlim = lim, ylim = lim, xlab = "LOGSALBEGIN", ylab = "LOGSAL")
  title(eq)
  text(9.3, 11.7, paste0("R2=", r2))
  for (i in seq_len(nrow(lines))) {
    abline(a = lines[i, "intercept"], b = lines[i, "slope"],
           lwd = 2, col = line_col[i])
  }
  if (!is.null(legend_text)) {
    legend(10.5, 10, legend = legend_text, col = line_col, lwd = 2, bty = "n")
  }
  abline(0, 1)  # 45-degree line: current salary equal to starting salary
  invisible(NULL)
}

# Data ----

bank <- read.table("bankwages.txt", header = TRUE)

# 0/1 dummies; %in% keeps a missing JOBCAT at 0 rather than NA.
bank$jobcat2 <- as.numeric(bank$JOBCAT %in% 2)
bank$jobcat3 <- as.numeric(bank$JOBCAT %in% 3)

sst <- sum((bank$LOGSAL - mean(bank$LOGSAL))^2)

# Common axis range so that the 45-degree line is meaningful.
lim <- range(bank$LOGSAL, bank$LOGSALBEGIN)

# Models ----

reg1 <- lm(LOGSAL ~ LOGSALBEGIN, data = bank)
reg2 <- lm(LOGSAL ~ LOGSALBEGIN + GENDER, data = bank)
reg3 <- lm(LOGSAL ~ LOGSALBEGIN + MINORITY, data = bank)
reg4 <- lm(LOGSAL ~ LOGSALBEGIN + jobcat2 + jobcat3, data = bank)
reg5 <- lm(LOGSAL ~ LOGSALBEGIN + GENDER + LOGSALBEGIN:GENDER, data = bank)
reg6 <- lm(LOGSAL ~ LOGSALBEGIN + MINORITY + LOGSALBEGIN:MINORITY,
           data = bank)
reg7 <- lm(LOGSAL ~ LOGSALBEGIN + jobcat2 + jobcat3 +
             LOGSALBEGIN:jobcat2 + LOGSALBEGIN:jobcat3, data = bank)

fits <- list(reg1, reg2, reg3, reg4, reg5, reg6, reg7)
r2 <- vapply(fits, r_squared, numeric(1), sst = sst)

gender_legend <- c("Gender=0", "Gender=1")
minority_legend <- c("Minority=0", "Minority=1")
jobcat_legend <- c("Jobcat=1", "Jobcat=2", "Jobcat=3")

# Plots ----

# The device is opened only after the data are read and the models fitted,
# and is always closed, even if a plotting call fails.
pdf(file = "bankwages-dummies-interactions.pdf", width = 10, height = 10)
tryCatch({
  plot_fit(bank, lim, format_equation(reg1, "LSB"), r2[1],
           group_lines(reg1), line_col = 2)

  plot_fit(bank, lim, format_equation(reg2, c("LSB", "G")), r2[2],
           group_lines(reg2, 1), point_col = bank$GENDER + 1,
           legend_text = gender_legend)

  plot_fit(bank, lim, format_equation(reg3, c("LSB", "M")), r2[3],
           group_lines(reg3, 1), point_col = bank$MINORITY + 1,
           legend_text = minority_legend)

  plot_fit(bank, lim, format_equation(reg4, c("LSB", "J2", "J3")), r2[4],
           group_lines(reg4, 2), point_col = bank$JOBCAT,
           legend_text = jobcat_legend)

  plot_fit(bank, lim, format_equation(reg5, c("LSB", "G", "LSB*G")), r2[5],
           group_lines(reg5, 1, interaction = TRUE),
           point_col = bank$GENDER + 1, legend_text = gender_legend)

  plot_fit(bank, lim, format_equation(reg6, c("LSB", "M", "LSB*M")), r2[6],
           group_lines(reg6, 1, interaction = TRUE),
           point_col = bank$MINORITY + 1, legend_text = minority_legend)

  plot_fit(bank, lim,
           format_equation(reg7, c("LSB", "J2", "J3", "LSB*J2", "LSB*J3")),
           r2[7], group_lines(reg7, 2, interaction = TRUE),
           point_col = bank$JOBCAT, legend_text = jobcat_legend)

  # Residual spread of the seven models side by side, with each model's
  # R2 written above its box.
  boxplot(lapply(fits, residuals),
          names = c("LSB", "LSB+G", "LSB+M", "LSB+J",
                    "LSB*G", "LSB*M", "LSB*J"))
  text(0.5, 0.75, "R2", col = 4)
  text(seq_along(r2), 0.75, r2, col = 4)
}, finally = invisible(dev.off()))