################################################################################
#
# The file salario.txt contains the following self-explanatory columns:
#   1. salario         (salary, from 110 to 172)
#   2. posicao         (position, from 1 to 9)
#   3. anosexperiencia (years of experience, from 1.7 to 26.1)
#   4. sexo            (sex: 0 = woman, 1 = man)
#
# 220 observations: 75 women and 145 men.
#
# Output: salario.pdf with exploratory plots and simple linear regressions of
# salary on (a) years of experience and (b) position, for everybody and
# separately by sex.
#
################################################################################

# Coefficient of determination, in percent and rounded to one decimal place.
#
# y   : response vector the model was fitted on.
# fit : the corresponding `lm` object.
#
# The total sum of squares is computed from `y` itself, so a model fitted on a
# subset is compared against the variability of that subset only (dividing a
# subset's residual sum of squares by the full-sample total overstates R2).
r2_percent <- function(y, fit) {
  ss_res <- sum((y - fitted(fit))^2)
  ss_tot <- sum((y - mean(y))^2)
  round(100 * (1 - ss_res / ss_tot), 1)
}

# Read the data before opening the graphics device, so that a missing input
# file does not leave an open device and an empty PDF behind.
dados <- read.table("salario.txt", header = TRUE)

pdf(file = "salario.pdf", width = 9, height = 6)
par(mfrow = c(1, 1))

# Boxplots of salary and of years of experience: everybody, women, men
is_woman <- dados[, 4] == 0
is_man <- dados[, 4] == 1

boxplot(dados[, 1], dados[is_woman, 1], dados[is_man, 1],
  names = c("Todos", "Mulheres", "Homens"),
  horizontal = TRUE, xlab = "Salario", col = c(5, 2, 3)
)
boxplot(dados[, 3], dados[is_woman, 3], dados[is_man, 3],
  names = c("Todos", "Mulheres", "Homens"),
  horizontal = TRUE, xlab = "Anos de experiencia", col = c(5, 2, 3)
)

################################################################################
# FIRST SET OF SIMPLE LINEAR REGRESSIONS
# Explaining salary linearly through years of experience
################################################################################
y1 <- dados[, 1]
x1 <- dados[, 3]
z <- dados[, 4]

# Salary versus years of experience (colour 2 = women, colour 3 = men)
plot(x1, y1,
  xlab = "Anos de experiencia", ylab = "Salario",
  col = z + 2, pch = 16, ylim = c(min(y1), 185)
)
legend(12, 185,
  legend = c("mulheres", "homens"),
  col = 2:3, bty = "n", cex = 1.5, pch = 16
)

# Women only
y2 <- y1[z == 0]
x2 <- x1[z == 0]

# Men only
y3 <- y1[z == 1]
x3 <- x1[z == 1]

# Women only, excluding those with 15 or more years of experience
# (per the original note, the two women with the most experience)
y4 <- y2[x2 < 15]
x4 <- x2[x2 < 15]

reg1 <- lm(y1 ~ x1)
reg2 <- lm(y2 ~ x2)
reg3 <- lm(y3 ~ x3)
reg4 <- lm(y4 ~ x4)

# Coefficients of determination, each relative to its own sample
R21 <- r2_percent(y1, reg1)
R22 <- r2_percent(y2, reg2)
R23 <- r2_percent(y3, reg3)
R24 <- r2_percent(y4, reg4)

# Graphical summary: scatter plot plus the four fitted lines
plot(x1, y1,
  xlab = "Anos de experiencia", ylab = "Salario",
  col = z + 2, pch = 16, ylim = c(min(y1), 185)
)
abline(reg1, lwd = 2)
abline(reg2, col = 2, lwd = 2)
abline(reg3, col = 3, lwd = 2)
abline(reg4, col = 4, lwd = 2)
legend(12, 185,
  legend = c(
    paste0("Todos (R2=", R21, "%)"),
    paste0("Mulheres (R2=", R22, "%)"),
    paste0("Homens (R2=", R23, "%)"),
    paste0("Mulheres - 2 outliers (R2=", R24, "%)")
  ),
  col = 1:4, lwd = 2, bty = "n"
)
title("salario = beta0 + beta1*(anos de experiencia)")

################################################################################
# SECOND SET OF SIMPLE LINEAR REGRESSIONS
# Explaining salary linearly through position
################################################################################
y1 <- dados[, 1]
x1 <- dados[, 2]
z <- dados[, 4]

# Distribution of women and men across positions in the company.
# The men's bars are shifted by 0.1 so they do not overlap the women's bars;
# their x positions are taken from the table itself rather than hard-coded.
plot(table(x1[z == 0]),
  type = "h", col = 2, ylim = c(0, 30), lwd = 3,
  ylab = "Frequencia", xlab = "Posicao"
)
men_counts <- table(x1[z == 1])
lines(as.numeric(names(men_counts)) + 0.1, as.vector(men_counts),
  type = "h", col = 3, lwd = 3
)
legend(2, 30,
  legend = c("Mulheres", "Homens"),
  col = 2:3, lwd = 3, bty = "n", cex = 1.5
)

# Salary boxplots, one per position 1..9 (empty positions keep their slot)
boxplot(split(y1, factor(x1, levels = 1:9)),
  xlab = "Posicao", ylab = "Salario", names = 1:9
)

# Salary versus position (men shifted by 0.1 on the x axis for readability)
plot(x1 + 0.1 * z, y1,
  col = z + 2, pch = 16,
  xlab = "Posicao", ylab = "Salario", axes = FALSE
)
axis(2)
box()
axis(1, at = 1:9)
legend(3, 170,
  legend = c("Mulheres", "Homens"),
  col = 2:3, bty = "n", cex = 1.5, pch = 16
)

# Women only
y2 <- y1[z == 0]
x2 <- x1[z == 0]

# Men only
y3 <- y1[z == 1]
x3 <- x1[z == 1]

# Simple linear regressions
reg5 <- lm(y1 ~ x1)
reg6 <- lm(y2 ~ x2)
reg7 <- lm(y3 ~ x3)

# Coefficients of determination, each relative to its own sample
R25 <- r2_percent(y1, reg5)
R26 <- r2_percent(y2, reg6)
R27 <- r2_percent(y3, reg7)

# Graphical summary: scatter plot plus the three fitted lines
plot(x1 + 0.1 * z, y1,
  xlab = "Posicao", ylab = "Salario",
  col = z + 2, pch = 16, axes = FALSE
)
axis(2)
box()
axis(1, at = 1:9)
abline(reg5, lwd = 2)
abline(reg6, col = 2, lwd = 2)
abline(reg7, col = 3, lwd = 2)
title("salario = beta0 + beta1*posicao")
legend(2, 170,
  legend = c(
    paste0("Todos (R2=", R25, "%)"),
    paste0("Mulheres (R2=", R26, "%)"),
    paste0("Homens (R2=", R27, "%)")
  ),
  col = 1:3, lwd = 2, bty = "n"
)

dev.off()