###################################################################################
#
# Goal: investigate the determinants of the average performance of students
# from different California schools (n = 420) on a standardized test given
# to fifth-grade students (TESTSCR).
#
# NOTE(review): the original header names the data set "caliescom.xls", while
# the code reads "caliescom.csv" -- presumably a CSV export of the same data;
# confirm.
#
# STR     - number of students / number of teachers;
# MEALPCT - percentage of students who qualify for the subsidized lunch.
#
###################################################################################

# The original script called rm(list = ls()) and attach(data). Both are
# dropped: the first silently wipes the caller's workspace, the second puts a
# stale copy of the columns on the search path. Models below use `data =`.

# Axis labels, indexed by column position (1 = response, 2:3 = regressors).
nomes <- c("Nota no teste", "razao alunos por professor", "% lanche subsidiado")

data <- read.csv("caliescom.csv", header = TRUE)
n <- nrow(data)

# Rescale the first column so that its maximum equals 100.
# Assumes column 1 is TESTSCR and columns 2:3 are STR and MEALPCT (the plots
# below index by position, the models by name) -- TODO confirm against the file.
data[, 1] <- 100 * data[, 1] / max(data[, 1])

# Scatter plots of the response against each regressor, with the simple
# least-squares line overlaid. Previous graphical settings are kept so they
# can be restored once all plots have been drawn.
old_par <- par(mfrow = c(1, 2))
for (i in 2:3) {
  plot(data[[i]], data[[1]], xlab = nomes[i], ylab = nomes[1])
  abline(lm(data[[1]] ~ data[[i]]), col = 2, lwd = 2)
}

# Multiple regression of the test score on both regressors.
reg <- lm(TESTSCR ~ STR + MEALPCT, data = data)
print(summary(reg))  # explicit print so the table also shows under source()

# Use the accessor functions instead of reg$fit / reg$res, which only worked
# through partial matching of `$` on the lm object.
yhat <- fitted(reg)
e <- residuals(reg)

# Residual variance estimate. df.residual(reg) equals n - 3 when no rows are
# dropped, and stays correct if lm() removes rows with missing values.
s2 <- sum(e^2) / df.residual(reg)
s <- sqrt(s2)

# Percentage of residuals lying within +/- 2 residual standard deviations.
# (Name kept from the original script; as a variable it masks stats::cov.)
cov <- round(100 * mean(abs(e) < 2 * s), 1)

# Residual diagnostics: residuals vs. fitted values with +/- 2s bands, and a
# residual histogram with the N(0, s^2) density overlaid.
par(mfrow = c(1, 2))
plot(yhat, e, xlab = "Fitted", ylab = "Residuals", pch = 16)
abline(h = 0, lty = 2)
abline(h = -2 * s, lty = 2)
abline(h = 2 * s, lty = 2)
title(paste0("Coverage = ", cov, "%"))
hist(e, freq = FALSE, xlab = "Residuals", main = "")
box()
xxx <- seq(min(e), max(e), length.out = 1000)
lines(xxx, dnorm(xxx, 0, s), col = 2, lwd = 2)
par(old_par)

# RESET test: augment the model with the squared and cubed fitted values.
# reg1 is the same specification as reg, so the fit is reused.
reg1 <- reg
yhat2 <- fitted(reg1)^2
yhat3 <- fitted(reg1)^3
reg2 <- lm(TESTSCR ~ STR + MEALPCT + yhat2 + yhat3, data = data)
print(summary(reg2))

# The RESET statistic is the joint F-test that the coefficients on yhat2 and
# yhat3 are both zero; the individual t-tests in summary(reg2) do not give it.
print(anova(reg1, reg2))