################################################################################ # # Data on 81 cars about # # MPG (average miles per gallons) # SP (top speed, miles per hour) # HP (engine horsepower) # VOL (cubic feet of cab space) # WT (vehicle weight, hundreds of pounds) # # Source: U.S. Environmental Protection Agency, 1991, Report EPA/AA/CTAB/91-02 # ################################################################################ rm(list=ls()) pdf(file="mileage.pdf",width=12,height=7) names = c("Average miles per gallons","Top speed, miles per hour", "Engine horsepower","Cubic feet of cab space","Vehicle weight, hundreds of pounds") lnames = c("Average miles per gallons (LOG)","Top speed, miles per hour (LOG)", "Engine horsepower (LOG)","Cubic feet of cab space (LOG)","Vehicle weight, hundreds of pounds (LOG)") data = read.table("mileage.txt",header=TRUE) attach(data) pairs(data,label=names) par(mfrow=c(2,4)) for (i in 1:4){ plot(data[,1+i],MPG,xlab=names[1+i],ylab=names[1]) abline(lm(MPG~data[,1+i]),col=2) } for (i in 1:4){ reg = lm(MPG~data[,1+i]) plot(data[,1+i],reg$res,xlab=names[1+i],ylab="Residuals") abline(h=0,lty=2) } # Base model: MPG = beta1 + beta2*SP + beta3*HP + beta4*WT reg = lm(MPG~SP+HP+WT) summary(reg) par(mfrow=c(1,2)) plot(reg$fit,MPG,xlab="Fitted",ylab=names[1]) abline(0,1,lty=2) plot(reg$fit,reg$res,xlab="Fitted",ylab="Residuals") abline(h=0,lty=2) # Tests: Pagan's, White's and Modified White's e2 = reg$res^2 # Pagan's test summary(lm(e2~SP+HP+WT)) # White's test SP2 = SP^2 HP2 = HP^2 WT2 = WT^2 SPHP = SP*HP SPWT = SP*WT HPWT = HP*WT summary(lm(e2~SP+HP+WT+SP2+HP2+WT2+SPHP+SPWT+HPWT)) # White's more parsimonious test yhat = reg$fit yhat2 = yhat^2 summary(lm(e2~yhat+yhat2)) # Transformando as covariaveis # ---------------------------- data1 = log(data) pairs(data1,label=lnames) par(mfrow=c(2,4)) for (i in 1:4){ plot(data1[,1+i],MPG,xlab=lnames[1+i],ylab=lnames[1]) abline(lm(MPG~data1[,1+i]),col=2) } for (i in 1:4){ reg = lm(MPG~data1[,1+i]) plot(data1[,1+i],reg$res,xlab=lnames[1+i],ylab="Residuals") abline(h=0,lty=2) } # Log-log model: LMPG = beta1 + beta2*LSP + beta3*LHP + beta4*LWT LMPG = log(MPG) LSP = log(SP) LHP = log(HP) LWT = log(WT) reg = lm(LMPG~LSP+LHP+LWT) summary(reg) par(mfrow=c(1,2)) plot(reg$fit,LMPG,xlab="Fitted",ylab=lnames[1]) abline(0,1,lty=2) plot(reg$fit,reg$res,xlab="Fitted",ylab="Residuals") abline(h=0,lty=2) # Tests: Pagan's, White's and Modified White's e2 = reg$res^2 # Pagan's test summary(lm(e2~LSP+LHP+LWT)) # White's test LSP2 = LSP^2 LHP2 = LHP^2 LWT2 = LWT^2 LSPHP = LSP*LHP LSPWT = LSP*LWT LHPWT = LHP*LWT summary(lm(e2~LSP+LHP+LWT+LSP2+LHP2+LWT2+LSPHP+LSPWT+LHPWT)) # White's more parsimonious test yhat = reg$fit yhat2 = yhat^2 summary(lm(e2~yhat+yhat2)) # Another regression wt = WT*0.455 sp = SP*1.6 reg = lm(sp~wt) coef = round(reg$coef,1) reg1 = lm(log(reg$res^2)~wt) coef1 = round(reg1$coef,2) par(mfrow=c(1,2)) plot(wt,sp,xlab="Weight - 100kg",ylab="Speed - km/h") abline(coef,col=2) title(paste("Speed = ",coef[1]," + ",coef[2],"Weight",sep="")) plot(wt,log(reg$res^2),xlab="Weight - 100kg",ylab="Log residual squared") abline(reg1$coef,col=2) title(paste("Log e^2 = ",coef1[1]," + ",coef1[2],"Weight",sep="")) f = function(par){ sig2 = exp(par[3]+par[4]*wt) f = -sum(dnorm(sp,par[1]+par[2]*wt,sqrt(sig2),log=TRUE)) return(f) } nonlinear = nlm(f,c(coef,coef1)) coefn = round(nonlinear$est[1:2],1) coefn1 = round(nonlinear$est[3:4],2) par(mfrow=c(1,2)) plot(wt,sp,xlab="Weight - 100kg",ylab="Speed - km/h") abline(coef,col=2) abline(coefn,col=4) title(paste("Speed = ",coefn[1]," + ",coefn[2],"Weight",sep="")) plot(wt,log(reg$res^2),xlab="Weight - 100kg",ylab="Log residual squared") abline(reg1$coef,col=2) abline(coefn1,col=4) title(paste("Log e^2 = ",coefn1[1]," + ",coefn1[2],"Weight",sep="")) par(mfrow=c(1,1)) wts = seq(min(wt),max(wt),length=100) plot(cbind(1,wts)%*%reg$coef[1:2],sqrt(exp(cbind(1,wts)%*%reg1$coef)), xlab="Fitted mean (km/h)",ylab="Fitted standard deviation (km/h)", pch=16,ylim=c(0,60)) points(cbind(1,wts)%*%nonlinear$est[1:2],sqrt(exp(cbind(1,wts)%*%nonlinear$est[3:4])),pch=16,col=2) legend(160,60,legend=c("Ordinary least squares","Maximum likelihood"),pch=16,col=1:2,bty="n",cex=2) dev.off()