################################################################################
#
# USED CARS
#
################################################################################
#
# Data taken from the advertising pages of the Sunday Times a few years ago,
# presenting cars for sale in the UK. For each car the data record the asking
# price, type of car, age of car (in six-month units based on date of
# registration), recorded mileage, and vendor.
#
# 1. Case number 1:54;
# 2. Asking price in pounds;
# 3. Type/Model: 0=model 500, 1=450, 2=380, 3=280, 4=200;
# 4. Age of car in six-month units, based on registration;
# 5. Recorded mileage (in thousands);
# 6. Vendor (0,1,2,3 are dealerships, 4="sale by owner").
#
################################################################################

# Data ----

names <- c("case", "price", "model", "age", "mileage", "vendor")
data <- read.table("usedcars.txt", header = TRUE)
n <- nrow(data)

# Fail early, with a readable message, if the file header does not provide the
# columns the regressions below rely on ("case" is never used in a model).
required_cols <- setdiff(names, "case")
missing_cols <- setdiff(required_cols, colnames(data))
if (length(missing_cols) > 0) {
  stop(
    "usedcars.txt is missing column(s): ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# Columns are always looked up through `data` (via `data =` or `data$...`)
# instead of attach()-ing the data frame, so no other object on the search
# path can silently mask them.

# Question a) ----
reg.a <- lm(price ~ mileage, data = data)
summary(reg.a)

# Question b) ----
reg.b <- lm(price ~ age, data = data)
summary(reg.b)

# Question d) ----
reg.d <- lm(price ~ mileage + age, data = data)
summary(reg.d)

# Question g) ----
# Model can be 0, 1, 2, 3 and 4.
# Therefore we need 4 dummy variables; model 4 is the baseline absorbed by the
# intercept. `%in%` never yields NA, so a missing model code gives a 0 dummy.
M0 <- as.numeric(data$model %in% 0)
M1 <- as.numeric(data$model %in% 1)
M2 <- as.numeric(data$model %in% 2)
M3 <- as.numeric(data$model %in% 3)

# The dummies are not columns of `data`; lm() finds them in the environment of
# the formula, while price/age/mileage are taken from `data`.
reg.g <- lm(price ~ M0 + M1 + M2 + M3, data = data)
summary(reg.g)

# Question h) ----
# Vendor can be 0, 1, 2, 3 and 4.
# Therefore we need 4 dummy variables; vendor 4 ("sale by owner") is the
# baseline absorbed by the intercept.
V0 <- as.numeric(data$vendor %in% 0)
V1 <- as.numeric(data$vendor %in% 1)
V2 <- as.numeric(data$vendor %in% 2)
V3 <- as.numeric(data$vendor %in% 3)

reg.h <- lm(price ~ V0 + V1 + V2 + V3, data = data)
summary(reg.h)

# Question i) ----
reg.i <- lm(
  price ~ mileage + age + M0 + M1 + M2 + M3 + V0 + V1 + V2 + V3,
  data = data
)
summary(reg.i)

# Residual plots ----

# Draw one residuals-vs-fitted panel for a fitted model, with a red zero line.
# fitted()/resid() are used instead of `$fit`/`$res`, which only worked through
# partial matching of the component names.
plot_residuals <- function(fit, main) {
  plot(fitted(fit), resid(fit), xlab = "fitted value", ylab = "residuals")
  abline(h = 0, col = 2)
  title(main)
}

# Write one residual panel per model to a 3x2 PDF page. The device is closed
# through on.exit() so a failing plot call cannot leave it open.
save_residual_plots <- function(file, fits, titles) {
  stopifnot(length(fits) == length(titles))
  pdf(file = file, width = 10, height = 10)
  on.exit(dev.off(), add = TRUE)
  par(mfrow = c(3, 2))
  for (i in seq_along(fits)) {
    plot_residuals(fits[[i]], titles[[i]])
  }
  invisible(file)
}

save_residual_plots(
  file = "hw5-fig1.pdf",
  fits = list(reg.a, reg.b, reg.d, reg.g, reg.h, reg.i),
  titles = c(
    "price on mileage",
    "price on age",
    "price on mileage and age",
    "price on type",
    "price on vendor",
    "price on mileage, age, type and vendor"
  )
)

# Excluding mileage and vendor from the regression ----
reg <- lm(price ~ age + M0 + M1 + M2 + M3, data = data)
summary(reg)