##########################################################################################
#
# ANOTHER HOUSE PRICE DATASET
#
##########################################################################################
#
# The data set has 546 observations, which were gathered by the authors from the
# Multiple Listing Service for houses sold in Windsor, Ontario, Canada. The
# variables are defined as follows:
#
# sell = sale price of a house
# lot  = the lot size of a property in square feet
# bdms = the number of bedrooms
# fb   = the number of full bathrooms
# sty  = the number of stories excluding basement
# drv  = 1 if the house has a driveway
# rec  = 1 if the house has a recreational room
# ffin = 1 if the house has a full finished basement
# ghw  = 1 if the house uses gas for hot water heating
# ca   = 1 if there is central air conditioning
# gar  = the number of garage places
# reg  = 1 if the house is located in the preferred neighbourhood of the city
#
# Data: http://qed.econ.queensu.ca/jae/1996-v11.6/anglin-gencay/readme.ag.txt
#
# Source: Paul Anglin and Ramazan Gencay, "Semiparametric Estimation of a Hedonic
#         Price Function", Journal of Applied Econometrics, Vol. 11, No. 6, 1996,
#         pp. 633-648.
#
##########################################################################################
#
# Breusch-Pagan test = 112.93 and White test = 44.97. The chi-square(9) distribution
# has a 5% critical value of 16.92. Since both test statistics are greater than the
# critical value, both of these tests indicate that heteroskedasticity is present.
# Take logs of all variables: BP=12.96 and W=11.45, which are both less than the
# 5% critical value.
#
##########################################################################################

# Axis labels (levels and logs) and short variable names used in plot titles.
# All four vectors follow the column order of the data file.
names <- c(
  "Sale price (in 1000 reals)", "Lot size (in square meters)",
  "Number of bedrooms", "Number of full bathrooms",
  "Number of stories excluding basement", "1=house has a driveway",
  "1=house has a recreational room", "1=house has a full finished basement",
  "1=house uses gas for hot water heating", "1=central air conditioning",
  "the number of garage places",
  "1=located in the preferred neighbourhood of the city"
)
lnames <- c(
  "Log sale price (in 1000 reals)", "Log lot size (in square meters)",
  "Log number of bedrooms", "Log number of full bathrooms",
  "Log number of stories excluding basement", "1=house has a driveway",
  "1=house has a recreational room", "1=house has a full finished basement",
  "1=house uses gas for hot water heating", "1=central air conditioning",
  "the number of garage places",
  "1=located in the preferred neighbourhood of the city"
)
names1 <- c("sell", "lot", "bdms", "fb", "sty", "drv",
            "rec", "ffin", "ghw", "ca", "gar", "reg")
lnames1 <- c("logsell", "loglot", "logbdms", "logfb", "logsty", "drv",
             "rec", "ffin", "ghw", "ca", "gar", "reg")

# Regress the first column of `df` on each remaining column in turn and draw
# one scatter plot per regressor with the fitted OLS line. The title shows the
# estimated equation (coefficients rounded to 3 decimals) with the standard
# errors (rounded to 2 decimals) in parentheses on the line below.
#   df        data frame; column 1 is the response
#   x_labels  axis labels, indexed like the columns of `df`
#   y_label   label of the vertical axis
#   y_name    short response name shown in the title
#   x_names   short regressor names shown in the title, indexed like `df`
plot_simple_fits <- function(df, x_labels, y_label, y_name, x_names) {
  y <- df[[1]]
  for (i in seq_along(x_names)[-1]) {
    x <- df[[i]]
    fit <- lm(y ~ x)
    est <- round(coef(fit), 3)
    # vcov() is sigma^2 * (X'X)^-1 computed on the rows lm() actually used.
    se <- round(sqrt(diag(vcov(fit))), 2)
    plot(x, y, xlab = x_labels[i], ylab = y_label)
    # Draw the line from the fit itself, not from the rounded coefficients.
    abline(fit, col = 2, lwd = 3)
    title(paste0(y_name, " = ", est[1], "+", est[2], "*", x_names[i],
                 "\n (", se[1], ") (", se[2], ")"))
  }
  invisible(NULL)
}

# Draw two panels for a fitted model: residuals against fitted values, and
# squared residuals against fitted values with a least-squares trend line.
# The caller is expected to have set up a two-panel layout.
plot_residual_diagnostics <- function(fitted_values, resid, main) {
  plot(fitted_values, resid, xlab = "Fitted", ylab = "Residuals", main = main)
  abline(h = 0, lty = 2)
  sq_resid <- resid^2
  plot(fitted_values, sq_resid, xlab = "Fitted", ylab = "Square residuals")
  abline(lm(sq_resid ~ fitted_values), col = 2, lwd = 2)
  invisible(NULL)
}

# The data file is read from the current working directory.
data <- read.table("houseprice-canada.txt", header = TRUE)

# Transformations: from sq-feet to sq-meters and dollars to reals
reals_per_dollar <- 2.21500132
sqm_per_sqft <- 0.09290304
data[[1]] <- data[[1]] * reals_per_dollar / 1000
data[[2]] <- data[[2]] * sqm_per_sqft

pdf(file = "houseprice-canada.pdf", width = 10, height = 8)

################################
# Model in levels
################################

par(mfrow = c(1, 1))
plot_simple_fits(data, names, names[1], "sell", names1)

# Full model
reg2 <- lm(sell ~ lot + bdms + fb + sty + drv + rec + ffin + ghw + ca + gar + reg,
           data = data)
yhat <- fitted(reg2)
yhat2 <- yhat^2
ehat <- residuals(reg2)
ehat2 <- ehat^2

par(mfrow = c(1, 2))
plot_residual_diagnostics(yhat, ehat, main = "Full model")

# print() is explicit so the results also appear when the file is source()d.

# Pagan homoskedasticity test
print(summary(lm(
  ehat2 ~ lot + bdms + fb + sty + drv + rec + ffin + ghw + ca + gar + reg,
  data = data
)))

# White (simplified) homoskedasticity test
print(summary(lm(ehat2 ~ yhat + yhat2)))

################################
# Log model
################################

# Logs of the first five columns only; the remaining columns are left in levels.
# A separate data frame keeps `data` in levels.
ldata <- data
ldata[1:5] <- lapply(data[1:5], log)
colnames(ldata)[1:5] <- c("lsell", "llot", "lbdms", "lfb", "lsty")

par(mfrow = c(1, 1))
# The x-axis uses the log labels so that it matches the logged regressors.
plot_simple_fits(ldata, lnames, lnames[1], "logsell", lnames1)

reg3 <- lm(
  lsell ~ llot + lbdms + lfb + lsty + drv + rec + ffin + ghw + ca + gar + reg,
  data = ldata
)
yhat <- fitted(reg3)
yhat2 <- yhat^2
ehat <- residuals(reg3)
ehat2 <- ehat^2

par(mfrow = c(1, 2))
plot_residual_diagnostics(yhat, ehat, main = "Full model (log)")

# Pagan homoskedasticity test
print(summary(lm(
  ehat2 ~ llot + lbdms + lfb + lsty + drv + rec + ffin + ghw + ca + gar + reg,
  data = ldata
)))

# White (simplified) homoskedasticity test
print(summary(lm(ehat2 ~ yhat + yhat2)))

dev.off()