# HPRICE2.DTA (Stata file)
#
# For a sample of 506 communities in the Boston area, we estimate a model
# relating median housing price (price) in the community to various community
# characteristics: nox is the amount of nitrous oxide in the air, in parts per
# million; dist is a weighted distance of the community from five employment
# centers, in miles; rooms is the average number of rooms in houses in the
# community; and stratio is the average student-teacher ratio of schools in
# the community.
#
# Obs: 506
#
#  1. price     median housing price, $
#  2. crime     crimes committed per capita
#  3. nox       nitrous oxide, parts per 100 mill.
#  4. rooms     avg number of rooms per house
#  5. dist      weighted dist. to 5 employ centers
#  6. radial    accessibility index to radial highways
#  7. proptax   property tax per $1000
#  8. stratio   average student-teacher ratio
#  9. lowstat   % of people 'lower status'
# 10. lprice    log(price)
# 11. lnox      log(nox)
# 12. lproptax  log(proptax)
#
# This script fits four simple regressions of price on lowstat (every
# level/log combination of the two variables), overlays the fitted curves on
# a scatter plot, and compares the models by root mean squared error (RMSE)
# measured on the price scale (in $1000).

library(foreign)

data <- read.dta(
  "http://fmwww.bc.edu/ec-p/data/wooldridge/hprice2.dta",
  convert.factors = FALSE
)

# Keep only communities whose median price is below $40,000. The column is
# selected by name rather than by position.
data <- data[data[["price"]] < 40000, ]

# Explicit working vectors instead of attach(): nothing is left on the search
# path, and it is clear that `price` below is the rescaled copy.
price <- data[["price"]] / 1000  # median price in $1000
lowstat <- data[["lowstat"]]

# Scatter plot of the raw data.
par(mfrow = c(1, 1))
plot(lowstat, price,
  xlab = "% of people 'lower status'",
  ylab = "median housing price, 1000$",
  pch = 16, ylim = c(0, 50)
)

# Grid over the observed range of lowstat, shared by all fitted curves.
xxx <- seq(min(lowstat), max(lowstat), length.out = 1000)

# Model 1: level-level, price ~ lowstat.
reg1 <- lm(price ~ lowstat)
bhat1 <- coef(reg1)
yhat1 <- bhat1[1] + bhat1[2] * lowstat
lines(xxx, bhat1[1] + bhat1[2] * xxx, col = 2, lwd = 2)

# Model 2: log-level, log(price) ~ lowstat.
# Fitted values are exponentiated so they are on the price scale.
reg2 <- lm(log(price) ~ lowstat)
bhat2 <- coef(reg2)
yhat2 <- exp(bhat2[1] + bhat2[2] * lowstat)
lines(xxx, exp(bhat2[1] + bhat2[2] * xxx), col = 3, lwd = 2)

# Model 3: log-log, log(price) ~ log(lowstat).
reg3 <- lm(log(price) ~ log(lowstat))
bhat3 <- coef(reg3)
yhat3 <- exp(bhat3[1] + bhat3[2] * log(lowstat))
lines(xxx, exp(bhat3[1] + bhat3[2] * log(xxx)), col = 4, lwd = 2)

# Model 4: level-log, price ~ log(lowstat).
reg4 <- lm(price ~ log(lowstat))
bhat4 <- coef(reg4)
yhat4 <- bhat4[1] + bhat4[2] * log(lowstat)
lines(xxx, bhat4[1] + bhat4[2] * log(xxx), col = 5, lwd = 2)

# In-sample RMSE of each model, always measured on the price scale so the
# four numbers are directly comparable.
rmse1 <- sqrt(mean((price - yhat1)^2))
rmse2 <- sqrt(mean((price - yhat2)^2))
rmse3 <- sqrt(mean((price - yhat3)^2))
rmse4 <- sqrt(mean((price - yhat4)^2))

# The legend is built from the computed values so that it cannot drift from
# the data, and the statistic is labelled RMSE because that is what is
# computed above. Model names are kept as in the original script.
model_names <- c("nivel-nivel", "log-nivel", "log-log", "nivel-log")
legend("topright",
  legend = sprintf(
    "%s, RMSE=%.3f",
    model_names, c(rmse1, rmse2, rmse3, rmse4)
  ),
  col = 2:5, lty = 1, lwd = 2
)

c(rmse1, rmse2, rmse3, rmse4)