################################################################################
#
# HOUSING PRICES IN BOSTON
#
# CRIM     per capita crime rate by town
# ZN       proportion of residential land zoned for lots over 25,000 sq. ft
# INDUS    proportion of non-retail business acres per town
# CHAS     Charles River dummy variable (=1 if tract bounds river; 0 otherwise)
# NOX      Nitrogen oxide concentration (parts per 10 million)
# RM       average number of rooms per dwelling
# AGE      proportion of owner-occupied units built prior to 1940
# DIS      weighted distances to five Boston employment centres
# RAD      index of accessibility to radial highways
# TAX      full-value property-tax rate per $10,000
# PTRATIO  pupil-teacher ratio by town
# B        1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town
# LSTAT    % lower status of the population
# MEDV     Median value of owner-occupied homes in $1000's
#
################################################################################
#
# Reads "boston-houseprice.txt", bins log(column 1) and summarises
# log(column 14) within each bin, fits the simple regression of log(column 14)
# on log(column 1), and writes a six-page PDF ("boston-houseprice.pdf") in
# which each page adds one more layer to the same scatter plot:
#   1. scatter plot only
#   2. + vertical lines at the bin boundaries
#   3. + per-bin means printed along the top
#   4. + per-bin standard deviations printed along the top
#   5. + per-bin means drawn as points
#   6. + fitted regression line, overall mean/sd of Y and a title
#
################################################################################

data <- read.table("boston-houseprice.txt", header = TRUE)

# Columns are addressed by position: 1 and 14 correspond to CRIM and MEDV in
# the table above (assumes the file keeps that column order -- TODO confirm).
# Rows whose column-14 value is 50 or more are dropped before taking logs.
keep <- data[, 14] < 50
x <- log(data[keep, 1])
y <- log(data[keep, 14])
n <- length(y)

# Bin boundaries on the log-crime axis: a fixed 0.5-wide grid from -4 to 3.5,
# padded by one outer boundary on each side so every observation is in a bin.
# NOTE(review): this presumes min(x) - 0.1 < -4 and max(x) + 0.1 > 3.5;
# otherwise the boundaries are not increasing.
cuts <- c(min(x) - 0.1, seq(-4, 3.5, by = 0.5), max(x) + 0.1)
nc <- length(cuts)
lower <- cuts[-nc]
upper <- cuts[-1]
mids <- (lower + upper) / 2

# Per-bin count, mean and standard deviation of y; bins are (lower, upper].
# Empty bins give NaN for the mean and NA for the standard deviation.
nn <- numeric(nc - 1)
means <- numeric(nc - 1)
stdevs <- numeric(nc - 1)
for (i in seq_len(nc - 1)) {
  yy <- y[x > lower[i] & x <= upper[i]]
  nn[i] <- length(yy)
  means[i] <- mean(yy)
  stdevs[i] <- sd(yy)
}

# Simple linear regression of y on x, fitted once and reused below.
reg <- lm(y ~ x)
a <- round(coef(reg)[1], 2)
b <- round(coef(reg)[2], 2)
# Root mean squared residual (divides by n, not by the residual df).
s <- round(sqrt(mean(residuals(reg)^2)), 2)

# A non-negative slope needs an explicit "+" so the title does not fuse the
# intercept and slope into one number (e.g. "Y=3.130.1*X").
slope_term <- if (isTRUE(b < 0)) b else paste0("+", b)
fit_title <- paste0("Fitted regression: Y=", a, slope_term,
                    "*X \n Regression StDev=", s)

# Base scatter plot shared by every page; extra head-room above max(y) leaves
# space for the per-bin annotations.
draw_scatter <- function() {
  plot(x, y,
       xlab = "Log per capita crime rate by town",
       ylab = "Log median value of owner-occupied homes in $1000's",
       ylim = c(min(y), max(y) + 0.5))
}

# Overlays in the order they are introduced; page k shows the first k - 1.
layers <- list(
  cut_lines = function() {
    abline(v = upper, lty = 3)
  },
  bin_means = function() {
    text(cuts[1], 4.2, "Mean")
    text(mids, 4.2, round(means, 2))
  },
  bin_stdevs = function() {
    text(cuts[1], 4.1, "StDev")
    text(mids, 4.1, round(stdevs, 2))
  },
  mean_points = function() {
    points(mids, means, pch = 16, col = 2, lwd = 2, cex = 2)
  },
  regression = function() {
    abline(reg, lwd = 2, col = 4)
    text(cuts[1] + 0.5, 2.0, paste0("Mean Y=", round(mean(y), 2)))
    text(cuts[1] + 0.5, 1.9, paste0("StDev Y=", round(sd(y), 2)))
    title(fit_title)
  }
)

pdf(file = "boston-houseprice.pdf", width = 18, height = 15)
# Close the device even if a plotting call fails, so the PDF is not left open.
invisible(tryCatch({
  par(mfrow = c(1, 1))
  for (k in seq(0, length(layers))) {
    draw_scatter()
    for (layer in layers[seq_len(k)]) {
      layer()
    }
  }
}, finally = dev.off()))