############################################################
# HOUSE PRICE DATA
############################################################
#
# Home: index of the house
# Nbhd: Neighborhood
# Offers: number of offers
# SqFt: size in square feet
# Brick: dummy for brick house
# Bedrooms: number of bedrooms
# Bathrooms: number of bathrooms
# Price: price in dollars
#
############################################################
#
# Fits simple linear regressions of price on size, overall and
# within subgroups (brick, neighborhood, bedrooms, bathrooms),
# and writes one scatter plot per section to a PDF file.
# Fitted models are also printed at top level.

# Read the data before opening the PDF device so that a failed
# read does not leave a graphics device open.
data <- read.table("houseprice.txt", header = TRUE)
pdf(file = "houseprice-regressionoutput.pdf", width = 8, height = 8)

# Scaling and plotting the variables
# (columns are accessed explicitly instead of attach()-ing the data frame)
size <- data$SqFt / 1000
price <- data$Price / 1000

# Price against size

# A) Price as a linear function of size
##############################
par(mfrow = c(1, 1))
plot(size, price,
  xlab = "Size (1000 square feet)", ylab = "Price (1000 dollars)",
  pch = 16
)
reg <- lm(price ~ size)
summary(reg)
# abline() accepts the fitted model directly; no need for reg$coef.
abline(reg, lwd = 3, col = 2)
title("price = -10 + 70*size")
text(1.6, 200, "min size = 1.45", cex = 1.25, col = "blue")
text(1.6, 190, "max size = 2.59", cex = 1.25, col = "blue")
text(1.6, 180, "min price = 69.1", cex = 1.25, col = "blue")
text(1.6, 170, "max price= 211.2", cex = 1.25, col = "blue")

# B) Taking into account the type of house
###################################
# read.table() returns character columns on R >= 4.0, which cannot be used
# as colours. An explicit factor gives stable codes: 1 = "No", 2 = "Yes",
# matching the legend colours below.
brick <- factor(data$Brick, levels = c("No", "Yes"))

# Fit each subgroup once; the fit is printed and later reused by abline().
fit_brick_no <- lm(price ~ size, subset = brick == "No")
fit_brick_yes <- lm(price ~ size, subset = brick == "Yes")
fit_brick_no
fit_brick_yes

par(mfrow = c(1, 1))
plot(size, price,
  xlab = "Size (1000 square feet)", ylab = "Price (1000 dollars)",
  pch = 16, col = as.integer(brick)
)
legend("topleft",
  legend = c("Brick=NO: price=5+59*size", "Brick=YES: price=-23+84*size"),
  col = 1:2, pch = 16, cex = 1.5
)
abline(fit_brick_no, lwd = 3, col = 1)
# The original drew the "No" line twice; the "Yes" line belongs here.
abline(fit_brick_yes, lwd = 3, col = 2)

# C) Taking into account the neighborhood
###################################
nbhd <- data$Nbhd

fit_nbhd_1 <- lm(price ~ size, subset = nbhd == 1)
fit_nbhd_2 <- lm(price ~ size, subset = nbhd == 2)
fit_nbhd_3 <- lm(price ~ size, subset = nbhd == 3)
fit_nbhd_1
fit_nbhd_2
fit_nbhd_3

par(mfrow = c(1, 1))
plot(size, price,
  xlab = "Size (1000 square feet)", ylab = "Price (1000 dollars)",
  pch = 16, col = nbhd
)
legend("topleft",
  legend = c(
    "Nbhd=1 - price=33+40*size",
    "Nbhd=2 - price=26+49*size",
    "Nbhd=3 - price=57+49*size"
  ),
  col = 1:3, pch = 16, cex = 1.5
)
abline(fit_nbhd_1, lwd = 3, col = 1)
abline(fit_nbhd_2, lwd = 3, col = 2)
abline(fit_nbhd_3, lwd = 3, col = 3)

# D) Taking into account the number of bedrooms
#########################################
table(Bedrooms = data$Bedrooms)

# Collapse into two groups; the codes (2 and 4) double as plot colours.
bed <- data$Bedrooms
bed[bed <= 3] <- 2
bed[bed >= 4] <- 4

fit_bed_2 <- lm(price ~ size, subset = bed == 2)
fit_bed_4 <- lm(price ~ size, subset = bed == 4)
fit_bed_2
fit_bed_4

par(mfrow = c(1, 1))
plot(size, price,
  xlab = "Size (1000 square feet)", ylab = "Price (1000 dollars)",
  pch = 16, col = bed
)
legend("topleft",
  legend = c(
    "2/3 bedrooms: price=27+49*size",
    "4/5 bedrooms: price=24+61*size"
  ),
  col = c(2, 4), pch = 16, cex = 1.25
)
abline(fit_bed_2, lwd = 3, col = 2)
abline(fit_bed_4, lwd = 3, col = 4)

# E) Taking into account the number of bathrooms
#########################################
table(Bathrooms = data$Bathrooms)

# Cap at 3 so houses with 3 or more bathrooms form one group;
# the codes (2 and 3) double as plot colours.
bath <- data$Bathrooms
bath[bath >= 3] <- 3

fit_bath_2 <- lm(price ~ size, subset = bath == 2)
fit_bath_3 <- lm(price ~ size, subset = bath == 3)
fit_bath_2
fit_bath_3

par(mfrow = c(1, 1))
plot(size, price,
  xlab = "Size (1000 square feet)", ylab = "Price (1000 dollars)",
  pch = 16, col = bath
)
legend("topleft",
  legend = c(
    "2 Bathrooms: price=23+50*size",
    "3 Bathrooms: price=36+52*size"
  ),
  col = 2:3, pch = 16, cex = 1.25
)
abline(fit_bath_2, lwd = 3, col = 2)
abline(fit_bath_3, lwd = 3, col = 3)

dev.off()