# Leave-one-out sensitivity analysis of a simple linear regression of house
# price on house size.
#
# Input : "houseprice.txt" (whitespace-delimited, with a header row), read from
#         the current working directory.
# Output: "houseprice-regression.pdf", two pages:
#           page 1 - scatter plot of price against size, overlaid with the n
#                    regression lines obtained by dropping one observation at
#                    a time;
#           page 2 - histograms (top row) and box plots (bottom row) of the
#                    leave-one-out intercepts, slopes and residual spreads,
#                    each with the full-data estimate marked in red.

houses <- read.table("houseprice.txt", header = TRUE)

# Columns are picked by position; both are rescaled to thousands to match the
# axis labels used below.
# NOTE(review): presumably column 4 is size in square feet and column 8 is
# price in dollars - confirm against the data file.
size <- houses[, 4] / 1000
price <- houses[, 8] / 1000
n <- nrow(houses)

pdf(file = "houseprice-regression.pdf", width = 12, height = 10)

# ---- Page 1: data and leave-one-out fitted lines ----
par(mfrow = c(1, 1))
plot(
  size, price,
  xlab = "Size (1000 square feet)",
  ylab = "Price (1000 dollars)",
  axes = FALSE,
  pch = 16
)
axis(1)
box()
axis(2, at = seq(80, 200, by = 20))
title("Leave one out fitted lines")

# Preallocate the results instead of growing them with c()/rbind() on every
# iteration (which copies the whole object each time).
coefs <- matrix(
  NA_real_,
  nrow = n, ncol = 2,
  dimnames = list(NULL, c("intercept", "slope"))
)
ss <- numeric(n)

# seq_len() yields an empty sequence when n == 0, whereas 1:n would give c(1, 0).
for (i in seq_len(n)) {
  # Negative indexing drops observation i from the fit.
  y <- price[-i]
  x <- size[-i]
  reg <- lm(y ~ x)

  # Full component names are spelled out; `$coef` / `$res` only worked through
  # partial matching.
  loo_coef <- reg$coefficients

  # Root mean squared residual: divides by the number of fitted observations,
  # not by the residual degrees of freedom.
  ss[i] <- sqrt(mean(reg$residuals^2))

  # A length-2 first argument is interpreted by abline() as (intercept, slope).
  # The colour index is the index of the left-out observation.
  abline(loo_coef, col = i)
  coefs[i, ] <- loo_coef
}

# ---- Reference fit on the complete data set ----
reg <- lm(price ~ size)
full_coef <- reg$coefficients
s <- sqrt(mean(reg$residuals^2))

# ---- Page 2: distribution of the leave-one-out estimates ----
# Changing the layout and drawing again starts a new page on the PDF device.
par(mfrow = c(2, 3))

hist(coefs[, "intercept"], xlab = "", prob = TRUE, main = "Intercept")
abline(v = full_coef[1], col = 2, lwd = 2)
hist(coefs[, "slope"], xlab = "", prob = TRUE, main = "Slope")
abline(v = full_coef[2], col = 2, lwd = 2)
hist(ss, xlab = "", prob = TRUE, main = "Standard deviation")
abline(v = s, col = 2, lwd = 2)

# outline = FALSE hides the outlier points in the box plots.
boxplot(coefs[, "intercept"], main = "Intercept", outline = FALSE)
abline(h = full_coef[1], col = 2, lwd = 2)
boxplot(coefs[, "slope"], main = "Slope", outline = FALSE)
abline(h = full_coef[2], col = 2, lwd = 2)
boxplot(ss, main = "Standard deviation", outline = FALSE)
abline(h = s, col = 2, lwd = 2)

dev.off()