################################################################################################ # # RETAIL PROFITS (for a chain of pharmacies looking into a new community) # ################################################################################################ # # Y: Profit # X1: Income (median annual salary) # X2: disposable income (median income net of taxes) # X3: Birth rate (per 1,000 people in the location population) # X4: Social Security recipients (per 1,000 peoople in the location) # X5: Cardiovascular deaths (per 100,000 people in the local population) # X6: Percentage of the local population aged 65 or more # # Kansas City, MO: # X1 = 28774 # X2 = 22642 # X3 = 14.4 # X4 = 148.8 # X5 = 338.5 # X6 = 11.4 # ################################################################################################ names = c("Income (median annual salary)", "disposable income (median income net of taxes)", "Birth rate (per 1,000 people in the location population)", "Social Security recipients (per 1,000 peoople in the location)", "Cardiovascular deaths (per 100,000 people in the local population)", "Percentage of the local population aged 65 or more") names1 = c("Income","DispIncome","Birthrate","SocSec","CVdeath","Aged65") data = read.table("profit.txt",header=TRUE) n = nrow(data) y = data[,1]/1000 data[,2] = data[,2]/1000 data[,3] = data[,3]/1000 X = data[,2:7] pdf(file="profit-regression.pdf",width=20,height=15) par(mfrow=c(1,1)) # Model with 0 regressor plot(y,xlab="observation index",ylab="Profit",pch=16,ylim=c(100,310)) abline(h=mean(y),col=2) s = round(sqrt(var(y)),1) title(paste("No regression \n s=",s,sep="")) abline(h=mean(y)-2*sqrt(var(y)),lty=2,col=2) abline(h=mean(y)+2*sqrt(var(y)),lty=2,col=2) ss = s # Models with 1 regressor for (i in 1:6){ x = X[,i] ord = order(x) x = x[ord] y1 = y[ord] reg = lm(y1~x) coef = reg$coef s = sqrt(mean(reg$res^2)) ss = c(ss,s) plot(x,y1,xlab=names[i],ylab="Profit",pch=16,ylim=c(100,310)) abline(coef,col=2) lines(x,reg$fit+2*s,col=2,lty=2) lines(x,reg$fit-2*s,col=2,lty=2) coef = round(coef,4) s = round(s,1) r2 = round(summary(reg)$r.squared,2) title(paste("Profit = ",coef[1],"+ (",coef[2],")*",names1[i],"\n s=",s," - R2=",r2,sep="")) } # Models with 2 regressors p2 = matrix(c(1,2,1,3,1,4,1,5,1,6,2,3,2,4,2,5,2,6,3,4,3,5,3,6,4,5,4,6,5,6),byrow=TRUE,ncol=2) for (i in 1:15){ x = X[,p2[i,]] reg = lm(y~x[,1]+x[,2]) coef = reg$coef s = sqrt(mean(reg$res^2)) ss = c(ss,s) plot(reg$fit,y,xlab="Fitted profit",ylab="Profit",pch=16,ylim=c(100,310),xlim=c(100,310)) coef = round(coef,4) s = round(s,1) r2 = round(summary(reg)$r.squared,2) title(paste("Profit = ",coef[1],"+ (",coef[2],")*",names1[p2[i,1]], "+ (",coef[3],")*",names1[p2[i,2]],"\n s=",s," - R2=",r2,sep="")) } # Models with 3 regressors p3 = matrix(c(1,2,3,1,2,4,1,2,5,1,2,6,1,3,4,1,3,5,1,3,6,1,4,5,1,4,6,1,5,6, 2,3,4,2,3,5,2,3,6,2,4,5,2,4,6,2,5,6,3,4,5,3,4,6,3,5,6,4,5,6),byrow=TRUE,ncol=3) for (i in 1:20){ x = X[,p3[i,]] reg = lm(y~x[,1]+x[,2]+x[,3]) coef = reg$coef s = sqrt(mean(reg$res^2)) ss = c(ss,s) plot(reg$fit,y,xlab="Fitted profit",ylab="Profit",pch=16,ylim=c(100,310),xlim=c(100,310)) coef = round(coef,4) s = round(s,1) r2 = round(summary(reg)$r.squared,2) title(paste("Profit = ",coef[1], "+ (",coef[2],")*",names1[p3[i,1]], "+ (",coef[3],")*",names1[p3[i,2]], "+ (",coef[4],")*",names1[p3[i,3]], "\n s=",s," - R2=",r2,sep="")) } # Models with 4 regressors p4 = matrix(c(1,2,3,4,1,2,3,5,1,2,3,6,1,2,4,5,1,2,4,6,1,2,5,6,1,3,4,5,1,3,4,6, 1,3,5,6,1,4,5,6,2,3,4,5,2,3,4,6,2,3,5,6,2,4,5,6,3,4,5,6),byrow=TRUE,ncol=4) for (i in 1:15){ x = X[,p4[i,]] reg = lm(y~x[,1]+x[,2]+x[,3]+x[,4]) coef = reg$coef s = sqrt(mean(reg$res^2)) ss = c(ss,s) plot(reg$fit,y,xlab="Fitted profit",ylab="Profit",pch=16,ylim=c(100,310),xlim=c(100,310)) coef = round(coef,4) s = round(s,1) r2 = round(summary(reg)$r.squared,2) title(paste("Profit = ",coef[1], "+ (",coef[2],")*",names1[p4[i,1]], "+ (",coef[3],")*",names1[p4[i,2]], "+ (",coef[4],")*",names1[p4[i,3]], "+ (",coef[5],")*",names1[p4[i,4]], "\n s=",s," - R2=",r2,sep="")) } # Models with 5 regressors p5 = matrix(c(1,2,3,4,5,1,2,3,4,6,1,2,3,5,6,1,2,4,5,6,1,3,4,5,6,2,3,4,5,6),byrow=TRUE,ncol=5) for (i in 1:6){ x = X[,p5[i,]] reg = lm(y~x[,1]+x[,2]+x[,3]+x[,4]+x[,5]) coef = reg$coef s = sqrt(mean(reg$res^2)) ss = c(ss,s) plot(reg$fit,y,xlab="Fitted profit",ylab="Profit",pch=16,ylim=c(100,310),xlim=c(100,310)) coef = round(coef,4) s = round(s,1) r2 = round(summary(reg)$r.squared,2) title(paste("Profit = ",coef[1], "+ (",coef[2],")*",names1[p5[i,1]], "+ (",coef[3],")*",names1[p5[i,2]], "+ (",coef[4],")*",names1[p5[i,3]], "+ (",coef[5],")*",names1[p5[i,4]], "+ (",coef[6],")*",names1[p5[i,5]], "\n s=",s," - R2=",r2,sep="")) } # Model with 6 regressors (complete model) x = X reg = lm(y~x[,1]+x[,2]+x[,3]+x[,4]+x[,5]+x[,6]) coef = reg$coef s = sqrt(mean(reg$res^2)) plot(reg$fit,y,xlab="Fitted profit",ylab="Profit",pch=16,ylim=c(100,310),xlim=c(100,310)) coef = round(coef,4) s = round(s,1) ss = c(ss,s) r2 = round(summary(reg)$r.squared,2) title(paste("Profit = ",coef[1], "+ (",coef[2],")*",names1[1], "+ (",coef[3],")*",names1[2], "+ (",coef[4],")*",names1[3], "+ (",coef[5],")*",names1[4], "+ (",coef[6],")*",names1[5], "+ (",coef[7],")*",names1[6], "\n s=",s," - R2=",r2,sep="")) par(mfrow=c(1,1)) cols = c(1,rep(2,6),rep(3,15),rep(4,20),rep(5,15),rep(6,6),rep(7,1)) plot(ss,xlab="Model index",ylab="Standard error",col=0) points(ss,col=cols,pch=16) abline(v=1.5,lty=2) abline(v=7.5,lty=2) abline(v=22.5,lty=2) abline(v=42.5,lty=2) abline(v=57.5,lty=2) abline(v=63.5,lty=2) text(4,26,"one regressor",col=2) text(14.5,26,"two regressor",col=3) text(32,26,"three regressor",col=4) text(50,26,"four regressor",col=5) text(60,26,"five regressor",col=6) dev.off()