# Exploratory regression analysis of the "communities.data" file:
#   1. drop every column that contains the missing-value marker "?",
#   2. rank each remaining predictor by the adjusted R^2 of a simple
#      regression of the response on that predictor alone,
#   3. fit the full linear model on standardized variables and flag the
#      coefficients whose +/- 2 standard-error interval excludes zero.
#
# NOTE: `names` and `data` shadow base R functions of the same name. They are
# kept because code outside this section may rely on these globals.

# Column names, in file order (the file itself has no header row).
names <- c(
  "state", "county", "community", "communityname", "fold", "population",
  "householdsize", "racepctblack", "racePctWhite", "racePctAsian",
  "racePctHisp", "agePct12t21", "agePct12t29", "agePct16t24", "agePct65up",
  "numbUrban", "pctUrban", "medIncome", "pctWWage", "pctWFarmSelf",
  "pctWInvInc", "pctWSocSec", "pctWPubAsst", "pctWRetire", "medFamInc",
  "perCapInc", "whitePerCap", "blackPerCap", "indianPerCap", "AsianPerCap",
  "OtherPerCap", "HispPerCap", "NumUnderPov", "PctPopUnderPov",
  "PctLess9thGrade", "PctNotHSGrad", "PctBSorMore", "PctUnemployed",
  "PctEmploy", "PctEmplManu", "PctEmplProfServ", "PctOccupManu",
  "PctOccupMgmtProf", "MalePctDivorce", "MalePctNevMarr", "FemalePctDiv",
  "TotalPctDiv", "PersPerFam", "PctFam2Par", "PctKids2Par",
  "PctYoungKids2Par", "PctTeen2Par", "PctWorkMomYoungKids", "PctWorkMom",
  "NumIlleg", "PctIlleg", "NumImmig", "PctImmigRecent", "PctImmigRec5",
  "PctImmigRec8", "PctImmigRec10", "PctRecentImmig", "PctRecImmig5",
  "PctRecImmig8", "PctRecImmig10", "PctSpeakEnglOnly", "PctNotSpeakEnglWell",
  "PctLargHouseFam", "PctLargHouseOccup", "PersPerOccupHous",
  "PersPerOwnOccHous", "PersPerRentOccHous", "PctPersOwnOccup",
  "PctPersDenseHous", "PctHousLess3BR", "MedNumBR", "HousVacant",
  "PctHousOccup", "PctHousOwnOcc", "PctVacantBoarded", "PctVacMore6Mos",
  "MedYrHousBuilt", "PctHousNoPhone", "PctWOFullPlumb", "OwnOccLowQuart",
  "OwnOccMedVal", "OwnOccHiQuart", "RentLowQ", "RentMedian", "RentHighQ",
  "MedRent", "MedRentPctHousInc", "MedOwnCostPctInc",
  "MedOwnCostPctIncNoMtg", "NumInShelters", "NumStreet", "PctForeignBorn",
  "PctBornSameState", "PctSameHouse85", "PctSameCity85", "PctSameState85",
  "LemasSwornFT", "LemasSwFTPerPop", "LemasSwFTFieldOps",
  "LemasSwFTFieldPerPop", "LemasTotalReq", "LemasTotReqPerPop",
  "PolicReqPerOffic", "PolicPerPop", "RacialMatchCommPol", "PctPolicWhite",
  "PctPolicBlack", "PctPolicHisp", "PctPolicAsian", "PctPolicMinor",
  "OfficAssgnDrugUnits", "NumKindsDrugsSeiz", "PolicAveOTWorked", "LandArea",
  "PopDens", "PctUsePubTrans", "PolicCars", "PolicOperBudg",
  "LemasPctPolicOnPatr", "LemasGangUnitDeploy", "LemasPctOfficDrugUn",
  "PolicBudgPerPop", "ViolentCrimesPerPop"
)

data <- read.csv("communities.data", header = FALSE)
# Fail early if the file layout does not match the name list above.
stopifnot(ncol(data) == length(names))

# Number of "?" entries per column; only columns without any are kept.
missing <- vapply(data, function(column) sum(column == "?"), numeric(1))
inds <- which(missing == 0)
data <- data[, inds]

# The response is the last retained column; the predictors are every retained
# column from the 4th up to the one before the response. The first three
# retained columns are skipped (presumably identifier-like columns -- confirm
# against the data). For the original file this is y = column 103 and
# X = columns 4:102.
resp_col <- ncol(data)
y <- data[, resp_col]
X <- as.matrix(data[, 4:(resp_col - 1)])
# Names of the predictors followed by the response name in position p + 1.
names <- names[inds][4:resp_col]

p <- ncol(X)
n <- nrow(X)

# Adjusted R^2 of the simple regression of y on each predictor separately.
r2adj <- vapply(
  seq_len(p),
  function(j) summary(lm(y ~ X[, j]))$adj.r.squared,
  numeric(1)
)
ord <- order(r2adj, decreasing = TRUE)
plot(r2adj[ord])

# Scatterplots of y against the 40 best single predictors, ten per page
# (2 x 5 panels), in decreasing order of adjusted R^2.
for (page_start in seq(1, 31, by = 10)) {
  par(mfrow = c(2, 5))
  for (i in page_start:(page_start + 9)) {
    plot(X[, ord[i]], y, xlab = names[ord[i]], ylab = names[p + 1])
    title(paste0("R2 adj=", round(100 * r2adj[ord[i]], 1)))
  }
}

# Standardizing the variables
y <- (y - mean(y)) / sd(y)
mX <- colMeans(X)
sdX <- apply(X, 2, sd)
X <- scale(X, center = mX, scale = sdX)

reg <- lm(y ~ X)
beta <- coef(reg)[2:(p + 1)]

# Standard errors of the slopes: sqrt(diag(sigma^2 * (X'X)^-1)).
# The covariance must be scaled by the residual VARIANCE (sigma squared);
# summary(reg)$sigma is the residual standard error. Because X and y are
# centered, the intercept column is orthogonal to X, so (X'X)^-1 is the slope
# block of the full model's unscaled covariance matrix.
sde <- sqrt(diag(summary(reg)$sigma^2 * solve(crossprod(X))))
L <- beta - 2 * sde
U <- beta + 2 * sde

par(mfrow = c(1, 1))
plot(beta, pch = 16, ylim = range(L, U),
     xlab = "Regressor", ylab = "Coefficient")
abline(h = 0, lty = 2)
segments(seq_len(p), L, seq_len(p), U)

# Regressors whose interval excludes zero are labelled with their index.
ind <- which(L > 0 | U < 0)
count <- length(ind)
if (count > 0) {
  text(ind, rep(1, count), labels = ind, col = "2", cex = 0.75)
}

print(names[ind])

# Output recorded from an earlier run of this script. It was produced with the
# previous standard-error formula, so re-run the script to refresh it:
#  [1] "racepctblack"          "pctUrban"              "PctPopUnderPov"
#  [4] "PctEmploy"             "MalePctNevMarr"        "PctWorkMom"
#  [7] "PersPerRentOccHous"    "PctPersDenseHous"      "PctVacMore6Mos"
# [10] "RentLowQ"              "MedRent"               "MedOwnCostPctIncNoMtg"
# [13] "NumStreet"
#
# Meaning of the variables listed above:
#  1. percentage of population that is African American
#  2. percentage of people living in areas classified as urban
#  3. percentage of people under the poverty level
#  4. percentage of people 16 and over who are employed
#  5. percentage of males who have never married
#  6. percentage of moms of kids under 18 in labor force
#  7. mean persons per rental household
#  8. percent of persons in dense housing (more than 1 person per room)
#  9. percent of vacant housing that has been vacant more than 6 months
# 10. rental housing - lower quartile rent
# 11. median gross rent (Census variable H43A from file STF3A - includes
#     utilities)
# 12. median owners cost as a percentage of household income - for owners
#     without a mortgage
# 13. number of homeless people counted in the street