# Contains data from http://fmwww.bc.edu/ec-p/data/stockwatson/mcas.dta
#   obs:           220
#   vars:           17                          30 Mar 2002 16:48
#   size:       25,080 (99.9% of memory free)
# -------------------------------------------------------------------------------
#               storage  display     value
# variable name   type   format      label      variable label
# -------------------------------------------------------------------------------
# code            int    %8.0g                  District Code (numerical)
# municipa        str29  %29s                   Municipality (name)
# district        str25  %25s                   District Name
# regday          int    %8.0g                  Spending per pupil, regular
# specneed        float  %9.0g                  Spending per pupil, special needs
# bilingua        long   %12.0g                 Spending per pupil, bilingual
# occupday        int    %8.0g                  Spending per pupil, occupational
# tot_day         int    %8.0g                  Spending per pupil, Total
# s_p_c           float  %9.0g                  Students per Computer
# spec_ed         float  %9.0g                  % Special Education students
# lnch_pct        float  %9.0g                  % Eligible for free/reduced price lunch
# tchratio        float  %9.0g                  Students per Teacher
# percap          double %12.0g                 Per Capita Income
# totsc4          float  %9.0g
# totsc8          float  %9.0g
# avgsalry        float  %9.0g
# pctel           float  %9.0g
# -------------------------------------------------------------------------------
# Sorted by:  municipa
#
# THE MASSACHUSETTS TEST SCORE DATA SET
#
# The Massachusetts data are district-wide averages for public elementary
# school districts in 1998. The test score is taken from the Massachusetts
# Comprehensive Assessment System (MCAS) test, administered to all fourth
# graders in Massachusetts public schools in the spring of 1998. The test is
# sponsored by the Massachusetts Department of Education and is mandatory for
# all public schools. The data analyzed here are the overall total score,
# which is the sum of the scores on the English, Math, and Science portions
# of the test.
# Data on the student-teacher ratio, the percent of students receiving a
# subsidized lunch and on the percent of students still learning English are
# averages for each elementary school district for the 1997-1998 school year
# and were obtained from the Massachusetts Department of Education. Data on
# average district income were obtained from the 1990 U.S. Census.

library(foreign)

# Download the Stata file; convert.factors = FALSE asks read.dta() not to turn
# Stata value labels into factors.
data <- read.dta("http://fmwww.bc.edu/ec-p/data/stockwatson/mcas.dta",
                 convert.factors = FALSE)
# attach() is discouraged, but it is kept because code further down the file
# may still refer to the columns by their bare names.
attach(data)

######################################################################
# PART 1: Explaining the test score via per capita income
######################################################################
# NOTE(review): the header above describes the fourth-grade score, but the
# response modelled here is totsc8 -- confirm this is the intended column.
par(mfrow = c(1, 1))
plot(data$percap, data$totsc8, ylim = c(640, 800),
     xlab = "Per Capita Income", ylab = "Test score")

x <- data$percap
y <- data$totsc8

# One evaluation grid over the observed range of x, shared by all four curves.
xxx <- seq(min(x, na.rm = TRUE), max(x, na.rm = TRUE), length.out = 1000)

# Model 1 (level-level): y = b0 + b1 * x
reg1 <- lm(y ~ x)
bhat1 <- coef(reg1)
yhat1 <- bhat1[1] + bhat1[2] * x
lines(xxx, bhat1[1] + bhat1[2] * xxx, col = 2, lwd = 2)

# Model 2 (log-level): log(y) = b0 + b1 * x; fitted values are exponentiated
# so they are on the original scale of y.
reg2 <- lm(log(y) ~ x)
bhat2 <- coef(reg2)
yhat2 <- exp(bhat2[1] + bhat2[2] * x)
lines(xxx, exp(bhat2[1] + bhat2[2] * xxx), col = 3, lwd = 2)

# Model 3 (log-log): log(y) = b0 + b1 * log(x)
reg3 <- lm(log(y) ~ log(x))
bhat3 <- coef(reg3)
yhat3 <- exp(bhat3[1] + bhat3[2] * log(x))
lines(xxx, exp(bhat3[1] + bhat3[2] * log(xxx)), col = 4, lwd = 2)

# Model 4 (level-log): y = b0 + b1 * log(x)
reg4 <- lm(y ~ log(x))
bhat4 <- coef(reg4)
yhat4 <- bhat4[1] + bhat4[2] * log(x)
lines(xxx, bhat4[1] + bhat4[2] * log(xxx), col = 5, lwd = 2)

# Root mean squared error on the original scale of y; observations with a
# missing value are left out of the mean.
rmse1 <- sqrt(mean((y - yhat1)^2, na.rm = TRUE))
rmse2 <- sqrt(mean((y - yhat2)^2, na.rm = TRUE))
rmse3 <- sqrt(mean((y - yhat3)^2, na.rm = TRUE))
rmse4 <- sqrt(mean((y - yhat4)^2, na.rm = TRUE))
c(rmse1, rmse2, rmse3, rmse4)

# The legend is built from the values just computed (instead of pasted-in
# constants) and labelled RMSE, because that is what sqrt(mean(.)) yields.
legend("topleft",
       legend = sprintf("%s, RMSE=%.5f",
                        c("nivel-nivel", "log-nivel", "log-log", "nivel-log"),
                        c(rmse1, rmse2, rmse3, rmse4)),
       col = 2:5, lty = 1, lwd = 2)

######################################################################
# PART 2: Explaining the test score via the % of students eligible
# for a free/reduced-price lunch
######################################################################
par(mfrow = c(1, 1))
plot(data$lnch_pct, data$totsc8, ylim = c(640, 800),
     xlab = "% Eligible for free/reduced price lunch", ylab = "Test score")

x <- data$lnch_pct
y <- data$totsc8

# Models 3 and 4 take log(x); fail early if the regressor is not strictly
# positive rather than inside lm().
stopifnot(all(x > 0, na.rm = TRUE))

# One evaluation grid over the observed range of x, shared by all four curves.
xxx <- seq(min(x, na.rm = TRUE), max(x, na.rm = TRUE), length.out = 1000)

# Model 1 (level-level): y = b0 + b1 * x
reg1 <- lm(y ~ x)
bhat1 <- coef(reg1)
yhat1 <- bhat1[1] + bhat1[2] * x
lines(xxx, bhat1[1] + bhat1[2] * xxx, col = 2, lwd = 2)

# Model 2 (log-level): log(y) = b0 + b1 * x; fitted values are exponentiated
# so they are on the original scale of y.
reg2 <- lm(log(y) ~ x)
bhat2 <- coef(reg2)
yhat2 <- exp(bhat2[1] + bhat2[2] * x)
lines(xxx, exp(bhat2[1] + bhat2[2] * xxx), col = 3, lwd = 2)

# Model 3 (log-log): log(y) = b0 + b1 * log(x)
reg3 <- lm(log(y) ~ log(x))
bhat3 <- coef(reg3)
yhat3 <- exp(bhat3[1] + bhat3[2] * log(x))
lines(xxx, exp(bhat3[1] + bhat3[2] * log(xxx)), col = 4, lwd = 2)

# Model 4 (level-log): y = b0 + b1 * log(x)
reg4 <- lm(y ~ log(x))
bhat4 <- coef(reg4)
yhat4 <- bhat4[1] + bhat4[2] * log(x)
lines(xxx, bhat4[1] + bhat4[2] * log(xxx), col = 5, lwd = 2)

# Root mean squared error on the original scale of y; observations with a
# missing value are left out of the mean.
rmse1 <- sqrt(mean((y - yhat1)^2, na.rm = TRUE))
rmse2 <- sqrt(mean((y - yhat2)^2, na.rm = TRUE))
rmse3 <- sqrt(mean((y - yhat3)^2, na.rm = TRUE))
rmse4 <- sqrt(mean((y - yhat4)^2, na.rm = TRUE))
c(rmse1, rmse2, rmse3, rmse4)

# The legend is built from the values just computed (instead of pasted-in
# constants) and labelled RMSE, because that is what sqrt(mean(.)) yields.
legend("topright",
       legend = sprintf("%s, RMSE=%.5f",
                        c("nivel-nivel", "log-nivel", "log-log", "nivel-log"),
                        c(rmse1, rmse2, rmse3, rmse4)),
       col = 2:5, lty = 1, lwd = 2)