#######################################################################
#
# Dataset: Grade Point Average (GPA)
#
#######################################################################
#
# Summary: Data on 4,137 US college students.
#
#  1. sat       combined SAT score
#  2. tothrs    total hours through fall semester
#  3. colgpa    GPA after fall semester
#  4. athlete   =1 if athlete
#  5. verbmath  verbal/math SAT score
#  6. hsize     size graduating class, 100s
#  7. hsrank    rank in graduating class
#  8. hsperc    100*(hsrank/hsize)
#  9. female    =1 if female
# 10. white     =1 if white
# 11. black     =1 if black
# 12. hsizesq   hsize^2
#
# colgpa is measured on a four-point scale.
# hsperc is the percentile in the high school graduating class (defined
# so that, for example, hsperc = 5 means the top 5% of the class).
# sat is the combined math and verbal scores on the student achievement
# test.
#
#######################################################################
#
# Source: Wooldridge (2012)
#         Introductory Econometrics: A Modern Approach (5th edition)
#         South-Western, Cengage Learning
#
#######################################################################
#
# Copyright of R code by:
#
# Hedibert Freitas Lopes
# Professor of Statistics and Econometrics
# Insper - Institute for Education and Research
#
#######################################################################

# Load the data ----
data <- read.table("gpa2-wooldridge.txt", header = TRUE)
n <- nrow(data)
k <- ncol(data)

# Standardize every column (mean 0, unit sample variance) ----
# All columns, including the 0/1 indicators, are standardized; this is
# why the regression below is fitted without an intercept.
stopifnot(all(vapply(data, is.numeric, logical(1))))
data[] <- lapply(data, function(x) (x - mean(x)) / sd(x))
data1 <- as.data.frame(data)

# OLS of sat on each continuous covariate interacted with each indicator ----
# NOTE(review): the original formula listed the hsize row twice (R drops
# duplicated terms, so the fit is unchanged by removing the repeat).
# Possibly hsizesq was intended for the second copy -- confirm.
ols <- lm(
  sat ~ hsize * athlete + hsize * female + hsize * white + hsize * black +
    colgpa * athlete + colgpa * female + colgpa * white + colgpa * black +
    verbmath * athlete + verbmath * female + verbmath * white +
    verbmath * black +
    hsrank * athlete + hsrank * female + hsrank * white + hsrank * black +
    hsperc * athlete + hsperc * female + hsperc * white + hsperc * black +
    tothrs * athlete + tothrs * female + tothrs * white + tothrs * black - 1,
  data = data1
)
summary(ols)

# Point estimates with +/- 2 standard-error bounds ----
bhat <- coef(ols)
p <- length(bhat)
# vcov() is padded with NA for aliased coefficients, so `se` always has the
# same length and ordering as `bhat` (summary()$cov.unscaled drops them).
se <- sqrt(diag(vcov(ols)))
L <- bhat - 2 * se
U <- bhat + 2 * se

# Plot estimates, interval segments, and a reference line at zero ----
plot(bhat, pch = 16, ylim = range(L, U, na.rm = TRUE))
segments(seq_len(p), L, seq_len(p), U)
abline(h = 0, lty = 2)