#########################################################################################################
#
# Randomly select 100 (ncomp) of the S&P500 components and form equally weighted
# portfolios from the first k = 1, 2, ..., 100 selected components.  The process is
# repeated 1000 (nrep) times to study the variation due to the selection of components.
#
#########################################################################################################
#
# Hedibert Freitas Lopes
# Associate Professor of Econometrics and Statistics
# The University of Chicago Booth School of Business
# Email: hlopes@chicagobooth.edu
# URL: http://faculty.chicagobooth.edu/hedibert.lopes
#
#########################################################################################################
# Simulation: in each of nrep replications, draw ncomp distinct columns of the
# data at random and record, for k = 1,...,ncomp, the sample standard deviation
# of the equally weighted (row-wise) average of the first k drawn columns.
#
# Input : "sp500-components.txt", read with a header row.  Assumes every column
#         is numeric (one column per component) -- confirm against the file.
# Output: sPs, an nrep x ncomp matrix; sPs[j,k] is the standard deviation of
#         the k-column portfolio in replication j.  sP holds the last row.
data  = read.table("sp500-components.txt",header=TRUE)
ncomp = 100
nrep  = 1000
p     = ncol(data)
# Sampling without replacement needs at least ncomp columns; fail early with a
# readable message rather than sample()'s generic error.
if (p < ncomp)
  stop("need at least ",ncomp," columns in the data, found ",p,call.=FALSE)
# Convert once, so the inner loop works on a matrix instead of coercing a
# data frame on each of the nrep*ncomp portfolio evaluations.
datamat = as.matrix(data)
sPs   = matrix(0,nrep,ncomp)
sP    = rep(0,ncomp)
for (j in seq_len(nrep)){
  # Uniform draw of ncomp distinct column indices.
  ind = sample.int(p,size=ncomp,replace=FALSE)
  # Quick visual check of the first drawn column.  Only done in interactive
  # sessions: in a batch run each call would add a page to a stray Rplots.pdf.
  if (interactive())
    plot(data[,ind[1]],main=paste("rep=",j,sep=""))
  data1 = datamat[,ind,drop=FALSE]
  # seq_len() plus drop=FALSE treats k = 1 like every other k and stays
  # correct when ncomp is 1 (2:ncomp would count down and index column 2).
  for (i in seq_len(ncomp))
    sP[i] = sd(rowMeans(data1[,seq_len(i),drop=FALSE]))
  sPs[j,] = sP
}
# Quartiles (25%, 50%, 75%) of the portfolio standard deviation across
# replications; quants is 3 x ncomp, one column per portfolio size.
quants = apply(sPs,2,quantile,c(0.25,0.5,0.75))

# Figure: median (dot) and interquartile range (grey bar) of the standard
# deviation against the number of components, written to a PDF file.
pdf(file="sp500-portfolios.pdf",width=20,height=15)
plot(quants[2,],ylim=range(quants),xlab="Number of components",
     ylab="Standard deviation",pch=16,cex=2,axes=FALSE)
axis(2);box();axis(1,at=seq_len(ncomp))
# segments() and points() are vectorised, so no per-component loop is needed.
# The medians are redrawn after the bars so the dots sit on top of them.
ks = seq_len(ncomp)
segments(ks,quants[1,],ks,quants[3,],lwd=3,col=grey(0.5))
points(ks,quants[2,],pch=16,cex=2)
# Build the title from ncomp and nrep so it cannot drift from the settings
# actually used in the simulation.
title(paste0("Randomly picking ",ncomp," components from the S&P500\n ",
             nrep," replications"))

dev.off()