# https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#For_a_sample install.packages("hypergeo") library(hypergeo) density.r = function(r,n,rho){ term1=log((n-2))+lgamma(n-1)+((n-1)/2)*log(1-rho^2)+(n/2-2)*log(1-r^2) term2=0.5*log(2*pi)+lgamma(n-1/2) + (n-3/2)*log(1-rho*r) term3=hypergeo(1/2,1/2,(2*n-1)/2,(rho*r+1)/2) return(exp(term1-term2)*term3) } set.seed(31415) par(mfrow=c(2,3)) for (rho in c(0.7,0.9)){ for (n in c(20,100,500)){ if (rho<0.9){ r1 = seq(0,1,length=1000) }else{ r1 = seq(0.7,1,length=1000) } den = abs(density.r(r1,n,rho)) x = rnorm(n) y = rnorm(n,rho*x,sqrt(1-rho^2)) r = cor(x,y) Nrep = 10000 rs = rep(0,Nrep) # True sampling distribution of r set.seed(123455) for (rep in 1:Nrep){ x1 = rnorm(n) y1 = rnorm(n,rho*x1,sqrt(1-rho^2)) rs[rep] = cor(x1,y1) } den1 = density(rs) # Bootstrapped distribution of r for (rep in 1:Nrep){ ind = sample(1:n,size=n,replace=TRUE) x1 = x[ind] y1 = y[ind] rs[rep] = cor(x1,y1) } den2 = density(rs) plot(r1,den,xlab="",main="",ylim=c(0,max(den,den1$y,den2$y)),lwd=2,type="l") title(paste("Sampling distribution of r\n n=",n," - ",Nrep," replications",sep="")) abline(v=rho,col=4,lwd=2) lines(den1$x,den1$y,col=2,lwd=2) lines(den2$x,den2$y,col=3,lwd=2) legend("topleft",legend=c("True","Est.","Bootstrap"),col=1:3,lwd=2) box() }}