# https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#For_a_sample
install.packages("hypergeo")
library(hypergeo)

loglike = function(rho,x,y){
 sum(dnorm(x,log=TRUE)) + sum(dnorm(y,rho*x,sqrt(1-rho^2),log=TRUE))
}

density.r = function(r,n,rho){
  term1=log((n-2))+lgamma(n-1)+((n-1)/2)*log(1-rho^2)+(n/2-2)*log(1-r^2)
  term2=0.5*log(2*pi)+lgamma(n-1/2) + (n-3/2)*log(1-rho*r)
  term3=hypergeo(1/2,1/2,(2*n-1)/2,(rho*r+1)/2)
  return(exp(term1-term2)*term3)
}

set.seed(31415)

rho = 0.95

par(mfrow=c(1,2))
for (n in c(30,100)){
if (rho<0.9){
  r1 = seq(0.6,1,length=1000)
}else{
  r1 = seq(0.8,1,length=1000)
}
den  = abs(density.r(r1,n,rho))
x = rnorm(n)
y = rnorm(n,rho*x,sqrt(1-rho^2))
r = cor(x,y)

llike = rep(0,1000)
for (i in 1:1000)
  llike[i] = loglike(r1[i],x,y)
like = exp(llike-max(llike))
like = like/sum(like)
like = like/sum(like*(r1[2]-r1[1]))

Nrep = 10000
rs   = rep(0,Nrep)
# True sampling distribution of r
set.seed(123455)
for (rep in 1:Nrep){
  x1 = rnorm(n)
  y1 = rnorm(n,rho*x1,sqrt(1-rho^2))
  rs[rep] = cor(x1,y1)
}
den1 = density(rs)

# Bootstrapped distribution of r
for (rep in 1:Nrep){
  ind = sample(1:n,size=n,replace=TRUE)
  x1  = x[ind]
  y1  = y[ind]
  rs[rep] = cor(x1,y1)
}
den2 = density(rs)

# Bayes
N = 100000
N1 = N/10
rhos = runif(N,0,1)
llike = rep(0,N)
for (i in 1:N)
  llike[i] = loglike(rhos[i],x,y)
w = exp(llike-max(llike))
rhos1 = sample(rhos,size=N1,replace=TRUE,prob=w)  
den3 = density(rhos1)


plot(r1,den,xlab="",main="",ylim=c(0,max(den,den1$y,den2$y,den3$y)),lwd=2,type="l")
title(paste("n = ",n," - rho = ",rho," and ",Nrep," replications",sep=""))
abline(v=r,col=6,lwd=2)
lines(den1$x,den1$y,col=2,lwd=2)
lines(den2$x,den2$y,col=3,lwd=2)
lines(den3$x,den3$y,col=4,lwd=2)
lines(r1,like,col=5,lwd=2)
legend("topleft",legend=c("True","Est.","Bootstrap","SIR","Like","r"),col=1:5,lwd=2)
box()

}