In R, it is easy to sample from a standard normal using the rnorm function
## [1] 0.009393749 1.006472627 0.398942280 1.959963985 0.500000000
par(mfrow=c(1,2))
hist(x,prob=TRUE,main="Draws from N(0,1)",xlab="")
plot(density(x),xlab="",main="Kernel density approximation")The Box-Muller transform is a classic algorithm used to generate pairs of independent, standard normally distributed random numbers from a source of uniformly distributed random numbers. It relies on the fact that a two-dimensional standard normal distribution is rotationally symmetric. When expressed in polar coordinates, the angle is uniformly distributed, and the squared radius follows an exponential distribution. If \(U_1\) and \(U_2\) are independent random variables \(U(0,1)\), then the variables \(Z_0\) and \(Z_1\) \[\begin{eqnarray*} Z_0 &=& \left(-2 \ln U_1\right)^{1/2} \cos(2\pi U_2)\\ Z_1 &=& \left(-2 \ln U_1\right)^{1/2} \sin(2\pi U_2), \end{eqnarray*}\] are independent \(N(0, 1)\).
The method is an improvement on the Box-Muller transform because it avoids the computationally expensive calls to \(sin\) and \(cos\). Instead, it uses a rejection sampling step to pick a point \((v_1,v_2)\) inside the unit disk: (1) Generate \(v_1,v_2 \sim U(-1,1)\) and (2) Compute \(s=v_1^2+v_2^2\). (3) If \(s < 1\), return two independent standard normal draws: \[\begin{eqnarray*} z_1 &=& v_1 \left(-\frac{2 \ln s}{s}\right)^{1/2}\\ z_2 &=& v_2 \left(-\frac{2 \ln s}{s}\right)^{1/2}, \end{eqnarray*}\] otherwise reject and repeat steps (1) and (2). The main trade-off is “wasting” some uniform draws (about \(21\%\) are rejected), but you gain speed by skipping the trigonometry.
We compare histograms based on the sampling schemes to the exact density evaluations
M = 20000
N = 20000
samples = matrix(0,M,4)
samples[,1] = rnorm(M)
samples[,2] = BOXMULLER(M)
samples[,3] = MARSAGLIA(M)
samples[,4] = SIR(M,N)
titles = c("rnorm","Box-Muller","Marsaglia","SIR")
x = seq(min(samples),max(samples),length=1000)
den = dnorm(x)
breaks = seq(min(samples),max(samples),length=30)
par(mfrow=c(2,2))
for (i in 1:4){
hist(samples[,i],prob=TRUE,breaks=breaks,main=titles[i],col="skyblue",xlab="Draws")
lines(x,den,col=2,lwd=2)
}probs = c(0.01,0.025,0.05,0.1,0.25,0.34)
quants = qnorm(probs)
tab = cbind(probs,qnorm(probs),apply(samples,2,quantile,probs))
colnames(tab) = c("Quantile","Exact","rnorm","Box-Muller","Marsaglia","SIR")
round(tab,3)## Quantile Exact rnorm Box-Muller Marsaglia SIR
## 1% 0.010 -2.326 -2.350 -2.359 -2.346 -2.318
## 2.5% 0.025 -1.960 -1.976 -1.979 -1.977 -1.988
## 5% 0.050 -1.645 -1.650 -1.659 -1.650 -1.684
## 10% 0.100 -1.282 -1.273 -1.284 -1.300 -1.328
## 25% 0.250 -0.674 -0.682 -0.680 -0.684 -0.686
## 34% 0.340 -0.412 -0.419 -0.431 -0.431 -0.404
set.seed(12345)
Rep = 100
nprob = length(probs)
error = array(0,c(Rep,nprob,4))
error1 = array(0,c(Rep,nprob,4))
samples = matrix(0,M,4)
for (rep in 1:Rep){
samples[,1] = rnorm(M)
samples[,2] = BOXMULLER(M)
samples[,3] = MARSAGLIA(M)
samples[,4] = SIR(M,N)
for (i in 1:4){
quants.d = quantile(samples[,i],probs)
error[rep,,i] = quants-quants.d
error1[rep,,i] = abs(100*(error[rep,,i]/quants))
}
}par(mfrow=c(2,2))
for (i in 1:4)
boxplot(error[,,i],ylim=range(error),main=titles[i],
ylab="Quantile error",names=probs,xlab=titles[i])par(mfrow=c(2,2))
for (i in 1:4)
boxplot(error1[,,i],ylim=range(error1),main=titles[i],
ylab="Percentage error",names=probs,xlab=titles[i])rmse = matrix(0,nprob,4)
mae = matrix(0,nprob,4)
mdae = matrix(0,nprob,4)
for (i in 1:4){
rmse[,i] = sqrt(apply(error[,,i]^2,2,mean))
mae[,i] = apply(abs(error[,,i]),2,mean)
mdae[,i] = apply(error1[,,i],2,median)
}
par(mfrow=c(1,3))
plot(rmse[,1],ylim=range(rmse,mae),type="b",pch=16,axes=FALSE,
xlab="N(0,1) tail probability",ylab="Error measure")
axis(2);box();axis(1,at=1:nprob,label=probs)
for (i in 2:4)
points(rmse[,i],col=i,type="b",pch=16)
legend("topright",legend=titles,col=1:4,pch=16,bty="n")
title("Root mean squared error")
plot(mae[,1],ylim=range(rmse,mae),type="b",pch=16,axes=FALSE,
xlab="N(0,1) tail probability",ylab="Error measure")
axis(2);box();axis(1,at=1:nprob,label=probs)
for (i in 2:4)
points(mae[,i],col=i,type="b",pch=16)
title("Mean absolute error")
plot(mdae[,1],ylim=range(rmse,mae,mdae),type="b",pch=16,axes=FALSE,
xlab="N(0,1) tail probability",ylab="Error measure")
axis(2);box();axis(1,at=1:nprob,label=probs)
for (i in 2:4)
points(mdae[,i],col=i,type="b",pch=16)
title("Median absolute percentage error")