Introduction

In this exercise, \(\theta\) follows a standard Student’s \(t\) distribution with \(\nu\) degrees of freedom, i.e. \(\theta \sim t_\nu(0,1)\), and the goal is two-fold: sampling this distribution and approximating \(E(g(\theta))\) for some function \(g(\theta)\), \[ E[g(\theta)] = \int_{-\infty}^\infty g(\theta)p(\theta) d\theta. \] For instance, we will consider \(g_1(\theta)=\theta\), \(g_2(\theta)=(\theta-E(\theta))^2\) and \(g_3(\theta)=\delta(\theta>2.5)\), where \(\delta\) is the dirac function, ie. \(\delta(x)=1\) if \(x\) is true and zero otherwise. The integral \(E[g(\theta)]\) are the mean, variance and a tail probability. Let us consider \(\nu=4\), so quantities are known \[\begin{eqnarray*} E(\theta) &=& 0\\ V(\theta) &=& \frac{\nu}{\nu-2}=2\\ Pr(\theta>2.5) &=& 0.03338327. \end{eqnarray*}\]

Monte Carlo via importance sampling (MCIS)

We will assume that we can only sample from a proposal distribution, \(q(\theta)\), which in this case will be the Student’s \(t\) with one degree of freedom, \(t_1\), also commonly and widely known as the Cauchy distribution: \[ E[g(\theta)] = \int \frac{g(\theta)p(\theta)}{q(\theta)} q(\theta)d\theta. \] With draws \(\theta^{(i)}, \ i=1,\ldots,M\) from \(q(\theta)\), we can approximate the above integral by Monte Carlo integration, \[ E_1 = \frac{1}{M} \sum_{i=1}^M \frac{g(\theta^{(i)})p(\theta^{(i)})}{q(\theta^{(i)})}, \] or, based on SIR draws \({\tilde \theta}^{(i)}, \ i=1,\ldots,M\), \[ E_2 = \frac{1}{N} \sum_{i=1}^M {\tilde \theta}^{(i)}. \] We will see, in the exercise that follows, that \(V(E_1) \leq V(E_2)\).

Scale mixture of normals (SMN)

We will compare the above MC method with the following probability result: \[ \mbox{When} \ \ \theta|\nu,\sigma^2 \sim N(0,\sigma^2) \ \ \mbox{and} \ \ \sigma^2 \sim IG(\nu/2,\nu/2), \ \ \mbox{then} \ \ \theta|\nu \sim t_\nu(0,1), \] and we will assume we are able to sample from the normal and from the inverse-gamma distributions, as well. Therefore, \[ E[g(\theta)] = E[E(g(\theta)|\sigma^2)]. \]

When \(\sigma^{2(i)}, \ i=1,\ldots,M\) are drawn from \(IG(\nu/2,\nu/2)\) and \(\theta^{(i)} \sim N(0,\sigma^{2(i)})\), it follows that \(\theta^{(i)}, \ i=1,\ldots,M\) are exact draws from \(t_\nu\). We can approximate the above integral as \[ E_3 = \frac{1}{M} \sum_{i=1}^M E[g(\theta|\sigma^{2(i)})], \] or \[ E_4 =\frac{1}{M} \sum_{i=1}^n \theta^{(i)}. \]

nu  = 4

par(mfrow=c(1,1))
thetas = seq(-10,10,length=1000)
plot(thetas,dt(thetas,df=nu),xlab=expression(theta),ylab="Density",type="l",lwd=2)
lines(thetas,dcauchy(thetas),col=2,lwd=2)
legend("topleft",legend=c(paste("Target: Student's t(",nu,")",sep=""),
                          "Proposal: Cauchy"),col=1:2,lwd=2)

Comparison based on SIR and SMN draws

set.seed(54321)
M       = 10000
N       = M
th.prop = rcauchy(M)
w       = dt(th.prop,df=nu)/dcauchy(th.prop)
th.sir  = sample(th.prop,size=N,replace=TRUE,prob=w)
sig2    = 1/rgamma(N,nu/2,nu/2)
th.smn  = rnorm(N,0,sqrt(sig2))

breaks = seq(min(th.sir,th.smn),max(th.sir,th.smn),length=100)
par(mfrow=c(1,2))
hist(th.sir,breaks=breaks,xlim=c(-10,10),prob=TRUE,
     xlab=expression(theta),ylab="Density",main="")
lines(thetas,dt(thetas,df=nu),col=2,lwd=2)
title("MC via SIR")
hist(th.smn,breaks=breaks,xlim=c(-10,10),prob=TRUE,
     xlab=expression(theta),ylab="Density",main="")
lines(thetas,dt(thetas,df=nu),col=2,lwd=2)
title("MC via SMN")

True values versus MC-based approximations

true = c(0,nu/(nu-2),1-pt(2.5,df=nu))
MC.sir = c(mean(th.sir),var(th.sir),mean(th.sir>2.5))
MC.smn = c(mean(th.smn),var(th.smn),mean(th.smn>2.5))

cbind(true,MC.sir,MC.smn)
##            true     MC.sir      MC.smn
## [1,] 0.00000000 -0.0116427 0.002313822
## [2,] 2.00000000  1.9694401 2.010572448
## [3,] 0.03338327  0.0346000 0.035700000

Size of the MC error

set.seed(154321)
M      = 10000
N      = M
R      = 100
MC.sir = matrix(0,R,3)
MC.smn = matrix(0,R,3)
names  = c("Mean","Var","Pr(>2)")

par(mfrow=c(3,3))
for (nu in c(4,10,100)){
  true = c(0,nu/(nu-2),1-pt(2.5,df=nu))
  for (r in 1:R){
    # SIR
    th.prop    = rcauchy(M)
    w          = dt(th.prop,df=nu)/dcauchy(th.prop)
    th.sir     = sample(th.prop,size=N,replace=TRUE,prob=w)
    MC.sir[r,] = c(mean(th.sir),var(th.sir),mean(th.sir>2.5))
    # SMN
    sig2       = 1/rgamma(N,nu/2,nu/2)
    th.smn     = rnorm(N,0,sqrt(sig2))
    MC.smn[r,] = c(mean(th.smn),var(th.smn),mean(th.smn>2.5))
  }
  for (i in 1:3){
    boxplot(MC.sir[,i],MC.smn[,i],main="",ylab="MC approximation",names=c("SIR","SMN"))
    abline(h=true[i],col=2,lwd=2)
    title(paste(names[i]," - nu=",nu,"\n Relative RMSE = ",
          round(sqrt(var(MC.sir[,i]-true[i]))/
          sqrt(var(MC.smn[,i]-true[i])),2),sep=""))
  }
}

Raoblackwellization

We now compare the four approximations of \(E(g_3(\theta))=Pr(\theta>2.5|\nu=4)\). For MC approximations based on \(M=5000\) draws, we run \(R=100\) replications.

set.seed(154321)
nu   = 4
M    = 5000
N    = M
R    = 100
MC   = matrix(0,R,4)
true = 1-pt(2.5,df=nu)
for (r in 1:R){
  # SIR
  th.prop    = rcauchy(M)
  w          = dt(th.prop,df=nu)/dcauchy(th.prop)
  th.sir     = sample(th.prop,size=N,replace=TRUE,prob=w)
  cond       = ifelse(th.prop>2.5,1,0)
  MC[r,1:2]  = c(mean(cond*w),mean(th.sir>2.5))

  # SMN
  sig2      = 1/rgamma(N,nu/2,nu/2)
  th.smn    = rnorm(N,0,sqrt(sig2))
  MC[r,3:4] = c(mean(1-pnorm(2.5,0,sqrt(sig2))),mean(th.smn>2.5))
}
methods = c("SIR-RB","SIR","SMN-RB","SMN")
boxplot(MC,main="",ylab=paste("MC approximation - ",R," Replications",sep=""),names=methods)
abline(h=true,col=2,lwd=2)
title(paste("Pr(X>2.5|nu=",nu,")=",round(1-pt(2.5,df=nu),5),sep=""))

Relative root mean squared errors

RMSE = sqrt(apply((MC-true)^2,2,mean))
tab = cbind(round(RMSE,5),round(RMSE/RMSE[3],2))
colnames(tab) = c("RMSE","Relative RMSE")
rownames(tab) = methods
tab[c(2,4,1,3),]
##           RMSE Relative RMSE
## SIR    0.00330          4.97
## SMN    0.00284          4.28
## SIR-RB 0.00177          2.67
## SMN-RB 0.00066          1.00
---
title: "Monte Carlo integration and simulation"
subtitle: "SIR, scale mixture of normals and raoblackwellization"
author: "Hedibert Freitas Lopes"
date: "2/25/2022"
output:
  html_document:
    toc: true
    toc_depth: 2
    code_download: yes
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

# Introduction
In this exercise, $\theta$ follows a standard Student's $t$ distribution with $\nu$ degrees of freedom, i.e. $\theta \sim t_\nu(0,1)$, and the goal is two-fold: sampling this distribution and approximating $E(g(\theta))$ for some function $g(\theta)$,
$$
E[g(\theta)] = \int_{-\infty}^\infty g(\theta)p(\theta) d\theta.
$$
For instance, we will consider $g_1(\theta)=\theta$, $g_2(\theta)=(\theta-E(\theta))^2$ and $g_3(\theta)=\delta(\theta>2.5)$, where $\delta$ is the dirac function, ie. $\delta(x)=1$ if $x$ is true and zero otherwise.  The integral $E[g(\theta)]$ are the mean, variance and a tail probability.  Let us consider $\nu=4$, so quantities are known
\begin{eqnarray*}
E(\theta) &=& 0\\
V(\theta) &=& \frac{\nu}{\nu-2}=2\\
Pr(\theta>2.5) &=& 0.03338327.
\end{eqnarray*}

# Monte Carlo via importance sampling (MCIS)
We will assume that we can only sample from a proposal distribution, $q(\theta)$, which in this case will be the Student's $t$ with one degree of freedom, $t_1$, also commonly and widely known as the Cauchy distribution:
$$
E[g(\theta)] = \int \frac{g(\theta)p(\theta)}{q(\theta)} q(\theta)d\theta.
$$
With draws $\theta^{(i)}, \ i=1,\ldots,M$ from $q(\theta)$, we can approximate the above integral by Monte Carlo integration,
$$
E_1 = \frac{1}{M} \sum_{i=1}^M \frac{g(\theta^{(i)})p(\theta^{(i)})}{q(\theta^{(i)})},
$$
or, based on SIR draws ${\tilde \theta}^{(i)}, \ i=1,\ldots,M$,
$$
E_2 = \frac{1}{N} \sum_{i=1}^M {\tilde \theta}^{(i)}.
$$
We will see, in the exercise that follows, that $V(E_1) \leq V(E_2)$. 

# Scale mixture of normals (SMN)
We will compare the above MC method with the following probability result:
$$
\mbox{When} \ \ \theta|\nu,\sigma^2 \sim N(0,\sigma^2) \ \ \mbox{and} \ \ 
\sigma^2 \sim IG(\nu/2,\nu/2), \ \ \mbox{then} \ \ \theta|\nu \sim t_\nu(0,1),
$$
and we will assume we are able to sample from the normal and from the inverse-gamma distributions, as well.  Therefore,
$$
E[g(\theta)] = E[E(g(\theta)|\sigma^2)].
$$

When $\sigma^{2(i)}, \ i=1,\ldots,M$ are drawn from $IG(\nu/2,\nu/2)$ and 
$\theta^{(i)} \sim N(0,\sigma^{2(i)})$, it follows that $\theta^{(i)}, \ i=1,\ldots,M$ are exact draws from $t_\nu$. We can approximate the above integral as
$$
E_3 = \frac{1}{M} \sum_{i=1}^M E[g(\theta|\sigma^{2(i)})],
$$
or 
$$
E_4 =\frac{1}{M} \sum_{i=1}^n \theta^{(i)}.
$$

```{r fig.width=8, fig.height=6}
nu  = 4

par(mfrow=c(1,1))
thetas = seq(-10,10,length=1000)
plot(thetas,dt(thetas,df=nu),xlab=expression(theta),ylab="Density",type="l",lwd=2)
lines(thetas,dcauchy(thetas),col=2,lwd=2)
legend("topleft",legend=c(paste("Target: Student's t(",nu,")",sep=""),
                          "Proposal: Cauchy"),col=1:2,lwd=2)
```


# Comparison based on SIR and SMN draws

```{r fig.width=8, fig.height=6}
set.seed(54321)
M       = 10000
N       = M
th.prop = rcauchy(M)
w       = dt(th.prop,df=nu)/dcauchy(th.prop)
th.sir  = sample(th.prop,size=N,replace=TRUE,prob=w)
sig2    = 1/rgamma(N,nu/2,nu/2)
th.smn  = rnorm(N,0,sqrt(sig2))

breaks = seq(min(th.sir,th.smn),max(th.sir,th.smn),length=100)
par(mfrow=c(1,2))
hist(th.sir,breaks=breaks,xlim=c(-10,10),prob=TRUE,
     xlab=expression(theta),ylab="Density",main="")
lines(thetas,dt(thetas,df=nu),col=2,lwd=2)
title("MC via SIR")
hist(th.smn,breaks=breaks,xlim=c(-10,10),prob=TRUE,
     xlab=expression(theta),ylab="Density",main="")
lines(thetas,dt(thetas,df=nu),col=2,lwd=2)
title("MC via SMN")
```

## True values versus MC-based approximations
```{r fig.width=8, fig.height=6}

true = c(0,nu/(nu-2),1-pt(2.5,df=nu))
MC.sir = c(mean(th.sir),var(th.sir),mean(th.sir>2.5))
MC.smn = c(mean(th.smn),var(th.smn),mean(th.smn>2.5))

cbind(true,MC.sir,MC.smn)
```

## Size of the MC error
```{r fig.width=12, fig.height=12}
set.seed(154321)
M      = 10000
N      = M
R      = 100
MC.sir = matrix(0,R,3)
MC.smn = matrix(0,R,3)
names  = c("Mean","Var","Pr(>2)")

par(mfrow=c(3,3))
for (nu in c(4,10,100)){
  true = c(0,nu/(nu-2),1-pt(2.5,df=nu))
  for (r in 1:R){
    # SIR
    th.prop    = rcauchy(M)
    w          = dt(th.prop,df=nu)/dcauchy(th.prop)
    th.sir     = sample(th.prop,size=N,replace=TRUE,prob=w)
    MC.sir[r,] = c(mean(th.sir),var(th.sir),mean(th.sir>2.5))
    # SMN
    sig2       = 1/rgamma(N,nu/2,nu/2)
    th.smn     = rnorm(N,0,sqrt(sig2))
    MC.smn[r,] = c(mean(th.smn),var(th.smn),mean(th.smn>2.5))
  }
  for (i in 1:3){
    boxplot(MC.sir[,i],MC.smn[,i],main="",ylab="MC approximation",names=c("SIR","SMN"))
    abline(h=true[i],col=2,lwd=2)
    title(paste(names[i]," - nu=",nu,"\n Relative RMSE = ",
          round(sqrt(var(MC.sir[,i]-true[i]))/
          sqrt(var(MC.smn[,i]-true[i])),2),sep=""))
  }
}
```

# Raoblackwellization
We now compare the four approximations of $E(g_3(\theta))=Pr(\theta>2.5|\nu=4)$. For MC approximations based on $M=5000$ draws, we run $R=100$ replications.
```{r}
set.seed(154321)
nu   = 4
M    = 5000
N    = M
R    = 100
MC   = matrix(0,R,4)
true = 1-pt(2.5,df=nu)
for (r in 1:R){
  # SIR
  th.prop    = rcauchy(M)
  w          = dt(th.prop,df=nu)/dcauchy(th.prop)
  th.sir     = sample(th.prop,size=N,replace=TRUE,prob=w)
  cond       = ifelse(th.prop>2.5,1,0)
  MC[r,1:2]  = c(mean(cond*w),mean(th.sir>2.5))

  # SMN
  sig2      = 1/rgamma(N,nu/2,nu/2)
  th.smn    = rnorm(N,0,sqrt(sig2))
  MC[r,3:4] = c(mean(1-pnorm(2.5,0,sqrt(sig2))),mean(th.smn>2.5))
}
```

```{r fig.width=8, fig.height=8}
methods = c("SIR-RB","SIR","SMN-RB","SMN")
boxplot(MC,main="",ylab=paste("MC approximation - ",R," Replications",sep=""),names=methods)
abline(h=true,col=2,lwd=2)
title(paste("Pr(X>2.5|nu=",nu,")=",round(1-pt(2.5,df=nu),5),sep=""))
```

## Relative root mean squared errors
```{r}
RMSE = sqrt(apply((MC-true)^2,2,mean))
tab = cbind(round(RMSE,5),round(RMSE/RMSE[3],2))
colnames(tab) = c("RMSE","Relative RMSE")
rownames(tab) = methods
tab[c(2,4,1,3),]
```



