1 Introduction

Here we use a banana-shape posterior distribution to illustrate the how Bayesian computation via Monte Carlo strategies allows for more flexible Bayesian inference in non-conjugate, non-Gaussian and non-linear cases. The Source of the banana-shape joint posterior is the article Recursive Pathways to Marginal Likelihood Estimation with Prior-Sensitivity Analysis, by Ewan Cameron and Anthony Pettitt, published in the journal Statistical Science in 2014 (Volume 29, Number3, Pages 397-419).

2 Univariate example

Let us assume that \[ p(\theta|\mbox{data}) \propto p(\theta)L(\theta|\mbox{data})=\left(\frac{1}{1.5}\right) \exp\left\{-25(0.45-\theta)^2-400(0.065-\theta^4)^2\right\}, \] where \(\theta \in (-0.5,1.0)\). In other words, the prior for \(\theta\) is \(U(-0.5,1)\) and the likelihood function is nonlinear on \(\theta\). Below, we use a grid with 10,000 points between \(-0.5\) and \(1.0\) in order to approximate the normalizing constant (also known as predictive density) and posterior moments of \(\theta\).

prior.likelihood = function(theta){
  prior=1/1.5
  likelihood=exp(-25*(0.45-theta)^2-(20*(0.065-theta^4))^2)
  return(prior*likelihood)
}

N     = 10000
theta = seq(-0.5,1,length=N)
post  = prior.likelihood(theta)
h     = theta[2]-theta[1]
py    = sum(post*h)
post  = post/py

2.1 Predictive density and posterior moments on a grid

E = sum(theta*post*h)
E2 = sum(theta^2*post*h) 
V = E2 - E^2
S = sqrt(V)
c(E,S,py)
## [1] 0.45189309 0.08907801 0.11782328
plot(theta,post,type="l",xlab=expression(theta),main="Posterior density",ylab="Density")
abline(v=E,col=2)
abline(v=E-2*S,col=2,lty=2)
abline(v=E+2*S,col=2,lty=2)

2.2 MC-based posterior approximation via SIR

# Step 1:  Sampling from proposal
M      = 1000
thetas = runif(M,-0.5,1)

# Step 2: Computing resampling weights
w      = prior.likelihood(thetas)
w      = w/sum(w)
plot(thetas,w,pch=16,cex=0.5,xlab=expression(theta),ylab="Weights")

# Step 3: Resampling
thetas.post = sample(thetas,size=M,replace=TRUE,prob=w)


E.sir = mean(thetas.post)
S.sir = sqrt(var(thetas.post))

c(E,E.sir)
## [1] 0.4518931 0.4610462
c(S,S.sir)
## [1] 0.08907801 0.08076191
hist(thetas.post,prob=TRUE,breaks=seq(-0.5,1,length=40),xlab=expression(theta),main="")
lines(theta,post,col=2,lwd=2)
title("Posterior: grid vs SIR")

2.3 How good (or bad) is the MC approximation?

Ms = c(seq(10,100,by=10),seq(200,1000,by=1000),
       seq(2000,10000,by=1000),seq(20000,100000,by=10000),seq(200000,1000000,by=100000))
nM = length(Ms)
E.sir = rep(0,nM)
for (i in 1:nM){
  thetas = runif(Ms[i],-0.5,1)
  w      = prior.likelihood(thetas)
  thetas.post = sample(thetas,size=M,replace=TRUE,prob=w)
  E.sir[i] = mean(thetas.post)
}


plot(log10(Ms),E.sir,ylab="Posterior mean",xlab="Monte Carlo size (log10)",type="b",pch=16)
abline(h=E,col=2,lwd=2)
title("Monte Carlo approximation of posterior mean")

3 Bivariate example

\[ p(\theta|\mbox{data}) \propto p(\theta)L(\theta|\mbox{data})=\left(\frac{1}{1.5}\right)^2 \exp\left\{-25(0.45-\theta)^2-400(\theta_2-\theta^4)^2\right\}, \] where \(\theta=(\theta_1,\theta_2)\), the prior of \(\theta_1\) is \(U(-0.5,1)\) and the prior of \(\theta_2\) is \(U(-0.5,1)\), with \(\theta_1\) and \(\theta_2\) independent a priori

prior.likelihood = function(theta1,theta2){
  prior=(1/1.5)^2
  likelihood=exp(-(10*(0.45-theta1))^2/4-(20*(theta2/2-theta1^4))^2)
  return(prior*likelihood)
}

N      = 200
theta1 = seq(-0.5,1,length=N)
theta2 = seq(-0.5,1,length=N)

post = matrix(0,N,N)
for (i in 1:N)
  for (j in 1:N)
    post[i,j] = prior.likelihood(theta1[i],theta2[j])

h = theta1[2]-theta1[1]
py = sum(apply(post*h^2,1,sum))
post = post/py

E1 = sum(t(post)%*%theta1*h^2)
E2 = sum(post%*%theta2*h^2)

c(E1,E2,py)
## [1] 0.44872266 0.12973071 0.02784243
par(mfrow=c(1,1))
contour(theta1,theta2,post,nlevels=15,xlab=expression(theta[1]),ylab=expression(theta[2]),
drawlabels=FALSE,main="Posterior")

3.1 Sampling importance resampling

# Step 1:  Sampling from proposal
M = 100000
theta1.prior = runif(M,-0.5,1)
theta2.prior = runif(M,-0.5,1)

# Step 2: Computing resampling weights
w = rep(0,M)
for (i in 1:M)
  w[i] = prior.likelihood(theta1.prior[i],theta2.prior[i])
  
# Step 3: Resampling
ind = sample(1:M,size=M,replace=TRUE,prob=w)
theta1.post = theta1.prior[ind]
theta2.post = theta2.prior[ind]

# Computing normalizing constant (prior predictive)
py.mc = mean(w)


par(mfrow=c(2,2))
plot(theta1.prior,theta2.prior,xlab=expression(theta[1]),ylab=expression(theta[2]),
     main="Prior draws vs posterior evaluations",pch=".")
contour(theta1,theta2,post,nlevels=15,drawlabels=FALSE,add=TRUE,col=3,lwd=2)
plot(theta1.post,theta2.post,xlim=range(theta1.prior),ylim=range(theta2.prior),
     xlab=expression(theta[1]),ylab=expression(theta[2]),pch=".",
     main="Posterior draws vs posterior evaluations")
contour(theta1,theta2,post,nlevels=15,drawlabels=FALSE,add=TRUE,col=3,lwd=2)
hist(theta1.post,prob=TRUE,xlab=expression(theta[1]),main="",xlim=c(-0.5,1),col=3)
title("Marginal posterior")
abline(h=1/1.5,col=2,lwd=3)
hist(theta2.post,prob=TRUE,xlab=expression(theta[2]),main="",xlim=c(-0.5,1),col=3)
title("Marginal posterior")
abline(h=1/1.5,col=2,lwd=3)

# Marginal priors and posteriors
tab = rbind(
summary(theta1.prior),
summary(theta2.prior),
summary(theta1.post),
summary(theta2.post))
rownames(tab) = c("Prior theta1","Prior theta2","Posterior theta1","Posterior theta2")
round(tab,3)
##                    Min. 1st Qu. Median  Mean 3rd Qu.  Max.
## Prior theta1     -0.500  -0.124  0.251 0.251   0.625 1.000
## Prior theta2     -0.500  -0.124  0.248 0.250   0.627 1.000
## Posterior theta1 -0.260   0.355  0.449 0.449   0.542 0.858
## Posterior theta2 -0.281   0.024  0.097 0.129   0.194 1.000
E1.mc = mean(theta1.post)
E2.mc = mean(theta2.post)


c(E1,E1.mc)
## [1] 0.4487227 0.4488311
c(E2,E2.mc)
## [1] 0.1297307 0.1289639
c(py,py.mc)
## [1] 0.02784243 0.01254233
---
title: "Banana-shape posterior"
subtitle: "MC-based posterior inference"
author: "Hedibert Freitas Lopes"
date: "5/7/2024"
output:
  html_document:
    toc: true
    toc_depth: 3
    toc_float: true
    toc_collapsed: true
    code_download: yes
    number_sections: true
    theme: lumen
---



```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

# Introduction
Here we use a banana-shape posterior distribution to illustrate the how Bayesian computation via Monte Carlo strategies allows for more flexible Bayesian inference in non-conjugate, non-Gaussian and non-linear cases.  The Source of the banana-shape joint posterior is the article *Recursive Pathways to Marginal Likelihood Estimation with Prior-Sensitivity Analysis*, by Ewan Cameron and Anthony Pettitt, published in the journal *Statistical Science* in 2014 (Volume 29, Number3, Pages 397-419).


# Univariate example

Let us assume that 
$$
p(\theta|\mbox{data}) \propto p(\theta)L(\theta|\mbox{data})=\left(\frac{1}{1.5}\right)
\exp\left\{-25(0.45-\theta)^2-400(0.065-\theta^4)^2\right\},
$$
where $\theta \in (-0.5,1.0)$.
In other words, the prior for $\theta$ is $U(-0.5,1)$ and the likelihood function is nonlinear on $\theta$.  Below, we use a grid with 10,000 points between $-0.5$ and $1.0$ in order to approximate the normalizing constant (also known as predictive density) and posterior moments of $\theta$.

```{r}
prior.likelihood = function(theta){
  prior=1/1.5
  likelihood=exp(-25*(0.45-theta)^2-(20*(0.065-theta^4))^2)
  return(prior*likelihood)
}

N     = 10000
theta = seq(-0.5,1,length=N)
post  = prior.likelihood(theta)
h     = theta[2]-theta[1]
py    = sum(post*h)
post  = post/py
```

## Predictive density and posterior moments on a grid
```{r fig.width=6, fig.height=5, fig.align='center'}
E = sum(theta*post*h)
E2 = sum(theta^2*post*h) 
V = E2 - E^2
S = sqrt(V)
c(E,S,py)

plot(theta,post,type="l",xlab=expression(theta),main="Posterior density",ylab="Density")
abline(v=E,col=2)
abline(v=E-2*S,col=2,lty=2)
abline(v=E+2*S,col=2,lty=2)

```

## MC-based posterior approximation via SIR
```{r fig.width=6, fig.height=5, fig.align='center'}
# Step 1:  Sampling from proposal
M      = 1000
thetas = runif(M,-0.5,1)

# Step 2: Computing resampling weights
w      = prior.likelihood(thetas)
w      = w/sum(w)
plot(thetas,w,pch=16,cex=0.5,xlab=expression(theta),ylab="Weights")

# Step 3: Resampling
thetas.post = sample(thetas,size=M,replace=TRUE,prob=w)


E.sir = mean(thetas.post)
S.sir = sqrt(var(thetas.post))

c(E,E.sir)
c(S,S.sir)

hist(thetas.post,prob=TRUE,breaks=seq(-0.5,1,length=40),xlab=expression(theta),main="")
lines(theta,post,col=2,lwd=2)
title("Posterior: grid vs SIR")
```

## How good (or bad) is the MC approximation?
```{r fig.width=6, fig.height=5, fig.align='center'}
Ms = c(seq(10,100,by=10),seq(200,1000,by=1000),
       seq(2000,10000,by=1000),seq(20000,100000,by=10000),seq(200000,1000000,by=100000))
nM = length(Ms)
E.sir = rep(0,nM)
for (i in 1:nM){
  thetas = runif(Ms[i],-0.5,1)
  w      = prior.likelihood(thetas)
  thetas.post = sample(thetas,size=M,replace=TRUE,prob=w)
  E.sir[i] = mean(thetas.post)
}


plot(log10(Ms),E.sir,ylab="Posterior mean",xlab="Monte Carlo size (log10)",type="b",pch=16)
abline(h=E,col=2,lwd=2)
title("Monte Carlo approximation of posterior mean")
```

# Bivariate example
$$
p(\theta|\mbox{data}) \propto p(\theta)L(\theta|\mbox{data})=\left(\frac{1}{1.5}\right)^2
\exp\left\{-25(0.45-\theta)^2-400(\theta_2-\theta^4)^2\right\},
$$
where $\theta=(\theta_1,\theta_2)$, the prior of $\theta_1$ is $U(-0.5,1)$ and the prior of $\theta_2$ is $U(-0.5,1)$, with $\theta_1$ and $\theta_2$ independent *a priori*

```{r fig.width=6, fig.height=5, fig.align='center'}
prior.likelihood = function(theta1,theta2){
  prior=(1/1.5)^2
  likelihood=exp(-(10*(0.45-theta1))^2/4-(20*(theta2/2-theta1^4))^2)
  return(prior*likelihood)
}

N      = 200
theta1 = seq(-0.5,1,length=N)
theta2 = seq(-0.5,1,length=N)

post = matrix(0,N,N)
for (i in 1:N)
  for (j in 1:N)
    post[i,j] = prior.likelihood(theta1[i],theta2[j])

h = theta1[2]-theta1[1]
py = sum(apply(post*h^2,1,sum))
post = post/py

E1 = sum(t(post)%*%theta1*h^2)
E2 = sum(post%*%theta2*h^2)

c(E1,E2,py)

par(mfrow=c(1,1))
contour(theta1,theta2,post,nlevels=15,xlab=expression(theta[1]),ylab=expression(theta[2]),
drawlabels=FALSE,main="Posterior")
```

## Sampling importance resampling
```{r fig.width=8, fig.height=8, fig.align='center'}
# Step 1:  Sampling from proposal
M = 100000
theta1.prior = runif(M,-0.5,1)
theta2.prior = runif(M,-0.5,1)

# Step 2: Computing resampling weights
w = rep(0,M)
for (i in 1:M)
  w[i] = prior.likelihood(theta1.prior[i],theta2.prior[i])
  
# Step 3: Resampling
ind = sample(1:M,size=M,replace=TRUE,prob=w)
theta1.post = theta1.prior[ind]
theta2.post = theta2.prior[ind]

# Computing normalizing constant (prior predictive)
py.mc = mean(w)


par(mfrow=c(2,2))
plot(theta1.prior,theta2.prior,xlab=expression(theta[1]),ylab=expression(theta[2]),
     main="Prior draws vs posterior evaluations",pch=".")
contour(theta1,theta2,post,nlevels=15,drawlabels=FALSE,add=TRUE,col=3,lwd=2)
plot(theta1.post,theta2.post,xlim=range(theta1.prior),ylim=range(theta2.prior),
     xlab=expression(theta[1]),ylab=expression(theta[2]),pch=".",
     main="Posterior draws vs posterior evaluations")
contour(theta1,theta2,post,nlevels=15,drawlabels=FALSE,add=TRUE,col=3,lwd=2)
hist(theta1.post,prob=TRUE,xlab=expression(theta[1]),main="",xlim=c(-0.5,1),col=3)
title("Marginal posterior")
abline(h=1/1.5,col=2,lwd=3)
hist(theta2.post,prob=TRUE,xlab=expression(theta[2]),main="",xlim=c(-0.5,1),col=3)
title("Marginal posterior")
abline(h=1/1.5,col=2,lwd=3)



# Marginal priors and posteriors
tab = rbind(
summary(theta1.prior),
summary(theta2.prior),
summary(theta1.post),
summary(theta2.post))
rownames(tab) = c("Prior theta1","Prior theta2","Posterior theta1","Posterior theta2")
round(tab,3)


E1.mc = mean(theta1.post)
E2.mc = mean(theta2.post)


c(E1,E1.mc)
c(E2,E2.mc)
c(py,py.mc)

```
