Example 1: a local-level dynamic linear model

Here we revisit the most basic normal dynamic linear model: \[\begin{eqnarray*} y_t &=& x_t + \epsilon_t \qquad \epsilon_t \sim N(0,\sigma^2)\\ x_t &=& x_{t-1} + \omega_t \qquad \omega_t \sim N(0,\tau^2). \end{eqnarray*}\]

Below we simulated some data from this model.

set.seed(54321)
n     = 100
sig2  = 1.00
tau2  = 0.25
x     = rnorm(n)
for (t in 2:n)
  x[t] = x[t-1]+rnorm(1,0,sqrt(tau2))
y = x + rnorm(n,0,sqrt(sig2))

par(mfrow=c(1,1))
plot(y,xlab="Time",ylab="",type="b",pch=16,lwd=2)
lines(x,col=2,lwd=2,type="b",pch=16)
legend("topright",legend=c("Y(t)","X(t)"),col=1:2,lwd=2,lty=1)

The Kalman filter

Let us assume that \(x_0|D_0 \sim N(m_0,C_0)\), for known \((\sigma^2,\tau^2,m_0,C_0)\) and \(D_0\) the information set up to time \(t=0\), so \(D_t=\{D_{t-1},y_t\}\). As we learned in class, the Kalman recursions can be used to show that, for \(x_{t-1}|D_{t-1} \sim N(m_{t-1},C_{t-1})\), we can derive the sequential distributions: \[\begin{eqnarray*} x_t|D_{t-1} &\sim& N(m_{t-1},C_{t-1}+\tau^2)\\ x_t|D_t &\sim& N(m_t,C_t), \end{eqnarray*}\] where \[\begin{eqnarray*} m_t &=& (1-A_t)m_{t-1} + A_t y_t\\ C_t &=& C_{t-1} + \tau^2 - A_t^2(C_{t-1}+\tau^2+\sigma^2)\\ A_t &=& \frac{C_{t-1}+\tau^2}{C_{t-1}+\tau^2+\sigma^2}. \end{eqnarray*}\]

m0   = 0
C0   = 1
m    = m0
C    = C0
mf   = rep(0,n)
Cf   = rep(0,n)

for (t in 1:n){
  A     = (C+tau2)/(C+tau2+sig2)
  m     = (1-A)*m + A*y[t]
  C     = C + tau2 - A^2*(C+tau2+sig2)
  mf[t] = m
  Cf[t] = C
}
qf = cbind(mf+qnorm(0.025)*sqrt(Cf),mf,mf+qnorm(0.975)*sqrt(Cf))

par(mfrow=c(1,1))
ts.plot(qf,lwd=2,lty=c(2,1,2),ylab="State")
points(y,col=2,pch=16)
lines(x,col=6,lwd=2)
legend("topright",legend=c("data","states","posterior"),col=c(2,6,1),lwd=2)

Revisiting sampling importance resampling (SIR)

The SIR scheme uses samples from a proposal distribution, \(q(\theta)\), in order to sample from the target distribution, \(p(\theta)\). Below, we assume that \(p(\theta)\) is \(N(0,3)\) and that \(q(\theta)\) is \(t_3(0,1)\). To sum up, the SIR is implemented as follows:

set.seed(123567)
M        = 100000
thetas.q = rt(M,df=3)
w        = dnorm(thetas.q,0,sqrt(3))/dt(thetas.q,df=3)
thetas.p = sample(thetas.q,size=M,replace=TRUE,prob=w)

par(mfrow=c(1,2))
xbreaks = seq(min(thetas.q),max(thetas.q),length=200)
hist(thetas.q,breaks=xbreaks,prob=TRUE,main="Draws from proposal",xlim=c(-10,10),ylim=c(0,0.4),xlab="Theta")
xx = seq(min(thetas.q),max(thetas.q),length=1000)
lines(xx,dt(xx,df=3),col=2,lwd=2)
xbreaks = seq(min(thetas.p),max(thetas.p),length=30)
hist(thetas.p,breaks=xbreaks,prob=TRUE,main="Draws from target\nvia SIR",
     xlim=c(-10,10),ylim=c(0,0.4),xlab="Theta")
xx = seq(min(thetas.p),max(thetas.p),length=1000)
lines(xx,dnorm(xx,0,sqrt(3)),col=2,lwd=2)

Example 1: The Bootstrap filter

The Boostrap filter is basically the SIR applied sequentially in the dynamic model framework. More precisely, let \(\{x_{t-1}^{(1)},\ldots,x_{t-1}^{(M)}\}\) be a sample from \(p(x_{t-1}|D_{t-1})\).

Closer look at from \(t=0\) to \(t=1\)

set.seed(12345)
m0     = 0
C0     = 1
M      = 10000

# x0|D0
x0post   = rnorm(M,m0,sqrt(C0))

# x1|D0
x1prior = rnorm(M,x0post,sqrt(tau2))

# x1|D1
w      = dnorm(y[1],x1prior,sqrt(sig2))
x1post = sample(x1prior,size=M,replace=TRUE,prob=w)

par(mfrow=c(1,3))
hist(x0post,xlab="",prob=TRUE,main="p(x0|D0)")
hist(x1prior,xlab="",prob=TRUE,main="p(x1|D0)")
hist(x1post,xlab="",prob=TRUE,main="p(x1|D1)")
points(y[1],0,col=3,pch=16,cex=2)

For \(t=2,\ldots,n\)

xdraws     = matrix(0,n,M)
x          = x1post
xdraws[1,] = x

for (t in 2:n){
  xprior  = rnorm(M,x,sqrt(tau2))
  weights = dnorm(y[t],xprior,sqrt(sig2))
  x       = sample(xprior,size=M,replace=TRUE,prob=weights)
  xdraws[t,] = x
}
qpf = t(apply(xdraws,1,quantile,c(0.025,0.5,0.975)))
ind = c(5,6,7,42,43,44,98,99,n)
par(mfrow=c(3,3))
for (i in ind){
  hist(xdraws[i,],xlab="",prob=TRUE,main=paste("p(x",i,"|D",i,")",sep=""))
  points(y[i],0,col=3,pch=16,cex=2)
  xxx = seq(min(xdraws[i,]),max(xdraws[i,]),length=200)
  lines(xxx,dnorm(xxx,mf[i],sqrt(Cf[i])),col=2,lwd=2)
}

par(mfrow=c(1,1))
ts.plot(qf,lwd=2,ylim=range(qf,qpf))
lines(qpf[,1],col=2,lwd=2)
lines(qpf[,2],col=2,lwd=2)
lines(qpf[,3],col=2,lwd=2)
legend("topleft",legend=c("Kalman filter","Particle filter"),col=1:2,lwd=2)
title(paste("M=",M," particles",sep=""))

Example 2: a nonlinear dynamic model

Let us now illustrate the Bootstrap filter for a nonlinear dynamic model, where the nonlinearity appears in the observation equation: \[\begin{eqnarray*} y_t &=& |x_t| + \epsilon_t \qquad \epsilon_t \sim N(0,\sigma^2)\\ x_t &=& x_{t-1} + \omega_t \qquad \omega_t \sim N(0,\tau^2). \end{eqnarray*}\]

set.seed(54321)
n     = 100
sig2  = 1.00
tau2  = 0.25
x     = rep(0,n)
for (t in 2:n)
  x[t] = x[t-1] + rnorm(1,0,sqrt(tau2))
y = abs(x) + rnorm(n,0,sqrt(sig2))
x.true = x

par(mfrow=c(1,1))
plot(y,xlab="Time",ylab="",type="b",pch=16,lwd=2,ylim=range(x,y))
lines(x,col=2,lwd=2,type="b",pch=16)
legend("top",legend=c("Y(t)","X(t)"),col=1:2,lwd=2,lty=1)
abline(h=0,lty=2,lwd=2)

The Boostrap filter

set.seed(12345)
m0     = 0
C0     = 1
M      = 10000
xdraws = matrix(0,n,M)
x      = rnorm(M,m0,sqrt(C0))
for (t in 1:n){
  xprior  = rnorm(M,x,sqrt(tau2))
  weights = dnorm(y[t],abs(xprior),sqrt(sig2))
  x       = sample(xprior,size=M,replace=TRUE,prob=weights)
  xdraws[t,] = x
}
ts = c(1:5,(n/2-2):(n/2+2),(n-4):n)
par(mfrow=c(3,5))
for (t in (n-14):n){
  hist(xdraws[t,],xlab="",prob=TRUE,main="",ylab="Posterior density",xlim=range(xdraws),ylim=c(0,0.5))
  legend("topleft",legend=paste("t=",t,sep=""),bty="n")
  points(y[t],0,col=2,pch=16,cex=2)
}

N = 100
dens = matrix(0,n,N)
for (t in 1:n){
  den = density(abs(xdraws[t,]),from=-8,to=7,n=N)
  dens[t,] = den$y
}
xx = den$x

par(mfrow=c(1,1))
contour(1:n,xx,dens,nlevels=20,ylim=range(y))
points(abs(x.true),col=3,pch=16)
abline(h=0,col=4,lwd=2)
points(y,col=2,pch=16)
legend("topleft",legend=c("Data","|States|","Posteriors"),col=c(2,3,1),lwd=2,lty=1)

---
title: "Sequential Monte Carlo"
subtitle: "The Boostrap filter"
author: "Hedibert Freitas Lopes"
date: "4/14/2022"
output:
  html_document:
    toc: true
    toc_depth: 2
    code_download: yes
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

# Example 1: a local-level dynamic linear model

Here we revisit the most basic normal dynamic linear model:
\begin{eqnarray*}
y_t &=& x_t + \epsilon_t \qquad \epsilon_t \sim N(0,\sigma^2)\\
x_t &=& x_{t-1} + \omega_t \qquad \omega_t \sim N(0,\tau^2).
\end{eqnarray*}

Below we simulated some data from this model.

```{r fig.width=10,fig.height=6}
set.seed(54321)
n     = 100
sig2  = 1.00
tau2  = 0.25
x     = rnorm(n)
for (t in 2:n)
  x[t] = x[t-1]+rnorm(1,0,sqrt(tau2))
y = x + rnorm(n,0,sqrt(sig2))

par(mfrow=c(1,1))
plot(y,xlab="Time",ylab="",type="b",pch=16,lwd=2)
lines(x,col=2,lwd=2,type="b",pch=16)
legend("topright",legend=c("Y(t)","X(t)"),col=1:2,lwd=2,lty=1)
```

## The Kalman filter

Let us assume that $x_0|D_0 \sim N(m_0,C_0)$, for known $(\sigma^2,\tau^2,m_0,C_0)$ and 
$D_0$ the information set up to time $t=0$, so $D_t=\{D_{t-1},y_t\}$.
As we learned in class, the Kalman recursions can be used to show that, for $x_{t-1}|D_{t-1} \sim N(m_{t-1},C_{t-1})$, we can derive the sequential distributions:
\begin{eqnarray*}
x_t|D_{t-1} &\sim& N(m_{t-1},C_{t-1}+\tau^2)\\
x_t|D_t &\sim& N(m_t,C_t),
\end{eqnarray*}
where 
\begin{eqnarray*}
m_t &=& (1-A_t)m_{t-1} + A_t y_t\\
C_t &=& C_{t-1} + \tau^2 - A_t^2(C_{t-1}+\tau^2+\sigma^2)\\
A_t &=& \frac{C_{t-1}+\tau^2}{C_{t-1}+\tau^2+\sigma^2}.
\end{eqnarray*}

```{r fig.width=10,fig.height=6}
m0   = 0
C0   = 1
m    = m0
C    = C0
mf   = rep(0,n)
Cf   = rep(0,n)

for (t in 1:n){
  A     = (C+tau2)/(C+tau2+sig2)
  m     = (1-A)*m + A*y[t]
  C     = C + tau2 - A^2*(C+tau2+sig2)
  mf[t] = m
  Cf[t] = C
}
qf = cbind(mf+qnorm(0.025)*sqrt(Cf),mf,mf+qnorm(0.975)*sqrt(Cf))

par(mfrow=c(1,1))
ts.plot(qf,lwd=2,lty=c(2,1,2),ylab="State")
points(y,col=2,pch=16)
lines(x,col=6,lwd=2)
legend("topright",legend=c("data","states","posterior"),col=c(2,6,1),lwd=2)
```

# Revisiting sampling importance resampling (SIR)

The SIR scheme uses samples from a proposal distribution, $q(\theta)$, in order to sample from the target distribution, $p(\theta)$.  Below, we assume that $p(\theta)$ is $N(0,3)$ and that $q(\theta)$ is $t_3(0,1)$.  To sum up, the SIR is implemented as follows:

* Sample $\{{\tilde \theta}^{(1)},\ldots,{\tilde \theta}^{(M)}\}$ from $q(\theta)$.

* Compute resampling weights, for $i=1,\ldots,M$
$$
\omega^{(i)} \propto \frac{p({\tilde \theta}^{(i)})}{q({\tilde \theta}^{(i)})}.
$$

* For $j=1,\ldots,M$, sample $\theta^{(j)}$ from $\{{\tilde \theta}^{(1)},\ldots,{\tilde \theta}^{(M)}\}$ with probabilities $\{\omega^{(1)},\ldots,\omega^{(M)}\}$.

```{r fig.width=10,fig.height=6}
set.seed(123567)
M        = 100000
thetas.q = rt(M,df=3)
w        = dnorm(thetas.q,0,sqrt(3))/dt(thetas.q,df=3)
thetas.p = sample(thetas.q,size=M,replace=TRUE,prob=w)

par(mfrow=c(1,2))
xbreaks = seq(min(thetas.q),max(thetas.q),length=200)
hist(thetas.q,breaks=xbreaks,prob=TRUE,main="Draws from proposal",xlim=c(-10,10),ylim=c(0,0.4),xlab="Theta")
xx = seq(min(thetas.q),max(thetas.q),length=1000)
lines(xx,dt(xx,df=3),col=2,lwd=2)
xbreaks = seq(min(thetas.p),max(thetas.p),length=30)
hist(thetas.p,breaks=xbreaks,prob=TRUE,main="Draws from target\nvia SIR",
     xlim=c(-10,10),ylim=c(0,0.4),xlab="Theta")
xx = seq(min(thetas.p),max(thetas.p),length=1000)
lines(xx,dnorm(xx,0,sqrt(3)),col=2,lwd=2)
```


# Example 1: The Bootstrap filter

The Boostrap filter is basically the SIR applied sequentially in the dynamic model framework. More precisely, let $\{x_{t-1}^{(1)},\ldots,x_{t-1}^{(M)}\}$ be a sample from $p(x_{t-1}|D_{t-1})$.


* For $i=1,\ldots,M$, propagate posterior draws at $t-1$ to prior draws at $t$:
$$
{\tilde x}_t^{(i)} \sim N(x_{t-1}^{(i)},\tau^2)
$$

* For $i=1,\ldots,M$, compute resampling weights: 
$$
\omega^{(i)} \propto p(y_t|{\tilde x}_t^{(i)}).
$$

* For $j=1,\ldots,M$, sample $x_t^{(j)}$ from $\{{\tilde x}^{(1)},\ldots,{\tilde x}^{(M)}\}$ with probabilities $\{\omega^{(1)},\ldots,\omega^{(M)}\}$.

## Closer look at from $t=0$ to $t=1$
```{r fig.width=10,fig.height=4}
set.seed(12345)
m0     = 0
C0     = 1
M      = 10000

# x0|D0
x0post   = rnorm(M,m0,sqrt(C0))

# x1|D0
x1prior = rnorm(M,x0post,sqrt(tau2))

# x1|D1
w      = dnorm(y[1],x1prior,sqrt(sig2))
x1post = sample(x1prior,size=M,replace=TRUE,prob=w)

par(mfrow=c(1,3))
hist(x0post,xlab="",prob=TRUE,main="p(x0|D0)")
hist(x1prior,xlab="",prob=TRUE,main="p(x1|D0)")
hist(x1post,xlab="",prob=TRUE,main="p(x1|D1)")
points(y[1],0,col=3,pch=16,cex=2)
```

## For $t=2,\ldots,n$

```{r}
xdraws     = matrix(0,n,M)
x          = x1post
xdraws[1,] = x

for (t in 2:n){
  xprior  = rnorm(M,x,sqrt(tau2))
  weights = dnorm(y[t],xprior,sqrt(sig2))
  x       = sample(xprior,size=M,replace=TRUE,prob=weights)
  xdraws[t,] = x
}
qpf = t(apply(xdraws,1,quantile,c(0.025,0.5,0.975)))
```

```{r fig.width=10,fig.height=8}
ind = c(5,6,7,42,43,44,98,99,n)
par(mfrow=c(3,3))
for (i in ind){
  hist(xdraws[i,],xlab="",prob=TRUE,main=paste("p(x",i,"|D",i,")",sep=""))
  points(y[i],0,col=3,pch=16,cex=2)
  xxx = seq(min(xdraws[i,]),max(xdraws[i,]),length=200)
  lines(xxx,dnorm(xxx,mf[i],sqrt(Cf[i])),col=2,lwd=2)
}
```

```{r fig.width=10,fig.height=6}
par(mfrow=c(1,1))
ts.plot(qf,lwd=2,ylim=range(qf,qpf))
lines(qpf[,1],col=2,lwd=2)
lines(qpf[,2],col=2,lwd=2)
lines(qpf[,3],col=2,lwd=2)
legend("topleft",legend=c("Kalman filter","Particle filter"),col=1:2,lwd=2)
title(paste("M=",M," particles",sep=""))
```




# Example 2: a nonlinear dynamic model
Let us now illustrate the Bootstrap filter for a nonlinear dynamic model, where the nonlinearity appears in the observation equation:
\begin{eqnarray*}
y_t &=& |x_t| + \epsilon_t \qquad \epsilon_t \sim N(0,\sigma^2)\\
x_t &=& x_{t-1} + \omega_t \qquad \omega_t \sim N(0,\tau^2).
\end{eqnarray*}

```{r fig.width=10,fig.height=6}
set.seed(54321)
n     = 100
sig2  = 1.00
tau2  = 0.25
x     = rep(0,n)
for (t in 2:n)
  x[t] = x[t-1] + rnorm(1,0,sqrt(tau2))
y = abs(x) + rnorm(n,0,sqrt(sig2))
x.true = x

par(mfrow=c(1,1))
plot(y,xlab="Time",ylab="",type="b",pch=16,lwd=2,ylim=range(x,y))
lines(x,col=2,lwd=2,type="b",pch=16)
legend("top",legend=c("Y(t)","X(t)"),col=1:2,lwd=2,lty=1)
abline(h=0,lty=2,lwd=2)
```

## The Boostrap filter

```{r}
set.seed(12345)
m0     = 0
C0     = 1
M      = 10000
xdraws = matrix(0,n,M)
x      = rnorm(M,m0,sqrt(C0))
for (t in 1:n){
  xprior  = rnorm(M,x,sqrt(tau2))
  weights = dnorm(y[t],abs(xprior),sqrt(sig2))
  x       = sample(xprior,size=M,replace=TRUE,prob=weights)
  xdraws[t,] = x
}
```

```{r fig.width=10,fig.height=8}
ts = c(1:5,(n/2-2):(n/2+2),(n-4):n)
par(mfrow=c(3,5))
for (t in (n-14):n){
  hist(xdraws[t,],xlab="",prob=TRUE,main="",ylab="Posterior density",xlim=range(xdraws),ylim=c(0,0.5))
  legend("topleft",legend=paste("t=",t,sep=""),bty="n")
  points(y[t],0,col=2,pch=16,cex=2)
}
```

```{r fig.width=10,fig.height=6}
N = 100
dens = matrix(0,n,N)
for (t in 1:n){
  den = density(abs(xdraws[t,]),from=-8,to=7,n=N)
  dens[t,] = den$y
}
xx = den$x

par(mfrow=c(1,1))
contour(1:n,xx,dens,nlevels=20,ylim=range(y))
points(abs(x.true),col=3,pch=16)
abline(h=0,col=4,lwd=2)
points(y,col=2,pch=16)
legend("topleft",legend=c("Data","|States|","Posteriors"),col=c(2,3,1),lwd=2,lty=1)
```

