AR(1) plus noise model

Posterior inference

In this exercise, we assume that the fixed parameters \(V\), \(\phi\) and \(W\) are known. The prior for \(x_0\) is set at \(N(0,100)\), while the Markov chain Monte Carlo schemes (explained below) ar run for initial \(M_0=1,000\) iterations (to be discarded) and an additional \(M=1,000\) iterations (to be saved for posterior inference).

# Prior hyperparameters
m0   =   0.00
C0   = 100.00

# MCMC setup 
set.seed(12345)
M0   = 1000      # burn-in draws
M    = 1000      # draws from the posterior 
L    = 1         # keep only L-th draw

Exact inference via Kalman filter and Kalman smoother

We chose this model because the Kalman recursions (forward and backward) are available in close form. The kalman filter recursions are, for \(t=1,2,\ldots,n\), \[\begin{eqnarray*} x_t|y^{t-1} &\sim& N(a_t,R_t) \qquad a_t=m_{t-1}, R_t=\phi^2C_{t-1}+W,\\ y_t|y^{t-1} &\sim& N(f_t,Q_t) \qquad f_t=a_t, Q_t=R_t+V,\\ x_t|y^t &\sim& N(m_t,C_t) \qquad m_t=C_t(a_t/R_t+y_t/V), C_t=1/(1/R_t+1/V),\\ \end{eqnarray*}\] while the Kalman smoother recursions are, for \(t=n-1,n-2,\ldots,1\), \[ x_t|y^t \sim N(m_t^n,C_t^n) \] where \(m_n^n=m_n\) and \(C_n^n=C_n\), and \[\begin{eqnarray*} C_t^n &=& C_t - B_t^2(R_{t+1}-C_{t+1}^n)\\ m_t^n &=& m_t + B_t(m_{t+1}^n-a_{t+1}) \end{eqnarray*}\]

where \(B_t=\phi C_t/R_{t+1}\). These are sumarized in the R function kalmansmoother:

kalmansmoother = function(y,m0,c0,phi,V,W){
  n = length(y) 
  mf = rep(0,n)
  Cf = rep(0,n)
  af = rep(0,n)
  Rf = rep(0,n)
  m = m0
  C = C0
  for (t in 1:n){
    a = phi*m
    R = phi^2*C+W
    C = 1/(1/R+1/V)
    m = C*(a/R+y[t]/V)
    af[t] = a
    Rf[t] = R
    Cf[t] = C
    mf[t] = m
  }
  mb = rep(0,n)
  Cb = rep(0,n)
  mb[n] = mf[n]
  Cb[n] = Cf[n]
  for (t in (n-1):1){
    B = phi*Cf[t]/Rf[t+1]
    Cb[t] = Cf[t] - B^2*(Rf[t+1]-Cb[t+1])
    mb[t] = mf[t] + B*(mb[t+1]-af[t+1])
  }
  return(cbind(mb+qnorm(0.05)*sqrt(Cb),mb,mb+qnorm(0.95)*sqrt(Cb)))
}

Approximate inference via block-move MCMC: the FFBS algorithm

In this MCMC scheme we sample jointly from \(p(x_1,\ldots,x_n|y^n)\). This is done by noticing that \[ p(x_1,\ldots,x_n|y^n) = p(x_n|y^n)\prod_{t=1}^{n-1} p(x_t|x_{t+1:n},y^n)= p(x_n|y^n)\prod_{t=1}^{n-1} p(x_t|x_{t+1},y_t), \] and that, for \(t=n-1,n-2,\ldots,1\), \[ x_t|x_{t+1},y_t \sim N(h_t,H_t) \] where \(h_n=m_n\) and \(H_n=C_n\), and \[\begin{eqnarray*} H_t &=& 1/(1/C_t+\phi^2/W)\\ m_t &=& H_t(m_t/C_t+\phi x_{t+1}/W) \end{eqnarray*}\]

ffbs = function(y,m0,c0,phi,V,W){
  n = length(y) 
  mf = rep(0,n)
  Cf = rep(0,n)
  m = m0
  C = C0
  for (t in 1:n){
    a = phi*m
    R = phi^2*C+W
    C = 1/(1/R+1/V)
    m = C*(a/R+y[t]/V)
    Cf[t] = C
    mf[t] = m
  }
  x  = rep(0,n)
  x[n] = rnorm(1,mf[n],sqrt(Cf[n]))
  for (t in (n-1):1){
    var = 1/(1/Cf[t]+phi^2/W)
    mean = var*(mf[t]/Cf[t]+phi*x[t+1]/W)
    x[t] = rnorm(1,mean,sqrt(var))
  }
  return(x)
}

Approximate inference via single-move MCMC

In this MCMC scheme we sample individually and recursive from \(p(x_t|x_{-t},y^n)\), where \(x_{-t}=\{x_1,\ldots,x_{t-1},x_{t+1},\ldots,x_n\}\). It is easy to see that \[ p(x_t|x_{-t},y^n) = p(x_t|x_{t-1},x_{t+1},y_t) = \left\{ \begin{array}{l} p(y_1|x_1)p(x_2|x_1)p(x_1) & t=1\\ p(x_t|x_{t-1})p(x_{t+1}|x_t)p(y_t|x_t) & t=2,\ldots,n-1\\ p(y_n|x_n)p(x_n|x_{n-1}) & t=n \end{array} \right., \] where \(p(x_1)\) is normal with mean \(\phi m_0\) and variance \(\phi^2C_0+W\). The above full conditional distributions are all Gaussian: \[ x_t | x_{t-1},x_{t+1},y_t \sim \left\{ \begin{array}{l} N(h_1,H_1) & t=1\\ N(h_t,H_t) & t=2,\ldots,n-1\\ N(h_n,H_n) & t=n \end{array} \right., \] where \[ H_t = \left\{ \begin{array}{l} 1/(1/(\phi^2C0+W)+\phi^2/W+1/V) & t=1\\ 1/(1/V+1/W+\phi^2/W) & t=2,\ldots,n-1\\ 1/(1/V+1/W) & t=n \end{array} \right., \] and \[ h_t = \left\{ \begin{array}{l} H_1(y_1/V + x_2/W + \phi m_0/(\phi^2C_0+W)) & t=1\\ H_t(y_t/V+\phi(x_{t-1}+x_{t+1})/W) &t=2,\ldots,n-1\\ H_n(y_n/V+\phi x_{t-1}/W) &t=n \end{array} \right. \]

singlemove = function(y,m0,c0,phi,V,W,x){
  n    = length(y)  
  # sampling x[1]
  var = 1/(1/(phi^2*C0+W)+phi^2/W+1/V)
  mean = var*(y[1]/V+x[2]/W+phi*m0/(phi^2*C0+W))
  x[1] = rnorm(1,mean,sqrt(var))
  # sampling x[2],...,x[n-1]
  var = 1/(1/V+1/W+phi^2/W)
  sd  = sqrt(var)
  for (t in 2:(n-1)){
    mean = var*(y[t]/V+phi*(x[t-1]+x[t+1])/W)
    x[t] = rnorm(1,mean,sd) 
  }
  # sampling x[n]
  var = 1/(1/V+1/W)
  mean = var*(y[n]/V+phi*x[t-1]/W)
  x[n] = rnorm(1,mean,sd) 
  return(x)
}

The above sampling schemes are embeded in the following R scripts

mcmc.blockmove = function(y,m0,C0,M0,M,L,V,phi,W){
  niter = M0+M*L
  draws = matrix(0,niter,n)
  for (iter in 1:niter){
    # sampling the states
    x = ffbs(y,m0,C0,phi,V,W)
    # Storing the pars
    draws[iter,] = x 
  }
  return(draws[seq((M0+1),niter,by=L),])
}

mcmc.singlemove = function(y,m0,C0,M0,M,L,V,phi,W,x){
  niter = M0+M*L
  draws = matrix(0,niter,n)
  for (iter in 1:niter){
    # sampling the states
    x = singlemove(y,m0,C0,phi,V,W,x)
    draws[iter,] = x
  }
  return(draws[seq((M0+1),niter,by=L),])
}

We run the three smoothers (Kalman, single-move MCMC and block-move MCCMC). Notice that for the single move MCMC scheme we need initial values for the whole vector of states \(x_1,\ldots,x_n\). We use the data as initial values.

qx.ks   = kalmansmoother(y,m0,c0,phi,V,W)
run.bmf = mcmc.blockmove(y,m0,C0,M0,M,L,V,phi,W)    
run.smf = mcmc.singlemove(y,m0,C0,M0,M,L,V,phi,W,y)

The following piece of R script produces exact and MCMC-based quantiles (5%,50%,95%) for \(p(x_t|y^n), \ t=1,\ldots,n\), as well as autcorrelation functions (up to lag 30) for the draws from both single-move and block-move MCMC schemes.

xs.bmf  = run.bmf
xs.smf  = run.smf
qx.bmf  = t(apply(xs.bmf,2,quantile,c(0.05,0.5,0.95)))
qx.smf  = t(apply(xs.smf,2,quantile,c(0.05,0.5,0.95)))

lag = 30
acf.sm = matrix(0,lag,n)
acf.bm = matrix(0,lag,n)
for (t in 1:n){
  acf.sm[,t] = acf(xs.smf[,t],lag.max=lag,plot=FALSE)$acf[2:(lag+1)]
  acf.bm[,t] = acf(xs.bmf[,t],lag.max=lag,plot=FALSE)$acf[2:(lag+1)]
}
qacf.sm = t(apply(acf.sm,1,quantile,c(0.05,0.5,0.95)))
qacf.bm = t(apply(acf.bm,1,quantile,c(0.05,0.5,0.95)))

Autocorrelation functions of sampled states \(x_t\)s

par(mfrow=c(1,2))
plot(acf.bm[,1],type="l",ylab="ACF of states",xlab="LAG",ylim=c(-0.2,1),axes=FALSE)
axis(2);box();axis(1,at=c(1,seq(5,lag,by=5)))
for (t in 2:n) 
  lines(acf.bm[,t])
abline(h=0,lty=2,col=2)
title("Block move") 
plot(acf.sm[,1],type="l",ylab="ACF of states",xlab="LAG",ylim=c(-0.2,1),axes=FALSE)
axis(2);box();axis(1,at=c(1,seq(5,lag,by=5)))
for (t in 2:n) 
  lines(acf.sm[,t])
abline(h=0,lty=2,col=2)
title("Single move")

Comparing the ACFs

par(mfrow=c(1,1))
ts.plot(qacf.bm,ylim=range(qacf.bm,qacf.sm),xlab="LAG",ylab="ACF of states",col=2)
for (i in 1:3)
  lines(qacf.sm[,i],col=4)
abline(h=0,lty=2)
legend("topright",legend=c("Block move","Single move"),col=c(2,4),lty=1)

Summaries of exact and MCMC-based marginal posterior \(p(x_t|y^n)\)

par(mfrow=c(1,1))
ts.plot(qx.bmf,col=2)
lines(qx.smf[,1],col=3)
lines(qx.smf[,2],col=3)
lines(qx.smf[,3],col=3)
lines(qx.ks[,1],col=1)
lines(qx.ks[,2],col=1)
lines(qx.ks[,3],col=1)
legend("topleft",legend=c("Exact","Block move","Single move"),col=1:3,lty=1)

Comparing the performance of the single-move and block-move MCMC schemes

par(mfrow=c(1,1))
boxplot(
qx.bmf[,1]-qx.ks[,1],qx.smf[,1]-qx.ks[,1],
qx.bmf[,2]-qx.ks[,2],qx.smf[,2]-qx.ks[,2],
qx.bmf[,3]-qx.ks[,3],qx.smf[,3]-qx.ks[,3],ylab="Distance to exact",names=rep(c("single","block"),3),outline=F)
abline(v=2.5)
abline(v=4.5)
text(1.5,0.15,"5%",col=2)
text(3.5,0.15,"50%",col=2)
text(5.5,0.15,"95%",col=2)

Monte Carlo exercise based on \(S=100\) samples

set.seed(54321)
n    =  200
V    =   1.00
W    =   0.25
phi  =   0.95
m0   =   0.00
C0   = 100.00
M0   = 1000
M    = 1000
L    =   1
S    = 100
stats = matrix(0,S,6)
for (s in 1:S){
  sim = sim.dlm(n,phi,V,W)
  y   = sim$y
  x   = sim$x
  qx.ks   = kalmansmoother(y,m0,c0,phi,V,W)
  run.bmf = mcmc.blockmove(y,m0,C0,M0,M,L,V,phi,W)  
  run.smf = mcmc.singlemove(y,m0,C0,M0,M,L,V,phi,W,x)   
  qx.bmf  = t(apply(run.bmf,2,quantile,c(0.05,0.5,0.95)))
  qx.smf  = t(apply(run.smf,2,quantile,c(0.05,0.5,0.95)))
  stats[s,] = apply(abs(cbind(qx.bmf[,1]-qx.ks[,1],qx.smf[,1]-qx.ks[,1],
                              qx.bmf[,2]-qx.ks[,2],qx.smf[,2]-qx.ks[,2],
                              qx.bmf[,3]-qx.ks[,3],qx.smf[,3]-qx.ks[,3])),2,mean)
}

boxplot.matrix(stats,ylab="Mean absolute distance",names=rep(c("block","single"),3),outline=F)
abline(v=2.5)
abline(v=4.5)
text(1.5,0.05,"5%",col=2)
text(3.5,0.05,"50%",col=2)
text(5.5,0.05,"95%",col=2)