In the local level model, the observation and system equations are given by \[\begin{eqnarray*} y_t|x_t,\theta &\sim& N(x_t,\sigma^2)\\ x_t|x_{t-1},\theta &\sim& N(x_{t-1},\tau^2), \end{eqnarray*}\] for \(t=1,\ldots,n\), with \(x_0 \sim N(m_0,C_0)\) and \(\theta=(\sigma^2,\tau^2)\). We assume that \(\theta\) is known in what follows.
# Simulate n observations from the local level model:
#   x_t = x_{t-1} + w_t,  w_t ~ N(0, tau2)   (state equation)
#   y_t = x_t + v_t,      v_t ~ N(0, sig2)   (observation equation)
set.seed(123456)
sig2 = 1.0
tau2 = 1.0
n = 100
x0 = 0.0
tau = sqrt(tau2)
sig = sqrt(sig2)
w = rnorm(n,0,tau)
v = rnorm(n,0,sig)
# The random walk recursion x_t = x_{t-1} + w_t unrolls to
# x_t = x0 + sum_{s<=t} w_s, so the t-loop is replaced by cumsum.
# Results are identical to the elementwise loop since w, v are pre-drawn.
x = x0 + cumsum(w)
y = x + v
plot(y,xlab="Time",ylab="",pch=16,ylim=range(x,y))
lines(x,col=2,lwd=2,type="b")
legend("topleft",legend=c("Data","State"),bty="n",pch=16,col=1:2)
# The function DLM returns forward filtering (Kalman filter) and backward
# smoothing (Kalman smoother) means and variances.
# Forward filtering (Kalman filter) and, optionally, backward smoothing
# (Kalman smoother) for the Gaussian local level model
#   y_t | x_t     ~ N(x_t, sig2)
#   x_t | x_{t-1} ~ N(x_{t-1}, tau2),   x_0 ~ N(m0, C0).
#
# Arguments:
#   y    : vector of observations
#   sig2 : observation variance
#   tau2 : system (evolution) variance
#   m0   : prior mean of x_0
#   C0   : prior variance of x_0
#   back : if TRUE, also run the backward smoothing recursion
#
# Returns a list with the log-likelihood l, filtered moments (mf, Cf)
# and, when back = TRUE, smoothed moments (mb, Cb).
DLM = function(y,sig2,tau2,m0,C0,back){
  n = length(y)
  m = m0
  C = C0
  l = 0.0                       # accumulated log predictive likelihood
  mf = rep(0,n)
  Cf = rep(0,n)
  for (t in seq_len(n)){
    # Prediction: x_t | y_{1:t-1} ~ N(m, R); y_t | y_{1:t-1} ~ N(m, Q)
    R = C + tau2
    Q = R + sig2
    l = l + dnorm(y[t],m,sqrt(Q),log=TRUE)
    # Update with Kalman gain A = R/Q
    A = R/Q
    m = (1-A)*m+A*y[t]
    C = A*sig2                  # = R - A^2*Q = R*sig2/Q, the filtered variance
    mf[t] = m
    Cf[t] = C
  }
  if(back){
    mb = rep(0,n)
    Cb = rep(0,n)
    mb[n] = mf[n]
    Cb[n] = Cf[n]
    # Backward recursion; the guard keeps (n-1):1 from running as 0:1
    # when n == 1 (which would index Cf[0] and fail).
    if (n > 1){
      for (t in (n-1):1){
        B = Cf[t]/(Cf[t]+tau2)
        mb[t] = (1-B)*mf[t]+B*mb[t+1]
        Cb[t] = (1-B)*Cf[t]+B*B*Cb[t+1]
      }
    }
    return(list(l=l,mf=mf,Cf=Cf,mb=mb,Cb=Cb))
  }else{
    return(list(l=l,mf=mf,Cf=Cf))
  }
}
m0 = 0
C0 = 4
# Run the Kalman filter and smoother on the simulated series
dlm = DLM(y,sig2,tau2,m0,C0,TRUE)
mf = dlm$mf
Cf = dlm$Cf
mb = dlm$mb
Cb = dlm$Cb
# 95% equal-tail bands (lower, mean, upper) for filtered and smoothed states
zlo = qnorm(0.025)
zhi = qnorm(0.975)
quantf = cbind(mf + zlo*sqrt(Cf), mf, mf + zhi*sqrt(Cf))
quantb = cbind(mb + zlo*sqrt(Cb), mb, mb + zhi*sqrt(Cb))
par(mfrow=c(1,1))
ylims = range(x, quantf, quantb)
plot(quantf[,1],col=2,xlab="Time",ylab="",pch=16,ylim=ylims,type="l",lwd=2)
lines(quantf[,2],col=2,lwd=2)
lines(quantf[,3],col=2,lwd=2)
for (j in 1:3) lines(quantb[,j],col=4,lwd=2)
legend("topright",legend=c("Filtered density","Smoothed density"),bty="n",lty=1,lwd=3,col=c(2,4))
It is worth noticing that the normal local level model can be rewritten as a multivariate normal model with multivariate normal prior for the states: \[\begin{eqnarray*} y | x,\theta &\sim& N(x,\sigma^2 I_n)\\ x |x_0,\theta &\sim& N(x_0 1_n, \tau^2\Omega)\\ x_0 &\sim& N(m_0,C_0) \end{eqnarray*}\] where \(x=(x_1,\ldots,x_n)'\), \(y=(y_1,\ldots,y_n)'\), and \[ \Omega = \left( \begin{array}{cccccccc} 1 & 1 & 1 & 1 & \ldots & 1 & 1 & 1 \\ 1 & 2 & 2 & 2 & \ldots & 2 & 2 & 2 \\ 1 & 2 & 3 & 3 & \ldots & 3 & 3 & 3 \\ 1 & 2 & 3 & 4 & \ldots & 4 & 4 & 4 \\ \vdots & \vdots & \vdots & \vdots & \ddots & \vdots & \vdots & \vdots \\ 1 & 2 & 3 & 4 & \ldots & n-2 & n-2 & n-2 \\ 1 & 2 & 3 & 4 & \ldots & n-2 & n-1 & n-1 \\ 1 & 2 & 3 & 4 & \ldots & n-2 & n-1 & n \end{array} \right) \]
The covariance matrix \(\Omega\) has a tridiagonal inverse \(\Omega^{-1}\): \[ \Omega^{-1} = \begin{pmatrix} 2 & -1 & 0 & \dots & 0 \\ -1 & 2 & -1 & \dots & 0 \\ 0 & -1 & 2 & \dots & 0 \\ \vdots & \vdots & \vdots & \ddots & -1 \\ 0 & 0 & 0 & -1 & 1 \end{pmatrix} \] This sparsity is a key computational advantage in Bayesian state-space modeling, as we illustrate in what follows.
The joint prior of \(x\) given \(\theta\) (integrating out \(x_0\)) is given by \[ x|\theta \sim N(m_0 1_n; C_0 1_n 1_n' + \tau^2 \Omega), \] while its full conditional posterior distribution is \[ x | y, \theta \sim N(m_1,C_1) \] where \[\begin{eqnarray*} C_1^{-1} &=& (C_0 1_n 1_n' + \tau^2 \Omega)^{-1} + \sigma^{-2}I_n\\ C_1^{-1}m_1 &=& (C_0 1_n 1_n' + \tau^2 \Omega)^{-1}m_01_n + \sigma^{-2}y \end{eqnarray*}\]
ones = matrix(1,n,1)
In = diag(1,n)
# Omega[i,j] = min(i,j): the (scaled) prior covariance of the random walk states
Omega = matrix(0,n,n)
Omega[n,] = 1:n
for (i in 1:(n-1))
  Omega[i,] = c(1:i,rep(i,n-i))
# Prior covariance of x (x_0 integrated out) is C0*1*1' + tau2*Omega.
# The original code divided by tau2 here, which was masked by tau2 = 1.
V0 = C0*ones%*%t(ones) + tau2*Omega
# Posterior precision = prior precision + observation precision
iC1 = solve(V0) + In/sig2
C1 = solve(iC1)
# Posterior mean solves C1^{-1} m1 = V0^{-1} m0 1_n + y/sig2, and
# V0^{-1} = iC1 - In/sig2.  The original multiplied by V0 itself
# (the solve was missing), masked here because m0 = 0.
m1 = C1%*%((iC1 - In/sig2)%*%ones*m0 + y/sig2)
quantb1 = cbind(m1+qnorm(0.025)*sqrt(diag(C1)),m1,m1+qnorm(0.975)*sqrt(diag(C1)))
# Overlay the Kalman smoother bands (red) and the multivariate-normal
# posterior bands (blue); the two should coincide.
par(mfrow=c(1,1))
yl = range(x, quantf, quantb)
plot(quantb[,1],col=2,xlab="Time",ylab="",pch=16,ylim=yl,type="l",lwd=1.5)
lines(quantb[,2],col=2,lwd=1.5)
lines(quantb[,3],col=2,lwd=1.5)
for (k in 1:3) lines(quantb1[,k],col=4,lwd=1.5)
legend("topright",legend=c("Kalman filter/smoother","Multivariate normal"),bty="n",lty=1,lwd=1.5,col=c(2,4))The tridiagonal structure of \(\Phi=\Omega^{-1}\) allows for drastic reduction of the number of operations when inverting \(C_1\), computing the Cholesky of \(C_1\) or sampling from the multivariate normal \(N(m_1,C_1)\).
A more efficient way to obtain \(C_1^{-1}\) takes advantage of the fact that \(\Omega^{-1}\) is tridiagonal \[ C_1^{-1} = \sigma^{-2}I_n + \tau^{-2}\Omega^{-1} - \frac{e_1 e_1'}{\tau^2\left(\frac{\tau^2}{C_0}+1\right)}, \] where \(e_1=(1,0,0,\ldots,0)'\).
# Build the tridiagonal Omega^{-1}: 2 on the diagonal (except 1 in the
# last entry) and -1 on both off-diagonals.
iOmega = diag(2, n)
iOmega[n, n] = 1
lower = cbind(2:n, 1:(n-1))
iOmega[lower] = -1
iOmega[lower[, 2:1]] = -1
# e1 = first canonical basis vector, as an n x 1 matrix
e1 = In[, 1, drop = FALSE]
# Sherman-Morrison form of the posterior precision (see formula above)
iC11 = In/sig2+iOmega/tau2-e1%*%t(e1)/(tau2*(tau2/C0+1))Similarly, \(m_1\) can be computed without the need of inverting \(C_1^{-1}\), which is a dense matrix. The R function solve.tridiagonal solves the system \[ A z = d, \] where \(A\) is a tridiagonal matrix with the vector \(b\) in the diagonal, the vector \(a\) in the lower diagonal and the vector \(c\) in the upper diagonal. Notice that \(a\) and \(c\) are vectors of length \(n-1\). In our case, \[\begin{eqnarray*} A &=& C_1^{-1},\\ z &=& m_1\\ d &=& (C_1^{-1} - \sigma^{-2}I_n)m_0 1_n + \sigma^{-2}y. \end{eqnarray*}\]
Once we collect the vectors \(a, b, c\) and \(d\), we can easily compute \(m_1\).
# Thomas algorithm for a tridiagonal linear system A x = d.
# a: (0, lower diagonal)  -- length n, a[1] unused
# b: main diagonal        -- length n
# c: (upper diagonal, 0)  -- length n, c[n] unused
# d: right-hand side      -- length n
# Returns the solution vector x of length n.
# Assumes A admits the factorization without pivoting (e.g. diagonally
# dominant or SPD, as is the case for C1^{-1} here).
solve.tridiagonal = function(a,b,c,d){
  n = length(d)
  cp = numeric(n)
  dp = numeric(n)
  x = numeric(n)
  # Forward sweep.  Guards fix the original behavior for n <= 2, where
  # 2:(n-1) would run backwards (2:1 or 2:0) and index cp[0].
  cp[1] = c[1]/b[1]
  dp[1] = d[1]/b[1]
  if (n > 2) {
    for (i in 2:(n-1))
      cp[i] = c[i]/(b[i]-a[i]*cp[i-1])
  }
  if (n > 1) {
    for (i in 2:n)
      dp[i] = (d[i]-a[i]*dp[i-1])/(b[i]-a[i]*cp[i-1])
  }
  # Back substitution
  x[n] = dp[n]
  if (n > 1) {
    for (i in (n-1):1)
      x[i] = dp[i] - cp[i]*x[i+1]
  }
  return(x)
}
# We are now ready to compute m_1:
# Right-hand side d = (C1^{-1} - sigma^{-2} I_n) m0 1_n + y/sigma^2
ind = matrix(1,n,1)
d = as.vector(m0*(iC1-In/sig2)%*%ind + y/sig2)
# Extract the three diagonals of iC11 in the padded form expected by
# solve.tridiagonal: a = (0, lower), b = main, c = (upper, 0).
a = c(0, iC11[cbind(2:n, 1:(n-1))])
b = diag(iC11)
c = c(iC11[cbind(1:(n-1), 2:n)], 0)
m11 = solve.tridiagonal(a, b, c, d)
# m11 (tridiagonal solve) should agree with m1 (dense inversion)
par(mfrow=c(1,2))
ts.plot(m1,ylab="Posterior mean")
lines(m11,col=2)
ts.plot(round(m1-m11,10),ylab="Difference")We will only need the vectors \(a\) (lower diagonal) and \(b\) (main diagonal) from \(C_1^{-1}\) in order to use the function trichol from the R package mgcv.
## Loading required package: nlme
## This is mgcv 1.9-3. For overview type 'help("mgcv-package")'.
# Draw M samples from N(m1, C1) two ways:
#   draws  : dense Cholesky factor L1 of C1           (O(n^3) setup)
#   draws1 : sparse/banded factor L11 of C1^{-1},
#            solving L11' u = z and shifting by m11   (O(n) per draw)
set.seed(2718282)
M = 10000
draws = matrix(0,M,n)
draws1 = matrix(0,M,n)
for (s in 1:M){
  z = rnorm(n)                          # one shared standard-normal vector
  draws[s,] = m1 + as.vector(L1%*%z)
  draws1[s,] = as.vector(solve(L11,z,system="Lt"))
}
draws1 = sweep(draws1, 2, m11, "+")
# Pointwise 2.5%, 50% and 97.5% quantiles of each sampler
qdraw = t(apply(draws,2,quantile,c(0.025,0.5,0.975)))
qdraw1 = t(apply(draws1,2,quantile,c(0.025,0.5,0.975)))
# Compare Monte Carlo quantiles (red) against the exact smoothed
# densities (blue) for both samplers.
par(mfrow=c(1,2))
rng = range(qdraw, qdraw1)
plot(qdraw[,2],ylim=rng,type="l",xlab="Time",ylab="",col=2)
lines(qdraw[,1],col=2)
lines(qdraw[,3],col=2)
lines(mb+qnorm(0.025)*sqrt(Cb),col=4)
lines(mb,col=4)
lines(mb+qnorm(0.975)*sqrt(Cb),col=4)
title(expression(O(n^3)))
plot(qdraw1[,2],ylim=rng,type="l",xlab="Time",ylab="",col=2)
lines(qdraw1[,1],col=2)
lines(qdraw1[,3],col=2)
lines(mb+qnorm(0.025)*sqrt(Cb),col=4)
lines(mb,col=4)
lines(mb+qnorm(0.975)*sqrt(Cb),col=4)
title(expression(O(n)))