--- title: "Hidden Markov Model: variance-switching" author: "Hedibert Freitas Lopes" date: "6/14/2018" output: pdf_document: default --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) ``` # Model structure We will assume that observations $\{y_1,\ldots,y_n\}$ are conditionally independent given a hidden bineary state variable $\{s_1,\ldots,s_n\}$ as follows: $$ y_t|\sigma_1^2,\sigma_2^2,s_t \sim N(0,\sigma_t^2) $$ where $$ \sigma_t^2 = \sigma_1^2(1-s_t)+\sigma_2^2s_t, $$ more explicitly, $$ \sigma_t^2= \left\{ \begin{array}{ll} \sigma_1^2 & s_t=0\\ \sigma_2^2 & s_t=1 \end{array} \right. $$ Finally, the binary latent state $s_t$ depends on the previous latent states $s_1,\ldots,s_{t-1}$ only via the most recent one, i.e. $s_{t-1}$, according to a first order Markov chain: $$ s_t|s_{t-1} = \left\{ \begin{array}{ll} Ber(1-\xi) & s_{t-1}=0\\ Ber(\pi) & s_{t-1}=1 \end{array} \right. $$ For now we will assume that $\sigma_1^2$, $\sigma_2^2$, $\xi$ and $\pi$ and pi are all known. # Simulating the dataset Let us simulated $n=800$ observations with parameters $(\sigma_1^2,\sigma_2^2)=(1,1.5^2)$, $(\pi,\xi)=(0.99,0.99)$ and $s_1\sim Ber(0.5)$. ```{r} set.seed(3452345) sig12 = 1^2 sig22 = 5^2 pi = 0.99 xi = 0.99 n = 800 S = rep(0,n) S[1] = rbinom(1,1,0.5) for (t in 2:n) S[t] = rbinom(1,1,pi*S[t-1]+(1-xi)*(1-S[t-1])) y = rnorm(n,0,sqrt(sig12*(1-S)+sig22*S)) xi.true = xi pi.true = pi sig12.true = sig12 sig22.true = sig22 ``` Let us take a look at the time series of $s_t$ and $y_t$. ```{r fig.width=12,fig.height=12} par(mfrow=c(2,1)) plot(S,xlab="Time",ylab="S(t)",pch=16) plot(y,xlab="Time",ylab="y(t)",pch=16) ``` # Forward and backward probabilities One goal is to use the data $y_1,\ldots,y_n$ to compute the following marginal probabilities: $$ \pi_t = Pr(s_t=1|y_{1:t}) \ \ \ \mbox{and} \ \ \ {\tilde \pi}_t = Pr(s_t=1|y_{1:n}). $$ assuming, for instance, that $\pi_0=0.5$. Another important goal is jointly sampling the latent/hidden states $s_1,\ldots,s_n$ from $p(s_1,\ldots,s_n|y_{1:n})$ recursively and backwards (see the backward recursions below). ## Forward probabilities We want to be able to start at time $t-1$ with $\pi_{t-1}=Pr(s_{t-1}=1|y_{1:(t-1)})$ and finish at time $t$ with $\pi_t=Pr(s_t=1|y_{1:t})$: \begin{eqnarray*} Pr(s_t=1|y_{1:t}) &\propto& Pr(s_t=1|y_{1:(t-1)}) p_N(y_t;0,\sigma_1^2),\\ Pr(s_t=0|y_{1:t}) &\propto& Pr(s_t=0|y_{1:(t-1)}) p_N(y_t;0,\sigma_2^2) \end{eqnarray*} with \begin{eqnarray*} Pr(s_t=1|y_{1:(t-1)}) &=& Pr(s_t=1|s_{t-1}=0,y_{1:(t-1)})Pr(s_{t-1}=0|y_{1:(t-1)})\\ &+& Pr(s_t=1|s_{t-1}=1,y_{1:(t-1)})Pr(s_{t-1}=1|y_{1:(t-1)})\\ &=& (1-\xi)(1-\pi_{t-1})+\pi\pi_{t-1}\\ &=& \alpha_t,\\ Pr(s_t=0|y_{1:(t-1)}) &=& Pr(s_t=0|s_{t-1}=0,y_{1:(t-1)})Pr(s_{t-1}=0|y_{1:(t-1)})\\ &+& Pr(s_t=0|s_{t-1}=1,y_{1:(t-1)})Pr(s_{t-1}=1|y_{1:(t-1)})\\ &=& \xi(1-\pi_{t-1})+(1-\pi)\pi_{t-1}\\ &=& 1-\alpha_t. \end{eqnarray*} Therefore, \begin{eqnarray*} Pr(s_t=1|y_{1:t}) &\propto& \alpha_t p_N(y_t;0,\sigma_1^2),\\ Pr(s_t=0|y_{1:t}) &\propto& (1-\alpha_t) p_N(y_t;0,\sigma_2^2) \end{eqnarray*} and $$ \pi_t=Pr(s_t=1|y_{1:t})=\frac{\alpha_tp_N(y_t;0,\sigma_1^2)}{\alpha_tp_N(y_t;0,\sigma_1^2)+(1-\alpha_t)p_N(y_t;0,\sigma_2^2)}. $$ ```{r} # Forward probabilities pi0 = 0.5 pit = rep(0,n) # t=1 A = (1-xi)*(1-pi0)+pi*pi0 term1 = dnorm(y[1],0,sqrt(sig22))*A term2 = dnorm(y[1],0,sqrt(sig12))*(1-A) pit[1] = term1/(term1+term2) # t=2,...,n for (t in 2:n){ A = (1-xi)*(1-pit[t-1])+pi*pit[t-1] term1 = dnorm(y[t],0,sqrt(sig22))*A term2 = dnorm(y[t],0,sqrt(sig12))*(1-A) pit[t] = term1/(term1+term2) } ``` ## Backward probabilities Similarly, for $t=n,\ldots,2$, \begin{eqnarray*} {\tilde \pi}_{t-1} = Pr(s_{t-1}=1|y_{1:n}) &=& Pr(s_{t-1}=1|s_t=0,y_{1:n})Pr(s_t=0|y_{1:n}) + Pr(s_{t-1}=1|s_t=1,y_{1:n})Pr(s_t=1|y_{1:n})\\ &=& Pr(s_{t-1}=1|s_t=0,y_{1:t})(1-{\tilde \pi}_t) + Pr(s_{t-1}=1|s_t=1,y_{1:t}){\tilde \pi}_t. \end{eqnarray*} Therefore, we need to compute $$ Pr(s_{t-1}=1|s_t=0,y_{1:t}) \ \ \ \mbox{and}\ \ \ Pr(s_{t-1}=1|s_t=1,y_{1:t}), $$ which are obtained via \begin{eqnarray*} Pr(s_{t-1}=1|s_t=0,y_{1:t}) &\propto& Pr(s_t=0|s_{t-1}=1,y_{1:t})Pr(s_{t-1}=1|y_{1:(t-1)})=(1-\pi) \pi_{t-1},\\ Pr(s_{t-1}=0|s_t=0,y_{1:t}) &\propto& Pr(s_t=0|s_{t-1}=0,y_{1:t})Pr(s_{t-1}=0|y_{1:(t-1)})=\xi (1-\pi_{t-1}),\\ \end{eqnarray*} and \begin{eqnarray*} Pr(s_{t-1}=1|s_t=1,y_{1:t}) &\propto& Pr(s_t=1|s_{t-1}=1,y_{1:t})Pr(s_{t-1}=1|y_{1:(t-1)})=\pi\pi_{t-1}\\ Pr(s_{t-1}=0|s_t=1,y_{1:t}) &\propto& Pr(s_t=1|s_{t-1}=0,y_{1:t})Pr(s_{t-1}=0|y_{1:(t-1)})=(1-xi)(1-\pi_{t-1}). \end{eqnarray*} Therefore, \begin{eqnarray*} \beta_t &=& Pr(s_{t-1}=1|s_t=0,y_{1:t})=\frac{(1-\pi) \pi_{t-1}}{(1-\pi) \pi_{t-1}+\xi (1-\pi_{t-1})}\\ \alpha_t &=& Pr(s_{t-1}=1|s_t=1,y_{1:t})= \frac{\pi\pi_{t-1}}{\pi\pi_{t-1}+(1-\xi)(1-\pi_{t-1})}. \end{eqnarray*} The backward recursions are $$ s_{t-1}|s_t=0,y_{1:t} \sim Ber(\beta_t) \ \ \ \mbox{and} \ \ \ s_{t-1}|s_t=1,y_{1:t} \sim Ber(\alpha_t), $$, which are very important when jointly sampling the latent/hidden states $s_1,\ldots,s_n$ from $p(s_1,\ldots,s_n|y_{1:n})$ recursively and backwards: $$ p(s_1,\ldots,s_n|y_{1:n}) = \prod_{t=2}^n p(s_{t-1}|s_t,y_{1:t})p(s_n|y_{1:n}). $$ Finally, the marginal backward probabilities are $$ {\tilde \pi}_{t-1}=Pr(s_{t-1}=1|y_{1:n})=\beta_t (1-{\tilde \pi}_t) + \alpha_t{\tilde \pi}_t. $$ ```{r} # Backward probabilities pitil = rep(0,n) pitil[n] = pit[n] for (t in n:2){ alpha = pit[t-1]*pi/(pit[t-1]*pi+(1-pit[t-1])*(1-xi)) beta = pit[t-1]*(1-pi)/(pit[t-1]*(1-pi)+(1-pit[t-1])*xi) pitil[t-1] = alpha*pitil[t]+beta*(1-pitil[t]) } # storing forward and backward probabilities for later use pis.ff = pit pis.bs = pitil ``` ```{r fig.width=12,fig.height=12} par(mfrow=c(2,1)) plot(pit,xlab="Time",ylab="Pr(S(t)=1|D(t))",pch=16,main="Forward probabilities") lines(S,col=2,lwd=2) plot(pitil,xlab="Time",ylab="Pr(S(t)=1|D(n))",pch=16,main="Backward probabilities") lines(S,col=2,lwd=2) ``` ```{r fig.width=12,fig.height=8} par(mfrow=c(1,1)) plot(S,xlab="Time",ylab="Probabilities",type="l",main="FFBS for HMM",lwd=2) lines(pit,col=2,lwd=2) lines(pitil,col=3,lwd=3) legend("topright",legend=c("S(t)","Pr(S(t)=1|D(t))","Pr(S(t)=1|D(n))"),col=1:3,lwd=2) abline(h=0.5,lty=2) ``` # Full-blown MCMC Let us now include the parameters $\theta=(\pi,\xi,\sigma_1^2,\sigma_2^2)$ into the analysis. We assume conditionally conjugate and independent priors for the components of $\theta$, i.e. $$ p(\theta) = p(\pi)p(\xi)p(\sigma_1^2)p(\sigma_2^2), $$ with $\pi$ and $\xi$ following $U(0,1)$ and $\sigma_1^2$ and $\sigma_2^2$ following $IG(3/2,3/2)$, so $E(\sigma_1^2)=E(\sigma_2^2)=3$ and 95\% probability interval approximately equal to $(0.4,8.5)$. ## Full conditionals Full posterior inference is facilitated by a customized Gibbs sampler that cycles through the full conditionals: \begin{eqnarray*} p(s_{1:n}|y_{1:n},\theta)\\ p(\pi|y_{1:n},s_{1:n},\xi,\sigma_1^2,\sigma_2^2)\\ p(\xi|y_{1:n},s_{1:n},\pi,\sigma_1^2,\sigma_2^2)\\ p(\sigma_1^2|y_{1:n},s_{1:n},\pi,\xi,\sigma_2^2)\\ p(\sigma_2^2|y_{1:n},s_{1:n},\pi,\xi,\sigma_1^2), \end{eqnarray*} with the first one, $p(s_{1:n}|y_{1:n},\theta)$ already derived via the *forward filtering, backward sampling* scheme introduced above. The other three full conditionals are easily derived and are gives below as follows. Given $s_1,\ldots,s_n$, lets compute $n_1=\sum_{i=1}^n \delta_1(s_{n-1})\delta_1(s_n)$, $n_2=\sum_{i=1}^n \delta_1(s_{n-1})\delta_0(s_n)$, $n_3=\sum_{i=1}^n \delta_0(s_{n-1})\delta_0(s_n)$ and$n_4=\sum_{i=1}^n \delta_0(s_{n-1})\delta_1(s_n)$. These are basically the observed jumps between regimes $0$ and $1$. Similarly, let ${\tilde y}_0=\sum_{i:s_i=0} y_i^2$ and ${\tilde y}_1=\sum_{i:s_i=1} y_i^2$. Therefore, \begin{eqnarray*} (\pi|s_{1:n}) &\sim& \mbox{Beta}(n_1+1,n_2+1)\\ (\xi|s_{1:n}) &\sim& \mbox{Beta}(n_3+1,n_4+1)\\ (\sigma_1^2|y_{1:n},s_{1:n}) &\sim& IG((n_2+n_3+3)/2,3+{\tilde y}_0)\\ (\sigma_2^2|y_{1:n},s_{1:n}) &\sim& IG((n_1+n_4+3)/2,3+{\tilde y}_1). \end{eqnarray*} ```{r} # Initial values xi = 0.9 pi = 0.9 sig12 = 1 sig22 = 10 # MCMC setup set.seed(321456) pi0 = 0.5 pit = rep(0,n) burnin = 1000 M = 1000 thin = 1 niter = burnin+M*thin draws.par = matrix(0,niter,4) draws.sta = matrix(0,niter,n) s = rep(0,n) # MCMC in action for (iter in 1:niter){ # Drawing the latent states s(1),...,s(n) A = (1-xi)*(1-pi0)+pi*pi0 term1 = dnorm(y[1],0,sqrt(sig22))*A term2 = dnorm(y[1],0,sqrt(sig12))*(1-A) pit[1] = term1/(term1+term2) for (t in 2:n){ A = (1-xi)*(1-pit[t-1])+pi*pit[t-1] term1 = dnorm(y[t],0,sqrt(sig22))*A term2 = dnorm(y[t],0,sqrt(sig12))*(1-A) pit[t] = term1/(term1+term2) } s[n] = rbinom(1,1,pit[n]) for (t in n:2){ alpha = pit[t-1]*pi/(pit[t-1]*pi+(1-pit[t-1])*(1-xi)) beta = pit[t-1]*(1-pi)/(pit[t-1]*(1-pi)+(1-pit[t-1])*xi) s[t-1] = s[t]*rbinom(1,1,alpha)+(1-s[t])*rbinom(1,1,beta) } # Drawing the fixed parameters (pi,xi,sigma12,sigma22) n1 = sum((s[1:(n-1)]==1)&(s[2:n]==1)) n2 = sum((s[1:(n-1)]==1)&(s[2:n]==0)) n3 = sum((s[1:(n-1)]==0)&(s[2:n]==0)) n4 = sum((s[1:(n-1)]==0)&(s[2:n]==1)) pi = rbeta(1,n1+1,n2+1) xi = rbeta(1,n3+1,n4+1) sig12 = 1/rgamma(1,(n2+n3+3)/2,(3+sum(y[s==0]^2))/2) sig22 = 1/rgamma(1,(n1+n4+3)/2,(3+sum(y[s==1]^2))/2) # Storing MCMC draws draws.par[iter,] = c(sig12,sig22,pi,xi) draws.sta[iter,] = s } pis.full = apply(draws.sta[(burnin+1):niter,],2,mean) ``` # Posterior trace plots and marginal posterior histograms ```{r fig.width=12,fig.height=8} names = c("sigma12","sigma22","pi","xi") true = c(sig12.true,sig22.true,pi.true,xi.true) par(mfrow=c(2,4)) for (i in 1:4){ ts.plot(draws.par[,i],xlab="Iterations",ylab="",main=names[i]) abline(h=true[i],col=2,lwd=2) } for (i in 1:4){ hist(draws.par[(burnin+1):niter,i],prob=TRUE,xlab="",main="") abline(v=true[i],col=2,lwd=2) } ``` # Comparing probabilities Let us compare $Pr(s_t=1|y_{1:t},\theta)$, $Pr(s_t=1|y_{1:n},\theta)$ and $Pr(s_t=1|y_{1:n})$. ```{r fig.width=12,fig.height=8} par(mfrow=c(1,1)) plot(S,xlab="Time",ylab="Probabilities",type="l",main="FFBS for HMM") lines(pis.ff,col=2) lines(pis.bs,col=3) lines(pis.full,col=4) legend("topright",legend=c("S(t)","Pr(S(t)=1|D(t),theta)","Pr(S(t)=1|D(n),theta)","Pr(S(t)=1|D(n))"),col=1:4,lwd=2) abline(h=0.5,lty=2) ```