Introduction

For \(t=1,\ldots,n\), let’s model the time series data \(\{y_1,\ldots,y_n\}\) as an AR(2) process: \[ y_t = \phi_0 + \phi_1 y_{t-1} + \phi_2 y_{t-2} + \epsilon_t \] with iid errors of two kinds: \[\begin{eqnarray*} \mbox{Model 1} &:& \ \ \epsilon_t \sim N(0,\sigma^2)\\ \mbox{Model 2} &:& \ \ \epsilon_t \sim t_\nu(0,\tau^2) \end{eqnarray*}\] where \(\tau^2 = \frac{(\nu-2)\sigma^2}{\nu}\), so that both error distributions have variance \(\sigma^2\), and both \(\sigma^2\) and \(\nu\) are known quantities.
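
As a quick sanity check (a minimal sketch; the number of draws and the seed are illustrative, not part of the original analysis), the scaled Student’s \(t\) errors of Model 2 have the same variance \(\sigma^2\) as the Gaussian errors of Model 1:

# Monte Carlo check that var(tau*t_nu) is close to sig^2 = 0.01
set.seed(1)
sig = 0.1; nu = 4
tau = sqrt((nu-2)/nu)*sig
var(tau*rt(1000000,df=nu))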

Simulating some AR(2) data

library(mvtnorm)
set.seed(1245)
gaussian.data = FALSE
p    = 2
sig  = 0.1
nu   = 4
tau  = sqrt((nu-2)/nu)*sig
phi0 = 0.00
phi1 = 0.60
phi2 = 0.38
phi.true = c(phi0,phi1,phi2)
n    = 100
sig2 = sig^2
q = p + 1
y = rep(0,n+100)   # simulate n+100 points; the first 100 are burn-in
if (gaussian.data){
  for (t in 3:(n+100))
    y[t] = sum(c(1,y[t-1],y[t-2])*phi.true) + sig*rnorm(1)
}else{
  for (t in 3:(n+100))
    y[t] = sum(c(1,y[t-1],y[t-2])*phi.true) + tau*rt(1,df=nu)
}

y = y[101:(n+100)]   # discard the burn-in
par(mfrow=c(1,2))
ts.plot(y,type="b")
acf(y,lag.max=50,main="")

Likelihood-based inference (Gaussian model)

Let us define \[ Y = (y_3,\ldots,y_n)' \ \ \ \mbox{and} \ \ \ X = (x_3,\ldots,x_n)', \] for \(x_t = (1,y_{t-1},y_{t-2})'\) and \(t=3,\ldots,n\). Therefore, the Gaussian AR(2) model can be written as \[ Y = X \phi + \epsilon, \qquad \epsilon \sim N(0,\sigma^2 I_{n-2}), \] where \(\phi=(\phi_0,\phi_1,\phi_2)'\). The (standard) MLE/OLS estimation follows the derivations of multiple linear regression analysis: \[ {\hat \phi}_{MLE} = (X'X)^{-1}X'Y \ \ \ \mbox{and} \ \ \ V({\hat \phi}_{MLE}) = \sigma^2 (X'X)^{-1}. \]

Y           = y[3:n]
X           = cbind(1,y[2:(n-1)],y[1:(n-2)])
phi.mle     = solve(t(X)%*%X)%*%t(X)%*%Y
var.phi.mle = sig2*solve(t(X)%*%X)

Estimation summaries

error.mle   = sqrt(diag(var.phi.mle))
t.stat      = phi.mle/error.mle
pvalue      = 2*(1-pnorm(abs(t.stat)))   # two-sided p-value

tab = cbind(round(phi.mle,5),round(error.mle,5),round(t.stat,3),round(pvalue,6))
rownames(tab) = c("phi0","phi1","phi2")
colnames(tab) = c("Estimate","Std. Error","t value","Pr(>|t|)")
tab
##      Estimate Std. Error t value Pr(>|t|)
## phi0  0.04287    0.03315   1.293 0.195932
## phi1  0.55668    0.11174   4.982 0.000001
## phi2  0.36870    0.11213   3.288 0.001008

Now compare to the lm function in R (lm estimates the residual standard deviation from the data instead of using the known \(\sigma\), which explains the slightly different standard errors and p-values):

summary(lm(Y~X-1))
## 
## Call:
## lm(formula = Y ~ X - 1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.17981 -0.06394 -0.00893  0.03986  0.39495 
## 
## Coefficients:
##    Estimate Std. Error t value Pr(>|t|)    
## X1  0.04287    0.03211   1.335    0.185    
## X2  0.55668    0.10823   5.143 1.45e-06 ***
## X3  0.36870    0.10861   3.395    0.001 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09687 on 95 degrees of freedom
## Multiple R-squared:  0.9592, Adjusted R-squared:  0.9579 
## F-statistic: 744.3 on 3 and 95 DF,  p-value: < 2.2e-16

Bayesian inference via SIR

Prior

Let us assume that the prior of \(\phi\) is Gaussian: \[ \phi \sim N(b_0,B_0), \] for known hyperparameters \(b_0\) and \(B_0\) (phi0 and V0 in R).

Candidate/proposal density for MC-based inference

The proposal density (for Monte Carlo integration and sampling) is a Student’s \(t\) with \(\nu_0\) degrees of freedom (df0 in R), location \({\hat \phi}_{MLE}\) and scale \(\kappa \sigma^2 (X'X)^{-1}\): \[ q(\phi) \equiv t_{\nu_0}({\hat \phi}_{MLE},\kappa \sigma^2 (X'X)^{-1}), \] where \(\kappa\) (fac0 in R) is an inflation factor that forces the proposal to have wider variability than the MLE solution. In SIR, each proposal draw \(\phi^{(i)} \sim q\) receives an importance weight \(w^{(i)} \propto p(Y \mid \phi^{(i)})\,p(\phi^{(i)})/q(\phi^{(i)})\), and resampling the draws with probabilities proportional to these weights yields an approximate sample from the posterior.

# Log-likelihoods under the Gaussian and Student's t error models; both
# include the -(n-p)*log(scale) term so the weight sums are comparable
loglike.g = function(phi){sum(dnorm((Y-X%*%phi)/sig,log=TRUE))-(n-p)*log(sig)}
loglike.t = function(phi){sum(dt((Y-X%*%phi)/tau,df=nu,log=TRUE))-(n-p)*log(tau)}

# Prior hyperparameters (this phi0 is the prior mean b0; it overwrites
# the scalar AR coefficient phi0 used in the simulation above)
phi0 = rep(0,q)
V0   = diag(4,q)

# Implementing SIR
df0  = 5 
fac0 = 4 
W    = fac0*var.phi.mle
M    = 100000

# Step 1: Draws from proposal distribution
set.seed(1235)
phi.draw = rmvt(M,delta=phi.mle,sigma=W,df=df0)

# Step 2: Evaluating weights
logw.g = rep(0,M)
logw.t = rep(0,M)
for (i in 1:M){
  llike.g   = loglike.g(phi.draw[i,])
  llike.t   = loglike.t(phi.draw[i,])  
  lprop     = dmvt(phi.draw[i,],delta=phi.mle,sigma=W,df=df0,log=TRUE)
  lprior    = dmvnorm(phi.draw[i,],mean=phi0,sigma=V0,log=TRUE)
  logw.g[i] = llike.g+lprior-lprop
  logw.t[i] = llike.t+lprior-lprop  
}
# Unnormalized weights; their sums estimate (up to the common factor M)
# the prior predictive density of Y under each model
w.g = exp(logw.g)
pred.g = sum(w.g)
w.t = exp(logw.t)
pred.t = sum(w.t)
# Subtract the max log-weight before exponentiating for numerical stability;
# the resampling probabilities are unaffected by this rescaling
w.g = exp(logw.g-max(logw.g))
w.t = exp(logw.t-max(logw.t))
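
Before resampling, it is useful to gauge how well the proposal covers the posterior. A standard diagnostic (an addition here, not part of the original recipe) is the effective sample size of the importance weights, sketched below with the rescaled weights just computed:

# Effective sample size of the normalized importance weights;
# values far below M indicate a poor proposal
ess = function(w){w = w/sum(w); 1/sum(w^2)}
c(ess(w.g),ess(w.t))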

# Step 3: Resampling
i1 = sample(1:M,size=M,replace=TRUE,prob=w.g)
i2 = sample(1:M,size=M,replace=TRUE,prob=w.t)
phi.draws = array(0,c(2,M,q))
phi.draws[1,,] = phi.draw[i1,]
phi.draws[2,,] = phi.draw[i2,]
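
The resampled draws can also be summarized numerically. A minimal sketch of posterior means and equal-tail 95% credible intervals (first the Normal model, then the t model):

# Posterior means and 95% credible intervals from the resampled draws
for (k in 1:2)
  print(round(t(apply(phi.draws[k,,],2,function(x)
    c(mean=mean(x),quantile(x,c(0.025,0.975))))),4))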

Posterior plots

# Common axis limits across the two models (renamed from min/max
# to avoid shadowing the base R functions)
xmin = xmax = rep(0,q)
for (i in 1:q){
  xmin[i] = min(apply(phi.draws[,,i],1,min))
  xmax[i] = max(apply(phi.draws[,,i],1,max))
}
name.prior = c("Bayes (Normal model)","Bayes (t model)")
par(mfrow=c(2,q))
for (i in 1:2){
  plot(phi.draws[i,,1],phi.draws[i,,2],col=gray(0.75),pch=16,
       xlim=c(xmin[1],xmax[1]),ylim=c(xmin[2],xmax[2]),xlab="phi(1)",ylab="phi(2)")
  points(phi.true[1],phi.true[2],col=2,pch=16,cex=2)
  points(phi.mle[1],phi.mle[2],col=3,pch=16,cex=2)
  title(name.prior[i])
  plot(phi.draws[i,,1],phi.draws[i,,3],col=gray(0.75),pch=16,
       xlim=c(xmin[1],xmax[1]),ylim=c(xmin[3],xmax[3]),xlab="phi(1)",ylab="phi(3)")
  points(phi.true[1],phi.true[3],col=2,pch=16,cex=2)
  points(phi.mle[1],phi.mle[3],col=3,pch=16,cex=2)
  plot(phi.draws[i,,2],phi.draws[i,,3],col=gray(0.75),pch=16,
       xlim=c(xmin[2],xmax[2]),ylim=c(xmin[3],xmax[3]),xlab="phi(2)",ylab="phi(3)")
  points(phi.true[2],phi.true[3],col=2,pch=16,cex=2)
  points(phi.mle[2],phi.mle[3],col=3,pch=16,cex=2)
}

par(mfrow=c(2,2))
for (i in 1:3){
  xr = range(phi.draws[,,i])
  plot(density(phi.draws[2,,i]),main="",xlab="",col=2,xlim=xr)
  lines(density(phi.draws[1,,i]))
  points(phi.true[i],0,col=3,pch=16)
  points(phi.mle[i],0,col=4,pch=16)
  title(paste("phi(",i,")",sep=""))
  if (i==1){
    legend("topleft",legend=name.prior,col=1:2,lty=1,bty="n")
    legend("topright",legend=c("TRUE","MLE (Normal model)"),
           col=3:4,pch=16,bty="n")
  }
}
xr = range(phi.draws[,,2]+phi.draws[,,3])
plot(density(phi.draws[2,,2]+phi.draws[2,,3]),main="",xlab="",col=2,xlim=xr)
lines(density(phi.draws[1,,2]+phi.draws[1,,3]))
points(phi.true[2]+phi.true[3],0,col=3,pch=16)
points(phi.mle[2]+phi.mle[3],0,col=4,pch=16)
title("phi(2)+phi(3)")

Model comparison

The weight sums pred.g and pred.t are, up to the common factor \(M\), Monte Carlo estimates of the prior predictive densities \(p(Y \mid \mbox{Model 1})\) and \(p(Y \mid \mbox{Model 2})\), so their ratio estimates the Bayes factor of the Gaussian model against the Student’s \(t\) model.

BF = pred.g/pred.t
c(pred.g,pred.t,round(BF,2))
## [1] 3.133193e+39 6.072928e+42 0.000000e+00

The Bayes factor is essentially zero, so the data, which were indeed simulated under the t errors (gaussian.data = FALSE), decisively favor Model 2.
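
Because the raw weights can easily under- or overflow in double precision, the same comparison is safer on the log scale. A minimal sketch using the log-sum-exp trick on the logw.g and logw.t vectors computed above:

# Log Bayes factor via the log-sum-exp trick (numerically stable)
logsumexp = function(lw){m = max(lw); m + log(sum(exp(lw-m)))}
logBF = logsumexp(logw.g) - logsumexp(logw.t)
exp(logBF)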