Simulating ARMA(1,1) time series
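
The two simulated series share the same ARMA(1,1) dynamics and differ only in the innovation distribution:

\[
y_t = \phi\, y_{t-1} + \varepsilon_t + \gamma\, \varepsilon_{t-1}, \qquad t = 2,\dots,n, \qquad y_1 = \varepsilon_1,
\]

with \(\varepsilon_t \sim N(0,\sigma^2)\) in the Gaussian case and \(\varepsilon_t = \tau u_t\), \(u_t \sim t_\nu\), \(\tau = \sigma\sqrt{(\nu-2)/\nu}\), so that both sets of innovations have variance \(\sigma^2\) (here \(\phi = 0.98\), \(\gamma = -0.64\), \(\sigma = 1\) and \(\nu = 4\)).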

set.seed(12345)
n       = 200                    # sample size
sig     = 1.0                    # innovation standard deviation
nu      = 4                      # Student's t degrees of freedom
phi     = 0.98                   # AR(1) coefficient
gamma   = -0.64                  # MA(1) coefficient
th.true = c(phi,gamma)
kappa   = sqrt((nu-2)/nu)        # rescaling so the t innovations have variance sig^2
tau     = kappa*sig
e.g     = sig*rnorm(n)           # Gaussian innovations
e.t     = tau*rt(n,df=nu)        # Student's t innovations
y.g     = rep(0,n)
y.t     = rep(0,n)
y.g[1]  = e.g[1]
y.t[1]  = e.t[1]
for (t in 2:n){
  y.g[t] = phi*y.g[t-1]+e.g[t]+gamma*e.g[t-1]
  y.t[t] = phi*y.t[t-1]+e.t[t]+gamma*e.t[t-1]
}

par(mfrow=c(1,1))
ts.plot(cbind(y.g,y.t),col=1:2,main="ARMA(1,1) data")
legend("bottomleft",legend=c("Gaussian","Student's t"),
       col=1:2,lty=1,bty="n")

Creating a few functions

We create the function eps to compute \(\varepsilon_1,\dots,\varepsilon_n\) for a given pair \((\phi,\gamma)\).
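
Setting \(y_0 = \varepsilon_0 = 0\), the innovations can be recovered recursively from the data and the parameters:

\[
\varepsilon_1 = y_1, \qquad \varepsilon_t = y_t - \phi\, y_{t-1} - \gamma\, \varepsilon_{t-1}, \quad t = 2,\dots,n.
\]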

eps = function(y,phi,gamma){
  # Recovers the innovations recursively, assuming y_0 = eps_0 = 0
  # (the series length n is taken from the workspace)
  epsilon = rep(0,n)
  epsilon[1] = y[1]
  for (t in 2:n)
    epsilon[t] = y[t]-phi*y[t-1]-gamma*epsilon[t-1]
  return(epsilon)
}

# Negative log-likelihoods on an unconstrained scale for nlm:
# phi = 2*exp(theta1)-1 and gamma = 2*exp(theta2)-1 keep both coefficients above -1
minusloglikelihood.g = function(theta){
  phi     = 2*exp(theta[1])-1
  gamma   = 2*exp(theta[2])-1
  epsilon = eps(y,phi,gamma)
  return(-sum(dnorm(epsilon/sig,log=TRUE)))
}

minusloglikelihood.t = function(theta){
  phi     = 2*exp(theta[1])-1
  gamma   = 2*exp(theta[2])-1
  epsilon = eps(y,phi,gamma)
  return(-sum(dt(epsilon/tau,df=nu,log=TRUE)))
}

# Log-likelihoods on the original (phi, gamma) scale, used for the likelihood plots and the SIR weights
loglikelihood.g = function(phi,gamma){
  epsilon = eps(y,phi,gamma)
  return(sum(dnorm(epsilon/sig,log=TRUE)))
}

loglikelihood.t = function(phi,gamma){
  epsilon = eps(y,phi,gamma)
  return(sum(dt(epsilon/tau,df=nu,log=TRUE)))
}
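
As a quick sanity check (not part of the original script), at the true parameter values the recursion should recover the simulated Gaussian innovations exactly, since the simulation also started from \(y_1 = \varepsilon_1\):

# eps() inverts the simulation exactly at the true (phi, gamma)
max(abs(eps(y.g,phi,gamma)-e.g))   # should be zero up to floating-point error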

1) Maximum likelihood inference

Let us first fit an ARIMA(1,0,1) model with the arima function (from R's stats package); it only handles the Gaussian case.

y     = y.g
fit.g = arima(y,order=c(1,0,1),include.mean=FALSE,method="CSS")   # CSS matches our conditional likelihood
theta.mle.g = fit.g$coef
theta.mle.g.trans = log((theta.mle.g+1)/2)   # map to the unconstrained scale used by our functions
-minusloglikelihood.g(theta.mle.g.trans)
## [1] -298.3182
th.true
## [1]  0.98 -0.64
theta.mle.g
##        ar1        ma1 
##  0.9920161 -0.6693540

Let us now do our own minimization.

theta0      = c(log(1/2),log(1/2))   # starting value corresponding to phi = gamma = 0
est.g       = nlm(minusloglikelihood.g,theta0)$estimate
## Warning in nlm(minusloglikelihood.g, theta0): NA/Inf replaced by maximum
## positive value

## Warning in nlm(minusloglikelihood.g, theta0): NA/Inf replaced by maximum
## positive value

## Warning in nlm(minusloglikelihood.g, theta0): NA/Inf replaced by maximum
## positive value
est.t       = nlm(minusloglikelihood.t,theta0)$estimate
## Warning in nlm(minusloglikelihood.t, theta0): NA/Inf replaced by maximum
## positive value
## Warning in nlm(minusloglikelihood.t, theta0): NA/Inf replaced by maximum
## positive value

## Warning in nlm(minusloglikelihood.t, theta0): NA/Inf replaced by maximum
## positive value
# Back-transform the nlm estimates to the (phi, gamma) scale
phi.mle.g   = 2*exp(est.g[1])-1
gamma.mle.g = 2*exp(est.g[2])-1
phi.mle.t   = 2*exp(est.t[1])-1
gamma.mle.t = 2*exp(est.t[2])-1
c(phi.mle.g,gamma.mle.g)
## [1]  0.9922122 -0.6714067
c(phi.mle.t,gamma.mle.t)
## [1]  0.9893444 -0.6481587
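
Although not part of the original script, a rough curvature-based check of the uncertainty in the Gaussian MLEs can be obtained by asking nlm for the Hessian of the negative log-likelihood and mapping it back to the \((\phi,\gamma)\) scale with the delta method (the object names fit.nlm, se.theta and se.gauss below are illustrative only); these can later be compared with the posterior percentiles from SIR.

# Hessian-based (asymptotic) standard errors for the Gaussian fit
# Delta method: d(2*exp(theta)-1)/dtheta = 2*exp(theta)
# (nlm may print the same NA/Inf warnings as above)
fit.nlm  = nlm(minusloglikelihood.g,theta0,hessian=TRUE)
se.theta = sqrt(diag(solve(fit.nlm$hessian)))
se.gauss = 2*exp(fit.nlm$estimate)*se.theta
se.gauss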

Plotting the likelihood functions

# Grid of values around the Gaussian MLE
mphi    = phi.mle.g
mgamma  = gamma.mle.g
sdphi   = 0.01
sdgamma = 0.05
N       = 50
phis    = seq(mphi-5*sdphi,mphi+5*sdphi,length=N)
gammas  = seq(mgamma-5*sdgamma,mgamma+5*sdgamma,length=N)
# Evaluate both log-likelihoods over the grid
loglike.g = matrix(0,N,N)
loglike.t = matrix(0,N,N)
for (i in 1:N)
   for (j in 1:N){
     loglike.g[i,j] = loglikelihood.g(phis[i],gammas[j])
     loglike.t[i,j] = loglikelihood.t(phis[i],gammas[j])
}
# Normalize on the log scale before exponentiating (numerical stability)
like.g = exp(loglike.g-max(loglike.g))
like.t = exp(loglike.t-max(loglike.t))

par(mfrow=c(2,2))
contour(phis,gammas,like.g,xlab=expression(phi),ylab=expression(gamma),
        drawlabels=FALSE,main="Gaussian likelihood")
points(phi.mle.g,gamma.mle.g,pch=16)
contour(phis,gammas,like.t,xlab=expression(phi),ylab=expression(gamma),
        drawlabels=FALSE,main="Student's t likelihood")
points(phi.mle.t,gamma.mle.t,pch=16)


contour(phis,gammas,like.g,xlab=expression(phi),ylab=expression(gamma),
        drawlabels=FALSE,main="Gaussian likelihood",
        xlim=c(-1,1),ylim=c(-1,1))
points(phi.mle.g,gamma.mle.g,pch=16)
contour(phis,gammas,like.t,xlab=expression(phi),ylab=expression(gamma),
        drawlabels=FALSE,main="Student's t likelihood",
        xlim=c(-1,1),ylim=c(-1,1))
points(phi.mle.t,gamma.mle.t,pch=16)

Note how concentrated the likelihood functions are relative to the prior on \((\phi,\gamma)\), which is uniform over \((-1,1) \times (-1,1)\).

2) Bayesian inference via sampling importance resampling (SIR)

This is an example where the prior (very vague) is not a good proposal for the posterior (highly concentrated due to the likelihood), so not every proposal will produce “decent” resampled draws. Also, because the likelihood is expensive to evaluate, brute-force SIR (with \(M=1000000\) or more draws) would be computationally prohibitive.

Lesson: even in such a simple bivariate case, using the prior as the proposal can be very inefficient and, therefore, computationally expensive.

set.seed(54322)
mphi    = phi.mle.g
mgamma  = gamma.mle.g
sdphi   = 0.01
sdgamma = 0.05
M       = 20000
# Uniform proposal centered at the Gaussian MLE, +/- 10 approximate standard deviations
phi.d   = runif(M,mphi-10*sdphi,mphi+10*sdphi)
gamma.d = runif(M,mgamma-10*sdgamma,mgamma+10*sdgamma)

# Try sampling from the prior instead and see how messy it gets!
# phi.d   = runif(M,-1,1)
# gamma.d = runif(M,-1,1)

# Computing the (log) importance weights
# The uniform prior and the uniform proposal densities are constant, so they can be omitted from the weights
w.g = rep(0,M)
w.t = rep(0,M)
for (i in 1:M){
  w.g[i]  = loglikelihood.g(phi.d[i],gamma.d[i])
  w.t[i]  = loglikelihood.t(phi.d[i],gamma.d[i])
}

# Prior predictive estimates as a by-product
# (up to a common constant, the prior-to-proposal density ratio, which cancels in the Bayes factor)
priorpred.g = mean(exp(w.g))
priorpred.t = mean(exp(w.t))
# Exponentiate relative to the maximum for numerical stability
w.g = exp(w.g-max(w.g))
w.t = exp(w.t-max(w.t))

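Before resampling, a quick diagnostic (not in the original script) is the effective sample size of the importance weights; it makes the lesson above concrete, since with the vague prior as the proposal the ESS would be far smaller relative to \(M\).

# Effective sample size of the (unnormalized) weights:
# ESS = (sum w)^2 / sum(w^2), between 1 (degenerate) and M (ideal)
ess = function(w) sum(w)^2/sum(w^2)
c(ess(w.g),ess(w.t))
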
# Resampling
ind.g   = sample(1:M,size=M,replace=TRUE,prob=w.g)
ind.t   = sample(1:M,size=M,replace=TRUE,prob=w.t)
phi.g   = phi.d[ind.g]
gamma.g = gamma.d[ind.g]
phi.t   = phi.d[ind.t]
gamma.t = gamma.d[ind.t]

a) SIR output and marginal posteriors

par(mfrow=c(2,3))
plot(phi.g,gamma.g,main="Gaussian likelihood",
     xlab=expression(phi),ylab=expression(gamma))
contour(phis,gammas,like.g,col=3,add=TRUE,lwd=3,drawlabels=FALSE)
hist(phi.g,prob=TRUE,main="",xlab=expression(phi))
hist(gamma.g,prob=TRUE,main="",xlab=expression(gamma))

plot(phi.t,gamma.t,main="Student's t likelihood",
     xlab=expression(phi),ylab=expression(gamma))
contour(phis,gammas,like.t,col=3,add=TRUE,lwd=3,drawlabels=FALSE)
hist(phi.t,prob=TRUE,main="",xlab=expression(phi))
hist(gamma.t,prob=TRUE,main="",xlab=expression(gamma))

b) MLEs, posterior means and posterior percentiles

c(phi.mle.g,mean(phi.g))
## [1] 0.9922122 0.9909722
c(phi.mle.t,mean(phi.t))
## [1] 0.9893444 0.9875796
c(gamma.mle.g,mean(gamma.g))
c(gamma.mle.t,mean(gamma.t))
quantile(phi.g,c(0.025,0.5,0.975))
##      2.5%       50%     97.5% 
## 0.9688727 0.9914817 1.0093328
quantile(phi.t,c(0.025,0.5,0.975))
##      2.5%       50%     97.5% 
## 0.9629613 0.9883298 1.0081447
quantile(gamma.g,c(0.025,0.5,0.975))
##       2.5%        50%      97.5% 
## -0.7491263 -0.6580102 -0.5399630
quantile(gamma.t,c(0.025,0.5,0.975))
##       2.5%        50%      97.5% 
## -0.7305234 -0.6334802 -0.5063850

c) \(Pr(\phi>1.0|\mbox{Gaussian})\) and \(Pr(\phi>1.0|\mbox{Student's t})\)

The cut-off 0.9 was a bit boring, so I changed it here in the solution to 1.0.

mean(phi.g>1)
## [1] 0.1829
mean(phi.t>1)
## [1] 0.12985

3) Prior predictive, Bayes factor and posterior model probability.

3a) Prior predictives
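
The code estimates each model's prior predictive by the Monte Carlo average of its likelihood over the \(M\) proposal draws,

\[
\hat{p}(y \mid \mathcal{M}) = \frac{1}{M}\sum_{i=1}^{M} p\big(y \mid \phi^{(i)}, \gamma^{(i)}, \mathcal{M}\big),
\]

where \((\phi^{(i)},\gamma^{(i)})\) are the proposal draws; this matches the prior predictive under the uniform prior only up to the common prior-to-proposal constant noted above, which cancels in the Bayes factor below.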

# Log prior predictive Gaussian model
log(priorpred.g)
## [1] -302.474
# Log prior predictive Student's t model
log(priorpred.t)
## [1] -388.2954

3b) Log Bayes factor

logBFgt = log(priorpred.g)-log(priorpred.t)
logBFgt
## [1] 85.82138

3c) Posterior model probability.
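
Assuming equal prior model probabilities (as the computation below implicitly does), the posterior probability of the Gaussian model is

\[
Pr(\mbox{Gaussian} \mid y) = \frac{BF_{GT}}{1 + BF_{GT}} = \frac{1}{1 + \exp(-\log BF_{GT})},
\]

which is exactly what PMP.g computes.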

# Pr(Gaussian model | data)
PMP.g   = 1/(1+1/exp(logBFgt))
PMP.g
## [1] 1