set.seed(12345)
n = 200
sig = 1.0
nu = 4
phi = 0.98
gamma = -0.64
th.true = c(phi,gamma)
kappa = sqrt((nu-2)/nu)
tau = kappa*sig
e.g = sig*rnorm(n)
e.t = tau*rt(n,df=nu)
y.g = rep(0,n)
y.t = rep(0,n)
y.g[1] = e.g[1]
y.t[1] = e.t[1]
for (t in 2:n){
y.g[t] = phi*y.g[t-1]+e.g[t]+gamma*e.g[t-1]
y.t[t] = phi*y.t[t-1]+e.t[t]+gamma*e.t[t-1]
}
par(mfrow=c(1,1))
ts.plot(cbind(y.g,y.t),col=1:2,main="ARMA(1,1) data")
legend("bottomleft",legend=c("Gaussian","Student's t"),
col=1:2,lty=1,bty="n")
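The factor \(\kappa = \sqrt{(\nu-2)/\nu}\) rescales the Student's t innovations so that \(\mathrm{Var}(\tau\, t_\nu) = \kappa^2\sigma^2\,\nu/(\nu-2) = \sigma^2\), matching the Gaussian case. A quick Monte Carlo check (a sketch, not part of the original script):
# variance of the rescaled t innovations; both entries should be close to 1
c(var(tau*rt(1e6,df=nu)),sig^2)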
We create the function eps to compute \(\varepsilon_1,\dots,\varepsilon_n\) for a given pair \((\phi,\gamma)\).
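The function implements the inversion of the ARMA(1,1) recursion, initialized at \(\varepsilon_1 = y_1\):
\[
\varepsilon_t = y_t - \phi\, y_{t-1} - \gamma\, \varepsilon_{t-1}, \qquad t = 2,\dots,n.
\]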
eps = function(y,phi,gamma){
n = length(y)  # avoid relying on the global n
epsilon = rep(0,n)
epsilon[1] = y[1]
for (t in 2:n)
epsilon[t] = y[t]-phi*y[t-1]-gamma*epsilon[t-1]
return(epsilon)
}
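As a quick check with the simulated data: at the true parameter values the recursion recovers the simulated innovations exactly, since \(y_1 = e_1\) implies \(\varepsilon_1 = e_1\) and the induction propagates (this uses the globals phi and gamma set above).
# both ranges should be zero up to floating-point error
range(eps(y.g,phi,gamma)-e.g)
range(eps(y.t,phi,gamma)-e.t)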
minusloglikelihood.g = function(theta){
# theta is unconstrained; 2*exp(theta)-1 maps it back to phi and gamma
phi = 2*exp(theta[1])-1
gamma = 2*exp(theta[2])-1
epsilon = eps(y,phi,gamma)
# the constant -n*log(sig) term is omitted (sig is fixed, and sig = 1 here)
return(-sum(dnorm(epsilon/sig,log=TRUE)))
}
minusloglikelihood.t = function(theta){
phi = 2*exp(theta[1])-1
gamma = 2*exp(theta[2])-1
epsilon = eps(y,phi,gamma)
# similarly, the constant -n*log(tau) term is omitted
return(-sum(dt(epsilon/tau,df=nu,log=TRUE)))
}
# Log-likelihoods on the original (phi,gamma) scale, used for the contour
# plots and the importance weights below
loglikelihood.g = function(phi,gamma){
epsilon = eps(y,phi,gamma)
return(sum(dnorm(epsilon/sig,log=TRUE)))
}
loglikelihood.t = function(phi,gamma){
epsilon = eps(y,phi,gamma)
# as above, the constant -n*log(tau) scale term is dropped; harmless for the
# MLE and the posterior shape, but it shifts the level of this model's
# log-likelihood relative to the Gaussian one
return(sum(dt(epsilon/tau,df=nu,log=TRUE)))
}
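Note that \(\theta \mapsto 2e^{\theta}-1\) maps \(\mathbb{R}\) onto \((-1,\infty)\), so only the lower bound is enforced; when the optimizer steps into \(\phi > 1\) the recursion explodes, which likely explains the NA/Inf warnings from nlm below. A logistic map would constrain both parameters to \((-1,1)\); a sketch of this alternative (not used in the code above):
# maps R onto (-1,1); the inverse is theta = log((1+x)/(1-x))
to.unit = function(theta) 2/(1+exp(-theta))-1
minusloglikelihood.g2 = function(theta){
epsilon = eps(y,to.unit(theta[1]),to.unit(theta[2]))
return(-sum(dnorm(epsilon/sig,log=TRUE)))
}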
Let us first fit ARIMA(1,0,1) models with the function arima from the stats package, which only handles the Gaussian case. We use method="CSS" (conditional sum of squares), so arima maximizes essentially the same conditional likelihood evaluated by our functions above.
y = y.g
fit.g = arima(y,order=c(1,0,1),include.mean = FALSE,method="CSS")
theta.mle.g = fit.g$coef
theta.mle.g.trans = log((theta.mle.g+1)/2)
-minusloglikelihood.g(theta.mle.g.trans)
## [1] -298.3182
th.true
## [1] 0.98 -0.64
theta.mle.g
## ar1 ma1
## 0.9920161 -0.6693540
theta0 = c(log(1/2),log(1/2))  # corresponds to phi = gamma = 0
est.g = nlm(minusloglikelihood.g,theta0)$estimate
## Warning in nlm(minusloglikelihood.g, theta0): NA/Inf replaced by maximum
## positive value
## Warning in nlm(minusloglikelihood.g, theta0): NA/Inf replaced by maximum
## positive value
## Warning in nlm(minusloglikelihood.g, theta0): NA/Inf replaced by maximum
## positive value
est.t = nlm(minusloglikelihood.t,theta0)$estimate
## Warning in nlm(minusloglikelihood.t, theta0): NA/Inf replaced by maximum
## positive value
## Warning in nlm(minusloglikelihood.t, theta0): NA/Inf replaced by maximum
## positive value
## Warning in nlm(minusloglikelihood.t, theta0): NA/Inf replaced by maximum
## positive value
phi.mle.g = 2*exp(est.g[1])-1
gamma.mle.g = 2*exp(est.g[2])-1
phi.mle.t = 2*exp(est.t[1])-1
gamma.mle.t = 2*exp(est.t[2])-1
c(phi.mle.g,gamma.mle.g)
## [1] 0.9922122 -0.6714067
c(phi.mle.t,gamma.mle.t)
## [1] 0.9893444 -0.6481587
mphi = phi.mle.g
mgamma = gamma.mle.g
sdphi = 0.01
sdgamma = 0.05
N = 50
phis = seq(mphi-5*sdphi,mphi+5*sdphi,length=N)
gammas = seq(mgamma-5*sdgamma,mgamma+5*sdgamma,length=N)
loglike.g = matrix(0,N,N)
loglike.t = matrix(0,N,N)
for (i in 1:N)
for (j in 1:N){
loglike.g[i,j] = loglikelihood.g(phis[i],gammas[j])
loglike.t[i,j] = loglikelihood.t(phis[i],gammas[j])
}
like.g = exp(loglike.g-max(loglike.g))
like.t = exp(loglike.t-max(loglike.t))
par(mfrow=c(2,2))
contour(phis,gammas,like.g,xlab=expression(phi),ylab=expression(gamma),
drawlabels=FALSE,main="Gaussian likelihood")
points(phi.mle.g,gamma.mle.g,pch=16)
contour(phis,gammas,like.t,xlab=expression(phi),ylab=expression(gamma),
drawlabels=FALSE,main="Student's t likelihood")
points(phi.mle.t,gamma.mle.t,pch=16)
contour(phis,gammas,like.g,xlab=expression(phi),ylab=expression(gamma),
drawlabels=FALSE,main="Gaussian likelihood",
xlim=c(-1,1),ylim=c(-1,1))
points(phi.mle.g,gamma.mle.g,pch=16)
contour(phis,gammas,like.t,xlab=expression(phi),ylab=expression(gamma),
drawlabels=FALSE,main="Student's t likelihood",
xlim=c(-1,1),ylim=c(-1,1))
points(phi.mle.t,gamma.mle.t,pch=16)
See how concentrated the likelihood functions are relative to the prior of \((\phi,\gamma)\), whose domain is \((-1,1) \times (-1,1)\).
This is an example where the (very vague) prior is a poor proposal for the (highly concentrated) posterior: most draws from the prior land in regions of negligible likelihood, so only a handful of distinct points survive the resampling step. Moreover, because the likelihood is expensive to evaluate, brute-force SIR from the prior (with \(M = 1{,}000{,}000\) draws or more) would be computationally prohibitive.
Lesson: even in such a simple bivariate case, using the prior as the proposal can be very inefficient and, therefore, computationally expensive.
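One way to quantify this inefficiency is the effective sample size of the normalized importance weights, \(\mathrm{ESS} = 1/\sum_i \bar{w}_i^2\). The sketch below (the names ess, M0, lw.prior and lw.box are introduced here) compares the prior proposal with a tight box around the Gaussian MLE, reusing loglikelihood.g from above:
# ESS near M0 means an efficient proposal; ESS near 1 means the weights
# are concentrated on a handful of draws
ess = function(logw){
w = exp(logw-max(logw))
w = w/sum(w)
return(1/sum(w^2))
}
M0 = 5000
lw.prior = mapply(loglikelihood.g,runif(M0,-1,1),runif(M0,-1,1))
lw.box = mapply(loglikelihood.g,
runif(M0,phi.mle.g-0.1,phi.mle.g+0.1),
runif(M0,gamma.mle.g-0.5,gamma.mle.g+0.5))
c(ess(lw.prior),ess(lw.box))  # expect the first to be dramatically smaller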
set.seed(54322)
mphi = phi.mle.g
mgamma = gamma.mle.g
sdphi = 0.01
sdgamma = 0.05
M = 20000
phi.d = runif(M,mphi-10*sdphi,mphi+10*sdphi)
gamma.d = runif(M,mgamma-10*sdgamma,mgamma+10*sdgamma)
# Try sampling from the prior instead and see how messy it gets!
# phi.d = runif(M,-1,1)
# gamma.d = runif(M,-1,1)
# Computing weights
# The uniform prior and uniform proposal densities are constant, so they cancel
# in the normalized weights; for the prior predictives below they contribute a
# constant factor common to both models, which drops out of the Bayes factor
w.g = rep(0,M)
w.t = rep(0,M)
for (i in 1:M){
w.g[i] = loglikelihood.g(phi.d[i],gamma.d[i])
w.t[i] = loglikelihood.t(phi.d[i],gamma.d[i])
}
# Prior predictives as a by-product of the importance weights
priorpred.g = mean(exp(w.g))
priorpred.t = mean(exp(w.t))
w.g = exp(w.g-max(w.g))
w.t = exp(w.t-max(w.t))
# Resampling
ind.g = sample(1:M,size=M,replace=TRUE,prob=w.g)
ind.t = sample(1:M,size=M,replace=TRUE,prob=w.t)
phi.g = phi.d[ind.g]
gamma.g = gamma.d[ind.g]
phi.t = phi.d[ind.t]
gamma.t = gamma.d[ind.t]
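In equation form, the steps above implement sampling importance resampling: with constant prior \(p\) and constant proposal \(q\), the normalized weights and the prior predictive estimate are
\[
\bar{w}_i = \frac{L(\theta^{(i)})}{\sum_{j=1}^{M} L(\theta^{(j)})},
\qquad
\hat{p}(y) = \frac{1}{M}\sum_{i=1}^{M} L(\theta^{(i)})\,\frac{p(\theta^{(i)})}{q(\theta^{(i)})},
\]
and resampling the \(\theta^{(i)}\) with probabilities \(\bar{w}_i\) yields approximate posterior draws.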
par(mfrow=c(2,3))
plot(phi.g,gamma.g,main="Gaussian likelihood",
xlab=expression(phi),ylab=expression(gamma))
contour(phis,gammas,like.g,col=3,add=TRUE,lwd=3,drawlabels=FALSE)
hist(phi.g,prob=TRUE,main="",xlab=expression(phi))
hist(gamma.g,prob=TRUE,main="",xlab=expression(gamma))
plot(phi.t,gamma.t,main="Student's t likelihood",
xlab=expression(phi),ylab=expression(gamma))
contour(phis,gammas,like.t,col=3,add=TRUE,lwd=3,drawlabels=FALSE)
hist(phi.t,prob=TRUE,main="",xlab=expression(phi))
hist(gamma.t,prob=TRUE,main="",xlab=expression(gamma))
c(phi.mle.g,mean(phi.g))
## [1] 0.9922122 0.9909722
c(phi.mle.t,mean(phi.t))
## [1] 0.9893444 0.9875796
c(gamma.mle.g,mean(gamma.g))
c(gamma.mle.t,mean(gamma.t))
quantile(phi.g,c(0.025,0.5,0.975))
## 2.5% 50% 97.5%
## 0.9688727 0.9914817 1.0093328
quantile(phi.t,c(0.025,0.5,0.975))
## 2.5% 50% 97.5%
## 0.9629613 0.9883298 1.0081447
quantile(gamma.g,c(0.025,0.5,0.975))
## 2.5% 50% 97.5%
## -0.7491263 -0.6580102 -0.5399630
quantile(gamma.t,c(0.025,0.5,0.975))
## 2.5% 50% 97.5%
## -0.7305234 -0.6334802 -0.5063850
The cut-off 0.9 turned out to be uninteresting, so I changed it here in the solution to 1.0. The Monte Carlo estimate of \(P(\phi > 1 \mid y)\) is simply the fraction of resampled draws exceeding 1.
mean(phi.g>1)
## [1] 0.1829
mean(phi.t>1)
## [1] 0.12985
# Log prior predictive Gaussian model
log(priorpred.g)
## [1] -302.474
# Log prior predictive Student's t model
log(priorpred.t)
## [1] -388.2954
logBFgt = log(priorpred.g)-log(priorpred.t)
logBFgt
## [1] 85.82138
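Under equal prior model probabilities \(P(\mathcal{M}_G) = P(\mathcal{M}_t) = 1/2\), the posterior probability of the Gaussian model is
\[
P(\mathcal{M}_G \mid y)
= \frac{p(y \mid \mathcal{M}_G)}{p(y \mid \mathcal{M}_G) + p(y \mid \mathcal{M}_t)}
= \frac{1}{1 + \exp(-\log \mathrm{BF}_{Gt})},
\]
which is what the next lines compute.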
# Pr(Gaussian model | data)
PMP.g = 1/(1+1/exp(logBFgt))
PMP.g
## [1] 1
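One numerical caveat: mean(exp(w)) underflows once the log-likelihoods fall below roughly \(-745\), the double-precision limit, so for longer series it is safer to average on the log scale. A minimal sketch (the helper name is introduced here; apply it to the log-likelihood values stored in w.g and w.t before they are renormalized):
# numerically stable log of mean(exp(lw))
log.mean.exp = function(lw) max(lw)+log(mean(exp(lw-max(lw))))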