1 SP500 data

data = read.table("sp500.txt",header=TRUE)
n    = nrow(data)
data = data[n:1,]

attach(data)

ret = log(adjclose[2:n]/adjclose[1:(n-1)])
y   = ret-mean(ret)
n   = n - 1
par(mfrow=c(1,1))
ts.plot(adjclose,main=paste("May 9th 2017 - May 6th 2022\n n=",n+1,sep=""),ylab="Price")

par(mfrow=c(1,2))
ts.plot(ret,main=paste("May 10th 2017 - May 6th 2022\n n=",n,sep=""),ylab="Log-return")
hist(ret,breaks=seq(-max(abs(ret)),max(abs(ret)),length=50),prob=TRUE,xlab="Log-return",main="")

2 Model 1: iid Gaussian

y_t iid N(0,sig2)

sig.hat.1 = sqrt(mean(y^2))
sig.hat.1
## [1] 0.01272944

3 Gaussian vs Student’s \(t\)

xxx = seq(-10,10,length=1000)
par(mfrow=c(1,1))
plot(xxx,dnorm(xxx),type="l",xlab="x",ylab="Density")
lines(xxx,dt(xxx,df=1),col=2)
lines(xxx,dt(xxx,df=2),col=3)
lines(xxx,dt(xxx,df=30),col=4)
legend("topleft",legend=c("df=1","df=2","df=30","Gaussian"),col=c(2:4,1),lty=1,lwd=2)

par(mfrow=c(1,1))
plot(xxx,log(dnorm(xxx)),type="l",xlab="x",ylab="Log density")
lines(xxx,log(dt(xxx,df=1)),col=2)
lines(xxx,log(dt(xxx,df=2)),col=3)
lines(xxx,log(dt(xxx,df=30)),col=4)

4 Model 2: iid Student’s \(t\)

sigs = seq(0.001,0.15,length=1000)
nus  = c(1:30,seq(35,100,by=5),seq(200,1000,by=100))
nnu  = length(nus)
like = rep(0,1000)
sig.mle = rep(0,nnu)
likes = rep(0,nnu)
for (j in 1:nnu){
  for (i in 1:1000)
    like[i] = sum(dt(y/sigs[i],df=nus[j],log=TRUE))-n*log(sigs[i])
  sig.mle[j] = sigs[like==max(like)]
  likes[j] = like[like==max(like)]
}

sig.hat.2 = sig.mle[likes==max(likes)]
nu.hat = nus[likes==max(likes)]

c(sig.hat.1,sig.hat.2,nu.hat)
## [1] 0.012729444 0.005921922 2.000000000
par(mfrow=c(1,1))
plot(nus,sig.mle,ylim=range(sig.mle,sig.hat.1))
abline(h=sig.hat.1)

par(mfrow=c(1,1))
plot(likes,sig.mle,col=0)
text(likes,sig.mle,nus,cex=0.5)

par(mfrow=c(1,1))
xxx = seq(-max(abs(ret)),max(abs(ret)),length=1000)
xbreaks = seq(-max(abs(ret)),max(abs(ret)),length=50)
hist(ret,breaks=xbreaks,prob=TRUE,xlab="Log-return",main="")
lines(xxx,dnorm(xxx,0,sig.hat.1),col=2,lwd=3)
lines(xxx,dt(xxx/sig.hat.2,df=nu.hat)/sig.hat.2,col=4,lwd=2)

par(mfrow=c(1,1))
legends  = c("Data","Gaussian model","Student's t model")
alphas   = seq(0.01,0.99,by=0.02)
qdata    = quantile(ret,alphas)
qnormal  = qnorm(alphas,0,sig.hat.1)
qstudent = sig.hat.2*qt(alphas,df=nu.hat)
xrange   = range(qdata,qnormal,qstudent)
plot(qdata,alphas,type="l",xlim=xrange,ylab="Tail probabilities",xlab="Quantiles",lwd=2)
lines(qnormal,alphas,type="l",col=2,lwd=2)
lines(qstudent,alphas,type="l",col=4,lwd=2)
legend("topleft",legend=legends,col=c(1,2,4),lwd=3,lty=1)

5 Stochastic volatility models

Here we assume that \[\begin{eqnarray*} y_t &=& e^{h_t/2} \varepsilon_t\\ h_{t+1} &=& \mu + \phi (h_t-\mu) + \sigma u_t, \end{eqnarray*}\] with \(cor(\varepsilon_t,u_t)=\rho\).

5.1 SV, SVt, SVl, SVtl

Four models contemplated by Gregor Kastner’s R package stochvol:

  • SV with Gaussian errors (SV) \[ \varepsilon_t \sim N(0,1),\ u_t \sim N(0,1), \ \rho=0 \]

  • SV with Student’s \(t\) errors (SVt) \[ \varepsilon_t \sim t_\nu(0,1),\ u_t \sim N(0,1), \ \rho=0, \ \nu \ \mbox{unknown} \]

  • SV with Gaussian errors and leverage effect (SVl) \[ \varepsilon_t \sim N(0,1),\ u_t \sim N(0,1), \ \rho \ \mbox{unknown}, \]

  • SV with Student’s \(t\) errors and leverage effect (SVtl) \[ \varepsilon_t \sim N(0,1),\ u_t \sim N(0,1), \ \rho \ \mbox{unknown}, \ \nu \ \mbox{unknown} \]

5.2 Prior distributions

The package stochvol has the following default prior specifications: \[\begin{eqnarray*} \mu &\sim& N(0,100^2)\\ (\phi+1)/2 &\sim& Beta(5,1.5)\\ \sigma^2 &\sim& Gamma(1/2,1/2)\\ h_1 &\sim& N(\mu,\sigma^2/(1-\phi^2))\\ (\rho+1)/2 &\sim& Beta(4,4)\\ \nu &\sim& Exp(0.1) \end{eqnarray*}\]

5.3 References

  • Source: Modeling Univariate and Multivariate Stochastic Volatility in R with stochvol and factorstochvol - https://cran.r-project.org/web/packages/factorstochvol/vignettes/paper.pdf

  • Kastner (2016) Dealing with Stochastic Volatility in Time Series Using the R Package stochvol. Journal of Statistical Software, 69(5), 1-30. doi:10.18637/jss.v069.i05.

  • Kastner(2019) Sparse Bayesian Time-Varying Covariance Estimation in Many Dimensions. Journal of Econometrics, 210(1), 98-115. doi:10.1016/j.jeconom.2018.11.007.

  • Kastner and Fruhwirth-Schnatter (2014) Ancillarity-Sufficiency Interweaving Strategy (ASIS) for Boosting MCMC Estimation of Stochastic Volatility Models. Computational Statistics and Data Analysis, 76, 408–423. doi:10.1016/j.csda.2013.01.002.

  • Kastner, Fruhwirth-Schnatter and Lopes (2017) Efficient Bayesian Inference for Multivariate Factor Stochastic Volatility Models. Journal of Computational and Graphical Statistics, 26(4), 905–917. doi:10.1080/10618600.2017.1322091.

library("stochvol")
fit.sv   = svsample(ret,draws=1000,burnin=100,quiet=TRUE)
fit.svt  = svtsample(ret,draws=1000,burnin=100,quiet=TRUE)
fit.svl  = svlsample(ret,draws=1000,burnin=100,quiet=TRUE)
fit.svtl = svtlsample(ret,draws=1000,burnin=100,quiet=TRUE)

qlogvols = cbind(apply(fit.sv$latent[[1]],2,median),apply(fit.svt$latent[[1]],2,median),
apply(fit.svl$latent[[1]],2,median),apply(fit.svtl$latent[[1]],2,median))

qstd.sv   = t(apply(exp(fit.sv$latent[[1]]/2),2,quantile,c(0.05,0.5,0.95)))
qstd.svt  = t(apply(exp(fit.svt$latent[[1]]/2),2,quantile,c(0.05,0.5,0.95)))
qstd.svl  = t(apply(exp(fit.svl$latent[[1]]/2),2,quantile,c(0.05,0.5,0.95)))
qstd.svtl = t(apply(exp(fit.svtl$latent[[1]]/2),2,quantile,c(0.05,0.5,0.95)))
par(mfrow=c(2,3))
plot(density(exp(fit.sv$para[[1]][,1]/2)),xlab=expression(mu),main="",ylim=c(0,550),xlim=c(0.004,0.015))
lines(density(exp(fit.svt$para[[1]][,1]/2)),col=2)
lines(density(exp(fit.svl$para[[1]][,1]/2)),col=3)
lines(density(exp(fit.svtl$para[[1]][,1]/2)),col=4)
legend("topleft",legend=c("SV","SVt","SVl","SVtl"),col=1:4,lty=1)

plot(density(fit.sv$para[[1]][,2]),xlab=expression(phi),main="",ylim=c(0,50))
lines(density(fit.svt$para[[1]][,2]),col=2)
lines(density(fit.svl$para[[1]][,2]),col=3)
lines(density(fit.svtl$para[[1]][,2]),col=4)

plot(density(fit.sv$para[[1]][,3]),xlab=expression(sigma),main="",ylim=c(0,12))
lines(density(fit.svt$para[[1]][,3]),col=2)
lines(density(fit.svl$para[[1]][,3]),col=3)
lines(density(fit.svtl$para[[1]][,3]),col=4)

plot(density(fit.svt$para[[1]][,4]),xlab=expression(nu),main="",ylim=c(0,0.08))
lines(density(fit.svtl$para[[1]][,4]),col=2)

plot(density(fit.svl$para[[1]][,5]),xlab=expression(rho),main="",xlim=c(-0.9,-0.4))
lines(density(fit.svtl$para[[1]][,5]),col=2)

par(mfrow=c(1,1))
plot(qlogvols[,1],type="l",ylim=range(qlogvols),xlab="Time",ylab="Log volatilities")
lines(qlogvols[,2],col=2)
lines(qlogvols[,3],col=3)
lines(qlogvols[,4],col=4)

yrange = range(0,qstd.sv[,2],qstd.svt[,2],qstd.svl[,2],qstd.svtl[,2])
par(mfrow=c(1,1))
plot(qstd.sv[,2],ylab="Standard deviations",ylim=yrange,type="l",xlab="Time")
lines(qstd.svt[,2],col=2)
lines(qstd.svl[,2],col=3)
lines(qstd.svtl[,2],col=4)
legend("topleft",legend=c("SV","SVt","SVl","SVtl"),col=1:4,lty=1)

par(mfrow=c(2,2))
yrange = range(qstd.sv,qstd.svt,qstd.svl,qstd.svtl)
ts.plot(qstd.sv,ylab="Standard deviations",col=c(3,2,3),ylim=yrange)
lines(0.1*abs(ret),type="h");title("SV")
ts.plot(qstd.svt,ylab="Standard deviations",col=c(3,2,3),ylim=yrange)
lines(0.1*abs(ret),type="h");title("SVt")
ts.plot(qstd.svl,ylab="Standard deviations",col=c(3,2,3),ylim=yrange)
lines(0.1*abs(ret),type="h");title("SVl")
ts.plot(qstd.svtl,ylab="Standard deviations",col=c(3,2,3),ylim=yrange)
lines(0.1*abs(ret),type="h");title("SVtl")

par(mfrow=c(2,2))
for (i in 1:4){
  ind = ((i-1)*n/4+1):(i*n/4)
  yrange = range(0,qstd.sv[ind,2],qstd.svt[ind,2],qstd.svl[ind,2],qstd.svtl[ind,2])
  plot(ind,qstd.sv[ind,2],ylab="Standard deviations",ylim=yrange,type="l",xlab="Time")
  lines(ind,qstd.svt[ind,2],col=2)
  lines(ind,qstd.svl[ind,2],col=3)
  lines(ind,qstd.svtl[ind,2],col=4)
  lines(ind,0.3*abs(ret[ind]),type="h")
}
legend("topleft",legend=c("SV","SVt","SVl","SVtl"),col=1:4,lty=1)

---
title: "SP500 returns"
subtitle: "Gaussian, Student's $t$ or SV?"
author: "Hedibert Freitas Lopes"
date: "5/11/2022"
output:
  html_document:
    toc: true
    number_sections: true
    toc_depth: 2
    toc_float: 
      collapsed: false
      smooth_scroll: false
    code_download: yes
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

# SP500 data 
```{r}
data = read.table("sp500.txt",header=TRUE)
n    = nrow(data)
data = data[n:1,]

attach(data)

ret = log(adjclose[2:n]/adjclose[1:(n-1)])
y   = ret-mean(ret)
n   = n - 1
```

```{r fig.width=8, fig.height=6}
par(mfrow=c(1,1))
ts.plot(adjclose,main=paste("May 9th 2017 - May 6th 2022\n n=",n+1,sep=""),ylab="Price")
```

```{r fig.width=10, fig.height=6}
par(mfrow=c(1,2))
ts.plot(ret,main=paste("May 10th 2017 - May 6th 2022\n n=",n,sep=""),ylab="Log-return")
hist(ret,breaks=seq(-max(abs(ret)),max(abs(ret)),length=50),prob=TRUE,xlab="Log-return",main="")
```

# Model 1: iid Gaussian
y_t iid N(0,sig2)
```{r}
sig.hat.1 = sqrt(mean(y^2))
sig.hat.1
```

# Gaussian vs Student's $t$
```{r fig.width=8, fig.height=6}
xxx = seq(-10,10,length=1000)
par(mfrow=c(1,1))
plot(xxx,dnorm(xxx),type="l",xlab="x",ylab="Density")
lines(xxx,dt(xxx,df=1),col=2)
lines(xxx,dt(xxx,df=2),col=3)
lines(xxx,dt(xxx,df=30),col=4)
legend("topleft",legend=c("df=1","df=2","df=30","Gaussian"),col=c(2:4,1),lty=1,lwd=2)
```

```{r fig.width=8, fig.height=6}
par(mfrow=c(1,1))
plot(xxx,log(dnorm(xxx)),type="l",xlab="x",ylab="Log density")
lines(xxx,log(dt(xxx,df=1)),col=2)
lines(xxx,log(dt(xxx,df=2)),col=3)
lines(xxx,log(dt(xxx,df=30)),col=4)
```

# Model 2: iid Student's $t$

```{r}
sigs = seq(0.001,0.15,length=1000)
nus  = c(1:30,seq(35,100,by=5),seq(200,1000,by=100))
nnu  = length(nus)
like = rep(0,1000)
sig.mle = rep(0,nnu)
likes = rep(0,nnu)
for (j in 1:nnu){
  for (i in 1:1000)
    like[i] = sum(dt(y/sigs[i],df=nus[j],log=TRUE))-n*log(sigs[i])
  sig.mle[j] = sigs[like==max(like)]
  likes[j] = like[like==max(like)]
}

sig.hat.2 = sig.mle[likes==max(likes)]
nu.hat = nus[likes==max(likes)]

c(sig.hat.1,sig.hat.2,nu.hat)
```

```{r fig.width=8, fig.height=6}
par(mfrow=c(1,1))
plot(nus,sig.mle,ylim=range(sig.mle,sig.hat.1))
abline(h=sig.hat.1)
```

```{r fig.width=8, fig.height=6}
par(mfrow=c(1,1))
plot(likes,sig.mle,col=0)
text(likes,sig.mle,nus,cex=0.5)
```

```{r fig.width=8, fig.height=6}
par(mfrow=c(1,1))
xxx = seq(-max(abs(ret)),max(abs(ret)),length=1000)
xbreaks = seq(-max(abs(ret)),max(abs(ret)),length=50)
hist(ret,breaks=xbreaks,prob=TRUE,xlab="Log-return",main="")
lines(xxx,dnorm(xxx,0,sig.hat.1),col=2,lwd=3)
lines(xxx,dt(xxx/sig.hat.2,df=nu.hat)/sig.hat.2,col=4,lwd=2)
```

```{r fig.width=8, fig.height=6}
par(mfrow=c(1,1))
legends  = c("Data","Gaussian model","Student's t model")
alphas   = seq(0.01,0.99,by=0.02)
qdata    = quantile(ret,alphas)
qnormal  = qnorm(alphas,0,sig.hat.1)
qstudent = sig.hat.2*qt(alphas,df=nu.hat)
xrange   = range(qdata,qnormal,qstudent)
plot(qdata,alphas,type="l",xlim=xrange,ylab="Tail probabilities",xlab="Quantiles",lwd=2)
lines(qnormal,alphas,type="l",col=2,lwd=2)
lines(qstudent,alphas,type="l",col=4,lwd=2)
legend("topleft",legend=legends,col=c(1,2,4),lwd=3,lty=1)
```

# Stochastic volatility models

Here we assume that 
\begin{eqnarray*}
y_t &=& e^{h_t/2} \varepsilon_t\\
h_{t+1} &=& \mu + \phi (h_t-\mu) + \sigma u_t,
\end{eqnarray*}
with $cor(\varepsilon_t,u_t)=\rho$.  


## SV, SVt, SVl, SVtl

Four models contemplated by Gregor Kastner's R package *stochvol*:

* SV with Gaussian errors (SV)
$$
\varepsilon_t \sim N(0,1),\ u_t \sim N(0,1), \ \rho=0
$$

* SV with Student's $t$ errors (SVt)
$$
\varepsilon_t \sim t_\nu(0,1),\ u_t \sim N(0,1), \ \rho=0, \ \nu \ \mbox{unknown}
$$

* SV with Gaussian errors and leverage effect (SVl)
$$
\varepsilon_t \sim N(0,1),\ u_t \sim N(0,1), \ \rho \ \mbox{unknown},
$$

* SV with Student's $t$ errors and leverage effect (SVtl)
$$
\varepsilon_t \sim N(0,1),\ u_t \sim N(0,1), \ \rho \ \mbox{unknown}, \ \nu \ \mbox{unknown}
$$

## Prior distributions

The package *stochvol* has the following default prior specifications:
\begin{eqnarray*}
\mu       &\sim& N(0,100^2)\\
(\phi+1)/2 &\sim& Beta(5,1.5)\\
\sigma^2    &\sim& Gamma(1/2,1/2)\\
h_1      &\sim& N(\mu,\sigma^2/(1-\phi^2))\\
(\rho+1)/2 &\sim& Beta(4,4)\\
\nu        &\sim& Exp(0.1)
\end{eqnarray*}


## References

* Source: Modeling Univariate and Multivariate Stochastic Volatility in R with stochvol and factorstochvol - https://cran.r-project.org/web/packages/factorstochvol/vignettes/paper.pdf

* Kastner (2016) Dealing with Stochastic Volatility in Time Series Using the R Package stochvol.  Journal of Statistical Software, 69(5), 1-30. doi:10.18637/jss.v069.i05.

* Kastner(2019) Sparse Bayesian Time-Varying Covariance Estimation in Many Dimensions.  Journal of Econometrics, 210(1), 98-115. doi:10.1016/j.jeconom.2018.11.007.

* Kastner and Fruhwirth-Schnatter (2014) Ancillarity-Sufficiency Interweaving Strategy (ASIS) for Boosting MCMC Estimation of Stochastic Volatility Models.  Computational Statistics and Data Analysis, 76, 408–423. doi:10.1016/j.csda.2013.01.002.

* Kastner, Fruhwirth-Schnatter and Lopes (2017) Efficient Bayesian Inference for Multivariate Factor Stochastic Volatility Models. Journal of Computational and Graphical Statistics, 26(4), 905–917. doi:10.1080/10618600.2017.1322091.

```{r}
library("stochvol")
fit.sv   = svsample(ret,draws=1000,burnin=100,quiet=TRUE)
fit.svt  = svtsample(ret,draws=1000,burnin=100,quiet=TRUE)
fit.svl  = svlsample(ret,draws=1000,burnin=100,quiet=TRUE)
fit.svtl = svtlsample(ret,draws=1000,burnin=100,quiet=TRUE)

qlogvols = cbind(apply(fit.sv$latent[[1]],2,median),apply(fit.svt$latent[[1]],2,median),
apply(fit.svl$latent[[1]],2,median),apply(fit.svtl$latent[[1]],2,median))

qstd.sv   = t(apply(exp(fit.sv$latent[[1]]/2),2,quantile,c(0.05,0.5,0.95)))
qstd.svt  = t(apply(exp(fit.svt$latent[[1]]/2),2,quantile,c(0.05,0.5,0.95)))
qstd.svl  = t(apply(exp(fit.svl$latent[[1]]/2),2,quantile,c(0.05,0.5,0.95)))
qstd.svtl = t(apply(exp(fit.svtl$latent[[1]]/2),2,quantile,c(0.05,0.5,0.95)))
```

```{r fig.width=10, fig.height=8}
par(mfrow=c(2,3))
plot(density(exp(fit.sv$para[[1]][,1]/2)),xlab=expression(mu),main="",ylim=c(0,550),xlim=c(0.004,0.015))
lines(density(exp(fit.svt$para[[1]][,1]/2)),col=2)
lines(density(exp(fit.svl$para[[1]][,1]/2)),col=3)
lines(density(exp(fit.svtl$para[[1]][,1]/2)),col=4)
legend("topleft",legend=c("SV","SVt","SVl","SVtl"),col=1:4,lty=1)

plot(density(fit.sv$para[[1]][,2]),xlab=expression(phi),main="",ylim=c(0,50))
lines(density(fit.svt$para[[1]][,2]),col=2)
lines(density(fit.svl$para[[1]][,2]),col=3)
lines(density(fit.svtl$para[[1]][,2]),col=4)

plot(density(fit.sv$para[[1]][,3]),xlab=expression(sigma),main="",ylim=c(0,12))
lines(density(fit.svt$para[[1]][,3]),col=2)
lines(density(fit.svl$para[[1]][,3]),col=3)
lines(density(fit.svtl$para[[1]][,3]),col=4)

plot(density(fit.svt$para[[1]][,4]),xlab=expression(nu),main="",ylim=c(0,0.08))
lines(density(fit.svtl$para[[1]][,4]),col=2)

plot(density(fit.svl$para[[1]][,5]),xlab=expression(rho),main="",xlim=c(-0.9,-0.4))
lines(density(fit.svtl$para[[1]][,5]),col=2)
```

```{r fig.width=8, fig.height=6}
par(mfrow=c(1,1))
plot(qlogvols[,1],type="l",ylim=range(qlogvols),xlab="Time",ylab="Log volatilities")
lines(qlogvols[,2],col=2)
lines(qlogvols[,3],col=3)
lines(qlogvols[,4],col=4)
```

```{r fig.width=8, fig.height=6}
yrange = range(0,qstd.sv[,2],qstd.svt[,2],qstd.svl[,2],qstd.svtl[,2])
par(mfrow=c(1,1))
plot(qstd.sv[,2],ylab="Standard deviations",ylim=yrange,type="l",xlab="Time")
lines(qstd.svt[,2],col=2)
lines(qstd.svl[,2],col=3)
lines(qstd.svtl[,2],col=4)
legend("topleft",legend=c("SV","SVt","SVl","SVtl"),col=1:4,lty=1)
```

```{r fig.width=10, fig.height=8}
par(mfrow=c(2,2))
yrange = range(qstd.sv,qstd.svt,qstd.svl,qstd.svtl)
ts.plot(qstd.sv,ylab="Standard deviations",col=c(3,2,3),ylim=yrange)
lines(0.1*abs(ret),type="h");title("SV")
ts.plot(qstd.svt,ylab="Standard deviations",col=c(3,2,3),ylim=yrange)
lines(0.1*abs(ret),type="h");title("SVt")
ts.plot(qstd.svl,ylab="Standard deviations",col=c(3,2,3),ylim=yrange)
lines(0.1*abs(ret),type="h");title("SVl")
ts.plot(qstd.svtl,ylab="Standard deviations",col=c(3,2,3),ylim=yrange)
lines(0.1*abs(ret),type="h");title("SVtl")
```

```{r fig.width=10, fig.height=8}
par(mfrow=c(2,2))
for (i in 1:4){
  ind = ((i-1)*n/4+1):(i*n/4)
  yrange = range(0,qstd.sv[ind,2],qstd.svt[ind,2],qstd.svl[ind,2],qstd.svtl[ind,2])
  plot(ind,qstd.sv[ind,2],ylab="Standard deviations",ylim=yrange,type="l",xlab="Time")
  lines(ind,qstd.svt[ind,2],col=2)
  lines(ind,qstd.svl[ind,2],col=3)
  lines(ind,qstd.svtl[ind,2],col=4)
  lines(ind,0.3*abs(ret[ind]),type="h")
}
legend("topleft",legend=c("SV","SVt","SVl","SVtl"),col=1:4,lty=1)
```
