1 Introduction

In this exercise, we will univariate stochastic volatility (SV) models to six financial time series: three from the US market and three from the Brazilian market: Apple Inc., Microsoft Corp., Amazon.com Inc., Petrobras, Vale and Companhia Siderurgica Nacional. The period considered goes from January 1st 2006 to March 24th 2021, a total of \(n=3831\) observations.

#install.packages("factorstochvol")
library(tidyquant)
library(stochvol)
library(factorstochvol)

getSymbols("AAPL",from='2006-01-01',to='2021-03-24',warnings=FALSE,auto.assign=TRUE)
getSymbols("MSFT",from='2006-01-01',to='2021-03-24',warnings=FALSE,auto.assign=TRUE)
getSymbols("AMZN",from='2006-01-01',to='2021-03-24',warnings=FALSE,auto.assign=TRUE)
getSymbols("PBR",from='2006-01-01',to='2021-03-24',warnings=FALSE,auto.assign=TRUE) 
getSymbols("VALE",from='2006-01-01',to='2021-03-24',warnings=FALSE,auto.assign=TRUE)
getSymbols("SID",from='2006-01-01',to='2021-03-24',warnings=FALSE,auto.assign=TRUE) 

names = c("apple","microsoft","amazon","petrobras","vale","sid")

data = as.matrix(cbind(AAPL$AAPL.Adjusted,MSFT$MSFT.Adjusted,AMZN$AMZN.Adjusted,
                       PBR$PBR.Adjusted,VALE$VALE.Adjusted,SID$SID.Adjusted))
n    = nrow(data)             
p    = ncol(data)
ret = matrix(0,n-1,p)
for (i in 1:p){
 ret[,i] = log(data[2:n,i]/data[1:(n-1),i])
 ret[,i] = ret[,i] - mean(ret[,i])
}
n = nrow(ret)
par(mfrow=c(2,3),mai=c(0.3,0.5,0.2,0.2))
for (i in 1:p)             
  ts.plot(data[,i],main=names[i],ylab="Price")

par(mfrow=c(2,3),mai=c(0.3,0.5,0.2,0.2))
for (i in 1:p)             
  ts.plot(ret[,i],main=names[i],ylab="Log returns")

par(mfrow=c(1,1),mai=c(0.5,0.5,0.2,0.2))
boxplot(ret,outline=FALSE,names=names)
abline(h=0,col=2)

2 Univariate SV modeling

Let us use stochvol to learn about log-volatilities and parameters for the whole time span. The package fits the following version of the SV-AR(1) model \[\begin{eqnarray*} y_t &=& \exp\{h_t/2\} \varepsilon_t \\ h_t &=& \mu + \phi (h_{t-1}-\mu) + \sigma \eta_t, \end{eqnarray*}\] where \(\varepsilon_t\) and \(\eta_t\) are iid standard normal with \(E(\varepsilon_t,\eta_{t'})=0\) for all \(t,t'\). The default prior hyperparameters are such that \[\begin{eqnarray*} \mu &\sim& N(0,100^2)\\ (\phi+1)/2 &\sim& Beta(5,1.5)\\ \sigma^2 &\sim& IG(1/2,1/2)\\ h_0 &\sim& N(\mu,\sigma^2/(1-\phi^2)). \end{eqnarray*}\] Finally, we also use their default for burn-in sample (\(1000\)), but set the number of draws for posterior inference at \(1000\).

ndraw = 10000
nburn = 10000
uvol = matrix(0,n,p)
for (i in 1:p){
  sv.fit = svsample(ret[,i],draws=ndraw,burnin=nburn)
  uvol[,i] = apply(exp(sv.fit$latent[[1]]/2),2,mean)
}
par(mfrow=c(2,3))
for (i in 1:p)
  ts.plot(uvol[,i],main=names[i],ylab="Standard deviation")

3 Multivariate exploratory analysis

3.1 Rolling window correlations

L  = 50
ts = seq(L,n,by=L)
nt = length(ts)
corr = array(0,c(nt,p,p))
for (i in 1:nt){
  ind = (ts[i]-L+1):ts[i]
  corr[i,,] = cor(ret[ind,])
}

rcorr = range(corr)
par(mfrow=c(p,p))
for (i in 1:p)
for (j in 1:p){
  plot(ts,corr[,i,j],type="l",xlab="Time",ylab="",ylim=rcorr)
   title(paste(names[i]," x ",names[j],sep=""),cex=0.75)
   abline(h=1,col=2)
   abline(h=0.5,col=2)
   abline(h=0,col=2)
}

3.2 Standard Gaussian factor analysis

In a standard Gaussian FA, the vector of time series \(y_t=(y_{t1},\ldots,y_{tp})'\) is modeled as \[\begin{eqnarray*} y_t | f_t &\sim& N(\beta f_t,\Sigma),\\ f_t &\sim& N(0,I_k), \end{eqnarray*}\] where \(\beta\) is the \((p \times k)\) matrix of factor loadings, \(f_t\) is the \(k \times 1\) vector of common factors (or factor scores) and \(\Sigma=\mbox{diag}(\sigma_1^2,\ldots,\sigma_p^2)\) is the matrix of idiosyncratic variances. In this context, the unconditional variance of \(y_t\) is given by \[ V(y_t|\beta,\Sigma) = \beta \beta' + \Sigma. \]

fac = factanal(ret,factors=2,scores="regression")

par(mfrow=c(1,2))
ts.plot(fac$scores[,1],ylab="",main="1st common factor")
ts.plot(fac$scores[,2],ylab="",main="2nd common factor")

par(mfrow=c(1,1),mai=c(0.8,0.8,0.2,0.2))
plot(fac$load,col=0,xlab="",ylab="",xlim=range(fac$load),ylim=range(fac$load))
text(fac$load[,1],fac$load[,2],names)
mtext(side=1,"Loadings of the 1st common factor",line=1.75)
mtext(side=2,"Loadings of the 2nd common factor",line=1.75)

Below we present the correlations between the two common factors and the six financial time series of log-returns:

tab = rbind(cor(cbind(fac$scores[,1],ret))[1,2:(p+1)],
cor(cbind(fac$scores[,2],ret))[1,2:(p+1)])
colnames(tab) = names
rownames(tab) = c("1st factor","2nd factor")
round(tab,1)
##            apple microsoft amazon petrobras vale sid
## 1st factor   0.3       0.3    0.2       0.8  0.9 0.9
## 2nd factor   0.8       0.9    0.8       0.4  0.4 0.3

4 Multivariate SV modeling

The standard Gaussian linear factor model presented earlier is now extended to allow for stochastic volatility for both idiosyncratic and common factors: \[\begin{eqnarray*} y_t | f_t &\sim& N(\beta f_t,\Sigma_t),\\ f_t &\sim& N(0,H_t), \end{eqnarray*}\] where \(H_t=\mbox{diag}(\sigma_{t,p+1}^2,\ldots,\sigma_{t,p+k}^2)\). Here \(\sigma_{t,i}^2\) follows the univariate SV-AR(1) model introduced at the beginning, for \(i=1,\ldots,p+k\).

4.1 Modeling the six time series jointly

fit = fsvsample(ret,factors=2,draws=ndraw,burnin=nburn,runningstore=6)
range = range(fac$scores,fit$runningstore$fac[,,1])
par(mfrow=c(2,2))
ts.plot(fit$runningstore$fac[1,,1],ylim=range,ylab="")
lines(fac$scores[,2],col=2)
legend("top",legend=c("1st fsv factor","2nd factor"),col=1:2,bty="n",lty=1)
ts.plot(fit$runningstore$fac[2,,1],ylim=range,ylab="")
lines(fac$scores[,1],col=2)
legend("top",legend=c("2nd fsv factor","1st factor"),col=1:2,bty="n",lty=1)
plot(fac$scores[,2],fit$runningstore$fac[1,,1],xlab="2nd factor",ylab="1st fsv factor")
plot(fac$scores[,1],fit$runningstore$fac[2,,1],xlab="1st factor",ylab="2nd fsv factor")

4.2 Modeling the three US/BR jointly

fitUS = fsvsample(ret[,1:3],factors=1,draws=ndraw,burnin=nburn,runningstore=6)
fitBR = fsvsample(ret[,4:6],factors=1,draws=ndraw,burnin=nburn,runningstore=6)
factors = cbind(t(fit$runningstore$fac[,,1]),fitUS$runningstore$fac[1,,1],fitBR$runningstore$fac[1,,1])
cor(factors)
##           [,1]       [,2]       [,3]      [,4]
## [1,] 1.0000000 0.15896022 0.98884292 0.6360411
## [2,] 0.1589602 1.00000000 0.07261093 0.8578589
## [3,] 0.9888429 0.07261093 1.00000000 0.5623212
## [4,] 0.6360411 0.85785893 0.56232120 1.0000000
range = range(factors)
par(mfrow=c(2,2))
ts.plot(factors[,1],ylim=range,ylab="")
lines(factors[,3],col=2)
legend("top",legend=c("1st fsv 6d-sv","1st fsv 3d-sv USA"),col=1:2,bty="n",lty=1)
ts.plot(factors[,2],ylim=range,ylab="")
lines(factors[,4],col=2)
legend("top",legend=c("2nd fsv 6d-sv","1st fsv 3d-sv BR"),col=1:2,bty="n",lty=1)
plot(factors[,1],factors[,3],xlim=range,ylim=range,xlab="1st fsv 6d-sv",ylab="1st fsv 3d-sv USA")
plot(factors[,2],factors[,4],xlim=range,ylim=range,xlab="2nd fsv 6d-sv",ylab="1st fsv 3d-sv BR")

5 Comparisons

5.1 Comparing factor loadings

range = range(fit$facload)
par(mfrow=c(1,2),mai=c(0.8,0.8,0.2,0.2))
boxplot(t(fit$facload[,1,]),outline=FALSE,main="Factor loadings (1st column)",ylim=range,names=names)
abline(h=0,lty=2)
boxplot(t(fit$facload[,2,]),outline=FALSE,main="Factor loadings (2nd column)",ylim=range,names=names)
abline(h=0,lty=2)

beta = cbind(apply(fit$facload[,1,],1,mean),apply(fit$facload[,2,],1,mean))

colors = c(rep(1,3),rep(2,3))
par(mfrow=c(1,2),mai=c(0.8,0.8,0.2,0.2))
plot(fac$load,col=0,xlab="",ylab="",xlim=range(fac$load),ylim=range(fac$load),main="Factor model")
text(fac$load[,1],fac$load[,2],names,col=colors)
mtext(side=1,"Factor 1",line=1.75)
mtext(side=2,"Factor 2",line=1.75)
plot(beta,col=0,xlab="",ylab="",xlim=range(beta),ylim=range(beta),main="Factor SV model")
text(beta[,1],beta[,2],names,col=colors)
mtext(side=1,"Factor 1",line=1.75)
mtext(side=2,"Factor 2",line=1.75)

5.2 Comparing time-varying covariance matrices

cov   = array(0,c(n,p,p))
l   = 0
for (j in 1:p){
  for (i in j:p){
    l = l + 1 
    cov[,i,j] = fit$runningstore$cov[,l,1]
    cov[,j,i] = cov[,i,j]
  }
}
covBR = array(0,c(n,3,3))
l   = 0
for (j in 1:3){
  for (i in j:3){
    l = l + 1 
    covBR[,i,j] = fitBR$runningstore$cov[,l,1]
    covBR[,j,i] = cov[,i,j]
  }
}

par(mfrow=c(p,p),mai=c(0.3,0.5,0.2,0.2))
for (i in 1:p){
  for (j in 1:p){
    ts.plot(cov[,i,j]/sqrt(cov[,i,i]*cov[,j,j]),ylim=c(-0.5,1),ylab="")
    title(paste(names[i]," x ",names[j],sep=""),cex=0.75)
    abline(h=cor(ret[,i],ret[,j]),col=2)
    lines(ts,corr[,i,j],col=3)
  }
}

5.3 More comparisons

ii = c(1,4,6)
par(mfrow=c(2,3))
for (i in 1:3){
  ts.plot(sqrt(cov[,i,i]),ylab="",main=names[i])
  lines(sqrt(fitUS$runningstore$cov[,ii[i],1]),col=2)
  lines(uvol[,i],col=3)
  if (i==1){
    legend("top",legend=c("1d-sv","3d-sv","6d-sv"),col=3:1,lty=1,bty="n")
  }
}
for (i in 4:6){
  ts.plot(sqrt(cov[,i,i]),ylab="",main=names[i])
  lines(sqrt(fitBR$runningstore$cov[,ii[i-3],1]),col=2)
  lines(uvol[,i],col=3)
}

5.4 closer look

ind = (n-999):n
range = sqrt(range(cov[ind,5,5],cov[ind,6,6],covBR[,2,2],covBR[,3,3]))

par(mfrow=c(1,2))
ts.plot(sqrt(cov[ind,5,5]),ylab="Standard deviation",main=names[5],ylim=range)
lines(sqrt(covBR[ind,2,2]),col=2)
lines(uvol[ind,5],col=3)
legend("topleft",legend=c("6d-sv","3d-sv","1d-sv"),bty="n",lty=1,col=1:3)
ts.plot(sqrt(cov[ind,6,6]),ylab="Standard deviation",main=names[6],ylim=range)
lines(sqrt(covBR[ind,3,3]),col=2)
lines(uvol[ind,6],col=3)

rho  = cov[ind,5,6]/sqrt(cov[ind,5,5]*cov[ind,6,6])
rho1 = covBR[ind,2,3]/sqrt(covBR[ind,2,2]*covBR[ind,3,3])

par(mfrow=c(1,1))
ts.plot(rho,ylab="",main=paste(names[5]," x ",names[6],sep=""),ylim=c(0,1))
lines(rho1,col=2)
abline(h=0.8,col=2)
abline(h=0.6,col=2)
legend("topleft",legend=c("6d-sv","3d-sv"),lty=1,col=1:2,bty="n")