###############################################################################
#
# Modeling Petrobras' log-returns
#
# 12/29/00 - 12/29/06 : Training sample (6 years or 1506 observations)
# 01/03/06 - 12/31/13 : Dataset (7 years or 1762 observations)
#
###############################################################################

# NOTE(review): wiping the workspace is kept from the original script, but
# avoid rm(list = ls()) in code that may be source()'d by other scripts.
rm(list = ls())

# Inverse-gamma IG(a, b) density at x: if 1/X ~ Gamma(a, b), then
# f_X(x) = dgamma(1/x, a, b) / x^2 (change-of-variables Jacobian 1/x^2).
dig <- function(x, a, b) {
  dgamma(1 / x, a, b) / x^2
}

# Column 3 of the CSV holds the price series used throughout.
data <- read.csv("petrobras.csv", header = TRUE)
n <- nrow(data)
logprice <- log(data[, 3])
logreturn <- log(data[2:n, 3] / data[1:(n - 1), 3])

# Exploratory plots: log prices, lag-1 scatterplot, log returns, histogram
pdf(file = "logprices.pdf", width = 6, height = 4)
ts.plot(logprice, xlab = "DAYS", ylab = "LOG PRICE", main = "")
dev.off()

pdf(file = "logprices-scatterplot.pdf", width = 4, height = 4)
plot(logprice[1:(n - 1)], logprice[2:n],
     ylab = expression(y[t]), xlab = expression(y[t - 1]), pch = 16)
abline(0, 1, col = 2)  # 45-degree line: random-walk benchmark
dev.off()

pdf(file = "logreturns.pdf", width = 6, height = 4)
ts.plot(logreturn, xlab = "DAYS", ylab = "LOG RETURN", main = "")
dev.off()

pdf(file = "logreturns-histogram.pdf", width = 4, height = 4)
breaks <- seq(min(logreturn), max(logreturn), length = 100)
hist(logreturn, xlab = "LOG RETURN", ylab = "Density",
     prob = TRUE, breaks = breaks, main = "")
box()
dev.off()

# Training sample: first 1506 log prices -> 1505 returns (r0)
y0 <- log(data[1:1506, 3])
n0 <- length(y0)
r0 <- y0[2:n0] - y0[1:(n0 - 1)]

# Dataset: remaining 1762 log prices -> 1761 returns (r)
y <- log(data[1507:3268, 3])
n <- length(y)
r <- y[2:n] - y[1:(n - 1)]

########################################################################
#
# Model 0: r[t] ~ N(0, sig2)
#
# Prior: sig2 ~ IG(c, d)
#
########################################################################

# Using past (training) data to set up the prior hyperparameters (c, d):
# E(sig2) = d / (c - 1) matches the training-sample second moment.
# NOTE(review): `c` shadows base::c; calls like c(1, 2) still resolve to
# the function, but the name is a readability hazard kept for continuity.
c <- 5
d <- (c - 1) * mean(r0^2)

# Posterior hyperparameters (conjugate analysis: IG prior is conjugate
# for the variance of a zero-mean normal likelihood)
c1 <- c + (n - 1) / 2
d1 <- d + sum(r^2) / 2

# Prior (red) and posterior densities, with the sample moment marked in blue
pdf(file = "model0-graph1.pdf", width = 6, height = 6)
par(mfrow = c(1, 1))
sig2s <- seq(0.0001, 0.002, length = 1000)
plot(sig2s, dig(sig2s, c1, d1), type = "l",
     xlab = expression(sigma^2), ylab = "Density")
lines(sig2s, dig(sig2s, c, d), col = 2)
points(mean(r^2), 0, col = 4, pch = 16)
dev.off()

# Log-predictive (exact: available in closed form under conjugacy)
logpredM0 <- -0.5 * (n - 1) * log(2 * pi) +
  c * log(d) - lgamma(c) + lgamma(c1) - c1 * log(d1)

# Log-predictive: simple MC (prior draws) and harmonic mean (posterior draws)
set.seed(12325)
M <- 100000
xxx <- sort(1 / rgamma(M, c1, d1))  # posterior draws of sig2
yyy <- sort(1 / rgamma(M, c, d))    # prior draws of sig2
llike <- rep(0, M)
llike1 <- rep(0, M)
for (i in 1:M) {
  llike[i] <- sum(dnorm(r, 0, sqrt(xxx[i]), log = TRUE))
  llike1[i] <- sum(dnorm(r, 0, sqrt(yyy[i]), log = TRUE))
}
# Harmonic-mean estimator, stabilized by subtracting the max log-likelihood
lmax <- max(llike)
like <- exp(llike - lmax)
logpredM0app <- log(1 / mean(1 / like)) + lmax
# Simple Monte Carlo estimator over prior draws
lmax <- max(llike1)
like1 <- exp(llike1 - lmax)
logpredM0app1 <- log(mean(like1)) + lmax

########################################################################
#
# Model 1: r[t] ~ N(mu, sig2)
#
# Prior: mu ~ N(a, A) - sig2 ~ IG(c, d)
#
########################################################################

# Using past data to set up the prior hyperparameters (a, A, c, d);
# the factor 100 inflates the training-sample information (weak prior).
a <- mean(r0)
A <- 100 * var(r0) / (n0 - 1)
c <- 5
d <- (c - 1) * var(r0)

# Prior densities (screen device only; not saved to pdf)
par(mfrow = c(1, 2))
mus <- seq(-0.03, 0.03, length = 1000)
plot(mus, dnorm(mus, a, sqrt(A)), type = "l",
     xlab = expression(mu), ylab = "Density")
title("Prior of mu")
sig2s <- seq(0.0001, 0.004, length = 1000)
plot(sig2s, dig(sig2s, c, d), type = "l",
     xlab = expression(sigma^2), ylab = "Density")
title("Prior of sig2")

# Posterior inference via Gibbs sampler
set.seed(12235)
sig2 <- d / (c - 1)  # initialize at the prior mean of sig2
burnin <- 10000
M <- 100000
niter <- burnin + M
pars <- matrix(0, niter, 2)
for (iter in 1:niter) {
  # mu | sig2, r ~ N(m, v): precision-weighted combination of prior and data
  v <- 1 / (1 / A + (n - 1) / sig2)
  m <- v * (a / A + sum(r) / sig2)
  mu <- rnorm(1, m, sqrt(v))
  # sig2 | mu, r ~ IG(p1, p2)
  p1 <- c + (n - 1) / 2
  p2 <- d + sum((r - mu)^2) / 2
  sig2 <- 1 / rgamma(1, p1, p2)
  pars[iter, ] <- c(mu, sig2)
}
pars <- pars[(burnin + 1):niter, ]
# NOTE(review): `range` shadows base::range; later calls range(x) still
# dispatch to the function, but the name is kept only for continuity.
range <- seq(1, M, by = 100)  # thinned index for trace plots

# Checking Gibbs sampler output: trace, ACF and histogram per parameter
pdf(file = "model1-graph1.pdf", width = 8, height = 6)
par(mfrow = c(2, 3))
plot(range, pars[range, 1], xlab = "iterations", ylab = "",
     main = expression(mu), type = "l")
acf(pars[, 1], main = "")
hist(pars[, 1], xlab = "", prob = TRUE, main = ""); box()
plot(range, pars[range, 2], xlab = "iterations", ylab = "",
     main = expression(sigma^2), type = "l")
acf(pars[, 2], main = "")
hist(pars[, 2], xlab = "", prob = TRUE, main = ""); box()
dev.off()

# Comparing priors and posteriors
pdf(file = "model1-graph2.pdf", width = 6, height = 4)
par(mfrow = c(1, 2))
hist(pars[, 1], xlab = "", prob = TRUE, main = "", xlim = range(mus))
box()
lines(mus, dnorm(mus, a, sqrt(A)), col = 2)  # prior of mu in red
title(expression(mu))
points(mean(r), 0, col = 4, pch = 16)        # sample mean in blue
hist(pars[, 2], xlab = "", prob = TRUE, main = "", xlim = range(sig2s))
box()
lines(sig2s, dig(sig2s, c, d), col = 2)      # prior of sig2 in red
title(expression(sigma^2))
points(var(r), 0, col = 4, pch = 16)         # sample variance in blue
dev.off()

# Computing log-predictive via simple MC and harmonic mean estimator
set.seed(230224)
llike <- rep(0, M)
llike1 <- rep(0, M)
pars1 <- matrix(0, M, 2)
pars1[, 1] <- rnorm(M, a, sqrt(A))  # prior draws of mu
pars1[, 2] <- 1 / rgamma(M, c, d)   # prior draws of sig2
for (i in 1:M) {
  llike[i] <- sum(dnorm(r, pars[i, 1], sqrt(pars[i, 2]), log = TRUE))
  llike1[i] <- sum(dnorm(r, pars1[i, 1], sqrt(pars1[i, 2]), log = TRUE))
}
# FIX(review): the original reused `A` (the prior variance of mu) as the
# log-sum-exp stabilizer; a dedicated name keeps the hyperparameter intact.
lmax <- max(llike)
like <- exp(llike - lmax)
logpredM1 <- log(1 / mean(1 / like)) + lmax  # harmonic mean (posterior draws)
lmax <- max(llike1)
like1 <- exp(llike1 - lmax)
logpredM11 <- log(mean(like1)) + lmax        # simple MC (prior draws)
c(logpredM1, logpredM11)

########################################################################
#
# Model 2: y[t] ~ N(alpha + beta*y[t-1], sig2)   (AR(1) on log prices)
#
# Prior: alpha ~ N(a, A), beta ~ N(b, B) and sig2 ~ IG(c, d)
#
########################################################################
x <- y[1:(n - 1)]
y <- y[2:n]
X <- cbind(1, x)
coefhat <- solve(t(X) %*% X) %*% t(X) %*% y  # OLS fit on the dataset
s2hat <- mean((y - X %*% coefhat)^2)

# Using past (training) data to set up the prior hyperparameters:
# OLS on the training sample, with variances inflated by 100 (weak prior).
x0 <- y0[1:(n0 - 1)]
y0 <- y0[2:n0]
X0 <- cbind(1, x0)
coef <- solve(t(X0) %*% X0) %*% t(X0) %*% y0
s2 <- mean((y0 - X0 %*% coef)^2)
V <- s2 * solve(t(X0) %*% X0)
a <- coef[1]
A <- 100 * V[1, 1]
b <- coef[2]
B <- 100 * V[2, 2]
c <- 5
d <- (c - 1) * s2

# Gibbs sampler (sampling order sig2 -> alpha -> beta is preserved so the
# RNG stream, and hence the published results, are reproducible)
set.seed(12356)
alpha <- a
beta <- b
burnin <- 10000
M <- 100000
lag <- 1      # thinning interval for the stored chain
niter <- burnin + M * lag
pars <- matrix(0, niter, 3)
for (iter in 1:niter) {
  # sig2 | alpha, beta, data ~ IG(p1, p2)
  p1 <- c + (n - 1) / 2
  p2 <- d + sum((y - alpha - beta * x)^2) / 2
  sig2 <- 1 / rgamma(1, p1, p2)
  # alpha | beta, sig2, data ~ N(m, v)
  v <- 1 / (1 / A + (n - 1) / sig2)
  m <- v * (a / A + sum(y - beta * x) / sig2)
  alpha <- rnorm(1, m, sqrt(v))
  # beta | alpha, sig2, data ~ N(m, v)
  v <- 1 / (1 / B + sum(x^2) / sig2)
  m <- v * (b / B + sum((y - alpha) * x) / sig2)
  beta <- rnorm(1, m, sqrt(v))
  pars[iter, ] <- c(alpha, beta, sig2)
}
pars <- pars[seq(burnin + 1, niter, by = lag), ]
range <- seq(1, M, by = 100)  # thinned index for trace plots

# Checking Gibbs sampler output
pdf(file = "model2-graph1.pdf", width = 10, height = 8)
par(mfrow = c(3, 3))
plot(range, pars[range, 1], xlab = "iterations", ylab = "",
     main = expression(alpha), type = "l")
acf(pars[, 1], main = "")
hist(pars[, 1], xlab = "", prob = TRUE, main = ""); box()
plot(range, pars[range, 2], xlab = "iterations", ylab = "",
     main = expression(beta), type = "l")
acf(pars[, 2], main = "")
hist(pars[, 2], xlab = "", prob = TRUE, main = ""); box()
plot(range, pars[range, 3], xlab = "iterations", ylab = "",
     main = expression(sigma^2), type = "l")
acf(pars[, 3], main = "")
hist(pars[, 3], xlab = "", prob = TRUE, main = ""); box()
dev.off()

# Comparing priors and posteriors (OLS point estimates marked in blue)
pdf(file = "model2-graph2.pdf", width = 8, height = 4)
par(mfrow = c(1, 3))
alphas <- seq(-.2, .2, length = 1000)
hist(pars[, 1], xlab = "", prob = TRUE, main = "", xlim = range(alphas))
box()
lines(alphas, dnorm(alphas, a, sqrt(A)), col = 2)
title(expression(alpha))
points(coefhat[1], 0, col = 4, pch = 16)
betas <- seq(0.9, 1.1, length = 1000)
hist(pars[, 2], xlab = "", prob = TRUE, main = "", xlim = range(betas))
box()
lines(betas, dnorm(betas, b, sqrt(B)), col = 2)
title(expression(beta))
points(coefhat[2], 0, col = 4, pch = 16)
sig2s <- seq(0, 0.003, length = 1000)
hist(pars[, 3], xlab = "", prob = TRUE, main = "", xlim = range(sig2s))
box()
lines(sig2s, dig(sig2s, c, d), col = 2)
title(expression(sigma^2))
points(s2hat, 0, col = 4, pch = 16)
dev.off()

# Computing log-predictive via simple MC and harmonic mean estimator
set.seed(230224)
llike <- rep(0, M)
llike1 <- rep(0, M)
pars1 <- matrix(0, M, 3)
pars1[, 1] <- rnorm(M, a, sqrt(A))  # prior draws of alpha
pars1[, 2] <- rnorm(M, b, sqrt(B))  # prior draws of beta
pars1[, 3] <- 1 / rgamma(M, c, d)   # prior draws of sig2
for (i in 1:M) {
  llike[i] <- sum(dnorm(y, pars[i, 1] + pars[i, 2] * x,
                        sqrt(pars[i, 3]), log = TRUE))
  llike1[i] <- sum(dnorm(y, pars1[i, 1] + pars1[i, 2] * x,
                         sqrt(pars1[i, 3]), log = TRUE))
}
# FIX(review): stabilizer renamed from `A` to `lmax` (see Model 1 above)
lmax <- max(llike)
like <- exp(llike - lmax)
logpredM2 <- log(1 / mean(1 / like)) + lmax  # harmonic mean
lmax <- max(llike1)
like1 <- exp(llike1 - lmax)
logpredM21 <- log(mean(like1)) + lmax        # simple MC
c(logpredM2, logpredM21)

# Comparing all models: harmonic-mean-based vs prior-MC-based estimates
logpred1 <- c(logpredM0, logpredM1, logpredM2)
logpred2 <- c(logpredM0, logpredM11, logpredM21)
# Posterior model probabilities (log-sum-exp normalization, in percent)
pred1 <- exp(logpred1 - max(logpred1))
Prob1 <- round(100 * pred1 / sum(pred1), 3)
pred2 <- exp(logpred2 - max(logpred2))
Prob2 <- round(100 * pred2 / sum(pred2), 3)
round(cbind(logpred1, logpred2), 2)
cbind(Prob1, Prob2)

########################################################################
#
# Model 3: r[t] ~ t_nu(0, rho*h[t]) where rho = (nu-2)/nu
#
# h[t] = alpha0 + alpha1*y[t-1]^2 + beta*h[t-1]
#
# with h[0] = 0, alpha0, alpha1, beta > 0 and nu > delta
#
# Prior: alpha ~ TN(mua, Siga)
#        beta  ~ TN(mub, Sigb)
#        nu    ~ Translated Exponential(lambda, delta)
#
# with lambda > 0 and delta >= 2, such that E(nu) = delta + 1/lambda
#
########################################################################

# First MCMC run on the TRAINING returns under a vague prior, used only
# to learn prior hyperparameters for the final run on the dataset.
library(bayesGARCH)
M0 <- 10000
M <- 10000
niter <- M0 + M
MCMC.initial <- bayesGARCH(r0,
                           mu.alpha = c(0, 0),
                           Sigma.alpha = 1000 * diag(1, 2),
                           mu.beta = 0,
                           Sigma.beta = 1000,
                           lambda = 0.01,
                           delta = 2,
                           control = list(n.chain = 1, l.chain = niter,
                                          refresh = 1000))
draws <- MCMC.initial$chain1

# Quick diagnostics for the training run (screen device only);
# alpha0 is rescaled by 10000 for readability.
par(mfrow = c(3, 4))
ts.plot(10000 * draws[(M0 + 1):niter, 1], xlab = "iterations",
        main = expression(10000 * alpha[0]), ylab = "")
ts.plot(draws[(M0 + 1):niter, 2], xlab = "iterations",
        main = expression(alpha[1]), ylab = "")
ts.plot(draws[(M0 + 1):niter, 3], xlab = "iterations",
        main = expression(beta), ylab = "")
ts.plot(draws[(M0 + 1):niter, 4], xlab = "iterations",
        main = expression(nu), ylab = "")
for (i in 1:4) acf(draws[(M0 + 1):niter, i], main = "")
hist(10000 * draws[(M0 + 1):niter, 1], main = "", prob = TRUE,
     ylab = "Density", xlab = ""); box()
for (i in 2:4) {
  hist(draws[(M0 + 1):niter, i], main = "", prob = TRUE,
       ylab = "Density", xlab = ""); box()
}

# NOTE(review): these moments are taken over ALL draws, burn-in included
# (draws, not draws[(M0+1):niter, ]) — presumably intentional as a rough
# prior centering, but worth confirming.
mpsi <- apply(draws, 2, mean)
vpsi <- apply(draws, 2, var)

# Final MCMC run on the dataset: prior centered at the training-run
# estimates with 100x inflated variances.
# NOTE(review): lambda = 2/mpsi[4] gives E(nu) = 2 + mpsi[4]/2 under the
# translated exponential, not E(nu) = mpsi[4]; confirm this is intended.
M0 <- 10000
M <- 100000
niter <- M0 + M
MCMC.final <- bayesGARCH(r,
                         mu.alpha = mpsi[1:2],
                         Sigma.alpha = 100 * diag(vpsi[1:2]),
                         mu.beta = mpsi[3],
                         Sigma.beta = 100 * vpsi[3],
                         lambda = 2 / mpsi[4],
                         delta = 2,
                         control = list(n.chain = 1, l.chain = niter,
                                        refresh = 5000))
draws <- MCMC.final$chain1
range <- (M0 + 1):niter           # post-burn-in index
range1 <- seq(M0 + 1, niter, by = 100)  # thinned index for trace plots
# Checking the final GARCH-t MCMC output
pdf(file = "model3-graph1.pdf", width = 10, height = 8)
par(mfrow = c(2, 4))
plot(range1, draws[range1, 1], xlab = "iterations",
     main = expression(alpha[0]), ylab = "", type = "l")
plot(range1, draws[range1, 2], xlab = "iterations",
     main = expression(alpha[1]), ylab = "", type = "l")
plot(range1, draws[range1, 3], xlab = "iterations",
     main = expression(beta), ylab = "", type = "l")
plot(range1, draws[range1, 4], xlab = "iterations",
     main = expression(nu), ylab = "", type = "l")
for (i in 1:4) acf(draws[range, i], main = "", lag.max = 200)
dev.off()

# Marginal posterior histograms
pdf(file = "model3-graph2.pdf", width = 6, height = 6)
par(mfrow = c(2, 2))
hist(draws[range, 1], main = expression(alpha[0]), prob = TRUE,
     ylab = "Density", xlab = "")
hist(draws[range, 2], main = expression(alpha[1]), prob = TRUE,
     ylab = "Density", xlab = "")
hist(draws[range, 3], main = expression(beta), prob = TRUE,
     ylab = "Density", xlab = "")
hist(draws[range, 4], main = expression(nu), prob = TRUE,
     ylab = "Density", xlab = "")
dev.off()

# Posterior of the persistence alpha1 + beta (close to 1 => high persistence)
pdf(file = "model3-graph3.pdf", width = 6, height = 6)
par(mfrow = c(1, 1))
alpha2beta <- draws[range, 2] + draws[range, 3]
hist(alpha2beta, main = expression(alpha[1] + beta), prob = TRUE,
     ylab = "Density", xlab = "")
dev.off()

# Post-burn-in draws, one vector per parameter
alpha0 <- draws[range, 1]
alpha1 <- draws[range, 2]
beta <- draws[range, 3]
nu <- draws[range, 4]

# Conditional-variance paths h[1:(n-1)] for a thinned subset of draws.
# h[1] is seeded with the last training return r0[n0-1] (so h[0]*beta = 0).
ind <- seq(1, M, by = 100)
nind <- length(ind)
hs <- matrix(0, nind, n - 1)
l <- 0
for (i in ind) {
  print(i)  # progress indicator
  h <- rep(0, n - 1)
  h[1] <- alpha0[i] + alpha1[i] * r0[n0 - 1]^2
  for (t in 2:(n - 1)) {
    h[t] <- alpha0[i] + alpha1[i] * r[t - 1]^2 + beta[i] * h[t - 1]
  }
  l <- l + 1
  hs[l, ] <- h
}

# Pointwise 95% bands and median of the posterior standard deviations
qh <- t(apply(sqrt(hs), 2, quantile, c(0.025, 0.5, 0.975)))
pdf(file = "model3-graph4.pdf", width = 8, height = 6)
par(mfrow = c(1, 1))
ts.plot(qh, col = c(3, 2, 3), ylim = c(0, 0.15),
        xlab = "Days", ylab = "Standard deviations")
# NOTE(review): squared returns are overlaid on a standard-deviation scale;
# abs(r) would be on the same scale — confirm the intended overlay.
lines(r^2, type = "h")
dev.off()

# Computing the predictive (harmonic mean over all M posterior draws).
# The scaled-t density of r[t] is dt(r/s, nu)/s with s = sqrt(rho*h[t]),
# hence the -0.5*log(rho) and -0.5*log(h) Jacobian terms below.
llike <- rep(0, M)
for (i in 1:M) {
  if ((i %% 1000) == 0) {
    print(i)  # progress indicator every 1000 draws
  }
  h <- rep(0, n - 1)
  h[1] <- alpha0[i] + alpha1[i] * r0[n0 - 1]^2
  for (t in 2:(n - 1)) {
    h[t] <- alpha0[i] + alpha1[i] * r[t - 1]^2 + beta[i] * h[t - 1]
  }
  rho <- (nu[i] - 2) / nu[i]
  llike[i] <- sum(dt(r / sqrt(rho * h), nu[i], log = TRUE)) -
    0.5 * (n - 1) * log(rho) - 0.5 * sum(log(h))
}
# FIX(review): log-sum-exp stabilizer named `lmax` instead of reusing `A`
lmax <- max(llike)
like <- exp(llike - lmax)
logpredM3 <- log(1 / mean(1 / like)) + lmax

# Comparing all models fitted so far
logpred <- c(logpredM0, logpredM11, logpredM21, logpredM3)
pred <- exp(logpred - max(logpred))
Prob <- round(100 * pred / sum(pred), 3)
cbind(logpred, Prob)

########################################################################
#
# Model 4: r[t]|h[t] ~ N(0, exp(h[t]))
#          h[t]|h[t-1] ~ N(mu + phi*(h[t-1] - mu), sig2)
#          h[0] ~ N(mu, sig2/(1 - phi^2))
#
# with mu in R, -1 < phi < 1 and sig2 > 0.
#
# Prior: mu ~ N(b.mu, B.mu)
#        (phi+1)/2 ~ Beta(a0, b0)
#        sig2 ~ G(1/2, 1/(2*B.sig))
#
########################################################################

# Training-sample fit under a vague prior, used to tune the prior of the
# final run (same two-stage strategy as Model 3).
library("stochvol")
r1 <- r0 - mean(r0)  # demeaned training returns
sv.fit <- svsample(r1, draws = 10000, burnin = 10000,
                   priormu = c(0, 100),
                   priorphi = c(1, 1),     # uniform on (phi+1)/2
                   priorsigma = 0.1)
par.prior <- sv.fit$para  # training-run draws, reused as "prior" histograms

# Prior for mu: centered at the training posterior mean, variance x100
b.mu <- mean(sv.fit$para[, 1])
B.mu <- 100 * var(sv.fit$para[, 1])
E <- mean(sv.fit$para[, 2])      # training posterior mean of phi
V <- 100 * var(sv.fit$para[, 2]) # inflated target variance for phi
B <- mean(sv.fit$para[, 3])      # training posterior mean of column 3

# Moment-match a Beta(a0, b0) prior for (phi+1)/2: grid-search b0 so the
# implied Var(phi) is close to V, with a0/b0 fixed by E(phi) = E.
# NOTE(review): if no grid point satisfies the 1e-4 tolerance, b0 becomes
# NA and svsample below fails — consider which.min(abs(qqq - V)) instead.
b0 <- seq(0.0001, 0.1, length = 1000)
ppp <- (E + 1) / (1 - E)
qqq <- 4 * ppp / (((ppp + 1)^2) * ((ppp + 1) * b0 + 1))
b0 <- (b0[abs(qqq - V) < 0.0001])[1]
a0 <- b0 * ppp

# Final fit on the demeaned dataset returns
r1 <- r - mean(r)
sv.fit <- svsample(r1, draws = 15000, burnin = 15000,
                   priormu = c(b.mu, B.mu),
                   priorphi = c(a0, b0),
                   priorsigma = B)
pars <- sv.fit$para
range <- seq(1, 15000, by = 15)  # thinned index for trace plots

# Checking the stochvol MCMC output.
# NOTE(review): columns of $para are (mu, phi, sigma); the beta/sigma^2
# labels below are kept from the original but look mislabeled — confirm.
pdf(file = "model4-graph1.pdf", width = 8, height = 6)
par(mfrow = c(3, 3))
ts.plot(pars[range, 1], xlab = "iterations", ylab = "", main = expression(mu))
acf(pars[, 1], main = "", lag = 40)
hist(pars[, 1], xlab = "", main = "", prob = TRUE)
ts.plot(pars[range, 2], xlab = "iterations", ylab = "", main = expression(beta))
acf(pars[, 2], main = "", lag = 40)
hist(pars[, 2], xlab = "", main = "", prob = TRUE)
ts.plot(pars[range, 3], xlab = "iterations", ylab = "",
        main = expression(sigma^2))
acf(pars[, 3], main = "", lag = 40)
hist(pars[, 3], xlab = "", main = "", prob = TRUE)
dev.off()

# Training-run ("prior", top row) vs final-run (bottom row) histograms
pdf(file = "model4-graph2.pdf", width = 8, height = 6)
par(mfrow = c(2, 3))
hist(par.prior[, 1], prob = TRUE, xlim = range(par.prior[, 1], pars[, 1]),
     main = expression(mu), xlab = ""); box()
hist(par.prior[, 2], prob = TRUE, xlim = range(par.prior[, 2], pars[, 2]),
     main = expression(beta), xlab = ""); box()
hist(par.prior[, 3], prob = TRUE, xlim = range(par.prior[, 3], pars[, 3]),
     main = expression(sigma^2), xlab = ""); box()
hist(pars[, 1], xlab = "", main = "", prob = TRUE,
     xlim = range(par.prior[, 1], pars[, 1])); box()
hist(pars[, 2], xlab = "", main = "", prob = TRUE,
     xlim = range(par.prior[, 2], pars[, 2])); box()
hist(pars[, 3], xlab = "", main = "", prob = TRUE,
     xlim = range(par.prior[, 3], pars[, 3])); box()
dev.off()

# Posterior distribution of standard deviations: the latent log-volatilities
# h[t] are mapped to sd scale via exp(h/2), then summarized pointwise by
# the 2.5%, 50% and 97.5% quantiles across draws.
qsd <- t(apply(exp(sv.fit$latent / 2), 2, quantile, c(0.025, 0.5, 0.975)))
pdf(file = "model4-graph3.pdf", width = 8, height = 6)
par(mfrow = c(1, 1))
ts.plot(qsd, col = c(3, 2, 3), xlab = "Days",
        ylab = "Standard deviation", ylim = c(0, 0.16))
# NOTE(review): squared demeaned returns overlaid on a sd scale; abs(r1)
# would match the axis units — confirm the intended overlay.
lines(r1^2, type = "h")
dev.off()