The very first thing to do is to install (and upload) the package “forecast”.

#install.packages("forecast") 
library(forecast) 

The data “AirPassengers” is the classic Box & Jenkins airline data and represents the monthly totals of international airline passangers from 1949 to 1960.

par(mfrow=c(2,2))
plot(AirPassengers)
plot(diff(AirPassengers),ylab="Diff 1",main=expression(y[t]-y[t-1]))
plot(diff(AirPassengers,12),ylab="Diff 12",main=expression(y[t]-y[t-12]))
plot(diff(diff(AirPassengers,1),12),ylab="Diff 1,12",main=expression(y[t]-y[t-1]-y[t-12]+y[t-13]))

Let also take logs to linearize themultiplicative seasonality:

par(mfrow=c(2,2))
plot(log(AirPassengers))
plot(diff(log(AirPassengers)),ylab="Diff 1",main=expression(y[t]-y[t-1]))
plot(diff(log(AirPassengers),12),ylab="Diff 12",main=expression(y[t]-y[t-12]))
plot(diff(diff(log(AirPassengers),1),12),ylab="Diff 1,12",main=expression(y[t]-y[t-1]-y[t-12]+y[t-13]))

We will split the \(n=144\) observations into two parts. A training sample of size \(n_1=108\) (9 years) and a testing sample of size \(n_2=36\) (3 years). We also construct 11 dummy variables to represent the months of february to december when fitting a deterministic trend+cycle model. An intercept, a linear growth and the 11 dummies are then included in the design matrix \(X\).

# Training and testing samples
# Training sample: 1949.1 - 1957.12
# Testing sample: 1958.1 - 1960.12
n1 = 108 
n = length(AirPassengers)
trend = 1:n
d1 = rep(c(1,rep(0,11)),12)
d2 = c(0,d1)[1:n]
d3 = c(0,d2)[1:n]
d4 = c(0,d3)[1:n]
d5 = c(0,d4)[1:n]
d6 = c(0,d5)[1:n]
d7 = c(0,d6)[1:n]
d8 = c(0,d7)[1:n]
d9 = c(0,d8)[1:n]
d10 = c(0,d9)[1:n]
d11 = c(0,d10)[1:n]
X = cbind(1,trend,d1,d2,d3,d4,d5,d6,d7,d8,d9,d10,d11)

We first fit the deterministic seasonality model to the airline data and its log transformation.

par(mfrow=c(1,2))
# Deterministic seasonality - AirPassengers
###################################
reg = lm(AirPassengers[1:n1]~X[1:n1,]-1)
fit = ts(reg$fit,start=c(1949,1),end=c(1957,12),frequency=12)
plot(AirPassengers,ylim=range(AirPassengers,fit))
lines(fit,col=2)
legend("topleft",legend=c("Data","Fit"),col=1:2,lty=1)
title("Deterministic cycle")

# Deterministic seasonality - log-AirPassengers
######################################
reg = lm(log(AirPassengers[1:n1])~X[1:n1,]-1)
fit = ts(reg$fit,start=c(1949,1),end=c(1957,12),frequency=12)
plot(log(AirPassengers),ylim=range(log(AirPassengers),fit))
lines(fit,col=2)
legend("topleft",legend=c("Data","Fit"),col=1:2,lty=1)
title("Deterministic cycle")

We now fit seasonal ARIMA models to the airline data.

bics = matrix(0,16,5)
s = 0
for (i in 0:1)
for (j in 0:1)
for (k in 0:1)
for (l in 0:1){
 s = s + 1
bics[s,] = c(i,j,k,l,Arima(window(AirPassengers,end=1957+11/12), 
                      order=c(i,1,j),seasonal=list(order=c(k,1,l),period=12), lambda=0)$bic)
}
bics[bics[,5]==min(bics[,5]),]
## [1]    0.0000    1.0000    0.0000    1.0000 -336.9304

The best model, based on the BIC, is an ARIMA\((0,1,1)(0,1,1)_{12}\).

air.model = Arima(window(AirPassengers,end=1957+11/12), order=c(0,1,1),
                              seasonal=list(order=c(0,1,1),period=12), lambda=0) 
air.model
## Series: window(AirPassengers, end = 1957 + 11/12) 
## ARIMA(0,1,1)(0,1,1)[12]                    
## Box Cox transformation: lambda= 0 
## 
## Coefficients:
##           ma1     sma1
##       -0.3864  -0.5885
## s.e.   0.1097   0.0927
## 
## sigma^2 estimated as 0.001416:  log likelihood=175.3
## AIC=-344.59   AICc=-344.33   BIC=-336.93
plot(AirPassengers,ylim=range(AirPassengers,air.model$fit))
lines(air.model$fit,col=2)
legend("topleft",legend=c("Data","Fit"),col=1:2,lty=1)
title("ARIMA(0,1,1)(0,1,1)[12]")

We ned by comparing the models via out-of-sample forecasting performance.

par(mfrow=c(1,2))

reg = lm(AirPassengers[1:n1]~X[1:n1,]-1)
fit = ts(reg$fit,start=c(1949,1),end=c(1957,12),frequency=12)
plot(AirPassengers,ylim=c(0,1000))
lines(fit,col=2)
fit1 = ts(X[(n1+1):n,]%*%reg$coef,start=c(1958,1),end=c(1960,12),frequency=12)
lines(fit1,col=4)
title("Deterministic cycle")

air.model = Arima(window(AirPassengers,end=1957+11/12), order=c(0,1,1),
                              seasonal=list(order=c(0,1,1),period=12), lambda=0) 
plot(forecast(air.model,h=36),ylab="AirPassengers",ylim=c(0,1000))
lines(AirPassengers)