Introduction

We have already talked about the skew-normal distribution: http://hedibert.org/wp-content/uploads/2021/05/normal-vs-skewnormal.html. Let us now add the skew-t distribution.

#install.packages("sn")
library("sn")
## Loading required package: stats4
## 
## Attaching package: 'sn'
## The following object is masked from 'package:stats':
## 
##     sd
library("fBasics")
## Loading required package: timeDate
## Loading required package: timeSeries
## 
## Attaching package: 'fBasics'
## The following objects are masked from 'package:sn':
## 
##     tr, vech

Let us sample data from both distributions

Let \(x_1,\ldots,x_n \sim SN(\theta,\sigma^2,\alpha)\) and \(y_1,\ldots,y_n \sim ST(\theta,\sigma^2,\alpha,\nu)\), where \(\theta\) is the location parameter, \(\sigma\) the scale parameter, \(\alpha\) the skewness parameter and \(\nu\) the tail-heaviness parameter.

theta = 0
sigma = 1
alpha = 3
nu    = 4
n     = 10000
set.seed(13579)
x     = rnorm(n,theta,sigma)
y     = rsn(n,xi=theta,omega=sigma,alpha=alpha)
z     = rst(n,xi=theta,omega=sigma,alpha=alpha,nu=nu)

summary.x = c(mean(x),var(x),quantile(x,c(0.01,0.5,0.99)),skewness(x),kurtosis(x))
summary.y = c(mean(y),var(y),quantile(y,c(0.01,0.5,0.99)),skewness(y),kurtosis(y))
summary.z = c(mean(z),var(z),quantile(z,c(0.01,0.5,0.99)),skewness(z),kurtosis(z))

tab = cbind(summary.x,summary.y,summary.z)
rownames(tab) = c("Mean","Var","1%","50%","99%","Skewness","Kurtosis")
colnames(tab) = c("Normal","SkewNormal","SkewT")
tab
##                Normal SkewNormal      SkewT
## Mean      0.002086571  0.7507519  0.9428213
## Var       0.967650306  0.4260065  1.0575243
## 1%       -2.253153955 -0.4321687 -0.5468168
## 50%       0.001644743  0.6681764  0.7287820
## 99%       2.276506080  2.5535598  4.4274761
## Skewness  0.006641130  0.6738564  2.5980144
## Kurtosis -0.064160572  0.5077461 18.3475135
xxx = seq(-10,10,length=1000)
par(mfrow=c(2,2))
plot(xxx,dsn(xxx,xi=theta, omega=sigma, alpha=alpha),type="l",xlab="x",
ylab="Density",main=paste("Standard skew-normal\n theta=",theta," - sigma=",sigma," - alpha=",alpha,sep=""))
lines(density(y),col=2)
legend("topleft",legend=c("Density","Kernel approximation"),col=1:2,bty="n",lty=1)
plot(xxx,dst(xxx,xi=theta, omega=sigma, alpha=alpha, nu=nu),type="l",xlab="x",
ylab="Density",main=paste("Standard skew-t\n theta=",theta," - sigma=",sigma," - alpha=",alpha," - nu=",nu,sep=""))
lines(density(z),col=2)
plot(xxx,dsn(xxx,xi=theta, omega=sigma, alpha=alpha),type="l",xlab="x",ylab="Density")
lines(xxx,dst(xxx,xi=theta, omega=sigma, alpha=alpha, nu=nu),col=2)
legend("topleft",legend=c("Skew-normal","Skew-t"),col=1:2,bty="n",lty=1)
plot(xxx,dsn(xxx,xi=theta, omega=sigma, alpha=alpha,log=TRUE),type="l",xlab="x",ylab="Log density")
lines(xxx,dst(xxx,xi=theta, omega=sigma, alpha=alpha, nu=nu,log=TRUE),col=2)

BMI example

#install.packages("mixsmsn")
library("mixsmsn")
## Loading required package: mvtnorm
data("bmi")
x = bmi$bmi[bmi$bmi>26]
hist(x,breaks=40,main="Histogram of BMI\nBMI greater than 26",xlab="bmi",prob=TRUE)

Snorm.analysis = smsn.mix(x,nu=3,g=1,get.init = TRUE,criteria=TRUE,group=TRUE,
                          family="Skew.normal",calc.im=FALSE) 
 
St.analysis = smsn.mix(x,nu=3,g=1,get.init=TRUE,criteria=TRUE,group=TRUE,
                       family="Skew.t",calc.im = FALSE)

par(mfrow=c(1,2))
mix.hist(x,Snorm.analysis) 
mix.hist(x,St.analysis)

mix.print(Snorm.analysis)
## 
##  Number of observations:  1090 
## 
##        group 1
## mu      28.116
## sigma2  65.188
## shape    4.461
## 
##  AIC:  6483.2 
## 
##  BIC:  6498.182 
## 
##  EDC:  6497.009 
## 
##  ICL:  6498.182 
## 
##  EM iterations:  61
mix.print(St.analysis)
## 
##  Number of observations:  1090 
## 
## 
##  Hyperparameter(nu):  7.598395 
## 
##        group 1
## mu      28.784
## sigma2  41.585
## shape    3.018
## 
##  AIC:  6464.844 
## 
##  BIC:  6484.819 
## 
##  EDC:  6483.256 
## 
##  ICL:  6484.819 
## 
##  EM iterations:  83
bmi.im = im.smsn(bmi$bmi, Snorm.analysis) 
sdevn  = sqrt(diag(solve(bmi.im$IM)))

bmi.im = im.smsn(bmi$bmi, St.analysis) 
sdevt  = sqrt(diag(solve(bmi.im$IM)))

round(sdevn,3)
##    mu1 sigma1 shape1 
##  0.055  2.404  0.100
round(sdevt,3)
##    mu1 sigma1 shape1     nu 
##  0.148  2.519  0.157  0.494

BMI example - Finite mixture of skew-normal and skew-t distributions

We will not dig into finite mixture of distributions yet. Nonetheless, I would like to give you a flavor of its power in flexibly modeling non-Gaussian data. For model details, take a look at our set of notes at http://hedibert.org/mixture-models-and-regime-switching-models. In general, we represent a finite mixture of, say, normal distributions as \[ p(x|\theta) = \sum_{j=1}^k \pi_j p_N(x|\mu_j,\sigma_i^2), \] where \(0 \leq \pi_j \leq 1\) for \(j=1,\ldots,k\) and \(\sum_{j=1}^k \pi_j=1\). The parameters \(\mu_i\) and \(\sigma_i^2\) are, respectively, the location and the scale parameters of the \(i\)th normal component of the mixture of normals. Therefore, conditionally on \(k\), the number of components (or groups), the whole vector of unknown parameters is \(\theta=(\pi_1,\ldots,\pi_k,\mu_1,\ldots,\mu_k,\sigma_1,\ldots,\sigma_k)\). Bayesian inference proceeds as usual: \[ p(\theta|x_1,\ldots,x_n,k) = \frac{\prod_{i=1}^n p(x_i|\theta,k)p(\theta|k)d\theta}{\int_\Theta \prod_{i=1}^n p(x_i|\theta,k)p(\theta|k)d\theta}, \] where the likelihood is a very non-linear function of \(\theta\): \[ \prod_{i=1}^n p(x_i|\theta,k) = \prod_{i=1}^n \sum_{j=1}^k \pi_j p_N(x_i|\mu_j,\sigma_j^2). \] Predictive and posterior inference are only available via Monte Carlo approximations, usually via Markov chain Monte Carlo algorithms. Below we consider a simple application of finite mixture of skew-normal and skew-t distributions.

x = bmi$bmi
hist(x,breaks=40,main="Histogram of BMI",xlab="bmi",prob=TRUE)

Snorm.analysis = smsn.mix(x,nu=3,g=2,get.init = TRUE,criteria=TRUE,group=TRUE,
                          family="Skew.normal",calc.im=FALSE) 
 
St.analysis = smsn.mix(x,nu=3,g=2,get.init=TRUE,criteria=TRUE,group=TRUE,
                       family="Skew.t",calc.im = FALSE)

par(mfrow=c(1,2))
mix.hist(x,Snorm.analysis) 
mix.hist(x,St.analysis)

mix.print(Snorm.analysis)
## 
##  Number of observations:  2107 
## 
##        group 1 group 2
## mu      28.455  20.221
## sigma2  62.240   7.997
## shape    3.667   0.991
## 
##  AIC:  13764.42 
## 
##  BIC:  13803.99 
## 
##  EDC:  13814.68 
## 
##  ICL:  13909.3 
## 
##  EM iterations:  72
mix.print(St.analysis)
## 
##  Number of observations:  2107 
## 
## 
##  Hyperparameter(nu):  6.048196 
## 
##        group 1 group 2
## mu      29.735  19.912
## sigma2  33.281   8.986
## shape    2.221   1.403
## 
##  AIC:  13739.62 
## 
##  BIC:  13784.84 
## 
##  EDC:  13797.06 
## 
##  ICL:  13964.65 
## 
##  EM iterations:  77
bmi.im = im.smsn(bmi$bmi, Snorm.analysis) 
sdevn  = sqrt(diag(solve(bmi.im$IM)))

bmi.im = im.smsn(bmi$bmi, St.analysis) 
sdevt  = sqrt(diag(solve(bmi.im$IM)))

round(sdevn,3)
##    mu1 sigma1 shape1     p1    mu2 sigma2 shape2 
##  0.323  2.874  1.149  0.023  0.773  3.771  0.707
round(sdevt,3)
##    mu1 sigma1 shape1     p1    mu2 sigma2 shape2     nu 
##  0.401  5.615  0.759  0.021  0.338  2.394  0.426  1.443