The following non-linear regression model was considered by Marske (1967), and studied in detail by Bates and Watts (1988), relates biochemical oxygen demand (bod), denoted here by \(y\) and measured in mg/l, of prepared water samples to incubation time x (in days).
x = c(1:5,7)
y = c(8.3,10.3,19,16,15.6,19.8)
n = length(x)
plot(x,y,xlab="Incubation time (days)",ylab="biochemical oxygen demand (mg/l)")We will assume that \(y_i\) is related to \(x_i\), for \(i=1,\ldots,n\), nonlinearly as follows \[ y_i = \beta_1(1-e^{-\beta_2 x_i}) + \varepsilon_i \qquad \varepsilon_i \sim N(0,\sigma^2), \] where \(\beta_1\) is the asymptotic maximum BOD and \(\beta_2\) is the rate constant.
Bates D. M., Watts D. G. (1988). Nonlinear Regression Analysis and Its Applications, series Wiley Series in Probability and Statistics. Wiley.
Marske D. M. (1967). Biochemical Oxygen Demand Data Interpretation Using Sum of Squares Surface. Master’s thesis, University of Wisconsin – Madison.
Here we are assuming \(\sigma=2.55\) (which is actually an MLE estimate) in order to plot the contours of the bivariate likelihood for \((\beta_1,\beta_2)\).
like = function(beta1,beta2,sigma){
prod(dnorm(y,beta1*(1-exp(-beta2*x)),sigma))
}
prior = function(beta1,beta2){
V = cbind(1-exp(-beta2*x),beta1*x*(1-exp(-beta2*x)))
return(sqrt(det(t(V)%*%V)))
}
post = function(beta1,beta2,sigma){
prior(beta1,beta2)*like(beta1,beta2,sigma)
}
sigma.mle = 2.55
N = 200
beta1 = seq(10,40,length=N)
beta2 = seq(0,2.5,length=N)
likes = matrix(0,N,N)
priors = matrix(0,N,N)
for (i in 1:N)
for (j in 1:N){
likes[i,j] = like(beta1[i],beta2[j],sigma.mle)
priors[i,j] = prior(beta1[i],beta2[j])
}
posts = priors*likes
par(mfrow=c(1,3))
contour(beta1,beta2,priors,drawlabels=FALSE,nlevels=20,xlab=expression(beta[1]),ylab=expression(beta[2]),main="Prior")
contour(beta1,beta2,likes,drawlabels=FALSE,nlevels=20,xlab=expression(beta[1]),ylab=expression(beta[2]),main="Likelihood")
contour(beta1,beta2,posts,drawlabels=FALSE,nlevels=20,xlab=expression(beta[1]),ylab=expression(beta[2]),main="Posterior")We will fit the non-linear model to find MLEs via the nls R fucntion for nonlinear least squares, which is equivalent to MLE for Gaussian errors.
#install.packages("MASS")
#install.packages("mvtnorm")
library("MASS")
library("mvtnorm")
model = nls(y~beta1*(1-exp(-beta2*x)),start=list(beta1=20,beta2=0.5))
summary(model)##
## Formula: y ~ beta1 * (1 - exp(-beta2 * x))
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## beta1 19.1426 2.4959 7.670 0.00155 **
## beta2 0.5311 0.2031 2.615 0.05910 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.549 on 4 degrees of freedom
##
## Number of iterations to convergence: 3
## Achieved convergence tolerance: 1.459e-06
# MLE of beta1, beta2 and sigma2
beta.mle = coef(model)
beta1.mle = beta.mle["beta1"]
beta2.mle = beta.mle["beta2"]
res = residuals(model)
sigma.mle = sqrt(sum(res^2)/(n-2))
c(beta.mle,sigma.mle)## beta1 beta2
## 19.1425816 0.5310908 2.5490325
xx = seq(min(x),max(x),length=N)
plot(x,y,xlab="Incubation time (days)",ylab="biochemical oxygen demand (mg/l)")
lines(xx,beta1.mle*(1-exp(-beta2.mle*xx)),col=4)set.seed(54321)
k = 9
M1 = 100000
M = 20000
beta.draws = mvrnorm(M1,beta.mle,k*V.beta)
sigma.draws = runif(M1,0,10)
par(mfrow=c(1,1))
plot(beta.draws,xlab=expression(beta[1]),ylab=expression(beta[2]))
contour(beta1,beta2,posts,drawlabels=FALSE,nlevels=10,add=TRUE,col=3)
title("Proposal draws")ind = sample(1:M1,size=M,replace=TRUE,prob=w)
beta.post = beta.draws[ind,]
sigma.post = sigma.draws[ind]
par(mfrow=c(2,2))
plot(beta.post,xlab=expression(beta[1]),ylab=expression(beta[2]),pch=16)
contour(beta1,beta2,posts,drawlabels=FALSE,nlevels=20,add=TRUE,col=3,lwd=2)
title("Posterior draws")
hist(beta.post[,1],prob=TRUE,main="",xlab=expression(beta[1]),ylab="Posterior density")
hist(beta.post[,2],prob=TRUE,main="",xlab=expression(beta[2]),ylab="Posterior density")
hist(sigma.post,prob=TRUE,main="",xlab=expression(sigma),ylab="Posterior density")Below we will sample from \[ p(y_{new}|x,y,x_{new}) = \int_\Theta p(y_{new}|x_{new},\theta)p(\theta|x,y)d\theta, \] where \(x_{new}\) will be values between \(1\) and \(7\). Since we have draws \(\theta^{(1)},\ldots,\theta^{(M)}\) from \(p(\theta|x,y)\), we can sample \(y_{new}^{(i)}\) from \(p(y|x_{new},\theta^{(i)}))\), for \(i=1,\ldots,M\). Therefore \[ y_{new}^{(1)},\ldots,y_{new}^{(M)}\sim p(y_{new}|x,y,x_{new}). \]
set.seed(1256654)
N = 60
xx = seq(min(x),max(x),length=N)
pred = matrix(0,M,N)
predx6 = rep(0,M)
for (i in 1:M){
pred[i,] = beta.post[i,1]*(1-exp(-beta.post[i,2]*xx))+rnorm(N,0,sigma.post[i])
predx6[i] = beta.post[i,1]*(1-exp(-beta.post[i,2]*6))+rnorm(1,0,sigma.post[i])
}
qpred = t(apply(pred,2,quantile,c(0.1,0.5,0.9)))
qx6 = round(quantile(predx6,c(0.1,0.5,0.9)),1)
par(mfrow=c(1,2))
plot(x,y,ylim=range(qpred,y),xlab="Incubation time (days)",ylab="biochemical oxygen demand (mg/l)",pch=16)
lines(xx,qpred[,1],lwd=2,col=2,lty=2)
lines(xx,qpred[,2],lwd=2,col=2,lty=2)
lines(xx,qpred[,3],lwd=2,col=2,lty=2)
lines(xx,beta1.mle*(1-exp(-beta2.mle*xx)),col=4,lwd=2)
legend("bottomright",legend=c("OLS/MLE","BAYES"),lty=1,col=c(4,2),bty="n",lwd=2)
hist(predx6,prob=TRUE,main=paste("90% IC:(",qx6[1],",",qx6[3],")\n Median=",qx6[2],sep=""),
xlab="BOD (mg/l) when x[new]=6",ylab="Predictive density",breaks=20)