# Nonlinear mean function
psi = function(x){45*tanh(x/1.9-7)+57}

# Simulating some data
set.seed(12345)
sigma = 4
n     = 30
x     = sort(runif(n,8,18))
y     = rnorm(n,psi(x),sigma)

plot(x,y,pch=16,cex=1.5,xlab="x",ylab="y",las=1)
curve(psi,lwd=4,col="magenta",add=TRUE)
title(paste("n=",n," - sigma=",sigma,sep=""))

# Four regression functions
dfs = c(2,7,0.8*n)
ndf = length(dfs)
for (i in 1:ndf){
  df = dfs[i]
  psi_hat=smooth.spline(x,y,df=df, all.knots = TRUE)
  lines(psi_hat, lwd = 3, col=1+i)
}
legend("topleft",legend=c(expression(psi),dfs),col=c("magenta",2:(ndf+1)),lty=1,bty="n",lwd=3)


# Simulação do perde-ganha viés-variância (1)
bias2_variance = function(x, psi, sigma, n, df, N) {
    x_tr = matrix(runif(N*n, 8, 18), nrow = N)
    epsilon = matrix(rnorm(N*n, sd = sigma), nrow = N)
    y_tr = t(apply(x_tr, 1, psi)) + epsilon
    prediction = numeric(N)
    for (j in 1:N) {
        psi_hat=smooth.spline(x_tr[j,], y_tr[j,], df = df, all.knots = TRUE)$fit
        prediction[j] = predict(psi_hat, x)$y
    }
    list(bias2 = (mean(prediction) - psi(x))^2, variance = var(prediction))
}

set.seed(1234)
par(mfrow=c(3,3))
for (n in c(30,40,50)){
for (sigma in c(2,4,6)){

N          = 1000
dfs        = 4:20
ndf        = length(dfs)
bias2      = numeric(ndf)
variance   = numeric(ndf)
test_error = numeric(ndf)
x = 12
for (i in 1:ndf) {
	df = dfs[i]
    trade_off = bias2_variance(x, psi, sigma, n, df, N)
    bias2[i] = trade_off$bias2
    variance[i] = trade_off$variance
    test_error[i] = trade_off$bias2 + trade_off$variance + sigma^2
}

plot(dfs, bias2, ylim = c(0, max(test_error)), 
type = "l", lwd = 3,col = "blue", las = 1, xlab = "Spline's df", ylab = "")

lines(dfs, variance, type = "l", lwd = 3, col = "red")

lines(dfs, test_error, type = "l", lwd = 3, col = "dark green")

abline(h = sigma^2, lty = 2)

title(paste("n=",n," - sigma=",sigma,"\nOptimal df=",
dfs[test_error==min(test_error)],sep=""))

if (i==1){
legend("topright", c("Erro de predição", "Viés", "Variância", "Erro irredutível"),
       lty = c(1, 1, 1, 2), lwd = c(3, 3, 3, 1), col = c("dark green", "blue", "red", "black"), cex = 1.5)

}

}}