The “Bank Personal Loan Modelling” dataset comprises a comprehensive numerical table aimed at assisting banks in tailoring loan offerings to individual clients. It encompasses key demographic and financial indicators, including age, experience, income, and ZIP code, among others. By leveraging this dataset, banks can analyze client profiles with precision, identifying optimal loan products tailored to specific financial circumstances and risk profiles. With insights gleaned from this dataset, financial institutions can enhance their decision-making processes, ensuring that clients receive personalized loan recommendations that align with their unique needs and financial capabilities. Source: https://www.kaggle.com/datasets/samira1992/bank-loan-intermediate-dataset
# Bank Personal Loan data: 5000 clients with demographics, finances and the
# binary response Personal.Loan (1 = accepted a personal loan offer).
data <- read.csv("https://hedibert.org/wp-content/uploads/2024/05/Bank_Personal_Loan_Modelling.csv",
                 header = TRUE)
n <- nrow(data)
# NOTE(review): attach() is generally discouraged (name-masking risk), but the
# remainder of the script references columns unqualified, so it is kept.
attach(data)
# Left panel: income distribution with the three quartiles marked in red.
par(mfrow = c(1, 2))
hist(Income, xlab = "Income", prob = TRUE, main = "")
for (q in quantile(Income, c(0.25, 0.50, 0.75)))
  abline(v = q, lwd = 2, col = 2)
# Right panel: loan outcome against income; a small vertical jitter keeps
# the 0/1 responses from overplotting.
plot(Income, Personal.Loan + rnorm(n, 0, 0.02),
     xlab = "Income", ylab = "Loan?", axes = FALSE)
axis(1)
box()
axis(2, at = c(0, 1), lab = c("No (0)", "Yes (1)"))
# Overall proportion of clients holding a personal loan.
mean(data$Personal.Loan)
## [1] 0.096
# Proportion with a loan within each education level: acceptance rises
# sharply from level 1 to levels 2 and 3.
mean(data$Personal.Loan[data$Education == 1])
## [1] 0.04437023
mean(data$Personal.Loan[data$Education == 2])
## [1] 0.129722
mean(data$Personal.Loan[data$Education == 3])
## [1] 0.1365756
# Collapse education into a binary indicator: Educ = 0 when Education == 1,
# Educ = 1 for levels 2 and 3 (the two higher-acceptance groups above).
# A single vectorized comparison replaces the rep()/subset-assignment
# two-step of the original.
Educ <- as.numeric(Education > 1)
# Logistic regression: P(Personal.Loan = 1) modelled as a function of income
# and the binary education indicator Educ.
fit = glm(Personal.Loan~Income+Educ,family=binomial(link="logit"))
# Both slopes are positive and highly significant: higher income and more
# education increase the odds of holding a personal loan.  The residual
# deviance (1425 on 4997 df) is far below the null deviance (3162 on 4999 df).
summary(fit)
##
## Call:
## glm(formula = Personal.Loan ~ Income + Educ, family = binomial(link = "logit"))
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -11.304484 0.419049 -26.98 <2e-16 ***
## Income 0.059587 0.002397 24.86 <2e-16 ***
## Educ 4.055082 0.221122 18.34 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 3162 on 4999 degrees of freedom
## Residual deviance: 1425 on 4997 degrees of freedom
## AIC: 1431
##
## Number of Fisher Scoring iterations: 7
# Fitted success probabilities over an income grid, one curve per
# education group, overlaid on the jittered data.
inc <- seq(min(Income), max(Income), length = 100)
par(mfrow = c(1, 1))
plot(Income, Personal.Loan + rnorm(n, 0, 0.02),
     xlab = "Income", ylab = "Probability of loan")
# plogis() is the inverse logit: plogis(x) = 1 / (1 + exp(-x)).
p.educ0 <- plogis(fit$coef[1] + fit$coef[2] * inc)
lines(inc, p.educ0, col = 2, lwd = 2)
# Education 2 or 3: intercept shifted by the Educ coefficient.
p.educ1 <- plogis(fit$coef[1] + fit$coef[3] + fit$coef[2] * inc)
lines(inc, p.educ1, col = 3, lwd = 2)
abline(h = 0.5, lty = 2)
title("Generalized linear model (GLM)\nBernoulli with logit link")
legend("topleft", legend = c("Education=1", "Education 2 or 3"),
       col = 2:3, lwd = 2, lty = 1, bty = "n")
# Keep the ML curves for overlay on the Bayesian plot later.
eta.hat <- cbind(p.educ0, p.educ1)
#install.packages("UPG")
library("UPG")
# Bayesian logistic regression for the same model, fitted with the
# UPG Gibbs sampler.
y <- Personal.Loan
X <- cbind(1, Income, Educ)  # design matrix: intercept, income, education dummy
M0 <- 5000  # burn-in iterations
M <- 5000   # posterior draws retained after burn-in
fit.bayes <- UPG(y, X, model = "logit", draws = M, burn = M0)
## Checking data & inputs ...
## Initializing Gibbs Sampler ...
## Simulating from posterior distribution ...
## |======================================================================| 100%
## Sampling succesful!
## Saving output ...
## Finished! Posterior simulation took 37.08 seconds.
# Posterior summary: the means and SDs are close to the maximum-likelihood
# estimates and standard errors from summary(fit) above.
summary(fit.bayes)
##
##
## --- Bayesian Logit Results ---
##
## N = 5000
## Analysis based on 5000 posterior draws after
## an initial burn-in period of 5000 iterations.
## MCMC sampling took a total of 37.08 seconds.
##               Mean |   SD |   Q2.5 |  Q97.5 | 95% CI excl. 0 |
## ------------------ | ---- | ------ | ------ | -------------- |
## (Intercept) -10.82 | 0.39 | -11.60 | -10.09 |              * |
## Income        0.06 | 0.00 |   0.05 |   0.06 |              * |
## Educ          3.82 | 0.21 |   3.43 |   4.26 |              * |
# Posterior draws of the regression coefficients (M x 3 matrix:
# intercept, income, education).
coef <- fit.bayes$posterior$beta
# Renamed from `names` to avoid masking base::names().
coef.names <- c("Intercept", "Income", "Education")
par(mfrow = c(3, 3))
# Row 1: trace plots to check MCMC mixing.
for (i in seq_len(3))
  ts.plot(coef[, i], xlab = "Iterations", ylab = "", main = coef.names[i])
# Row 2: autocorrelation functions of the three chains.
for (i in seq_len(3))
  acf(coef[, i], main = "")
# Row 3: marginal posterior histograms with the MLE overlaid in red.
for (i in seq_len(3)) {
  hist(coef[, i], prob = TRUE, main = "", xlab = "")
  abline(v = fit$coef[i], lwd = 2, col = 2)
}
# Posterior means; colMeans() is the direct idiom for apply(., 2, mean).
mean.coef <- colMeans(coef)
inc <- seq(min(Income), max(Income), length = 100)
# NOTE(review): this clobbers the UPG fit object with a plain array of
# posterior probabilities; the later density plots read the array under this
# name, so it is kept.  Dimensions: group (1 = Educ 0, 2 = Educ 1) x draw x
# income grid point.
fit.bayes <- array(0, c(2, M, 100))
for (i in seq_len(100)) {
  fit.bayes[1, , i] <- plogis(coef[, 1] + coef[, 2] * inc[i])
  fit.bayes[2, , i] <- plogis(coef[, 1] + coef[, 3] + coef[, 2] * inc[i])
}
# Pointwise 95% credible bands and posterior medians for each group.
fit1 <- t(apply(fit.bayes[1, , ], 2, quantile, c(0.025, 0.5, 0.975)))
fit2 <- t(apply(fit.bayes[2, , ], 2, quantile, c(0.025, 0.5, 0.975)))
par(mfrow = c(1, 1))
plot(Income, Personal.Loan + rnorm(n, 0, 0.02),
     xlab = "Income", ylab = "Probability of loan")
# Posterior-mean curves (solid); plogis() is the inverse logit.
p.low <- plogis(mean.coef[1] + mean.coef[2] * inc)
lines(inc, p.low, col = 2, lwd = 2)
p.high <- plogis(mean.coef[1] + mean.coef[3] + mean.coef[2] * inc)
lines(inc, p.high, col = 3, lwd = 2)
abline(h = 0.5, lty = 2)
title("Bayesian GLM\nBernoulli with logit link")
# Maximum-likelihood curves (dashed) for comparison.
lines(inc, eta.hat[, 1], col = 2, lty = 2)
lines(inc, eta.hat[, 2], col = 3, lty = 2)
# 95% credible bands and medians around each curve.
for (j in seq_len(3)) lines(inc, fit1[, j], col = 2)
for (j in seq_len(3)) lines(inc, fit2[, j], col = 3)
legend("topleft", legend = c("Education=1", "Education 2 or 3"),
       col = 2:3, lwd = 2, lty = 1, bty = "n")
# Mark the three income grid points examined in the density plot below.
for (k in c(64, 75, 86)) abline(v = inc[k], lty = 2)
# Posterior densities of P(loan) at three income grid points, for both
# education groups (solid = Educ 0, dashed = Educ 1).
par(mfrow = c(1, 1))
idx <- c(64, 75, 86)  # grid indices matching the legend incomes below
plot(density(fit.bayes[1, , idx[1]]), xlim = c(0, 1), ylim = c(0, 45),
     xlab = "Probability of loan", main = "", lwd = 2)
for (j in 2:3)
  lines(density(fit.bayes[1, , idx[j]]), col = j, lwd = 2)
for (j in 1:3)
  lines(density(fit.bayes[2, , idx[j]]), col = j, lwd = 2, lty = 2)
legend("top", legend = c("Income=145 (90%)", "Income=170 (95%)", "Income=193 (99%)"),
       col = 1:3, lty = 1, bty = "n", lwd = 2)
legend("topleft", legend = c("Less education", "More education"),
       lty = 1:2, bty = "n", lwd = 2)