#
# Logistic regression example
#
# https://www.kaggle.com/datasets/samira1992/bank-loan-intermediate-dataset
#
# The "Bank Personal Loan Modelling" dataset comprises a comprehensive
# numerical table aimed at assisting banks in tailoring loan offerings to
# individual clients. It encompasses key demographic and financial indicators,
# including age, experience, income, and ZIP code, among others. By leveraging
# this dataset, banks can analyze client profiles with precision, identifying
# optimal loan products tailored to specific financial circumstances and risk
# profiles. With insights gleaned from this dataset, financial institutions can
# enhance their decision-making processes, ensuring that clients receive
# personalized loan recommendations that align with their unique needs and
# financial capabilities.
#
# Script outline:
#   1. Read the data and draw exploratory plots of income and loan uptake.
#   2. Fit a classical logit GLM: Personal.Loan ~ Income + Educ.
#   3. Fit the same model with the UPG sampler and compare both fits.
# All figures are written to "Bank_Personal_Loan_Modelling.pdf".

# Dependencies ----

# Load UPG before any graphics device is opened, so a missing package fails
# early. Install it only when it is absent instead of on every run.
if (!requireNamespace("UPG", quietly = TRUE)) {
  install.packages("UPG")
}
library("UPG")

# Helpers ----

# Inverse-logit loan probability for a coefficient vector
# beta = (intercept, income slope, education shift), evaluated at the given
# income values and education indicator (0 or 1).
loan_prob <- function(beta, income, educ) {
  beta <- unname(beta)
  plogis(beta[1] + beta[2] * income + beta[3] * educ)
}

# Scatter plot of the 0/1 loan indicator against income. Fresh N(0, 0.02^2)
# jitter is drawn on every call so overlapping points become visible.
# Further arguments in `...` are forwarded to plot().
plot_jittered_loans <- function(income, loan, ylab, ...) {
  plot(income, loan + rnorm(length(loan), 0, 0.02),
       xlab = "Renda", ylab = ylab, ...)
}

# Data ----

data <- read.csv("Bank_Personal_Loan_Modelling.csv", header = TRUE)
n <- nrow(data)

# Pull out the columns used below explicitly (instead of attach()), failing
# with a clear message if the file does not have the expected layout.
stopifnot(all(c("Income", "Personal.Loan", "Education") %in% names(data)))
Income <- data$Income
Personal.Loan <- data$Personal.Loan
Education <- data$Education

# Exploratory plots ----

pdf(file = "Bank_Personal_Loan_Modelling.pdf", width = 10, height = 6)

par(mfrow = c(1, 2))
hist(Income, xlab = "Renda", probability = TRUE, main = "")
# Mark the three income quartiles.
abline(v = quantile(Income, c(0.25, 0.5, 0.75)), lwd = 2, col = 2)

plot_jittered_loans(Income, Personal.Loan, ylab = "Empréstimo?",
                    axes = FALSE)
axis(1)
box()
axis(2, at = c(0, 1), labels = c("Não (0)", "Sim (1)"))

# Loan uptake rate overall and within each education level.
mean(Personal.Loan)
mean(Personal.Loan[Education == 1])
mean(Personal.Loan[Education == 2])
mean(Personal.Loan[Education == 3])

# Binary education indicator: 0 for Education == 1, 1 for every other level.
Educ <- rep(1, n)
Educ[Education == 1] <- 0

###############################
# GLM with logit link
###############################

fit <- glm(Personal.Loan ~ Income + Educ, family = binomial(link = "logit"))
summary(fit)

# Income grid on which all fitted curves are evaluated.
inc <- seq(min(Income), max(Income), length.out = 100)

# Fitted probabilities on the grid: column 1 is Educ = 0, column 2 is Educ = 1.
# Kept so the Bayesian plot below can overlay the classical fit.
eta.hat <- cbind(
  loan_prob(coef(fit), inc, 0),
  loan_prob(coef(fit), inc, 1)
)

par(mfrow = c(1, 1))
plot_jittered_loans(Income, Personal.Loan,
                    ylab = "Probabilidade de empréstimo")
lines(inc, eta.hat[, 1], col = 2, lwd = 2)
lines(inc, eta.hat[, 2], col = 3, lwd = 2)
abline(h = 0.5, lty = 2)
title("Modelo linear generalizado\nBernoulli com ligação logit")

##########################################
# Bayesian approach: GLM with logit link
##########################################

y <- Personal.Loan
X <- cbind(1, Income, Educ)
M0 <- 5000  # burn-in iterations
M <- 5000   # retained posterior draws
fit.bayes <- UPG(y, X, model = "logit", draws = M, burnin = M0)
summary(fit.bayes)

# Posterior draws, one row per draw and one column per coefficient.
beta.draws <- fit.bayes$posterior$beta
coef.labels <- c("Intercepto", "Renda", "Educação")

# MCMC diagnostics: trace plots, autocorrelations and marginal histograms.
par(mfrow = c(3, 3))
for (j in seq_len(3)) {
  ts.plot(beta.draws[, j], xlab = "Iterations", ylab = "",
          main = coef.labels[j])
}
for (j in seq_len(3)) {
  acf(beta.draws[, j], main = "")
}
for (j in seq_len(3)) {
  hist(beta.draws[, j], probability = TRUE, main = "", xlab = "")
}

mean.coef <- colMeans(beta.draws)

# Posterior draws of the loan probability on the income grid.
# Dimensions: education group (1 = Educ 0, 2 = Educ 1) x draw x grid point.
# The linear predictor is built for all draws and grid points at once;
# adding a length-nrow vector to the matrix recycles it down each column.
eta.low <- beta.draws[, 1] + outer(beta.draws[, 2], inc)
prob.draws <- array(0, c(2, nrow(beta.draws), length(inc)))
prob.draws[1, , ] <- plogis(eta.low)
prob.draws[2, , ] <- plogis(eta.low + beta.draws[, 3])

# Pointwise posterior median and 95% band for each education group.
band.probs <- c(0.025, 0.5, 0.975)
fit1 <- t(apply(prob.draws[1, , ], 2, quantile, probs = band.probs))
fit2 <- t(apply(prob.draws[2, , ], 2, quantile, probs = band.probs))

par(mfrow = c(1, 1))
plot_jittered_loans(Income, Personal.Loan,
                    ylab = "Probabilidade de empréstimo")
# Curves evaluated at the posterior mean of the coefficients.
lines(inc, loan_prob(mean.coef, inc, 0), col = 2, lwd = 2)
lines(inc, loan_prob(mean.coef, inc, 1), col = 3, lwd = 2)
abline(h = 0.5, lty = 2)
title("GLM Bayesiano\nBernoulli com ligação logit")
# Classical GLM fit (dashed) for comparison.
lines(inc, eta.hat[, 1], col = 2, lty = 2)
lines(inc, eta.hat[, 2], col = 3, lty = 2)
# Posterior 2.5%, 50% and 97.5% curves.
for (j in seq_len(3)) {
  lines(inc, fit1[, j], col = 2)
}
for (j in seq_len(3)) {
  lines(inc, fit2[, j], col = 3)
}

# Posterior densities of the loan probability at three income grid points.
# NOTE(review): the legend text is hard-coded; presumably it states the
# income at these grid positions and its sample quantile for this dataset.
# Re-check both if the data or the grid change.
grid.idx <- c(64, 75, 86)

par(mfrow = c(1, 2))
plot(density(prob.draws[1, , grid.idx[1]]), xlim = c(0, 0.7),
     ylim = c(0, 45), xlab = "Probabilidade de empréstimo", main = "",
     lwd = 2)
lines(density(prob.draws[1, , grid.idx[2]]), col = 2, lwd = 2)
lines(density(prob.draws[1, , grid.idx[3]]), col = 3, lwd = 2)
title("Baixa educação")
legend("topright",
       legend = c("Renda=145 (90%)", "Renda=170 (95%)", "Renda=193 (99%)"),
       col = 1:3, lty = 1, bty = "n", lwd = 2)

plot(density(prob.draws[2, , grid.idx[1]]), xlim = c(0.6, 1),
     ylim = c(0, 100), xlab = "Probabilidade de empréstimo", main = "",
     lwd = 2)
lines(density(prob.draws[2, , grid.idx[2]]), col = 2, lwd = 2)
lines(density(prob.draws[2, , grid.idx[3]]), col = 3, lwd = 2)
title("Alta educação")

dev.off()