Logistic Regression
Description
Use the code below to solve the questions. I am expecting an R file and a Word document.
if (!require(mlba)) {
  library(devtools)
  install_github("gedeck/mlba/mlba", force=TRUE)
}
options(scipen=999)
# Logistic Regression
## The Logistic Regression Model
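# A brief aside (not part of the assignment code): the quantities plotted below follow the
# standard definitions, odds = p/(1 - p) and logit = log(p/(1 - p)), and the logistic
# (inverse logit) function maps a linear predictor back to a probability. A minimal sketch
# with purely illustrative values (beta0, beta1, x.demo are hypothetical, not from any data):
beta0 <- 1; beta1 <- 2; x.demo <- 0.5
p.demo <- 1 / (1 + exp(-(beta0 + beta1 * x.demo)))  # logistic function, p.demo ~ 0.88
log(p.demo / (1 - p.demo))                          # logit recovers beta0 + beta1*x.demo = 2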
library(ggplot2)
library(gridExtra)
p <- seq(0.005, 0.995, 0.01)
df <- data.frame(
  p = p,
  odds = p / (1 - p),
  logit = log(p / (1 - p))
)
g1 <- ggplot(df, aes(x=p, y=odds)) +
  geom_line() + coord_cartesian(xlim=c(0, 1), ylim=c(0, 100)) +
  labs(x='Probability of success', y='Odds', title='(a)') +
  geom_hline(yintercept = 0) +
  theme_bw() +
  theme(axis.line = element_line(colour = "black"),
        axis.line.x = element_blank(),
        panel.border = element_blank())
g2 <- ggplot(df, aes(x=p, y=logit)) +
  geom_line() + coord_cartesian(xlim=c(0, 1), ylim=c(-4, 4)) +
  labs(x='Probability of success', y='Logit', title='(b)') +
  geom_hline(yintercept = 0) +
  theme_bw() +
  theme(axis.line = element_line(colour = "black"),
        axis.line.x = element_blank(),
        panel.border = element_blank())
grid.arrange(g1, g2, ncol=2)
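# Quick check (not in the original script): base R's qlogis() and plogis() implement the
# same logit and inverse-logit transforms as the manual formulas in df above.
all.equal(df$logit, qlogis(df$p))   # TRUE: qlogis() is the logit transform
all.equal(df$p, plogis(df$logit))   # TRUE: plogis() is the inverse (logistic) transform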
## Example: Acceptance of Personal Loan
### Model with a Single Predictor
bank.df <- mlba::UniversalBank
g <- ggplot(bank.df, aes(x=Income, y=Personal.Loan)) +
  geom_jitter(width=0, height=0.01, alpha=0.1) +
  geom_function(fun=function(x){ return (1 / (1 + exp(6.04892 - 0.036*x))) }) +
  xlim(0, 250) +
  labs(x='Income (in $000s)') +
  theme_bw()
g
# Z Obtain coefficients for Personal.Loan ~ Income
glm.model.income <- glm(Personal.Loan ~ Income, data = bank.df, family = binomial)
glm.model.income
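# A hedged aside, not required by the assignment: exp() of the income coefficient gives the
# multiplicative change in the odds of loan acceptance per additional $1,000 of income.
exp(coef(glm.model.income))
# If the fitted slope is close to the 0.036 used in the plotted curve above, this is roughly
# exp(0.036) ~ 1.037, i.e. about a 3.7% increase in the odds per $1,000 of income.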
###############################################
### Estimating the Logistic Model from Data: Computing Parameter Estimates
#### Estimated Model
library(caret)
library(tidyverse)
# load and preprocess data
bank.df <- mlba::UniversalBank %>%
  select(-c(ID, ZIP.Code)) %>%   # Drop ID and zip code columns.
  mutate(
    Education = factor(Education, levels=c(1:3),
                       labels=c("Undergrad", "Graduate", "Advanced/Professional")),
    Personal.Loan = factor(Personal.Loan, levels=c(0, 1),
                           labels=c("No", "Yes"))
  )
# partition data
set.seed(2)
idx <- caret::createDataPartition(bank.df$Personal.Loan, p=0.6, list=FALSE)
train.df <- bank.df[idx, ]
holdout.df <- bank.df[-idx, ]
# build model
trControl <- caret::trainControl(method="cv", number=5, allowParallel=TRUE)
logit.reg <- caret::train(Personal.Loan ~ ., data=train.df, trControl=trControl,
                          # fit logistic regression with a generalized linear model
                          method="glm", family="binomial")
logit.reg
summary(logit.reg$finalModel)
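# Optional, not in the original script: an odds-ratio view of the fitted model. exp() of
# each coefficient is the multiplicative effect on the odds of loan acceptance, holding
# the other predictors constant (useful for the profiling discussion below).
round(exp(coef(logit.reg$finalModel)), 4)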
## Evaluating Classification Performance
### Interpreting Results in Terms of Odds (for a Profiling Goal)
# use predict() with type = "prob" on the caret model to compute predicted class probabilities.
logit.reg.pred <- predict(logit.reg, holdout.df[, -8], type = "prob")
str(holdout.df)
# display four different cases
interestingCases = c(1, 12, 32, 1333)
results <- data.frame(
  actual = holdout.df$Personal.Loan[interestingCases],
  p0 = logit.reg.pred[interestingCases, 1],
  p1 = logit.reg.pred[interestingCases, 2],
  predicted = ifelse(logit.reg.pred[interestingCases, 2] > 0.5, 1, 0)
)
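# Display the four cases (the original builds `results` but does not print it).
results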
# Z evaluate performance ##################################################################
# predict training set
logit.reg.pred.train <- predict(logit.reg, train.df[, -8], type = "prob")
predicted.train <- factor(ifelse(logit.reg.pred.train[, 2] > 0.5, 1, 0), levels = c(0, 1), labels = c("No", "Yes"))
# accuracy train
confusionMatrix(predicted.train, train.df$Personal.Loan)
# predict holdout
predicted.holdout <- factor(ifelse(logit.reg.pred[, 2] > 0.5, 1, 0), levels = c(0, 1), labels = c("No", "Yes"))
# accuracy holdout
confusionMatrix(predicted.holdout, holdout.df$Personal.Loan)
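# Optional check, not part of the original script: ROC curve and AUC on the holdout set.
# This assumes the pROC package is installed; it is not used anywhere else in this script.
library(pROC)
roc.holdout <- roc(response = holdout.df$Personal.Loan,
                   predictor = logit.reg.pred[, "Yes"],
                   levels = c("No", "Yes"), direction = "<")
plot(roc.holdout)
auc(roc.holdout)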
#########################################################################################
library(gains)
actual <- ifelse(holdout.df$Personal.Loan == "Yes", 1, 0)
# Z comments#########################################################################################
# the groups argument below sets the number of bins used to build the cumulative gains chart;
# it cannot exceed the number of distinct predicted probabilities, which is checked here.
n_distinct(logit.reg.pred[,2])
#######################################################################################################
gain <- gains(actual, logit.reg.pred[,2], groups=length(actual)-2)
# plot gains chart
nactual <- sum(actual)
g1 <- ggplot() +
  geom_line(aes(x=gain$cume.obs, y=gain$cume.pct.of.total * nactual)) +
  geom_line(aes(x=c(0, max(gain$cume.obs)), y=c(0, nactual)), color="darkgrey") +
  labs(x="# Cases", y="Cumulative")
# plot decile-wise lift chart
gain10 <- gains(actual, logit.reg.pred[,2], groups=10)
g2 <- ggplot(mapping=aes(x=gain10$depth, y=gain10$lift / 100)) +
  geom_col(fill="steelblue") +
  geom_text(aes(label=round(gain10$lift / 100, 1)), vjust=-0.2, size=3) +
  ylim(0, 8) + labs(x="Percentile", y="Lift")
grid.arrange(g1, g2, ncol=2)
### Z residual plot
# Fit a logistic regression model
model <- glm(Personal.Loan ~ ., data = bank.df, family = binomial(link = "logit"))
summary(model)
residuals <- residuals(model, type = "response")
# Create a residual plot
plot(model$fitted.values, residuals, ylab = "Residuals", xlab = "Fitted Values", main = "Logistic Regression Residuals vs. Fitted")
abline(h = 0, col = "red", lty = 2) # Add a horizontal line at y = 0 for reference
# Calculate predicted probabilities same as model fitted value
predicted_probs <- predict(model, type = "response")
predicted_probs[1:5]
model$fitted.values[1:5]
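# Quick check (not in the original script): for a glm, predict(type = "response") and the
# model's fitted values are the same vector.
all.equal(unname(predicted_probs), unname(model$fitted.values))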
# Define bin intervals (adjust bins and breaks as needed)
bins <- cut(predicted_probs, breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1.0), labels = FALSE)
# Calculate binned residuals
binned_residuals <- data.frame(
  PredictedProb = predicted_probs,
  ActualOutcome = ifelse(bank.df$Personal.Loan == "Yes", 1, 0), # Replace with the actual outcome variable
  Bin = bins
)
# Example: Calculate mean residuals for each bin
library(dplyr)
binned_residuals_summary <- binned_residuals %>%
  group_by(Bin) %>%
  summarize(MeanResidual = mean(ActualOutcome - PredictedProb))
# Visualize mean residuals
barplot(binned_residuals_summary$MeanResidual, names.arg = binned_residuals_summary$Bin,
        xlab = "Bin", ylab = "Mean Residual", main = "Mean Residuals by Bin")
# Load the ggplot2 package if not already loaded
library(ggplot2)
# Assuming you have calculated binned residuals as described earlier
# Create a column chart of the binned residuals
binned_residuals_summary %>% ggplot(aes(x = Bin, y = MeanResidual)) +
  geom_col() +
  labs(x = "Predicted Probability Bin", y = "Mean Binned Residual") +
  ggtitle("Binned Residuals by Bin") +
  theme_minimal()
library(arm)
# confidence bands
binnedplot(predicted_probs,residuals)
SIMULATION:
library(ggplot2)
library(dplyr)    # needed for the %>% pipe and filter_all() used below
library(reshape)
library(glmnet)
rm(list=ls())
set.seed(1)
x = rnorm(1000, sd=3) # A random variable
hist(x)
summary(x)
p = 1/(1+exp(-(1+10*x)))
hist(p)
plot(x,p)
y = rbinom(1000,1,p) # bernoulli response variable
plot(x,y)
# two approaches to set outcome categories
# 1 cutoff
#y = ifelse(p>=0.5,1,0)
# bernoulli response
#set.seed(1)
#prob <- c(0.3, 0.5, 0.7, 0.4, 0.6)
#result <- rbinom(5, 1, prob)
#prob
#result
data.plot = data.frame(y,p,x)
# plot probability, category
data.plot %>% ggplot(aes(x)) +
geom_line(aes(y=p)) +
geom_point(aes(y=y,color=factor(y)))+
theme_bw()
# plot category
data.plot %>% ggplot(aes(x,y,color=y)) +
geom_point() +
theme_bw()
# plot linear regression
data.plot %>% ggplot(aes(x,y,color=y)) +
  geom_point() +
  geom_smooth(method='lm', se=FALSE) +
  theme_bw()
# plot glm
data.plot %>% ggplot(aes(x,y,color=y)) +
  geom_point() +
  geom_smooth(method='glm',
              method.args = list(family = "binomial"),
              se=FALSE) +
  theme_bw()
# get coefficients through linear regression
logit = log(p/(1-p))
plot(x,logit)
data = data.frame(cbind(logit,x))
summary(data)
# remove infinite in all columns:
data = data %>%
filter_all(all_vars(!is.infinite(.)))
summary(data)
logit.model = lm(logit~x,data)
logit.model
# get coefficients through logistic regression via glm
data.glm = t(rbind(as.numeric(y),as.numeric(x)))
data.glm = data.frame(data.glm)
names(data.glm) = c("y", "x")
#data.glm = data.glm %>% rename(y=X1,x=X2)
data.glm$y = as.factor(data.glm$y)
summary(data.glm)
plot(data.glm$x,data.glm$y)
set.seed(1)
glm.model <- glm(y ~ x, data = data.glm, family = binomial)
glm.model
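# Not part of the original script: compare the recovered coefficients with the values used
# to simulate the data (intercept 1, slope 10). The lm fit on the true logit recovers them
# almost exactly (infinite logits from numerically saturated probabilities were dropped
# above); the glm fit estimates them from the Bernoulli outcomes, so it is close but noisier.
rbind(true = c("(Intercept)" = 1, x = 10),
      lm.on.logit = coef(logit.model),
      glm.fit = coef(glm.model))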