Logistic Regression

Description


Use the code below to solve the questions. I am expecting an R file and a Word document.

if (!require(mlba)) {

library(devtools)

install_github("gedeck/mlba/mlba", force=TRUE)

}

options(scipen=999)

# Logistic Regression

## The Logistic Regression Model

library(ggplot2)

library(gridExtra)

p <- seq(0.005, 0.995, 0.01)

df <- data.frame(

p = p,

odds = p / (1 - p),

logit = log(p / (1 - p))

)
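
# Quick numeric check (illustrative sketch, not part of the original exercise):
# for a single probability p = 0.8, the odds are 0.8/0.2 = 4 and the logit is log(4), about 1.386.
p0 <- 0.8
p0 / (1 - p0)        # odds
log(p0 / (1 - p0))   # logit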

g1 <- ggplot(df, aes(x=p, y=odds)) +

geom_line() + coord_cartesian(xlim=c(0, 1), ylim=c(0, 100)) +

labs(x='Probability of success', y='Odds', title='(a)') +

geom_hline(yintercept = 0) +

theme_bw() +

theme(axis.line = element_line(colour = "black"),

axis.line.x = element_blank(),

panel.border = element_blank())

g2 <- ggplot(df, aes(x=p, y=logit)) +

geom_line() + coord_cartesian(xlim=c(0, 1), ylim=c(-4, 4)) +

labs(x='Probability of success', y='Logit', title='(b)') +

geom_hline(yintercept = 0) +

theme_bw() +

theme(axis.line = element_line(colour = "black"),

axis.line.x = element_blank(),

panel.border = element_blank())

grid.arrange(g1, g2, ncol=2)

## Example: Acceptance of Personal Loan

### Model with a Single Predictor

bank.df <- mlba::UniversalBank

g <- ggplot(bank.df, aes(x=Income, y=Personal.Loan)) +

geom_jitter(width=0, height=0.01, alpha=0.1) +

geom_function(fun=function(x){ return (1 / (1 + exp(6.04892 - 0.036*x)))}) +

xlim(0, 250) +

labs(x='Income (in $000s)') +

theme_bw()

g

# Z Obtain coefficients for Personal.Loan ~ Income

glm.model.income <- glm(Personal.Loan ~ Income, data = bank.df, family = binomial)

glm.model.income
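
# Sketch (assuming the model above fits without issues): the curve drawn with geom_function()
# uses the fitted intercept and Income slope, so coef(glm.model.income) should be close to
# -6.04892 and 0.036. Exponentiating the Income coefficient expresses it as the multiplicative
# change in the odds of loan acceptance per additional $1,000 of income.
coef(glm.model.income)
exp(coef(glm.model.income)["Income"])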

###############################################

### Estimating the Logistic Model from Data: Computing Parameter Estimates

#### Estimated Model

library(caret)

library(tidyverse)

# load and preprocess data

bank.df <- mlba::UniversalBank %>%

select(-c(ID, ZIP.Code)) %>% # Drop ID and zip code columns.

mutate(

Education = factor(Education, levels=c(1:3),

labels=c("Undergrad", "Graduate", "Advanced/Professional")),

Personal.Loan = factor(Personal.Loan, levels=c(0, 1),

labels=c("No", "Yes"))

)

# partition data

set.seed(2)

idx <- caret::createDataPartition(bank.df$Personal.Loan, p=0.6, list=FALSE)

train.df <- bank.df[idx, ]

holdout.df <- bank.df[-idx, ]

# build model

trControl <- caret::trainControl(method="cv", number=5, allowParallel=TRUE)

logit.reg <- caret::train(Personal.Loan ~ ., data=train.df, trControl=trControl,

# fit logistic regression with a generalized linear model

method="glm", family="binomial")

logit.reg

summary(logit.reg$finalModel)
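
# Sketch: the coefficients above are on the logit (log-odds) scale; exponentiating them
# expresses each one as an odds multiplier (values > 1 raise the odds of loan acceptance,
# values < 1 lower them). finalModel is the underlying glm fit stored by caret.
round(exp(coef(logit.reg$finalModel)), 3)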

## Evaluating Classification Performance

### Interpreting Results in Terms of Odds (for a Profiling Goal)

# use predict() with type = "prob" to compute predicted class probabilities (one column per class).

logit.reg.pred <- predict(logit.reg, holdout.df[, -8], type = "prob")

str(holdout.df)

# display four different cases

interestingCases = c(1, 12, 32, 1333)

results=data.frame(

actual = holdout.df$Personal.Loan[interestingCases],

p0 = logit.reg.pred[interestingCases, 1],

p1 = logit.reg.pred[interestingCases, 2],

predicted = ifelse(logit.reg.pred[interestingCases, 2] > 0.5, 1, 0)

)

results

# Z evaluate performance ##################################################################

# predict training set

logit.reg.pred.train <- predict(logit.reg, train.df[, -8], type = "prob")

predicted.train <- factor(ifelse(logit.reg.pred.train[, 2] > 0.5, 1, 0), levels = c(0, 1), labels = c("No", "Yes"))

# accuracy train

confusionMatrix(predicted.train, train.df$Personal.Loan)  # data = predictions, reference = actuals

# predict holdout

predicted.holdout <- factor(ifelse(logit.reg.pred[, 2] > 0.5, 1, 0), levels = c(0, 1), labels = c("No", "Yes"))

# accuracy holdout

confusionMatrix(predicted.holdout, holdout.df$Personal.Loan)  # data = predictions, reference = actuals

#########################################################################################
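
# Sketch (illustrative cutoff, not prescribed by the assignment): the 0.5 cutoff used above
# is a modelling choice; because loan acceptances are relatively rare, it can be informative
# to repeat the holdout evaluation with a lower cutoff such as 0.3 and compare sensitivity/specificity.
predicted.holdout.03 <- factor(ifelse(logit.reg.pred[, 2] > 0.3, 1, 0),
levels = c(0, 1), labels = c("No", "Yes"))
confusionMatrix(predicted.holdout.03, holdout.df$Personal.Loan)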

library(gains)

actual <- ifelse(holdout.df$Personal.Loan == "Yes", 1, 0)

# Z comments#########################################################################################

# the groups argument below sets how many groups the cases are split into for the gains table;

# it should not exceed the number of distinct predicted probabilities, which n_distinct() reports below.

n_distinct(logit.reg.pred[,2])

#######################################################################################################

gain <- gains(actual, logit.reg.pred[,2], groups=length(actual)-2)

# plot gains chart

nactual <- sum(actual)

g1 <- ggplot() +

geom_line(aes(x=gain$cume.obs, y=gain$cume.pct.of.total * nactual)) +

geom_line(aes(x=c(0, max(gain$cume.obs)), y=c(0, nactual)), color="darkgrey") +

labs(x="# Cases", y="Cumulative")

# plot decile-wise lift chart

gain10 <- gains(actual, logit.reg.pred[,2], groups=10)

g2 <- ggplot(mapping=aes(x=gain10$depth, y=gain10$lift / 100)) +

geom_col(fill="steelblue") +

geom_text(aes(label=round(gain10$lift / 100, 1)), vjust=-0.2, size=3) +

ylim(0, 8) + labs(x="Percentile", y="Lift")

grid.arrange(g1, g2, ncol=2)

### Z residual plot

# Fit a logistic regression model

model <- glm(Personal.Loan ~ ., data = bank.df, family = binomial(link = "logit"))

summary(model)

residuals = residuals(model, type = "response")

# Create a residual plot

plot(model$fitted.values, residuals, ylab = "Residuals", xlab = "Fitted Values", main = "Logistic Regression Residuals vs. Fitted")

abline(h = 0, col = "red", lty = 2) # Add a horizontal line at y = 0 for reference

# Calculate predicted probabilities same as model fitted value

predicted_probs <- predict(model, type = "response")

predicted_probs[1:5]

model$fitted.values[1:5]

# Define bin intervals (adjust bins and breaks as needed)

bins <- cut(predicted_probs, breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1.0), labels = FALSE)

# Calculate binned residuals

binned_residuals <- data.frame(

PredictedProb = predicted_probs,

ActualOutcome = ifelse(bank.df$Personal.Loan == "Yes", 1, 0), # actual outcome recoded as 0/1

Bin = bins

)

# Example: Calculate mean residuals for each bin

library(dplyr)

binned_residuals_summary <- binned_residuals %>%

group_by(Bin) %>%

summarize(MeanResidual = mean(ActualOutcome - PredictedProb))

# Visualize mean residuals

barplot(binned_residuals_summary$MeanResidual, names.arg = binned_residuals_summary$Bin,

xlab = "Bin", ylab = "Mean Residual", main = "Mean Residuals by Bin")

# Load the ggplot2 package if not already loaded

library(ggplot2)

# Assuming you have calculated binned residuals as described earlier

# Plot the mean binned residuals as a bar chart

binned_residuals_summary %>% ggplot(aes(x = Bin, y = MeanResidual)) +

geom_col() +

labs(x = "Predicted Probability Bin", y = "Mean Residual") +

ggtitle("Mean Binned Residuals by Predicted Probability Bin") +

theme_minimal()

library(arm)

# binnedplot() bins the fitted probabilities and plots the mean residual in each bin
# together with +/- 2 SE confidence bands

binnedplot(predicted_probs, residuals)

## Simulation

library(ggplot2)

library(reshape)

library(glmnet)

rm(list=ls())

set.seed(1)

x = rnorm(1000, sd=3) # A random variable

hist(x)

summary(x)

p = 1/(1+exp(-(1+10*x)))

hist(p)

plot(x,p)

y = rbinom(1000,1,p) # bernoulli response variable

plot(x,y)

# two approaches to set outcome categories

# 1. deterministic cutoff

#y = ifelse(p>=0.5,1,0)

# 2. Bernoulli response (random draw, as used above)

#set.seed(1)

#prob <- c(0.3, 0.5, 0.7, 0.4, 0.6)

#result <- rbinom(5, 1, prob)

#prob

#result
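
# Sketch contrasting the two approaches on the illustrative probabilities from the
# commented-out example above (prob.demo is a name introduced here): the cutoff maps
# p >= 0.5 to 1 deterministically, while rbinom() draws each 0/1 at random with probability p.
prob.demo <- c(0.3, 0.5, 0.7, 0.4, 0.6)
ifelse(prob.demo >= 0.5, 1, 0)           # approach 1: deterministic cutoff
set.seed(1)
rbinom(length(prob.demo), 1, prob.demo)  # approach 2: Bernoulli draw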

data.plot = data.frame(y,p,x)

# plot probability, category

data.plot %>% ggplot(aes(x)) +

geom_line(aes(y=p)) +

geom_point(aes(y=y,color=factor(y)))+

theme_bw()

# plot category

data.plot %>% ggplot(aes(x,y,color=y)) +

geom_point() +

theme_bw()

# plot linear regression

data.plot %>% ggplot(aes(x,y,color=y)) +

geom_point() +

geom_smooth(method='lm', se=FALSE) +

theme_bw()

# plot glm

data.plot %>% ggplot(aes(x,y,color=y)) +

geom_point() +

geom_smooth(method='glm',

method.args = list(family = "binomial"),

se=FALSE) +

theme_bw()

# get coefficients through linear regression

logit = log(p/(1-p))

plot(x,logit)

data = data.frame(cbind(logit,x))

summary(data)

# remove infinite in all columns:

data = data %>%

filter_all(all_vars(!is.infinite(.)))

summary(data)

logit.model = lm(logit~x,data)

logit.model
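
# Sketch: because logit(p) = 1 + 10*x exactly by construction, the linear regression above
# should recover an intercept and slope essentially equal to 1 and 10.
coef(logit.model)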

# get coefficients through logistic regression via glm

data.glm = t(rbind(as.numeric(y),as.numeric(x)))

data.glm = data.frame(data.glm)

names(data.glm) = c("y", "x")

#data.glm = data.glm %>% rename(y=X1,x=X2)

data.glm$y = as.factor(data.glm$y)

summary(data.glm)

plot(data.glm$x,data.glm$y)

set.seed(1)

glm.model <- glm(y ~ x, data = data.glm, family = binomial)

glm.model
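
# Sketch: the glm estimates should also land in the neighbourhood of the true values (1, 10),
# though with more variability than the lm fit above because y is a random Bernoulli draw from p;
# with such a steep slope, glm may also warn about fitted probabilities numerically 0 or 1.
summary(glm.model)$coefficients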
