r - 如何在 R 中获取维基百科示例中逻辑回归模拟的 ggplot 图?
问题描述
(添加了可重复的示例)
我试图在这里模仿维基百科的“通过考试的概率与学习时间”的逻辑回归示例:
我无法在该页面中获得相同的 ggplot 图,我不知道为什么。
# Exam data for the "probability of passing vs. hours studied" example:
# `hour` is the time spent studying, `pass` is 1 for a pass and 0 for a fail.
df <- data.frame(
  hour = c(
    0.50, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75, 2.00, 2.25, 2.50,
    2.75, 3.00, 3.25, 3.50, 4.00, 4.25, 4.50, 4.75, 5.00, 5.50
  ),
  pass = c(0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1)
)
df
#    hour pass
# 1  0.50    0
# 2  0.75    0
# 3  1.00    0
# 4  1.25    0
# 5  1.50    0
# 6  1.75    0
# 7  1.75    1
# 8  2.00    0
# 9  2.25    1
# 10 2.50    0
# 11 2.75    1
# 12 3.00    0
# 13 3.25    1
# 14 3.50    0
# 15 4.00    1
# 16 4.25    1
# 17 4.50    1
# 18 4.75    1
# 19 5.00    1
# 20 5.50    1

# Store the outcome as a factor. For a binomial glm the first factor level
# ("0") is treated as failure and the other level ("1") as success.
df$pass <- as.factor(df$pass)

# Use bare column names in the formula so they are resolved through `data`.
# Writing `df$pass ~ df$hour` ignores `data`, names the slope "df$hour", and
# makes predict(my_fit, newdata = ...) silently ignore the new data.
my_fit <- glm(
  pass ~ hour,
  data = df,
  na.action = na.exclude,
  family = "binomial"
)
summary(my_fit)
不使用 ggplot 的绘图完全正常:
# Coefficient table with the estimates mapped through the inverse logit.
# stats::plogis() is base R's inverse-logit function; `invlogit()` is not part
# of base R, so calling it without loading a package that provides it fails.
my_table <- summary(my_fit)
my_table$coefficients[, 1] <- plogis(coef(my_fit))
my_table
anova(my_fit)
library(pscl)
pR2(my_fit) # for McFadden rho^2

plot(df$hour, df$pass, xlab = "x", ylab = "logit values")

# predict() on a glm defaults to type = "link", so these are the log-odds
# evaluated at the observed hours:
#   coef(my_fit)[1] + coef(my_fit)[2] * df$hour
# They do not match intercept + slope * (1:20) because the predictor is the
# number of hours studied, not the row index.
LinearPredictions <- predict(my_fit)
LinearPredictions

# Inverse logit of the linear predictor gives the fitted pass probabilities.
EstimatedProbability.hat <- plogis(LinearPredictions)
EstimatedProbability.hat

# Probabilities for which the matching number of study hours is wanted.
EstimatedProbability <- c(0.25, 0.50, 0.75)
# Invert the fitted line: hours = (logit(p) - intercept) / slope.
HoursStudied <- (qlogis(EstimatedProbability) - my_fit$coefficients[1]) /
  my_fit$coefficients[2]
HoursStudied.summary <- data.frame(EstimatedProbability, HoursStudied)
HoursStudied.summary

# One colour per target probability, shared by both sets of dashes and the
# legend.
hs_colours <- c("darkblue", "darkred", "darkgreen")

plot(
  df$hour, EstimatedProbability.hat,
  xlab = "studying hours", ylab = "estimated probability (pass)"
) # , xlim=c(0,6), ylim=c(0,1)
# Add red curve
lines(df$hour, EstimatedProbability.hat, lty = 1, col = "red")
# Vertical dashes
segments(
  x0 = HoursStudied.summary$HoursStudied, y0 = 0,
  x1 = HoursStudied.summary$HoursStudied,
  y1 = HoursStudied.summary$EstimatedProbability,
  lty = 2, col = hs_colours
)
# Horizontal dashes
segments(
  x0 = 0, y0 = HoursStudied.summary$EstimatedProbability,
  x1 = HoursStudied.summary$HoursStudied,
  y1 = HoursStudied.summary$EstimatedProbability,
  lty = 2, col = hs_colours
)
legend(
  "bottomright",
  legend = c("HS0.25", "HS0.50", "HS0.75"),
  lty = 2, col = hs_colours, bty = "n", cex = 0.75
)
使用 ggplot 绘图失败:
我尝试在 ggplot 中做同样的事情,但失败了:
# Attach the fitted probabilities to the data frame (and print it), then label
# each target-probability row so the guide segments can be coloured by group.
df$EstimatedProbabilities <- EstimatedProbability.hat; df
HoursStudied.summary$group <- c('HS0.25','HS0.50','HS0.75')
library(ggplot2)
# NOTE(review): df$pass was converted with as.factor() earlier in the script,
# so the y aesthetic below is mapped to a factor while the line and segment
# layers supply numeric y values. This mismatch is the likely reason the curve
# is not drawn where expected; converting pass to numeric before plotting
# resolves it.
ggplot(df, aes(x=hour, y=df$pass)) +
# Observed outcomes, using the x/y mapping inherited from ggplot().
geom_point() +
# Fitted curve: overrides y with the estimated probabilities.
geom_line(aes(y=EstimatedProbabilities), colour="black") +
# Horizontal dashed guides running from x = -Inf to each HoursStudied value.
geom_segment(data=HoursStudied.summary, aes(y=EstimatedProbability,
xend=HoursStudied, yend=EstimatedProbability, col=group), x=-Inf, linetype="dashed") +
# Vertical dashed guides running from y = -Inf up to each target probability.
geom_segment(data=HoursStudied.summary, aes(x=HoursStudied,
xend=HoursStudied, yend=EstimatedProbability, col=group), y=-Inf, linetype="dashed")
问题:ggplot 的曲线与 plot 的曲线形状相同,但是整条曲线被绘制在 y=0 线的下方。为什么?
解决方案
编辑:你需要让 df$pass 是数值型(numeric),而不是因子(factor)。另外,我不会在最初的 ggplot 调用中映射任何美学属性(aesthetics),而是只在 geom_point 和 geom_line 调用中传递它们。
# The y axis must be continuous, so `pass` has to be numeric rather than a
# factor. Convert through the level labels ("0"/"1") instead of the integer
# codes: as.numeric() on a factor returns the codes (1, 2), and the
# "subtract 1" workaround produces -1/0 if this line is run a second time.
# Going through as.character() yields 0/1 and is safe to re-run.
df$pass <- as.numeric(as.character(df$pass))

# Aesthetics are mapped per layer rather than in the initial ggplot() call.
ggplot(df) +
  # Observed outcomes (0 = fail, 1 = pass).
  geom_point(aes(x = hour, y = pass)) +
  # Fitted probability curve.
  geom_line(aes(x = hour, y = EstimatedProbabilities)) +
  # Horizontal dashed guides running from x = -Inf to each HoursStudied value.
  geom_segment(
    data = HoursStudied.summary,
    aes(
      y = EstimatedProbability, xend = HoursStudied,
      yend = EstimatedProbability, colour = group
    ),
    x = -Inf, linetype = "dashed"
  ) +
  # Vertical dashed guides running from y = -Inf up to each target probability.
  geom_segment(
    data = HoursStudied.summary,
    aes(
      x = HoursStudied, xend = HoursStudied,
      yend = EstimatedProbability, colour = group
    ),
    y = -Inf, linetype = "dashed"
  )
推荐阅读
- c# - NuGet 包生成错误
- mysql - 从表中的同一列中选择不同的值并将其显示在不同的列下
- php - Yii2- 使用调度程序在 Windows 上运行控制台应用程序
- vue.js - 模式未在 Vuejs 中显示
- java - 表中相对元素的 XPath
- c# - ASP.Net Core Angular 项目中的装配异常
- python - 如何在 Python 中动态获取 Firefox 配置文件路径?
- python - 不知道为什么标签没有使用 tkinter 更新
- javascript - 在javascript中查找图像的尺寸
- python - 使用 Python 在 MacOSX 文件系统上获取文件类型(不是扩展名)