r - 在 R 中使用多个变量预测 glm 的可视化
问题描述
我想使用以下数据集来拟合 glm 并可视化 predict()。
# Example data: 60 observations of a numeric response `y`, a numeric
# predictor `x1`, and two categorical predictors `x2` and `x3`.
y <- c(
  -18.948, -19.007, -18.899, -19.022, -20.599, -19.778, -17.344, -20.265,
  -20.258, -19.886, -18.05, -19.824, -20.1, -20.508, -20.455, -16.573,
  -20.249, -20.205, -20.436, -16.358, -17.717, -19.794, -20.372, -19.944,
  -20.072, -19.889, -20.139, -19.132, -20.275, -19.953, -19.769, -20.2,
  -19.638, -17.419, -19.086, -18.347, -18.73, -18.872, -18.956, -19.28,
  -18.176, -19.036, -18.084, -20.11, -19.641, -19.656, -19.25, -18.68,
  -19.089, -18.969, -18.161, -17.603, -20.37, -19.233, -18.961, -19.083,
  -20.118, -19.795, -17.154, -16.75
)
x1 <- c(
  9.698, 9.583, 9.356, 9.326, 9.438, 9.733, 8.803, 8.973, 9.141, 9.044,
  8.788, 9.377, 9.26, 10.186, 9.035, 9.569, 9.431, 9.09, 8.776, 9.117,
  9.393, 9.408, 9.307, 8.868, 8.398, 8.407, 9.364, 9.074, 8.444, 9.122,
  10.11, 7.81, 9.777, 6.472, 9.521, 8.92, 9.341, 9.446, 9.08, 8.071,
  8.047, 8.019, 7.419, 9.022, 9.981, 9.337, 9.989, 10.013, 9.31, 10.843,
  8.337, 9.103, 6.438, 9.372, 9.071, 8.749, 9.016, 8.181, 9.284, 8.44
)
# Group labels written as consecutive runs; the run lengths sum to 60.
x2 <- rep(
  c("S03", "S04", "S06", "S07", "S08", "S09", "S10", "S03", "S04", "S07"),
  times = c(20L, 6L, 8L, 9L, 2L, 7L, 1L, 3L, 2L, 2L)
)
# 53 rows labelled "A1" followed by 7 rows labelled "P1".
x3 <- rep(c("A1", "P1"), times = c(53L, 7L))
mydata <- data.frame(y, x1, x2, x3)
拟合 glm 模型:
# Fit a Gaussian GLM of y on x1, x2 and x3, plus the x1-by-x2 interaction.
myglm <- glm(
  y ~ x1 + x2 + x3 + x1:x2,
  family = "gaussian",
  data = mydata
)
预测:
1) 提取 x1 的范围
# Lower and upper bounds of x1, used as the endpoints of the prediction grid.
# NOTE(review): these variable names mask base::min()/base::max(); they are
# kept because the grid construction that follows refers to `min` and `max`.
min <- min(mydata[["x1"]])
max <- max(mydata[["x1"]])
2) 创建一个新的数据框 new.x
问题来了:我应该如何将 x2 和 x3 包含在 new.x 中?
# Prediction grid (incomplete): 60 evenly spaced x1 values between min and max.
# The `???` placeholders mark the x2 and x3 values the question asks about,
# so this snippet is not runnable as written.
new.x <- data.frame(
x1=seq(min, max, length=60),
x2= ???
x3= ???)
然后用 myglm 预测 new.y:
# Predict on the new grid; se.fit = TRUE requests standard errors as well.
new.y <- predict(myglm, newdata = new.x, se.fit = TRUE)
结合 new.x 和 new.y:
# Combine the prediction grid with the predict() output in one data frame;
# the steps that follow read its `fit` and `se.fit` columns.
addThese <- data.frame(new.x, new.y)
置信区间:
# Add the predicted response and an approximate 95% interval
# (fit +/- 1.96 standard errors) to the prediction table.
# myglm is fitted with family = "gaussian", whose default link is the
# identity, so predict() already returns values on the response scale.
# Applying exp() would put d15N/lwr/upr on a different scale from `fit`,
# and a band drawn from lwr/upr around `fit` could not enclose the line.
addThese <- dplyr::mutate(addThese,
  d15N = fit,
  lwr = fit - 1.96 * se.fit,
  upr = fit + 1.96 * se.fit)
3) 可视化:添加原始数据点和 glm 预测的平滑线:
# Plot the fitted values as points and draw the fitted line with its
# lwr/upr band; stat = "identity" makes geom_smooth() draw the supplied
# columns instead of fitting its own smoother.
# NOTE(review): the points come from addThese, i.e. the predictions — confirm
# whether the raw observations were meant to be shown instead.
ggplot(data = addThese, mapping = aes(x = x1, y = fit)) +
  geom_point(shape = 21, size = 3) +
  geom_smooth(
    data = addThese,
    mapping = aes(ymin = lwr, ymax = upr),
    stat = "identity"
  )
解决方案
我仍然不确定这是否是创建 new.data 的正确方式,但我会尝试一下。因此,使用您的数据,并稍微修改您的代码:
# Refit the model; `gaussian` is passed as the family function itself.
myglm <- glm(
  y ~ x1 + x2 + x3 + x1:x2,
  family = gaussian,
  data = mydata
)

# Observed range of x1, used as the endpoints of the prediction grid.
minx <- min(mydata[["x1"]])
maxx <- max(mydata[["x1"]])

# Prediction grid: every combination of 60 evenly spaced x1 values with each
# distinct x2 value and each distinct x3 value found in the data.
new.data <- expand.grid(
  x1 = seq(from = minx, to = maxx, length.out = 60),
  x2 = unique(mydata[["x2"]]),
  x3 = unique(mydata[["x3"]])
)
# Predict over the grid and plot the fitted lines with approximate 95% bands
# (fit +/- 1.96 standard errors), one facet per x2/x3 combination.
# dplyr supplies %>%, bind_cols() and mutate(); ggplot2 supplies the plotting
# functions. They are attached here so the snippet runs in a fresh session.
library(dplyr)
library(ggplot2)

# Keep only the `fit` and `se.fit` elements of the prediction list, selected
# by name rather than by position.
predict(myglm, newdata = new.data, se.fit = TRUE)[c("fit", "se.fit")] %>%
  data.frame() %>%
  bind_cols(new.data) %>%
  # The gaussian family defaults to the identity link, so the interval is
  # built directly on the scale of `fit`; no back-transformation is applied.
  mutate(lwr = fit - 1.96 * se.fit, upr = fit + 1.96 * se.fit) %>%
  ggplot(aes(x = x1, y = fit, colour = interaction(x2, x3))) +
  geom_point(size = 1, alpha = .75, shape = 19, position = "jitter") +
  geom_smooth(aes(ymin = lwr, ymax = upr), stat = "identity", alpha = .5) +
  facet_wrap(~interaction(x2, x3, sep = " : "), nrow = 5) +
  ggthemes::theme_few() +
  labs(y = "Predicted value", x = bquote(x[1])) +
  theme(legend.position = "none")
推荐阅读
- r - ggplot2中散点图的图例(无颜色)
- sql - 如何解析逗号分隔列表并将这些值连接到另一个表并重新连接?
- java - 使用 open liberty 21.0.0.8 编译 Maven 项目
- r - 如何解决这个问题:[WARN] shiny-server - 无法删除日志文件 /var/log/shiny-server/file.log
- regex - 正则表达式 if then else 混淆
- c++ - 为什么在尝试访问基模板类的受保护成员时会出现访问错误?
- java - JNA 中的 SetCommMask 和 WaitCommEvent 在哪里?
- r - 基于字符串拟合多个逻辑回归模型
- android - 应用程序在后台时,热词检测无法重新启动,Android?
- sql - Oracle 19c 中基于前行数据的累积计算/计算