r - 如何循环回归并获取变量的原始名称?
问题描述
假设我有以下数据框:
# Example data: a 20-row data frame whose eight columns X1..X8 all hold the
# same 20 numeric values. Built inside local() so the helper objects do not
# remain in the calling environment.
df <- local({
  values <- c(
    -1.18944760538218, -1.29854014882472, 0.583311783714593,
    0.650871260739756, -0.000391966126701203, 1.23053193924217,
    1.70030189473426, -0.576054488536938, 0.0291802126175275,
    1.43019460222912, -1.0301194391522, 0.672348388347025,
    -0.173407147880241, -1.5368071194984, 0.676906374446062,
    0.245381059292165, 0.192944401742425, -1.22846757821128,
    0.761438857461385, -1.49010138930763
  )
  # One copy of the vector per column, named X1 through X8.
  columns <- rep(list(values), 8L)
  names(columns) <- paste0("X", seq_len(8L))
  # Same class and compact automatic row names as the dput() form.
  structure(columns, class = "data.frame", row.names = c(NA, -20L))
})
X1 X2 X3 X4 X5
1 -1.1894476054 -1.1894476054 -1.1894476054 -1.1894476054 -1.1894476054
2 -1.2985401488 -1.2985401488 -1.2985401488 -1.2985401488 -1.2985401488
3 0.5833117837 0.5833117837 0.5833117837 0.5833117837 0.5833117837
4 0.6508712607 0.6508712607 0.6508712607 0.6508712607 0.6508712607
5 -0.0003919661 -0.0003919661 -0.0003919661 -0.0003919661 -0.0003919661
6 1.2305319392 1.2305319392 1.2305319392 1.2305319392 1.2305319392
7 1.7003018947 1.7003018947 1.7003018947 1.7003018947 1.7003018947
8 -0.5760544885 -0.5760544885 -0.5760544885 -0.5760544885 -0.5760544885
9 0.0291802126 0.0291802126 0.0291802126 0.0291802126 0.0291802126
10 1.4301946022 1.4301946022 1.4301946022 1.4301946022 1.4301946022
11 -1.0301194392 -1.0301194392 -1.0301194392 -1.0301194392 -1.0301194392
12 0.6723483883 0.6723483883 0.6723483883 0.6723483883 0.6723483883
13 -0.1734071479 -0.1734071479 -0.1734071479 -0.1734071479 -0.1734071479
14 -1.5368071195 -1.5368071195 -1.5368071195 -1.5368071195 -1.5368071195
15 0.6769063744 0.6769063744 0.6769063744 0.6769063744 0.6769063744
16 0.2453810593 0.2453810593 0.2453810593 0.2453810593 0.2453810593
17 0.1929444017 0.1929444017 0.1929444017 0.1929444017 0.1929444017
18 -1.2284675782 -1.2284675782 -1.2284675782 -1.2284675782 -1.2284675782
19 0.7614388575 0.7614388575 0.7614388575 0.7614388575 0.7614388575
20 -1.4901013893 -1.4901013893 -1.4901013893 -1.4901013893 -1.4901013893
X6 X7 X8
1 -1.1894476054 -1.1894476054 -1.1894476054
2 -1.2985401488 -1.2985401488 -1.2985401488
3 0.5833117837 0.5833117837 0.5833117837
4 0.6508712607 0.6508712607 0.6508712607
5 -0.0003919661 -0.0003919661 -0.0003919661
6 1.2305319392 1.2305319392 1.2305319392
7 1.7003018947 1.7003018947 1.7003018947
8 -0.5760544885 -0.5760544885 -0.5760544885
9 0.0291802126 0.0291802126 0.0291802126
10 1.4301946022 1.4301946022 1.4301946022
11 -1.0301194392 -1.0301194392 -1.0301194392
12 0.6723483883 0.6723483883 0.6723483883
13 -0.1734071479 -0.1734071479 -0.1734071479
14 -1.5368071195 -1.5368071195 -1.5368071195
15 0.6769063744 0.6769063744 0.6769063744
16 0.2453810593 0.2453810593 0.2453810593
17 0.1929444017 0.1929444017 0.1929444017
18 -1.2284675782 -1.2284675782 -1.2284675782
19 0.7614388575 0.7614388575 0.7614388575
20 -1.4901013893 -1.4901013893 -1.4901013893
我想建立一系列回归:以第一列为因变量,依次对其余每一列分别做回归(每次只用一个自变量)。为此,我编写了以下循环:
# Fit one simple regression of X1 on each remaining column and print its summary.
# The loop iterates over the columns of df[,-c(1)], so `i` holds a column's
# values rather than its name; the formula term is therefore the literal symbol
# `i`, which is what appears in the printed call and coefficient table.
for (i in df[,-c(1)]) {
model <- lm(df$X1 ~ i)
print(summary(model))
}
问题是,在这个循环输出的汇总表中,每个回归变量都显示为“i”。我希望回归表中显示的是各变量的原始名称。
Call:
lm(formula = df$X1 ~ i) # instead of i, the original name of each variable
Residuals:
Min 1Q Median 3Q Max
-2.734e-16 -4.960e-17 -4.252e-17 5.340e-18 5.756e-16
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -3.103e-18 3.693e-17 -8.400e-02 0.934
i 1.000e+00 3.748e-17 2.668e+16 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.651e-16 on 18 degrees of freedom
Multiple R-squared: 1, Adjusted R-squared: 1
F-statistic: 7.118e+32 on 1 and 18 DF, p-value: < 2.2e-16
谁能帮我解决这个问题?
谢谢!
解决方案
如果您确实需要让变量名出现在输出的 Call 部分,就需要先用所需的全部符号构建出完整的表达式,然后再对其求值。代码看起来有点繁琐,大致如下:
# Regress X1 on every other column, one model per column. Each formula is
# built from the column name, so the printed call and coefficient table show
# that name.
for (predictor in names(df)[-1]) {
  # Build the formula X1 ~ <predictor> from strings.
  model_formula <- reformulate(predictor, "X1")
  # Passing quote(df) puts the symbol `df`, not the data itself, in the call.
  model <- do.call("lm", list(model_formula, quote(df)))
  print(summary(model))
}
推荐阅读
- xml - 使用 Jackson XML 反序列化具有自定义条目名称的 Map
- c# - 无法连接到远程服务器(ftp 在开发中有效,但在生产中无效)
- typescript - 打字稿手册通用类型问题
- java - 从 Java 调用的 Kotlin 协程返回值 (Int)
- swift - XCTestExpectation 和 NSPredicate 的 XCTWaiter().wait 似乎失败了
- r - R: length(abs(t1)>1.97) 它返回总数
- android - 查询参数中的问题:Annotation 参数必须是编译时常量
- go - 导入已经在公共仓库中的本地包
- python - 如何将 .json 文件(共 336 个)与我通过 Guardian API 创建的有关新闻的数据合并?
- postgresql - 连接 Kubernetes 中部署的气流中的外部数据库