r - 在 R 中的函数中提供数据和变量名称
问题描述
目标
我想在函数中同时提供数据和变量名称。这是因为用户可能会为数据集提供相同变量的不同名称。以下是引发错误的可重现示例。请让我参考相关资源来解决此问题。
另外,请让我知道编写此类函数的最佳实践是什么?在文档中,我应该要求用户重命名他们的列还是提供仅包含所需列的数据集?
例子
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
dataset1 <- mtcars %>%
select(mpg, disp, wt)
dataset2 <- mtcars %>%
select(miles_per_gallon = mpg, displacement_cu_in = disp, weight = wt)
my_func <- function(data, var_mileage, var_volume, var_weight){
var_mileage_km_l <- 0.43 * data$var_mileage
var_volume_l <- 0.016 * data$var_volume
var_weight_kg <- 0.45 * data$var_weight
m <- lm(var_mileage_km_l ~ var_volume_l + var_weight_kg)
summary(m)
}
my_func(dataset1, mpg, disp, wt)
#> Error in lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...): 0 (non-NA) cases
由reprex 包于 2021-04-12 创建(v1.0.0)
预期产出
无论用户是否提供dataset1
或dataset2
具有相应的变量名称,我都希望该函数创建以下输出。
Call:
lm(formula = var_mileage_km_l ~ var_volume_l + var_weight_kg)
Residuals:
Min 1Q Median 3Q Max
-1.4657 -0.9994 -0.3304 0.7620 2.7298
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 15.0330 0.9308 16.151 4.91e-16 ***
var_volume_l -0.4764 0.2470 -1.929 0.06362 .
var_weight_kg -3.2019 1.1124 -2.878 0.00743 **
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.254 on 29 degrees of freedom
Multiple R-squared: 0.7809, Adjusted R-squared: 0.7658
F-statistic: 51.69 on 2 and 29 DF, p-value: 2.744e-10
笔记
我知道以上可以通过{{var}}
在数据框上下文中使用来完成。但如您所见,我不想在数据框中进行计算。这只是一个例子,我的真正问题无法在数据框中解决。
编辑:
以下是我的原始功能。在函数体内,你可以看到我使用data$
了几次。如果用户没有与我在这里使用的相同的列名,则该函数将不起作用。这就是为什么我要求提供变量名(列名)以及data
参数。
apply_wiedemann <- function(data,
V_DESIRED,
FAKTORVmult,
BMAXmult,
BNULLmult,
AXadd,
BXadd,
angular_vel_threshold,
EXadd,
OPDVadd
){
## Parameters --------------------------------------------------------------------
V_MAX <- 44
L <- na.omit(unique(data$LV_length_m))
W <- na.omit(unique(data$LV_width_m))
angular_vel_threshold <- angular_vel_threshold
CX = sqrt(W / angular_vel_threshold)
BMIN = -8
AX = L + AXadd
## Time--------------------------------------------------------------------------
delta_T <- (data$frames[2] - data$frames[1])/60
last_time <- (nrow(data) - 1) * delta_T
Time <- seq(from = 0, to = last_time, by = delta_T)
## Empty vectors
BMAX <- rep(NA_real_, times = length(Time)) ### an empty vector
vn_complete <- rep(NA_real_, times = length(Time))
vn_complete[1] <- data$ED_speed_mps[1]
vn1_complete <- data$LV_speed_mps
dv <- rep(NA_real_, times = length(Time))
dv[1] <- data$LV_DV_mps[1]
xn_complete <- rep(NA_real_, times = length(Time))
xn_complete[1] <- data$ED_position_m[1]
xn1_complete <- data$LV_position_m
bn_complete <-rep(NA_real_, times = length(Time))
sn_complete <- rep(NA_real_, times = length(Time))
sn_complete[1] <- data$LV_spacing_m[1]
BX <- rep(NA_real_, times = length(Time)) ### an empty vector
ABX <- rep(NA_real_, times = length(Time)) ### an empty vector
SDV <- rep(NA_real_, times = length(Time)) ### an empty vector
B_App <- rep(NA_real_, times = length(Time)) ### an empty vector
bl <- data$LV_acc_mps2
B_Emg <- rep(NA_real_, times = length(Time))
SDX <- rep(NA_real_, times = length(Time))
CLDV <- rep(NA_real_, times = length(Time))
OPDV <- rep(NA_real_, times = length(Time))
cf_state_sim <- rep(NA_character_, times = length(Time))
## Unintentional Acceleration and Deceleration when the car is at V_DESIRED
# BNULL = BNULLmult * (RND4 + NRND)
BNULL = BNULLmult
FaktorV = V_MAX / (V_DESIRED + FAKTORVmult * (V_MAX - V_DESIRED))
# EX = EXadd + EXmult * (NRND - RND2)
EX = EXadd
for (t in 1:(length(Time)-1)) {
## Speed-dependent part of Minimum following distance
# BX = (BXadd + (BXmult * RND1)) * sqrt(v)
BX[t] = BXadd * sqrt(min(c(vn_complete[t], vn1_complete[t]), na.rm = TRUE)) ###0.8 | 0.886
## Minimum following distance
ABX[t] = AX + BX[t] ### 16.91 | 16.996
## Speed-difference at which driver perceives that the lead vehicle is slow
SDV[t] = ((sn_complete[t] - AX)/CX)^2 ###0.34 |
## Maximum following distance
SDX[t] = AX + (EX * BX[t])
## Speed-difference when driver perceives that lead vehicle is slower
CLDV[t] = SDV[t] * EX^2
## Speed-difference when driver perceives that lead vehicle is faster
# OPDV = CLDV * (((-1) * OPDVadd) - (OPDVmult * NRND))
OPDV[t] = CLDV[t] * ((-1) * OPDVadd)
if (is.na(sn_complete[t]) | is.na(dv[t])) {
BMAX[t] <- BMAXmult * (V_MAX - (vn_complete[t] * FaktorV))
bn_complete[t] <- BMAX[t]
cf_state_sim[t] <- "free_driving"
} else if (sn_complete[t] <= ABX[t]) {
B_Emg[t] = 0.5 * ((dv[t])^2 / (AX - sn_complete[t])) + bl[t] +
(BMIN * ((ABX[t] - sn_complete[t]) / (ABX[t] - AX)))
bn_complete[t] <- ifelse(B_Emg[t] < BMIN | B_Emg[t] > 0, BMIN, B_Emg[t])
cf_state_sim[t] <- "emergency_braking"
} else if (sn_complete[t] < SDX[t]) {
if ( dv[t] > CLDV[t]) {
bn_complete[t] <- BNULL
cf_state_sim[t] <- "following"
} else if (dv[t] > OPDV[t]) {
bn_complete[t] <- BNULL
cf_state_sim[t] <- "following"
} else {
BMAX[t] <- BMAXmult * (V_MAX - (vn_complete[t] * FaktorV))
bn_complete[t] <- BMAX[t]
cf_state_sim[t] <- "free_driving"
}
} else {
if (dv[t] > SDV[t]) {
B_App[t] = 0.5 * ((dv[t])^2 / (ABX[t] - sn_complete[t])) + bl[t]
bn_complete[t] <- ifelse(B_App[t] < BMIN, BMIN, B_App[t])
cf_state_sim[t] <- "approaching"
} else {
BMAX[t] <- BMAXmult * (V_MAX - (vn_complete[t] * FaktorV)) ###2.19
bn_complete[t] <- BMAX[t]
cf_state_sim[t] <- "free_driving"
}
}
vn_complete[t+1] <- vn_complete[t] + (bn_complete[t] * delta_T)
vn_complete[t+1] <- ifelse(vn_complete[t+1] < 0, 0, vn_complete[t+1])
xn_complete[t+1] <- xn_complete[t] - (vn_complete[t] * delta_T) + (0.5 * bn_complete[t] * (delta_T)^2)
sn_complete[t+1] <- xn_complete[t+1] - xn1_complete[t+1]
dv[t+1] <- vn_complete[t+1] - vn1_complete[t+1]
}
frspacing_pred <- sn_complete - L
SSE <- sum(((frspacing_pred - data$LV_frspacing_m)^2)/ abs(data$LV_frspacing_m), na.rm = TRUE)/sum(abs(data$LV_frspacing_m), na.rm = TRUE)
return(SSE)
}
希望现在每个人都清楚,这个问题无法在数据框上下文中解决。感谢@MrFlick,我知道现在该做什么了。我还尝试了enquo
anddata %>% pull(!! var_name)
它似乎做了什么eval(substitute())
。
解决方案
这似乎是编写 R 函数的一种非常不寻常的方式,但你可以这样做
my_func <- function(data, var_mileage, var_volume, var_weight){
eval(substitute({
var_mileage_km_l <- 0.43 * var_mileage
var_volume_l <- 0.016 * var_volume
var_weight_kg <- 0.45 * var_weight
m <- lm(var_mileage_km_l ~ var_volume_l + var_weight_kg)
summary(m)
}), envir = data)
}
将substitute()
您作为列名传递的符号注入到表达式中。然后您可以在 data.frame 的上下文中对其进行评估。
或者你可以做类似的事情
my_func <- function(data, var_mileage, var_volume, var_weight){
var_mileage <- eval(substitute(var_mileage), data)
var_volume <- eval(substitute(var_volume), data)
var_weight <- eval(substitute(var_weight), data)
var_mileage_km_l <- 0.43 * var_mileage
var_volume_l <- 0.016 * var_volume
var_weight_kg <- 0.45 * var_weight
m <- lm(var_mileage_km_l ~ var_volume_l + var_weight_kg)
summary(m)
}
或者另一个常见的技巧是将列名转换为字符串。
my_func <- function(data, var_mileage, var_volume, var_weight){
var_mileage_km_l <- 0.43 * data[[var_mileage]]
var_volume_l <- 0.016 * data[[var_volume]]
var_weight_kg <- 0.45 * data[[var_weight]]
m <- lm(var_mileage_km_l ~ var_volume_l + var_weight_kg)
summary(m)
}
my_func(dataset1, "mpg", "disp", "wt")
推荐阅读
- docker - 如何增加 docker ansible 中的项目最大大小
- javascript - 单击动态选项卡时调用 Javascript 函数
- list-comprehension - 如果结果理解的结果没有,则将 None 或空值传递给字典键
- gcc - gcc 无法正确读取以 utf-16 编码的 c 文件
- ios - 运行 iOS 代码时未找到 -lPods 的库 - 错误
- c# - 来自 Instagram 内部 API C# 的经过身份验证的响应
- git - 从拉取请求中删除提交并在 bitbucket 中分支
- node.js - 我从 nodejs 服务器获取的数据是空的并且没有显示结果
- windows - 在 MASM 中使用 PlaySound(),需要一种方法来帮助在后台运行它,而无需等待整个 .wav 声音文件完成的代码
- r - 使用 dplyr 从矩阵中提取值以添加到数据框列