首页 > 解决方案 > 在 R 中的函数中提供数据和变量名称

问题描述

目标

我想在函数中同时提供数据和变量名称。这是因为用户可能会为数据集提供相同变量的不同名称。以下是引发错误的可重现示例。请让我参考相关资源来解决此问题。

另外,请让我知道编写此类函数的最佳实践是什么?在文档中,我应该要求用户重命名他们的列还是提供仅包含所需列的数据集?

例子

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union

dataset1 <- mtcars %>%
  select(mpg, disp, wt)

dataset2 <- mtcars %>%
  select(miles_per_gallon = mpg, displacement_cu_in = disp, weight = wt)
  


my_func <- function(data, var_mileage, var_volume, var_weight){
  
  var_mileage_km_l <- 0.43 * data$var_mileage
  var_volume_l <- 0.016 * data$var_volume
  var_weight_kg <- 0.45 * data$var_weight
  
  m <- lm(var_mileage_km_l ~ var_volume_l + var_weight_kg)
  
  summary(m)
}


my_func(dataset1, mpg, disp, wt)
#> Error in lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...): 0 (non-NA) cases

reprex 包于 2021-04-12 创建(v1.0.0)

预期产出

无论用户是否提供dataset1dataset2具有相应的变量名称,我都希望该函数创建以下输出。

Call:
lm(formula = var_mileage_km_l ~ var_volume_l + var_weight_kg)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.4657 -0.9994 -0.3304  0.7620  2.7298 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)    15.0330     0.9308  16.151 4.91e-16 ***
var_volume_l   -0.4764     0.2470  -1.929  0.06362 .  
var_weight_kg  -3.2019     1.1124  -2.878  0.00743 ** 
---
Signif. codes:  
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.254 on 29 degrees of freedom
Multiple R-squared:  0.7809,    Adjusted R-squared:  0.7658 
F-statistic: 51.69 on 2 and 29 DF,  p-value: 2.744e-10

笔记

我知道以上可以通过{{var}}在数据框上下文中使用来完成。但如您所见,我不想在数据框中进行计算。这只是一个例子,我的真正问题无法在数据框中解决。

编辑:

以下是我的原始功能。在函数体内,你可以看到我使用data$了几次。如果用户没有与我在这里使用的相同的列名,则该函数将不起作用。这就是为什么我要求提供变量名(列名)以及data参数。

apply_wiedemann <- function(data,
                            V_DESIRED,
                            FAKTORVmult,
                            BMAXmult,
                            BNULLmult,
                            AXadd,
                            BXadd,
                            angular_vel_threshold,
                            EXadd,
                            OPDVadd
){
  
  
  
  ## Parameters --------------------------------------------------------------------
  V_MAX <- 44
  
  L <- na.omit(unique(data$LV_length_m))
  W <- na.omit(unique(data$LV_width_m))
  
  angular_vel_threshold <- angular_vel_threshold
  CX = sqrt(W / angular_vel_threshold)
  
  BMIN = -8
  AX = L + AXadd 
  
  
  ## Time--------------------------------------------------------------------------
  delta_T <- (data$frames[2] - data$frames[1])/60
  last_time <- (nrow(data) - 1) * delta_T
  Time <- seq(from = 0, to = last_time, by = delta_T)
  
  
  
  
  ## Empty vectors
  BMAX <- rep(NA_real_, times = length(Time)) ### an empty vector 
  vn_complete <- rep(NA_real_, times = length(Time))
  vn_complete[1] <- data$ED_speed_mps[1]
  
  vn1_complete <- data$LV_speed_mps
  dv <- rep(NA_real_, times = length(Time))
  dv[1] <- data$LV_DV_mps[1]
  
  
  xn_complete <- rep(NA_real_, times = length(Time))
  xn_complete[1] <- data$ED_position_m[1]
  
  
  xn1_complete <- data$LV_position_m
  
  
  bn_complete <-rep(NA_real_, times = length(Time))
  
  sn_complete <- rep(NA_real_, times = length(Time))
  sn_complete[1] <- data$LV_spacing_m[1]
  
  
  
  BX <- rep(NA_real_, times = length(Time)) ### an empty vector 
  ABX <- rep(NA_real_, times = length(Time)) ### an empty vector 
  
  SDV <- rep(NA_real_, times = length(Time)) ### an empty vector 
  B_App <- rep(NA_real_, times = length(Time)) ### an empty vector 
  
  bl <- data$LV_acc_mps2
  
  B_Emg <- rep(NA_real_, times = length(Time))
  
  SDX <- rep(NA_real_, times = length(Time))
  
  CLDV <- rep(NA_real_, times = length(Time))
  
  OPDV <- rep(NA_real_, times = length(Time))
  
  cf_state_sim <- rep(NA_character_, times = length(Time))
  
  
  
  ## Unintentional Acceleration and Deceleration when the car is at V_DESIRED
  # BNULL = BNULLmult * (RND4 + NRND) 
  BNULL = BNULLmult 
  
  
  FaktorV = V_MAX / (V_DESIRED + FAKTORVmult * (V_MAX - V_DESIRED))
  
  
  # EX = EXadd + EXmult * (NRND - RND2)
  EX = EXadd
  
  
  for (t in 1:(length(Time)-1)) { 
    
    ## Speed-dependent part of Minimum following distance
    # BX = (BXadd + (BXmult * RND1)) * sqrt(v)
    BX[t] = BXadd * sqrt(min(c(vn_complete[t], vn1_complete[t]), na.rm = TRUE)) ###0.8 | 0.886
    
    ## Minimum following distance
    ABX[t] = AX + BX[t] ### 16.91 | 16.996
    
    
    ## Speed-difference at which driver perceives that the lead vehicle is slow
    SDV[t] = ((sn_complete[t] - AX)/CX)^2 ###0.34 |
    
    
    ## Maximum following distance
    SDX[t] = AX + (EX * BX[t])
    
    
    ## Speed-difference when driver perceives that lead vehicle is slower
    CLDV[t] = SDV[t] * EX^2
    
    
    ## Speed-difference when driver perceives that lead vehicle is faster
    # OPDV = CLDV * (((-1) * OPDVadd) - (OPDVmult * NRND))
    OPDV[t] = CLDV[t] * ((-1) * OPDVadd)
    
    
    
    if (is.na(sn_complete[t]) | is.na(dv[t])) {
      
      BMAX[t] <- BMAXmult * (V_MAX - (vn_complete[t] * FaktorV)) 
      
      bn_complete[t] <- BMAX[t]
      
      cf_state_sim[t] <- "free_driving"  
      
    } else if (sn_complete[t] <= ABX[t]) {
      
      B_Emg[t] = 0.5 * ((dv[t])^2 / (AX - sn_complete[t])) + bl[t] + 
        (BMIN * ((ABX[t] - sn_complete[t]) / (ABX[t] - AX)))
      
      bn_complete[t] <- ifelse(B_Emg[t] < BMIN | B_Emg[t] > 0, BMIN, B_Emg[t])
      
      cf_state_sim[t] <- "emergency_braking"
      
    } else if (sn_complete[t] < SDX[t]) {
      
      if ( dv[t] > CLDV[t]) {
        
        bn_complete[t] <- BNULL
        
        cf_state_sim[t] <- "following"
        
      } else if (dv[t] > OPDV[t]) {
        
        bn_complete[t] <- BNULL
        
        cf_state_sim[t] <- "following"
        
      } else {
        
        BMAX[t] <- BMAXmult * (V_MAX - (vn_complete[t] * FaktorV)) 
        
        bn_complete[t] <- BMAX[t]
        
        cf_state_sim[t] <- "free_driving"
        
      }
      
    } else {
      
      if (dv[t] > SDV[t]) { 
        
        B_App[t] = 0.5 * ((dv[t])^2 / (ABX[t] - sn_complete[t])) + bl[t]
        
        bn_complete[t] <- ifelse(B_App[t] < BMIN, BMIN, B_App[t])
        
        cf_state_sim[t] <- "approaching"
        
      } else {
        
        BMAX[t] <- BMAXmult * (V_MAX - (vn_complete[t] * FaktorV)) ###2.19
        
        bn_complete[t] <- BMAX[t]
        
        cf_state_sim[t] <- "free_driving"
        
      }
    }
    
    vn_complete[t+1] <- vn_complete[t] + (bn_complete[t] * delta_T)
    
    vn_complete[t+1] <- ifelse(vn_complete[t+1] < 0, 0, vn_complete[t+1])
    
    xn_complete[t+1] <- xn_complete[t] - (vn_complete[t] * delta_T) + (0.5 * bn_complete[t] * (delta_T)^2)
    
    sn_complete[t+1] <- xn_complete[t+1] - xn1_complete[t+1]
    
    dv[t+1] <- vn_complete[t+1] - vn1_complete[t+1]
    

    
    
  }
  
  frspacing_pred <- sn_complete - L
  
  
  SSE <- sum(((frspacing_pred - data$LV_frspacing_m)^2)/ abs(data$LV_frspacing_m), na.rm = TRUE)/sum(abs(data$LV_frspacing_m), na.rm = TRUE)
  
  return(SSE)
}

希望现在每个人都清楚,这个问题无法在数据框上下文中解决。感谢@MrFlick,我知道现在该做什么了。我还尝试了enquoanddata %>% pull(!! var_name)它似乎做了什么eval(substitute())

标签: rdplyr

解决方案


这似乎是编写 R 函数的一种非常不寻常的方式,但你可以这样做

my_func <- function(data, var_mileage, var_volume, var_weight){
  
  eval(substitute({
    var_mileage_km_l <- 0.43 * var_mileage
    var_volume_l <- 0.016 * var_volume
    var_weight_kg <- 0.45 * var_weight    
    
    m <- lm(var_mileage_km_l ~ var_volume_l + var_weight_kg)
    
    summary(m)
  }), envir = data)
}

substitute()您作为列名传递的符号注入到表达式中。然后您可以在 data.frame 的上下文中对其进行评估。

或者你可以做类似的事情

my_func <- function(data, var_mileage, var_volume, var_weight){
  
  var_mileage <- eval(substitute(var_mileage), data)
  var_volume <- eval(substitute(var_volume), data)
  var_weight <- eval(substitute(var_weight), data)
  
  var_mileage_km_l <- 0.43 * var_mileage
  var_volume_l <- 0.016 * var_volume
  var_weight_kg <- 0.45 * var_weight
    
  m <- lm(var_mileage_km_l ~ var_volume_l + var_weight_kg)
  
  summary(m)
}

或者另一个常见的技巧是将列名转换为字符串。

my_func <- function(data, var_mileage, var_volume, var_weight){
   
  var_mileage_km_l <- 0.43 * data[[var_mileage]]
  var_volume_l <- 0.016 * data[[var_volume]]
  var_weight_kg <- 0.45 * data[[var_weight]]    
    
  m <- lm(var_mileage_km_l ~ var_volume_l + var_weight_kg)
  
  summary(m)
}
my_func(dataset1, "mpg", "disp", "wt")

推荐阅读