首页 > 解决方案 > 在 R 中用向量列表替换 tibble 列表中的指定列

问题描述

我正在把一个数据文件拆分成若干组件,对每个组件运行一个函数,然后再把它们重新拼接起来,用新值替换指定的变量。函数如下:它统计数据文件某个组件中 "c" 的个数,如果只有 1 个,就把剩余值中最小的那个也替换为 "c",从而保证数据文件的任何单个组件中,只要出现 "c",就至少有 2 个。

#' Apply secondary suppression to one component of the data.
#'
#' If exactly one element of `x` equals the suppression marker "c", the
#' smallest of the remaining values is replaced by "c" as well, so a component
#' never contains a single suppressed value. In every other case `x` is
#' returned unchanged.
#'
#' @param x A vector of counts in which suppressed entries are stored as "c"
#'   (typically a character vector such as c("c", "93", "26")).
#' @return A vector of the same length as `x`, with at most one additional
#'   element replaced by "c".
secondary_supp <- function(x) {
  # NA-safe marker test: a missing value is not treated as suppressed and
  # does not make the `if` condition below fail.
  suppressed <- !is.na(x) & x == "c"
  if (sum(suppressed) != 1) {
    return(x)
  }
  # Convert only the unsuppressed entries to numbers. Passing the marker "c"
  # through numeric coercion would raise an "NAs introduced by coercion"
  # warning on every call.
  values <- rep(NA_real_, length(x))
  values[!suppressed] <- as.numeric(as.character(x[!suppressed]))
  # which.min() skips NA, so it returns the position of the smallest
  # remaining count (or nothing when there is no other value).
  replace(x, which.min(values), "c")
}

目前我开始使用以下方法分解数据文件:

# Nest the data: one row per year/area/treatment_a/treatment_b combination,
# with the remaining columns packed into a list-column of tibbles.
exploded_file <- nest(group_by(data, year, area, treatment_a, treatment_b))

# Pull out the list of tibbles (the fifth column, after the four group keys).
components <- exploded_file[[5]]

# Run secondary suppression on the measure_a column of every component.
measure_a_supped <- lapply(components, function(component) {
  secondary_supp(component[["measure_a"]])
})

正是在这一步,我需要用这些新的 "measure_a_supped" 值替换列表中每个 tibble 的 "measure_a" 变量的原始值。

我设想可以用另一个 tibble 列表替换 'exploded_file' 中的 tibble 列,之后就可以通过取消嵌套(unnest)和取消分组(ungroup)还原出完整的数据文件。我卡在了这一步:如何用我创建的向量列表 (measure_a_supped) 替换这些值,再放回 tibble 列表中,从而得到结构与原始数据表相同、但值已更新的结果。

如果需要,以下是可用于运行上述代码的示例数据:

library(tidyverse)

raw_data = read.table(text="year    level   area    location    treatment_a treatment_b measure_a
2015    national    NULL    NULL    Total   Total   563
                  2015  national    NULL    NULL    group_a Total   370
                  2015  national    NULL    NULL    group_b Total   193
                  2015  location    area_a  location_a  Total   Total   119
                  2015  location    area_a  location_a  group_a Total   93
                  2015  location    area_a  location_a  group_b Total   26
                  2015  location    area_a  location_b  Total   Total   163
                  2015  location    area_a  location_b  group_a Total   92
                  2015  location    area_a  location_b  group_b Total   71
                  2015  area    area_a  NULL    Total   Total   282
                  2015  area    area_a  NULL    group_a Total   185
                  2015  area    area_a  NULL    group_b Total   97
                  2015  location    area_b  location_c  Total   Total   143
                  2015  location    area_b  location_c  group_a Total   88
                  2015  location    area_b  location_c  group_b Total   55
                  2015  location    area_b  location_d  Total   Total   138
                  2015  location    area_b  location_d  group_a Total   97
                  2015  location    area_b  location_d  group_b Total   41
                  2015  area    area_b  NULL    Total   Total   281
                  2015  area    area_b  NULL    group_a Total   185
                  2015  area    area_b  NULL    group_b Total   96
                  2015  national    NULL    NULL    Total   group_1 389
                  2015  national    NULL    NULL    group_a group_1 295
                  2015  national    NULL    NULL    group_b group_1 94
                  2015  location    area_a  location_a  Total   group_1 80
                  2015  location    area_a  location_a  group_a group_1 64
                  2015  location    area_a  location_a  group_b group_1 16
                  2015  location    area_a  location_b  Total   group_1 88
                  2015  location    area_a  location_b  group_a group_1 72
                  2015  location    area_a  location_b  group_b group_1 16
                  2015  area    area_a  NULL    Total   group_1 168
                  2015  area    area_a  NULL    group_a group_1 136
                  2015  area    area_a  NULL    group_b group_1 32
                  2015  location    area_b  location_c  Total   group_1 125
                  2015  location    area_b  location_c  group_a group_1 75
                  2015  location    area_b  location_c  group_b group_1 50
                  2015  location    area_b  location_d  Total   group_1 96
                  2015  location    area_b  location_d  group_a group_1 84
                  2015  location    area_b  location_d  group_b group_1 12
                  2015  area    area_b  NULL    Total   group_1 221
                  2015  area    area_b  NULL    group_a group_1 159
                  2015  area    area_b  NULL    group_b group_1 62
                  2015  national    NULL    NULL    Total   group_2 426
                  2015  national    NULL    NULL    group_a group_2 212
                  2015  national    NULL    NULL    group_b group_2 214
                  2015  location    area_a  location_a  Total   group_2 72
                  2015  location    area_a  location_a  group_a group_2 71
                  2015  location    area_a  location_a  group_b group_2 1
                  2015  location    area_a  location_b  Total   group_2 135
                  2015  location    area_a  location_b  group_a group_2 80
                  2015  location    area_a  location_b  group_b group_2 55
                  2015  area    area_a  NULL    Total   group_2 207
                  2015  area    area_a  NULL    group_a group_2 151
                  2015  area    area_a  NULL    group_b group_2 56
                  2015  location    area_b  location_c  Total   group_2 125
                  2015  location    area_b  location_c  group_a group_2 30
                  2015  location    area_b  location_c  group_b group_2 95
                  2015  location    area_b  location_d  Total   group_2 94
                  2015  location    area_b  location_d  group_a group_2 31
                  2015  location    area_b  location_d  group_b group_2 63
                  2015  area    area_b  NULL    Total   group_2 219
                  2015  area    area_b  NULL    group_a group_2 61
                  2015  area    area_b  NULL    group_b group_2 158
                  2014  national    NULL    NULL    Total   Total   470
                  2014  national    NULL    NULL    group_a Total   156
                  2014  national    NULL    NULL    group_b Total   314
                  2014  location    area_a  location_a  Total   Total   117
                  2014  location    area_a  location_a  group_a Total   28
                  2014  location    area_a  location_a  group_b Total   89
                  2014  location    area_a  location_b  Total   Total   125
                  2014  location    area_a  location_b  group_a Total   27
                  2014  location    area_a  location_b  group_b Total   98
                  2014  area    area_a  NULL    Total   Total   242
                  2014  area    area_a  NULL    group_a Total   55
                  2014  area    area_a  NULL    group_b Total   187
                  2014  location    area_b  location_c  Total   Total   71
                  2014  location    area_b  location_c  group_a Total   25
                  2014  location    area_b  location_c  group_b Total   46
                  2014  location    area_b  location_d  Total   Total   157
                  2014  location    area_b  location_d  group_a Total   76
                  2014  location    area_b  location_d  group_b Total   81
                  2014  area    area_b  NULL    Total   Total   228
                  2014  area    area_b  NULL    group_a Total   101
                  2014  area    area_b  NULL    group_b Total   127
                  2014  national    NULL    NULL    Total   group_1 502
                  2014  national    NULL    NULL    group_a group_1 210
                  2014  national    NULL    NULL    group_b group_1 292
                  2014  location    area_a  location_a  Total   group_1 179
                  2014  location    area_a  location_a  group_a group_1 84
                  2014  location    area_a  location_a  group_b group_1 95
                  2014  location    area_a  location_b  Total   group_1 100
                  2014  location    area_a  location_b  group_a group_1 49
                  2014  location    area_a  location_b  group_b group_1 51
                  2014  area    area_a  NULL    Total   group_1 279
                  2014  area    area_a  NULL    group_a group_1 133
                  2014  area    area_a  NULL    group_b group_1 146
                  2014  location    area_b  location_c  Total   group_1 107
                  2014  location    area_b  location_c  group_a group_1 35
                  2014  location    area_b  location_c  group_b group_1 72
                  2014  location    area_b  location_d  Total   group_1 116
                  2014  location    area_b  location_d  group_a group_1 42
                  2014  location    area_b  location_d  group_b group_1 74
                  2014  area    area_b  NULL    Total   group_1 223
                  2014  area    area_b  NULL    group_a group_1 77
                  2014  area    area_b  NULL    group_b group_1 146
                  2014  national    NULL    NULL    Total   group_2 478
                  2014  national    NULL    NULL    group_a group_2 241
                  2014  national    NULL    NULL    group_b group_2 237
                  2014  location    area_a  location_a  Total   group_2 69
                  2014  location    area_a  location_a  group_a group_2 56
                  2014  location    area_a  location_a  group_b group_2 13
                  2014  location    area_a  location_b  Total   group_2 139
                  2014  location    area_a  location_b  group_a group_2 52
                  2014  location    area_a  location_b  group_b group_2 87
                  2014  area    area_a  NULL    Total   group_2 208
                  2014  area    area_a  NULL    group_a group_2 108
                  2014  area    area_a  NULL    group_b group_2 100
                  2014  location    area_b  location_c  Total   group_2 149
                  2014  location    area_b  location_c  group_a group_2 50
                  2014  location    area_b  location_c  group_b group_2 99
                  2014  location    area_b  location_d  Total   group_2 121
                  2014  location    area_b  location_d  group_a group_2 83
                  2014  location    area_b  location_d  group_b group_2 38
                  2014  area    area_b  NULL    Total   group_2 270
                  2014  area    area_b  NULL    group_a group_2 133
                  2014  area    area_b  NULL    group_b group_2 137

                  ",header=TRUE)

# Start from a copy so raw_data keeps the original counts.
data <- raw_data
# Primary suppression: every measure_a value below 40 is overwritten with the
# marker "c"; assigning a string turns the column into character.
data[["measure_a"]][data[["measure_a"]] < 40] <- "c"

标签: r

解决方案


我们可以使用 `mutate_if` 来选择要处理的列,然后将 `replace` 与 `lag` 结合使用,把紧跟在 "area_c" 之后的下一个值替换为 "area_c"。

library(dplyr)
library(purrr)
# Make `area` a plain character column so a new value can be written into it.
raw_data[["area"]] <- as.character(raw_data[["area"]])
# Plant "area_c" in the first row so that one group contains it for testing.
raw_data[["area"]][1] <- "area_c"
# Split into a list of data frames, one per year/level combination
# (used here instead of nest()).
lst <- group_split(raw_data, year, level)

# Try the replacement on a single element of lst (the sixth one) first.
lst[[6]] %>%
  # Factor columns become character so string values can be written to them.
  mutate_if(is.factor, as.character) %>%
  mutate_if(
    # Only touch columns in which exactly one entry contains the letter "c".
    function(col) sum(grepl("c", col)) == 1,
    # Overwrite the entry that directly follows "area_c" with "c".
    function(col) replace(col, lag(col) == "area_c", "c")
  )



# Apply the same steps to every element of lst; the result is a list.
# Use map_df if you need the output as df.
# NOTE(review): the single-group trial writes "c" as the replacement value,
# while this call writes "area_c" - confirm which one is intended.
map(lst, function(group) {
  group %>%
    mutate_if(is.factor, as.character) %>%
    mutate_if(
      # Only touch columns in which exactly one entry contains the letter "c".
      function(col) sum(grepl("c", col)) == 1,
      # Overwrite the entry that directly follows "area_c" with "area_c".
      function(col) replace(col, lag(col) == "area_c", "area_c")
    )
})

推荐阅读