r - 使用列名模式在列表中创建新变量
问题描述
您好我正在尝试使用以下代码将 33 个变量减少为单个指标(我知道它的效率极低:
data_indicator <- data %>%
mutate(plot_1=(farm_sell_1+farm_lease_1+farm_bequeath_1)/3, na.rm=T) %>%
mutate(plot_sec_1=ifelse(plot_1>.5, 1, 0)) %>%
mutate(plot_2=(farm_sell_2+farm_lease_2+farm_bequeath_2)/3, na.rm=T) %>%
mutate(plot_sec_2=ifelse(plot_2>.5, 1, 0)) %>%
mutate(plot_3=(farm_sell_3+farm_lease_3+farm_bequeath_3)/3, na.rm=T) %>%
mutate(plot_sec_3=ifelse(plot_3>.5, 1, 0)) %>%
mutate(plot_4=(farm_sell_4+farm_lease_4+farm_bequeath_4)/3, na.rm=T) %>%
mutate(plot_sec_4=ifelse(plot_4>.5, 1, 0)) %>%
mutate(plot_5=(farm_sell_5+farm_lease_5+farm_bequeath_5)/3, na.rm=T) %>%
mutate(plot_sec_5=ifelse(plot_5>.5, 1, 0)) %>%
mutate(plot_6=(farm_sell_6+farm_lease_6+farm_bequeath_6)/3, na.rm=T) %>%
mutate(plot_sec_6=ifelse(plot_6>.5, 1, 0)) %>%
mutate(plot_7=(farm_sell_7+farm_lease_7+farm_bequeath_7)/3, na.rm=T) %>%
mutate(plot_sec_7=ifelse(plot_7>.5, 1, 0)) %>%
mutate(plot_8=(farm_sell_8+farm_lease_8+farm_bequeath_8)/3, na.rm=T) %>%
mutate(plot_sec_8=ifelse(plot_8>.5, 1, 0)) %>%
mutate(plot_9=(farm_sell_9+farm_lease_9+farm_bequeath_9)/3, na.rm=T) %>%
mutate(plot_sec_9=ifelse(plot_9>.5, 1, 0)) %>%
mutate(plot_10=(farm_sell_10+farm_lease_10+farm_bequeath_10)/3, na.rm=T) %>%
mutate(plot_sec_10=ifelse(plot_10>.5, 1, 0)) %>%
mutate(plot_11=(farm_sell_11+farm_lease_11+farm_bequeath_11)/3, na.rm=T) %>%
mutate(plot_sec_11=ifelse(plot_11>.5, 1, 0)) %>%
mutate(num_plots_sec = plot_sec_1+plot_sec_2+plot_sec_3+plot_sec_4+plot_sec_5+plot_sec_6+plot_sec_7+plot_sec_8+plot_sec_9+plot_sec_10+plot_sec_11, na.rm=T)
我如何循环遍历这 11 个变量,以便为每个以特定数字“_1”结尾的变量获得一个度量,然后从 1:11 对这些数字求和以获得 1 个指标?
如何遍历所有以数字结尾的变量,将它们加在一起以创建以数字结尾的新变量,然后将所有 11 个变量加在一起成为一个指标?
当前数据框示例:
$ farm_sell_1 <dbl+lbl> 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0,...
$ farm_sell_2 <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, NA, 1, 0, 0, 0, 0, 0, 0, NA, N...
$ farm_sell_3 <dbl> 0, 1, 0, 0, NA, 0, NA, NA, NA, 0, 0, NA, 1, NA, 1, 0, NA, NA, NA, NA,...
$ farm_sell_4 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, 1, NA, 1, NA, NA, NA, ...
$ farm_sell_5 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA...
$ farm_sell_6 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA...
$ farm_sell_7 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA...
$ farm_sell_8 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
$ farm_sell_9 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
$ farm_sell_10 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
$ farm_sell_11 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
$ farm_lease_1 <dbl+lbl> 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0,...
$ farm_lease_2 <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, NA, 1, 0, 0, 0, 0, 0, 0, NA, N...
$ farm_lease_3 <dbl> 1, 1, 0, 0, NA, 0, NA, NA, NA, 0, 0, NA, 1, NA, 1, 0, NA, NA, NA, NA,...
$ farm_lease_4 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, 1, NA, 1, NA, NA, NA, ...
$ farm_lease_5 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA...
$ farm_lease_6 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA...
$ farm_lease_7 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA...
$ farm_lease_8 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
$ farm_lease_9 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
$ farm_lease_10 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
$ farm_lease_11 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
$ farm_bequeath_1 <dbl+lbl> 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0,...
$ farm_bequeath_2 <dbl> 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, NA, 1, 1, 0, 0, 0, 0, 0, NA, N...
$ farm_bequeath_3 <dbl> 1, 1, 1, 1, NA, 0, NA, NA, NA, 0, 0, NA, 1, NA, 1, 1, NA, NA, NA, NA,...
$ farm_bequeath_4 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, 1, NA, 1, NA, NA, NA, ...
$ farm_bequeath_5 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA...
$ farm_bequeath_6 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA...
$ farm_bequeath_7 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA...
$ farm_bequeath_8 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
$ farm_bequeath_9 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
$ farm_bequeath_10 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
$ farm_bequeath_11 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
解决方案
使这更清晰的一种方法是将此数据转换为长数据。有关宽与长的更多信息,请查看此站点:https ://uc-r.github.io/tidyr
我将逐步完成此操作,以便您了解它是如何工作的,然后包含最后一次完成所有操作的代码。
首先使用一些假数据:
fake.data <- data.frame(matrix(data = rbinom(1650, 1, 0.5), nrow = 50, ncol = 33))
colnames(fake.data) <- c(paste0("farm_sell_", 1:11), paste0("farm_lease_", 1:11),
paste0("farm_bequeath_", 1:11))
它看起来像你上面的:
'data.frame': 50 obs. of 33 variables:
$ farm_sell_1 : int 0 0 0 1 0 1 0 0 1 1 ...
$ farm_sell_2 : int 0 0 1 1 1 1 1 1 0 0 ...
$ farm_sell_3 : int 1 0 0 0 1 1 0 1 0 0 ...
$ farm_sell_4 : int 1 1 1 0 1 0 0 0 1 1 ...
$ farm_sell_5 : int 1 1 0 0 1 0 0 0 1 1 ...
$ farm_sell_6 : int 0 1 0 0 0 0 0 0 0 0 ...
$ farm_sell_7 : int 1 0 1 1 0 0 0 1 0 1 ...
$ farm_sell_8 : int 0 0 1 0 0 1 1 0 1 0 ...
$ farm_sell_9 : int 1 1 1 0 0 0 1 1 1 1 ...
$ farm_sell_10 : int 1 1 0 0 1 0 1 1 0 0 ...
$ farm_sell_11 : int 0 0 0 0 1 1 1 0 0 0 ...
$ farm_lease_1 : int 0 0 1 1 0 0 1 0 1 0 ...
$ farm_lease_2 : int 0 0 0 1 1 1 1 1 1 0 ...
$ farm_lease_3 : int 0 1 1 1 0 1 1 1 0 0 ...
$ farm_lease_4 : int 1 0 1 1 0 1 1 1 1 1 ...
$ farm_lease_5 : int 0 0 0 0 1 1 0 1 0 1 ...
$ farm_lease_6 : int 0 1 1 0 1 1 0 0 1 1 ...
$ farm_lease_7 : int 0 0 0 1 1 1 0 1 1 1 ...
$ farm_lease_8 : int 0 1 0 1 0 0 1 0 1 0 ...
$ farm_lease_9 : int 0 0 1 0 0 1 0 0 1 1 ...
$ farm_lease_10 : int 1 1 1 1 0 1 1 1 0 1 ...
$ farm_lease_11 : int 1 0 0 1 1 0 0 0 1 1 ...
$ farm_bequeath_1 : int 1 1 1 0 0 1 1 1 0 0 ...
$ farm_bequeath_2 : int 0 1 1 0 0 1 1 1 1 1 ...
$ farm_bequeath_3 : int 1 0 0 1 1 0 1 0 0 1 ...
$ farm_bequeath_4 : int 0 0 1 1 1 0 0 0 1 0 ...
$ farm_bequeath_5 : int 1 1 0 0 0 0 0 1 1 0 ...
$ farm_bequeath_6 : int 0 1 0 0 0 0 1 0 1 1 ...
$ farm_bequeath_7 : int 0 0 0 0 1 0 1 0 0 1 ...
$ farm_bequeath_8 : int 0 1 0 1 1 0 0 1 1 1 ...
$ farm_bequeath_9 : int 0 0 1 1 0 1 1 0 0 1 ...
$ farm_bequeath_10: int 0 0 0 1 1 1 0 0 1 1 ...
$ farm_bequeath_11: int 0 0 0 1 0 0 0 0 0 0 ...
您将需要dplyr
andtidyr
包来完成所有这些工作。
library(dplyr)
library(tidyr)
pivot_longer
然后我们使用from让它变得很长tidyr
。我在此处添加了一个键来参考每个指标适用于哪个农场。我们稍后将需要它进行分组,但基本上与原始数据中的行号匹配。
data.long <- fake.data %>%
#add a key to keep track of stuff
mutate(farm_key = 1:n()) %>%
pivot_longer(farm_sell_1:farm_bequeath_11, names_to = "variable", values_to = "value")
这看起来像这样
# A tibble: 6 x 3
farm_key variable value
<int> <chr> <int>
1 1 farm_sell_1 0
2 1 farm_sell_2 0
3 1 farm_sell_3 1
4 1 farm_sell_4 1
接下来,我们使用separate
将您的 farm_sell_1 等变量拆分为更机器可读的内容:
data.long2 <- data.long %>%
tidyr::separate(col = variable, into = c("farm", "var", "var_num"), sep = "_")
结果数据如下:
# A tibble: 6 x 5
farm_key farm var var_num value
<int> <chr> <chr> <chr> <int>
1 1 farm sell 1 0
2 1 farm sell 2 0
3 1 farm sell 3 1
4 1 farm sell 4 1
然后我们完成您在上面所做的所有添加。首先,我们按 var_num 分组并为每个农场添加这些变量。这与添加 farm_sell_1 + farm_lease_1 + farm_bequeath_1 并除以 3 相同,就像您在上面所做的那样。ifelse
然后我们通过语句计算 plot_sec 。最后,我们可以将每个农场的这 11 个索引(_1、_2、_3 各一个)相加,得到每个农场的一个索引值。
data.long3 <- data.long2 %>%
group_by(farm_key, var_num) %>%
summarise(plot_val = sum(value, na.rm = T)/3) %>% #same as plot_1, plot_2, etc.
ungroup() %>%
mutate(plot_sec = ifelse(plot_val>0.5,1,0)) %>%
#sum together to get one value for each farm_key
group_by(farm_key) %>%
summarise(num_plots_sec = sum(plot_sec)) %>%
ungroup()
然后数据看起来像这样:
# A tibble: 6 x 2
farm_key num_plots_sec
<int> <dbl>
1 1 4
2 2 4
3 3 4
4 4 8
5 5 7
正如所承诺的那样,一段代码可以一次完成所有操作:
data.one.ind <- fake.data %>%
#add a key to keep track of stuff
mutate(farm_key = 1:n()) %>%
pivot_longer(farm_sell_1:farm_bequeath_11, names_to = "variable", values_to = "value") %>%
tidyr::separate(col = variable, into = c("farm", "var", "var_num"), sep = "_") %>%
group_by(farm_key, var_num) %>%
summarise(plot_val = sum(value, na.rm = T)/3) %>% #same as plot_1, plot_2, etc.
ungroup() %>%
mutate(plot_sec = ifelse(plot_val>0.5,1,0)) %>%
#sum together to get one value for each farm_key
group_by(farm_key) %>%
summarise(num_plots_sec = sum(plot_sec)) %>%
ungroup()
总而言之,它实际上可能不会为您节省太多的打字时间。但它更能适应变量的变化。
推荐阅读
- javascript - 如何获取集合的属性并放入其他对象数组?
- mysql - MySQL获取最后10条记录中的第一条记录
- r - 在嵌套组上应用 `qwraps2`
- java - 如何使用 Mididriver 或其他一些 android lib 来选择乐器和音符?
- python - Python 请求:从响应标头中的 Set-Cookie 设置 cookie
- android - 如何获得彩色状态栏图标?
- ios - 如何快速读取预加载的 sqlite 数据库
- mongodb - 如何使用 python 代码从 mongodb 中删除文档
- vb.net - 在自托管 REST API 中上传文件的自定义 Post 功能
- css - Data-sly-list 正在添加空格,导致错误