首页 > 解决方案 > 如何在 R dplyr/tidyr 的单个命令中多次使用 dplyr::summarize?

问题描述

我有一个由物种 1、2、3 和 4 组成的群落。我想用 dplyr 计算物种 i 的丰度与其余物种合计丰度之间的协方差,并且希望对每个物种都做这样的计算。只针对一个物种时,dplyr 代码可以正常运行;但当我尝试把多个 summarise 串联在一起时就会报错。有什么建议吗?

# Reproducible example data: abundances of four species recorded in three
# quadrants at each of three sites, for two months ("J" and "F"): 18 rows.
set.seed(111)
# `times` is written out in full; the abbreviated `time = 3` only worked
# through partial argument matching.
month <- rep(c("J", "J", "J", "F", "F", "F"), times = 3)
site <- rep(c(1, 2, 3), each = 6)
quadrant <- rep(c(1, 2, 3), times = 6)
sp1 <- sample(0:20, 18, replace = TRUE)
sp2 <- sample(0:15, 18, replace = TRUE)
sp3 <- sample(0:10, 18, replace = TRUE)
sp4 <- sample(0:4, 18, replace = TRUE)

df <- data.frame(month, site, quadrant, sp1, sp2, sp3, sp4)

# For each species, the combined abundance of the three *other* species.
df$sp2.3.4 <- df$sp2 + df$sp3 + df$sp4 # no sp1
df$sp3.4.1 <- df$sp3 + df$sp4 + df$sp1 # no sp2
df$sp1.2.4 <- df$sp1 + df$sp2 + df$sp4 # no sp3
df$sp1.2.3 <- df$sp1 + df$sp2 + df$sp3 # no sp4

library(tidyr)

# Reshape to long format in two passes: first stack the single-species
# columns into species/abundance, then stack the "all other species" totals
# into species.covar/abundance.covar. Each species row ends up paired with
# every combined-abundance column.
df.long <- df %>%
  gather(key = "species", value = "abundance", sp1, sp2, sp3, sp4) %>%
  gather(
    key = "species.covar",
    value = "abundance.covar",
    sp2.3.4, sp3.4.1, sp1.2.4, sp1.2.3
  )

# Store both key columns as factors.
df.long$species <- factor(df.long$species)
df.long$species.covar <- factor(df.long$species.covar)

library(dplyr)

# Per month/site group: correlation between the abundance of sp1 and the
# combined abundance of the other three species (sp2.3.4).
agg.cov <- as.data.frame(
  dplyr::summarise(
    group_by(df.long, month, site),
    covar.species1 = cor(
      abundance[species == "sp1"],
      abundance.covar[species.covar == "sp2.3.4"]
    )
  )
)



  # Attempt to compute all four correlations by chaining summarise() calls.
  # This fails: the first summarise() collapses each month/site group to a
  # single row that only holds the grouping columns and covar.species1, so
  # the following calls can no longer find abundance / abundance.covar
  # (this is the error reported for `covar.species2` right after this code).
  agg.cov <- df.long%>% 
  group_by(month,site)%>% 
  dplyr::summarise(covar.species1 = cor(abundance[species=="sp1"],abundance.covar[species.covar=="sp2.3.4"]))%>%
  dplyr::summarise(covar.species2 = cor(abundance[species=="sp2"],abundance.covar[species.covar=="sp3.4.1"]))%>%
  dplyr::summarise(covar.species3 = cor(abundance[species=="sp3"],abundance.covar[species.covar=="sp1.2.4"]))%>%
  dplyr::summarise(covar.species4 = cor(abundance[species=="sp4"],abundance.covar[species.covar=="sp1.2.3"]))%>%
  as.data.frame()

Error: Error: Problem with `summarise()` column `covar.species2`.
ℹ `covar.species2 = cor(...)`.
x object 'abundance.covar' not found
ℹ The error occurred in group 1: month = "F".

标签: r dplyr tidyr summarize

解决方案


以下三种方法应该可以工作

  1. 基于 map
library(dplyr)
library(stringr)
library(purrr)

# Names of the per-species abundance columns.
nm1 <- names(df)[startsWith(names(df), "sp")]

# Build one summary table per species: within each month/site group,
# correlate that species' abundance with the row-wise total of all the other
# species. The per-species tables are then joined on the grouping keys.
map(nm1, ~ df %>%
      group_by(month, site) %>%
      summarise(
        !!str_c("covar_species", "_", .x) :=
          cor(
            !!rlang::sym(.x),
            # all_of() marks nm1 as an external character vector instead of
            # relying on the deprecated bare-vector selection.
            rowSums(select(cur_data(), all_of(setdiff(nm1, !!.x))))
          ),
        .groups = "drop"
      )) %>%
  # Join explicitly on the grouping keys rather than letting left_join()
  # guess the common columns (and print a "Joining, by = ..." message).
  reduce(~ left_join(.x, .y, by = c("month", "site")))

-输出

# A tibble: 6 x 6
  month  site covar_species_sp1 covar_species_sp2 covar_species_sp3 covar_species_sp4
  <chr> <dbl>             <dbl>             <dbl>             <dbl>             <dbl>
1 F         1             0.479             0.987            -0.170            -0.980
2 F         2            -0.858            -0.454            -0.160             0.359
3 F         3            -0.999            -1.00             -0.933            NA    
4 J         1            -0.945            -0.963            NA                 0.596
5 J         2            -0.516            -0.148            -0.792             0.629
6 J         3             0.277            -0.591            -0.702             0.277

  2. 使用 pivot_longer 重塑
library(tidyr)

# Tag every observation row, reshape to long format, and within each original
# row compute the total of the *other* species (row total minus own value).
# Then correlate per month/site/species and spread the species back out into
# prefixed columns.
df %>%
  mutate(row_id = row_number()) %>%
  pivot_longer(cols = starts_with("sp"), names_to = "sp") %>%
  group_by(row_id) %>%
  mutate(others = sum(value) - value) %>%
  group_by(month, site, sp) %>%
  summarise(value = cor(value, others), .groups = "drop") %>%
  pivot_wider(
    names_from = sp,
    values_from = value,
    names_prefix = "covar_species_"
  )

-输出

# A tibble: 6 x 6
  month  site covar_species_sp1 covar_species_sp2 covar_species_sp3 covar_species_sp4
  <chr> <dbl>             <dbl>             <dbl>             <dbl>             <dbl>
1 F         1             0.479             0.987            -0.170            -0.980
2 F         2            -0.858            -0.454            -0.160             0.359
3 F         3            -0.999            -1.00             -0.933            NA    
4 J         1            -0.945            -0.963            NA                 0.596
5 J         2            -0.516            -0.148            -0.792             0.629
6 J         3             0.277            -0.591            -0.702             0.277

  3. 使用 across
# Add the row-wise total over all species columns, then, within each
# month/site group, correlate every species column with (total - itself).
df %>%
  mutate(Sum = rowSums(select(cur_data(), starts_with("sp")))) %>%
  group_by(month, site) %>%
  summarise(
    across(
      starts_with("sp"),
      ~ cor(.x, Sum - .x),
      .names = "covar_species_{.col}"
    ),
    .groups = "drop"
  )

-输出

# A tibble: 6 x 6
  month  site covar_species_sp1 covar_species_sp2 covar_species_sp3 covar_species_sp4
  <chr> <dbl>             <dbl>             <dbl>             <dbl>             <dbl>
1 F         1             0.479             0.987            -0.170            -0.980
2 F         2            -0.858            -0.454            -0.160             0.359
3 F         3            -0.999            -1.00             -0.933            NA    
4 J         1            -0.945            -0.963            NA                 0.596
5 J         2            -0.516            -0.148            -0.792             0.629
6 J         3             0.277            -0.591            -0.702             0.277

数据

# Example data used by the solutions above: abundances of four species
# recorded in three quadrants at each of three sites, for two months.
set.seed(111)
# `times` is written out in full; the abbreviated `time = 3` only worked
# through partial argument matching.
month <- rep(c("J", "J", "J", "F", "F", "F"), times = 3)
site <- rep(c(1, 2, 3), each = 6)
quadrant <- rep(c(1, 2, 3), times = 6)
sp1 <- sample(0:20, 18, replace = TRUE)
sp2 <- sample(0:15, 18, replace = TRUE)
sp3 <- sample(0:10, 18, replace = TRUE)
sp4 <- sample(0:4, 18, replace = TRUE)

df <- data.frame(month, site, quadrant, sp1, sp2, sp3, sp4)

推荐阅读