首页 > 解决方案 > 在 R 中汇总列表中的数据

问题描述

我有许多数据框都包含在一个列表1a1中,列表中的名称是收集数据的日期,例如

 names(1a1)
[1] "Jan4" "Jan5" "Jan6" "Jan7" "Jan8" "Jan9" "Jan10"

列表中的所有数据框都采用相同的格式

例如

 dput(Jan4)
structure(list(Species = c("bluti", "bluti", "bluti", "bluti", 
"bluti", "bluti", "bluti", "bluti", "bluti", "bluti", "bluti", 
"bluti", "bluti", "bluti", "bluti", "bluti", "bluti", "bluti", 
"bluti", "bluti", "bluti", "bluti", "bluti", "bluti", "bluti", 
"bluti", "bluti", "bluti", "bluti", "bluti", "bluti", "bluti", 
"bluti", "bluti", "bluti", "bluti", "bluti", "bluti", "bluti", 
"bluti", "bluti", "bluti", "bluti", "bluti", "bluti", "bluti", 
"bluti", "bluti", "bluti", "bluti", "greti", "greti", "greti", 
"greti", "greti", "greti", "greti", "greti", "greti", "greti", 
"greti", "greti", "greti", "greti", "greti", "greti", "greti", 
"greti", "greti", "greti", "greti", "greti", "greti", "greti", 
"greti", "greti", "greti", "greti", "greti", "greti", "greti", 
"greti", "greti", "greti", "greti", "greti", "greti", "greti", 
"greti", "greti", "greti", "greti"), Pit.tag = c("01103FD6EF", 
"01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", 
"01103FD77C", "01103FD77C", "01103FD77C", "01103FD77C", "01103FD6EF", 
"01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", 
"01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", 
"01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", 
"01103FD6EF", "01103FD6EF", "01103FD77C", "01103FD77C", "01103FD77C", 
"01103FD77C", "01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", 
"01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", 
"01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", 
"01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", "01103F9F29", 
"01103F9F29", "01103F9F29", "01103F9F29", "0700EDADB8", "0700EDADB8", 
"0700EDADB8", "0700EDADB8", "0700EDADB8", "0700EDADB8", "0700EDADB8", 
"0700EDADB8", "0700EDADB8", "0700EDADB8", "0700EDADB8", "0700EDADB8", 
"0700EDADB8", "0700EDADB8", "0700EDADB8", "0700EDADB8", "0700EDADB8", 
"0700EDADB8", "0700EDADB8", "0700EDADB8", "01103F9F29", "01103F9F29", 
"01103F9F29", "01103F9F29", "0700EDADB8", "0700EDADB8", "0700EDADB8", 
"0700EDADB8", "0700EDADB8", "0700EDADB8", "0700EDADB8", "0700EDADB8", 
"0700EDADB8", "0700EDADB8", "01103F9F29", "01103F9F29", "01103F9F29", 
"01103F9F29"), Date = c("04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021"), Time = c("08:01:41", 
"08:01:42", "08:01:42", "08:03:09", "08:03:09", "08:03:10", "08:02:57", 
"08:02:57", "08:02:58", "08:03:36", "08:01:41", "08:01:42", "08:01:42", 
"08:03:09", "08:03:09", "08:03:10", "08:01:41", "08:01:42", "08:01:42", 
"08:03:09", "08:03:09", "08:03:10", "08:01:41", "08:01:42", "08:01:42", 
"08:03:09", "08:03:09", "08:03:10", "08:02:57", "08:02:57", "08:02:58", 
"08:03:36", "08:01:41", "08:01:42", "08:01:42", "08:03:09", "08:03:09", 
"08:03:10", "08:01:41", "08:01:42", "08:01:42", "08:03:09", "08:03:09", 
"08:03:10", "08:01:41", "08:01:42", "08:01:42", "08:03:09", "08:03:09", 
"08:03:10", "08:02:26", "08:02:26", "08:03:37", "08:03:38", "08:00:43", 
"08:00:44", "08:00:44", "08:01:39", "08:01:39", "08:01:40", "08:01:40", 
"08:02:54", "08:02:54", "08:02:55", "08:00:43", "08:00:44", "08:00:44", 
"08:01:39", "08:01:39", "08:01:40", "08:01:40", "08:02:54", "08:02:54", 
"08:02:55", "08:02:26", "08:02:26", "08:03:37", "08:03:38", "08:00:43", 
"08:00:44", "08:00:44", "08:01:39", "08:01:39", "08:01:40", "08:01:40", 
"08:02:54", "08:02:54", "08:02:55", "08:02:26", "08:02:26", "08:03:37", 
"08:03:38")), row.names = c(NA, -92L), class = "data.frame")

我想做的是创建一个新的数据框来汇总这些数据,从中可以看到每个个体(Pit.tag)被记录了多少次(创建一个名为 No_of_visits 的新变量),以及它属于哪个物种(Species)

例如

Pit.tag     Species   No_of_visits
01103FD6EF   bluti    47

我可以使用以下代码分别计算出各个部分:

Visitsbypit<-sapply(tapply(1a1$`Jan4`$Species, 1a1$`Jan4`$Pit.tag, length), unique)
Vistsbyspecies<-sapply(tapply(1a1$`Jan4`$Pit.tag, 1a1$`Jan4`$Species, length), unique)


Visitsbypit
01103F9776 01103FA8DD 01103FC9DE 
        10        133        255

Vistsbyspecies
greti bluti 
   10   388

注意:上面的这些数据与我在这里提供的数据不匹配,它们来自另一个数据框

这也并不能真正得到我想要的结果:Vistsbyspecies 只是每个物种的总访问量,并没有与 Pit.tag 记录关联起来。我想把 Visitsbypit 中的 Pit.tag 记录与相应的物种关联起来

但我无法更进一步,把这些信息整合到一起。我还认为一定有更好的方法可以对整个列表完成此操作,而不必逐个指定例如 1a1$`Jan4`$Species,在我看来这违背了将数据框放在列表中并使用 sapply / tapply 的初衷

标签: r, sapply, tapply

解决方案


你想要这个吗?

library(dplyr)

Jan4 %>% count(Species, Pit.tag)

#  Species    Pit.tag  n
#1   bluti 01103FD6EF 42
#2   bluti 01103FD77C  8
#3   greti 01103F9F29 12
#4   greti 0700EDADB8 30

要应用于数据框列表,请使用 lapply 或 purrr::map:

lapply(`1a1`, function(x) x %>% count(Species, Pit.tag))

推荐阅读