首页 > 解决方案 > R:提取每天出现次数最多的 ID 所对应的所有行

问题描述

我的数据集相当大,但如下所示:

PData <- 

    ID      Date        Time    OBJ     Loc Condition  Color
    ID21    12/20/2020  04:52   rack    A1  Good       Bright
    ID21    12/20/2020  04:52   desk    A3  Good       Bright
    ID5     12/20/2020  12:05   rack    A1  Partial    Dark
    ID5     12/20/2020  12:05   desk    A2  Partial    Dark
    ID3     12/21/2020  03:25   rack    A1  Partial    Bright
    ID3     12/21/2020  03:25   rack    A3  Partial    Bright
    ID3     12/21/2020  03:25   rack    A5  Partial    Bright
    ID3     12/21/2020  03:25   rack    A4  Partial    Bright
    ID3     12/21/2020  03:25   rack    A1  Partial    Bright
    ID3     12/21/2020  03:25   rack    A2  Partial    Bright
    ID12    12/21/2020  09:25   chair   A3  Good       Bright
    ID12    12/21/2020  09:25   computer A1  Good       Bright
    ID34    12/21/2020  16:35   rack    A1  Good       Bright
    ID34    12/21/2020  16:35   computer A2  Good       Bright
    ID34    12/21/2020  16:35   chair   A3  Good       Bright
    ID34    12/21/2020  16:35   desk    A4  Good       Bright
    ID33    12/21/2020  10:36   desk    A5  Good       Bright
    ID33    12/21/2020  10:36   desk    A2  Good       Bright
    ID33    12/21/2020  10:36   desk    A1  Good       Bright
    ID33    12/21/2020  10:36   desk    A3  Good       Bright
    ID33    12/21/2020  10:36   desk    A3  Good       Bright

我正在尝试根据各种条件进行过滤,然后提取每天条目最多的 ID。

 # Keep only the Good/Bright rows, then attach to every row the number of
 # rows sharing its (Date, ID) pair as column `n`. The result stays grouped
 # by Date and ID.
 newDat <- PData %>%
   filter(Condition == "Good", Color == "Bright") %>%
   group_by(Date, ID) %>%
   mutate(n = n())

到目前为止,我已经编写了上面的代码,但我一直卡在如何提取每天条目最多的 ID 所对应的行上。

下面是我想要实现的一个例子。

ID      Date        Time    OBJ     Loc Condition  Color
ID21    12/20/2020  04:52   rack    A1  Good       Bright
ID21    12/20/2020  04:52   desk    A3  Good       Bright
ID33    12/21/2020  10:36   desk    A5  Good       Bright
ID33    12/21/2020  10:36   desk    A2  Good       Bright
ID33    12/21/2020  10:36   desk    A1  Good       Bright
ID33    12/21/2020  10:36   desk    A3  Good       Bright
ID33    12/21/2020  10:36   desk    A3  Good       Bright

非常感激任何的帮助!!

标签: r, group-by, filtering

解决方案


我们可以先用 `add_count` 统计每个 (Date, ID) 组合的行数 `n`,再按 `Date` 重新分组,用 `filter` 只保留 `n` 等于当天最大值 `max(n)` 的行,最后 `ungroup` 并去掉辅助列 `n`:

# For each Date, keep every row belonging to the ID(s) that have the most
# Good/Bright entries on that date. IDs tied for the maximum are all kept.
library(dplyr)

PData %>%
  filter(Condition == "Good", Color == "Bright") %>%
  group_by(Date, ID) %>%
  mutate(n = n()) %>%      # rows per (Date, ID) pair
  group_by(Date) %>%       # regroup so max(n) is taken per day
  filter(n == max(n)) %>%
  ungroup() %>%
  select(-n)               # drop the helper count column

输出:

# A tibble: 7 x 7
#  ID    Date       Time  OBJ   Loc   Condition Color 
#  <chr> <chr>      <chr> <chr> <chr> <chr>     <chr> 
#1 ID21  12/20/2020 04:52 rack  A1    Good      Bright
#2 ID21  12/20/2020 04:52 desk  A3    Good      Bright
#3 ID33  12/21/2020 10:36 desk  A5    Good      Bright
#4 ID33  12/21/2020 10:36 desk  A2    Good      Bright
#5 ID33  12/21/2020 10:36 desk  A1    Good      Bright
#6 ID33  12/21/2020 10:36 desk  A3    Good      Bright
#7 ID33  12/21/2020 10:36 desk  A3    Good      Bright

数据

# Reproducible copy of the example data: 21 rows, 7 character columns.
# Rows that share an ID are contiguous, so ID and Time use the same run
# lengths; columns without long runs are spelled out value by value.
PData <- data.frame(
  ID = rep(
    c("ID21", "ID5", "ID3", "ID12", "ID34", "ID33"),
    times = c(2L, 2L, 6L, 2L, 4L, 5L)
  ),
  Date = rep(c("12/20/2020", "12/21/2020"), times = c(4L, 17L)),
  Time = rep(
    c("04:52", "12:05", "03:25", "09:25", "16:35", "10:36"),
    times = c(2L, 2L, 6L, 2L, 4L, 5L)
  ),
  OBJ = c(
    "rack", "desk",                         # ID21
    "rack", "desk",                         # ID5
    rep("rack", 6L),                        # ID3
    "chair", "computer",                    # ID12
    "rack", "computer", "chair", "desk",    # ID34
    rep("desk", 5L)                         # ID33
  ),
  Loc = c(
    "A1", "A3",                             # ID21
    "A1", "A2",                             # ID5
    "A1", "A3", "A5", "A4", "A1", "A2",     # ID3
    "A3", "A1",                             # ID12
    "A1", "A2", "A3", "A4",                 # ID34
    "A5", "A2", "A1", "A3", "A3"            # ID33
  ),
  Condition = rep(c("Good", "Partial", "Good"), times = c(2L, 8L, 11L)),
  Color = rep(c("Bright", "Dark", "Bright"), times = c(2L, 2L, 17L)),
  stringsAsFactors = FALSE
)

推荐阅读