首页 > 解决方案 > R:根据变量应用不同的过滤器

问题描述

从下面的数据集中,我想保留一月份的第一个日期,但对于其他月份,我只想保留该月最后一个日期的行。像这样:

2021-01-01 493 jan 2021
2021-02-28 707 feb 2021
2021-03-31 688 mrt 2021
2021-04-30 1217 apr 2021
2021-05-31 1237 mei 2021

我尝试了以下方法:

if (str_detect(Month2, "jan" ) { 
    filter(Date == min(Date))
  } else filter(Date == max(Date))

但没有成功。然后我尝试为 if 语句创建一个新的(数字)变量:

mutate(JANDECFIX = case_when(
    str_detect(Month2, "jan") ~ 1,
    TRUE ~ 2
  )   
         ) %>%
 
  if (JANDECFIX == 1 ) { 
    filter(Date == min(Date))
  } else filter(Date == max(Date))

但随后出现以下错误:

Error in if (.) JANDECFIX == 1 else { : 
  argument is not interpretable as logical
In addition: Warning message:
In if (.) JANDECFIX == 1 else { :
  the condition has length > 1 and only the first element will be used

我怎样才能让这段代码正常工作,并得到开头示例那样的结果?

df:

structure(list(Date = structure(c(18722, 18723, 18721, 18653, 
18654, 18650, 18652, 18649, 18758, 18651, 18724, 18737, 18738, 
18775, 18639, 18655, 18749, 18778, 18747, 18772, 18750, 18771, 
18748, 18632, 18640, 18631, 18764, 18690, 18689, 18773, 18763, 
18777, 18646, 18647, 18729, 18757, 18725, 18759, 18680, 18633, 
18769, 18774, 18736, 18661, 18648, 18739, 18730, 18762, 18660, 
18694, 18751, 18776, 18656, 18659, 18761, 18745, 18746, 18707, 
18687, 18688, 18770, 18695, 18681, 18693, 18740, 18638, 18728, 
18676, 18634, 18677, 18641, 18726, 18678, 18743, 18760, 18709, 
18742, 18692, 18657, 18679, 18702, 18720, 18691, 18645, 18662, 
18683, 18635, 18658, 18765, 18701, 18673, 18663, 18706, 18741, 
18682, 18719, 18642, 18731, 18699, 18675, 18755, 18698, 18753, 
18700, 18766, 18732, 18696, 18756, 18668, 18684, 18705, 18686, 
18636, 18715, 18744, 18637, 18717, 18630, 18644, 18685, 18718, 
18735, 18664, 18767, 18727, 18703, 18713, 18674, 18733, 18754, 
18710, 18666, 18768, 18697, 18643, 18667, 18714, 18716, 18734, 
18704, 18752, 18711, 18669, 18670, 18708, 18629, 18672, 18671, 
18712, 18628, 18665), class = "Date"), UPV = c(3358L, 2843L, 
2771L, 2358L, 2035L, 2000L, 1965L, 1664L, 1638L, 1622L, 1578L, 
1572L, 1525L, 1479L, 1448L, 1360L, 1332L, 1237L, 1217L, 1207L, 
1182L, 1181L, 1180L, 1177L, 1163L, 1151L, 1123L, 1118L, 1110L, 
1090L, 1071L, 1068L, 1058L, 1053L, 1049L, 1048L, 1043L, 1038L, 
1031L, 1030L, 1025L, 1014L, 1012L, 1008L, 1006L, 996L, 991L, 
990L, 979L, 974L, 972L, 964L, 961L, 957L, 953L, 944L, 936L, 917L, 
914L, 909L, 900L, 894L, 880L, 878L, 873L, 869L, 862L, 862L, 857L, 
857L, 848L, 844L, 842L, 839L, 832L, 829L, 828L, 826L, 825L, 825L, 
822L, 819L, 813L, 805L, 796L, 795L, 793L, 787L, 784L, 778L, 776L, 
775L, 775L, 774L, 768L, 763L, 754L, 754L, 744L, 738L, 734L, 733L, 
729L, 726L, 719L, 716L, 715L, 715L, 714L, 713L, 708L, 707L, 707L, 
702L, 693L, 690L, 688L, 672L, 672L, 671L, 667L, 662L, 661L, 658L, 
657L, 649L, 645L, 633L, 631L, 624L, 622L, 612L, 608L, 606L, 595L, 
584L, 584L, 582L, 579L, 568L, 568L, 563L, 550L, 547L, 539L, 524L, 
522L, 509L, 497L, 493L, 400L), Month2 = structure(c(16L, 16L, 
16L, 13L, 13L, 13L, 13L, 13L, 17L, 13L, 16L, 16L, 16L, 17L, 13L, 
13L, 17L, 17L, 16L, 17L, 17L, 17L, 17L, 13L, 13L, 13L, 17L, 15L, 
15L, 17L, 17L, 17L, 13L, 13L, 16L, 17L, 16L, 17L, 14L, 13L, 17L, 
17L, 16L, 14L, 13L, 16L, 16L, 17L, 14L, 15L, 17L, 17L, 13L, 14L, 
17L, 16L, 16L, 15L, 15L, 15L, 17L, 15L, 14L, 15L, 16L, 13L, 16L, 
14L, 13L, 14L, 13L, 16L, 14L, 16L, 17L, 15L, 16L, 15L, 13L, 14L, 
15L, 16L, 15L, 13L, 14L, 14L, 13L, 13L, 17L, 15L, 14L, 14L, 15L, 
16L, 14L, 16L, 13L, 16L, 15L, 14L, 17L, 15L, 17L, 15L, 17L, 16L, 
15L, 17L, 14L, 14L, 15L, 14L, 13L, 15L, 16L, 13L, 15L, 13L, 13L, 
14L, 16L, 16L, 14L, 17L, 16L, 15L, 15L, 14L, 16L, 17L, 15L, 14L, 
17L, 15L, 13L, 14L, 15L, 15L, 16L, 15L, 17L, 15L, 14L, 14L, 15L, 
13L, 14L, 14L, 15L, 13L, 14L), .Label = c("jan 2020", "feb 2020", 
"mrt 2020", "apr 2020", "mei 2020", "jun 2020", "jul 2020", "aug 2020", 
"sep 2020", "okt 2020", "nov 2020", "dec 2020", "jan 2021", "feb 2021", 
"mrt 2021", "apr 2021", "mei 2021"), class = "factor")), row.names = c(NA, 
-151L), class = c("tbl_df", "tbl", "data.frame"))

标签: r, date, filter

解决方案


您可以使用 -

library(dplyr)

# Keep one row per month: the earliest date for groups whose label matches
# "jan" or "dec", and the latest date for every other month.
df %>%
  arrange(Date) %>%
  group_by(Month2) %>%
  # Month2 is constant within a group, so test only its first value: `if`
  # requires a length-1 condition (a longer one is an error since R 4.2.0).
  slice(if (grepl("jan|dec", Month2[1L])) 1L else n()) %>%
  ungroup()

#  Date         UPV Month2  
#  <date>     <int> <fct>   
#1 2021-01-01   493 jan 2021
#2 2021-02-28   707 feb 2021
#3 2021-03-31   688 mrt 2021
#4 2021-04-30  1217 apr 2021
#5 2021-05-31  1237 mei 2021

推荐阅读