首页 > 解决方案 > 根据参考日期列R数据框按总和分组

问题描述

我在 R 中有两个日期列的列表,如下所示:-

d1 <- tibble::tribble(
  ~"Start date",             ~"End date", 
"2010-12-29 00:00:00", "2010-12-31 00:00:00", 
"2011-01-04 00:00:00", "2011-01-04 00:00:00", 
"2011-01-06 00:00:00", "2011-01-12 00:00:00", 
"2011-01-19 00:00:00", "2011-01-21 00:00:00", 
"2011-01-27 00:00:00", "2011-02-04 00:00:00")

我的价格数据框如下:-

d2 <- tibble::tribble(
 ~Dates,   ~A,   ~B,   ~C,   ~D,   ~E,   ~F,   ~G,
"22-12-2010",   80.1785, 31.8176, 11.6533, 23.3259, 41.8601, 22.8007, 146.476,
"23-12-2010",   79.8199, 31.7922, 11.4942, 23.1381, 41.68,   22.8883, 145.0971,
"24-12-2010",   79.8199, 31.7922, 11.4942, 23.1381, 41.68,   22.8883, 145.0971,
"27-12-2010",   79.9725, 32.1526, 11.6186, 23.252,  42.0453, 22.9431, 144.4322,
"28-12-2010",   79.9997, 32.257,  11.7551, 23.2505, 42.4142, 23.1067, 144.8008,
"29-12-2010",   79.9289, 32.0906, 11.7134, 23.1517, 42.3573, 22.8533, 146.0402,
"30-12-2010",   79.4996, 31.9527, 11.6722, 22.9926, 42.2446, 22.8318, 144.5053,
"31-12-2010",   79.93,   32.0311, 11.6942, 23.0776, 41.9869, 22.7553, 145.4875,
"03-01-2011",   80.3999, 32.9737, 12.357,  23.5052, 43.0214, 23.5737, 146.3483,
"04-01-2011",   80.0498, 33.2366, 12.4272, 23.6381, 43.3237, 23.6648, 145.3782,
"05-01-2011",   80.6039, 33.7601, 12.6618, 24.0011, 43.8851, 23.9248, 146.685,
"06-01-2011",   80.5638, 33.6655, 12.7113, 24.0228, 44.2201, 24.0604, 145.7486,
"07-01-2011",   79.9605, 32.9132, 12.5048, 23.3493, 43.7077, 23.699,  144.24,
"10-01-2011",   79.626,  32.8344, 12.5547, 23.2713, 43.4812, 23.3555, 145.7137,
"11-01-2011",   79.5304, 33.0513, 12.8566, 23.3589, 43.9388, 23.4765, 148.3816,
"12-01-2011",   80.4076, 33.7886, 13.1183, 23.818,  44.6717, 23.8636, 148.0783,
"13-01-2011",   80.6306, 33.756,  13.0469, 23.7892, 44.9831, 23.7678, 149.207,
"14-01-2011",   81.3036, 34.1856, 13.2347, 24.3155, 45.3238, 24.1549, 151.4578,
"17-01-2011",   81.3036, 34.1856, 13.2347, 24.3155, 45.3238, 24.1549, 151.4578,
"18-01-2011",   81.8418, 33.9883, 13.1635, 24.1906, 43.1502, 24.0139, 150.5181,
"19-01-2011",   81.0027, 33.3362, 12.7512, 23.8085, 42.6282, 23.3328, 148.3635,
"20-01-2011",   80.8097, 33.6722, 12.5767, 23.7958, 42.3712, 23.8544, 146.4042,
"21-01-2011",   80.7922, 34.1913, 12.62,   24.1677, 43.2221, 24.9343, 147.9034,
"24-01-2011",   82.6827, 34.1314, 12.2929, 24.3067, 43.205,  24.796,  148.0187,
"25-01-2011",   82.8283, 33.9569, 11.9536, 24.1697, 42.6495, 24.4785, 151.2586,
"26-01-2011",   83.1771, 34.1578, 11.9743, 24.2799, 42.9022, 24.2485, 152.3351,
"27-01-2011",   82.9993, 34.1257, 11.9206, 24.155,  42.8672, 24.7682, 154.093,
"28-01-2011",   82.2916, 33.955,  12.0779, 23.8675, 42.366,  24.5382, 149.9169,
"31-01-2011",   81.7952, 33.9655, 12.0438, 24.086,  42.4869, 24.4672, 151.0532,
"01-02-2011",   83.1227, 34.633,  12.4624, 24.7828, 43.3331, 24.9855, 153.9299,
"02-02-2011",   82.8291, 34.5151, 12.5017, 24.5496, 43.1386, 24.9214, 151.9767,
"03-02-2011",   82.8713, 34.2811, 12.5999, 24.4322, 42.6405, 24.752,  150.4262,
"04-02-2011",   82.9434, 33.7929, 12.5218, 24.3922, 42.5437, 24.8826, 149.4832,
"07-02-2011",   83.8433, 34.3763, 12.8448, 24.8656, 43.2382, 25.4261, 150.7768,
"08-02-2011",   84.2483, 34.5351, 12.8532, 25.3836, 43.367,  25.6644, 151.8193,
"09-02-2011",   83.7992, 34.153,  12.7885, 24.7714, 42.9088, 25.1735, 152.7307,
"10-02-2011",   84.2777, 34.3033, 12.7541, 24.6303, 42.4592, 24.9281, 153.4265,
"11-02-2011",   84.5727, 35.1582, 12.9184, 24.9634, 42.9677, 25.1188, 155.0897,
"14-02-2011",   85.1513, 35.2691, 13.0251, 25.2649, 43.4073, 25.1183, 157.0237,
"15-02-2011",   84.9217, 35.5134, 12.9803, 25.2729, 43.5061, 25.3373, 157.1119,
"16-02-2011",   84.9329, 36.2568, 13.0028, 24.7229, 43.5123, 25.672,  157.5118,
"17-02-2011",   84.9294, 36.1789, 13.0111, 24.6282, 43.5951, 25.6098, 156.5715,
"18-02-2011",   85.0344, 36.2394, 12.9882, 24.4448, 43.7121, 25.8423, 157.8411,
"21-02-2011",   85.0344, 36.2394, 12.9882, 24.4448, 43.7121, 25.8423, 157.8411,
"22-02-2011",   83.5354, 35.0389, 12.5103, 23.556,  42.0514, 24.9029, 153.1327,
"23-02-2011",   82.8169, 34.7696, 12.4169, 23.439,  41.3706, 24.7215, 150.2739,
"24-02-2011",   83.2822, 34.6344, 12.2342, 23.382,  41.3232, 24.5396, 152.4142,
"25-02-2011",   84.7207, 35.2869, 12.467,  24.2642, 41.5713, 24.9937, 155.9612,
"28-02-2011",   87.0591, 35.3482, 12.5423, 24.1185, 41.7057, 24.8478, 156.0431,
"01-03-2011",   86.0093, 34.7648, 12.3289, 23.7833, 41.001,  24.3757, 155.3789,
"02-03-2011",   85.155,  34.3521, 12.2164, 23.6086, 40.7716, 24.2642, 153.0803,
"03-03-2011",   86.2803, 34.8856, 12.4841, 24.1216, 41.3547, 24.5782, 158.0159,
"04-03-2011",   85.4894, 34.4172, 12.4136, 23.8046, 40.4829, 23.834,  156.4178,
"07-03-2011",   85.5544, 34.2106, 12.3446, 23.7758, 40.1592, 23.7277, 153.3316,
"08-03-2011",   86.144,  35.0588, 12.7659, 24.2329, 40.9194, 24.0704, 153.4039,
"09-03-2011",   85.9318, 35.2383, 12.8092, 24.3986, 41.1642, 24.0399, 151.8898,
"10-03-2011",   85.159,  34.5498, 12.59,   24.0191, 40.5111, 23.5675, 147.2096,
"11-03-2011",   84.7862, 34.5338, 12.5523, 24.1292, 40.3912, 23.665,  146.3786,
"14-03-2011",   84.1819, 34.1382, 12.4829, 23.9052, 39.981,  23.324,  144.6562,
"15-03-2011",   82.506,  33.6561, 12.2071, 23.879,  39.2748, 23.0187, 142.7008,
"16-03-2011",   81.5316, 33.3135, 12.0989, 23.4764, 39.2111, 22.7336, 140.1458,
"17-03-2011",   82.5505, 33.4589, 12.2205, 23.2939, 39.3582, 22.92,   141.5938,
"18-03-2011",   83.6591, 34.4953, 12.4028, 23.8171, 39.9504, 23.0033, 142.4177,
"21-03-2011",   84.8088, 34.5808, 12.3222, 23.7421, 39.6868, 23.2732, 143.6073,
"22-03-2011",   85.1153, 34.4383, 12.2347, 23.5881, 39.1914, 23.3399, 142.9444,
"23-03-2011",   84.6076, 34.2926, 11.932,  23.4117, 38.9261, 22.999,  141.7258,
"24-03-2011",   85.0947, 34.5043, 11.8248, 23.521,  39.1386, 23.0586, 143.0152,
"25-03-2011",   85.1708, 34.8193, 11.7827, 23.8821, 39.5986, 22.8019, 145.1388,
"28-03-2011",   84.7487, 34.9403, 11.8047, 23.7504, 39.3636, 22.8054, 145.2685,
"29-03-2011",   84.3981, 34.7179, 11.6691, 23.5558, 39.2437, 22.7055, 143.755)

预期输出如下:-

Start date            End date                  A   B   C   D   E   F   G   H
2010-12-29 00:00:00 2010-12-31 00:00:00                             
2011-01-04 00:00:00 2011-01-04 00:00:00                             
2011-01-06 00:00:00 2011-01-12 00:00:00                             
2011-01-19 00:00:00 2011-01-21 00:00:00                             
2011-01-27 00:00:00 2011-02-04 00:00:00     

对于第一个列表中的每个日期范围,我想要这些日期范围的价格数据框中的值的总和。我认为它必须是来自 tidyverse 的 groupby 类型的操作,但我不确定,因为我是 R 新手。请提出前进的方向。

标签: rdataframegroup-bytidyverse

解决方案


假设df_range和,并在此处df_data使用建议的方法

library(dplyr)
library(fuzzyjoin)
fuzzy_left_join(df_dates, df_data, 
                by = c('Start date' = 'Dates',
                       'End date' = 'Dates'),
                match_fun  = c(`<=`, `>=`)) %>% 
  group_by(`Start date`, `End date`) %>% 
  summarize(across(where(is.numeric), sum)) %>% 
  ungroup()
# # A tibble: 5 x 9
#   `Start date`        `End date`              A     B     C     D     E     F     G
#   <dttm>              <dttm>              <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 2010-12-29 00:00:00 2010-12-31 00:00:00 239.   96.1  35.1  69.2 127.   68.4  436.
# 2 2011-01-04 00:00:00 2011-01-04 00:00:00  80.0  33.2  12.4  23.6  43.3  23.7  145.
# 3 2011-01-06 00:00:00 2011-01-12 00:00:00 400.  166.   63.7 118.  220.  118.   732.
# 4 2011-01-19 00:00:00 2011-01-21 00:00:00 243.  101.   37.9  71.8 128.   72.1  443.
# 5 2011-01-27 00:00:00 2011-02-04 00:00:00 579.  239.   86.1 170.  299.  173.  1061.

请注意,所有Start date,End dateDates都应该是 classDatePOSIXct类似的。

可重现的例子

df_dates <- read.table(text = '"Start date"            "End date"
"2010-12-29 00:00:00" "2010-12-31 00:00:00"
"2011-01-04 00:00:00" "2011-01-04 00:00:00"
"2011-01-06 00:00:00" "2011-01-12 00:00:00"
"2011-01-19 00:00:00" "2011-01-21 00:00:00"
"2011-01-27 00:00:00" "2011-02-04 00:00:00"',header =  TRUE ) %>% 
  mutate("Start date" = as.POSIXct(Start.date, origin = '1970-01-01'),
         "End date" = as.POSIXct(End.date, origin = '1970-01-01')) %>% 
  select(c("Start date", "End date"))
df_data <- read.table(text = 
"Dates   A   B   C   D   E   F   G
22-12-2010  80.1785 31.8176 11.6533 23.3259 41.8601 22.8007 146.476
23-12-2010  79.8199 31.7922 11.4942 23.1381 41.68   22.8883 145.0971
24-12-2010  79.8199 31.7922 11.4942 23.1381 41.68   22.8883 145.0971
27-12-2010  79.9725 32.1526 11.6186 23.252  42.0453 22.9431 144.4322
28-12-2010  79.9997 32.257  11.7551 23.2505 42.4142 23.1067 144.8008
29-12-2010  79.9289 32.0906 11.7134 23.1517 42.3573 22.8533 146.0402
30-12-2010  79.4996 31.9527 11.6722 22.9926 42.2446 22.8318 144.5053
31-12-2010  79.93   32.0311 11.6942 23.0776 41.9869 22.7553 145.4875
03-01-2011  80.3999 32.9737 12.357  23.5052 43.0214 23.5737 146.3483
04-01-2011  80.0498 33.2366 12.4272 23.6381 43.3237 23.6648 145.3782
05-01-2011  80.6039 33.7601 12.6618 24.0011 43.8851 23.9248 146.685
06-01-2011  80.5638 33.6655 12.7113 24.0228 44.2201 24.0604 145.7486
07-01-2011  79.9605 32.9132 12.5048 23.3493 43.7077 23.699  144.24
10-01-2011  79.626  32.8344 12.5547 23.2713 43.4812 23.3555 145.7137
11-01-2011  79.5304 33.0513 12.8566 23.3589 43.9388 23.4765 148.3816
12-01-2011  80.4076 33.7886 13.1183 23.818  44.6717 23.8636 148.0783
13-01-2011  80.6306 33.756  13.0469 23.7892 44.9831 23.7678 149.207
14-01-2011  81.3036 34.1856 13.2347 24.3155 45.3238 24.1549 151.4578
17-01-2011  81.3036 34.1856 13.2347 24.3155 45.3238 24.1549 151.4578
18-01-2011  81.8418 33.9883 13.1635 24.1906 43.1502 24.0139 150.5181
19-01-2011  81.0027 33.3362 12.7512 23.8085 42.6282 23.3328 148.3635
20-01-2011  80.8097 33.6722 12.5767 23.7958 42.3712 23.8544 146.4042
21-01-2011  80.7922 34.1913 12.62   24.1677 43.2221 24.9343 147.9034
24-01-2011  82.6827 34.1314 12.2929 24.3067 43.205  24.796  148.0187
25-01-2011  82.8283 33.9569 11.9536 24.1697 42.6495 24.4785 151.2586
26-01-2011  83.1771 34.1578 11.9743 24.2799 42.9022 24.2485 152.3351
27-01-2011  82.9993 34.1257 11.9206 24.155  42.8672 24.7682 154.093
28-01-2011  82.2916 33.955  12.0779 23.8675 42.366  24.5382 149.9169
31-01-2011  81.7952 33.9655 12.0438 24.086  42.4869 24.4672 151.0532
01-02-2011  83.1227 34.633  12.4624 24.7828 43.3331 24.9855 153.9299
02-02-2011  82.8291 34.5151 12.5017 24.5496 43.1386 24.9214 151.9767
03-02-2011  82.8713 34.2811 12.5999 24.4322 42.6405 24.752  150.4262
04-02-2011  82.9434 33.7929 12.5218 24.3922 42.5437 24.8826 149.4832
07-02-2011  83.8433 34.3763 12.8448 24.8656 43.2382 25.4261 150.7768
08-02-2011  84.2483 34.5351 12.8532 25.3836 43.367  25.6644 151.8193
09-02-2011  83.7992 34.153  12.7885 24.7714 42.9088 25.1735 152.7307
10-02-2011  84.2777 34.3033 12.7541 24.6303 42.4592 24.9281 153.4265
11-02-2011  84.5727 35.1582 12.9184 24.9634 42.9677 25.1188 155.0897
14-02-2011  85.1513 35.2691 13.0251 25.2649 43.4073 25.1183 157.0237
15-02-2011  84.9217 35.5134 12.9803 25.2729 43.5061 25.3373 157.1119
16-02-2011  84.9329 36.2568 13.0028 24.7229 43.5123 25.672  157.5118
17-02-2011  84.9294 36.1789 13.0111 24.6282 43.5951 25.6098 156.5715
18-02-2011  85.0344 36.2394 12.9882 24.4448 43.7121 25.8423 157.8411
21-02-2011  85.0344 36.2394 12.9882 24.4448 43.7121 25.8423 157.8411
22-02-2011  83.5354 35.0389 12.5103 23.556  42.0514 24.9029 153.1327
23-02-2011  82.8169 34.7696 12.4169 23.439  41.3706 24.7215 150.2739
24-02-2011  83.2822 34.6344 12.2342 23.382  41.3232 24.5396 152.4142
25-02-2011  84.7207 35.2869 12.467  24.2642 41.5713 24.9937 155.9612
28-02-2011  87.0591 35.3482 12.5423 24.1185 41.7057 24.8478 156.0431
01-03-2011  86.0093 34.7648 12.3289 23.7833 41.001  24.3757 155.3789
02-03-2011  85.155  34.3521 12.2164 23.6086 40.7716 24.2642 153.0803
03-03-2011  86.2803 34.8856 12.4841 24.1216 41.3547 24.5782 158.0159
04-03-2011  85.4894 34.4172 12.4136 23.8046 40.4829 23.834  156.4178
07-03-2011  85.5544 34.2106 12.3446 23.7758 40.1592 23.7277 153.3316
08-03-2011  86.144  35.0588 12.7659 24.2329 40.9194 24.0704 153.4039
09-03-2011  85.9318 35.2383 12.8092 24.3986 41.1642 24.0399 151.8898
10-03-2011  85.159  34.5498 12.59   24.0191 40.5111 23.5675 147.2096
11-03-2011  84.7862 34.5338 12.5523 24.1292 40.3912 23.665  146.3786
14-03-2011  84.1819 34.1382 12.4829 23.9052 39.981  23.324  144.6562
15-03-2011  82.506  33.6561 12.2071 23.879  39.2748 23.0187 142.7008
16-03-2011  81.5316 33.3135 12.0989 23.4764 39.2111 22.7336 140.1458
17-03-2011  82.5505 33.4589 12.2205 23.2939 39.3582 22.92   141.5938
18-03-2011  83.6591 34.4953 12.4028 23.8171 39.9504 23.0033 142.4177
21-03-2011  84.8088 34.5808 12.3222 23.7421 39.6868 23.2732 143.6073
22-03-2011  85.1153 34.4383 12.2347 23.5881 39.1914 23.3399 142.9444
23-03-2011  84.6076 34.2926 11.932  23.4117 38.9261 22.999  141.7258
24-03-2011  85.0947 34.5043 11.8248 23.521  39.1386 23.0586 143.0152
25-03-2011  85.1708 34.8193 11.7827 23.8821 39.5986 22.8019 145.1388
28-03-2011  84.7487 34.9403 11.8047 23.7504 39.3636 22.8054 145.2685
29-03-2011  84.3981 34.7179 11.6691 23.5558 39.2437 22.7055 143.755", header = TRUE) %>% 
  mutate(Dates = as.Date(Dates, format = '%d-%m-%Y'))

library(dplyr)
library(fuzzyjoin)
fuzzy_left_join(df_dates, df_data, 
                by = c('Start date' = 'Dates',
                       'End date' = 'Dates'),
                match_fun  = c(`<=`, `>=`)) %>% 
  group_by(`Start date`, `End date`) %>% 
  summarize(across(where(is.numeric), sum)) %>% 
  ungroup()

推荐阅读