r - 根据参考日期列R数据框按总和分组
问题描述
我在 R 中有两个日期列的列表,如下所示:-
d1 <- tibble::tribble(
~"Start date", ~"End date",
"2010-12-29 00:00:00", "2010-12-31 00:00:00",
"2011-01-04 00:00:00", "2011-01-04 00:00:00",
"2011-01-06 00:00:00", "2011-01-12 00:00:00",
"2011-01-19 00:00:00", "2011-01-21 00:00:00",
"2011-01-27 00:00:00", "2011-02-04 00:00:00")
我的价格数据框如下:-
d2 <- tibble::tribble(
~Dates, ~A, ~B, ~C, ~D, ~E, ~F, ~G,
"22-12-2010", 80.1785, 31.8176, 11.6533, 23.3259, 41.8601, 22.8007, 146.476,
"23-12-2010", 79.8199, 31.7922, 11.4942, 23.1381, 41.68, 22.8883, 145.0971,
"24-12-2010", 79.8199, 31.7922, 11.4942, 23.1381, 41.68, 22.8883, 145.0971,
"27-12-2010", 79.9725, 32.1526, 11.6186, 23.252, 42.0453, 22.9431, 144.4322,
"28-12-2010", 79.9997, 32.257, 11.7551, 23.2505, 42.4142, 23.1067, 144.8008,
"29-12-2010", 79.9289, 32.0906, 11.7134, 23.1517, 42.3573, 22.8533, 146.0402,
"30-12-2010", 79.4996, 31.9527, 11.6722, 22.9926, 42.2446, 22.8318, 144.5053,
"31-12-2010", 79.93, 32.0311, 11.6942, 23.0776, 41.9869, 22.7553, 145.4875,
"03-01-2011", 80.3999, 32.9737, 12.357, 23.5052, 43.0214, 23.5737, 146.3483,
"04-01-2011", 80.0498, 33.2366, 12.4272, 23.6381, 43.3237, 23.6648, 145.3782,
"05-01-2011", 80.6039, 33.7601, 12.6618, 24.0011, 43.8851, 23.9248, 146.685,
"06-01-2011", 80.5638, 33.6655, 12.7113, 24.0228, 44.2201, 24.0604, 145.7486,
"07-01-2011", 79.9605, 32.9132, 12.5048, 23.3493, 43.7077, 23.699, 144.24,
"10-01-2011", 79.626, 32.8344, 12.5547, 23.2713, 43.4812, 23.3555, 145.7137,
"11-01-2011", 79.5304, 33.0513, 12.8566, 23.3589, 43.9388, 23.4765, 148.3816,
"12-01-2011", 80.4076, 33.7886, 13.1183, 23.818, 44.6717, 23.8636, 148.0783,
"13-01-2011", 80.6306, 33.756, 13.0469, 23.7892, 44.9831, 23.7678, 149.207,
"14-01-2011", 81.3036, 34.1856, 13.2347, 24.3155, 45.3238, 24.1549, 151.4578,
"17-01-2011", 81.3036, 34.1856, 13.2347, 24.3155, 45.3238, 24.1549, 151.4578,
"18-01-2011", 81.8418, 33.9883, 13.1635, 24.1906, 43.1502, 24.0139, 150.5181,
"19-01-2011", 81.0027, 33.3362, 12.7512, 23.8085, 42.6282, 23.3328, 148.3635,
"20-01-2011", 80.8097, 33.6722, 12.5767, 23.7958, 42.3712, 23.8544, 146.4042,
"21-01-2011", 80.7922, 34.1913, 12.62, 24.1677, 43.2221, 24.9343, 147.9034,
"24-01-2011", 82.6827, 34.1314, 12.2929, 24.3067, 43.205, 24.796, 148.0187,
"25-01-2011", 82.8283, 33.9569, 11.9536, 24.1697, 42.6495, 24.4785, 151.2586,
"26-01-2011", 83.1771, 34.1578, 11.9743, 24.2799, 42.9022, 24.2485, 152.3351,
"27-01-2011", 82.9993, 34.1257, 11.9206, 24.155, 42.8672, 24.7682, 154.093,
"28-01-2011", 82.2916, 33.955, 12.0779, 23.8675, 42.366, 24.5382, 149.9169,
"31-01-2011", 81.7952, 33.9655, 12.0438, 24.086, 42.4869, 24.4672, 151.0532,
"01-02-2011", 83.1227, 34.633, 12.4624, 24.7828, 43.3331, 24.9855, 153.9299,
"02-02-2011", 82.8291, 34.5151, 12.5017, 24.5496, 43.1386, 24.9214, 151.9767,
"03-02-2011", 82.8713, 34.2811, 12.5999, 24.4322, 42.6405, 24.752, 150.4262,
"04-02-2011", 82.9434, 33.7929, 12.5218, 24.3922, 42.5437, 24.8826, 149.4832,
"07-02-2011", 83.8433, 34.3763, 12.8448, 24.8656, 43.2382, 25.4261, 150.7768,
"08-02-2011", 84.2483, 34.5351, 12.8532, 25.3836, 43.367, 25.6644, 151.8193,
"09-02-2011", 83.7992, 34.153, 12.7885, 24.7714, 42.9088, 25.1735, 152.7307,
"10-02-2011", 84.2777, 34.3033, 12.7541, 24.6303, 42.4592, 24.9281, 153.4265,
"11-02-2011", 84.5727, 35.1582, 12.9184, 24.9634, 42.9677, 25.1188, 155.0897,
"14-02-2011", 85.1513, 35.2691, 13.0251, 25.2649, 43.4073, 25.1183, 157.0237,
"15-02-2011", 84.9217, 35.5134, 12.9803, 25.2729, 43.5061, 25.3373, 157.1119,
"16-02-2011", 84.9329, 36.2568, 13.0028, 24.7229, 43.5123, 25.672, 157.5118,
"17-02-2011", 84.9294, 36.1789, 13.0111, 24.6282, 43.5951, 25.6098, 156.5715,
"18-02-2011", 85.0344, 36.2394, 12.9882, 24.4448, 43.7121, 25.8423, 157.8411,
"21-02-2011", 85.0344, 36.2394, 12.9882, 24.4448, 43.7121, 25.8423, 157.8411,
"22-02-2011", 83.5354, 35.0389, 12.5103, 23.556, 42.0514, 24.9029, 153.1327,
"23-02-2011", 82.8169, 34.7696, 12.4169, 23.439, 41.3706, 24.7215, 150.2739,
"24-02-2011", 83.2822, 34.6344, 12.2342, 23.382, 41.3232, 24.5396, 152.4142,
"25-02-2011", 84.7207, 35.2869, 12.467, 24.2642, 41.5713, 24.9937, 155.9612,
"28-02-2011", 87.0591, 35.3482, 12.5423, 24.1185, 41.7057, 24.8478, 156.0431,
"01-03-2011", 86.0093, 34.7648, 12.3289, 23.7833, 41.001, 24.3757, 155.3789,
"02-03-2011", 85.155, 34.3521, 12.2164, 23.6086, 40.7716, 24.2642, 153.0803,
"03-03-2011", 86.2803, 34.8856, 12.4841, 24.1216, 41.3547, 24.5782, 158.0159,
"04-03-2011", 85.4894, 34.4172, 12.4136, 23.8046, 40.4829, 23.834, 156.4178,
"07-03-2011", 85.5544, 34.2106, 12.3446, 23.7758, 40.1592, 23.7277, 153.3316,
"08-03-2011", 86.144, 35.0588, 12.7659, 24.2329, 40.9194, 24.0704, 153.4039,
"09-03-2011", 85.9318, 35.2383, 12.8092, 24.3986, 41.1642, 24.0399, 151.8898,
"10-03-2011", 85.159, 34.5498, 12.59, 24.0191, 40.5111, 23.5675, 147.2096,
"11-03-2011", 84.7862, 34.5338, 12.5523, 24.1292, 40.3912, 23.665, 146.3786,
"14-03-2011", 84.1819, 34.1382, 12.4829, 23.9052, 39.981, 23.324, 144.6562,
"15-03-2011", 82.506, 33.6561, 12.2071, 23.879, 39.2748, 23.0187, 142.7008,
"16-03-2011", 81.5316, 33.3135, 12.0989, 23.4764, 39.2111, 22.7336, 140.1458,
"17-03-2011", 82.5505, 33.4589, 12.2205, 23.2939, 39.3582, 22.92, 141.5938,
"18-03-2011", 83.6591, 34.4953, 12.4028, 23.8171, 39.9504, 23.0033, 142.4177,
"21-03-2011", 84.8088, 34.5808, 12.3222, 23.7421, 39.6868, 23.2732, 143.6073,
"22-03-2011", 85.1153, 34.4383, 12.2347, 23.5881, 39.1914, 23.3399, 142.9444,
"23-03-2011", 84.6076, 34.2926, 11.932, 23.4117, 38.9261, 22.999, 141.7258,
"24-03-2011", 85.0947, 34.5043, 11.8248, 23.521, 39.1386, 23.0586, 143.0152,
"25-03-2011", 85.1708, 34.8193, 11.7827, 23.8821, 39.5986, 22.8019, 145.1388,
"28-03-2011", 84.7487, 34.9403, 11.8047, 23.7504, 39.3636, 22.8054, 145.2685,
"29-03-2011", 84.3981, 34.7179, 11.6691, 23.5558, 39.2437, 22.7055, 143.755)
预期输出如下:-
Start date End date A B C D E F G H
2010-12-29 00:00:00 2010-12-31 00:00:00
2011-01-04 00:00:00 2011-01-04 00:00:00
2011-01-06 00:00:00 2011-01-12 00:00:00
2011-01-19 00:00:00 2011-01-21 00:00:00
2011-01-27 00:00:00 2011-02-04 00:00:00
对于第一个列表中的每个日期范围,我想要这些日期范围的价格数据框中的值的总和。我认为它必须是来自 tidyverse 的 groupby 类型的操作,但我不确定,因为我是 R 新手。请提出前进的方向。
解决方案
假设df_range
和,并在此处df_data
使用建议的方法
library(dplyr)
library(fuzzyjoin)
fuzzy_left_join(df_dates, df_data,
by = c('Start date' = 'Dates',
'End date' = 'Dates'),
match_fun = c(`<=`, `>=`)) %>%
group_by(`Start date`, `End date`) %>%
summarize(across(where(is.numeric), sum)) %>%
ungroup()
# # A tibble: 5 x 9
# `Start date` `End date` A B C D E F G
# <dttm> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 2010-12-29 00:00:00 2010-12-31 00:00:00 239. 96.1 35.1 69.2 127. 68.4 436.
# 2 2011-01-04 00:00:00 2011-01-04 00:00:00 80.0 33.2 12.4 23.6 43.3 23.7 145.
# 3 2011-01-06 00:00:00 2011-01-12 00:00:00 400. 166. 63.7 118. 220. 118. 732.
# 4 2011-01-19 00:00:00 2011-01-21 00:00:00 243. 101. 37.9 71.8 128. 72.1 443.
# 5 2011-01-27 00:00:00 2011-02-04 00:00:00 579. 239. 86.1 170. 299. 173. 1061.
请注意,所有Start date
,End date
和Dates
都应该是 classDate
或POSIXct
类似的。
可重现的例子
df_dates <- read.table(text = '"Start date" "End date"
"2010-12-29 00:00:00" "2010-12-31 00:00:00"
"2011-01-04 00:00:00" "2011-01-04 00:00:00"
"2011-01-06 00:00:00" "2011-01-12 00:00:00"
"2011-01-19 00:00:00" "2011-01-21 00:00:00"
"2011-01-27 00:00:00" "2011-02-04 00:00:00"',header = TRUE ) %>%
mutate("Start date" = as.POSIXct(Start.date, origin = '1970-01-01'),
"End date" = as.POSIXct(End.date, origin = '1970-01-01')) %>%
select(c("Start date", "End date"))
df_data <- read.table(text =
"Dates A B C D E F G
22-12-2010 80.1785 31.8176 11.6533 23.3259 41.8601 22.8007 146.476
23-12-2010 79.8199 31.7922 11.4942 23.1381 41.68 22.8883 145.0971
24-12-2010 79.8199 31.7922 11.4942 23.1381 41.68 22.8883 145.0971
27-12-2010 79.9725 32.1526 11.6186 23.252 42.0453 22.9431 144.4322
28-12-2010 79.9997 32.257 11.7551 23.2505 42.4142 23.1067 144.8008
29-12-2010 79.9289 32.0906 11.7134 23.1517 42.3573 22.8533 146.0402
30-12-2010 79.4996 31.9527 11.6722 22.9926 42.2446 22.8318 144.5053
31-12-2010 79.93 32.0311 11.6942 23.0776 41.9869 22.7553 145.4875
03-01-2011 80.3999 32.9737 12.357 23.5052 43.0214 23.5737 146.3483
04-01-2011 80.0498 33.2366 12.4272 23.6381 43.3237 23.6648 145.3782
05-01-2011 80.6039 33.7601 12.6618 24.0011 43.8851 23.9248 146.685
06-01-2011 80.5638 33.6655 12.7113 24.0228 44.2201 24.0604 145.7486
07-01-2011 79.9605 32.9132 12.5048 23.3493 43.7077 23.699 144.24
10-01-2011 79.626 32.8344 12.5547 23.2713 43.4812 23.3555 145.7137
11-01-2011 79.5304 33.0513 12.8566 23.3589 43.9388 23.4765 148.3816
12-01-2011 80.4076 33.7886 13.1183 23.818 44.6717 23.8636 148.0783
13-01-2011 80.6306 33.756 13.0469 23.7892 44.9831 23.7678 149.207
14-01-2011 81.3036 34.1856 13.2347 24.3155 45.3238 24.1549 151.4578
17-01-2011 81.3036 34.1856 13.2347 24.3155 45.3238 24.1549 151.4578
18-01-2011 81.8418 33.9883 13.1635 24.1906 43.1502 24.0139 150.5181
19-01-2011 81.0027 33.3362 12.7512 23.8085 42.6282 23.3328 148.3635
20-01-2011 80.8097 33.6722 12.5767 23.7958 42.3712 23.8544 146.4042
21-01-2011 80.7922 34.1913 12.62 24.1677 43.2221 24.9343 147.9034
24-01-2011 82.6827 34.1314 12.2929 24.3067 43.205 24.796 148.0187
25-01-2011 82.8283 33.9569 11.9536 24.1697 42.6495 24.4785 151.2586
26-01-2011 83.1771 34.1578 11.9743 24.2799 42.9022 24.2485 152.3351
27-01-2011 82.9993 34.1257 11.9206 24.155 42.8672 24.7682 154.093
28-01-2011 82.2916 33.955 12.0779 23.8675 42.366 24.5382 149.9169
31-01-2011 81.7952 33.9655 12.0438 24.086 42.4869 24.4672 151.0532
01-02-2011 83.1227 34.633 12.4624 24.7828 43.3331 24.9855 153.9299
02-02-2011 82.8291 34.5151 12.5017 24.5496 43.1386 24.9214 151.9767
03-02-2011 82.8713 34.2811 12.5999 24.4322 42.6405 24.752 150.4262
04-02-2011 82.9434 33.7929 12.5218 24.3922 42.5437 24.8826 149.4832
07-02-2011 83.8433 34.3763 12.8448 24.8656 43.2382 25.4261 150.7768
08-02-2011 84.2483 34.5351 12.8532 25.3836 43.367 25.6644 151.8193
09-02-2011 83.7992 34.153 12.7885 24.7714 42.9088 25.1735 152.7307
10-02-2011 84.2777 34.3033 12.7541 24.6303 42.4592 24.9281 153.4265
11-02-2011 84.5727 35.1582 12.9184 24.9634 42.9677 25.1188 155.0897
14-02-2011 85.1513 35.2691 13.0251 25.2649 43.4073 25.1183 157.0237
15-02-2011 84.9217 35.5134 12.9803 25.2729 43.5061 25.3373 157.1119
16-02-2011 84.9329 36.2568 13.0028 24.7229 43.5123 25.672 157.5118
17-02-2011 84.9294 36.1789 13.0111 24.6282 43.5951 25.6098 156.5715
18-02-2011 85.0344 36.2394 12.9882 24.4448 43.7121 25.8423 157.8411
21-02-2011 85.0344 36.2394 12.9882 24.4448 43.7121 25.8423 157.8411
22-02-2011 83.5354 35.0389 12.5103 23.556 42.0514 24.9029 153.1327
23-02-2011 82.8169 34.7696 12.4169 23.439 41.3706 24.7215 150.2739
24-02-2011 83.2822 34.6344 12.2342 23.382 41.3232 24.5396 152.4142
25-02-2011 84.7207 35.2869 12.467 24.2642 41.5713 24.9937 155.9612
28-02-2011 87.0591 35.3482 12.5423 24.1185 41.7057 24.8478 156.0431
01-03-2011 86.0093 34.7648 12.3289 23.7833 41.001 24.3757 155.3789
02-03-2011 85.155 34.3521 12.2164 23.6086 40.7716 24.2642 153.0803
03-03-2011 86.2803 34.8856 12.4841 24.1216 41.3547 24.5782 158.0159
04-03-2011 85.4894 34.4172 12.4136 23.8046 40.4829 23.834 156.4178
07-03-2011 85.5544 34.2106 12.3446 23.7758 40.1592 23.7277 153.3316
08-03-2011 86.144 35.0588 12.7659 24.2329 40.9194 24.0704 153.4039
09-03-2011 85.9318 35.2383 12.8092 24.3986 41.1642 24.0399 151.8898
10-03-2011 85.159 34.5498 12.59 24.0191 40.5111 23.5675 147.2096
11-03-2011 84.7862 34.5338 12.5523 24.1292 40.3912 23.665 146.3786
14-03-2011 84.1819 34.1382 12.4829 23.9052 39.981 23.324 144.6562
15-03-2011 82.506 33.6561 12.2071 23.879 39.2748 23.0187 142.7008
16-03-2011 81.5316 33.3135 12.0989 23.4764 39.2111 22.7336 140.1458
17-03-2011 82.5505 33.4589 12.2205 23.2939 39.3582 22.92 141.5938
18-03-2011 83.6591 34.4953 12.4028 23.8171 39.9504 23.0033 142.4177
21-03-2011 84.8088 34.5808 12.3222 23.7421 39.6868 23.2732 143.6073
22-03-2011 85.1153 34.4383 12.2347 23.5881 39.1914 23.3399 142.9444
23-03-2011 84.6076 34.2926 11.932 23.4117 38.9261 22.999 141.7258
24-03-2011 85.0947 34.5043 11.8248 23.521 39.1386 23.0586 143.0152
25-03-2011 85.1708 34.8193 11.7827 23.8821 39.5986 22.8019 145.1388
28-03-2011 84.7487 34.9403 11.8047 23.7504 39.3636 22.8054 145.2685
29-03-2011 84.3981 34.7179 11.6691 23.5558 39.2437 22.7055 143.755", header = TRUE) %>%
mutate(Dates = as.Date(Dates, format = '%d-%m-%Y'))
library(dplyr)
library(fuzzyjoin)
fuzzy_left_join(df_dates, df_data,
by = c('Start date' = 'Dates',
'End date' = 'Dates'),
match_fun = c(`<=`, `>=`)) %>%
group_by(`Start date`, `End date`) %>%
summarize(across(where(is.numeric), sum)) %>%
ungroup()
推荐阅读
- android - 如何在服务类的活动中显示对话框?
- excel - VBA - 通过下拉/验证列表迭代并将生成的工作表保存到一个 PDF
- html - 如何使元素 div(即将推出的时钟)具有响应性?
- java - 按双精度对对象数组进行排序并具有特定的字符串值
- javascript - 为什么使用绑定函数(javascript)后属性未定义?
- r - R Markdown:Markdown 工作区到 R 工作区
- python - 未注册具有此名称的 AngularJS 错误控制器
- java - Spring Boot 应用程序因 EXCEPTION_ACCESS_VIOLATION 错误而崩溃
- wpf - WPF 将值发送到可重用的用户控件
- spring-boot - Spring Boot配置-如何仅覆盖指定的属性