首页 > 解决方案 > 在 dplyr 管道中取 ntiles 和 bind_rows 之间的差异

问题描述

编辑:我附上实际数据并重新解释得更好:

我“预处理”了数据并将其过滤到ntile = 1ntile = 5列。看起来像:

# A tibble: 6 x 3
  ntile date        score
  <int> <date>      <dbl>
1     1 2005-08-31 -2.39 
2     1 2005-09-30  0.573
3     1 2005-10-31 -1.61 
4     1 2005-11-30  5.43 
5     1 2005-12-31  0.106
6     1 2006-01-31  6.66

我可以运行以下命令:df$score[df$ntile == '1'] - df$score[df$ntile == '5']计算 ntile 之间的差异。我想将rbind这些结果score与计算差异的日期一起计算出来。即第一个日期是2005-08-31,过滤给出以下内容:

> df%>%
+   filter(date == "2005-08-31")
# A tibble: 2 x 3
  ntile date        score
  <int> <date>      <dbl>
1     1 2005-08-31 -2.39 
2     5 2005-08-31 -0.996

分数之间的差异是(-2.39) - (-0.996) = -1.394哪个是第一个结果df$score[df$ntile == '1'] - df$score[df$ntile == '5']。我正在尝试将其应用于ntile = 1and的所有观察结果ntile = 5,创建一个新列,使数据如下所示:

预期结果:

# A tibble: 316 x 3
   ntile date        score
   <int> <date>      <dbl>
 1   1_5 2005-08-31  -1.392994356
 2   1_5 2005-09-30  -0.189120897
 3   1_5 2005-10-31   0.820927813 
 4   1_5 2005-11-30   0.466263213
 5   1_5 2005-12-31   0.004252737

数据:

df <- structure(list(ntile = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L), date = structure(c(13026, 13056, 13087, 13117, 13148, 
13179, 13207, 13238, 13268, 13299, 13329, 13360, 13391, 13421, 
13452, 13482, 13513, 13544, 13572, 13603, 13633, 13664, 13694, 
13725, 13756, 13786, 13817, 13847, 13878, 13909, 13938, 13969, 
13999, 14030, 14060, 14091, 14122, 14152, 14183, 14213, 14244, 
14275, 14303, 14334, 14364, 14395, 14425, 14456, 14487, 14517, 
14548, 14578, 14609, 14640, 14668, 14699, 14729, 14760, 14790, 
14852, 14882, 14913, 14943, 14974, 15005, 15033, 15064, 15094, 
15125, 15155, 15217, 15247, 15278, 15308, 15339, 15370, 15399, 
15430, 15460, 15491, 15521, 15552, 15583, 15613, 15644, 15674, 
15705, 15736, 15764, 15795, 15825, 15856, 15886, 15917, 15948, 
15978, 16009, 16039, 16070, 16101, 16129, 16160, 16190, 16221, 
16251, 16282, 16313, 16343, 16374, 16404, 16435, 16466, 16494, 
16525, 16555, 16586, 16616, 16647, 16678, 16708, 16739, 16769, 
16800, 16831, 16860, 16891, 16921, 16952, 16982, 17044, 17074, 
17105, 17135, 17166, 17197, 17225, 17256, 17286, 17317, 17347, 
17378, 17409, 17439, 17470, 17500, 17531, 17562, 17590, 17621, 
17651, 17682, 17712, 17743, 17774, 17804, 17835, 17865, 17896, 
13026, 13056, 13087, 13117, 13148, 13179, 13207, 13238, 13268, 
13299, 13329, 13360, 13391, 13421, 13452, 13482, 13513, 13544, 
13572, 13603, 13633, 13664, 13694, 13725, 13756, 13786, 13817, 
13847, 13878, 13909, 13938, 13969, 13999, 14030, 14060, 14091, 
14122, 14152, 14183, 14213, 14244, 14275, 14303, 14334, 14364, 
14395, 14425, 14456, 14487, 14517, 14548, 14578, 14609, 14640, 
14668, 14699, 14729, 14760, 14790, 14852, 14882, 14913, 14943, 
14974, 15005, 15033, 15064, 15094, 15125, 15155, 15217, 15247, 
15278, 15308, 15339, 15370, 15399, 15430, 15460, 15491, 15521, 
15552, 15583, 15613, 15644, 15674, 15705, 15736, 15764, 15795, 
15825, 15856, 15886, 15917, 15948, 15978, 16009, 16039, 16070, 
16101, 16129, 16160, 16190, 16221, 16251, 16282, 16313, 16343, 
16374, 16404, 16435, 16466, 16494, 16525, 16555, 16586, 16616, 
16647, 16678, 16708, 16739, 16769, 16800, 16831, 16860, 16891, 
16921, 16952, 16982, 17044, 17074, 17105, 17135, 17166, 17197, 
17225, 17256, 17286, 17317, 17347, 17378, 17409, 17439, 17470, 
17500, 17531, 17562, 17590, 17621, 17651, 17682, 17712, 17743, 
17774, 17804, 17835, 17865, 17896), class = "Date"), score = c(-2.38916419707325, 
0.572675136581781, -1.61130358515631, 5.42706994951004, 0.105533424368025, 
6.65697289481407, 0.613486039256266, 4.21013704773222, 0.106990463992386, 
-3.62352710962904, -0.203607589793183, -4.24563967581072, 2.97070300267885, 
2.92544516479698, 5.02538739147422, 2.25461465260415, 1.66492554339803, 
3.5690423154001, 0.108411247307002, 0.961008630173696, 3.79172784045593, 
1.94108347244724, -2.12992072359958, -5.87473482253699, -1.45100684091412, 
1.47842234462587, 1.43196010231586, -7.74290369146724, -2.79056547363334, 
-5.03532133668577, -1.99400739381075, -2.92320856826413, 3.93394610595585, 
3.29451174347621, -10.0410470556235, 3.34517672842812, 2.41625183369762, 
-10.3476519710384, -21.791966984666, -11.1142687331988, 3.32761656369176, 
-3.96223311815655, -11.093184503697, 11.6694167237026, 22.2461574652919, 
9.28255170483023, 4.63817899423635, 11.8553670456421, 8.27889381692159, 
8.19911670446593, -6.470817611772, 3.09218109975165, 7.5825172514382, 
0.0284717847140023, 4.90864483240255, 10.0311544305095, 8.55401150272708, 
-8.84107625063785, -8.04105369987643, -6.65872061590883, 10.8577722872979, 
4.03706922467202, 3.04148092466194, 8.90634921641063, 1.56555573277521, 
4.42535372370123, 0.841035482771217, 1.75578768128183, -2.67241757153407, 
-2.25418139889371, -8.7723458397205, -11.2420616969584, 11.4836809985778, 
-1.8649021388476, 0.832085873992507, 11.6062841497052, 2.59039949751966, 
2.28509371230735, -1.97715071813135, -7.3280081242774, 3.97121830333205, 
-0.569284938256821, 2.31082313266322, 3.02490478503254, -1.38512132143018, 
-0.866847983058995, 2.97552563660034, 5.95976111047322, -0.102502393594657, 
4.58003409048615, 0.842834319309465, 3.06786040532266, 0.250639945095402, 
6.78696057469418, -1.62606880448011, 5.46367912370997, 2.53357559730344, 
4.73895950607308, 2.50934817572881, -0.312149263565189, 4.82621271905962, 
-0.79009628184665, -3.12115495501355, -0.461711220579862, 4.27359516836912, 
-4.60871127364226, 3.84488020178729, -5.26245849925393, 3.54222359765326, 
1.04191534953213, 1.4982293818719, -3.56618092951384, 4.95478586278666, 
-0.270584959088251, -0.900452947549406, 0.901254072925249, -0.254483190258712, 
-2.63217404877559, -4.71624328721887, -7.1747474980974, 4.86036342835152, 
3.24549729559669, -4.19219918146311, -10.128570960197, 0.803895306904637, 
9.33865112323734, 2.85517888612945, 0.316844258915139, -0.151669189522978, 
1.00839469793829, 1.57398998124214, -5.0607247073979, 8.91704977465508, 
2.59984205825244, 1.31737969318745, 2.70804837397023, 1.80193676584248, 
1.48362026996833, -2.11380109244311, 3.54300752215851, 1.6501194298151, 
-1.01504840432201, 6.74326962933175, 0.1866931051541, 2.9825290286452, 
1.42593783576641, 2.71110274944611, -4.09572797775837, 1.50144422897237, 
-0.552818435076999, 5.23843746771127, 1.33321908169899, 1.28745947800351, 
2.60490918566195, -1.54038908822145, -9.6363012621261, -0.190177144865133, 
-13.0653210889016, -0.996169841201408, 0.761796033756066, -2.43223139843328, 
4.96080673677729, 0.101280687482689, 6.17357615435428, 1.38874425933974, 
4.28000293197853, -0.333742787139301, -4.11855074763588, 0.291974123409195, 
-3.02440619642814, 2.33067284340863, 1.79275362554759, 5.54934227736017, 
3.47837338830735, 1.52230822118993, 1.51698621410516, 0.914201367527017, 
1.01893905833506, 2.32234494239827, 2.66775915753112, -0.52833239258921, 
-6.17086807149927, 1.7020338770074, 1.86767804066961, -0.620947230682803, 
-6.61596804362882, -1.78635303074775, -3.4028254234031, -4.08987980095563, 
-0.599428345726525, 2.22947413169105, 1.77337610631589, -12.0789144452994, 
2.09112035738437, 2.56977467796921, -7.8359785641516, -21.037437680028, 
-14.2577457957058, 7.00119398667094, -9.76969250916794, -11.8131836223, 
11.430396420021, 22.7949352430167, 7.06995971310199, 1.36175375612812, 
9.28794280366248, 3.75786927130224, 5.53671989732137, -4.01859051206862, 
2.54089824221511, 6.82870547584865, -1.64839229999864, 2.99165441000821, 
8.71694773269455, 6.07678630460913, -6.95016838198987, -5.95522936876321, 
-6.1910368233456, 10.2473582925355, 4.70032243138825, 2.83808334365426, 
7.71947501585039, 1.05716976642439, 5.06630958246923, 1.27669539269683, 
2.69898639004638, -1.83479743615412, -1.68414137270995, -6.92676615265292, 
-8.64438760958025, 12.7654685749758, -0.31136765444292, 1.09305813271296, 
6.46665943463387, 3.36208393072405, 3.67686512082771, -0.398170163555997, 
-5.88363240690217, 4.19573752922585, -0.190935238969593, 2.78114884563598, 
3.83358228344802, -1.36735138051911, 1.09317362741059, 2.87253190842669, 
7.05170876584917, 1.59255415704844, 4.2286132471711, 0.233115056552123, 
3.92793910940855, 0.824619553936327, 6.9752591574591, -2.17343564366001, 
5.40487022782109, 2.78523298064147, 3.93061227679295, 1.85748976877625, 
-1.32769886398466, 3.59753677399333, 0.498023604497329, -1.92522655624948, 
0.677346866605942, 3.93407536101889, -3.6919882939559, 4.12121786363584, 
-4.0073004690221, 4.63264283023433, 0.25009273593706, 1.83200582351617, 
-3.5281064957593, 5.79502926471865, 1.41291711516946, -0.320472069074162, 
0.368577679494281, 0.215618940159114, -0.940588055243421, -3.51081238468536, 
-3.19611791103126, 5.59926444002201, 2.80803845557107, -3.98306363132988, 
-6.23446105411945, 0.158162273031255, 7.17844510703436, 3.03072774038718, 
1.49583272758886, -0.687490679196285, 1.25184715359306, 0.665100913913879, 
-2.96044832244789, 9.21017339256041, 3.77677983067854, 0.632074046886474, 
1.89801895807877, 0.580305096699066, 1.36375254119179, -0.68330200462244, 
3.49627324513832, 0.679635452911649, 0.290380229162133, 6.06083869297639, 
1.05953465020964, 3.66710915353922, 0.21726568905713, 3.11972609897608, 
-2.89689705704579, 1.44659691390088, 1.34341454683325, 4.89338720230884, 
1.07976444062663, 1.56502721063993, 4.61667345004454, -1.8474345893243, 
-8.27627761504385, 1.13912806601182, -11.6236819841101)), row.names = c(NA, 
-316L), class = c("tbl_df", "tbl", "data.frame"))

标签: rdataframedplyrrbind

解决方案


这不需要任何预处理。

library(dplyr)

df %>% 
  group_by(date) %>% 
  filter(ntile %in% c(1,5)) %>% 
  arrange(ntile) %>%
  summarise(ntile = paste(ntile[1], ntile[n()], sep = "-"),
            score = score[1] - score[n()]) %>% 
  bind_rows({df %>% mutate(ntile = as.character(ntile))}, .) %>% 
  select(date, ntile, score)
# # A tibble: 474 x 3
#     date       ntile  score
#     <date>     <chr>  <dbl>
#  1  2005-08-31 1     -2.39 
#  2  2005-09-30 1      0.573
#  3  2005-10-31 1     -1.61 
#  4  2005-11-30 1      5.43 
#  5  2005-12-31 1      0.106
#  6  2006-01-31 1      6.66 
#  7  2006-02-28 1      0.613
#  8  2006-03-31 1      4.21 
#  9  2006-04-30 1      0.107
#  10 2006-05-31 1     -3.62 
#  # ... with 464 more rows

df$ntile == '1' - df$ntile == '5'这是显示附加到底部的数据尾部:

.Last.value %>% tail %>% as.data.frame  

#   date       ntile  score
# 1 2018-07-31 1-5   -0.278
# 2 2018-08-31 1-5   -2.01 
# 3 2018-09-30 1-5    0.307
# 4 2018-10-31 1-5   -1.36 
# 5 2018-11-30 1-5   -1.33 
# 6 2018-12-31 1-5   -1.44 

推荐阅读