首页 > 解决方案 > 按 2 个变量分组的百分比变化

问题描述

我有一个包含 5 列的数据框:“Date_Time”、“Categ_1”、“Categ_2”、“Price”和“Volume”。数据帧按 Date_Time 排序。对于这个有序数据框,我想计算每组 Categ_1 和 Categ_2 内的价格百分比变化。我正在使用代码:

A <- dataset %>% 
    group_by(Categ_1, Categ_2) %>%
    mutate(price_prc_change = (Price/lag(Price)-1)*100)

但结果不适用于分组。百分比变化仅基于价格列计算,不考虑类别_1 或类别_2。

> dput(droplevels(df[1:50, ]))
structure(list(Date_Time = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 5L, 6L, 
6L, 6L, 6L, 7L, 7L, 8L, 8L, 9L, 9L, 10L, 11L, 11L), .Label = c("2020-04-30 15:26:03.8430000", 
"2020-04-30 15:26:14.5870000", "2020-04-30 15:45:11.4690000", 
"2020-04-30 16:00:37.8760000", "2020-04-30 16:00:39.4960000", 
"2020-04-30 16:01:18.9490000", "2020-04-30 16:02:02.6070000", 
"2020-04-30 16:02:23.2270000", "2020-04-30 16:02:25.3250000", 
"2020-04-30 16:07:34.7980000", "2020-04-30 16:08:33.1210000"), class = "factor"), 
    Categ_1 = structure(c(3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 
    4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 
    4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    4L, 4L, 2L, 2L, 4L, 1L, 1L, 3L, 1L, 3L, 5L), .Label = c("ED1", 
    "ED2", "ED3", "ED4", "XHE"), class = "factor"), Categ_2 = structure(c(5L, 
    5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
    6L, 6L, 6L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 
    8L, 8L, 8L, 8L, 8L, 8L, 10L, 10L, 3L, 2L, 3L, 2L, 4L, 4L, 
    9L, 9L, 7L, 7L, 1L), .Label = c("H01", "H06Q1", "H07Q1", 
    "H09Q2", "H11", "H12", "H12Q3", "H13", "H15Q4", "H18Q1"), class = "factor"), 
    Price = c(-14.88, -14.88, -14.88, -14.88, -14.89, -14.88, 
    -14.88, -14.88, -14.88, -14.89, -14.48, -14.48, -14.48, -14.48, 
    -14.49, -14.5, -14.48, -14.48, -14.48, -14.48, -14.49, -11.38, 
    -11.38, -11.38, -11.38, -11.38, -11.38, -11.38, -11.39, -11.39, 
    -11.38, -11.38, -11.38, -11.38, -11.38, -11.38, -11.38, -11.39, 
    -11.39, -42, -52.2, 9.6, 9.96, 9.6, 9.96, 9.46, 9.46, 17.86, 
    17.86, 9.56), Volume = c(5.8, 5.8, 5.8, 2.6, 5, 5.8, 5.8, 
    5.8, 2.6, 5, 5.5, 5.5, 5.5, 3.5, 5, 25, 5.5, 5.5, 5.5, 3.5, 
    5, 5.4, 0.6, 4.8, 1.2, 4.2, 1.8, 2, 4, 1, 5.4, 0.6, 4.8, 
    1.2, 4.2, 1.8, 2, 4, 1, 1, 1, 1, 1, 1, 1, 0.1, 0.1, 0.1, 
    0.1, 0.1)), row.names = c(NA, 50L), class = "data.frame")

标签: rgroup-bydplyrpercentage

解决方案


推荐阅读