首页 > 解决方案 > 按日期 R 的加权移动平均线

问题描述

下面是一个脚本,用于获取人员统计数据,并对最近 6 天的数据计算滚动平均值。我希望离今天越近的日期比更早的日期产生更大的影响。

如果有可能:

下面有两种创建滚动平均值的方法:`one_df` 和 `two_df`。我在实际脚本中使用第一种方法,但我添加了第二种方法,以防它更便于加入权重函数。

library(dplyr)
library(lubridate)

# Create DataFrame


# Sample data: four people, each with the same seven daily observations
# (10/20/2016 back to 10/14/2016, most recent first).
people <- c("CAREY.FAKE", "JOHN.SMITH", "JEFF.JOHNSON", "SARA.JOHNSON")
daily_values <- c(3, 2, 1, 1, 2, 3, 20)
daily_dates <- paste0("10/", 20:14, "/2016")

# Every stat column carries the same repeating pattern of values.
stat_column <- rep(daily_values, times = length(people))

df <- data.frame(
  name = rep(people, each = length(daily_values)),
  GA = stat_column,
  SV = stat_column,
  GF = stat_column,
  SA = stat_column,
  date = rep(daily_dates, times = length(people)),
  stringsAsFactors = FALSE
)

# Unweighted mean of each stat over the six most recent dates per name.
one_df <- df %>%
  group_by(name) %>%
  # Oldest first within each name, so tail() below picks the newest rows.
  arrange(name, mdy(date)) %>%
  # Stat columns are selected by name rather than by position, and a formula
  # lambda replaces the deprecated funs() helper.
  summarise_at(vars(GA, SV, GF, SA), ~ mean(tail(., 6)))

# Alternative: keep the six latest dates per name, then average each stat.
two_df <- df %>%
  group_by(name) %>%
  # Name the ranking argument explicitly: rows are ranked by the parsed date.
  top_n(n = 6, wt = mdy(date)) %>%
  # Stat columns are selected by name rather than by position.
  summarise_at(vars(GA, SV, GF, SA), mean)

df:

    name        GA  SV  GF  SA  date
CAREY.FAKE      3   3   3   3   10/20/2016
CAREY.FAKE      2   2   2   2   10/19/2016
CAREY.FAKE      1   1   1   1   10/18/2016
CAREY.FAKE      1   1   1   1   10/17/2016
CAREY.FAKE      2   2   2   2   10/16/2016
CAREY.FAKE      3   3   3   3   10/15/2016
CAREY.FAKE      20  20  20  20  10/14/2016
JOHN.SMITH      3   3   3   3   10/20/2016
JOHN.SMITH      2   2   2   2   10/19/2016
JOHN.SMITH      1   1   1   1   10/18/2016
JOHN.SMITH      1   1   1   1   10/17/2016
JOHN.SMITH      2   2   2   2   10/16/2016
JOHN.SMITH      3   3   3   3   10/15/2016
JOHN.SMITH      20  20  20  20  10/14/2016
JEFF.JOHNSON    3   3   3   3   10/20/2016
JEFF.JOHNSON    2   2   2   2   10/19/2016
JEFF.JOHNSON    1   1   1   1   10/18/2016
JEFF.JOHNSON    1   1   1   1   10/17/2016
JEFF.JOHNSON    2   2   2   2   10/16/2016
JEFF.JOHNSON    3   3   3   3   10/15/2016
JEFF.JOHNSON    20  20  20  20  10/14/2016
SARA.JOHNSON    3   3   3   3   10/20/2016
SARA.JOHNSON    2   2   2   2   10/19/2016
SARA.JOHNSON    1   1   1   1   10/18/2016
SARA.JOHNSON    1   1   1   1   10/17/2016
SARA.JOHNSON    2   2   2   2   10/16/2016
SARA.JOHNSON    3   3   3   3   10/15/2016
SARA.JOHNSON    20  20  20  20  10/14/2016

结果:

name            GA  SV  GF  SA
CAREY.FAKE      2   2   2   2
JEFF.JOHNSON    2   2   2   2
JOHN.SMITH      2   2   2   2
SARA.JOHNSON    2   2   2   2

预期结果:

name             GA   SV    GF   SA
CAREY.FAKE      2.05 2.05  2.05 2.05
JEFF.JOHNSON    2.05 2.05  2.05 2.05
JOHN.SMITH      2.05 2.05  2.05 2.05
SARA.JOHNSON    2.05 2.05  2.05 2.05

标签: r

解决方案


我认为困惑源于这样一个事实:你真正想要的并不是移动平均,而是一个简单的加权平均:

# Weights for the retained rows, ordered from most recent to oldest.
weights <- c(.5, .5, .3, .3, .2, .2)
df %>%
  group_by(name) %>%
  # Parse the date strings before sorting: ordering the raw "m/d/Y" text is
  # lexicographic, not chronological.
  arrange(desc(mdy(date))) %>%
  # Keep at most as many rows per name as there are weights.
  slice(seq_along(weights)) %>%
  # Weighted average of every stat column; the weights are truncated to the
  # rows actually present so that short groups stay correctly normalised.
  summarise_at(vars(-date, -name),
               ~ weighted.mean(., weights[seq_along(.)]))
# # A tibble: 4 x 5
#   name            GA    SV    GF    SA
#   <chr>        <dbl> <dbl> <dbl> <dbl>
# 1 CAREY.FAKE    2.05  2.05  2.05  2.05
# 2 JEFF.JOHNSON  2.05  2.05  2.05  2.05
# 3 JOHN.SMITH    2.05  2.05  2.05  2.05
# 4 SARA.JOHNSON  2.05  2.05  2.05  2.05

推荐阅读