首页 > 解决方案 > 基于 R 中的列值(或日期)的滚动回归

问题描述

我有下表:

# A tibble: 40 x 5
# Groups:   stock [1]
   stock     date         mkt_cap  week  returns
   <chr>    <date>         <dbl> <int>    <dbl>
 1 A        2019-03-04 10522834.    NA  NA     
 2 A        2019-03-05 11659707.    NA   9.70  
 3 A        2019-03-06 11464531.    NA  -2.25  
 4 A        2019-03-07 12217241.    NA   5.80  
 5 A        2019-03-08 11619351.     1  -5.57  
 6 A        2019-03-09 11578687.    NA  -0.899 
 7 A        2019-03-10 11658368.    NA   0.141 
 8 A        2019-03-11 12722921.    NA   8.20  
 9 A        2019-03-12 15429934.    NA  18.8   
10 A        2019-03-13 16801600.    NA   7.98  
11 A        2019-03-14 17898334.    NA   5.79  
12 A        2019-03-15 18492686.     2   2.74  
13 A        2019-03-16 20686683.    NA  10.7   
14 A        2019-03-17 22299970.    NA   6.98  
15 A        2019-03-18 22924182.    NA   2.24  
16 A        2019-03-19 24174351.    NA   4.79  
17 A        2019-03-20 24661467.    NA   1.48  
18 A        2019-03-21 23351810.    NA  -5.97  
19 A        2019-03-22 27826601.     3  17.0   
20 A        2019-03-23 30670482.    NA   9.22  
21 A        2019-03-24 32802772.    NA   6.21  
22 A        2019-03-25 31778387.    NA  -3.68  
23 A        2019-03-26 33237006.    NA   3.99  
24 A        2019-03-27 34971479.    NA   4.59  
25 A        2019-03-28 36774005.    NA   4.53  
26 A        2019-03-29 37594815.     4   1.71  
27 A        2019-03-30 38321816.    NA   1.42  
28 A        2019-03-31 35167070.    NA  -9.08  
29 A        2019-04-01 35625396.    NA   0.808 
30 A        2019-04-02 35764747.    NA  -0.0940
31 A        2019-04-03 28316242.    NA -23.8   
32 A        2019-04-04 26124803.    NA  -8.53  
33 A        2019-04-05 30390295.     5  14.6   
34 A        2019-04-06 28256485.    NA  -7.76  
35 A        2019-04-07 29807837.    NA   4.87  
36 A        2019-04-08 30970364.    NA   3.36  
37 A        2019-04-09 30470093.    NA  -2.10  
38 A        2019-04-10 30860276.    NA   0.806 
39 A        2019-04-11 27946472.    NA -10.4   
40 A        2019-04-12 27662766.     6  -1.48  

在这张表上,我想运行一个滚动回归,其中滚动回归包含过去一个月的数据。我想在几周内运行这些滚动回归。也就是说,在 week==1、week ==2 等,我们使用过去一个月的数据。回归应该是lm(return~mkt_cap)。我使用 slide_period() 函数尝试了很多事情,但是,这对我来说并不奏效。例如,我尝试运行 tbl.data %>% group_by(stock, week) %>% slide_period(date, date, "month", ~.x, .before = 1). 我的数据存在一些空白,因此我更喜欢考虑日期的解决方案。

有人可以帮我吗?亲切的问候。

标签: r

解决方案


我会使用 tidyverserowwise方法。

我不清楚应该如何按周创建模型回到上个月。在下面的方法中,我max_date每周计算一次,然后返回 30 天。

# setup
library(tidyverse)
library(lubridate)

dat <- tribble(~stock, ~date, ~mkt_cap, ~week, ~returns,
"A",        "2019-03-04", 10522834.,    NA,       NA,
"A",        "2019-03-05", 11659707.,    NA,     9.70,
"A",        "2019-03-06", 11464531.,    NA,    -2.25,  
"A",        "2019-03-07", 12217241.,    NA,     5.80,  
"A",        "2019-03-08", 11619351.,     1,    -5.57,  
"A",        "2019-03-09", 11578687.,    NA,   -0.899,
"A",        "2019-03-10", 11658368.,    NA,    0.141, 
"A",        "2019-03-11", 12722921.,    NA,     8.20,  
"A",        "2019-03-12", 15429934.,    NA,     18.8,   
"A",        "2019-03-13", 16801600.,    NA,     7.98,  
"A",        "2019-03-14", 17898334.,    NA,     5.79,  
"A",        "2019-03-15", 18492686.,     2,     2.74,  
"A",        "2019-03-16", 20686683.,    NA,     10.7,   
"A",        "2019-03-17", 22299970.,    NA,     6.98,  
"A",        "2019-03-18", 22924182.,    NA,     2.24,  
"A",        "2019-03-19", 24174351.,    NA,     4.79,  
"A",        "2019-03-20", 24661467.,    NA,     1.48,  
"A",        "2019-03-21", 23351810.,    NA,    -5.97,  
"A",        "2019-03-22", 27826601.,     3,     17.0,   
"A",        "2019-03-23", 30670482.,    NA,     9.22,  
"A",        "2019-03-24", 32802772.,    NA,     6.21,  
"A",        "2019-03-25", 31778387.,    NA,    -3.68,  
"A",        "2019-03-26", 33237006.,    NA,     3.99,  
"A",        "2019-03-27", 34971479.,    NA,     4.59,  
"A",        "2019-03-28", 36774005.,    NA,     4.53,  
"A",        "2019-03-29", 37594815.,     4,     1.71,  
"A",        "2019-03-30", 38321816.,    NA,     1.42,  
"A",        "2019-03-31", 35167070.,    NA,    -9.08,  
"A",        "2019-04-01", 35625396.,    NA,    0.808, 
"A",        "2019-04-02", 35764747.,    NA,  -0.0940,
"A",        "2019-04-03", 28316242.,    NA,    -23.8,   
"A",        "2019-04-04", 26124803.,    NA,    -8.53,  
"A",        "2019-04-05", 30390295.,     5,     14.6,   
"A",        "2019-04-06", 28256485.,    NA,    -7.76,  
"A",        "2019-04-07", 29807837.,    NA,     4.87,  
"A",        "2019-04-08", 30970364.,    NA,     3.36,  
"A",        "2019-04-09", 30470093.,    NA,    -2.10,  
"A",        "2019-04-10", 30860276.,    NA,    0.806, 
"A",        "2019-04-11", 27946472.,    NA,    -10.4,   
"A",        "2019-04-12", 27662766.,     6,    -1.48) %>% 
  mutate(date = as.Date(date)) %>% 
  fill(week, .direction = "up") 

# summarised data.frame by week with min and max date
dat2 <- dat %>% 
  group_by(week) %>% 
  summarise(max_date = max(date),
            min_date = max_date %m-% months(1))
#> `summarise()` ungrouping output (override with `.groups` argument)

# create the models  
dat3 <- dat2 %>%
  rowwise() %>% 
  mutate(mod = list(lm(returns ~ mkt_cap,
                       data = filter(dat,
                                     date <= .env$max_date,
                                     date >= .env$min_date)))) 

# get the relevant informationen per week
dat3 %>%
  mutate(res = list(broom::tidy(mod)),
         broom::glance(mod)) %>% 
  select(week,
         res,
         adj.r.squared,
         mod_p.value = p.value,
         nobs) %>% 
  unnest(res) %>% 
  filter(term != "(Intercept)")

#> # A tibble: 6 x 9
#>    week term  estimate std.error statistic p.value adj.r.squared mod_p.value
#>   <dbl> <chr>    <dbl>     <dbl>     <dbl>   <dbl>         <dbl>       <dbl>
#> 1     1 mkt_~  1.01e-5   1.34e-5     0.756   0.529       -0.167        0.529
#> 2     2 mkt_~  9.26e-7   7.45e-7     1.24    0.245        0.0520       0.245
#> 3     3 mkt_~  2.56e-7   2.97e-7     0.864   0.400       -0.0152       0.400
#> 4     4 mkt_~  2.00e-8   1.42e-7     0.141   0.889       -0.0426       0.889
#> 5     5 mkt_~ -1.18e-7   1.61e-7    -0.736   0.467       -0.0150       0.467
#> 6     6 mkt_~ -3.23e-7   2.37e-7    -1.37    0.182        0.0271       0.182
#> # ... with 1 more variable: nobs <int>

reprex 包(v0.3.0)于 2021-04-27 创建

更新

当处理不止一只股票时,这种方法可以很容易地扩展:

# lets append the same data and change stock to "B":
dat <- dat %>% 
  bind_rows({mutate(., stock = "B")})

# summarised data.frame by week and group with min and max date
dat2 <- dat %>% 
  group_by(stock, week) %>% 
  summarise(max_date = max(date),
            min_date = max_date %m-% months(1))
#> `summarise()` has grouped output by 'stock'. You can override using the `.groups` argument.

# create the models, and this time also filer for .env$stock
dat3 <- dat2 %>%
  rowwise() %>% 
  mutate(mod = list(lm(returns ~ mkt_cap,
                       data = filter(dat,
                                     stock == .env$stock,
                                     date <= .env$max_date,
                                     date >= .env$min_date)))) 

# get the relevant informationen per week (this stays the same!)
dat3 %>%
  mutate(res = list(broom::tidy(mod)),
         broom::glance(mod)) %>% 
  select(week,
         res,
         adj.r.squared,
         mod_p.value = p.value,
         nobs) %>% 
  unnest(res) %>% 
  filter(term != "(Intercept)")

#> Adding missing grouping variables: `stock`
#> # A tibble: 12 x 10
#> # Groups:   stock [2]
#>    stock  week term         estimate   std.error statistic p.value adj.r.squared
#>    <chr> <dbl> <chr>           <dbl>       <dbl>     <dbl>   <dbl>         <dbl>
#>  1 A         1 mkt_cap  0.0000101    0.0000134       0.756   0.529       -0.167 
#>  2 A         2 mkt_cap  0.000000926  0.000000745     1.24    0.245        0.0520
#>  3 A         3 mkt_cap  0.000000256  0.000000297     0.864   0.400       -0.0152
#>  4 A         4 mkt_cap  0.0000000200 0.000000142     0.141   0.889       -0.0426
#>  5 A         5 mkt_cap -0.000000118  0.000000161    -0.736   0.467       -0.0150
#>  6 A         6 mkt_cap -0.000000323  0.000000237    -1.37    0.182        0.0271
#>  7 B         1 mkt_cap  0.0000101    0.0000134       0.756   0.529       -0.167 
#>  8 B         2 mkt_cap  0.000000926  0.000000745     1.24    0.245        0.0520
#>  9 B         3 mkt_cap  0.000000256  0.000000297     0.864   0.400       -0.0152
#> 10 B         4 mkt_cap  0.0000000200 0.000000142     0.141   0.889       -0.0426
#> 11 B         5 mkt_cap -0.000000118  0.000000161    -0.736   0.467       -0.0150
#> 12 B         6 mkt_cap -0.000000323  0.000000237    -1.37    0.182        0.0271
#> # … with 2 more variables: mod_p.value <dbl>, nobs <int>

reprex 包(v0.3.0)于 2021-04-27 创建


推荐阅读