首页 > 解决方案 > 使用面板数据滚动多元回归

问题描述

我正在尝试对按股票分组的数据集进行滚动多元回归。数据集的示例如下所示。它从 1991 年到 2019 年,包含股票回报等信息。我打算使用 36 个月的滚动回归窗口对每只股票的 EPU_Paper 上的因变量 ExcessReturn 进行回归。我还希望股票在 36 个月的滚动窗口中至少有 18 个月的收益观察。在这种情况下,ISIN 是每只股票的标识符。我还想在回归中包含 SIZE、INVEST、BM 和 OP 作为对照。数据集不包含任何 NA,除了 beta_monthly 列是所有 NA。.

这是我尝试运行的代码。我能够让 for 循环正常工作,但没有得到想要的结果。理想情况下,我希望将 EPU_Paper 的系数附加到正确行的 df_Final 中。

我对我当前尝试的全新解决方案和变体持开放态度。

# Create date sequence
date <- seq(as.Date("1991-01-01"),as.Date("2019-12-31"), by = "month")

## Model
v <- 36 # No. of observations in rolling regression 
w <- 18 # observations of stocks requred in period 

df_Final$beta_monthly <- NA

for (i in 1:(length(date)-v)) {
  beta.tab <- df_Final %>% filter(Date >= date[i] & Date < date[i+v]) %>%
    group_by(ISIN) %>% filter(n() >= w) %>%
    do(ols.model = lm(formula = ExcessReturn ~ EPU_Paper + SIZE, .)) %>%
    mutate(beta_monthly = coefficients(ols.model)[2],
           Date = date[v+1])
  } 

这是数据帧 df_Final。它包含我需要的所有数据。

structure(list(Year = c(2002, 2004, 2011, 2011, 2012, 1993, 2005, 
2019, 2005, 1998), Month = c(5, 4, 12, 11, 4, 11, 7, 2, 12, 12
), ISIN = c("NO0003172207", "NO0003072803", "NO0010001118", "NO0010096985", 
"NO0010052350", "NO0004031303", "NO0003733800", "NO0003049405", 
"NO0003028904", "NO0004684408"), SIZE = c(1143750000, 894618192, 
257727844.92, 293346266180.2, 104014912.25, 1312826651.5, 51164845865, 
535492777.6, 1.2465e+10, 8815671800), BM = c(2.69336652499494e-06, 
6.25913195949328e-07, 3.0680673824874e-07, 9.99841307356348e-07, 
3.99901247813628e-06, 3.6136784151303e-06, 6.27009692475242e-07, 
6.397720392755e-07, 1.985559566787e-07, 9.2518383241951e-07), 
    OP = c(-0.0259646808923766, 0.197313839816668, 0.136649432305334, 
    0.594948150836374, -0.0018535993529254, -0.0801364023870418, 
    0.130539826349566, 0.0244477246423, 0.620295983086681, 0.103857566765579
    ), INVEST = c(0.129154816408376, 0.0321275661230328, -0.092547902189399, 
    0.142434794968375, -0.121033439243494, -0.00124744840099796, 
    -0.240237999927217, 0.0376008757633188, 0.060294968189705, 
    0.112664489390554), MonthlyReturn = c(-0.039797852179406, 
    -0.066030013642565, 0.019230769230769, 0.049271412097704, 
    -0.12516823687752, -0.02219755826859, 0.057851239669421, 
    -0.043636363636364, 0.05232436939754, 0.32743529766845), 
    RiskFreeRate = c(0.00558, 0.00163, 0.00209, 0.00251, 0.00163, 
    0.00467, 0.00181, 0.00086, 0.00208, 0.00726), ShareTurnover = c(69750L, 
    5250L, 369135L, 183793926L, 54869L, 2879656L, 7957362L, 367551L, 
    2478662L, 2245928L), MarketExcessReturn = c(-2.7155, -3.0781, 
    1.0322, -0.3552, -0.9447, -4.9307, 6.0359, 3.8371, 6.932, 
    -0.7896), ExcessReturn = c(-4.5377852179406, -6.7660013642565, 
    1.7140769230769, 4.6761412097704, -12.679823687752, -2.686755826859, 
    5.6041239669421, -4.4496363636364, 5.024436939754, 32.017529766845
    ), TradeDate = structure(c(11838, 12538, 15338, 15308, 15460, 
    8734, 12993, 17955, 13147, 10590), class = "Date"), GR_SIZE = structure(c(3L, 
    2L, 1L, 3L, 1L, 2L, 3L, 1L, 3L, 3L), .Label = c("1", "2", 
    "3"), class = "factor"), GR_OP = structure(c(1L, 2L, 2L, 
    3L, 1L, 1L, 2L, 1L, 3L, 1L), .Label = c("1", "2", "3"), class = "factor"), 
    GR_BM = structure(c(3L, 2L, 1L, 3L, 3L, 3L, 2L, 2L, 1L, 3L
    ), .Label = c("1", "2", "3"), class = "factor"), GR_INVEST = structure(c(3L, 
    2L, 1L, 3L, 1L, 1L, 1L, 2L, 2L, 2L), .Label = c("1", "2", 
    "3"), class = "factor"), SIZE_BM = structure(c(9L, 5L, 1L, 
    9L, 3L, 6L, 8L, 2L, 7L, 9L), .Label = c("11", "12", "13", 
    "21", "22", "23", "31", "32", "33"), class = "factor"), SIZE_OP = structure(c(7L, 
    5L, 2L, 9L, 1L, 4L, 8L, 1L, 9L, 7L), .Label = c("11", "12", 
    "13", "21", "22", "23", "31", "32", "33"), class = "factor"), 
    SIZE_INVEST = structure(c(9L, 5L, 1L, 9L, 1L, 4L, 7L, 2L, 
    8L, 8L), .Label = c("11", "12", "13", "21", "22", "23", "31", 
    "32", "33"), class = "factor"), Date = structure(c(11808, 
    12509, 15309, 15279, 15431, 8705, 12965, 17928, 13118, 10561
    ), class = "Date"), EPU_Paper = c(53.995111032374, 68.0510031873012, 
    150.261825109363, 124.78265498286, 47.2994312059608, 164.273390295025, 
    27.168222382902, 181.297305839429, 29.292072793154, 139.423199892468
    ), beta_monthly = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
    )), row.names = c(NA, -10L), class = "data.frame")

标签: rlinear-regressionrolling-computation

解决方案


您可以尝试拆分应用组合方法。

.date <- sort(unique(df2$date))
i <- 1; v <- 24; w <- 3

res <- do.call(rbind, lapply(1:v, function(i) {
  ds <- df2$date %in% .date[0:(v - 1) + i]
  S <- split(df2[ds, ], df2[ds, "date"])
  dat <- do.call(rbind, unname(S[sapply(S, nrow) >= w]))
  fit <- lm(y ~ x1 + x2, dat)
  rs <- dat$date %in% .date[i]
  beta_monthly <- unname(fit$coef[2])
  if (any(rs)) cbind(dat[rs, ], beta_monthly) else NULL
}))

head(res, 10)
#    cl       date          y        x1          x2 beta_monthly
# 6   A 2011-02-01  0.1947135  38.20878 62846231450  -0.01949786
# 7   B 2011-02-01  5.7908162 130.09371 73446134000  -0.01949786
# 8   C 2011-02-01 -1.6335241  63.67381 93917412861  -0.01949786
# 9   D 2011-02-01 -4.8414052 151.70718 76852791458  -0.01949786
# 10  E 2011-02-01  4.1640901 123.10342 16714132588  -0.01949786
# 11  A 2011-03-01 -2.0569659 104.46436 28101485893  -0.01935559
# 12  B 2011-03-01  9.2205063  24.58415 42584043997  -0.01935559
# 13  C 2011-03-01 -0.1572310  65.94721 83745620495  -0.01935559
# 14  D 2011-03-01  5.2782394  25.69336 15235322119  -0.01935559
# 15  E 2011-03-01  3.6096263 163.65887 66618792459  -0.01935559

数据:

set.seed(42)
df2 <- expand.grid(cl=LETTERS[1:5],
                   date=seq(as.Date("2011-01-01"), as.Date("2019-12-31"), by="month"))
df2 <- df2[-sample(1:nrow(df2), nrow(df2)*.1), ]
n <- nrow(df2)
df2 <- transform(df2,
                 y=rnorm(n, 2, 5),
                 x1=runif(n, 20, 180),
                 x2=runif(n, 1e8, 1e11))

推荐阅读