首页 > 解决方案 > 用 ggplot 绘制多个 ACF

问题描述

嗨,我正在尝试用 ggplot 绘制多个 ACF 和 PACF。

我已经取得了一些进展,但循环部分仍然让我很头疼,希望你能帮帮我。

我想绘制四个 ACF 图,AAPL、GE、SPY 和 WMT 各一个。

# Asker's attempt, kept exactly as posted. It does not parse as written;
# the NOTE(review) comments below mark the visible problems.
col_names <- colnames(df)
# Drop the first two column names; the remaining names are looped over.
col_names <- col_names[-c(1,2)]
for (i in col_names){
# NOTE(review): acf() receives the whole `df` here, not the column named `i`.
bacf <- acf(df, plot = FALSE)
bacfdf <- with(bacf, data.frame(lag, acf))
# NOTE(review): the trailing `+` makes this expression continue into the next
# statement. sum(!is.na(df)) counts non-missing cells of the entire data frame.
significance_level <- qnorm((1 + 0.95)/2)/sqrt(sum(!is.na(df)))+

q <- ggplot(data = bacfdf, mapping = aes(x = lag, y = acf)) +
  geom_segment(mapping = aes(xend = lag, yend = 0))+
  geom_hline(yintercept=c(significance_level,-significance_level), lty=3, color="blue") +
  ggtitle(i)+theme_minimal()
# NOTE(review): `plot_list` is not initialised anywhere in the code shown.
plot_list[[i]]<-q;

# NOTE(review): the `for (...) {` opened above is never closed. `q` holds a
# single plot, while the per-series plots are stored in `plot_list`.
plot_grid(q[[1]],q[[2]],q[[3]],q[[4]])

我的一些数据:

structure(list(Date = structure(c(10960, 10961, 10962, 10963, 
10966, 10967, 10968, 10969, 10970, 10974, 10975, 10976, 10977, 
10980, 10981, 10982, 10983, 10984, 10987, 10988), class = "Date"), 
    `AAPL, not cleaned` = c(-8.810021, 1.45281, -9.051401, 4.628075, 
    -1.774445, -5.25055, -6.181806, 10.40407, 3.74302, 3.425328, 
    2.48944, 6.309463, -1.948374, -4.652429, 5.493372, -1.852238, 
    -0.1725783, -7.924, 2.074379, -3.431709), AAPL = c(-8.810021, 
    1.45281, -9.051401, 4.628075, -1.774445, -5.25055, -6.181806, 
    10.40407, 3.74302, 3.425328, 2.48944, 6.309463, -1.948374, 
    -4.652429, 5.493372, -1.852238, -0.1725783, -7.924, 2.074379, 
    -3.431709), GE = c(-4.08219945, -0.17376199, 1.32681098, 
    3.7986923, -0.03966156, 0.1651528, 0.32948959, 1.14473858, 
    -1.80480699, -2.00675631, 0.48530696, -1.88697651, -1.24799082, 
    -4.25203994, 0.26750549, 2.10052733, 0.21893437, -5.62251427, 
    0, 1.48150858), SPY = c(-3.989133, 0.1787311, -1.620197, 
    5.645238, 0.3424661, -1.203798, -0.999791, 1.345214, 1.348592, 
    -0.7898116, 0.8111037, -1.542447, -0.2161228, -2.875245, 
    1.129238, -0.7957602, -0.4002674, -3.16912, 2.677718, 0.9804
    ), WMT = c(-3.813763, -2.360084, 1.391327, 7.280618, -1.841673, 
    -1.498155, -1.812554, 0.1075352, -0.9720049, 1.630053, -2.314561, 
    -1.067179, -1.494226, -5.024863, 2.904527, 1.316343, -4.642776, 
    -7.004438, -0.6916664, 6.949199)), row.names = c(NA, 20L), class = "data.frame")

标签: r ggplot2 autocorrelation

解决方案


我尽量在尊重您原有思路的前提下让代码更高效。有些地方也可以用 base R 的 lapply,但我这里采用的是 tidyverse 的写法。

library(dplyr)
library(purrr)
library(ggplot2)

glimpse(df)
#> Rows: 20
#> Columns: 6
#> $ Date                <date> 2000-01-04, 2000-01-05, 2000-01-06, 2000-01-07, …
#> $ `AAPL, not cleaned` <dbl> -8.8100210, 1.4528100, -9.0514010, 4.6280750, -1.…
#> $ AAPL                <dbl> -8.8100210, 1.4528100, -9.0514010, 4.6280750, -1.…
#> $ GE                  <dbl> -4.08219945, -0.17376199, 1.32681098, 3.79869230,…
#> $ SPY                 <dbl> -3.9891330, 0.1787311, -1.6201970, 5.6452380, 0.3…
#> $ WMT                 <dbl> -3.8137630, -2.3600840, 1.3913270, 7.2806180, -1.…

# Build the ACF table directly: one column per series, one row per lag.
# `Date` and `AAPL, not cleaned` (which is actually identical to AAPL) are
# dropped up front so no autocorrelation is computed for unused columns.

series <- df %>% select(-Date, -`AAPL, not cleaned`)

# acf() returns its estimates as a 3-d array; as.numeric() flattens each one
# to a plain vector so the named list converts cleanly into a tibble.

bacdf <- series %>%
  map(function(ts) as.numeric(acf(ts, plot = FALSE)$acf)) %>%
  as_tibble()

# The lags are all the same: just 0 through the number of rows minus 1

bacdf$lag <- seq_len(nrow(bacdf)) - 1L

# put `lag` first, followed by one column per series

bacdf <- bacdf %>% select(lag, everything())

bacdf
#> # A tibble: 14 x 5
#>      lag    AAPL      GE     SPY      WMT
#>    <int>   <dbl>   <dbl>   <dbl>    <dbl>
#>  1     0  1       1       1       1      
#>  2     1 -0.147   0.0812 -0.197   0.0769 
#>  3     2  0.143  -0.316   0.0197 -0.453  
#>  4     3 -0.0940 -0.182  -0.407  -0.296  
#>  5     4  0.312   0.326   0.239   0.158  
#>  6     5 -0.229   0.0171 -0.0140  0.176  
#>  7     6 -0.0727 -0.238  -0.0138 -0.00309
#>  8     7 -0.0300 -0.198  -0.0321 -0.0287 
#>  9     8 -0.150   0.0675 -0.135  -0.0981 
#> 10     9 -0.238   0.101  -0.0376 -0.106  
#> 11    10 -0.186  -0.313  -0.219  -0.0790 
#> 12    11  0.114  -0.0322  0.285   0.115  
#> 13    12 -0.119   0.148  -0.0444  0.202  
#> 14    13  0.128   0.214   0.0559 -0.0870

# Approximate 95% bound for the sample autocorrelation of white noise:
# z_{0.975} / sqrt(n), where n is the number of observations in ONE series.
# acf() is called with its default NA handling above, so every series
# contributes all nrow(df) observations. Counting the non-missing cells of the
# whole data frame would inflate n by the number of columns and make the band
# too narrow.
significance_level <- qnorm((1 + 0.95) / 2) / sqrt(nrow(df))

# First column of `bacdf` is the x axis; every other column is one series.
myaxis <- colnames(bacdf[1])
mynames <- colnames(bacdf[-1])
myaxis
#> [1] "lag"
mynames
#> [1] "AAPL" "GE"   "SPY"  "WMT"

# One ACF plot per series. Columns are looked up by name through the `.data`
# pronoun (aes_string() is deprecated); labels are set explicitly so the axes
# show the plain column names.
for (series_name in mynames) {
  acf_plot <- ggplot(
    bacdf,
    aes(x = .data[[myaxis]], y = .data[[series_name]])
  ) +
    geom_segment(aes(xend = .data[[myaxis]], yend = 0)) +
    geom_point() +
    geom_hline(
      yintercept = c(significance_level, -significance_level),
      linetype = 3,
      color = "blue"
    ) +
    labs(title = series_name, x = myaxis, y = series_name) +
    theme_minimal()
  # print() is required for ggplot objects created inside a loop.
  print(acf_plot)
}

以下是您的数据,仅供记录,方便复现:

# Sample data from the question: 20 rows, a `Date` column plus five numeric
# series. `AAPL, not cleaned` carries the same values as `AAPL`.
df <- structure(
  list(
    Date = structure(
      c(
        10960, 10961, 10962, 10963, 10966, 10967, 10968, 10969, 10970, 10974,
        10975, 10976, 10977, 10980, 10981, 10982, 10983, 10984, 10987, 10988
      ),
      class = "Date"
    ),
    `AAPL, not cleaned` = c(
      -8.810021, 1.45281, -9.051401, 4.628075, -1.774445,
      -5.25055, -6.181806, 10.40407, 3.74302, 3.425328,
      2.48944, 6.309463, -1.948374, -4.652429, 5.493372,
      -1.852238, -0.1725783, -7.924, 2.074379, -3.431709
    ),
    AAPL = c(
      -8.810021, 1.45281, -9.051401, 4.628075, -1.774445,
      -5.25055, -6.181806, 10.40407, 3.74302, 3.425328,
      2.48944, 6.309463, -1.948374, -4.652429, 5.493372,
      -1.852238, -0.1725783, -7.924, 2.074379, -3.431709
    ),
    GE = c(
      -4.08219945, -0.17376199, 1.32681098, 3.7986923, -0.03966156,
      0.1651528, 0.32948959, 1.14473858, -1.80480699, -2.00675631,
      0.48530696, -1.88697651, -1.24799082, -4.25203994, 0.26750549,
      2.10052733, 0.21893437, -5.62251427, 0, 1.48150858
    ),
    SPY = c(
      -3.989133, 0.1787311, -1.620197, 5.645238, 0.3424661,
      -1.203798, -0.999791, 1.345214, 1.348592, -0.7898116,
      0.8111037, -1.542447, -0.2161228, -2.875245, 1.129238,
      -0.7957602, -0.4002674, -3.16912, 2.677718, 0.9804
    ),
    WMT = c(
      -3.813763, -2.360084, 1.391327, 7.280618, -1.841673,
      -1.498155, -1.812554, 0.1075352, -0.9720049, 1.630053,
      -2.314561, -1.067179, -1.494226, -5.024863, 2.904527,
      1.316343, -4.642776, -7.004438, -0.6916664, 6.949199
    )
  ),
  row.names = c(NA, 20L),
  class = "data.frame"
)

由 reprex 包(v0.3.0)于 2020-05-13 创建


推荐阅读