首页 > 解决方案 > R:在ggplot中使用rollmean会在最后产生错误的下降

问题描述

我正在对时间序列数据做平滑处理,并用 ggplot 绘图。过去我使用 TTR 预先平滑数据,但最近开始在 ggplot 调用中即时平滑。但是,这样做产生了两个伪影(artifact),我不确定自己遗漏了什么。

  1. ggplot 内的平滑使数据沿时间轴移动
  2. ggplot 内部的平滑会在一个数据系列的末尾产生错误的下降,而另一个则不会。
# Draw a 10-point, left-aligned rolling mean of x, computed inside aes().
# rollmean() is handed the x column of df as a single vector here.
ggplot(df, aes(x = date, y = x, colour = group)) +
  geom_line(
    aes(y = rollmean(x, k = 10, fill = NA, align = "left")),
    size = 0.75,
    na.rm = TRUE
  )

产生:

带有rollmean的GGplot

然而

# Pre-smooth each series outside ggplot: turn one group's rows into a zoo
# series, apply SMA (TTR, per the surrounding text) with n = 10 to its first
# data column, then rebuild a data frame carrying the date index and the
# group label.
# drop = FALSE keeps the single selected column two-dimensional so apply()
# can iterate over it; FALSE is spelled out because `F` is an ordinary
# variable that can be reassigned.
df.1.ts <- read.zoo(df[df$group == "series1", ], format = "%Y-%m-%d")
df.1.SMA10 <- data.frame(apply(df.1.ts[, 1, drop = FALSE], 2, SMA, n = 10))
df.1.SMA10 <- cbind(as.Date(time(df.1.ts)), df.1.SMA10)
df.1.SMA10$group <- "series1"
names(df.1.SMA10)[1] <- "date"

# Same steps for the second series.
df.2.ts <- read.zoo(df[df$group == "series2", ], format = "%Y-%m-%d")
df.2.SMA10 <- data.frame(apply(df.2.ts[, 1, drop = FALSE], 2, SMA, n = 10))
df.2.SMA10 <- cbind(as.Date(time(df.2.ts)), df.2.SMA10)
df.2.SMA10$group <- "series2"
names(df.2.SMA10)[1] <- "date"

# Stack both smoothed series into one long data frame for plotting.
df.SMA10 <- rbind(df.1.SMA10, df.2.SMA10)

# Plot the pre-smoothed series, one coloured line per group.
# na.rm = TRUE removes missing values without a warning; TRUE is spelled out
# because `T` is an ordinary variable that can be reassigned.
ggplot(data = df.SMA10, aes(x = date, y = x, color = group)) +
  geom_line(size = 0.75, na.rm = TRUE)

产生:

具有预平滑时间序列的 Ggplot

样本数据:

# Sample data for the examples (looks like dput() output): a 207-row
# data.frame with three columns:
#   date  - class "Date", stored as numeric day counts
#   x     - numeric values that the examples smooth
#   group - series label, "series1" or "series2"
df<-structure(list(date = structure(c(14242, 14243, 14244, 14245, 
14246, 14247, 14248, 14249, 14250, 14251, 14252, 14253, 14254, 
14255, 14256, 14257, 14258, 14259, 14260, 14261, 14262, 14263, 
14264, 14265, 14266, 14267, 14268, 14269, 14270, 14271, 14272, 
14273, 14274, 14275, 14276, 14277, 14278, 14279, 14280, 14281, 
14282, 14283, 14284, 14285, 14286, 14287, 14288, 14289, 14290, 
14291, 14292, 14293, 14294, 14295, 14296, 14297, 14298, 14299, 
14300, 14301, 14302, 14303, 14304, 14305, 14306, 14307, 14308, 
14309, 14310, 14311, 14312, 14313, 14314, 14315, 14316, 14317, 
14318, 14319, 14320, 14321, 14322, 14323, 14324, 14325, 14326, 
14327, 14328, 14329, 14330, 14331, 14332, 14333, 14334, 14335, 
14214, 14215, 14216, 14217, 14218, 14219, 14220, 14221, 14222, 
14223, 14224, 14225, 14226, 14227, 14228, 14229, 14230, 14231, 
14232, 14233, 14234, 14235, 14236, 14237, 14238, 14239, 14240, 
14241, 14242, 14243, 14244, 14245, 14246, 14247, 14248, 14249, 
14250, 14251, 14252, 14253, 14254, 14255, 14256, 14257, 14258, 
14259, 14260, 14261, 14262, 14263, 14264, 14265, 14266, 14267, 
14268, 14269, 14270, 14271, 14272, 14273, 14274, 14275, 14276, 
14277, 14278, 14279, 14280, 14281, 14282, 14283, 14284, 14285, 
14286, 14287, 14288, 14289, 14290, 14291, 14292, 14293, 14294, 
14295, 14296, 14297, 14298, 14299, 14300, 14301, 14302, 14303, 
14304, 14305, 14306, 14307, 14308, 14309, 14310, 14311, 14312, 
14313, 14314, 14315, 14316, 14317, 14318, 14319, 14320, 14321, 
14322, 14323, 14324, 14325, 14326), class = "Date"), x = c(0.859649122807018, 
0.583333333333333, 0.868055555555556, 0.78125, 0.524305555555556, 
0.475694444444444, 0.538194444444444, 0.798611111111111, 0.576388888888889, 
0.819444444444444, 0.746527777777778, 0.725694444444444, 0.336805555555556, 
0.263888888888889, 0.486111111111111, 0.701388888888889, 0.864583333333333, 
0.701388888888889, 0.524305555555556, 0.916666666666667, 0.715277777777778, 
0.857638888888889, 0.305555555555556, 0.701388888888889, 0.774305555555556, 
0.857638888888889, 0.961805555555556, 0.840277777777778, 0.913194444444444, 
0.909722222222222, 0.746527777777778, 0.711805555555556, 0.895833333333333, 
0.666666666666667, 0.993055555555556, 0.96875, 0.597222222222222, 
0.725694444444444, 0.791666666666667, 0.875, 0.9375, 0.788194444444444, 
0.708333333333333, 0.951388888888889, 0.819444444444444, 0.989583333333333, 
0.965277777777778, 0.947916666666667, 0.996527777777778, 0.979166666666667, 
0.944444444444444, 0.902777777777778, 0.996527777777778, 0.975694444444444, 
1, 1, 1, 1, 0.96875, 0.993055555555556, 0.982638888888889, 0.729166666666667, 
1, 0.993055555555556, 0.975694444444444, 0.996527777777778, 0.993055555555556, 
0.975694444444444, 0.996527777777778, 0.989583333333333, 0.996527777777778, 
1, 0.975694444444444, 0.996527777777778, 1, 0.989583333333333, 
0.996527777777778, 1, 0.996527777777778, 0.975694444444444, 0.975694444444444, 
0.979166666666667, 0.944444444444444, 0.989583333333333, 1, 0.986111111111111, 
0.951388888888889, 0.979166666666667, 0.993055555555556, 0.989583333333333, 
0.951388888888889, 0.996527777777778, 0.993055555555556, 1, 0.0390070921985816, 
0.0173611111111111, 0.229166666666667, 0, 0, 0.107638888888889, 
0.0208333333333333, 0.0763888888888889, 0, 0.121527777777778, 
0.00694444444444444, 0.159722222222222, 0.59375, 0.131944444444444, 
0.131944444444444, 0.0138888888888889, 0.00694444444444444, 0.0659722222222222, 
0.461805555555556, 0.277777777777778, 0.638888888888889, 0.784722222222222, 
0.892361111111111, 0.6875, 0.631944444444444, 0.180555555555556, 
0.00347222222222222, 0.166666666666667, 0.152777777777778, 0, 
0.659722222222222, 0.53125, 0.159722222222222, 0.232638888888889, 
0.673611111111111, 0.670138888888889, 0.631944444444444, 0.760416666666667, 
0.829861111111111, 0.902777777777778, 0.788194444444444, 0.638888888888889, 
0.65625, 0.836805555555556, 0.680555555555556, 0.715277777777778, 
0.677083333333333, 0.798611111111111, 0.579861111111111, 0.788194444444444, 
0.826388888888889, 0.895833333333333, 0.899305555555556, 0.930555555555556, 
0.958333333333333, 0.90625, 0.861111111111111, 0.934027777777778, 
0.798611111111111, 0.888888888888889, 0.961805555555556, 0.975694444444444, 
0.993055555555556, 0.996527777777778, 0.850694444444444, 0.902777777777778, 
0.979166666666667, 0.986111111111111, 0.993055555555556, 0.975694444444444, 
0.809027777777778, 0.972222222222222, 0.951388888888889, 0.899305555555556, 
0.930555555555556, 0.961805555555556, 0.996527777777778, 0.989583333333333, 
0.961805555555556, 0.965277777777778, 0.989583333333333, 0.989583333333333, 
0.940972222222222, 0.996527777777778, 0.947916666666667, 0.982638888888889, 
1, 1, 0.979166666666667, 0.909722222222222, 0.930555555555556, 
0.704861111111111, 0.833333333333333, 0.902777777777778, 0.940972222222222, 
0.96875, 0.802083333333333, 0.836805555555556, 0.989583333333333, 
0.961805555555556, 1, 0.993055555555556, 0.809027777777778, 0.989583333333333, 
0.993055555555556, 0.954861111111111, 0.979166666666667, 0.989583333333333, 
0.982638888888889, 0.989583333333333, 1, 0.961805555555556, 0.925581395348837
), group = c("series1", "series1", "series1", "series1", "series1", 
"series1", "series1", "series1", "series1", "series1", "series1", 
"series1", "series1", "series1", "series1", "series1", "series1", 
"series1", "series1", "series1", "series1", "series1", "series1", 
"series1", "series1", "series1", "series1", "series1", "series1", 
"series1", "series1", "series1", "series1", "series1", "series1", 
"series1", "series1", "series1", "series1", "series1", "series1", 
"series1", "series1", "series1", "series1", "series1", "series1", 
"series1", "series1", "series1", "series1", "series1", "series1", 
"series1", "series1", "series1", "series1", "series1", "series1", 
"series1", "series1", "series1", "series1", "series1", "series1", 
"series1", "series1", "series1", "series1", "series1", "series1", 
"series1", "series1", "series1", "series1", "series1", "series1", 
"series1", "series1", "series1", "series1", "series1", "series1", 
"series1", "series1", "series1", "series1", "series1", "series1", 
"series1", "series1", "series1", "series1", "series1", "series2", 
"series2", "series2", "series2", "series2", "series2", "series2", 
"series2", "series2", "series2", "series2", "series2", "series2", 
"series2", "series2", "series2", "series2", "series2", "series2", 
"series2", "series2", "series2", "series2", "series2", "series2", 
"series2", "series2", "series2", "series2", "series2", "series2", 
"series2", "series2", "series2", "series2", "series2", "series2", 
"series2", "series2", "series2", "series2", "series2", "series2", 
"series2", "series2", "series2", "series2", "series2", "series2", 
"series2", "series2", "series2", "series2", "series2", "series2", 
"series2", "series2", "series2", "series2", "series2", "series2", 
"series2", "series2", "series2", "series2", "series2", "series2", 
"series2", "series2", "series2", "series2", "series2", "series2", 
"series2", "series2", "series2", "series2", "series2", "series2", 
"series2", "series2", "series2", "series2", "series2", "series2", 
"series2", "series2", "series2", "series2", "series2", "series2", 
"series2", "series2", "series2", "series2", "series2", "series2", 
"series2", "series2", "series2", "series2", "series2", "series2", 
"series2", "series2", "series2", "series2", "series2", "series2", 
"series2", "series2", "series2", "series2")), row.names = c(NA, 
-207L), class = "data.frame")

标签: r ggplot2 smoothing

解决方案


在您的 ggplot(.) 代码中,您调用的是 rollmean(x, ...),它会在所有的 x 上滚动,而不考虑分组。如果您希望按组分别计算,可以这样做:

# Per-group smoothing inside aes(): ave() splits x by group and applies the
# 10-point, left-aligned rolling mean to each group's values separately.
ggplot(df, aes(x = date, y = x, colour = group)) +
  geom_line(
    aes(y = ave(x, group, FUN = function(v) {
      zoo::rollmean(v, k = 10, fill = NA, align = "left")
    })),
    size = 0.75,
    na.rm = TRUE
  )

在此处输入图像描述

不过,我倾向于将聚合/数据处理代码放在 ggplot2 之外,这也有助于定位问题:

# Diagnostic: compute the rolling mean over the entire x column, ignoring
# group, and store it as a column so it can be inspected outside ggplot.
df[["rolly"]] <- zoo::rollmean(df[["x"]], k = 10, fill = NA, align = "left")

# Cross-tabulate group against whether the rolling mean is missing.
xtabs(~ group + is.na(rolly), data = df)
#          is.na(rolly)
# group     FALSE TRUE
#   series1    94    0
#   series2   104    9

# Inspect the first rows, the rows around index 94/95, and the last rows.
df[c(1:3, 92:97, 196:207), ]
#           date          x   group      rolly
# 1   2008-12-29 0.85964912 series1 0.68249269
# 2   2008-12-30 0.58333333 series1 0.67118056
# 3   2008-12-31 0.86805556 series1 0.68541667
# 92  2009-03-30 0.99652778 series1 0.34035904
# 93  2009-03-31 0.99305556 series1 0.24834515
# 94  2009-04-01 1.00000000 series1 0.14903960
# 95  2008-12-01 0.03900709 series2 0.06119238
# 96  2008-12-02 0.01736111 series2 0.05798611
# 97  2008-12-03 0.22916667 series2 0.07222222
# 196 2009-03-12 0.99305556 series2 0.96805556
# 197 2009-03-13 0.80902778 series2 0.96493056
# 198 2009-03-14 0.98958333 series2 0.97658592
# 199 2009-03-15 0.99305556 series2         NA
# 200 2009-03-16 0.95486111 series2         NA
# 201 2009-03-17 0.97916667 series2         NA
# 202 2009-03-18 0.98958333 series2         NA
# 203 2009-03-19 0.98263889 series2         NA
# 204 2009-03-20 0.98958333 series2         NA
# 205 2009-03-21 1.00000000 series2         NA
# 206 2009-03-22 0.96180556 series2         NA
# 207 2009-03-23 0.92558140 series2         NA

我预期每个系列的最后 9 行都是 NA,而不只是其中一个系列。实际上,series1 末尾的滚动窗口延伸进了 series2 开头的数据(见上面第 92–94 行的 rolly 值),这正是图中错误下降的来源。我们可以解决这个问题:

# Same rolling mean, but ave() applies it to each group's x values
# separately instead of to the whole column at once.
df[["rolly"]] <- ave(
  df[["x"]], df[["group"]],
  FUN = function(v) zoo::rollmean(v, k = 10, fill = NA, align = "left")
)
df[c(1:3, 82:97, 196:207), ]
#           date          x   group      rolly
# 1   2008-12-29 0.85964912 series1 0.68249269
# 2   2008-12-30 0.58333333 series1 0.67118056
# 3   2008-12-31 0.86805556 series1 0.68541667
# 82  2009-03-20 0.97916667 series1 0.97638889
# 83  2009-03-21 0.94444444 series1 0.97812500
# 84  2009-03-22 0.98958333 series1 0.98298611
# 85  2009-03-23 1.00000000 series1 0.98402778
# 86  2009-03-24 0.98611111 series1         NA
# 87  2009-03-25 0.95138889 series1         NA
# 88  2009-03-26 0.97916667 series1         NA
# 89  2009-03-27 0.99305556 series1         NA
# 90  2009-03-28 0.98958333 series1         NA
# 91  2009-03-29 0.95138889 series1         NA
# 92  2009-03-30 0.99652778 series1         NA
# 93  2009-03-31 0.99305556 series1         NA
# 94  2009-04-01 1.00000000 series1         NA
# 95  2008-12-01 0.03900709 series2 0.06119238
# 96  2008-12-02 0.01736111 series2 0.05798611
# 97  2008-12-03 0.22916667 series2 0.07222222
# 196 2009-03-12 0.99305556 series2 0.96805556
# 197 2009-03-13 0.80902778 series2 0.96493056
# 198 2009-03-14 0.98958333 series2 0.97658592
# 199 2009-03-15 0.99305556 series2         NA
# 200 2009-03-16 0.95486111 series2         NA
# 201 2009-03-17 0.97916667 series2         NA
# 202 2009-03-18 0.98958333 series2         NA
# 203 2009-03-19 0.98263889 series2         NA
# 204 2009-03-20 0.98958333 series2         NA
# 205 2009-03-21 1.00000000 series2         NA
# 206 2009-03-22 0.96180556 series2         NA
# 207 2009-03-23 0.92558140 series2         NA

或者,如果您习惯使用 dplyr,那么:

library(dplyr)
# Compute the 10-point, left-aligned rolling mean within each group, remove
# the grouping, then pipe the result straight into ggplot.
df %>%
  group_by(group) %>%
  mutate(rolly = zoo::rollmean(x, k = 10, fill = NA, align = "left")) %>%
  ungroup() %>%
  ggplot(aes(x = date, y = x, colour = group)) +
  geom_line(aes(y = rolly), size = 0.75, na.rm = TRUE)

推荐阅读