r - Tibble 无法获取异常检测的日期(可重现的示例)
问题描述
我在下面有一个数据集:
dput(stack)
structure(list(Day = structure(c(17836, 17837, 17838, 17840,
17840, 17841, 17842, 17843, 17843, 17843, 17844, 17844, 17844,
17845, 17845, 17845, 17846, 17846, 17847, 17847, 17847, 17848,
17848, 17848, 17849, 17849, 17850, 17850, 17850, 17850, 17851,
17851, 17851, 17851, 17852, 17852, 17852, 17852, 17853, 17853,
17853, 17853, 17854, 17854, 17854, 17854, 17855, 17855, 17855,
17855, 17856, 17856, 17856, 17856, 17857, 17857, 17857, 17857,
17858, 17858, 17858, 17858, 17859, 17859, 17859, 17859, 17860,
17860, 17860, 17860, 17860, 17861, 17861, 17861, 17861, 17862,
17862, 17862, 17862, 17863, 17863, 17863, 17863, 17864, 17864,
17864, 17864, 17865, 17865, 17865, 17865, 17866, 17866, 17866,
17866, 17867, 17867, 17867, 17867, 17868, 17868, 17868, 17868,
17869, 17869, 17869, 17869, 17869, 17870, 17870, 17870, 17870,
17871, 17871, 17871, 17871, 17871, 17871, 17871, 17872, 17872,
17872, 17872, 17872, 17872, 17872, 17873, 17873, 17873, 17873,
17873, 17873, 17874, 17874, 17874, 17874, 17874, 17874, 17875,
17875, 17875, 17875, 17875, 17875, 17876, 17876, 17876, 17876,
17876, 17876, 17877, 17877, 17877, 17877, 17877, 17877, 17878,
17878, 17878, 17878, 17878, 17878), class = "Date"), Channel = c("D",
"A", "D", "A", "D", "D", "D", "A", "B", "D", "A", "B", "D", "A",
"B", "D", "A", "D", "A", "B", "D", "A", "D", "G", "A", "D", "A",
"D", "F", "G", "A", "D", "F", "G", "A", "D", "F", "G", "A", "D",
"F", "G", "A", "D", "F", "G", "A", "D", "F", "G", "A", "D", "F",
"G", "A", "D", "F", "G", "A", "D", "F", "G", "A", "D", "F", "G",
"A", "B", "D", "F", "G", "A", "D", "F", "G", "A", "D", "F", "G",
"A", "D", "F", "G", "A", "D", "F", "G", "A", "D", "F", "G", "A",
"D", "F", "G", "A", "D", "F", "G", "A", "D", "F", "G", "A", "D",
"E", "F", "G", "A", "D", "F", "G", "A", "B", "C", "D", "E", "F",
"G", "A", "B", "C", "D", "E", "F", "G", "A", "C", "D", "E", "F",
"G", "A", "C", "D", "E", "F", "G", "A", "C", "D", "E", "F", "G",
"A", "C", "D", "E", "F", "G", "A", "C", "D", "E", "F", "G", "A",
"C", "D", "E", "F", "G"), Orders = c(0, 0, 0, 2, 0, 0, 0, 0,
0, 0, 9, 0, 0, 36, 0, 1, 46, 1, 11, 0, 0, 5, 0, 0, 112, 0, 66,
1, 765, 2, 133, 0, 2290, 9, 524, 2, 6417, 8, 33, 0, 1008, 7,
70, 1, 1860, 12, 62, 3, 1141, 12, 67, 5, 1059, 13, 14, 2, 481,
13, 84, 6, 1693, 23, 370, 16, 6028, 51, 29, 0, 3, 640, 11, 5,
0, 569, 12, 31, 2, 730, 16, 45, 4, 961, 27, 71, 2, 594, 55, 155,
10, 607, 48, 224, 30, 4298, 66, 50, 0, 587, 92, 27, 1, 728, 80,
195, 3, 0, 1310, 62, 24, 6, 754, 46, 18, 0, 0, 2, 0, 439, 50,
107, 0, 0, 7, 0, 981, 55, 130, 0, 11, 0, 2784, 83, 61, 0, 4,
0, 843, 77, 110, 0, 0, 0, 885, 63, 70, 0, 3, 0, 481, 77, 81,
0, 0, 0, 833, 83, 38, 0, 1, 1, 712, 88)), row.names = c(NA, -162L
), spec = structure(list(cols = list(Day = structure(list(), class =
c("collector_character",
"collector")), Channel = structure(list(), class = c("collector_character",
"collector")), Orders = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector"))), class = "col_spec"), class = c("tbl_df", "tbl",
"data.frame"))
下面是前几行:
head(stack)
# A tibble: 6 x 3
Day Channel Orders
<date> <chr> <dbl>
1 2018-11-01 D 0
2 2018-11-02 A 0
3 2018-11-03 D 0
4 2018-11-05 A 2
5 2018-11-05 D 0
6 2018-11-06 D 0
我想使用 anomalize 包按组查看异常。
根据下面的参考文档,我想按组(Channel)分别绘制每个组的异常情况:https://www.rdocumentation.org/packages/anomalize/versions/0.1.1
这是我对我提供的数据集的尝试。
stack %>%
# Data Manipulation / Anomaly Detection
time_decompose(Orders, method = "stl") %>%
anomalize(remainder, method = "iqr") %>%
time_recompose() %>%
# Anomaly Visualization
plot_anomalies(time_recomposed = TRUE, ncol = 3, alpha_dots = 0.25) +
labs(title = "Tidyverse Anomalies", subtitle = "STL + IQR Methods")
这是我的错误:
Converting from tbl_df to tbl_time.
Auto-index message: index = Day
Error in mutate_impl(.data, dots) :
Evaluation error: Only year, quarter, month, week, and day periods are
allowed for an index of class Date.
根据网站参考,预期的输出应该是这样的:
解决方案
问题在于您的日期变量不是唯一的:同一天存在多个 Channel 的记录,因此下面的检查会返回 FALSE。
all(length(unique(stack$Day))==length(stack$Day))
时间分解失败,因为数据不是等间隔的时间序列,所以会出现如下错误:
Evaluation error: Only year, quarter, month, week, and day periods are
allowed for an index of class Date.
我们知道错误来自 time_decompose,因为只运行到该行代码时错误就已经出现:
stack %>%
# Data Manipulation / Anomaly Detection
time_decompose(Orders, method = "stl")
Converting from tbl_df to tbl_time.
Auto-index message: index = Day
Error in mutate_impl(.data, dots) :
Evaluation error: Only year, quarter, month, week, and day periods are allowed for an index of class Date.
当我们按 Day 进行 group_by 并对 Orders 变量的值求和时,每个日期只对应一行,从而得到唯一的时间索引。此时代码不再报错:
stack %>%
group_by(Day) %>%
summarise(Orders = sum(Orders)) %>%
time_decompose(Orders, method = "stl") %>%
anomalize(remainder, method = "iqr") %>%
time_recompose() %>%
# Anomaly Visualization
plot_anomalies(time_recomposed = TRUE, ncol = 3, alpha_dots = 0.25)
我猜您想要的是按 Channel 分组的时间序列。如果是这种情况,您将面临一些问题,因为并非每个渠道都有足够的数据来分解趋势。渠道 B、C 和 E 无法分解。请参见下面的代码:
library(tidyverse)
library(dplyr)
a<-stack %>%
filter(Channel=="A") %>%
group_by(Day) %>%
summarise(Orders=sum(Orders)) %>% #in case of repeat dates
time_decompose(Orders, method = "stl") %>%
anomalize(remainder, method = "iqr") %>%
time_recompose() %>%
plot_anomalies(time_recomposed = TRUE, ncol = 3, alpha_dots = 0.25) +
labs(title = "Tidyverse Anomalies", subtitle = "STL + IQR Methods")
没有问题。现在让我们检查 b:
b<-stack %>%
filter(Channel=="B") %>%
group_by(Day) %>%
summarise(Orders=sum(Orders)) %>%
time_decompose(Orders, method = "stl") %>%
anomalize(remainder, method = "iqr") %>%
time_recompose() %>%
plot_anomalies(time_recomposed = TRUE, ncol = 3, alpha_dots = 0.25) +
labs(title = "Tidyverse Anomalies", subtitle = "STL + IQR Methods")
Converting from tbl_df to tbl_time.
Auto-index message: index = Day
frequency = 1.5 weeks
trend = 7 weeks
Error in stats::stl(., s.window = "periodic", t.window = trnd, robust = TRUE) :
series is not periodic or has less than two periods
错误信息已经说明了一切。让我们看看 b(即 Channel 为 B 的数据)有多少行:
b
# A tibble: 7 x 3
Day Channel Orders
<date> <chr> <dbl>
1 2018-11-08 B 0
2 2018-11-09 B 0
3 2018-11-10 B 0
4 2018-11-12 B 0
5 2018-11-25 B 0
6 2018-12-06 B 0
7 2018-12-07 B 0
只有 7 个观察值,不足以分解出趋势。
c<-stack %>%
filter(Channel=="C") %>%
group_by(Day) %>%
summarise(Orders=sum(Orders)) %>%
time_decompose(Orders, method = "stl") %>%
anomalize(remainder, method = "iqr") %>%
time_recompose() %>%
plot_anomalies(time_recomposed = TRUE, ncol = 3, alpha_dots = 0.25) +
labs(title = "Tidyverse Anomalies", subtitle = "STL + IQR Methods")
对于 c 也是一样。现在让我们看看 d、e、f 和 g。
d<-stack %>%
filter(Channel=="D") %>%
group_by(Day) %>%
summarise(Orders=sum(Orders)) %>%
time_decompose(Orders, method = "stl") %>%
anomalize(remainder, method = "iqr") %>%
time_recompose() %>%
plot_anomalies(time_recomposed = TRUE, ncol = 3, alpha_dots = 0.25) +
labs(title = "Tidyverse Anomalies", subtitle = "STL + IQR Methods")
e<-stack %>%
filter(Channel=="E") %>%
group_by(Day) %>%
summarise(Orders=sum(Orders)) %>%
time_decompose(Orders, method = "stl") %>%
anomalize(remainder, method = "iqr") %>%
time_recompose() %>%
plot_anomalies(time_recomposed = TRUE, ncol = 3, alpha_dots = 0.25) +
labs(title = "Tidyverse Anomalies", subtitle = "STL + IQR Methods")
f<-stack %>%
filter(Channel=="F") %>%
group_by(Day) %>%
summarise(Orders=sum(Orders)) %>%
time_decompose(Orders, method = "stl") %>%
anomalize(remainder, method = "iqr") %>%
time_recompose() %>%
plot_anomalies(time_recomposed = TRUE, ncol = 3, alpha_dots = 0.25) +
labs(title = "Tidyverse Anomalies", subtitle = "STL + IQR Methods")
g<-stack %>%
filter(Channel=="G") %>%
group_by(Day) %>%
summarise(Orders=sum(Orders)) %>%
time_decompose(Orders, method = "stl") %>%
anomalize(remainder, method = "iqr") %>%
time_recompose() %>%
plot_anomalies(time_recomposed = TRUE, ncol = 3, alpha_dots = 0.25) +
labs(title = "Tidyverse Anomalies", subtitle = "STL + IQR Methods")
只有通道 a、d、f、g 有足够的观察值来分解趋势。
library(gridExtra)
grid.arrange(a,d,f,g, nrow=2)
您也可以使用一行代码完成上述操作(我只是想解释为什么会产生错误)。
stack %>%
filter(Channel=="A"| Channel=="D"|Channel=="F"|Channel=="G") %>%
group_by(Channel) %>%
time_decompose(Orders, method = "stl") %>%
anomalize(remainder, method = "iqr") %>%
time_recompose() %>%
plot_anomalies(time_recomposed = TRUE, ncol = 2, alpha_dots = 0.25) +
labs(title = "Tidyverse Anomalies", subtitle = "STL + IQR Methods")
推荐阅读
- python - 如何在课堂上运行 numpy 日志
- python-3.x - 用正则表达式库重新分割一行
- c++ - 使用带有运算符重载的多个运算符会产生奇怪的错误
- list - 将 a-list 列表拆分为子列表
- direct2d - Direct2D:非仿射变换
- python - 无法检查域(例如 yahoo 和 Outlook 等)的 mx 记录
- ms-access - 动态月份的工作日数
- routing - 在 Linux 中的 MPLS 测试台设置下无法在 LER 和 LSR 之间 ping
- javascript - 带有图层管理插件的传单中不显示 GeoJSON 数据
- javascript - 如何设置 FilePond js 图像预览