mongodb - 如何通过组和累加器阶段改进 MongoDB 聚合?
问题描述
下面的查询:
- 使用 $match 筛选出符合条件的文档
- 按日期(年、月、日)分组,然后使用 $accumulator 累加器统计每个 URL 的出现次数,并将 URL 及其计数添加到返回的数据集中
问题是这个查询相当复杂,而且随着数据量的增长,这种做法看起来并不理想。有没有更简单的方法可以在 MongoDB 中实现相同的目标?
输出形状如下:
{
"results": [
{
"_id": {
"month": 2,
"day": 2,
"year": 2021
},
"urls": [
{
"url": "https://shop.mydomain.com/product/golden-axe",
"count": 20
},
{
"url": "https://shop.mydomain.com/product/phantasy-star",
"count": 218
},
{
"url": "https://shop.mydomain.com/product/sega-arcades-retro",
"count": 30
}
],
"count": 268
},
{
"_id": {
"month": 2,
"day": 3,
"year": 2021
},
"urls": [
{
"url": "https://shop.mydomain.com/product/golden-axe",
"count": 109
},
{
"url": "https://shop.mydomain.com/product/phantasy-star",
"count": 416
},
{
"url": "https://shop.mydomain.com/product/sega-arcades-retro",
"count": 109
}
],
"count": 634
},
// Escape regex metacharacters so the requested URL is matched as a literal prefix.
// URLs routinely contain '.', '?' and '+', which would otherwise act as regex operators.
const escapedUrlPrefix = String(query.url).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

// Pipeline: filter documents by source, createdAt range and URL prefix, then build one
// result document per calendar day holding the per-URL counts (`urls`) and the number
// of matched documents for that day (`count`).
const aggregate = [
  {
    $match: {
      source: 'itemLineView',
      createdAt: {
        $gte: new Date(query.dateGT),
        $lte: new Date(query.dateLT)
      },
      url: { $regex: `^${escapedUrlPrefix}` },
    }
  },
  {
    $group: {
      // One group per calendar day of `createdAt`.
      _id: {
        month: {
          $month: '$createdAt'
        },
        day: {
          $dayOfMonth: '$createdAt'
        },
        year: {
          $year: '$createdAt'
        }
      },
      urls: {
        // https://docs.mongodb.com/manual/reference/operator/aggregation/accumulator/#grp._S_accumulator
        // NOTE: the functions below are executed as JavaScript inside the aggregation,
        // so each one must be self-contained (no references to outer variables).
        $accumulator: {
          // Initial state: an empty list of { url, count } entries.
          init: function (): AccumulatorSourceStats {
            return {
              origins: []
            };
          },
          // initArgs: [], // Argument arr to pass to the init function
          // Called once per document: bump the counter of the document's URL,
          // creating the entry on first sight.
          accumulate: function (state: AccumulatorSourceStats, url: string) {
            const index = state.origins.findIndex(function (origin) {
              return origin.url === url;
            });
            if (index === -1) {
              state.origins.push({
                url: url,
                count: 1
              });
            } else {
              ++state.origins[index].count;
            }
            return state;
          },
          accumulateArgs: ['$url'], // Argument(s) passed to the accumulate function
          // Combines two partial states. The same URL may appear in both, so counts
          // are summed per URL; plain concatenation would emit duplicate URL entries.
          merge: function (state1: AccumulatorSourceStats, state2: AccumulatorSourceStats) {
            // Copy the entries so neither input state is mutated.
            const merged = state1.origins.map(function (origin) {
              return { url: origin.url, count: origin.count };
            });
            state2.origins.forEach(function (origin) {
              const index = merged.findIndex(function (candidate) {
                return candidate.url === origin.url;
              });
              if (index === -1) {
                merged.push({ url: origin.url, count: origin.count });
              } else {
                merged[index].count += origin.count;
              }
            });
            return {
              origins: merged
            };
          },
          // Returns only the entry array (this becomes the `urls` field), sorted by URL.
          finalize: function (state: AccumulatorSourceStats) {
            const sortByUrl = function (a: AccumulatorSourceStatsOrigin, b: AccumulatorSourceStatsOrigin) {
              if (a.url < b.url) {
                return -1;
              }
              if (a.url > b.url) {
                return 1;
              }
              return 0;
            };
            return state.origins.sort(sortByUrl);
          },
          lang: 'js'
        }
      },
      count: { $sum: 1 }
    }
  },
  // Sort chronologically. Sorting on the whole `_id` sub-document would compare its
  // fields in declaration order (month, day, year) and misorder ranges spanning years.
  { $sort: { '_id.year': 1, '_id.month': 1, '_id.day': 1 } }
];
return this.model.aggregate(aggregate);
解决方案
从文档:
在聚合运算符中执行 JavaScript 可能会降低性能。仅当提供的管道运算符无法满足您的应用程序需求时,才使用 $accumulator 运算符。
在 Mongo 管道中避免使用 JavaScript 代码被认为是一种良好实践,它只应作为最后的手段。在这种情况下,我们可以通过两次 $group 来避免使用它:第一次按"日期 + URL"分组(每天每个 URL 一组),第二次仅按日期分组(每天一组)。像这样:
// Escape regex metacharacters so the requested URL is matched as a literal prefix.
// URLs routinely contain '.', '?' and '+', which would otherwise act as regex operators.
const escapedUrlPrefix = String(query.url).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

// Same result shape as the $accumulator version, using only native stages:
// group per (day, url) to count each URL, then regroup per day to collect them.
db.collection.aggregate([
  {
    $match: {
      source: "itemLineView",
      createdAt: {
        $gte: new Date(query.dateGT),
        $lte: new Date(query.dateLT)
      },
      url: {
        $regex: `^${escapedUrlPrefix}`
      },
    }
  },
  {
    // Stage 1: one document per (calendar day, url) with that URL's count for the day.
    $group: {
      _id: {
        month: {
          $month: "$createdAt"
        },
        day: {
          $dayOfMonth: "$createdAt"
        },
        year: {
          $year: "$createdAt"
        },
        url: "$url"
      },
      count: {
        $sum: 1
      }
    }
  },
  // $group output order is unspecified and $push keeps input order, so sort by URL
  // here to get each day's `urls` array ordered by URL.
  { $sort: { "_id.url": 1 } },
  {
    // Stage 2: one document per calendar day; collect the per-URL counts and total them.
    $group: {
      _id: {
        month: "$_id.month",
        day: "$_id.day",
        year: "$_id.year",
      },
      urls: {
        $push: {
          url: "$_id.url",
          count: "$count"
        }
      },
      count: {
        $sum: "$count"
      }
    }
  },
  // Return the days in chronological order (field-wise, so year takes precedence).
  { $sort: { "_id.year": 1, "_id.month": 1, "_id.day": 1 } }
])
推荐阅读
- emacs - 如何通过 ALT-M 在 emacs 中进行编译?
- php - PHP更改行基于
- regex - 如何使用两种格式的正则表达式?
- javascript - 验证表单 hasError AngularJS
- python - openpyxl - 如何使用列号而不是字母?
- sql - 创建命令有什么错误,ErrorORA-00903: invalid table name and the FK reference to pk 是正确的?
- java - 如何在 JAXB2 maven 插件版本 2.4 中定义模式目录?
- java - 如何将数据从 Spring MVC 控制器重定向/转发到其他页面作为 POST?
- python-3.x - 如何在 Ray Tune 中定义与搜索算法无关的高维搜索空间?
- jmeter - 间隔在jmeter中运行特定的采样器