首页 > 解决方案 > 如何通过组和累加器阶段改进 MongoDB 聚合?

问题描述

下面的查询:

问题是查询非常复杂,并且随着数据量的增长,这看起来确实不是很好。有没有更简单的方法可以在 mongodb 中实现相同的目标?

输出形状如下:

{
  "results": [
    {
      "_id": {
        "month": 2,
        "day": 2,
        "year": 2021
      },
      "urls": [
        {
          "url": "https://shop.mydomain.com/product/golden-axe",
          "count": 20
        },
        {
          "url": "https://shop.mydomain.com/product/phantasy-star",
          "count": 218
        },
        {
          "url": "https://shop.mydomain.com/product/sega-arcades-retro",
          "count": 30
        }
      ],
      "count": 268
    },
    {
      "_id": {
        "month": 2,
        "day": 3,
        "year": 2021
      },
      "urls": [
        {
          "url": "https://shop.mydomain.com/product/golden-axe",
          "count": 109
        },
        {
          "url": "https://shop.mydomain.com/product/phantasy-star",
          "count": 416
        },
        {
          "url": "https://shop.mydomain.com/product/sega-arcades-retro",
          "count": 109
        }
      ],
      "count": 634
    },
        const aggregate = [
      {
        $match: {
          source: 'itemLineView',
          createdAt: {
            $gte: new Date(query.dateGT),
            $lte: new Date(query.dateLT)
          },
          url: { $regex: `^${query.url}` },
        }
      },
      {
        $group: {
          _id: {
            month: {
              $month: '$createdAt'
            },
            day: {
              $dayOfMonth: '$createdAt'
            },
            year: {
              $year: '$createdAt'
            }
          },
          urls: {
            // https://docs.mongodb.com/manual/reference/operator/aggregation/accumulator/#grp._S_accumulator
            $accumulator: {
              init: function (): AccumulatorSourceStats {
                return {
                  origins: []
                };
              },
              // initArgs: [], // Argument arr to pass to the init function
              accumulate: function (state: AccumulatorSourceStats, url: string) {
                const index = state.origins.findIndex(function (origin) {
                  return origin.url === url;
                });
                if (index === -1) {
                  state.origins.push({
                    url: url,
                    count: 1
                  });
                } else {
                  ++state.origins[index].count;
                }
                return state;
              },

              accumulateArgs: ['$url'], // Argument(s) passed to the accumulate function

              merge: function (state1: AccumulatorSourceStats, state2: AccumulatorSourceStats) {
                return {
                  origins: state1.origins.concat(state2.origins)
                };
              },

              finalize: function (state: AccumulatorSourceStats) { // Adjust the state to only return field we need
                const sortByUrl = function (a: AccumulatorSourceStatsOrigin, b: AccumulatorSourceStatsOrigin) {
                  if (a.url < b.url) {
                    return -1;
                  }
                  if (a.url > b.url) {
                    return 1;
                  }
                  return 0;
                };
                return state.origins.sort(sortByUrl);
              },

              lang: 'js'
            }
          },
          count: { $sum: 1 }
        }
      },
      { $sort: { _id: 1 } }
    ];
    return this.model.aggregate(aggregate);

标签: mongodbmongodb-queryaggregation-framework

解决方案


文档

在聚合运算符中执行 JavaScript 可能会降低性能。仅当提供的管道运算符无法满足您的应用程序需求时,才使用 $accumulator 运算符。

避免在 Mongo 管道中使用 javascript 代码被认为是一种很好的做法,这应该仅作为最后的手段使用。在这种情况下,我们可以通过$grouping 两次来避免使用它,每天每个 url 一次,然后每天一次。像这样:

db.collection.aggregate([
  {
    $match: {
      source: "itemLineView",
      createdAt: {
        $gte: new Date(query.dateGT),
        $lte: new Date(query.dateLT)
      },
      url: {
        $regex: `^${query.url}`
      },
      
    }
  },
  {
    $group: {
      _id: {
        month: {
          $month: "$createdAt"
        },
        day: {
          $dayOfMonth: "$createdAt"
        },
        year: {
          $year: "$createdAt"
        },
        url: "$url"
      },
      count: {
        $sum: 1
      }
    }
  },
  {
    $group: {
      _id: {
        month: "$_id.month",
        day: "$_id.day",
        year: "$_id.year",
        
      },
      urls: {
        $push: {
          url: "$_id.url",
          count: "$count"
        }
      },
      count: {
        $sum: "$count"
      }
    }
  }
])

蒙戈游乐场


推荐阅读