首页 > 解决方案 > 如何返回 MongoDB 数组中出现次数最多的值

问题描述

我在 MongoDB 中有如下一组文档,想找出 devDependenciesList 数组中出现次数最多的值:

[{
    "_id" : 0,
    "repoId" : 460078,
    "devDependenciesList" : [ 
        "value1", 
        "value2", 
        "value3", 
        "value4"
    ]
},{
    "_id" : 1,
    "repoId" : 1232,
    "devDependenciesList" : [ 
        "value1", 
        "value4", 
        "value7", 
        "value93"
    ]
},{
    "_id" : 2,
    "repoId" : 5423,
    "devDependenciesList" : [ 
        "value1", 
        "value23", 
        "value3", 
        "value4"
    ]
}]

输出应该是:

[value1:3,value4:3,value3:2]

标签: node.js mongodb aggregation-framework

解决方案


基本上,您需要先用 $unwind 展开数组内容,然后用 $group 以每个值作为分组键,并用 $sum 进行计数:

// Emit one document per array element, then tally how often each value occurs.
db.collection.aggregate([
  { $unwind: "$devDependenciesList" },
  {
    $group: {
      _id: "$devDependenciesList",
      count: { $sum: 1 }
    }
  }
])

这将返回:

{ "_id" : "value23", "count" : 1 }
{ "_id" : "value93", "count" : 1 }
{ "_id" : "value7", "count" : 1 }
{ "_id" : "value2", "count" : 1 }
{ "_id" : "value3", "count" : 2 }
{ "_id" : "value1", "count" : 3 }
{ "_id" : "value4", "count" : 3 }

以上就是基本数据,但如果您确实想要“key/count”这种形式,可以这样做:

// Tally each value, order by frequency, then fold all rows into one document.
db.collection.aggregate([
  { $unwind: "$devDependenciesList" },
  { $group: { _id: "$devDependenciesList", count: { $sum: 1 } } },
  { $sort: { count: -1 } },
  // Collect every row as a { k, v } pair inside a single array ...
  {
    $group: {
      _id: null,
      items: { $push: { k: "$_id", v: "$count" } }
    }
  },
  // ... so that $arrayToObject can turn the pairs into the root document.
  {
    $replaceRoot: {
      newRoot: { $arrayToObject: "$items" }
    }
  }
])

这将返回:

{
        "value1" : 3,
        "value4" : 3,
        "value3" : 2,
        "value23" : 1,
        "value93" : 1,
        "value7" : 1,
        "value2" : 1
}

附加的 $group 和 $push 用于把所有结果收集到单个文档中,该文档包含一个数组,数组中每个元素都带有名为 "k" 和 "v" 的字段。下一个 $replaceRoot 阶段中使用的 $arrayToObject 运算符需要这种形式,并由该阶段返回最终输出。

后面这两个运算符需要 MongoDB 版本支持,但实际上您并不一定要用它们:这种转换在客户端代码中完成其实最为高效。例如在 shell 中使用 JavaScript:

// Let the server count and sort; build the key/count object on the client.
db.collection.aggregate([
  { $unwind: "$devDependenciesList" },
  { $group: { _id: "$devDependenciesList", count: { $sum: 1 } } },
  { $sort: { count: -1 } }
]).toArray().reduce((acc, doc) => {
  acc[doc._id] = doc.count;
  return acc;
}, {})

这会产生与上述相同的结果。

当然,如果您想排除所有只出现一次的结果或做类似的过滤,只需在 $group 之后添加一个 $match:

// Same tally as before, but keep only the values seen more than once.
db.collection.aggregate([
  { $unwind: "$devDependenciesList" },
  { $group: { _id: "$devDependenciesList", count: { $sum: 1 } } },
  { $match: { count: { $gt: 1 } } },
  { $sort: { count: -1 } }
]).toArray().reduce((acc, doc) => {
  acc[doc._id] = doc.count;
  return acc;
}, {})

或者使用 Node.js 原生驱动程序,写法类似如下:

// The driver's toArray() yields a promise, so it is awaited before reducing.
let result = (
  await db.collection('collection').aggregate([
    { $unwind: "$devDependenciesList" },
    { $group: { _id: "$devDependenciesList", count: { $sum: 1 } } },
    { $match: { count: { $gt: 1 } } },
    { $sort: { count: -1 } }
  ]).toArray()
).reduce((acc, { _id, count }) => ({ ...acc, [_id]: count }), {})

这里使用 async/await 来获取实际返回的数组,并用到了对象展开(spread)和解构等 ES6 及之后版本的特性。

这当然只是:

{ "value1" : 3, "value4" : 3, "value3" : 2 }

作为参考,下面是一份可完整复现的代码清单:

const { MongoClient } = require('mongodb');

// Connection string and client options handed to MongoClient.connect().
const uri = 'mongodb://localhost:27017';
const opts = { useNewUrlParser: true };

// Sample documents: one per repository, each listing its dev dependencies.
const data = [
  {
    _id: 0,
    repoId: 460078,
    devDependenciesList: ['value1', 'value2', 'value3', 'value4']
  },
  {
    _id: 1,
    repoId: 1232,
    devDependenciesList: ['value1', 'value4', 'value7', 'value93']
  },
  {
    _id: 2,
    repoId: 5423,
    devDependenciesList: ['value1', 'value23', 'value3', 'value4']
  }
];

// Pretty-print any value to stdout as JSON with two-space indentation.
const log = (payload) => {
  const rendered = JSON.stringify(payload, undefined, 2);
  return console.log(rendered);
};

/**
 * Self-contained demonstration run.
 *
 * Connects using `uri`/`opts`, empties and re-seeds `test.collection` with
 * `data`, then logs:
 *   1. an object mapping each value that occurs more than once in
 *      `devDependenciesList` to its occurrence count, most frequent first;
 *   2. the "explain" output of an equivalent `$sortByCount` pipeline.
 *
 * Errors raised while working are logged via `console.error`; the client is
 * always closed afterwards, and a failure while closing is logged as well.
 */
(async function() {

  let client;

  try {
    client = await MongoClient.connect(uri, opts);

    const collection = client.db('test').collection('collection');

    // Clean data: an explicit empty filter removes every existing document.
    await collection.deleteMany({});

    // Insert data
    await collection.insertMany(data);

    const counted = await collection.aggregate([
      { "$unwind": "$devDependenciesList" },
      { "$group": {
        "_id": "$devDependenciesList",
        "count": { "$sum": 1 }
      }},
      { "$match": { "count": { "$gt": 1 } } },
      // Sorting on "count" alone leaves tied values in an unspecified order.
      // "_id" is unique after $group, so using it as a tie-breaker makes the
      // key order of the resulting object deterministic.
      { "$sort": { "count": -1, "_id": 1 } }
    ]).toArray();

    // Fold the sorted rows into a single { value: count } object.
    const result = counted.reduce((o, { _id, count }) => ({ ...o, [_id]: count }), {});

    log(result);

    const sample = await collection.aggregate([
      { "$unwind": "$devDependenciesList" },
      { "$sortByCount": "$devDependenciesList" },
    ],{ "explain": true }).toArray();

    log(sample);

  } catch(e) {
    console.error(e);
  } finally {
    if (client) {
      // close() returns a promise; await it so a failure is not left floating.
      await client.close();
    }
  }

})().catch(console.error); // reports a rejection raised while closing the client

以下输出展示了预期结果,而“explain”输出则表明 $sortByCount 并不是一个“真正的”聚合阶段,它只是 MongoDB 2.2 起就已存在的写法($group 加 $sort)的一种简写形式:

{
  "value1": 3,
  "value4": 3,
  "value3": 2
}
[
  {
    "stages": [
      {
        "$cursor": {
          "query": {},
          "fields": {
            "devDependenciesList": 1,
            "_id": 0
          },
          "queryPlanner": {
            "plannerVersion": 1,
            "namespace": "test.collection",
            "indexFilterSet": false,
            "parsedQuery": {},
            "winningPlan": {
              "stage": "COLLSCAN",
              "direction": "forward"
            },
            "rejectedPlans": []
          }
        }
      },
      {
        "$unwind": {
          "path": "$devDependenciesList"
        }
      },
      {
        "$group": {
          "_id": "$devDependenciesList",
          "count": {
            "$sum": {
              "$const": 1
            }
          }
        }
      },
      {
        "$sort": {
          "sortKey": {
            "count": -1
          }
        }
      }
    ],
    "ok": 1,
    "operationTime": "6674186995377373190",
    "$clusterTime": {
      "clusterTime": "6674186995377373190",
      "signature": {
        "hash": "AAAAAAAAAAAAAAAAAAAAAAAAAAA=",
        "keyId": 0
      }
    }
  }
]

推荐阅读