node.js - 如何返回 MongoDB 数组中出现次数最多的值
问题描述
我在 MongoDB 中有如下一组文档,我想得到其中 devDependenciesList 数组里出现次数最多的值:
[{
"_id" : 0,
"repoId" : 460078,
"devDependenciesList" : [
"value1",
"value2",
"value3",
"value4"
]
},{
"_id" : 1,
"repoId" : 1232,
"devDependenciesList" : [
"value1",
"value4",
"value7",
"value93"
]
},{
"_id" : 2,
"repoId" : 5423,
"devDependenciesList" : [
"value1",
"value23",
"value3",
"value4"
]
}]
输出应该是:
[value1:3,value4:3,value3:2]
解决方案
基本上,您需要先用 $unwind 展开数组内容,然后用 $group 以每个值作为分组键,并用 $sum 进行计数:
// Count the occurrences of each distinct devDependenciesList value.
db.collection.aggregate([
// Emit one document per array element.
{ "$unwind": "$devDependenciesList" },
// Group on the element value, adding 1 for every occurrence.
{ "$group": {
"_id": "$devDependenciesList",
"count": { "$sum": 1 }
}}
])
它会返回:
{ "_id" : "value23", "count" : 1 }
{ "_id" : "value93", "count" : 1 }
{ "_id" : "value7", "count" : 1 }
{ "_id" : "value2", "count" : 1 }
{ "_id" : "value3", "count" : 2 }
{ "_id" : "value1", "count" : 3 }
{ "_id" : "value4", "count" : 3 }
这就是基本数据,但如果您确实想要“key/count”形式的结果,可以这样做:
// Build a single document that maps each value to its occurrence count.
db.collection.aggregate([
// Emit one document per array element.
{ "$unwind": "$devDependenciesList" },
// Group on the element value, adding 1 for every occurrence.
{ "$group": {
"_id": "$devDependenciesList",
"count": { "$sum": 1 }
}},
// Highest counts first.
{ "$sort": { "count": -1 } },
// Collect every (value, count) pair into one array of { k, v } elements.
{ "$group": {
"_id": null,
"items": { "$push": { "k": "$_id", "v": "$count" } }
}},
// Turn the { k, v } array into an object and make it the output document.
{ "$replaceRoot": {
"newRoot": { "$arrayToObject": "$items" }
}}
])
它会返回:
{
"value1" : 3,
"value4" : 3,
"value3" : 2,
"value23" : 1,
"value93" : 1,
"value7" : 1,
"value2" : 1
}
附加的 $group 和 $push 用于把所有结果收集到单个文档中,该文档包含一个数组,数组中的每个元素都带有 "k" 和 "v" 字段。下一个 $replaceRoot 阶段中使用的 $arrayToObject 运算符需要这种形式,并由该阶段返回最终输出。
您需要一个支持后面这些运算符的 MongoDB 版本,不过其实不必在服务器端这样做:这种转换在客户端代码中完成才是最高效的。例如在 shell 中使用 JavaScript:
// Count on the server, then reshape into a { value: count } object on the client.
db.collection.aggregate([
// Emit one document per array element.
{ "$unwind": "$devDependenciesList" },
// Group on the element value, adding 1 for every occurrence.
{ "$group": {
"_id": "$devDependenciesList",
"count": { "$sum": 1 }
}},
// Highest counts first.
{ "$sort": { "count": -1 } }
// Fold the result rows into one object keyed by each row's _id.
]).toArray().reduce((o,e) => Object.assign(o, { [e._id]: e.count }),{})
这会产生与上述相同的结果。
当然,如果您想排除所有只出现一次的结果(或做类似的过滤),只需在 $group 之后添加 $match:
// Same counting pipeline, keeping only values that occur more than once.
db.collection.aggregate([
// Emit one document per array element.
{ "$unwind": "$devDependenciesList" },
// Group on the element value, adding 1 for every occurrence.
{ "$group": {
"_id": "$devDependenciesList",
"count": { "$sum": 1 }
}},
// Drop values whose count is not greater than 1.
{ "$match": { "count": { "$gt": 1 } } },
// Highest counts first.
{ "$sort": { "count": -1 } }
// Fold the result rows into one object keyed by each row's _id.
]).toArray().reduce((o,e) => Object.assign(o, { [e._id]: e.count }),{})
或者使用 Node 原生驱动程序,写法大致如下:
// Node driver variant: await the array of rows, then fold it into a
// { value: count } object using destructuring and object spread.
let result = (await db.collection('collection').aggregate([
// Emit one document per array element.
{ "$unwind": "$devDependenciesList" },
// Group on the element value, adding 1 for every occurrence.
{ "$group": {
"_id": "$devDependenciesList",
"count": { "$sum": 1 }
}},
// Drop values whose count is not greater than 1.
{ "$match": { "count": { "$gt": 1 } } },
// Highest counts first.
{ "$sort": { "count": -1 } }
]).toArray()).reduce((o,{ _id, count }) => ({ ...o, [_id]: count }),{})
请注意,这里对实际返回的数组使用了 async/await,并使用了对象展开和解构等 ES6 特性。
其结果当然就是:
{ "value1" : 3, "value4" : 3, "value3" : 2 }
仅供参考,下面是一份完整的可重现代码清单:
const { MongoClient } = require('mongodb');
// Connection string for a MongoDB server running on localhost.
const uri = 'mongodb://localhost:27017';
// Options handed to MongoClient.connect.
const opts = { useNewUrlParser: true };
// Sample documents inserted into the collection before the pipelines run.
const data = [
{
"_id" : 0,
"repoId" : 460078,
"devDependenciesList" : [
"value1",
"value2",
"value3",
"value4"
]
},{
"_id" : 1,
"repoId" : 1232,
"devDependenciesList" : [
"value1",
"value4",
"value7",
"value93"
]
},{
"_id" : 2,
"repoId" : 5423,
"devDependenciesList" : [
"value1",
"value23",
"value3",
"value4"
]
}
];
// Pretty-print a value to stdout as JSON with two-space indentation.
const log = data => console.log(JSON.stringify(data, undefined, 2));
// Self-contained demo: reseed the collection, run the counting pipeline and
// print its result, then print the explain output of the $sortByCount form.
// Any error is logged; the client is always closed before the function settles.
(async function() {
  let client;

  try {
    client = await MongoClient.connect(uri, opts);
    const db = client.db('test');
    const collection = db.collection('collection');

    // Clean data: the explicit empty filter matches every document.
    await collection.deleteMany({});

    // Insert data
    await collection.insertMany(data);

    // Count each distinct array value, keep those seen more than once,
    // and order them from most to least frequent.
    const counted = await collection.aggregate([
      { "$unwind": "$devDependenciesList" },
      { "$group": {
        "_id": "$devDependenciesList",
        "count": { "$sum": 1 }
      }},
      { "$match": { "count": { "$gt": 1 } } },
      { "$sort": { "count": -1 } }
    ]).toArray();

    // Fold the ordered rows into a single { value: count } object.
    const result = counted.reduce(
      (o, { _id, count }) => ({ ...o, [_id]: count }),
      {}
    );
    log(result);

    // Ask the server to explain the $sortByCount pipeline instead of running it.
    const sample = await collection.aggregate([
      { "$unwind": "$devDependenciesList" },
      { "$sortByCount": "$devDependenciesList" },
    ], { "explain": true }).toArray();
    log(sample);
  } catch (e) {
    console.error(e);
  } finally {
    // Await the close so its promise is not left floating.
    if (client) {
      await client.close();
    }
  }
})();
下面的输出既显示了预期结果,也显示了“explain”输出;后者表明 $sortByCount 并不是“真正的”聚合阶段,它只是对自 MongoDB 2.2 起就已存在的功能的一种更简短的写法:
{
"value1": 3,
"value4": 3,
"value3": 2
}
[
{
"stages": [
{
"$cursor": {
"query": {},
"fields": {
"devDependenciesList": 1,
"_id": 0
},
"queryPlanner": {
"plannerVersion": 1,
"namespace": "test.collection",
"indexFilterSet": false,
"parsedQuery": {},
"winningPlan": {
"stage": "COLLSCAN",
"direction": "forward"
},
"rejectedPlans": []
}
}
},
{
"$unwind": {
"path": "$devDependenciesList"
}
},
{
"$group": {
"_id": "$devDependenciesList",
"count": {
"$sum": {
"$const": 1
}
}
}
},
{
"$sort": {
"sortKey": {
"count": -1
}
}
}
],
"ok": 1,
"operationTime": "6674186995377373190",
"$clusterTime": {
"clusterTime": "6674186995377373190",
"signature": {
"hash": "AAAAAAAAAAAAAAAAAAAAAAAAAAA=",
"keyId": 0
}
}
}
]
推荐阅读
- android - 在使用 Kotlin 的 android 微调器上提示微调器
- python - 从另一个 Python 文件实例化一个 Python 对象
- mysql - 如何使用慢 Order By 优化 MYSQL 查询
- c# - 在 C# 中填充 Azure 表存储表
- sql - 如何比较 SQL Server 中的两个日期值?
- r - 编写一个函数来构建所有可能模型的列表,其中每个变量都有多种类型的回归
- mysql - 在创建具有多个输入的通配符 sql 代码时遇到问题
- c - attiny13 pwm led 完成开关
- laravel - how can i change lang of errors message using laravel6?
- asp.net-core - EF Core 3.1 属性值返回默认字段值