mongodb - MongoDB slow facet query using multiple group by
问题描述
I am working on a project where we will have almost 5 million documents in a collection. And each document's size will be around 18571 bytes having 120 to 150 fields.
I have to return my response in less than 1 second. And mongo query will perform almost 10 to 15 group by in faceted query on maximum 3,00,000 documents.
This is my first time handling this much of data where I have to return responses in real time.
I have implemented indexes and they reduced response time to 5 to 6 seconds but I still need it in less then 1 second.
Below is sample query:
db.sample.aggregation(
"$match":{
"$and":[
{"is_new": <true/false>},
{"brand":<some-brand>},
{"year":{"$gte":<some-year>,"$lte":<some-year>}},
{"seller_id":{"$in":[<array-of-seller-ids-may-have-40,000-seller-ids>]}}
]
},
{
"$facet":{
"data":[{
"$project":{
"_id":"_id",
"brand":"$brand_name",
"model":"$model_name",
<will have almost 20 keys with lookup>
}
}],
"count":[{"$group":{"_id":"$_id"}},{"$count":"vin_count"}],
"price":[{"$bucketAuto":{"groupBy":"$price", "buckets":1}}],
<will have 12-15 group by>
}
}
)
Below is sample document:
{
"_id" : "KNDMC5C11J6394584",
"brand_id" : 22,
"brand_name" : "XYZ",
"abc_id" : 1234567890,
"city" : "Gurgaon, IN",
"fluctuation" : 18,
"created_at" : ISODate("2018-08-17T06:08:12.940Z"),
"release_data" : "2018-06-29",
"seller_name" : "Seller name",
"seller_price" : 34890,
"seller_rating" : 4,
"seller_zip" : "12550",
"feature1" : "ABC",
"feature2" : 3300,
"feature3" : "AB",
"expected_price" : -1,
"exterior_color" : "Unknown",
"registered_dealer" : true,
"registered_brand" : "ABC",
"fluctuation_rate" : 20.700000000000003,
"fluctuation_type" : 2,
"fluc_type_name" : "Something",
"has_patents" : false,
"tested_frequency" : 24,
"interior_color" : "---",
"is_certified" : false,
"is_certified_iso" : false,
"is_featured" : false,
"is_new" : true,
"is_certified_bhel" : false,
"location" : {
"type" : "Point",
"coordinates" : [
-24.08180236816406,
31.507198333740234
]
},
"max_input" : 8,
"feature4" : 3,
"feature5" : 206,
"feature6" : "Something",
"monthly_payment" : 649,
"msrp" : 34890,
"feature7" : false,
"seller_id" : 123567890,
"product_family_name" : "abc",
"product_id" : 15,
"product_name" : "Something",
"reflection" : "Something",
"fluc_id" : 2312,
"fluc_name" : "something something (abc) ac",
"updated_at" : ISODate("2018-09-11T17:59:36.889Z"),
"product_damage_category" : "None",
"year" : 2018,
"damage_check" : "-",
"team_size" : "-",
"Technology" : {
"camera_unit" : true
}
}
Below is the explain output
{
"stages" : [
{
"$cursor" : {
"query" : {
"$and" : [
{
"is_new" : true
},
{
"year" : {
"$gte" : 2018,
"$lte" : 2018
}
},
{
"sp_id" : {
"$in" : [<list of 40,000 seller ids>]
}
}
]
},
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "test_collection.col",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"is_new" : {
"$eq" : true
}
},
{
"year" : {
"$lte" : 2018
}
},
{
"year" : {
"$gte" : 2018
}
},
{
"sp_id" : {
"$in" : [<list of 40,000 seller ids>]
}
}
]
},
"winningPlan" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"is_new" : 1,
"year" : 1,
"sp_id" : 1
},
"indexName" : "is_new_1_year_1_sp_id_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"is_new" : [ ],
"year" : [ ],
"sp_id" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"is_new" : [
"[true, true]"
],
"year" : [
"[2018.0, 2018.0]"
],
"sp_id" : [
"[47590.0, 47590.0]",
"[48333.0, 48333.0]",
"[51333.0, 51333.0]",
<range of 40,000 seller_ids>
]
}
}
},
"rejectedPlans" : [ ]
}
}
},
{
"$facet" : {
"data" : [
{
"$project" : {
"_id" : "$_id",
"brand_name" : "$brand_name",
"feature1" : "$feature1",
"feature2" : "$feature2",
"feature3" : "$feature3",
"feature4" : "$feature4",
"feature5" : "$feature5",
"feature6" : "$feature6",
"feature7" : "$feature7",
"feature8" : "$feature8",
"feature9" : "$feature9",
"feature10" : "$feature10",
"feature11" : "$feature11",
"feature12" : "$feature12",
"feature13" : "$feature13",
"feature14" : "$feature14",
"feature15" : "$feature15",
"feature16" : "$feature16",
"feature17" : "$feature17",
"feature18" : "$feature18",
"feature19" : "$feature19",
"feature20" : "$feature20"
}
}
],
"count" : [
{
"$group" : {
"_id" : "$_id"
}
},
{
"$group" : {
"_id" : {
"$const" : null
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$project" : {
"_id" : false,
"count" : true
}
}
],
"feature1" : [
{
"$match" : {
"feature1" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature1",
"name" : {
"$first" : "$feature1"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature2" : [
{
"$match" : {
"feature2" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature2",
"name" : {
"$first" : "$feature2"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature3" : [
{
"$match" : {
"feature3" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature3",
"name" : {
"$first" : "$feature3"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature4" : [
{
"$match" : {
"feature4" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature4",
"name" : {
"$first" : "$feature4"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature5" : [
{
"$match" : {
"feature5" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature5",
"name" : {
"$first" : "$fuel"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature6" : [
{
"$match" : {
"feature6" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature6",
"name" : {
"$first" : "$feature6"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature7" : [
{
"$match" : {
"feature7" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature7",
"name" : {
"$first" : "$feature7"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature8" : [
{
"$match" : {
"feature8" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature8",
"name" : {
"$first" : "$feature8"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature9" : [
{
"$match" : {
"feature9" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature9",
"name" : {
"$first" : "$feature9"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"count" : -1
}
}
}
],
"feature10" : [
{
"$match" : {
"feature10" : {
"$exists" : true
}
}
},
{
"$group" : {
"_id" : "$feature10",
"name" : {
"$first" : "$feature10"
},
"count" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$sort" : {
"sortKey" : {
"_id" : -1
}
}
}
],
"feature11" : [
{
"$match" : {
"feature11" : {
"$exists" : true
}
}
},
{
"$bucketAuto" : {
"groupBy" : "$feature11",
"buckets" : 1,
"output" : {
"count" : {
"$sum" : {
"$const" : 1
}
}
}
}
}
],
"feature12" : [
{
"$bucketAuto" : {
"groupBy" : "$feature11",
"buckets" : 1,
"output" : {
"count" : {
"$sum" : {
"$const" : 1
}
}
}
}
}
]
}
}
],
"ok" : 1
}
If this information is not complete for the solution. I will provide more.
I am stuck on this from last 1 month.
Any help would be appreciated.
解决方案
推荐阅读
- google-apps-script - 使用应用程序脚本从 Google 电子表格中通过电子邮件发送数据透视表
- php - 如何在 php 中从数据库中获取 mathml 数据?
- apache-spark - EMR 和外部 hive/glue 上的 Pyspark - 可以通过 sqlContext 删除但不能创建表
- html - 角度自动完成搜索
- ios - 在 PDFView 上移动/拖动时如何限制位置视图?(迅速)
- css - Bootstrap 的 SCSS 无法编译 - 总是出现“`unit($number)` must be a number”错误
- python - 减去熊猫列
- powershell - powershell SharePoint - 尝试重命名网站时无法通过确认对话框
- amazon-web-services - 将可执行文件上传到 aws S3 存储桶始终失败,并出现 OptionsRequestDenied
- bash - 为什么我的 bash “历史”命令在子 shell 中慢 56 倍?