elasticsearch - ES 聚合查询很慢,如何提高查询速度?
问题描述
我们在 elasticsearch 生产集群中有7552642 个文档,总大小为 94.290219647 GB。
JVM 堆内存设置(-Xms 和 -Xmx):
/usr/bin/java -Xms20g -Xmx20g
集群信息:
{
"cluster_name": "production",
"status": "green",
"timed_out": false,
"number_of_nodes": 2,
"number_of_data_nodes": 2,
"active_primary_shards": 10,
"active_shards": 20,
"relocating_shards": 0,
"initializing_shards": 0,
"unassigned_shards": 0,
"delayed_unassigned_shards": 0,
"number_of_pending_tasks": 0,
"number_of_in_flight_fetch": 0,
"task_max_waiting_in_queue_millis": 0,
"active_shards_percent_as_number": 100
}
映射
"posts": {
"mappings": {
"information": {
"_all": {
"enabled": true
},
"properties":{
"post_created_time": {
"type": "date"
},
"post_impressions": {
"type": "double"
},
"post_interactions": {
"type": "long"
},
"post_media_value": {
"type": "float"
},
"post_message": {
"type": "text"
},
"post_reach": {
"type": "double"
},
"post_type": {
"type": "keyword"
},
"profile_id": {
"type": "long"
},
"profile_name": {
"type": "text"
},
"platform_followed_by" :{
"type": "long"
}
}
}
}
}
聚合查询:
{
"size":0,
"aggs":{
"total_influencers":{
"cardinality":{
"field":"profile_id"
}
},
"profiles_reach_bucket":{
"terms":{
"size":2147483639,
"field":"profile_id"
},
"aggs":{
"media_reach_bucket":{
"terms":{
"field":"_type",
"size":2147483639
},
"aggs":{
"media_reach":{
"avg":{
"field":"post_reach"
}
}
}
},
"total_media_reach":{
"sum_bucket":{
"buckets_path":"media_reach_bucket>media_reach"
}
}
}
},
"total_reach":{
"sum_bucket":{
"buckets_path":"profiles_reach_bucket>total_media_reach"
}
},
"total_impressions":{
"sum":{
"field":"post_impressions"
}
},
"total_interactions":{
"sum":{
"field":"post_interactions"
}
},
"total_followers":{
"sum":{
"field":"platform_followed_by"
}
},
"interactions_for_media_value":{
"terms":{
"size":2147483639,
"field":"_type"
},
"aggs":{
"interaction":{
"sum":{
"field":"post_interactions"
}
}
}
},
"impressions_for_media_value":{
"terms":{
"size":2147483639,
"field":"_type"
},
"aggs":{
"impression":{
"sum":{
"field":"post_impressions"
}
}
}
}
},
"query":{
"bool":{
"must":[
{
"query_string":{
"query":"post_message:*food* post_description:*food* post_title:*food* "
}
}
],
"filter":[
{
"range":{
"post_created_time":{
"gte":1533427200,
"lte":1549324800
}
}
}
]
}
}
}
结果:
上面的查询耗时如下(took 的单位为毫秒):
"took": 6685,
"timed_out": false,
"_shards": {
"total": 10,
"successful": 10,
"skipped": 0,
"failed": 0
}
任何优化建议将不胜感激。
解决方案
推荐阅读
- python - _mysql_exceptions.OperationalError: (1045, "Access denied for user 'root'@'localhost' (using password: NO)")
- matplotlib - 如何控制子图之间的空间
- docker - 如何在 docker-compose 中启动/关闭单个容器
- javascript - Chrome 扩展:使用 appendChild() 附加的节点消失
- sql - 如何为条件逻辑语句排行和分区?
- javascript - 为什么我的 Mock fetch 总是返回 null?(Jest)
- xcode - 在 SwiftUI (5) 和 Xcode (12.4) 中实现 AdMob 插页式广告
- c - 将平方 x,y 坐标转换为圆坐标
- python - 在切片后绘制下一行
- c# - 从特定标签获取所有提交的列表,直到最后一个