sorting - Elastic Search 中的聚合和排序
问题描述
我想在弹性搜索中对查询的聚合结果进行排序
等效 SQL 查询:- select col1, col2, sum(col3) from table group by col1,col2 order by sum(col3) desc;
我尝试使用以下查询,它返回结果但不是按我期望的排序顺序
{
"from": 0,
"size": 0,
"_source": {
"includes": [
"col1",
"col2",
"SUM"
],
"excludes": []
},
"stored_fields": [
"col1",
"col2"
],
"aggregations": {
"col1": {
"terms": {
"field": "col1",
"size": 200,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false
},
"aggregations": {
"col2": {
"terms": {
"field": "col2",
"size": 10,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false
},
"aggregations": {
"SUM_0": {
"sum": {
"field": "col3"
}
},
"col3_bucket_sort": {
"bucket_sort": {
"sort": [
{ "SUM_0": { "order": "desc" } }
],
"size": 3
}
}
}
}
}
}
}
}
抽样索引数据
{
"_index": "my_index",
"_type": "products",
"_id": "OJfBSXUB0GzAt2o_zVdS",
"_score": 1.0,
"_source": {
"product_name": "car",
"product_type": "retail",
"qty": 5
}
}
{
"_index": "my_index",
"_type": "report",
"_id": "OpfBSXUB0GzAt2o_zVfG1",
"_score": 1.0,
"_source": {
"product_name": "bike",
"product_type": "retail",
"qty": 5
}
},
{
"_index": "my_index",
"_type": "report",
"_id": "OpfBSXUB0GzAt2o_zVfG",
"_score": 1.0,
"_source": {
"product_name": "car",
"product_type": "retail",
"qty": 3
}
},
{
"_index": "my_index",
"_type": "report",
"_id": "OpfBSXUB0GzAt2o_zVfG2",
"_score": 1.0,
"_source": {
"product_name": "bike",
"product_type": "retail",
"qty": 1
}
}
预期输出:-想要根据字段 product_name 和 product_type 聚合(分组)我的文档并按 sum(qty) 排序
等效 SQl 查询:- 从 product_table group by product_name 中 select product_name, product_type, sum(qty), product_type order by sum(qty) desc;
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": null,
"hits": []
},
"aggregations": {
"product_name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "car",
"doc_count": 2,
"product_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": retail,
"doc_count": 2,
"SUM_0": {
"value":8
}
}
]
}
},
{
"key": "bike",
"doc_count": 2,
"product_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": retail,
"doc_count": 2,
"SUM_0": {
"value": 6
}
}
]
}
}
]
}
}
}
但我低于输出,即成功聚合文档但排序不适用于 sum(qty)
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": null,
"hits": []
},
"aggregations": {
"product_name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "bike",
"doc_count": 2,
"product_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": retail,
"doc_count": 2,
"SUM_0": {
"value": 6
}
}
]
}
},
{
"key": "car",
"doc_count": 2,
"product_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": retail,
"doc_count": 2,
"SUM_0": {
"value":8
}
}
]
}
}
]
}
}
}
解决方案
引用ES-Docs的摘录
与所有管道聚合一样,bucket_sort 聚合在所有其他非管道聚合之后执行。这意味着排序仅适用于已从父聚合返回的任何存储桶。例如,如果父聚合是术语并且其大小设置为 10,则 bucket_sort 将仅对这 10 个返回的术语存储桶进行排序。
以上是您的查询未给出正确结果的原因。
推荐阅读
- java - JDBCTemplate 执行查询后关闭准备好的语句
- python-3.x - Python tkinter ListBox 高度不起作用
- node.js - 访问 NODE.JS 中不存在的文件时如何显示消息?
- sql - 在 SQL Server 中导入平面文件时,有没有办法一次更改所有数据类型?
- typescript - 错误 TS2494:仅 ECMAScript 5 及更高版本支持在“for...of”语句中使用字符串
- javascript - 我应该如何将搜索变量从 React 客户端输入到 Express 服务器端 API 调用?
- java - Java SE 1.8 在 MacOS Mojave 中安装后消失
- java - 如何使用 Spring Boot 在 Eclipse 中修复 JVM 参数?
- jupyter-notebook - Jupyter Lab 中的只读 Python Notebook
- python - 如何使 Flask 应用程序中的局部变量由另一个函数控制?