elasticsearch - 如何对 ElasticSearch 中的聚合字段执行复杂查询
问题描述
我试图弄清楚如何在弹性搜索中执行复杂的查询,假设我有以下数据表:
我从以下查询中得到的
{
"aggs": {
"3": {
"terms": {
"field": "ColumnA",
"order": {
"_key": "desc"
},
"size": 50
},
"aggs": {
"4": {
"terms": {
"field": "ColumnB",
"order": {
"_key": "desc"
},
"size": 50
},
"aggs": {
"5": {
"terms": {
"field": "ColumnC",
"order": {
"_key": "desc"
},
"size": 50
},
"aggs": {
"sum_of_views": {
"sum": {
"field": "views"
}
},
"sum_of_costs": {
"sum": {
"field": "cost"
}
},
"sum_of_clicks": {
"sum": {
"field": "clicks"
}
},
"sum_of_earned": {
"sum": {
"field": "earned"
}
},
"sum_of_adv_earned": {
"sum": {
"field": "adv_earned"
}
}
}
}
}
}
}
}
},
"size": 0,
"_source": {
"excludes": []
},
"stored_fields": [
"*"
],
"script_fields": {},
"docvalue_fields": [
{
"field": "hour",
"format": "date_time"
}
],
"query": {
"bool": {
"must": [],
"filter": [
{
"match_all": {}
},
{
"range": {
"hour": {
"format": "strict_date_optional_time",
"gte": "2019-08-08T06:29:34.723Z",
"lte": "2020-08-08T06:29:34.724Z"
}
}
}
],
"should": [],
"must_not": []
}
}
}
现在例如,如果我想获取具有以下条件的记录
(sum_of_clicks / sum_of_views) * (sum_of_earned2 / sum_of_earned1) < 0.5
我应该查询什么?
解决方案
认为以下内容应该有所帮助。我的理解是,您希望首先基于 进行分组ColumnA, ColumnB, ColumnC
,计算clicks, views, earned1 and earned2
字段的总和,然后应用您正在寻找的自定义聚合逻辑。
我已经能够提出以下查询,其中我使用了Bucket Selector Aggregation。
POST <your_index_name>/_search
{
"size": 0,
"aggs": {
"3": {
"terms": {
"field": "ColumnA",
"order": {
"_key": "desc"
},
"size": 50
},
"aggs": {
"4": {
"terms": {
"field": "ColumnB",
"order": {
"_key": "desc"
},
"size": 50
},
"aggs": {
"5": {
"terms": {
"field": "ColumnC",
"order": {
"_key": "desc"
},
"size": 50
},
"aggs": {
"sum_views": {
"sum": {
"field": "views"
}
},
"sum_clicks": {
"sum": {
"field": "clicks"
}
},
"sum_earned1": {
"sum": {
"field": "earned1"
}
},
"sum_earned2": {
"sum": {
"field": "earned2"
}
},
"custom_sum_bucket_filter": {
"bucket_selector": {
"buckets_path": {
"sum_of_views": "sum_views",
"sum_of_clicks": "sum_clicks",
"sum_of_earned1": "sum_earned1",
"sum_of_earned2": "sum_earned2"
},
"script": "(params.sum_of_views/params.sum_of_clicks) * (params.sum_of_earned1/params.sum_of_earned2) < 0.5"
}
}
}
},
"min_bucket_selector": {
"bucket_selector": {
"buckets_path": {
"valid_docs_count": "5._bucket_count"
},
"script": {
"source": "params.valid_docs_count >= 1"
}
}
}
}
},
"min_bucket_selector": {
"bucket_selector": {
"buckets_path": {
"valid_docs_count": "4._bucket_count"
},
"script": {
"source": "params.valid_docs_count >= 1"
}
}
}
}
}
}
}
请注意,要获得您正在寻找的确切结果,我必须在 和 处添加存储桶的过滤4
条件5
。
我使用的聚合是
- Bucket Selector 来计算你提到的条件
- 再次桶选择器,以便在聚合 5 时不显示空桶
- 再次使用存储桶选择器,以便现在在级别 4 显示空存储桶聚合。
为了测试我为什么添加了额外的空桶过滤器,您可以删除它们并查看您观察到的结果。
请注意,为简单起见,我忽略了query
部分和cost
字段。请随时添加它们并进行测试。