elasticsearch - Elasticsearch 为每个用户获取最新的徽章值
问题描述
我有一个名为“candidate_ranking”的索引,其中的文档结构如下。每个文档都有一个 candidate_id,多个文档可以具有相同的 candidate_id,但它们的 created_at 字段各不相同。下面是 candidate_ranking 索引中的文档样本:
"hits" : [
{
"_index" : "candidate_ranking",
"_type" : "_doc",
"_id" : "SCa26HgB0zUr7edEvDul",
"_score" : 1.0,
"_source" : {
"id" : 118558,
"candidate_id" : 29492,
"created_at" : "2021-03-27T01:34:29.628550+00:00",
"badge" : "2"
}
},
{
"_index" : "candidate_ranking",
"_type" : "_doc",
"_id" : "SSa26HgB0zUr7edEvDul",
"_score" : 1.0,
"_source" : {
"id" : 133354,
"candidate_id" : 29492,
"created_at" : "2021-03-27T02:11:35.811420+00:00",
"badge" : "2"
}
},
{
"_index" : "candidate_ranking",
"_type" : "_doc",
"_id" : "Sia26HgB0zUr7edEvDul",
"_score" : 1.0,
"_source" : {
"id" : 148136,
"candidate_id" : 29492,
"created_at" : "2021-03-29T20:20:36.482066+00:00",
"badge" : "2"
}
},
{
"_index" : "candidate_ranking",
"_type" : "_doc",
"_id" : "Sya26HgB0zUr7edEvDul",
"_score" : 1.0,
"_source" : {
"id" : 162916,
"candidate_id" : 29492,
"created_at" : "2021-03-29T21:05:03.985032+00:00",
"badge" : null
}
},
{
"_index" : "candidate_ranking",
"_type" : "_doc",
"_id" : "TCa26HgB0zUr7edEvDul",
"_score" : 1.0,
"_source" : {
"id" : 177712,
"candidate_id" : 29492,
"created_at" : "2021-03-29T21:33:32.596613+00:00",
"badge" : null
}
},
{
"_index" : "candidate_ranking",
"_type" : "_doc",
"_id" : "TSa26HgB0zUr7edEvDul",
"_score" : 1.0,
"_source" : {
"id" : 192999,
"candidate_id" : 29492,
"created_at" : "2021-03-29T22:20:24.942116+00:00",
"badge" : null
}
},
{
"_index" : "candidate_ranking",
"_type" : "_doc",
"_id" : "Tia26HgB0zUr7edEvDul",
"_score" : 1.0,
"_source" : {
"id" : 225434,
"candidate_id" : 29492,
"created_at" : "2021-03-29T23:13:59.266074+00:00",
"badge" : null
}
},
{
"_index" : "candidate_ranking",
"_type" : "_doc",
"_id" : "Tya26HgB0zUr7edEvDul",
"_score" : 1.0,
"_source" : {
"id" : 247169,
"candidate_id" : 29492,
"created_at" : "2021-03-30T00:16:04.077245+00:00",
"badge" : null
}
},
{
"_index" : "candidate_ranking",
"_type" : "_doc",
"_id" : "UCa26HgB0zUr7edEvDul",
"_score" : 1.0,
"_source" : {
"id" : 271179,
"candidate_id" : 29492,
"created_at" : "2021-03-30T01:19:59.803999+00:00",
"badge" : null
}
},
{
"_index" : "candidate_ranking",
"_type" : "_doc",
"_id" : "USa26HgB0zUr7edEvDul",
"_score" : 1.0,
"_source" : {
"id" : 295537,
"candidate_id" : 29492,
"created_at" : "2021-03-30T02:23:42.077149+00:00",
"badge" : null
}
}
]
}
此徽章值可以是空字符串或“1”或“2”。
我目前使用下面的聚合来统计徽章值为 1 和 2 的用户数量:
GET /candidate_ranking/_search
{
"aggs": {
"mega_mogul": {
"terms": {
"field": "badge.keyword",
"exclude": ["", "2"],
"size": 500000
}
},
"rising_mogul": {
"terms": {
"field": "badge.keyword",
"exclude": ["", "1"],
"size": 500000
}
}
}
}
我的索引中每个 candidate_id 都对应多个文档。我只想针对每个 candidate_id 的最新文档进行徽章聚合,也就是先按 created_at 字段降序排序,然后只取每个 candidate_id 的第一条(最新的)文档。最终目标是统计最新徽章值为 1 或 2 的候选人数量。
我尝试了下面的写法,但它不起作用:
GET /candidate_ranking/_search
{
"aggs": {
"mega_mogul": {
"terms": {
"field": "badge.keyword",
"exclude": ["", "2"],
"size": 500000,
"order": {"created_at": "desc"},
"top_hits": {"size":1}
}
},
"rising_mogul": {
"terms": {
"field": "badge.keyword",
"exclude": ["", "1"],
"size": 500000
}
}
}
}
解决方案
要获取存储桶的数量,您需要使用 stats_bucket 聚合。
下面给出一个包含索引映射、索引数据、搜索查询和搜索结果的可运行示例。
索引映射:
{
"mappings": {
"properties": {
"created_at": {
"type": "date",
"format": "yyyy-MM-dd'T'HH:mm:ss.SSSSSSz"
}
}
}
}
索引数据:
{
"id": 295537,
"candidate_id": 29492,
"created_at": "2021-03-30T02:23:42.077149+00:00",
"badge": "1"
}
{
"id": 271179,
"candidate_id": 29492,
"created_at": "2021-03-30T01:19:59.803999+00:00",
"badge": "1"
}
{
"id": 247169,
"candidate_id": 29492,
"created_at": "2021-03-30T00:16:04.077245+00:00",
"badge": "1"
}
{
"id": 225434,
"candidate_id": 29492,
"created_at": "2021-03-29T23:13:59.266074+00:00",
"badge": null
}
{
"id": 192999,
"candidate_id": 29492,
"created_at": "2021-03-29T22:20:24.942116+00:00",
"badge": null
}
{
"id": 177712,
"candidate_id": 29492,
"created_at": "2021-03-29T21:33:32.596613+00:00",
"badge": null
}
{
"id": 162916,
"candidate_id": 29492,
"created_at": "2021-03-29T21:05:03.985032+00:00",
"badge": null
}
{
"id": 148136,
"candidate_id": 29492,
"created_at": "2021-03-29T20:20:36.482066+00:00",
"badge": "2"
}
{
"id": 118558,
"candidate_id": 29492,
"created_at": "2021-03-27T01:34:29.628550+00:00",
"badge": "2"
}
{
"id": 133354,
"candidate_id": 29492,
"created_at": "2021-03-27T02:11:35.811420+00:00",
"badge": "2"
}
搜索查询:
{
"size": 0,
"aggs": {
"badge_1": {
"terms": {
"field": "badge.keyword",
"include": [
"1"
],
"size": 500000
},
"aggs": {
"unique_id": {
"terms": {
"field": "candidate_id",
"size": 10,
"order": {
"latestOrder": "desc"
}
},
"aggs": {
"top_doc": {
"top_hits": {
"size": 1
}
},
"latestOrder": {
"max": {
"field": "created_at"
}
}
}
},
"stats_1": {
"stats_bucket": {
"buckets_path": "unique_id._count"
}
}
}
},
"badge_2": {
"terms": {
"field": "badge.keyword",
"include": [
"2"
],
"size": 500000
},
"aggs": {
"unique_id": {
"terms": {
"field": "candidate_id",
"size": 10,
"order": {
"latestOrder": "desc"
}
},
"aggs": {
"top_doc": {
"top_hits": {
"size": 1
}
},
"latestOrder": {
"max": {
"field": "created_at"
}
}
}
},
"stats_2": {
"stats_bucket": {
"buckets_path": "unique_id._count"
}
}
}
}
}
}
搜索结果:
"aggregations": {
"badge_2": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "2",
"doc_count": 3,
"unique_id": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 29492,
"doc_count": 3,
"latestOrder": {
"value": 1.617049236482E12,
"value_as_string": "2021-03-29T20:20:36.482000Z"
},
"top_doc": {
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "67157371",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"id": 133354,
"candidate_id": 29492,
"created_at": "2021-03-27T02:11:35.811420+00:00",
"badge": "2"
}
}
]
}
}
}
]
},
"stats_2": {
"count": 1, // note this
"min": 3.0,
"max": 3.0,
"avg": 3.0,
"sum": 3.0
}
}
]
},
"badge_1": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "1",
"doc_count": 3,
"unique_id": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 29492,
"doc_count": 3,
"latestOrder": {
"value": 1.617071022077E12,
"value_as_string": "2021-03-30T02:23:42.077000Z"
},
"top_doc": {
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "67157371",
"_type": "_doc",
"_id": "10",
"_score": 1.0,
"_source": {
"id": 295537,
"candidate_id": 29492,
"created_at": "2021-03-30T02:23:42.077149+00:00",
"badge": "1"
}
}
]
}
}
}
]
},
"stats_1": {
"count": 1, // note this
"min": 3.0,
"max": 3.0,
"avg": 3.0,
"sum": 3.0
}
}
]
}
}
推荐阅读
- python - BigQuery:此表的表 dml 插入操作过多
- animation - Flutter Widget Test 等待动画
- javascript - 如何在 Qweb 报告的同一行上对齐 div(Odoo 14)
- c# - 如何使用 Include - ThenInclude 查询 IQueryable?
- android - Android WifiP2pManager 发现 - 无法缩小问题范围
- prolog - prolog中命题逻辑的困难
- c# - 如何将值从文本文件传递到 DataGrid?C#WPF
- python - 如何重命名 csv 文件并在每次循环迭代时更改名称?
- c# - 有什么方法可以从自发光渲染纹理中伪造实时全局照明?
- python - CSV 第一行总是被覆盖(python)