首页 > 解决方案 > 最新 N 条记录上的 Elasticsearch 聚合

问题描述

有没有办法对最新的 N 条记录进行聚合?

下面这种写法不起作用:

{
   "query": {...},
   "size": N,
   "order": ...,
   "aggs": {
       ....
   }
}

更多细节:我想先从“service_name”字段为“x”的记录中取出最新的 10 条,然后对这 10 条记录进行聚合,统计其中有多少条记录的“resp_code”字段为“成功”。

我的数据是这样的:

[
  {
    "_index": "logs",
    "_type": "_doc",
    "_id": "1232525",
    "_score": 1,
    "_source": {
      "resp_body": "",
      "client_ip": "127.0.0.1",
      "resp_time": "2021-04-15T10:24:51+01:00",
      "@timestamp": "2021-04-15T05:55:00.452Z",
      "resp_code": "412",
      "service_name": "service1",
      "log_id": "1232525"
    }
  },
  {
    "_index": "logs",
    "_type": "_doc",
    "_id": "1232524",
    "_score": 1,
    "_source": {
      "resp_body": "",
      "client_ip": "127.0.0.1",
      "resp_time": "2021-04-15T10:23:51+01:00",
      "@timestamp": "2021-04-15T05:53:00.452Z",
      "resp_code": "0",
      "service_name": "service2",
      "log_id": "1232524"
    }
  },
  {
    "_index": "logs",
    "_type": "_doc",
    "_id": "1232523",
    "_score": 1,
    "_source": {
      "resp_body": "",
      "client_ip": "127.0.0.1",
      "resp_time": "2021-04-15T10:22:51+01:00",
      "@timestamp": "2021-04-15T05:52:00.452Z",
      "resp_code": "0",
      "service_name": "service1",
      "log_id": "1232523"
    }
  },
  {
    "_index": "logs",
    "_type": "_doc",
    "_id": "1232522",
    "_score": 1,
    "_source": {
      "resp_body": "",
      "client_ip": "127.0.0.1",
      "resp_time": "2021-04-15T10:21:51+01:00",
      "@timestamp": "2021-04-15T05:51:00.452Z",
      "resp_code": "0",
      "service_name": "service1",
      "log_id": "1232522"
    }
  },
  {
    "_index": "logs",
    "_type": "_doc",
    "_id": "1232521",
    "_score": 1,
    "_source": {
      "resp_body": "",
      "client_ip": "127.0.0.1",
      "resp_time": "2021-04-15T10:20:51+01:00",
      "@timestamp": "2021-04-15T05:50:00.452Z",
      "resp_code": "0",
      "service_name": "service2",
      "log_id": "1232521"
    }
  }
]

例如:我想用“service_name=service1”获取最后 2 条记录,并找出其中有多少条记录有“resp_code=0”

标签: elasticsearch, filter, aggregation

解决方案


您需要结合使用 terms 聚合、filter 聚合和 max 聚合来获得所需的结果。

  1. 首先使用 filter 聚合(first_filter)筛选出满足 "service_name=service1" 的文档
  2. 然后使用 terms 聚合(top_terms_aggregation)根据 log_id 字段为筛选后的文档创建存储桶。这些桶根据 @timestamp 字段的最大值(通过 max 聚合计算)按降序(desc)排序
  3. 再次使用 filter 聚合(second_filter),在每个桶中筛选出满足 "resp_code=0" 的文档

下面是一个包含索引映射、数据(与问题中的相同)、搜索查询和搜索结果的可运行示例。

索引映射:

{
  "mappings": {
    "properties": {
      "@timestamp": {
        "type": "date",
        "format": "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"
      }
    }
  }
}

搜索查询:

{
  "size": 0,
  "aggs": {
    "first_filter": {
      "filter": {
        "bool": {
          "must": [
            {
              "term": {
                "service_name.keyword": "service1"
              }
            }
          ]
        }
      },
      "aggs": {
        "top_terms_aggregation": {
          "terms": {
            "field": "log_id.keyword",
            "size": 10,
            "order": {
              "second_filter>latestRecord": "desc"
            }
          },
          "aggs": {
            "second_filter": {
              "filter": {
                "bool": {
                  "must": [
                    {
                      "term": {
                        "resp_code": "0"
                      }
                    }
                  ]
                }
              },
              "aggs": {
                "latestRecord": {
                  "max": {
                    "field": "@timestamp"
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}

搜索结果:

"aggregations": {
    "first_filter": {
      "doc_count": 3,
      "top_terms_aggregation": {
        "doc_count_error_upper_bound": 0,
        "sum_other_doc_count": 0,
        "buckets": [
          {
            "key": "1232523",
            "doc_count": 1,
            "second_filter": {
              "doc_count": 1,
              "latestRecord": {
                "value": 1.618465920452E12,
                "value_as_string": "2021-04-15T05:52:00.452Z"      // note this
              }
            }
          },
          {
            "key": "1232522",
            "doc_count": 1,
            "second_filter": {
              "doc_count": 1,
              "latestRecord": {
                "value": 1.618465860452E12,
                "value_as_string": "2021-04-15T05:51:00.452Z"          // note this
              }
            }
          },
          {
            "key": "1232525",
            "doc_count": 1,
            "second_filter": {
              "doc_count": 0,
              "latestRecord": {
                "value": null
              }
            }
          }
        ]
      }
    }
  }

推荐阅读