elasticsearch - 使用 match_phrase 查询后,如何让 Elasticsearch 返回不重复的结果
问题描述
大家好,我有一个如下所示的 Elasticsearch 文档,我只关心其中的 "tags" 字段。
{
  "_index": "graph_20211025t0909",
  "_type": "_doc",
  "_id": "E12201A5-CC50-40AF-97AE-C54A2CA303F7",
  "_score": null,
  "_source": {
    "entity_id": "E12201A5-CC50-40AF-97AE-C54A2CA303F7",
    "properties": {
      "external": {
        "facebook": {
          "id": "muji.jp"
        },
        "instagram": {
          "id": "muji_global"
        },
        "twitter": {
          "id": "muji_net"
        },
        "wikidata": {
          "id": "Q708789"
        }
      },
      "akas": [
        {
          "value": "Muji",
          "language": "zh"
        },
        {
          "value": "multinacional japonesa",
          "language": "es"
        }
      ]
    },
    "data_source": {
      "data_pull_date": "202109",
      "source_id": "muji_global",
      "dataset": "brand"
    },
    "scoring_entity_data_size": 5306,
    "population_percentile": 0.9855572298745676,
    "type_synonyms": [],
    "@version": "1",
    "@timestamp": "2021-10-25T16:28:24.892Z",
    "name": "Muji",
    "types": [
      "urn:entity:brand"
    ],
    "tags": [
      {
        "tag_id": "D24DE9CF-C778-4468-8433-5A0E8AA2BA9D",
        "name": "Wikipedia articles with GND identifiers",
        "type": "urn:tag:wikipedia_category"
      },
      {
        "tag_id": "67A608CC-2DA3-4C78-B7F6-6DD419744FFC",
        "name": "Clothing brands of Japan",
        "type": "urn:tag:wikipedia_category"
      }
    ]
  }
}
我的 Elasticsearch 查询如下:
{
  "size": 20,
  "_source": ["tags"],
  "sort": [
    { "@timestamp": { "order": "desc" } }
  ],
  "query": {
    "nested": {
      "path": "tags",
      "query": {
        "bool": {
          "must": [
            { "match_phrase": { "tags.name": "thriller" } }
          ]
        }
      }
    }
  }
}
我的问题是:如何让这个查询返回不重复的结果?我在 "tags" 字段中按 "tags.name" 进行搜索,希望返回的 "tags" 是一组不重复的条目。例如,我目前得到的结果是(每个命中的文档各带一个 tags 数组):
[
  {
    "tags": [
      {
        "name": "Male actors",
        "tag_id": "A2A18D57-24B5-4578-B0D3-2A9190EEAD7C",
        "type": "urn:tag:wikipedia_category"
      },
      {
        "name": "some tag name",
        "tag_id": "0CB4BE42-026F-4B14-A59A-C5A331E8A56F",
        "type": "urn:tag:wikipedia_category"
      }
    ]
  },
  {
    "tags": [
      {
        "name": "Male actors",
        "tag_id": "A2A18D57-24B5-4578-B0D3-2A9190EEAD7C",
        "type": "urn:tag:wikipedia_category"
      },
      {
        "name": "another tag name",
        "tag_id": "0CB4BE42-026F-4B14-A59A-C5A331E8A56F",
        "type": "urn:tag:wikipedia_category"
      }
    ]
  }
]
我希望结果中不要重复出现 "name": "Male actors"。
解决方案
您的查询返回的 tags 来自不同的文档,因此不能假设它们是唯一的。我建议使用聚合来获取唯一的 tags.name(注意:如果 tags.name 被映射为 text 类型,terms 聚合默认无法直接作用于该字段,需要改用其 keyword 子字段,例如 tags.name.keyword):
{
  "size": 20,
  "_source": ["tags"],
  "sort": [
    { "@timestamp": { "order": "desc" } }
  ],
  "query": {
    "nested": {
      "path": "tags",
      "query": {
        "bool": {
          "must": [
            { "match_phrase": { "tags.name": "thriller" } }
          ]
        }
      }
    }
  },
  "aggs": {
    "unique_tags": {
      "nested": {
        "path": "tags"
      },
      "aggs": {
        "tag_name": {
          "terms": {
            "field": "tags.name"
          }
        }
      }
    }
  }
}