首页 > 技术文章 > 中文分词器

Applogize 2020-08-17 14:33 原文

1.安装插件

[root@db01 ~]# /usr/share/elasticsearch/bin/elasticsearch-plugin install https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v6.6.0/elasticsearch-analysis-ik-6.6.0.zip
[root@db02 ~]# /usr/share/elasticsearch/bin/elasticsearch-plugin install https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v6.6.0/elasticsearch-analysis-ik-6.6.0.zip
[root@db03 ~]# /usr/share/elasticsearch/bin/elasticsearch-plugin install https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v6.6.0/elasticsearch-analysis-ik-6.6.0.zip

# 解压到es目录下
[root@db01 ~]# unzip elasticsearch-analysis-ik-6.6.0.zip -d /etc/elasticsearch/

2.创建索引与mapping

PUT /news

curl -XPOST http://localhost:9200/news/text/_mapping -H 'Content-Type:application/json' -d'
{
    "properties": {
         "content": {
            "type": "text",
            "analyzer": "ik_max_word",
            "search_analyzer": "ik_smart"
        }
    }
}

3.编辑我们要定义的词

[root@redis01 ~]# vim /etc/elasticsearch/analysis-ik/IKAnalyzer.cfg.xml 
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<properties>
    <comment>IK Analyzer 扩展配置</comment>
    <!--用户可以在这里配置自己的扩展字典 -->
    <entry key="ext_dict">/etc/elasticsearch/analysis-ik/my.dic</entry>
    
[root@redis01 ~]# vim /etc/elasticsearch/analysis-ik/my.dic 
中国

[root@redis01 ~]# chown -R elasticsearch.elasticsearch /etc/elasticsearch/analysis-ik/my.dic

4.重新插入数据

POST /news/text/1
{"content":"美国留给伊拉克的是个烂摊子吗"}

POST /news/text/2
{"content":"公安部:各地校车将享最高路权"}

POST /news/text/3
{"content":"中韩渔警冲突调查:韩警平均每天扣1艘中国渔船"}

POST /news/text/4
{"content":"中国驻洛杉矶领事馆遭亚裔男子枪击 嫌犯已自首"}

5.重新插入数据

POST /news/_search
{
  "query" : { "match" : { "content" : "中国" }},
  "highlight" : {
      "pre_tags" : ["<tag1>", "<tag2>"],
      "post_tags" : ["</tag1>", "</tag2>"],
      "fields" : {
          "content" : {}
      }
   }
}

推荐阅读