热门标签 | HotTags
当前位置:  开发笔记 > 编程语言 > 正文

ElasticSearch学习笔记:邻近匹配搜索记录

ElasticSearch版本:elasticsearch-7.3.0。环境准备:使用 curl(Content-Type: application/json)创建索引并写入测试数据。

ElasticSearch版本:elasticsearch-7.3.0

环境准备:

# Create the `article` index.
# Settings: a custom `shingle_analyzer` (ik_smart tokenizer + `shingle_filter`
# with min_shingle_size 2, max_shingle_size 4, output_unigrams disabled).
# Mappings: strict dynamic mapping; `id` is excluded from _source; `title` is
# analyzed with ik_smart and has two sub-fields, `title.raw` (keyword) and
# `title.shingle` (text, shingle_analyzer); `publish_time` is a date in
# "yyyy-MM-dd HH:mm:ss" format.
curl -H "Content-Type: application/json" -XPUT 'http://192.168.0.1:9200/article/' -d '
{"settings": {"analysis": {"analyzer": {"shingle_analyzer": {"type": "custom","tokenizer": "ik_smart","filter": ["shingle_filter"]}},"filter": {"shingle_filter": {"type": "shingle","min_shingle_size": 2,"max_shingle_size": 4,"output_unigrams": false}}}},"mappings": {"dynamic": "strict","_source": {"excludes": ["id"]},"properties": {"id": {"type": "keyword"},"title": {"analyzer": "ik_smart","type": "text","fields": {"raw": {"type": "keyword"},"shingle": {"type": "text","analyzer": "shingle_analyzer"}}},"publish_time": {"type": "date","format": "yyyy-MM-dd HH:mm:ss"}}}
}
'
# Index six sample documents (ids 1-6). The titles list the same names in
# different combinations and orders, with publish_time one hour apart.
curl -H "Content-Type: application/json" -XPOST 'http://192.168.0.1:9200/article/_doc/1' -d '
{"id": "1","title": "周杰伦、林俊杰、罗志祥、王力宏、潘玮柏、蔡依林、孙燕姿、梁静茹一同参加颁奖典礼","publish_time": "2019-08-22 17:48:16"
}
'
curl -H "Content-Type: application/json" -XPOST 'http://192.168.0.1:9200/article/_doc/2' -d '
{"id": "2","title": "周杰伦、林俊杰、罗志祥、王力宏、潘玮柏一同参加颁奖典礼","publish_time": "2019-08-22 16:48:16"
}
'
curl -H "Content-Type: application/json" -XPOST 'http://192.168.0.1:9200/article/_doc/3' -d '
{"id": "3","title": "周杰伦、林俊杰、罗志祥、孙燕姿一同参加颁奖典礼","publish_time": "2019-08-22 15:48:16"
}
'
curl -H "Content-Type: application/json" -XPOST 'http://192.168.0.1:9200/article/_doc/4' -d '
{"id": "4","title": "周杰伦、罗志祥、孙燕姿一同参加颁奖典礼","publish_time": "2019-08-22 14:48:16"
}
'
curl -H "Content-Type: application/json" -XPOST 'http://192.168.0.1:9200/article/_doc/5' -d '
{"id": "5","title": "周杰伦、林俊杰、梁静茹一同参加颁奖典礼","publish_time": "2019-08-22 13:48:16"
}
'
curl -H "Content-Type: application/json" -XPOST 'http://192.168.0.1:9200/article/_doc/6' -d '
{"id": "6","title": "周杰伦、王力宏、潘玮柏、林俊杰、罗志祥、孙燕姿一同参加颁奖典礼","publish_time": "2019-08-22 12:48:16"
}
'
# Run the index's `shingle_analyzer` over a sample title via the _analyze API
# to inspect the tokens it produces.
curl -H "Content-Type: application/json" -XGET 'http://192.168.0.1:9200/article/_analyze?pretty=true' -d '
{"analyzer": "shingle_analyzer","text": "周杰伦、林俊杰、罗志祥、王力宏、潘玮柏、蔡依林、孙燕姿、梁静茹一同参加颁奖典礼"
}
'

测试验证代码:

import java.io.IOException;
import java.util.Map;import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.MatchPhraseQueryBuilder;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;public class ElasticRestClientHelper {private static final Logger LOG = LoggerFactory.getLogger(ElasticRestClientHelper.class);public static String search(QueryBuilder queryBuilder, int from, int size, String... indices) {RestHighLevelClient restHighLevelClient = ElasticRestClient.getInstance().getClient();SearchRequest searchRequest = new SearchRequest(indices);SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();searchSourceBuilder.query(queryBuilder);searchSourceBuilder.from(from).size(size);HighlightBuilder highlightBuilder = new HighlightBuilder();highlightBuilder.preTags("");highlightBuilder.postTags("");highlightBuilder.field("*");searchSourceBuilder.highlighter(highlightBuilder);searchRequest.source(searchSourceBuilder);searchRequest.scroll(TimeValue.timeValueMinutes(5));searchRequest.searchType(SearchType.QUERY_THEN_FETCH);SearchResponse searchRespOnse= null;try {searchRespOnse= restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);} catch (IOException e) {LOG.error(e.getMessage(), e);}LOG.info("took {}", searchResponse.getTook().getSeconds());SearchHits searchHits = searchResponse.getHits();long totalHits = searchHits.getTotalHits().value;LOG.info("total hits {}", totalHits);SearchHit[] searchHitArray = searchHits.getHits();for (int i = 0, len = searchHitArray.length; i source = searchHit.getSourceAsMap();wrapperHighLight(source, searchHit.getHighlightFields());LOG.info("{} {} {}", searchHit.getScore(), searchHit.getId(), source.get("title"));}try {restHighLevelClient.close();} catch (IOException e) {LOG.error(e.getMessage(), e);}return searchResponse.getScrollId();}private static void wrapperHighLight(Map source, Map highLightFields) {String entryKey = null;Object entryValue = null;for (Map.Entry entry : source.entrySet()) {entryKey = entry.getKey();if (!highLightFields.containsKey(entryKey)) continue;Text[] texts = highLightFields.get(entryKey).getFragments();StringBuilder highLightText = new 
StringBuilder(100);for (int i = 0, tlen = texts.length; i 0) entryValue = highLightText.toString();entry.setValue(entryValue);}}/** Match查询匹配分词后的关键字中任意一个以上 */public static void search_01() {MatchQueryBuilder matchQueryBuilder = QueryBuilders.matchQuery("title", "周杰伦、罗志祥");search(matchQueryBuilder, 0, 10, "article");}/** 短语查询匹配分词后的关键字中每一个且邻接有序 */public static void search_02() {MatchPhraseQueryBuilder matchPhraseQueryBuilder = QueryBuilders.matchPhraseQuery("title", "周杰伦、罗志祥");search(matchPhraseQueryBuilder, 0, 10, "article");}/** 短语查询匹配分词后的关键字中每一个且有序、间隔可以邻接或为1 */public static void search_03() {MatchPhraseQueryBuilder matchPhraseQueryBuilder = QueryBuilders.matchPhraseQuery("title", "周杰伦、罗志祥");matchPhraseQueryBuilder.slop(1);search(matchPhraseQueryBuilder, 0, 10, "article");}/** 短语查询匹配分词后的关键字中每一个且有序、间隔可以邻接或为10以内 */public static void search_04() {MatchPhraseQueryBuilder matchPhraseQueryBuilder = QueryBuilders.matchPhraseQuery("title", "周杰伦、罗志祥");matchPhraseQueryBuilder.slop(10);search(matchPhraseQueryBuilder, 0, 10, "article");}/** 在Match查询的基础上通过短语查询提升相关度分值 */public static void search_05() {BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();MatchQueryBuilder matchQueryBuilder = QueryBuilders.matchQuery("title", "周杰伦、罗志祥");boolQueryBuilder.must(matchQueryBuilder);MatchPhraseQueryBuilder matchPhraseQueryBuilder = QueryBuilders.matchPhraseQuery("title", "周杰伦、罗志祥");matchPhraseQueryBuilder.slop(1);boolQueryBuilder.should(matchPhraseQueryBuilder);search(boolQueryBuilder, 0, 10, "article");}/** 在Match查询的基础上通过boost值来控制每个查询子句的相对权重,提升相关度分值 */public static void search_06() {BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();MatchQueryBuilder matchQueryBuilder1 = QueryBuilders.matchQuery("title", "周杰伦、林俊杰、罗志祥");matchQueryBuilder1.minimumShouldMatch("30%"); boolQueryBuilder.must(matchQueryBuilder1);MatchQueryBuilder matchQueryBuilder2 = QueryBuilders.matchQuery("title", 
"周杰伦");matchQueryBuilder2.boost(5.0f);boolQueryBuilder.should(matchQueryBuilder2);MatchQueryBuilder matchQueryBuilder3 = QueryBuilders.matchQuery("title", "林俊杰");matchQueryBuilder3.boost(3.0f);boolQueryBuilder.should(matchQueryBuilder3);search(boolQueryBuilder, 0, 10, "article");}/** 在Match查询的基础上通过shingle关联词提升相关度分值 *//** shingle索引时创建,比短语查询灵活、性能高,需要选择合适的shingle_size */public static void search_07() {BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();MatchQueryBuilder matchQueryBuilder1 = QueryBuilders.matchQuery("title", "周杰伦、林俊杰、罗志祥");matchQueryBuilder1.minimumShouldMatch("30%");boolQueryBuilder.must(matchQueryBuilder1);MatchQueryBuilder matchQueryBuilder2 = QueryBuilders.matchQuery("title.shingle", "周杰伦、罗志祥");boolQueryBuilder.should(matchQueryBuilder2);search(boolQueryBuilder, 0, 10, "article");}}

 


推荐阅读
author-avatar
mengziwudao
这个家伙很懒,什么也没留下!
PHP1.CN | 中国最专业的PHP中文社区 | DevBox开发工具箱 | json解析格式化 |PHP资讯 | PHP教程 | 数据库技术 | 服务器技术 | 前端开发技术 | PHP框架 | 开发工具 | 在线工具
Copyright © 1998 - 2020 PHP1.CN. All Rights Reserved | 京公网安备 11010802041100号 | 京ICP备19059560号-4 | PHP1.CN 第一PHP社区 版权所有