作者:泉水叮咚139 | 来源:互联网 | 2023-07-06 19:28
此例为基于 Lucene shingle 的英文单词 BiGram Analyzer 的实现:import java.io.Reader; import org.apache.lucene.analy…
本例基于 Lucene 的 ShingleFilter,实现了一个英文单词级的 BiGram Analyzer。
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.lucene.util.Version;public final class BiGramAnalyzer extends Analyzer {@Overridepublic TokenStream tokenStream(String fieldName, Reader reader) {TokenStream result = new WhitespaceTokenizer(Version.LUCENE_36, reader);result = new LowerCaseFilter(Version.LUCENE_36, result);ShingleFilter shingleFilter = new ShingleFilter(result, 2);shingleFilter.setOutputUnigrams(false);result = shingleFilter;return result;}}
http://searchhub.org/2010/12/17/whats-a-shingle-in-lucene-parlance/