Lucene 是一个实现全文检索(文本搜索)的框架,可用于搜索非结构化文件,并能检索文件内部的内容,例如用于电商网站上的搜索。
实现文本搜索首先要进行索引的创建,索引创建了才能实现文本搜索。
获取原始文档
创建文档对象
分析文档
创建索引
创建java工程
导入包
创建索引
1IndexWriter indexWriter=new IndexWriter(directory, indexWriterConfig);new IndexWriter(directory, indexWriterConfig);
创建索引保存路径
1Directory directory =FSDirectory.open(new File("d:\\luceneindex\\index"));new File("d:\\luceneindex\\index"));
配置索引写入流的参数
1Analyzer analyzer=new StandardAnalyzer();2IndexWriterConfig indexWriterCOnfig=new IndexWriterConfig(Version.LATEST, analyzer);new StandardAnalyzer();
2IndexWriterConfig indexWriterCOnfig=new IndexWriterConfig(Version.LATEST, analyzer);
创建一个Document文件,这个文件用来专门存储各种域对象
1Document document=new Document();new Document();
通过文件流的输入获取要分析的资源,并将资源存储到数组中
1File file=new File("D:\\luceneindex\\searchsource");2File[] listFiles = file.listFiles();new File("D:\\luceneindex\\searchsource");
2File[] listFiles = file.listFiles();
for增强循环遍历该数组
1for(File file2:listFiles) { 2 //文件名称 3 String file_name=file2.getName(); 4 Field fileNameField=new TextField("fileName",file_name,Store.YES); 5 //文件大小 6 long file_size = FileUtils.sizeOf(file2); 7 Field fileSizeField=new LongField("fileSize", file_size, Store.YES); 8 //文件路径 9 String file_path = file2.getPath();10 Field filePathField=new StoredField("filePath", file_path);11 //文件内容12 String file_context = FileUtils.readFileToString(file2);13 Field fileCOntextField=new TextField("fileContext", file_context,Store.YES);for(File file2:listFiles) {
2 //文件名称
3 String file_name=file2.getName();
4 Field fileNameField=new TextField("fileName",file_name,Store.YES);
5 //文件大小
6 long file_size = FileUtils.sizeOf(file2);
7 Field fileSizeField=new LongField("fileSize", file_size, Store.YES);
8 //文件路径
9 String file_path = file2.getPath();
10 Field filePathField=new StoredField("filePath", file_path);
11 //文件内容
12 String file_context = FileUtils.readFileToString(file2);
13 Field fileCOntextField=new TextField("fileContext", file_context,Store.YES);
将所有域对象存储到document文档中
// Attach every field built for the current file to the document.
document.add(fileContextField);
document.add(filePathField);
document.add(fileSizeField);
document.add(fileNameField);
使用indexWriter对象将document中的内容添加到索引库中,并将索引也添加到了索引库
1//使用indexWriter对象将document对象写入索引库,此时进行索引创建,并将索引和document对象写入索引库。2indexWriter.addDocument(document);//使用indexWriter对象将document对象写入索引库,此时进行索引创建,并将索引和document对象写入索引库。
2indexWriter.addDocument(document);
关闭indexWriter流
// Close the writer.
indexWriter.close();
创建一个Directory对象,获取索引所在路径
1//第一步:创建一个Directory对象,获取索引库存放的位置。2Directory directory=FSDirectory.open(new File("d:\\luceneindex\\index"));//第一步:创建一个Directory对象,获取索引库存放的位置。
2Directory directory=FSDirectory.open(new File("d:\\luceneindex\\index"));
创建索引读取流
1//第二步;创建一个indexReader索引读取流,指定Directory路径2IndexReader indexReader=DirectoryReader.open(directory);//第二步;创建一个indexReader索引读取流,指定Directory路径
2IndexReader indexReader=DirectoryReader.open(directory);
创建索引查找对象(IndexSearcher),并指定它所使用的索引读取流(IndexReader)
1//第三步:indexSearcher对象,需要指定IndexReader对象2IndexSearcher indexSearcher=new IndexSearcher(indexReader);//第三步:indexSearcher对象,需要指定IndexReader对象
2IndexSearcher indexSearcher=new IndexSearcher(indexReader);
创建一个TermQuery对象指定查询的关键词
1//第四步:创建一个TermQuery对象,指定查询的域和关键词2Query query=new TermQuery(new Term("fileName","apache"));//第四步:创建一个TermQuery对象,指定查询的域和关键词
2Query query=new TermQuery(new Term("fileName","apache"));
执行查询
1//第五步:执行查询2TopDocs topDocs = indexSearcher.search(query, 2);//第五步:执行查询
2TopDocs topDocs = indexSearcher.search(query, 2);
返回查询结果
1//第六步:返回查询结果,遍历查询结果并输出2ScoreDoc[] scoreDocs = topDocs.scoreDocs;//第六步:返回查询结果,遍历查询结果并输出
2ScoreDoc[] scoreDocs = topDocs.scoreDocs;
遍历查询结果
1for(ScoreDoc scoreDoc:scoreDocs) { 2 int doc = scoreDoc.doc; 3 Document document=indexSearcher.doc(doc); 4 //文件名 5 String fileName=document.get("fileName"); 6 System.out.println(fileName); 7 String fileCOntext=document.get("fileContext"); 8 System.out.println(fileContext); 9 String fileSize=document.get("fileSize");10 System.out.println(fileSize);11 String filePath=document.get("filePath");12 System.out.println(filePath);13}for(ScoreDoc scoreDoc:scoreDocs) {
2 int doc = scoreDoc.doc;
3 Document document=indexSearcher.doc(doc);
4 //文件名
5 String fileName=document.get("fileName");
6 System.out.println(fileName);
7 String fileCOntext=document.get("fileContext");
8 System.out.println(fileContext);
9 String fileSize=document.get("fileSize");
10 System.out.println(fileSize);
11 String filePath=document.get("filePath");
12 System.out.println(filePath);
13}
关闭流
// Step 7: close the IndexReader.
indexReader.close();
搜索使用的分析器和索引使用的分析器要一致。
创建一个IndexWriter对象,注意这个对象是包含索引存储路径,和所使用分析器的对象。
1public IndexWriter getIndexWriter() throws Exception{2 Directory directory = FSDirectory.open(new File("D:\\temp\\index"));3 // Directory directory = new RAMDirectory();//保存索引到内存中 (内存索引库)4 Analyzer analyzer = new StandardAnalyzer();// 官方推荐5 IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer);6 return new IndexWriter(directory, config);7}public IndexWriter getIndexWriter() throws Exception{
2 Directory directory = FSDirectory.open(new File("D:\\temp\\index"));
3 // Directory directory = new RAMDirectory();//保存索引到内存中 (内存索引库)
4 Analyzer analyzer = new StandardAnalyzer();// 官方推荐
5 IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer);
6 return new IndexWriter(directory, config);
7}
删除文档
1//全删除2@Test3public void testAllDelete() throws Exception {4 IndexWriter indexWriter = getIndexWriter();5 indexWriter.deleteAll();6 indexWriter.close();7}//全删除
2@Test
3public void testAllDelete() throws Exception {
4 IndexWriter indexWriter = getIndexWriter();
5 indexWriter.deleteAll();
6 indexWriter.close();
7}
条件删除
1//根据条件删除2@Test3public void testDelete() throws Exception {4 IndexWriter indexWriter = getIndexWriter();5 Query query = new TermQuery(new Term("fileName","apache"));6 indexWriter.deleteDocuments(query);7 indexWriter.close();8}//根据条件删除
2@Test
3public void testDelete() throws Exception {
4 IndexWriter indexWriter = getIndexWriter();
5 Query query = new TermQuery(new Term("fileName","apache"));
6 indexWriter.deleteDocuments(query);
7 indexWriter.close();
8}
使用updateDocument方法
1//修改 2@Test 3public void testUpdate() throws Exception { 4 IndexWriter indexWriter = getIndexWriter(); 5 Document doc = new Document(); 6 doc.add(new TextField("fileN", "测试文件名",Store.YES)); 7 doc.add(new TextField("fileC", "测试文件内容",Store.YES)); 8 indexWriter.updateDocument(new Term("fileName","lucene"), doc, new IKAnalyzer()); 9 indexWriter.close();10}//修改
2@Test
3public void testUpdate() throws Exception {
4 IndexWriter indexWriter = getIndexWriter();
5 Document doc = new Document();
6 doc.add(new TextField("fileN", "测试文件名",Store.YES));
7 doc.add(new TextField("fileC", "测试文件内容",Store.YES));
8 indexWriter.updateDocument(new Term("fileName","lucene"), doc, new IKAnalyzer());
9 indexWriter.close();
10}
查询所有:使用MatchAllDocsQuery类
1//查询所有 2@Test 3public void testMatchAllDocsQuery() throws Exception { 4 IndexSearcher indexSearcher = getIndexSearcher(); 5 Query query = new MatchAllDocsQuery(); 6 System.out.println(query); 7 printResult(indexSearcher, query); 8 //关闭资源 9 indexSearcher.getIndexReader().close();10}//查询所有
2@Test
3public void testMatchAllDocsQuery() throws Exception {
4 IndexSearcher indexSearcher = getIndexSearcher();
5 Query query = new MatchAllDocsQuery();
6 System.out.println(query);
7 printResult(indexSearcher, query);
8 //关闭资源
9 indexSearcher.getIndexReader().close();
10}
精确查询:TermQuery
根据数值范围查询
1//根据数值范围查询 2@Test 3public void testNumericRangeQuery() throws Exception { 4 IndexSearcher indexSearcher = getIndexSearcher(); 5 //false是不包含左边区间,true是包含右边区间 6 Query query = NumericRangeQuery.newLongRange("fileSize", 47L, 200L, false, true); 7 System.out.println(query); 8 printResult(indexSearcher, query); 9 //关闭资源10 indexSearcher.getIndexReader().close();11}//根据数值范围查询
2@Test
3public void testNumericRangeQuery() throws Exception {
4 IndexSearcher indexSearcher = getIndexSearcher();
5 //false是不包含左边区间,true是包含右边区间
6 Query query = NumericRangeQuery.newLongRange("fileSize", 47L, 200L, false, true);
7 System.out.println(query);
8 printResult(indexSearcher, query);
9 //关闭资源
10 indexSearcher.getIndexReader().close();
11}
组合查询
1//可以组合查询条件 2@Test 3public void testBooleanQuery() throws Exception { 4 IndexSearcher indexSearcher = getIndexSearcher(); 5 BooleanQuery booleanQuery = new BooleanQuery(); 6 Query query1 = new TermQuery(new Term("fileName","apache")); 7 Query query2 = new TermQuery(new Term("fileName","lucene")); 8 // select * from user where id =1 or name = 'safdsa' 9 booleanQuery.add(query1, Occur.MUST);//必须,该名字上必须有apache10 booleanQuery.add(query2, Occur.SHOULD);//非必须,可以没有11 System.out.println(booleanQuery);12 printResult(indexSearcher, booleanQuery);13 //关闭资源14 indexSearcher.getIndexReader().close();15}//可以组合查询条件
2@Test
3public void testBooleanQuery() throws Exception {
4 IndexSearcher indexSearcher = getIndexSearcher();
5 BooleanQuery booleanQuery = new BooleanQuery();
6 Query query1 = new TermQuery(new Term("fileName","apache"));
7 Query query2 = new TermQuery(new Term("fileName","lucene"));
8 // select * from user where id =1 or name = 'safdsa'
9 booleanQuery.add(query1, Occur.MUST);//必须,该名字上必须有apache
10 booleanQuery.add(query2, Occur.SHOULD);//非必须,可以没有
11 System.out.println(booleanQuery);
12 printResult(indexSearcher, booleanQuery);
13 //关闭资源
14 indexSearcher.getIndexReader().close();
15}
利用QueryParser查询
1//条件解释的对象查询 2@Test 3public void testQueryParser() throws Exception { 4 IndexSearcher indexSearcher = getIndexSearcher(); 5 //参数1: 默认查询的域 6 //参数2:采用的分析器 7 QueryParser queryParser = new QueryParser("fileName",new IKAnalyzer()); 8 // *:* 域:值 9 Query query = queryParser.parse("fileName:lucene is apache OR fileContent:lucene is apache");1011 printResult(indexSearcher, query);12 //关闭资源13 indexSearcher.getIndexReader().close();14}//条件解释的对象查询
2@Test
3public void testQueryParser() throws Exception {
4 IndexSearcher indexSearcher = getIndexSearcher();
5 //参数1: 默认查询的域
6 //参数2:采用的分析器
7 QueryParser queryParser = new QueryParser("fileName",new IKAnalyzer());
8 // *:* 域:值
9 Query query = queryParser.parse("fileName:lucene is apache OR fileContent:lucene is apache");
10
11 printResult(indexSearcher, query);
12 //关闭资源
13 indexSearcher.getIndexReader().close();
14}
多个默认查询域
1//条件解析的对象查询 多个默念域 2@Test 3public void testMultiFieldQueryParser() throws Exception { 4 IndexSearcher indexSearcher = getIndexSearcher(); 5 String[] fields = {"fileName","fileContent"}; 6 //参数1: 默认查询的域 7 //参数2:采用的分析器 8 MultiFieldQueryParser queryParser = new MultiFieldQueryParser(fields,new IKAnalyzer()); 9 // *:* 域:值10 Query query = queryParser.parse("lucene is apache");11 printResult(indexSearcher, query);12 //关闭资源13 indexSearcher.getIndexReader().close();14}//条件解析的对象查询 多个默念域
2@Test
3public void testMultiFieldQueryParser() throws Exception {
4 IndexSearcher indexSearcher = getIndexSearcher();
5 String[] fields = {"fileName","fileContent"};
6 //参数1: 默认查询的域
7 //参数2:采用的分析器
8 MultiFieldQueryParser queryParser = new MultiFieldQueryParser(fields,new IKAnalyzer());
9 // *:* 域:值
10 Query query = queryParser.parse("lucene is apache");
11 printResult(indexSearcher, query);
12 //关闭资源
13 indexSearcher.getIndexReader().close();
14}