作者:可卡因 | 来源:互联网 | 2023-09-01 20:04
Lucene最主要用到的有两个部分,一个是创建索引,一个是查询索引。说明下下面的代码注意事项:1、如果自己比较懒的话,就把文档路径和索引路径创建到项目下,方便查看;2、我下面的两个例子都是在D盘下创建Lucene……
Lucene最主要用到的有两个部分,一个是创建索引,一个是查询索引
说明下下面的代码注意事项:
1、如果自己比较懒的话,就把文档路径和索引路径创建到项目下,方便查看
2、我下面的两个例子都是在D盘下创建Lucene,然后分别在D:/Lucene下创建Index和Data两个文件夹
3、在Data中我创建了两个文件:record1.txt和record2.txt,内容分别为txt1和txt2,当然你也可以创建更多和更复杂的内容的文本
4、在搜索的时候我搜的是txt2所以能查到record2.txt。。-_-
一、创建索引
1、创建索引常用到的类有以下几个(有些是基类,其子类就不过多列举了,具体使用的时候再查)
IndexWriter
Directory
Analyzer
Document
Field
具体意思就不多说了,如果你不清楚,点击这个链接http://pan.baidu.com/s/1pLrZzWB下载《lucene in action_中文版.pdf》,在第25页可以看到解释,
下面是关于创建索引的一个例子:
package com.burns.lucene_in_action;
import java.io.File;
import java.io.FileFilter;
import java.io.FileReader;
import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexFileNameFilter;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import com.yiibai.lucene.TextFileFilter;
/**
 * Builds a Lucene index from the files found directly inside a data directory.
 *
 * <p>Every accepted file becomes one document with three fields: {@code contents}
 * (the file text, read through a {@link FileReader}), {@code filename} and
 * {@code fullpath} (both stored and not analyzed).
 */
public class Indexer {
    /**
     * Indexes the hard-coded data directory into the hard-coded index directory and
     * prints how many documents the index holds and how long the run took.
     *
     * @param args ignored; the paths are fixed below
     * @throws IOException if the index cannot be opened, written or closed, or the
     *         data directory cannot be listed
     */
    public static void main(String[] args) throws IOException {
        // To take the paths from the command line instead, read the index directory
        // from args[0] and the data directory from args[1], and reject any other
        // argument count with a usage message built from Indexer.class.getName().
        String indexDir = "D:\\Lucene\\Index";
        String dataDir = "D:\\Lucene\\Data";
        long start = System.currentTimeMillis();
        Indexer indexer = new Indexer(indexDir);
        int numIndexed;
        try {
            numIndexed = indexer.index(dataDir, new TextFileFilter());
        } finally {
            // Always release the writer, even when indexing fails.
            indexer.close();
        }
        long end = System.currentTimeMillis();
        System.out.println("Indexing" + numIndexed + "files took" + (end - start) + "milliseconds");
    }

    /** Writer used to add documents; created by the constructor. */
    public IndexWriter writer;

    /** Directory backing {@link #writer}; kept so that {@link #close()} can release it. */
    private final Directory directory;

    /**
     * Opens the index directory and creates a writer over it. The writer is
     * constructed with {@code create = true} and an unlimited field length.
     *
     * @param indexDir path of the directory that holds the index
     * @throws IOException if the directory or the writer cannot be opened
     */
    @SuppressWarnings("deprecation")
    public Indexer(String indexDir) throws IOException {
        directory = FSDirectory.open(new File(indexDir));
        boolean opened = false;
        try {
            writer = new IndexWriter(directory, new StandardAnalyzer(Version.LUCENE_30), true,
                    IndexWriter.MaxFieldLength.UNLIMITED);
            opened = true;
        } finally {
            if (!opened) {
                // Do not leak the directory when the writer cannot be created.
                directory.close();
            }
        }
    }

    /**
     * Closes the index writer and then the directory it writes to.
     *
     * @throws CorruptIndexException if the writer reports a corrupt index
     * @throws IOException if closing the writer or the directory fails
     */
    public void close() throws CorruptIndexException, IOException {
        try {
            writer.close();
        } finally {
            directory.close();
        }
    }

    /**
     * Indexes every regular, visible, readable file directly inside {@code dataDir}
     * that the filter accepts. Sub-directories are skipped, not descended into.
     *
     * @param dataDir path of the directory whose files are indexed
     * @param filter  decides which files are indexed; {@code null} accepts all files
     * @return the value of {@code writer.numDocs()} after indexing
     * @throws IOException if {@code dataDir} cannot be listed or a file cannot be indexed
     */
    public int index(String dataDir, FileFilter filter) throws IOException {
        File[] files = new File(dataDir).listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a directory or cannot be read.
            throw new IOException("Cannot list data directory: " + dataDir);
        }
        for (File file : files) {
            if (!file.isDirectory() && !file.isHidden() && file.exists() && file.canRead()
                    && (filter == null || filter.accept(file))) {
                indexFile(file);
            }
        }
        return writer.numDocs();
    }

    /** Accepts files whose name ends with {@code .txt}, ignoring case. */
    private static class TextFileFilter implements FileFilter {
        @Override
        public boolean accept(File pathname) {
            return pathname.getName().toLowerCase().endsWith(".txt");
        }
    }

    /**
     * Builds the document for one file.
     *
     * @param f file to turn into a document
     * @return a document with the {@code contents}, {@code filename} and {@code fullpath} fields
     * @throws IOException if the file cannot be opened or its canonical path cannot be resolved
     */
    protected Document getDocument(File f) throws IOException {
        Document doc = new Document();
        // NOTE(review): the reader is handed to Lucene and not closed here; presumably it is
        // consumed and closed while the document is added -- confirm against the Lucene docs.
        doc.add(new Field("contents", new FileReader(f)));
        doc.add(new Field("filename", f.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field("fullpath", f.getCanonicalPath(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        return doc;
    }

    /** Logs the canonical path of the file and adds its document to the index. */
    private void indexFile(File f) throws IOException {
        System.out.println("Indexing" + f.getCanonicalPath());
        Document doc = getDocument(f);
        writer.addDocument(doc);
    }
}
执行结果如下:
IndexingD:\Lucene\Data\record1.txt
IndexingD:\Lucene\Data\record2.txt
Indexing2files took1954milliseconds
二、搜索/查询索引
常用的类如下:
IndexSearcher
Term
Query
TermQuery
TopDocs
例子代码如下:
package com.burns.lucene_in_action;
import java.io.File;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Runs a query against the {@code contents} field of an existing Lucene index and
 * prints the {@code fullpath} field of every matching document.
 */
public class Searcher {
    /**
     * Searches the hard-coded index directory for the hard-coded query string.
     *
     * @param args ignored; the index path and the query are fixed below
     * @throws Exception if the index cannot be opened or the query cannot be parsed or run
     */
    public static void main(String[] args) throws Exception {
        // To take the inputs from the command line instead, read the index directory
        // from args[0] and the query from args[1], and reject any other argument count
        // with a usage message built from Searcher.class.getName().
        String indexDir = "D:\\Lucene\\Index";
        String q = "txt2";
        search(indexDir, q);
    }

    /**
     * Parses {@code q} with a {@link StandardAnalyzer}, fetches at most ten hits and
     * prints the hit count, the search time and the path of each hit.
     *
     * @param indexDir path of the directory that holds the index
     * @param q        query expression, parsed against the {@code contents} field
     * @throws Exception if the index cannot be read or the query cannot be parsed
     */
    private static void search(String indexDir, String q) throws Exception {
        Directory dir = FSDirectory.open(new File(indexDir));
        try {
            IndexSearcher is = new IndexSearcher(dir);
            try {
                QueryParser parser = new QueryParser(Version.LUCENE_30, "contents",
                        new StandardAnalyzer(Version.LUCENE_30));
                Query query = parser.parse(q);
                // Only the search call itself is timed, not index opening or query parsing.
                long start = System.currentTimeMillis();
                TopDocs hits = is.search(query, 10);
                long end = System.currentTimeMillis();
                System.err.print("Found" + hits.totalHits + "document(s) (in " + (end - start)
                        + "milliseconds that matched query'" + q + "':");
                for (ScoreDoc scoreDoc : hits.scoreDocs) {
                    Document doc = is.doc(scoreDoc.doc);
                    System.out.println(doc.get("fullpath"));
                }
            } finally {
                // Release the searcher even when parsing or searching fails.
                is.close();
            }
        } finally {
            dir.close();
        }
    }
}
执行结果如下:
Found1document(s) (in 9milliseconds that matched query'txt2':D:\Lucene\Data\record2.txt