作者:聂依依mma | 来源:互联网 | 2023-06-16 18:57
摘要:本文介绍如何在 Eclipse 中新建 Maven 项目 solr、在 pom.xml 中加入 SolrJ 依赖,并在项目下新建 updoctest 类(package com.linbin.solr),通过 Solr Cell 把 doc 文档导入 Solr 索引。
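1. 在 Eclipse 中新建 Maven 项目 solr,并在 pom.xml 中加入 SolrJ 依赖(原文的 XML 片段在转载时丢失,以下按文中使用的 Solr 7.5.0 版本补全):
<dependency>
    <groupId>org.apache.solr</groupId>
    <artifactId>solr-solrj</artifactId>
    <version>7.5.0</version>
</dependency>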
2. 在项目下新建类 updoctest,代码如下:
package com.linbin.solr;import java.io.File;
import java.io.IOException;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.AbstractUpdateRequest.ACTION;
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;public class updoctest {public static String solrUrl = "http://centos7:8983/solr/mycore"; public static void main(String[] args) throws Exception {//查询 findIndex1();//删除 deleteIndexById();// 导入doc文档String fileName = "/home/linbin/文档/能工巧匠进校园.doc";String solrId = "能工巧匠进校园.doc";indexFilesSolrCell(solrId, solrId,fileName);}// 查询测试public static void findIndex1() throws IOException, SolrServerException {HttpSolrClient solrClient = new HttpSolrClient.Builder(solrUrl).build(); SolrQuery query = new SolrQuery(); // 创建搜索对象 query.set("q","*:*"); // 设置搜索条件query.setRows(10); //设置每页显示多少条QueryResponse response = solrClient.query(query); //发起搜索请求SolrDocumentList docs = response.getResults(); // 查询结果long cnt = docs.getNumFound(); // 查询结果总数System.out.println("总条数为"+cnt+"条"); for (SolrDocument doc : docs) {// System.out.println(doc);System.out.println("-------------\r\n");System.out.println("id:"+ doc.get("id") + ",autor:"+ doc.get("author") + ",text:"+ doc.get("text"));}solrClient.close();}//删除测试
public static void deleteIndexById() throws IOException, SolrServerException {HttpSolrClient solrClient = new HttpSolrClient.Builder(solrUrl).build(); //全删 //solrClient.deleteByQuery("*:*"); //模糊匹配删除(带有分词效果的删除)solrClient.deleteByQuery("id:solr-word.pdf"); //指定id删除 //solrClient.deleteById("1"); solrClient.commit();}// 导入doc文档测试
public static void indexFilesSolrCell(String fileName, String solrId, String path)throws IOException, SolrServerException{SolrClient solr = new HttpSolrClient.Builder(solrUrl).build();ContentStreamUpdateRequest up = new ContentStreamUpdateRequest("/update/extract");String contentType = getFileContentType(fileName);up.addFile(new File(path), contentType);up.setParam("literal.id", fileName);up.setParam("uprefix", "ignored_");up.setParam("fmap.content", "text");//文件内容up.setAction(ACTION.COMMIT, true, true);solr.request(up);System.out.println("upload ok! \r\n");}//根据文件拓展名获取文件类型
public static String getFileContentType(String filename) {String contentType = "";String prefix = filename.substring(filename.lastIndexOf(".") + 1);if (prefix.equals("xlsx")) {contentType = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";} else if (prefix.equals("pdf")) {contentType = "application/pdf";} else if (prefix.equals("doc")) {contentType = "application/msword";} else if (prefix.equals("txt")) {contentType = "text/plain";} else if (prefix.equals("xls")) {contentType = "application/vnd.ms-excel";} else if (prefix.equals("docx")) {contentType = "application/vnd.openxmlformats-officedocument.wordprocessingml.document";} else if (prefix.equals("ppt")) {contentType = "application/vnd.ms-powerpoint";} else if (prefix.equals("pptx")) {contentType = "application/vnd.openxmlformats-officedocument.presentationml.presentation";}else {contentType = "othertype";}return contentType;}}
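2. 在 solr 的 core 目录下的 solrconfig.xml 增加如下内容(原文的 XML 标签在转载时丢失,以下按 Solr Cell 的标准配置补全):
<requestHandler name="/update/extract" startup="lazy" class="solr.extraction.ExtractingRequestHandler">
  <lst name="defaults">
    <str name="lowernames">true</str>
    <str name="uprefix">ignored_</str>
    <str name="fmap.content">text</str>
  </lst>
</requestHandler>
其中 <str name="uprefix">ignored_</str> 部分的作用是:读取文件时,给不需要映射(schema 中未定义)的字段加上 ignored_ 前缀,从而把它们忽略掉;
<str name="fmap.content">text</str> 的作用是:把从文件中提取出的 content 字段映射为 solr 的 text 字段。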
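3. 修改 managed-schema 文件,增加(原文的 XML 片段在转载时丢失,以下为补全内容):
<dynamicField name="ignored_*" type="ignored" multiValued="true"/>
这个是生成一个动态字段,类型为 ignored,用来承接上一步中被加上 ignored_ 前缀而忽略的那些字段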
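4. 检查 solrconfig.xml 中加载 Solr Cell 相关 jar 的 <lib> 配置(原文的 XML 片段在转载时丢失,Solr 7.5.0 默认配置中通常为):
<lib dir="${solr.install.dir:../../../..}/contrib/extraction/lib" regex=".*\.jar" />
<lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-cell-\d.*\.jar" />
检查以上路径是否匹配,路径是相对于建立的 mycore 目录的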
5. 在mycore目录下建立lib目录(如果没有)
复制 solr-7.5.0/contrib/extraction/lib下的所有文件 到mycore/lib目录
复制 solr-7.5.0/dist/solr-cell-7.5.0.jar 到mycore/lib目录
6.重新启动solr,如正常启动,再在eclipse 运行第1步建立的java程序
7. 在solr网页查询可以检查到已上传doc文件的索引