作者:zh1234之歌 | 来源:互联网 | 2023-08-18 18:32
摘要:1. 创建本地目录 $ mkdir /usr/local/contentplatform/solr/solr/core1/file1 $ ls -lh total 88M -rw-r--r-- 1 tnuser app……
1. 创建本地目录
$ mkdir /usr/local/contentplatform/solr/solr/core1/file1
$ ls -lh
total 88M
-rw-r--r-- 1 tnuser appuser 14M May 14 20:11 apache_hbase_reference_guide.pdf
-rw-r--r-- 1 tnuser appuser 7.4M Apr 28 23:00 Architecting_HBase_Applications.pdf
-rw-r--r-- 1 tnuser appuser 14M Jan 15 2014 Cloudera_Hadoop_Test_Cases.docx
-rw-r--r-- 1 tnuser appuser 6.6M Apr 21 21:01 HBase_Administration_Cookbook.pdf
-rw-r--r-- 1 tnuser appuser 2.1M Apr 28 22:58 HBase_Essentials.pdf
-rw-r--r-- 1 tnuser appuser 25M Apr 9 16:16 Hbase-HBase实战.pdf
-rw-r--r-- 1 tnuser appuser 7.9M Nov 13 2015 HBase.in.Action.pdf
-rw-r--r-- 1 tnuser appuser 13M Apr 28 22:44 HBase:The_Definitive_Guide.pdf
2. 在core的conf目录修改配置文件solrconfig.xml配置dataimport请求处理器
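<requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler">
  <lst name="defaults">
    <str name="config">data-config.xml</str>
  </lst>
</requestHandler>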
3. 在conf目录新建data-config.xml文件并添加数据源的引用
<entity name="file1"
dataSource="fileDataSource"
processor="FileListEntityProcessor"
baseDir="/usr/local/contentplatform/solr/solr/core1/file1"
fileName=".*\.(pdf|doc|docx|ppt|pptx|xls|xlsx|odf|txt|rtf|html|htm|jpg|csv)"
onError="skip"
recursive="true"
rootEntity="false">
<entity name="documentImport1"
processor="TikaEntityProcessor"
url="${file1.fileAbsolutePath}"
format="text"
dataSource="binFileDataSource"
onError="skip"
recursive="true">
4.修改conf目录下的schema.xml文件,添加以下内容
5. 重新加载配置文件
6. 通过DIH导入本地的文件
7. 查看导入的文档
{
"responseHeader": {
"status": 0,
"QTime": 1,
"params": {
"indent": "true",
"q": "*:*",
"_": "1564127787808",
"wt": "json"
}
},
"response": {
"numFound": 8,
"start": 0,
"docs": [
{
"id": "Hbase-HBase实战.pdf",
"title": [
"HBASE 实战=HBASE IN ACTION"
],
"author": "(美)NICK DIMIDUK著;谢磊译",
"author_s": "(美)NICK DIMIDUK著;谢磊译",
"_version_": 1640106408929132500
},
{
"id": "apache_hbase_reference_guide.pdf",
"title": [
"Apache HBase ™ Reference Guide"
],
"author": "Apache HBase Team",
"author_s": "Apache HBase Team",
"_version_": 1640106415302377500
},
{
"id": "Architecting_HBase_Applications.pdf",
"title": [
"Architecting HBase Applications"
],
"author": "Jean-Marc Spaggiari & Kevin O'Dell",
"author_s": "Jean-Marc Spaggiari & Kevin O'Dell",
"_version_": 1640106423153066000
},
{
"id": "HBase_Administration_Cookbook.pdf",
"_version_": 1640106425323618300
},
{
"id": "HBase_Essentials.pdf",
"title": [
""
],
"author": "",
"author_s": "",
"_version_": 1640106427129266200
},
{
"id": "HBase.in.Action.pdf",
"title": [
"HBase in Action"
],
"author": "Nick Dimiduk, Amandeep Khurana",
"author_s": "Nick Dimiduk, Amandeep Khurana",
"_version_": 1640106439293796400
},
{
"id": "HBase:The_Definitive_Guide.pdf",
"title": [
"HBase: The Definitive Guide"
],
"author": "Lars George",
"author_s": "Lars George",
"_version_": 1640106444193792000
},
{
"id": "Cloudera_Hadoop_Test_Cases.docx",
"author": "FeiLong, Li [DBA]",
"author_s": "FeiLong, Li [DBA]",
"_version_": 1640106445801259000
}
]
}
}