2019独角兽企业重金招聘Python工程师标准>>>
下载安装包并解压
cd /usr/local
wget http://archive.cloudera.com/cdh5/cdh/5/flume-ng-1.6.0-cdh5.7.1.tar.gz
tar -xvf flume-ng-1.6.0-cdh5.7.1.tar.gz
rm flume-ng-1.6.0-cdh5.7.1.tar.gz
mv apache-flume-1.6.0-cdh5.7.1 flume-1.6.0-cdh5.7.1
配置环境变量
cd /usr/local
vim .bash_profile
export FLUME_HOME=/usr/local/flume-1.6.0-cdh5.7.1
export PATH=$PATH:$FLUME_HOME/bin
source .bash_profile
配置flume-env.sh文件
cd flume-1.6.0-cdh5.7.1/conf/
cp flume-env.sh.template flume-env.sh
vim flume-env.sh
export JAVA_HOME=/usr/local/jdk1.7.0_79
版本验证
flume-ng version
部署
flume最主要的是sink、source和channel这三个组件。
cd /usr/local/flume-1.6.0-cdh5.7.1/conf/
vim test.conf
# agent1的组件名称
agent1.sources = source1
agent1.sinks = sink1
agent1.channels = channel1
# 指定Flume source(要监听的路径)
#(taildir source ---------windows 暂不支持这种source)
agent1.sources.source1.type = taildir
agent1.sources.source1.positionFile = /flume/account/taildir_position.json
agent1.sources.source1.filegroups = f1
agent1.sources.source1.filegroups.f1 =/root/btc/logs/account/dcp/multiple.log
agent1.sources.source1.headers.f1.headerKey1 = value1
agent1.sources.source1.fileHeader = true
# agent1.sources.source1.type  (残缺的配置行:type 已在前面设置为 taildir,保留此行会把 type 覆盖为空值)
#(exec source ---------)
#agent1.sources.source1.type = exec
#agent1.sources.source1.command = tail -F /var/log/secure
# 指定Flume sink
agent1.sinks.sink1.type = org.apache.flume.sink.kafka.KafkaSink
agent1.sinks.sink1.topic = test
#如果kafka是集群,多个broker地址之间用逗号分隔即可
agent1.sinks.sink1.brokerList = 192.168.80.110:9092
agent1.sinks.sink1.requiredAcks = 1
agent1.sinks.sink1.batchSize = 100
#自定义sink(写东西进数据库)
agent1.sinks.mysqlSink.type =com.us.flume.MySink
agent1.sinks.mysqlSink.hostname=localhost
agent1.sinks.mysqlSink.port=3306
agent1.sinks.mysqlSink.databaseName=sinktest
agent1.sinks.mysqlSink.tableName=test
agent1.sinks.mysqlSink.user=root
agent1.sinks.mysqlSink.password=xxxxxx
# 指定Flume channel(内存channel)
agent1.channels.channel1.type = memory
agent1.channels.channel1.capacity = 1000
agent1.channels.channel1.transactionCapacity = 100
# 持久化channel,文件channel
#agent1.channels.file_channel.type = file
#agent1.channels.file_channel.checkpointDir = /var/log/flume-ng/checkpoint
#agent1.channels.file_channel.dataDirs = /var/log/flume-ng/data
# 绑定source和sink到channel上
agent1.sources.source1.channels = channel1
agent1.sinks.sink1.channel = channel1
启动flume命令
#-c 是flume 安装目录下的conf
#-f 是配置文件的绝对路径,可以不在flume的安装目录下
#-n 是配置文件中的agent1 ,只是一个名称
#-Dflume.root.logger=INFO,console是log类型,这里是控制台输出
flume-ng agent -c /usr/local/flume-1.6.0-cdh5.7.1/conf -f /usr/local/flume-1.6.0-cdh5.7.1/conf/test.conf -n agent1 -Dflume.root.logger=INFO,console
#Windows下的启动命令
flume-ng.cmd agent -conf ../conf -conf-file ../conf/mysqlSink.conf -name agent1 -property flume.root.logger=INFO,console
flume 更详细的内容可以查看官方文档,里面有很多种sink和source,我这里只是列举了我所使用的两种。