这里就简单贴个配置吧,有问题欢迎留言探讨
sink的配置
##############################define [sink] begin##############################
#define the sink k1
a1.sinks.k1.type=hdfs
a1.sinks.k1.channel=c1
a1.sinks.k1.hdfs.useLocalTimeStamp=true
a1.sinks.k1.hdfs.path=hdfs://192.168.11.177:9000/flume/events/%Y/%m/%d
a1.sinks.k1.hdfs.filePrefix=cmcc-%H
a1.sinks.k1.hdfs.fileType=DataStream
a1.sinks.k1.hdfs.minBlockReplicas=1
a1.sinks.k1.hdfs.rollInterval=3600
a1.sinks.k1.hdfs.rollSize=0
a1.sinks.k1.hdfs.rollCount=0
a1.sinks.k1.hdfs.idleTimeout=0
#define the sink k2
a1.sinks.k2.channel=c2
a1.sinks.k2.type=com.cmcc.chiwei.kafka.CmccKafkaSink
a1.sinks.k2.metadata.broker.list=192.168.11.174:9092,192.168.11.175:9092,192.168.11.196:9092
a1.sinks.k2.partition.key=0
a1.sinks.k2.partitioner.class=com.cmcc.chiwei.kafka.CmccPartition
a1.sinks.k2.serializer.class=kafka.serializer.StringEncoder
a1.sinks.k2.request.required.acks=0
a1.sinks.k2.cmcc.encoding=UTF-8
a1.sinks.k2.cmcc.topic.name=cmcc
a1.sinks.k2.producer.type=async
a1.sinks.k2.batchSize=100
##############################define [sink] end##############################

这里是将同样的数据无差异地sink到多个输出端,所以source的通道选择器使用复制(replicating)模式:
a1.sources.r1.selector.type=replicating

两个输出分支中,一个到kafka,为了提高性能,用内存通道;另一个到hdfs,用于离线分析,用文件通道。channel的配置如下:
##############################define [channel] begin##############################
#define the channel c1
a1.channels.c1.type=file
a1.channels.c1.checkpointDir=/home/flume/flumeCheckpoint
a1.channels.c1.dataDirs=/home/flume/flumeData,/home/flume/flumeDataExt
a1.channels.c1.capacity=2000000
a1.channels.c1.transactionCapacity=100
#define the channel c2
a1.channels.c2.type=memory
a1.channels.c2.capacity=2000000
a1.channels.c2.transactionCapacity=100
##############################define [channel] end##############################

以上就是将无差异数据输出到多个sink的配置。如果你想把不同的数据发送到不同的sink,需要用到通道选择器的多路复用(multiplexing)模式,请参考【通道的多路复用】
【Flume】flume多个输出分支的配置,多sink,多channel
原文地址:http://blog.csdn.net/simonchi/article/details/43730589