package org.jai.flume.agent; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.UUID; import org.apache.flume.Channel; import org.apache.flume.ChannelSelector; import org.apache.flume.Context; import org.apache.flume.EventDeliveryException; import org.apache.flume.agent.embedded.EmbeddedAgent; import org.apache.flume.channel.ChannelProcessor; import org.apache.flume.channel.MemoryChannel; import org.apache.flume.channel.MultiplexingChannelSelector; import org.apache.flume.conf.Configurables; import org.apache.flume.sink.AvroSink; import org.apache.flume.sink.RollingFileSink; import org.apache.flume.source.AvroSource; import org.jai.flume.sinks.elasticsearch.FlumeESSinkService; import org.jai.flume.sinks.hbase.FlumeHbaseSinkService; import org.jai.flume.sinks.hdfs.FlumeHDFSSinkService; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; @Service public class FlumeAgentServiceImpl implements FlumeAgentService { private static final Logger LOG = LoggerFactory .getLogger(FlumeAgentServiceImpl.class); @Autowired private FlumeHDFSSinkService flumeHDFSSinkService; @Autowired private FlumeESSinkService flumeESSinkService; @Autowired private FlumeHbaseSinkService flumeHbaseSinkService; private static EmbeddedAgent agent; // Ideally external avro source to consume embedded agent data. For testing // here. private AvroSource avroSource; // Rolling file sink in case no actual sink integration required, for // testing private RollingFileSink sink; private Channel channel; // Spark sink for real time analytics calculations. private AvroSink sparkAvroSink; private Channel sparkAvroChannel; @Override public void setup() { createSparkAvroSink(); createAvroSourceWithSelectorHDFSAndESSinks(); createAgent(); } private void createAvroSourceWithSelectorHDFSAndESSinks() { Channel ESChannel = flumeESSinkService.getChannel(); Channel HDFSChannel = flumeHDFSSinkService.getChannel(); Channel HbaseChannel = flumeHbaseSinkService.getChannel(); final Map<String, String> properties = new HashMap<String, String>(); properties.put("type", "avro"); properties.put("bind", "localhost"); properties.put("port", "44444"); avroSource = new AvroSource(); avroSource.setName("AvroSource-" + UUID.randomUUID()); Context sourceContext = new Context(properties); avroSource.configure(sourceContext); ChannelSelector selector = new MultiplexingChannelSelector(); List<Channel> channels = new ArrayList<>(); channels.add(ESChannel); channels.add(HDFSChannel); channels.add(sparkAvroChannel); channels.add(HbaseChannel); selector.setChannels(channels); final Map<String, String> selectorProperties = new HashMap<String, String>(); selectorProperties.put("type", "multiplexing"); selectorProperties.put("header", "State"); // selectorProperties.put("mapping.VIEWED", HDFSChannel.getName() + " " // + ESChannel.getName()); // selectorProperties.put("mapping.FAVOURITE", HDFSChannel.getName() + // " " // + ESChannel.getName()); // selectorProperties.put("default", HDFSChannel.getName()); // In case spark avro sink is used. selectorProperties.put("mapping.VIEWED", HDFSChannel.getName() + " " + ESChannel.getName() + " " + sparkAvroChannel.getName() + " " + HbaseChannel.getName()); selectorProperties.put("mapping.FAVOURITE", HDFSChannel.getName() + " " + ESChannel.getName() + " " + sparkAvroChannel.getName() + " " + HbaseChannel.getName()); selectorProperties.put("default", HDFSChannel.getName() + " " + sparkAvroChannel.getName() + " " + HbaseChannel.getName()); Context selectorContext = new Context(selectorProperties); selector.configure(selectorContext); ChannelProcessor cp = new ChannelProcessor(selector); avroSource.setChannelProcessor(cp); avroSource.start(); } // Note: Use in case just want to dump data to rolling file sink. @SuppressWarnings("unused") private void createAvroSourceWithLocalFileRollingSink() { channel = new MemoryChannel(); String channelName = "AvroSourceMemoryChannel-" + UUID.randomUUID(); channel.setName(channelName); sink = new RollingFileSink(); sink.setName("RollingFileSink-" + UUID.randomUUID()); Map<String, String> paramters = new HashMap<>(); paramters.put("type", "file_roll"); paramters.put("sink.directory", "target/flumefilelog"); Context sinkContext = new Context(paramters); sink.configure(sinkContext); Configurables.configure(channel, sinkContext); sink.setChannel(channel); final Map<String, String> properties = new HashMap<String, String>(); properties.put("type", "avro"); properties.put("bind", "localhost"); properties.put("port", "44444"); properties.put("selector.type", "multiplexing"); properties.put("selector.header", "State"); properties.put("selector.mapping.VIEWED", channelName); properties.put("selector.mapping.default", channelName); avroSource = new AvroSource(); avroSource.setName("AvroSource-" + UUID.randomUUID()); Context sourceContext = new Context(properties); avroSource.configure(sourceContext); ChannelSelector selector = new MultiplexingChannelSelector(); List<Channel> channels = new ArrayList<>(); channels.add(channel); selector.setChannels(channels); final Map<String, String> selectorProperties = new HashMap<String, String>(); properties.put("default", channelName); Context selectorContext = new Context(selectorProperties); selector.configure(selectorContext); ChannelProcessor cp = new ChannelProcessor(selector); avroSource.setChannelProcessor(cp); sink.start(); channel.start(); avroSource.start(); } private void createSparkAvroSink() { sparkAvroChannel = new MemoryChannel(); Map<String, String> channelParamters = new HashMap<>(); channelParamters.put("capacity", "100000"); channelParamters.put("transactionCapacity", "1000"); Context channelContext = new Context(channelParamters); Configurables.configure(sparkAvroChannel, channelContext); String channelName = "SparkAvroMemoryChannel-" + UUID.randomUUID(); sparkAvroChannel.setName(channelName); sparkAvroSink = new AvroSink(); sparkAvroSink.setName("SparkAvroSink-" + UUID.randomUUID()); Map<String, String> paramters = new HashMap<>(); paramters.put("type", "avro"); paramters.put("hostname", "localhost"); paramters.put("port", "41111"); paramters.put("batch-size", "100"); Context sinkContext = new Context(paramters); sparkAvroSink.configure(sinkContext); Configurables.configure(sparkAvroSink, sinkContext); sparkAvroSink.setChannel(sparkAvroChannel); sparkAvroChannel.start(); sparkAvroSink.start(); } @Override public void shutdown() { try{ if (agent != null) { agent.stop(); } if (avroSource != null) { sparkAvroChannel.stop(); sparkAvroSink.stop(); channel.stop(); sink.stop(); avroSource.stop(); } } catch(Exception ex) { //Do nothing } } @Override public void processAllEvents() { try { flumeHDFSSinkService.getSink().process(); flumeESSinkService.getSink().process(); sparkAvroSink.process(); flumeHbaseSinkService.getSink().process(); } catch (EventDeliveryException e) { String errMsg = "Error processing event!"; LOG.error(errMsg, e); throw new RuntimeException(errMsg, e); } } private void createAgent() { final Map<String, String> properties = new HashMap<String, String>(); properties.put("channel.type", "memory"); properties.put("channel.capacity", "100000"); properties.put("channel.transactionCapacity", "1000"); properties.put("sinks", "sink1"); properties.put("sink1.type", "avro"); properties.put("sink1.hostname", "localhost"); properties.put("sink1.port", "44444"); properties.put("processor.type", "default"); try { agent = new EmbeddedAgent("myagent"); agent.configure(properties); agent.start(); } catch (final Exception ex) { LOG.error("Error creating agent!", ex); } } @Override public EmbeddedAgent getFlumeAgent() { if (agent == null) { createAgent(); } return agent; } }