Java Code Examples for storm.kafka.trident.OpaqueTridentKafkaSpout

The following are top-voted examples showing how to use storm.kafka.trident.OpaqueTridentKafkaSpout. The examples are extracted from open source projects.
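Most of the examples below share the same construction pattern: wrap the ZooKeeper connection string in a ZkHosts, build a TridentKafkaConfig for the topic, set a StringScheme so each Kafka message arrives as a single-field tuple named "str", and pass the config to the spout. A minimal self-contained sketch of that pattern, assuming an illustrative ZooKeeper address and topic name:

import backtype.storm.spout.SchemeAsMultiScheme;
import storm.kafka.BrokerHosts;
import storm.kafka.StringScheme;
import storm.kafka.ZkHosts;
import storm.kafka.trident.OpaqueTridentKafkaSpout;
import storm.kafka.trident.TridentKafkaConfig;

public class SpoutSketch {
    public static OpaqueTridentKafkaSpout build() {
        // ZooKeeper host:port and topic name are illustrative assumptions
        BrokerHosts hosts = new ZkHosts("localhost:2181");
        TridentKafkaConfig config = new TridentKafkaConfig(hosts, "my-topic");
        // Emit each Kafka message as a single-field tuple named "str"
        config.scheme = new SchemeAsMultiScheme(new StringScheme());
        return new OpaqueTridentKafkaSpout(config);
    }
}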
Example 1
Project: realtime-event-processing   File: DocEventProcessingTopology.java
public static StormTopology buildTopology(Config conf, LocalDRPC drpc) {

    TridentTopology topology = new TridentTopology();

    //Kafka Spout
    BrokerHosts zk = new ZkHosts(conf.get(CrawlerConfig.KAFKA_CONSUMER_HOST_NAME) + ":" + conf.get(CrawlerConfig.KAFKA_CONSUMER_HOST_PORT));
    TridentKafkaConfig kafkaConfig = new TridentKafkaConfig(zk, (String) conf.get(CrawlerConfig.KAFKA_TOPIC_DOCUMENT_NAME));
    kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    OpaqueTridentKafkaSpout spout = new OpaqueTridentKafkaSpout(kafkaConfig);

    //ElasticSearch Persistent State
    Settings esSettings = ImmutableSettings.settingsBuilder()
            .put("storm.elasticsearch.cluster.name", conf.get(CrawlerConfig.ELASTICSEARCH_CLUSTER_NAME))
            .put("storm.elasticsearch.hosts", conf.get(CrawlerConfig.ELASTICSEARCH_HOST_NAME) + ":" + conf.get(CrawlerConfig.ELASTICSEARCH_HOST_PORT))
            .build();
    StateFactory esStateFactory = new ESIndexState.Factory<JSONObject>(new ClientFactory.NodeClient(esSettings.getAsMap()), JSONObject.class);
    TridentState esStaticState = topology.newStaticState(esStateFactory);

    String esIndex = (String) conf.get(CrawlerConfig.ELASTICSEARCH_INDEX_NAME);
    topology.newStream("docstream", spout)
            .each(new Fields("str"), new SplitDocStreamArgs(), new Fields("filename", "task", "user", "content"))
            .each(new Fields("filename", "task", "user"), new PrintFilter("Kafka"))
            .each(new Fields("filename", "task", "user", "content"), new PrepareDocForElasticSearch(), new Fields("index", "type", "id", "source"))
            .partitionPersist(esStateFactory, new Fields("index", "type", "id", "source"), new ESIndexUpdater<String>(new ESTridentTupleMapper()), new Fields());

    return topology.build();
}
 
Example 2
Project: storm-smoke-test   File: TridentConnectorUtil.java
public static OpaqueTridentKafkaSpout getTridentKafkaEmitter(String zkConnString, String topicName, Map topologyConfig) {
    BrokerHosts hosts = new ZkHosts(zkConnString);
    TridentKafkaConfig kafkaConfig = new TridentKafkaConfig(hosts, topicName);
    //topologyConfig.put("topology.spout.max.batch.size", 1);
    //kafkaConfig.forceFromStart = true;
    kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    return new OpaqueTridentKafkaSpout(kafkaConfig);
}
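A hypothetical call site for the helper above, wiring the returned spout into a Trident stream. The connection string, topic, stream name, and the use of Trident's builtin Debug filter are illustrative assumptions, not code from the project:

TridentTopology topology = new TridentTopology();
OpaqueTridentKafkaSpout spout =
        TridentConnectorUtil.getTridentKafkaEmitter("localhost:2181", "smoke-test", new HashMap());
topology.newStream("smokeTestStream", spout)
        // Debug is Trident's builtin filter that logs each tuple as it passes
        .each(new Fields("str"), new Debug());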
 
Example 3
Project: simple-kafka-storm-java   File: KafkaStormTridentExample.java
public static void main(String... args) throws AlreadyAliveException, InvalidTopologyException, AuthorizationException {

		// starting to build topology
		TridentTopology topology = new TridentTopology();

		// Kafka as an opaque trident spout
		OpaqueTridentKafkaSpout spout = new OpaqueTridentKafkaSpoutBuilder(Conf.zookeeper, Conf.inputTopic).build();
		Stream stream = topology.newStream(kafkaSpout, spout);

		// mapping transaction messages to pairs: (person,amount)
		Stream atomicTransactions = stream.each(strF, Functions.mapToPersonAmount, personAmountF);

		// function to print each tuple
		atomicTransactions.each(personAmountF, Functions.printlnFunction, emptyF);

		// aggregating transactions and mapping to Kafka messages
		Stream transactionsGroupped = atomicTransactions.groupBy(personF)
				.persistentAggregate(new MemoryMapState.Factory(), amountF, new Sum(), sumF).newValuesStream()
				.each(personSumF, Functions.mapToKafkaMessage, keyMessageF);

		// Kafka as a bolt -- producing to outputTopic
		TridentKafkaStateFactory stateFactory = new TridentKafkaStateFactory() //
				.withKafkaTopicSelector(new DefaultTopicSelector(Conf.outputTopic)) //
				.withTridentTupleToKafkaMapper(new FieldNameBasedTupleToKafkaMapper<String, String>(key, message));
		transactionsGroupped.partitionPersist(stateFactory, keyMessageF, new TridentKafkaUpdater(), emptyF);

		// submitting topology to local cluster
		new LocalCluster().submitTopology(kafkaAccountsTopology, topologyConfig(), topology.build());

		// waiting a while, then running Kafka producer
		Sleep.seconds(5);
		KafkaProduceExample.start(20);

	}
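Example 3 relies on constants (kafkaSpout, strF, personAmountF, and so on) defined elsewhere in the project. A plausible sketch of those definitions, inferred from how the stream is used; the exact names and values are assumptions:

// Topology and field-name constants assumed by the example above
static final String kafkaSpout = "kafkaSpout";
static final String kafkaAccountsTopology = "kafkaAccountsTopology";
static final String key = "key";
static final String message = "message";
static final Fields strF = new Fields("str");
static final Fields personAmountF = new Fields("person", "amount");
static final Fields personF = new Fields("person");
static final Fields amountF = new Fields("amount");
static final Fields sumF = new Fields("sum");
static final Fields personSumF = new Fields("person", "sum");
static final Fields keyMessageF = new Fields(key, message);
static final Fields emptyF = new Fields(); // no output fields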
 
Example 4
Project: web-crawler   File: WebCrawlerTopology.java
public static StormTopology buildTopology(Config conf, LocalDRPC localDrpc) {
    TridentTopology topology = new TridentTopology();

    //Kafka Spout
    BrokerHosts zk = new ZkHosts(conf.get(CrawlerConfig.KAFKA_CONSUMER_HOST_NAME) + ":" + conf.get(CrawlerConfig.KAFKA_CONSUMER_HOST_PORT));
    TridentKafkaConfig kafkaConfig = new TridentKafkaConfig(zk, (String) conf.get(CrawlerConfig.KAFKA_TOPIC_NAME));
    kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    OpaqueTridentKafkaSpout spout = new OpaqueTridentKafkaSpout(kafkaConfig);

    //ElasticSearch Persistent State
    Settings esSettings = ImmutableSettings.settingsBuilder()
            .put("storm.elasticsearch.cluster.name", conf.get(CrawlerConfig.ELASTICSEARCH_CLUSTER_NAME))
            .put("storm.elasticsearch.hosts", conf.get(CrawlerConfig.ELASTICSEARCH_HOST_NAME) + ":" + conf.get(CrawlerConfig.ELASTICSEARCH_HOST_PORT))
            .build();
    StateFactory esStateFactory = new ESIndexState.Factory<String>(new ClientFactory.NodeClient(esSettings.getAsMap()), String.class);
    TridentState esStaticState = topology.newStaticState(esStateFactory);

    //Topology
    topology.newStream("crawlKafkaSpout", spout).parallelismHint(5)
             //Splits url and depth information on receiving from Kafka
            .each(new Fields("str"), new SplitKafkaInput(), new Fields("url", "depth"))
            //Bloom Filter. Filters already crawled URLs
            .each(new Fields("url"), new URLFilter())
            //Download and Parse Webpage
            .each(new Fields("url"), new GetAdFreeWebPage(), new Fields("content_html", "title", "href"))//TODO Optimize
            //Add Href URls to Kafka queue
            .each(new Fields("href", "depth"), new KafkaProducerFilter())//TODO Replace with kafka persistent state.
            //Insert to Elasticsearch
            .each(new Fields("url", "content_html", "title"), new PrepareForElasticSearch(), new Fields("index", "type", "id", "source"))
            .partitionPersist(esStateFactory, new Fields("index", "type", "id", "source"), new ESIndexUpdater<String>(new ESTridentTupleMapper()))
    ;

    //DRPC
    topology.newDRPCStream("search", localDrpc)
            .each(new Fields("args"), new SplitDRPCArgs(), new Fields("query_input"))
            .each(new Fields("query_input"), new BingAutoSuggest(0), new Fields("query_preProcessed"))//TODO return List of expanded query
            .each(new Fields("query_preProcessed"), new PrepareSearchQuery(), new Fields("query", "indices", "types"))
            .groupBy(new Fields("query", "indices", "types"))
            .stateQuery(esStaticState, new Fields("query", "indices", "types"), new QuerySearchIndexQuery(), new Fields("results"))
    ;

    return topology.build();
}
 
Example 5
Project: realtime-event-processing   File: URLEventProcessingTopology.java
public static StormTopology buildTopology(Config conf, LocalDRPC localDrpc) {
    TridentTopology topology = new TridentTopology();

    //Kafka Spout
    BrokerHosts zk = new ZkHosts(conf.get(CrawlerConfig.KAFKA_CONSUMER_HOST_NAME) + ":" + conf.get(CrawlerConfig.KAFKA_CONSUMER_HOST_PORT));
    TridentKafkaConfig kafkaConfig = new TridentKafkaConfig(zk, (String) conf.get(CrawlerConfig.KAFKA_TOPIC_NAME));
    kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    //kafkaConfig.ignoreZkOffsets=true;
    OpaqueTridentKafkaSpout spout = new OpaqueTridentKafkaSpout(kafkaConfig);

    //ElasticSearch Persistent State
    Settings esSettings = ImmutableSettings.settingsBuilder()
            .put("storm.elasticsearch.cluster.name", conf.get(CrawlerConfig.ELASTICSEARCH_CLUSTER_NAME))
            .put("storm.elasticsearch.hosts", conf.get(CrawlerConfig.ELASTICSEARCH_HOST_NAME) + ":" + conf.get(CrawlerConfig.ELASTICSEARCH_HOST_PORT))
            .build();
    StateFactory esStateFactory = new ESIndexState.Factory<JSONObject>(new ClientFactory.NodeClient(esSettings.getAsMap()), JSONObject.class);
    TridentState esStaticState = topology.newStaticState(esStateFactory);

    //Topology
    topology.newStream("crawlKafkaSpout", spout).parallelismHint(5)
            //Splits words on receiving from Kafka
            .each(new Fields("str"), new SplitFunction(), new Fields("url", "depth", "task", "user"))
            .each(new Fields("str"), new PrintFilter("Kafka"))
            //Bloom Filter, Filters already crawled URLs
            .each(new Fields("url", "task"), new URLFilter())
            //Download and Parse Webpage
            .each(new Fields("url"), new GetAdFreeWebPage(), new Fields("content_html", "title", "href"))
            //Sending URLs present in the page into the kafka queue.
            .each(new Fields("href", "depth", "task", "user"), new KafkaProducerFilter())
            //Insert to Elasticsearch
            .each(new Fields("url", "content_html", "title", "task", "user"), new PrepareForElasticSearch(), new Fields("index", "type", "id", "source"))
            .partitionPersist(esStateFactory, new Fields("index", "type", "id", "source"), new ESIndexUpdater<String>(new ESTridentTupleMapper()), new Fields())
            ;

    //DRPC
    topology.newDRPCStream("search", localDrpc)
            .each(new Fields("args"), new SplitDRPCArgs(), new Fields("query_input", "task"))
            .each(new Fields("query_input"), new BingAutoSuggest(0), new Fields("query_preProcessed"))
            .each(new Fields("query_preProcessed", "task"), new PrepareSearchQuery(), new Fields("query", "indices", "types"))
            .groupBy(new Fields("query", "indices", "types"))
            .stateQuery(esStaticState, new Fields("query", "indices", "types"), new QuerySearchIndexQuery(), new Fields("results"))
            ;

    return topology.build();
}
 
Example 6
Project: simple-kafka-storm-java   File: OpaqueTridentKafkaSpoutBuilder.java
public OpaqueTridentKafkaSpout build() {
	BrokerHosts zk = new ZkHosts(zookeeper);
	TridentKafkaConfig spoutConf = new TridentKafkaConfig(zk, topic);
	spoutConf.scheme = new SchemeAsMultiScheme(new StringScheme());
	return new OpaqueTridentKafkaSpout(spoutConf);
}
 
Example 7
Project: openbus   File: BrokerSpout.java
@SuppressWarnings("rawtypes")
public IOpaquePartitionedTridentSpout<GlobalPartitionInformation, Partition, Map> getOpaquePartitionedTridentSpout() {
    return new OpaqueTridentKafkaSpout(config);
}
 
Example 8
Project: rb-bi   File: TridentKafkaSpout.java
/**
 * Build the tridentKafkaSpout.
 *
 * @return Trident spout of kafka.
 */
public OpaqueTridentKafkaSpout builder() {
    return new OpaqueTridentKafkaSpout(_kafkaConfig);
}
 
Example 9
Project: rb-bi   File: TridentKafkaSpoutNmsp.java
/**
 * Build the tridentKafkaSpout.
 *
 * @return Trident spout of kafka.
 */
public OpaqueTridentKafkaSpout builder() {
    return new OpaqueTridentKafkaSpout(_kafkaConfig);
}
 
Example 10
Project: rb-bi   File: TridentKafkaSpoutLocation.java
/**
 * Build the tridentKafkaSpout.
 *
 * @return Trident spout of kafka.
 */
public OpaqueTridentKafkaSpout builder() {
    return new OpaqueTridentKafkaSpout(_kafkaConfig);
}
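The three rb-bi builders above are identical apart from the file each lives in; each class presumably initializes its _kafkaConfig field before builder() is called. A hedged sketch of what such a wrapper might look like; the constructor and its parameters are assumptions, not the project's actual code:

public class TridentKafkaSpoutSketch {

    private final TridentKafkaConfig _kafkaConfig;

    public TridentKafkaSpoutSketch(String zkConnect, String topic) {
        // Build the Kafka config once; builder() can then be called repeatedly
        BrokerHosts hosts = new ZkHosts(zkConnect);
        _kafkaConfig = new TridentKafkaConfig(hosts, topic);
        _kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    }

    public OpaqueTridentKafkaSpout builder() {
        return new OpaqueTridentKafkaSpout(_kafkaConfig);
    }
}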