Java Code Examples for storm.trident.operation.builtin.Count

The following are the top-voted examples showing how to use storm.trident.operation.builtin.Count. These examples are extracted from open-source projects. You can vote up the examples you like, and your votes will be used in our system to surface better examples.
Example 1
Project: big-data-system   File: TridentWordCount.java   Source Code and License 6 votes vote down vote up
/**
 * Builds a Trident word-count topology together with a DRPC query stream.
 *
 * <p>Ingest side: a {@code FixedBatchSpout} (constructed with the "sentence" field, the
 * argument 3 and five fixed sentences, cycling enabled) feeds {@code Split}, the result is
 * grouped by "word" and counted with {@code Count} into a {@code MemoryMapState}.
 *
 * <p>Query side: the "words" DRPC stream runs its "args" through {@code Split}, looks every
 * word up in the count state with {@code MapGet}, filters the "count" field through
 * {@code FilterNull} and aggregates what is left with {@code Sum} into "sum".
 *
 * @param drpc DRPC handle passed to {@code newDRPCStream}
 * @return the result of {@code topology.build()}
 */
public static StormTopology buildTopology(LocalDRPC drpc) {
  FixedBatchSpout sentenceSpout = new FixedBatchSpout(
      new Fields("sentence"),
      3,
      new Values("the cow jumped over the moon"),
      new Values("the man went to the store and bought some candy"),
      new Values("four score and seven years ago"),
      new Values("how many apples can you eat"),
      new Values("to be or not to be the person"));
  sentenceSpout.setCycle(true);

  TridentTopology topology = new TridentTopology();

  // Ingest side: sentence -> word -> persisted per-word count.
  TridentState countsByWord = topology
      .newStream("spout1", sentenceSpout)
      .parallelismHint(16)
      .each(new Fields("sentence"), new Split(), new Fields("word"))
      .groupBy(new Fields("word"))
      .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
      .parallelismHint(16);

  // Query side: DRPC args -> word -> stored count -> sum of the non-filtered counts.
  topology
      .newDRPCStream("words", drpc)
      .each(new Fields("args"), new Split(), new Fields("word"))
      .groupBy(new Fields("word"))
      .stateQuery(countsByWord, new Fields("word"), new MapGet(), new Fields("count"))
      .each(new Fields("count"), new FilterNull())
      .aggregate(new Fields("count"), new Sum(), new Fields("sum"));

  return topology.build();
}
 
Example 2
Project: cdh-storm   File: TridentWordCount.java   Source Code and License 6 votes vote down vote up
/**
 * Assembles the word-count topology and its "words" DRPC query.
 *
 * <p>A cycling {@code FixedBatchSpout} of five hard-coded sentences is passed through
 * {@code Split}, grouped by "word" and counted into a {@code MemoryMapState}. The DRPC stream
 * applies {@code Split} to "args", queries that state per word, applies {@code FilterNull} to
 * "count" and aggregates the remaining counts with {@code Sum}.
 *
 * @param drpc DRPC handle the "words" stream is created with
 * @return the built topology
 */
public static StormTopology buildTopology(LocalDRPC drpc) {
  FixedBatchSpout fixedSentences = new FixedBatchSpout(
      new Fields("sentence"),
      3,
      new Values("the cow jumped over the moon"),
      new Values("the man went to the store and bought some candy"),
      new Values("four score and seven years ago"),
      new Values("how many apples can you eat"),
      new Values("to be or not to be the person"));
  fixedSentences.setCycle(true);

  TridentTopology topology = new TridentTopology();

  // Counting pipeline; both parallelism hints are 16 as in the original chain.
  TridentState wordTotals = topology
      .newStream("spout1", fixedSentences)
      .parallelismHint(16)
      .each(new Fields("sentence"), new Split(), new Fields("word"))
      .groupBy(new Fields("word"))
      .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
      .parallelismHint(16);

  // DRPC pipeline reading the state produced above.
  topology
      .newDRPCStream("words", drpc)
      .each(new Fields("args"), new Split(), new Fields("word"))
      .groupBy(new Fields("word"))
      .stateQuery(wordTotals, new Fields("word"), new MapGet(), new Fields("count"))
      .each(new Fields("count"), new FilterNull())
      .aggregate(new Fields("count"), new Sum(), new Fields("sum"));

  return topology.build();
}
 
Example 3
Project: LearnStorm   File: TridentWordCount.java   Source Code and License 6 votes vote down vote up
/**
 * Builds a word-count topology whose fixed input sentences use "$$" between words.
 *
 * <p>The sentences are passed through {@code Split}, grouped by "word" and counted into a
 * {@code MemoryMapState} (parallelism hint 6). A "words" DRPC stream applies {@code Split} to
 * its "args", queries the count state per word, applies {@code FilterNull} to "count" and
 * aggregates the remaining counts with {@code Sum}.
 *
 * @param drpc DRPC handle passed to {@code newDRPCStream}
 * @return the built topology
 */
public static StormTopology buildTopology(LocalDRPC drpc) {
	TridentTopology topology = new TridentTopology();

	// Fixed input with cycling enabled.
	FixedBatchSpout cyclingSentences = new FixedBatchSpout(
			new Fields("sentence"),
			3,
			new Values("the$$cow$$jumped$$over$$the$$moon"),
			new Values("the$$man$$went$$to$$the$$store$$and$$bought$$some$$candy"),
			new Values("four$$score$$and$$seven$$years$$ago"),
			new Values("how$$many$$apples$$can$$you$$eat"),
			new Values("to$$be$$or$$not$$to$$be$$the$$person"));
	cyclingSentences.setCycle(true);

	// Counting pipeline.
	TridentState countsByWord = topology
			.newStream("spout1", cyclingSentences)
			.each(new Fields("sentence"), new Split(), new Fields("word"))
			.groupBy(new Fields("word"))
			.persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
			.parallelismHint(6);

	// DRPC query pipeline over the counts.
	topology
			.newDRPCStream("words", drpc)
			.each(new Fields("args"), new Split(), new Fields("word"))
			.groupBy(new Fields("word"))
			.stateQuery(countsByWord, new Fields("word"), new MapGet(), new Fields("count"))
			.each(new Fields("count"), new FilterNull())
			.aggregate(new Fields("count"), new Sum(), new Fields("sum"));

	return topology.build();
}
 
Example 4
Project: resa   File: TridentWordCount.java   Source Code and License 6 votes vote down vote up
/**
 * Builds the Trident word-count topology described by {@code conf}.
 *
 * <p>Sentences from a {@code OneSentencePerBatchSpout} are passed through {@code Split},
 * grouped by "word" and counted into a {@code MemoryMapState}. The parallelism hints are read
 * from "spout.parallelism", "split.parallelism" and "counter.parallelism", each defaulting to 1.
 *
 * @param conf topology configuration
 * @return the built topology
 * @throws UnsupportedOperationException if "spout.redis" is true; this method has no
 *         Redis-backed Trident spout to use
 */
public static StormTopology buildTopology(Config conf) {
    if (ConfigUtil.getBoolean(conf, "spout.redis", false)) {
        // Previously this branch read redis.host/port/queue, discarded them and handed a null
        // spout to newStream. Fail here with an explicit message instead of a late failure.
        throw new UnsupportedOperationException(
                "spout.redis=true is not supported: no Redis-backed Trident spout is available");
    }
    ITridentSpout<Object> spout = new OneSentencePerBatchSpout();

    TridentTopology topology = new TridentTopology();
    topology.newStream("spout", spout).parallelismHint(ConfigUtil.getInt(conf, "spout.parallelism", 1))
            .each(new Fields("sentence"), new Split(), new Fields("word"))
            .parallelismHint(ConfigUtil.getInt(conf, "split.parallelism", 1))
            .groupBy(new Fields("word"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
            .parallelismHint(ConfigUtil.getInt(conf, "counter.parallelism", 1));
    return topology.build();
}
 
Example 5
Project: resa   File: TridentWordCount.java   Source Code and License 6 votes vote down vote up
/**
 * Builds the word-count topology described by {@code conf}.
 *
 * <p>When "spout.redis" is true the source is a {@code RedisSentenceSpout} built from
 * "redis.host", "redis.port" and "redis.queue"; otherwise it is a {@code RandomSentenceSpout}.
 * Sentences are passed through {@code Split}, grouped by "word" and counted into a
 * {@code MemoryMapState}. Parallelism hints come from "spout.parallelism",
 * "split.parallelism" and "counter.parallelism", each defaulting to 1.
 *
 * @param conf topology configuration
 * @return the built topology
 */
public static StormTopology buildTopology(Config conf) {
    final IRichSpout sentenceSource;
    if (ConfigUtil.getBoolean(conf, "spout.redis", false)) {
        String redisHost = (String) conf.get("redis.host");
        int redisPort = ((Number) conf.get("redis.port")).intValue();
        String redisQueue = (String) conf.get("redis.queue");
        sentenceSource = new RedisSentenceSpout(redisHost, redisPort, redisQueue);
    } else {
        sentenceSource = new RandomSentenceSpout();
    }

    TridentTopology wordCount = new TridentTopology();
    wordCount
            .newStream("spout", sentenceSource)
            .parallelismHint(ConfigUtil.getInt(conf, "spout.parallelism", 1))
            .each(new Fields("sentence"), new Split(), new Fields("word"))
            .parallelismHint(ConfigUtil.getInt(conf, "split.parallelism", 1))
            .groupBy(new Fields("word"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
            .parallelismHint(ConfigUtil.getInt(conf, "counter.parallelism", 1));

    return wordCount.build();
}
 
Example 6
Project: incubator-storm   File: TridentWordCount.java   Source Code and License 6 votes vote down vote up
/**
 * Creates the Trident word-count topology plus a DRPC stream named "words".
 *
 * <p>Five fixed sentences (cycling {@code FixedBatchSpout}) are passed through {@code Split},
 * grouped by "word" and counted into a {@code MemoryMapState}. The DRPC stream applies
 * {@code Split} to "args", reads each word's count via {@code MapGet}, applies
 * {@code FilterNull} to "count" and aggregates the remaining counts with {@code Sum}.
 *
 * @param drpc DRPC handle used when creating the "words" stream
 * @return the built topology
 */
public static StormTopology buildTopology(LocalDRPC drpc) {
  FixedBatchSpout source = new FixedBatchSpout(
      new Fields("sentence"),
      3,
      new Values("the cow jumped over the moon"),
      new Values("the man went to the store and bought some candy"),
      new Values("four score and seven years ago"),
      new Values("how many apples can you eat"),
      new Values("to be or not to be the person"));
  source.setCycle(true);

  TridentTopology topology = new TridentTopology();

  // Persisted per-word counts.
  TridentState perWordCounts = topology
      .newStream("spout1", source)
      .parallelismHint(16)
      .each(new Fields("sentence"), new Split(), new Fields("word"))
      .groupBy(new Fields("word"))
      .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
      .parallelismHint(16);

  // DRPC query over those counts.
  topology
      .newDRPCStream("words", drpc)
      .each(new Fields("args"), new Split(), new Fields("word"))
      .groupBy(new Fields("word"))
      .stateQuery(perWordCounts, new Fields("word"), new MapGet(), new Fields("count"))
      .each(new Fields("count"), new FilterNull())
      .aggregate(new Fields("count"), new Sum(), new Fields("sum"));

  return topology.build();
}
 
Example 7
Project: storm-example   File: ClickThruAnalyticsTopology.java   Source Code and License 6 votes vote down vote up
/**
 * Builds the click-through analytics topology.
 *
 * <p>One branch of the spout stream is passed through {@code Filter("click", "true")} and
 * {@code Distinct}, grouped by "campaign" and counted into "click_thru_count". The other
 * branch counts all tuples per "campaign" into "impression_count", turns that state into a
 * stream with {@code newValuesStream()}, queries the click-through state per campaign and
 * passes the three fields to {@code CampaignEffectiveness}.
 *
 * @return the built topology
 */
public static StormTopology buildTopology() {
    LOG.info("Building topology.");
    TridentTopology topology = new TridentTopology();
    StateFactory clickThruStore = new MemoryMapState.Factory();
    ClickThruSpout clickSource = new ClickThruSpout();
    Stream events = topology.newStream("clithru", clickSource);

    // Click-through branch.
    Stream clicks = events.each(
            new Fields("username", "campaign", "product", "click"),
            new Filter("click", "true"));
    Stream distinctClicks = clicks.each(
            new Fields("username", "campaign", "product", "click"),
            new Distinct());
    TridentState clickThruCounts = distinctClicks
            .groupBy(new Fields("campaign"))
            .persistentAggregate(clickThruStore, new Count(), new Fields("click_thru_count"));

    // Impression branch, joined with the click-through counts per campaign.
    TridentState impressionCounts = events
            .groupBy(new Fields("campaign"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("impression_count"));
    Stream impressions = impressionCounts.newValuesStream();
    Stream withClickThru = impressions.stateQuery(
            clickThruCounts, new Fields("campaign"), new MapGet(), new Fields("click_thru_count"));
    withClickThru.each(
            new Fields("campaign", "impression_count", "click_thru_count"),
            new CampaignEffectiveness(),
            new Fields(""));

    return topology.build();
}
 
Example 8
Project: storm-benchmark   File: TridentWordCount.java   Source Code and License 6 votes vote down vote up
/**
 * Builds the Kafka-fed Trident word-count topology.
 *
 * <p>Reads the spout, split and count parallelism from {@code config} (falling back to the
 * DEFAULT_* constants), assigns a new {@code TransactionalTridentKafkaSpout} to the
 * {@code spout} field, and wires: spout -> shuffle -> {@code WordSplit} -> group by "word" ->
 * {@code Count} persisted in a {@code MemoryMapState}.
 *
 * @param config benchmark configuration
 * @return the built topology
 */
@Override
  public StormTopology getTopology(Config config) {
    final int spoutParallelism = BenchmarkUtils.getInt(config, SPOUT_NUM, DEFAULT_SPOUT_NUM);
    final int splitParallelism = BenchmarkUtils.getInt(config, SPLIT_NUM, DEFAULT_SPLIT_BOLT_NUM);
    final int countParallelism = BenchmarkUtils.getInt(config, COUNT_NUM, DEFAULT_COUNT_BOLT_NUM);

    spout = new TransactionalTridentKafkaSpout(
        KafkaUtils.getTridentKafkaConfig(config, new SchemeAsMultiScheme(new StringScheme())));

    TridentTopology trident = new TridentTopology();
    trident
        .newStream("wordcount", spout)
        .name("sentence")
        .parallelismHint(spoutParallelism)
        .shuffle()
        .each(new Fields(StringScheme.STRING_SCHEME_KEY), new WordSplit(), new Fields("word"))
        .parallelismHint(splitParallelism)
        .groupBy(new Fields("word"))
        .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
        .parallelismHint(countParallelism);

    return trident.build();
  }
 
Example 9
Project: flink-perf   File: TridentWordCount.java   Source Code and License 6 votes vote down vote up
/**
 * Wires up the Trident word-count topology and a "words" DRPC query stream.
 *
 * <p>The input is a cycling {@code FixedBatchSpout} of five fixed sentences. Sentences go
 * through {@code Split}, are grouped by "word" and counted into a {@code MemoryMapState}. The
 * DRPC stream applies {@code Split} to "args", queries the state per word, applies
 * {@code FilterNull} to "count" and aggregates the remaining counts with {@code Sum}.
 *
 * @param drpc DRPC handle given to {@code newDRPCStream}
 * @return the built topology
 */
public static StormTopology buildTopology(LocalDRPC drpc) {
  FixedBatchSpout sentences = new FixedBatchSpout(
      new Fields("sentence"),
      3,
      new Values("the cow jumped over the moon"),
      new Values("the man went to the store and bought some candy"),
      new Values("four score and seven years ago"),
      new Values("how many apples can you eat"),
      new Values("to be or not to be the person"));
  sentences.setCycle(true);

  TridentTopology topology = new TridentTopology();

  // Stateful counting branch.
  TridentState tallies = topology
      .newStream("spout1", sentences)
      .parallelismHint(16)
      .each(new Fields("sentence"), new Split(), new Fields("word"))
      .groupBy(new Fields("word"))
      .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
      .parallelismHint(16);

  // DRPC branch that reads the tallies.
  topology
      .newDRPCStream("words", drpc)
      .each(new Fields("args"), new Split(), new Fields("word"))
      .groupBy(new Fields("word"))
      .stateQuery(tallies, new Fields("word"), new MapGet(), new Fields("count"))
      .each(new Fields("count"), new FilterNull())
      .aggregate(new Fields("count"), new Sum(), new Fields("sum"));

  return topology.build();
}
 
Example 10
Project: trident-tutorial   File: TopHashtagByFollowerClass.java   Source Code and License 6 votes vote down vote up
/**
 * Builds a topology that counts hashtags per follower class and exposes the top one per class
 * through the "hashtag_count" DRPC stream.
 *
 * <p>Tweets are parsed with {@code ParseTweet}, projected to "content" and "user", filtered
 * with {@code OnlyHashtags} and {@code OnlyEnglish}, mapped to ("followerClass",
 * "contentName"), grouped by that pair and counted into a {@code MemoryMapState}. The DRPC
 * stream lists the keys with {@code TupleCollectionGet}, fetches each count with
 * {@code MapGet}, groups by "followerClass" and applies
 * {@code FirstN.FirstNSortedAgg(1, "count", true)}.
 *
 * @param spout Kafka spout providing the "str" field
 * @return the built topology
 * @throws IOException declared by the signature; no call in this method visibly throws it
 */
public static StormTopology buildTopology(TransactionalTridentKafkaSpout spout) throws IOException {
    TridentTopology topology = new TridentTopology();

    // Counting branch.
    TridentState hashtagCounts = topology
            .newStream("tweets", spout)
            .each(new Fields("str"), new ParseTweet(), new Fields("text", "content", "user"))
            .project(new Fields("content", "user"))
            .each(new Fields("content"), new OnlyHashtags())
            .each(new Fields("user"), new OnlyEnglish())
            .each(new Fields("content", "user"),
                    new ExtractFollowerClassAndContentName(),
                    new Fields("followerClass", "contentName"))
            .groupBy(new Fields("followerClass", "contentName"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"));

    // Query branch.
    topology
            .newDRPCStream("hashtag_count")
            .stateQuery(hashtagCounts, new TupleCollectionGet(), new Fields("followerClass", "contentName"))
            .stateQuery(hashtagCounts,
                    new Fields("followerClass", "contentName"),
                    new MapGet(),
                    new Fields("count"))
            .groupBy(new Fields("followerClass"))
            .aggregate(new Fields("contentName", "count"),
                    new FirstN.FirstNSortedAgg(1,"count", true),
                    new Fields("contentName", "count"));

    return topology.build();
}
 
Example 11
Project: trident-tutorial   File: GlobalTop20Hashtags.java   Source Code and License 6 votes vote down vote up
/**
 * Builds a topology that counts hashtags per (followerClass, contentName) and exposes a global
 * top list through the "top_hashtags" DRPC stream.
 *
 * <p>Tweets are parsed with {@code ParseTweet}, projected to "content" and "user", filtered
 * with {@code OnlyHashtags} and {@code OnlyEnglish}, mapped to ("followerClass",
 * "contentName"), grouped by that pair and counted into a {@code MemoryMapState}. The DRPC
 * stream lists the keys with {@code TupleCollectionGet}, fetches each count with
 * {@code MapGet} and applies {@code FirstN.FirstNSortedAgg(5, "count", true)} without any
 * grouping.
 *
 * @param spout Kafka spout providing the "str" field
 * @return the built topology
 * @throws IOException declared by the signature; no call in this method visibly throws it
 */
public static StormTopology buildTopology(TransactionalTridentKafkaSpout spout) throws IOException {
    TridentTopology topology = new TridentTopology();

    // Counting branch.
    TridentState hashtagCounts = topology
            .newStream("tweets", spout)
            .each(new Fields("str"), new ParseTweet(), new Fields("text", "content", "user"))
            .project(new Fields("content", "user"))
            .each(new Fields("content"), new OnlyHashtags())
            .each(new Fields("user"), new OnlyEnglish())
            .each(new Fields("content", "user"),
                    new ExtractFollowerClassAndContentName(),
                    new Fields("followerClass", "contentName"))
            .groupBy(new Fields("followerClass", "contentName"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"));

    // Query branch: no groupBy, so the aggregator is applied to the ungrouped stream.
    topology
            .newDRPCStream("top_hashtags")
            .stateQuery(hashtagCounts, new TupleCollectionGet(), new Fields("followerClass", "contentName"))
            .stateQuery(hashtagCounts,
                    new Fields("followerClass", "contentName"),
                    new MapGet(),
                    new Fields("count"))
            .aggregate(new Fields("contentName", "count"),
                    new FirstN.FirstNSortedAgg(5,"count", true),
                    new Fields("contentName", "count"));

    return topology.build();
}
 
Example 12
Project: trident-tutorial   File: TopHashtagByCountry.java   Source Code and License 6 votes vote down vote up
/**
 * Builds a topology that counts hashtags per country and exposes the top entries per country
 * through the "location_hashtag_count" DRPC stream.
 *
 * <p>Tweets are parsed with {@code ParseTweet} into "status", "content" and "user", filtered
 * with {@code OnlyHashtags} and {@code OnlyGeo}, mapped by {@code ExtractLocation} to
 * ("country", "contentName"), grouped by that pair and counted into a {@code MemoryMapState}.
 * The DRPC stream lists the keys with {@code TupleCollectionGet}, fetches each count with
 * {@code MapGet}, groups by "country" and applies
 * {@code FirstN.FirstNSortedAgg(3, "count", true)}.
 *
 * @param spout Kafka spout providing the "str" field
 * @return the built topology
 * @throws IOException declared by the signature; no call in this method visibly throws it
 */
public static StormTopology buildTopology(TransactionalTridentKafkaSpout spout) throws IOException {
    TridentTopology topology = new TridentTopology();

    // Counting branch.
    TridentState countsByCountry = topology
            .newStream("tweets", spout)
            .each(new Fields("str"), new ParseTweet(), new Fields("status", "content", "user"))
            .project(new Fields("content", "user", "status"))
            .each(new Fields("content"), new OnlyHashtags())
            .each(new Fields("status"), new OnlyGeo())
            .each(new Fields("status", "content"),
                    new ExtractLocation(),
                    new Fields("country", "contentName"))
            .groupBy(new Fields("country", "contentName"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"));

    // Query branch.
    topology
            .newDRPCStream("location_hashtag_count")
            .stateQuery(countsByCountry, new TupleCollectionGet(), new Fields("country", "contentName"))
            .stateQuery(countsByCountry,
                    new Fields("country", "contentName"),
                    new MapGet(),
                    new Fields("count"))
            .groupBy(new Fields("country"))
            .aggregate(new Fields("contentName", "count"),
                    new FirstN.FirstNSortedAgg(3,"count", true),
                    new Fields("contentName", "count"));

    return topology.build();
}
 
Example 13
Project: trident-tutorial   File: TopHashtagFollowerCountGrouping.java   Source Code and License 6 votes vote down vote up
/**
 * Builds a topology that counts hashtags per follower class into a state created by
 * {@code HazelCastStateFactory}, and a "hashtag_count" DRPC stream that looks up one hashtag
 * across four follower classes.
 *
 * <p>Tweets are parsed, projected to "content" and "user", filtered with {@code OnlyHashtags}
 * and {@code OnlyEnglish}, mapped to ("followerClass", "contentName") with parallelism hint 3,
 * grouped by that pair and counted (parallelism hint 3). The DRPC stream adds a "followerClass"
 * field from {@code Constants("< 100", "< 10K", "< 100K", ">= 100K")} and queries the state
 * with ("followerClass", "args") as the key.
 *
 * @param spout Kafka spout providing the "str" field
 * @return the built topology
 * @throws IOException declared by the signature; no call in this method visibly throws it
 */
public static StormTopology buildTopology(TransactionalTridentKafkaSpout spout) throws IOException {
    TridentTopology topology = new TridentTopology();

    // Counting branch.
    TridentState hashtagCounts = topology
            .newStream("tweets", spout)
            .each(new Fields("str"), new ParseTweet(), new Fields("text", "content", "user"))
            .project(new Fields("content", "user"))
            .each(new Fields("content"), new OnlyHashtags())
            .each(new Fields("user"), new OnlyEnglish())
            .each(new Fields("content", "user"),
                    new ExtractFollowerClassAndContentName(),
                    new Fields("followerClass", "contentName"))
            .parallelismHint(3)
            .groupBy(new Fields("followerClass", "contentName"))
            .persistentAggregate(new HazelCastStateFactory(), new Count(), new Fields("count"))
            .parallelismHint(3);

    // Query branch: one lookup per constant follower class, keyed with the DRPC "args".
    topology
            .newDRPCStream("hashtag_count")
            .each(new Constants<String>("< 100", "< 10K", "< 100K", ">= 100K"),
                    new Fields("followerClass"))
            .stateQuery(hashtagCounts,
                    new Fields("followerClass", "args"),
                    new MapGet(),
                    new Fields("count"));

    return topology.build();
}
 
Example 14
Project: trident-tutorial   File: ClusterTestTopology.java   Source Code and License 6 votes vote down vote up
/**
 * Builds the per-actor tweet-count topology and submits it under the name given in
 * {@code args[0]}.
 *
 * <p>Tuples from a {@code FakeTweetsBatchSpout} are grouped by "actor" and counted into a
 * {@code MemoryMapState}; the "count_per_actor" DRPC stream looks up the count for its "args".
 *
 * @param args command-line arguments; {@code args[0]} is the topology name
 * @throws IllegalArgumentException if no topology name is supplied
 * @throws Exception propagated from topology submission
 */
public static void main(String[] args) throws Exception {
    // Fail with a clear message instead of an ArrayIndexOutOfBoundsException on args[0].
    if (args == null || args.length == 0) {
        throw new IllegalArgumentException("Missing required argument: topology name");
    }
    String topologyName = args[0];

    Config conf = new Config();
    conf.setNumWorkers(8); // Our Vagrant environment has 8 workers

    FakeTweetsBatchSpout fakeTweets = new FakeTweetsBatchSpout(10);

    TridentTopology topology = new TridentTopology();
    TridentState countState =
            topology
                    .newStream("spout", fakeTweets)
                    .groupBy(new Fields("actor"))
                    .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"));

    topology
            .newDRPCStream("count_per_actor")
            .stateQuery(countState, new Fields("args"), new MapGet(), new Fields("count"));

    // Submits the topology
    StormSubmitter.submitTopology(topologyName, conf, topology.build());
}
 
Example 15
Project: storm-kafka-0.8-plus-test   File: SentenceAggregationTopology.java   Source Code and License 6 votes vote down vote up
/**
 * Builds a Kafka-fed word-count topology with a "words" DRPC query stream.
 *
 * <p>A {@code TransactionalTridentKafkaSpout} configured from {@code brokerHosts}, topic
 * "storm-sentence" and client id "storm" is shuffled, passed through {@code WordSplit},
 * grouped by "word" and counted into a state created by {@code HazelCastStateFactory}
 * (parallelism hint 2). The DRPC stream applies {@code Split} to "args", queries the state per
 * word, applies {@code FilterNull} to "count" and aggregates the remaining counts with
 * {@code Sum}.
 *
 * @param drpc DRPC handle passed to {@code newDRPCStream}
 * @return the built topology
 */
public StormTopology buildTopology(LocalDRPC drpc) {
    TridentKafkaConfig spoutConfig = new TridentKafkaConfig(brokerHosts, "storm-sentence", "storm");
    spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    TransactionalTridentKafkaSpout sentenceSpout = new TransactionalTridentKafkaSpout(spoutConfig);

    TridentTopology topology = new TridentTopology();

    // Counting branch.
    TridentState countsByWord = topology
            .newStream("kafka", sentenceSpout)
            .shuffle()
            .each(new Fields("str"), new WordSplit(), new Fields("word"))
            .groupBy(new Fields("word"))
            .persistentAggregate(new HazelCastStateFactory(), new Count(), new Fields("aggregates_words"))
            .parallelismHint(2);

    // DRPC branch.
    topology
            .newDRPCStream("words", drpc)
            .each(new Fields("args"), new Split(), new Fields("word"))
            .groupBy(new Fields("word"))
            .stateQuery(countsByWord, new Fields("word"), new MapGet(), new Fields("count"))
            .each(new Fields("count"), new FilterNull())
            .aggregate(new Fields("count"), new Sum(), new Fields("sum"));

    return topology.build();
}
 
Example 16
Project: jstorm   File: TridentWordCount.java   Source Code and License 6 votes vote down vote up
/**
 * Builds the Trident word-count topology and its "words" DRPC query stream.
 *
 * <p>A cycling {@code FixedBatchSpout} of five fixed sentences is passed through
 * {@code Split}, grouped by "word" and counted into a {@code MemoryMapState}, with a
 * parallelism hint of 16 on both the spout and the aggregation. The DRPC stream applies
 * {@code Split} to "args", queries the state per word, applies {@code FilterNull} to "count"
 * and aggregates the remaining counts with {@code Sum}.
 *
 * @param drpc DRPC handle passed to {@code newDRPCStream}
 * @return the built topology
 */
public static StormTopology buildTopology(LocalDRPC drpc) {
    FixedBatchSpout sentenceSpout = new FixedBatchSpout(
            new Fields("sentence"),
            3,
            new Values("the cow jumped over the moon"),
            new Values("the man went to the store and bought some candy"),
            new Values("four score and seven years ago"),
            new Values("how many apples can you eat"),
            new Values("to be or not to be the person"));
    sentenceSpout.setCycle(true);

    TridentTopology topology = new TridentTopology();

    // Counting branch.
    TridentState countsByWord = topology
            .newStream("spout1", sentenceSpout)
            .parallelismHint(16)
            .each(new Fields("sentence"), new Split(), new Fields("word"))
            .groupBy(new Fields("word"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
            .parallelismHint(16);

    // DRPC branch.
    topology
            .newDRPCStream("words", drpc)
            .each(new Fields("args"), new Split(), new Fields("word"))
            .groupBy(new Fields("word"))
            .stateQuery(countsByWord, new Fields("word"), new MapGet(), new Fields("count"))
            .each(new Fields("count"), new FilterNull())
            .aggregate(new Fields("count"), new Sum(), new Fields("sum"));

    return topology.build();
}
 
Example 17
Project: jstorm   File: TridentMapExample.java   Source Code and License 6 votes vote down vote up
/**
 * Builds a word-count topology using the flatMap / map / filter / peek stream operations.
 *
 * <p>Tuples from a cycling {@code FixedBatchSpout} (field "word") go through
 * {@code flatMap(split)}, {@code map(toUpper)} and {@code filter(theFilter)}; a {@code peek}
 * step prints the first field of each tuple to standard output. The stream is then grouped by
 * "word" and counted into a {@code MemoryMapState}. The "words" DRPC stream applies
 * {@code flatMap(split)}, groups by "args", queries the state, filters with
 * {@code FilterNull} and aggregates "count" with {@code Sum}.
 *
 * @param drpc DRPC handle passed to {@code newDRPCStream}
 * @return the built topology
 */
public static StormTopology buildTopology(LocalDRPC drpc) {
    FixedBatchSpout sentenceSpout = new FixedBatchSpout(
            new Fields("word"),
            3,
            new Values("the cow jumped over the moon"),
            new Values("the man went to the store and bought some candy"),
            new Values("four score and seven years ago"),
            new Values("how many apples can you eat"),
            new Values("to be or not to be the person"));
    sentenceSpout.setCycle(true);

    // Prints field 0 of every tuple that reaches the peek step.
    Consumer printFirstField = new Consumer() {
        @Override
        public void accept(TridentTuple input) {
            System.out.println(input.getString(0));
        }
    };

    TridentTopology topology = new TridentTopology();

    // Counting branch.
    TridentState countsByWord = topology
            .newStream("spout1", sentenceSpout)
            .parallelismHint(16)
            .flatMap(split)
            .map(toUpper)
            .filter(theFilter)
            .peek(printFirstField)
            .groupBy(new Fields("word"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
            .parallelismHint(16);

    // DRPC branch.
    topology
            .newDRPCStream("words", drpc)
            .flatMap(split)
            .groupBy(new Fields("args"))
            .stateQuery(countsByWord, new Fields("args"), new MapGet(), new Fields("count"))
            .filter(new FilterNull())
            .aggregate(new Fields("count"), new Sum(), new Fields("sum"));

    return topology.build();
}
 
Example 18
Project: jstorm   File: TridentTopologySource.java   Source Code and License 6 votes vote down vote up
/**
 * Builds a small Trident word-count topology over five fixed two-word sentences.
 *
 * <p>Assigns a new {@code FixedBatchSpout} (field "sentence", constructor argument 20) to
 * {@code this.spout}, then wires: spout -> shuffle -> {@code Split} -> group by "word" ->
 * {@code Count} persisted in a {@code MemoryMapState}. Every parallelism hint is 1.
 *
 * @param config not read by this method
 * @return the built topology
 */
public StormTopology getTopology(Config config) {
    this.spout = new FixedBatchSpout(
            new Fields("sentence"),
            20,
            new Values("one two"),
            new Values("two three"),
            new Values("three four"),
            new Values("four five"),
            new Values("five six"));

    TridentTopology wordCount = new TridentTopology();
    wordCount
            .newStream("wordcount", spout)
            .name("sentence")
            .parallelismHint(1)
            .shuffle()
            .each(new Fields("sentence"), new Split(), new Fields("word"))
            .parallelismHint(1)
            .groupBy(new Fields("word"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
            .parallelismHint(1);

    return wordCount.build();
}
 
Example 19
Project: storm-trident-example   File: ExampleTopology.java   Source Code and License 5 votes vote down vote up
/**
 * Builds a topology that counts words produced from random phrases.
 *
 * <p>The stream from {@code RandomPhraseSpout} is passed through {@code RandomPhraseSplitter}
 * (input field "randomPhrase", added field "randomWord"), grouped by "randomWord", and
 * aggregated with {@code Count} into the state created by {@code RedisStoreStateFactory}.
 *
 * @return the built topology
 */
public static StormTopology buildTopology() {
	TridentTopology topology = new TridentTopology();
	RandomPhraseSpout phraseSpout = new RandomPhraseSpout();

	// "dumbo" is the first argument to newStream; it is not referenced again in this method.
	Stream phrases = topology.newStream("dumbo", phraseSpout);

	// Split each "randomPhrase" into tuples that additionally carry "randomWord".
	Stream words = phrases.each(
			new Fields("randomPhrase"),
			new RandomPhraseSplitter(),
			new Fields("randomWord"));

	// persistentAggregate is given, in order: the state factory, the Count aggregator, and
	// the output field "count". Grouping by "randomWord" is similar to a Storm fields grouping.
	// NOTE(review): the state is presumably Redis-backed, judging by the factory name - confirm.
	words
			.groupBy(new Fields("randomWord"))
			.persistentAggregate(new RedisStoreStateFactory(), new Count(), new Fields("count"));

	return topology.build();
}
 
Example 20
Project: storm-trident-example   File: ExampleTopology.java   Source Code and License 5 votes vote down vote up
/**
 * Builds a random-phrase word-count topology.
 *
 * <p>Tuples from {@code RandomPhraseSpout} go through {@code RandomPhraseSplitter} (reads
 * "randomPhrase", adds "randomWord"), are grouped by "randomWord", and are counted with
 * {@code Count} into the state created by {@code RedisStoreStateFactory}.
 *
 * @return the built topology
 */
public static StormTopology buildTopology() {
	TridentTopology topology = new TridentTopology();
	RandomPhraseSpout source = new RandomPhraseSpout();

	// The id "dumbo" is only passed to newStream; nothing else here refers to it.
	Stream phraseStream = topology.newStream("dumbo", source);

	// The splitter adds a "randomWord" field alongside the incoming "randomPhrase".
	Stream wordStream = phraseStream.each(
			new Fields("randomPhrase"),
			new RandomPhraseSplitter(),
			new Fields("randomWord"));

	// Arguments to persistentAggregate: state factory, Count aggregator, output field "count".
	// Grouping by a field here is similar to Storm's fields grouping.
	// NOTE(review): storage is presumably Redis, going by the factory's name - confirm.
	wordStream
			.groupBy(new Fields("randomWord"))
			.persistentAggregate(new RedisStoreStateFactory(), new Count(), new Fields("count"));

	return topology.build();
}
 
Example 21
Project: storm-trident-example   File: ExampleTopology.java   Source Code and License 5 votes vote down vote up
/**
 * Builds a topology that keeps a persistent count per random word.
 *
 * <p>Tuples from {@code RandomPhraseSpout} go through {@code RandomPhraseSplitter} (reads
 * "randomPhrase", adds "randomWord"), are grouped by "randomWord", and are counted with
 * {@code Count} into the state created by {@code RedisStoreStateFactory}. Here
 * {@code persistentAggregate} is applied to a grouped stream.
 *
 * @return the built topology
 */
public static StormTopology buildTopology() {
	TridentTopology topology = new TridentTopology();
	RandomPhraseSpout phraseSource = new RandomPhraseSpout();

	// "dumbo" is handed to newStream as its first argument and is not used again below.
	Stream phrases = topology.newStream("dumbo", phraseSource);

	// Emit "randomWord" for each incoming "randomPhrase".
	Stream randomWords = phrases.each(
			new Fields("randomPhrase"),
			new RandomPhraseSplitter(),
			new Fields("randomWord"));

	// persistentAggregate receives the state factory, the Count aggregator and the output
	// field "count". Grouping by "randomWord" is similar to a Storm fields grouping.
	// NOTE(review): presumably persisted in Redis, based on the factory name - confirm.
	randomWords
			.groupBy(new Fields("randomWord"))
			.persistentAggregate(new RedisStoreStateFactory(), new Count(), new Fields("count"));

	return topology.build();
}
 
Example 22
Project: storm-trident-example   File: ExampleTopology.java   Source Code and License 5 votes vote down vote up
/**
 * Builds the random-phrase counting topology.
 *
 * <p>The {@code RandomPhraseSpout} stream is passed through {@code RandomPhraseSplitter}
 * ("randomPhrase" in, "randomWord" added), grouped by "randomWord", and counted with
 * {@code Count} into the state created by {@code RedisStoreStateFactory}.
 *
 * @return the built topology
 */
public static StormTopology buildTopology() {
	TridentTopology topology = new TridentTopology();
	RandomPhraseSpout randomPhrases = new RandomPhraseSpout();

	// The string "dumbo" is only ever passed to newStream in this method.
	Stream input = topology.newStream("dumbo", randomPhrases);

	// Each phrase yields tuples carrying the extra "randomWord" field.
	Stream perWord = input.each(
			new Fields("randomPhrase"),
			new RandomPhraseSplitter(),
			new Fields("randomWord"));

	// The three persistentAggregate arguments are the state factory, the Count aggregator
	// and the output field "count". The groupBy is similar to Storm's fields grouping.
	// NOTE(review): backing store is presumably Redis (from the factory name) - confirm.
	perWord
			.groupBy(new Fields("randomWord"))
			.persistentAggregate(new RedisStoreStateFactory(), new Count(), new Fields("count"));

	return topology.build();
}
 
Example 23
Project: storm-trident-example   File: ExampleTopology.java   Source Code and License 5 votes vote down vote up
/**
 * Builds a topology that counts the words found in random phrases.
 *
 * <p>The {@code RandomPhraseSpout} stream is passed through {@code RandomPhraseSplitter}
 * ("randomPhrase" in, "randomWord" added), grouped by "randomWord", and counted with
 * {@code Count} into the state created by {@code RedisStoreStateFactory}.
 *
 * @return the built topology
 */
public static StormTopology buildTopology() {
	TridentTopology topology = new TridentTopology();
	RandomPhraseSpout spout = new RandomPhraseSpout();

	// "dumbo" appears only as the first newStream argument; nothing else here reads it.
	Stream rawPhrases = topology.newStream("dumbo", spout);

	// Add a "randomWord" field derived from "randomPhrase".
	Stream splitWords = rawPhrases.each(
			new Fields("randomPhrase"),
			new RandomPhraseSplitter(),
			new Fields("randomWord"));

	// persistentAggregate takes the state factory, the Count aggregator and the output field
	// "count", in that order. Grouping on "randomWord" is similar to a Storm fields grouping.
	// NOTE(review): counts presumably end up in Redis, per the factory name - confirm.
	splitWords
			.groupBy(new Fields("randomWord"))
			.persistentAggregate(new RedisStoreStateFactory(), new Count(), new Fields("count"));

	return topology.build();
}
 
Example 24
Project: LearnStorm   File: TridentKafkaWordCount.java   Source Code and License 5 votes vote down vote up
/**
 * Attaches a word-count pipeline to the given topology: tuples from the spout returned by
 * {@code createKafkaSpout()} are split into words, grouped by word and counted into a
 * {@code MemoryMapState}.
 *
 * @param tridentTopology topology on which the "spout1" stream is created
 * @return the state produced by the persistent aggregation, with a parallelism hint of 1
 */
private TridentState addTridentState(TridentTopology tridentTopology) {
    final TridentState wordCounts = tridentTopology
            .newStream("spout1", createKafkaSpout())
            .parallelismHint(1)
            // "str" is the input field given to Split; its output is declared as "word".
            .each(new Fields("str"), new Split(), new Fields("word"))
            .groupBy(new Fields("word"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"));

    return wordCounts.parallelismHint(1);
}
 
Example 25
Project: storm-example   File: OutbreakDetectionTopology.java   Source Code and License 5 votes vote down vote up
/**
 * Builds the outbreak-detection topology: diagnosis events are filtered, tagged with a city and
 * an hour, counted per "cityDiseaseHour" key, and the updated counts are passed through
 * {@code OutbreakDetector} and {@code DispatchAlert}.
 *
 * @return the assembled topology
 */
public static StormTopology buildTopology() {
    final TridentTopology topology = new TridentTopology();
    final Stream events = topology.newStream("event", new DiagnosisEventSpout());

    // Filter the events, then append the "city", "hour" and "cityDiseaseHour" fields.
    final Stream keyedEvents = events
            .each(new Fields("event"), new DiseaseFilter())
            .each(new Fields("event"), new CityAssignment(), new Fields("city"))
            .each(new Fields("event", "city"), new HourAssignment(), new Fields("hour", "cityDiseaseHour"));

    // Count per key in the state created by OutbreakTrendFactory and expose the new values as a stream.
    final Stream updatedCounts = keyedEvents
            .groupBy(new Fields("cityDiseaseHour"))
            .persistentAggregate(new OutbreakTrendFactory(), new Count(), new Fields("count"))
            .newValuesStream();

    // Turn counts into "alert" tuples and hand them to DispatchAlert, which declares no output fields.
    updatedCounts
            .each(new Fields("cityDiseaseHour", "count"), new OutbreakDetector(), new Fields("alert"))
            .each(new Fields("alert"), new DispatchAlert(), new Fields());

    return topology.build();
}
 
Example 26
Project: storm-trident-example   File: ExampleTopology.java   Source Code and License 5 votes vote down vote up
/**
 * Assembles the word-count topology: phrases emitted by a {@code RandomPhraseSpout} are split
 * into words, grouped per word and counted, and the counts are handed to the state created by
 * {@code RedisStoreStateFactory}.
 *
 * @return the topology built from the pipeline described above
 */
public static StormTopology buildTopology() {
	final TridentTopology topology = new TridentTopology();

	// "dumbo" is the id the spout's stream is registered under; nothing else in this method refers to it.
	final Stream phrases = topology.newStream("dumbo", new RandomPhraseSpout());

	// RandomPhraseSplitter is fed the "randomPhrase" field and its output is declared as "randomWord".
	final Stream words = phrases.each(
			new Fields("randomPhrase"), new RandomPhraseSplitter(), new Fields("randomWord"));

	// Group by word (similar to a fields grouping in plain Storm), then let persistentAggregate
	// apply the built-in Count aggregator per group and store the result under the "count" field.
	// NOTE(review): the backing store is presumably Redis, judging by the factory name - confirm.
	words.groupBy(new Fields("randomWord"))
			.persistentAggregate(new RedisStoreStateFactory(), new Count(), new Fields("count"));

	return topology.build();
}
 
Example 27
Project: storm-trident-example   File: ExampleTopology.java   Source Code and License 5 votes vote down vote up
/**
 * Assembles the word-count topology: phrases emitted by a {@code RandomPhraseSpout} are split
 * into words, grouped per word and counted, and the counts are handed to the state created by
 * {@code RedisStoreStateFactory}.
 *
 * @return the topology built from the pipeline described above
 */
public static StormTopology buildTopology() {
	final TridentTopology topology = new TridentTopology();

	// "dumbo" is the id the spout's stream is registered under; nothing else in this method refers to it.
	final Stream phrases = topology.newStream("dumbo", new RandomPhraseSpout());

	// RandomPhraseSplitter is fed the "randomPhrase" field and its output is declared as "randomWord".
	final Stream words = phrases.each(
			new Fields("randomPhrase"), new RandomPhraseSplitter(), new Fields("randomWord"));

	// Group by word (similar to a fields grouping in plain Storm), then let persistentAggregate
	// apply the built-in Count aggregator per group and store the result under the "count" field.
	// NOTE(review): the backing store is presumably Redis, judging by the factory name - confirm.
	words.groupBy(new Fields("randomWord"))
			.persistentAggregate(new RedisStoreStateFactory(), new Count(), new Fields("count"));

	return topology.build();
}
 
Example 28
Project: storm-trident-example   File: ExampleTopology.java   Source Code and License 5 votes vote down vote up
/**
 * Assembles the word-count topology: phrases emitted by a {@code RandomPhraseSpout} are split
 * into words, grouped per word and counted, and the counts are handed to the state created by
 * {@code RedisStoreStateFactory}.
 *
 * <p>{@code persistentAggregate} is invoked on the grouped stream here, i.e. it is the
 * grouping-aware way of updating state (the original author contrasts it with
 * {@code partitionPersist}).
 *
 * @return the topology built from the pipeline described above
 */
public static StormTopology buildTopology() {
	final TridentTopology topology = new TridentTopology();

	// "dumbo" is the id the spout's stream is registered under; nothing else in this method refers to it.
	final Stream phrases = topology.newStream("dumbo", new RandomPhraseSpout());

	// RandomPhraseSplitter is fed the "randomPhrase" field and its output is declared as "randomWord".
	final Stream words = phrases.each(
			new Fields("randomPhrase"), new RandomPhraseSplitter(), new Fields("randomWord"));

	// Group by word (similar to a fields grouping in plain Storm), then let persistentAggregate
	// apply the built-in Count aggregator per group and store the result under the "count" field.
	// NOTE(review): the backing store is presumably Redis, judging by the factory name - confirm.
	words.groupBy(new Fields("randomWord"))
			.persistentAggregate(new RedisStoreStateFactory(), new Count(), new Fields("count"));

	return topology.build();
}
 
Example 29
Project: storm-trident-example   File: ExampleTopology.java   Source Code and License 5 votes vote down vote up
/**
 * Assembles the word-count topology: phrases emitted by a {@code RandomPhraseSpout} are split
 * into words, grouped per word and counted, and the counts are handed to the state created by
 * {@code RedisStoreStateFactory}.
 *
 * @return the topology built from the pipeline described above
 */
public static StormTopology buildTopology() {
	final TridentTopology topology = new TridentTopology();

	// "dumbo" is the id the spout's stream is registered under; nothing else in this method refers to it.
	final Stream phrases = topology.newStream("dumbo", new RandomPhraseSpout());

	// RandomPhraseSplitter is fed the "randomPhrase" field and its output is declared as "randomWord".
	final Stream words = phrases.each(
			new Fields("randomPhrase"), new RandomPhraseSplitter(), new Fields("randomWord"));

	// Group by word (similar to a fields grouping in plain Storm), then let persistentAggregate
	// apply the built-in Count aggregator per group and store the result under the "count" field.
	// NOTE(review): the backing store is presumably Redis, judging by the factory name - confirm.
	words.groupBy(new Fields("randomWord"))
			.persistentAggregate(new RedisStoreStateFactory(), new Count(), new Fields("count"));

	return topology.build();
}
 
Example 30
Project: storm-trident-example   File: ExampleTopology.java   Source Code and License 5 votes vote down vote up
/**
 * Assembles the word-count topology: phrases emitted by a {@code RandomPhraseSpout} are split
 * into words, grouped per word and counted, and the counts are handed to the state created by
 * {@code RedisStoreStateFactory}.
 *
 * @return the topology built from the pipeline described above
 */
public static StormTopology buildTopology() {
	final TridentTopology topology = new TridentTopology();

	// "dumbo" is the id the spout's stream is registered under; nothing else in this method refers to it.
	final Stream phrases = topology.newStream("dumbo", new RandomPhraseSpout());

	// RandomPhraseSplitter is fed the "randomPhrase" field and its output is declared as "randomWord".
	final Stream words = phrases.each(
			new Fields("randomPhrase"), new RandomPhraseSplitter(), new Fields("randomWord"));

	// Group by word (similar to a fields grouping in plain Storm), then let persistentAggregate
	// apply the built-in Count aggregator per group and store the result under the "count" field.
	// NOTE(review): the backing store is presumably Redis, judging by the factory name - confirm.
	words.groupBy(new Fields("randomWord"))
			.persistentAggregate(new RedisStoreStateFactory(), new Count(), new Fields("count"));

	return topology.build();
}
 
Example 31
Project: trident-tutorial   File: Part03_AdvancedPrimitives2.java   Source Code and License 5 votes vote down vote up
/**
 * Builds a topology with two pipelines over the same spout: one computing a mean age per city
 * through chained aggregations, one persisting a per-city count and printing the updated values.
 *
 * @param spout source of the tuples; the pipelines read its "city" and "age" fields
 * @return the assembled topology
 * @throws IOException declared by the signature; nothing visible in this method throws it
 */
private static StormTopology advancedPrimitives(FeederBatchSpout spout) throws IOException {
    final TridentTopology topology = new TridentTopology();

    // Several aggregations over the same group are expressed with chainedAgg()/chainEnd():
    // here a count and a sum of "age", which are then combined into "mean_age".
    final Fields meanInputs = new Fields("age_sum", "count");
    final Fields meanOutput = new Fields("mean_age");
    topology.newStream("aggregation", spout)
            .groupBy(new Fields("city"))
            .chainedAgg()
            .aggregate(new Count(), new Fields("count"))
            .aggregate(new Fields("age"), new Sum(), new Fields("age_sum"))
            .chainEnd()
            .each(meanInputs, new DivideAsDouble(), meanOutput)
            .each(new Fields("city", "mean_age"), new Print());

    // To keep processing the results of a persisted aggregation, continue from
    // newValuesStream() on the state returned by persistentAggregate.
    final Fields printedFields = new Fields("city", "count");
    topology.newStream("further", spout)
            .groupBy(new Fields("city"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
            .newValuesStream()
            .each(printedFields, new Print());

    return topology.build();
}
 
Example 32
Project: sentweet   File: TridentWordCount.java   Source Code and License 5 votes vote down vote up
/**
 * Builds the Trident word-count topology: a cycling fixed spout of sentences feeds a persistent
 * per-word count, and a "words" DRPC stream sums the stored counts of the words in a request.
 *
 * @param drpc DRPC handle the "words" stream is attached to
 * @return the assembled topology
 */
public static StormTopology buildTopology(LocalDRPC drpc) {
    // Fixed set of sentences, emitted under the "sentence" field with cycling switched on.
    final FixedBatchSpout sentences = new FixedBatchSpout(
            new Fields("sentence"),
            3,
            new Values("the cow jumped over the moon"),
            new Values("the man went to the store and bought some candy"),
            new Values("four score and seven years ago"),
            new Values("how many apples can you eat"),
            new Values("to be or not to be the person"));
    sentences.setCycle(true);

    final TridentTopology topology = new TridentTopology();

    // Sentence -> words -> grouped by word -> counted into an in-memory map state.
    final TridentState persistedCounts = topology.newStream("spout1", sentences)
            .parallelismHint(16)
            .each(new Fields("sentence"), new Split(), new Fields("word"))
            .groupBy(new Fields("word"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"));
    final TridentState wordCounts = persistedCounts.parallelismHint(16);

    // DRPC query: split the request args into words, look each one up in the state,
    // drop the words that have no stored count and sum the remaining counts.
    topology.newDRPCStream("words", drpc)
            .each(new Fields("args"), new Split(), new Fields("word"))
            .groupBy(new Fields("word"))
            .stateQuery(wordCounts, new Fields("word"), new MapGet(), new Fields("count"))
            .each(new Fields("count"), new FilterNull())
            .aggregate(new Fields("count"), new Sum(), new Fields("sum"));

    return topology.build();
}
 
Example 33
Project: t4f-data   File: TridentWordCount.java   Source Code and License 5 votes vote down vote up
/**
 * Builds the Trident word-count topology: a cycling fixed spout of sentences feeds a persistent
 * per-word count, and a "words" DRPC stream sums the stored counts of the words in a request.
 *
 * @param drpc DRPC handle the "words" stream is attached to
 * @return the assembled topology
 */
public static StormTopology buildTopology(LocalDRPC drpc) {
    // Fixed set of sentences, emitted under the "sentence" field with cycling switched on.
    @SuppressWarnings("unchecked")
    final FixedBatchSpout sentences = new FixedBatchSpout(
            new Fields("sentence"),
            3,
            new Values("the cow jumped over the moon"),
            new Values("the man went to the store and bought some candy"),
            new Values("four score and seven years ago"),
            new Values("how many apples can you eat"),
            new Values("to be or not to be the person"));
    sentences.setCycle(true);

    final TridentTopology topology = new TridentTopology();

    // Sentence -> words -> grouped by word -> counted into an in-memory map state.
    final TridentState persistedCounts = topology.newStream("spout1", sentences)
            .parallelismHint(16)
            .each(new Fields("sentence"), new Split(), new Fields("word"))
            .groupBy(new Fields("word"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"));
    final TridentState wordCounts = persistedCounts.parallelismHint(16);

    // DRPC query: split the request args into words, look each one up in the state,
    // drop the words that have no stored count and sum the remaining counts.
    topology.newDRPCStream("words", drpc)
            .each(new Fields("args"), new Split(), new Fields("word"))
            .groupBy(new Fields("word"))
            .stateQuery(wordCounts, new Fields("word"), new MapGet(), new Fields("count"))
            .each(new Fields("count"), new FilterNull())
            .aggregate(new Fields("count"), new Sum(), new Fields("sum"));

    return topology.build();
}
 
Example 34
Project: LearnStorm   File: TestTridentTopology.java   Source Code and License 4 votes vote down vote up
/**
 * Builds a Trident word-count topology with a "words" DRPC stream, issues one DRPC request
 * against a remote DRPC server, then runs the topology for one second on a local cluster.
 *
 * <p>The DRPC client and the local cluster are released in {@code finally} blocks so that a
 * failure in the request or in the submission does not leave them open.
 *
 * <p>NOTE(review): the DRPC request is sent before the topology is submitted to the local
 * cluster, so it can only be answered by a "words" function that is already registered on the
 * remote DRPC server - confirm this ordering is intended.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if the DRPC call or the cluster operations fail
 */
public static void main(String[] args) throws Exception {

	TridentTopology topology = new TridentTopology();
	Config conf = new Config();

	@SuppressWarnings("unchecked")
	FixedBatchSpout spout = new FixedBatchSpout(
			new Fields("sentence"), 3,
			new Values("the cow jumped over the moon"),
			new Values("the man went to the store and bought some candy"),
			new Values("four score and seven years ago"),
			new Values("how many apples can you eat"));
	spout.setCycle(true);

	// Sentence -> words -> grouped by word -> counted into an in-memory map state.
	TridentState wordCounts = topology.newStream("spout1", spout)
			.each(new Fields("sentence"), new Split(), new Fields("word"))
			.groupBy(new Fields("word"))
			.persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
			.parallelismHint(6);

	// DRPC query: MapGet looks up the stored count of each requested word; words without a
	// stored count are filtered out and the remaining counts are summed.
	topology.newDRPCStream("words")
			.each(new Fields("args"), new Split(), new Fields("word"))
			.groupBy(new Fields("word"))
			.stateQuery(wordCounts, new Fields("word"), new MapGet(), new Fields("count"))
			.each(new Fields("count"), new FilterNull())
			.aggregate(new Fields("count"), new Sum(), new Fields("sum"));

	conf.setDebug(true);

	// Transport and retry settings read by the DRPC client below.
	conf.put("storm.thrift.transport", "backtype.storm.security.auth.SimpleTransportPlugin");
	conf.put(Config.STORM_NIMBUS_RETRY_TIMES, 3);
	conf.put(Config.STORM_NIMBUS_RETRY_INTERVAL, 10);
	conf.put(Config.STORM_NIMBUS_RETRY_INTERVAL_CEILING, 20);
	conf.put(Config.DRPC_MAX_BUFFER_SIZE, 1048576);

	DRPCClient client = new DRPCClient(conf, "hdp02.localdomain", 3772);
	try {
		System.out.println(client.execute("words", "cat dog the man"));
	} finally {
		// The client is not used again, so release its connection right away.
		client.close();
	}

	LocalCluster cluster = new LocalCluster();
	try {
		cluster.submitTopology("test", conf, topology.build());

		Utils.sleep(1000);

		cluster.killTopology("test");
	} finally {
		// Always tear the local cluster down, even when submit/kill throws.
		cluster.shutdown();
	}
}
 
Example 35
Project: SmashBrosTwitterAnalytics   File: SmashBrosTwitterTopology.java   Source Code and License 4 votes vote down vote up
/**
 * Builds the tweet-analytics topology: tweets from {@code SmashBrosTweetsSpout} are reduced to
 * their text, from which two independent counts are kept in memory - one per word and one per
 * character reference.
 *
 * @param drpc not used by this method
 * @return the assembled topology
 */
private static StormTopology buildTopology(LocalDRPC drpc) {
	final TwitterDeveloperAccount account = new TwitterDeveloperAccount(
			TwitterDebugAuthenticationData.ACCESS_TOKEN,
			TwitterDebugAuthenticationData.ACCESS_TOKEN_SECRET,
			TwitterDebugAuthenticationData.API_KEY,
			TwitterDebugAuthenticationData.API_SECRET);
	final TridentTopology topology = new TridentTopology();

	final Stream tweets = topology.newStream("smashbros-tweets-spout", new SmashBrosTweetsSpout(account));

	/*
	 * Disabled: persisting the raw tweets through SmashBrosTweetsDatabaseState.
	 *
	 * TridentState persistedTweets = tweets.partitionPersist(new SmashBrosTweetsDatabaseState.Factory(),
	 *         new Fields("tweet"), new BaseStateUpdater<SmashBrosTweetsDatabaseState>() {
	 *     private static final long serialVersionUID = -2160953537837069611L;
	 *
	 *     @Override
	 *     public void updateState(SmashBrosTweetsDatabaseState state, List<TridentTuple> tuples,
	 *             TridentCollector collector) {
	 *         List<Object> tweetIds = new ArrayList<Object>();
	 *         List<Object> tweets = new ArrayList<Object>();
	 *         for (TridentTuple t : tuples) {
	 *             tweetIds.add(((Tweet) t.get(0)).getId());
	 *             tweets.add(t.get(0));
	 *         }
	 *         state.multiUpdate(tweetIds, tweets);
	 *     }
	 * });
	 */

	// TweetTextExtractor is fed the "tweet" field and its output is declared as "tweet-text".
	final Stream tweetTexts = tweets.each(
			new Fields("tweet"), new TweetTextExtractor(), new Fields("tweet-text"));

	// Count per word produced by TweetWordsFilterAndSplit.
	final TridentState wordCounts = tweetTexts
			.each(new Fields("tweet-text"), new TweetWordsFilterAndSplit(), new Fields("word"))
			.groupBy(new Fields("word"))
			.persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
			.parallelismHint(6);

	// Count per character reference produced by CharactersReferencesIdentifier.
	final TridentState charactersRank = tweetTexts
			.each(new Fields("tweet-text"), new CharactersReferencesIdentifier(), new Fields("charRef"))
			.groupBy(new Fields("charRef"))
			.persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
			.parallelismHint(6);

	// Disabled debug taps on the two states:
	// wordCounts.newValuesStream().each(new Fields("count"), new Debug());
	// charactersRank.newValuesStream().each(new Fields("count"), new Debug());

	return topology.build();
}
 
Example 36
Project: SketchOnStorm   File: FlowAnalysisTopologyBuilder.java   Source Code and License 4 votes vote down vote up
/**
 * Builds the topology for IP flow analysis.
 *
 * <p>Flow tuples are grouped into per-minute buckets and aggregated twice: once with
 * {@code IpConversationSketch} over the source/destination IP pair, and once with the built-in
 * {@code Count}. Each result is exposed through its own DRPC stream, keyed by minute of day.
 *
 * @return the assembled topology
 */
public static final StormTopology buildTopology() {

    TridentTopology topology = new TridentTopology();

    // Spout producing the flow tuples.
    FlowGenerator dataGen = new FlowGenerator();

    // One state factory per aggregation; both are simple in-memory, HashMap-backed stores.
    StateFactory mapState = new MemoryMapState.Factory();
    StateFactory conversationMapState = new MemoryMapState.Factory();

    Stream ipFlowStream = topology
            .newStream(STREAM_NAME, dataGen)
            .parallelismHint(4);

    // Per-minute conversation sketch: source IP + destination IP identify a conversation.
    TridentState counterState =
            ipFlowStream
                    // group by minutely bucket
                    .groupBy(new Fields(Names.MIN_OF_DAY_FLD))
                    .persistentAggregate(mapState, new Fields(Names.SOURCE_IP_FLD, Names.DEST_IP_FLD),
                            new IpConversationSketch(),
                            new Fields("ConversationsCount"));

    // Per-minute count of all conversations.
    TridentState globalCountPerMin =
            ipFlowStream
                    // group by minutely bucket
                    .groupBy(new Fields(Names.MIN_OF_DAY_FLD))
                    .persistentAggregate(conversationMapState, new Fields(Names.MIN_OF_DAY_FLD),
                            new Count(),
                            new Fields("ConversationCountPerMin"));

    // DRPC streams on which the queries are executed, attached to the local DRPC instance.
    topology.newDRPCStream(UNIQUE_CONVERSATION_COUNT, StormClusterStore.getInstance().getLocalDRPC())

            // the string args carry the minute of day (the bucket used for unique counts)
            .each(new Fields("args"), new Split(), new Fields("FLD"))
            // MIN_OF_DAY_FLD is the key of the map state; Integer.valueOf replaces the
            // deprecated boxing constructor new Integer(1)
            .each(new Fields("FLD"), new DataTypeConvert(Integer.valueOf(1)), new Fields(Names.MIN_OF_DAY_FLD))
            // look the bucket up in the sketch state
            .stateQuery(counterState, new Fields(Names.MIN_OF_DAY_FLD),
                    new MapGet(), new Fields(Names.COUNTER_VALS_FLD))

            // filter out the NULLs
            .each(new Fields(Names.COUNTER_VALS_FLD), new FilterNull())

            // convert the HLL sketch to a Base64 encoded String,
            // since drpc.execute results can only be strings
            // TODO: if possible define another combiner to combine multiple results from DRPC
            .each(new Fields(Names.COUNTER_VALS_FLD), new HLLToStrConverter(Names.COUNTER_VALS_FLD),
                    new Fields(Names.CONVERSATION_COUNT_FLD))
            .project(new Fields(Names.CONVERSATION_COUNT_FLD));

    topology.newDRPCStream(CONVERSATION_COUNT, StormClusterStore.getInstance().getLocalDRPC())

            // the string args carry the minute of day (the bucket used for the counts)
            .each(new Fields("args"), new Split(), new Fields("FLD"))
            // MIN_OF_DAY_FLD is the key of the map state
            .each(new Fields("FLD"), new DataTypeConvert(Integer.valueOf(1)), new Fields(Names.MIN_OF_DAY_FLD))

            // look the bucket up in the count state
            .stateQuery(globalCountPerMin, new Fields(Names.MIN_OF_DAY_FLD),
                    new MapGet(), new Fields(Names.COUNTER_VALS_FLD))

            // filter out the NULLs
            .each(new Fields(Names.COUNTER_VALS_FLD), new FilterNull());

    return topology.build();
}
 
Example 37
Project: C9-Internet-radio-play-stats   File: TopologyBuilder.java   Source Code and License 4 votes vote down vote up
/**
 * Builds the play-statistics topology: play-log entries are deserialized and sanitized, then
 * counted per artist, per title and per tag; a DRPC stream answers count requests by tag.
 *
 * @param drpc DRPC handle the "count-request-by-tag" stream is attached to
 * @return the assembled topology
 */
private static StormTopology build(ILocalDRPC drpc) {
  final TridentTopology topology = new TridentTopology();

  // "play-log" -> ("artist", "title", "tags"), then Sanitizer applied over artist and title.
  final Stream deserialized = topology
      .newStream("play-spout", buildSpout())
      .each(new Fields("play-log"), new LogDeserializer(), new Fields("artist", "title", "tags"));
  final Stream playStream = deserialized
      .each(new Fields("artist", "title"), new Sanitizer(new Fields("artist", "title")))
      .name("LogDeserializerSanitizer");

  // Plays per artist.
  final TridentState artistCounts = playStream
      .project(new Fields("artist"))
      .groupBy(new Fields("artist"))
      .name("ArtistCounts")
      .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("artist-count"))
      .parallelismHint(4);

  // Plays per title.
  final TridentState titleCounts = playStream
      .project(new Fields("title"))
      .groupBy(new Fields("title"))
      .name("TitleCounts")
      .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("title-count"))
      .parallelismHint(4);

  // Plays per tag: ListSplitter is fed "tags" and its output is declared as "tag".
  final TridentState tagCounts = playStream
      .each(new Fields("tags"), new ListSplitter(), new Fields("tag"))
      .project(new Fields("tag"))
      .groupBy(new Fields("tag"))
      .name("TagCounts")
      .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("tag-count"))
      .parallelismHint(4);

  // DRPC: split the request args on "," into tags and look each one up in the tag counts.
  topology.newDRPCStream("count-request-by-tag", drpc)
      .name("RequestForTagCounts")
      .each(new Fields("args"), new SplitOnDelimiter(","), new Fields("tag"))
      .groupBy(new Fields("tag"))
      .name("QueryForRequest")
      .stateQuery(tagCounts, new Fields("tag"), new MapGet(), new Fields("count"));

  return topology.build();
}
 
Example 38
Project: trident-tutorial   File: Part04_BasicStateAndDRPC.java   Source Code and License 4 votes vote down vote up
/**
 * Builds a topology that keeps a per-actor count in an in-memory state and exposes it through
 * two DRPC functions: "count_per_actor" and "count_per_actors".
 *
 * @param drpc  DRPC handle the DRPC streams are attached to
 * @param spout source of the tuples; the pipeline groups on its "actor" field
 * @return the assembled topology
 * @throws IOException declared by the signature; nothing visible in this method throws it
 */
private static StormTopology basicStateAndDRPC(LocalDRPC drpc, FeederBatchSpout spout) throws IOException {
    final TridentTopology topology = new TridentTopology();

    // persistentAggregate stores the result of an aggregation in a data store, where other
    // applications can read it; the returned TridentState can also be used by other
    // topologies. The state is commonly backed by a store such as memcache or cassandra;
    // here it is simply a hash map.
    final TridentState countState = topology
            .newStream("spout", spout)
            .groupBy(new Fields("actor"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"));

    // Ready-made state libraries exist as well. Disabled example using memcached:
    //
    // List<InetSocketAddress> memcachedServerLocations =
    //         ImmutableList.of(new InetSocketAddress("some.memcached.server", 12000));
    // TridentState countStateMemcached = topology
    //         .newStream("spout", spout)
    //         .groupBy(new Fields("actor"))
    //         .persistentAggregate(MemcachedState.transactional(memcachedServerLocations),
    //                 new Count(), new Fields("count"));

    // DRPC stands for Distributed Remote Procedure Call. Calls are issued with the DRPC client
    // library and take two Strings: the function name and the function arguments. The
    // arguments are available in the stream as the "args" field.

    // Disabled examples - usual processing primitives apply to DRPC streams too:
    //
    // topology.newDRPCStream("ping", drpc)
    //         .each(new Fields("args"), new Split(" "), new Fields("reply"))
    //         .each(new Fields("reply"), new RegexFilter("ping"))
    //         .project(new Fields("reply"));
    //
    // topology.newDRPCStream("count", drpc)
    //         .each(new Fields("args"), new Split(" "), new Fields("split"))
    //         .each(new Fields("split"), new RegexFilter("a.*"))
    //         .groupBy(new Fields("split"))
    //         .aggregate(new Count(), new Fields("count"));

    // More usefully, query the state created above. Call this one with "count_per_actor" as
    // the function name; the raw args are used directly as the lookup key.
    final Fields lookupKey = new Fields("args");
    topology.newDRPCStream("count_per_actor", drpc)
            .stateQuery(countState, lookupKey, new MapGet(), new Fields("count"));

    // A more complex query: split the args on " " into actors, look each one up, drop the
    // actors without a stored count and sum the rest into "count".
    final Fields perActorCount = new Fields("individual_count");
    topology.newDRPCStream("count_per_actors", drpc)
            .each(new Fields("args"), new Split(" "), new Fields("actor"))
            .groupBy(new Fields("actor"))
            .stateQuery(countState, new Fields("actor"), new MapGet(), perActorCount)
            .each(new Fields("individual_count"), new FilterNull())
            .aggregate(new Fields("individual_count"), new Sum(), new Fields("count"));

    // For how to issue the DRPC calls, go back to the main method.

    return topology.build();
}
 
Example 39
Project: trident-tutorial   File: Part01_BasicPrimitives.java   Source Code and License 4 votes vote down vote up
/**
 * Builds a tutorial topology demonstrating the basic Trident primitives on one spout:
 * filtering, functions, projection, parallelism hints, per-batch aggregation and persistent
 * aggregation. Every stream is registered under its own id.
 *
 * @param spout batch spout feeding all streams; the pipelines read its "actor" and "text" fields
 * @return the assembled topology
 * @throws IOException declared by the signature; nothing visible in this method throws it
 */
public static StormTopology basicPrimitives(IBatchSpout spout) throws IOException {

    // A topology is a set of streams.
    // A stream is a DAG of Spouts and Bolts.
    // (In Storm there are Spouts (data producers) and Bolts (data processors).
    // Spouts create Tuples and Bolts manipulate them and possibly emit new ones.)

    // But in Trident we operate at a higher level.
    // Bolts are created and connected automatically out of higher-level constructs.
    // Also, Spouts are "batched".
    TridentTopology topology = new TridentTopology();

    // The "each" primitive allows us to apply either filters or functions to the stream.
    // We always have to select the input fields.
    topology
            .newStream("filter", spout)
            .each(new Fields("actor"), new RegexFilter("pere"))
            .each(new Fields("text", "actor"), new Print());

    // Functions describe their output fields, which are always appended to the input fields.
    // As you see, "each" operations can be chained.
    topology
            .newStream("function", spout)
            .each(new Fields("text"), new ToUpperCase(), new Fields("uppercased_text"))
            .each(new Fields("text", "uppercased_text"), new Print());

    // You can prune unnecessary fields using "project".
    topology
            .newStream("projection", spout)
            .each(new Fields("text"), new ToUpperCase(), new Fields("uppercased_text"))
            .project(new Fields("uppercased_text"))
            .each(new Fields("uppercased_text"), new Print());

    // A stream can be parallelized with "parallelismHint".
    // The parallelism hint is applied downwards until a partitioning operation (we will see this later).
    // According to the original author this topology creates 5 spouts and 5 bolts;
    // TridentOperationContext.partitionIndex can be used to debug that.
    topology
            .newStream("parallel", spout)
            .each(new Fields("actor"), new RegexFilter("pere"))
            .parallelismHint(5)
            .each(new Fields("text", "actor"), new Print());

    // You can perform aggregations by grouping the stream and then applying an aggregation.
    // Note how each actor appears more than once: we are aggregating inside small batches (aka micro batches).
    // This is useful for pre-processing before storing the result to databases.
    topology
            .newStream("aggregation", spout)
            .groupBy(new Fields("actor"))
            .aggregate(new Count(), new Fields("count"))
            .each(new Fields("actor", "count"), new Print());

    // In order to aggregate across batches, we need persistentAggregate.
    // This example is incrementing a count in the DB, using the result of these micro batch aggregations
    // (here we are simply using a hash map for the "database").
    // The stream id differs from the one above: the original reused "aggregation" for both
    // streams, while every other stream in this method has its own id.
    topology
            .newStream("persistent_aggregation", spout)
            .groupBy(new Fields("actor"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"));

    return topology.build();
}
 
Example 40
Project: trident-tutorial   File: Part05_AdvancedStateAndDRPC.java   Source Code and License 4 votes vote down vote up
/**
 * Builds a topology that combines a pre-populated static state with a
 * continuously updated count state and queries both from the "age_stats"
 * DRPC stream.
 *
 * @param drpc  DRPC server the "age_stats" stream is attached to
 * @param spout spout backing the "spout" stream; its tuples are grouped by "actor" and "location"
 * @return the built topology
 */
private static StormTopology externalState(LocalDRPC drpc, FeederBatchSpout spout) {
    TridentTopology topology = new TridentTopology();

    // Static state standing in for an existing external "database" (name -> age).
    TridentState ageLookup = topology.newStaticState(new StateFactory() {
        @Override
        public State makeState(Map conf, IMetricsContext metrics, int partitionIndex, int numPartitions) {
            MemoryMapState<Integer> ageByName = new MemoryMapState<Integer>("name_to_age");
            // Pre-populate the state; keys and ages are handed to multiPut as two parallel lists.
            List<List<Object>> names = getKeys("ted", "mary", "jason", "tom", "chuck");
            List<Integer> ages = ImmutableList.of(32, 21, 45, 52, 18);
            ageByName.multiPut(names, ages);
            return ageByName;
        }
    });

    // Second state: number of appearances per (actor, location) pair.
    TridentState appearanceCounts = topology
            .newStream("spout", spout)
            .groupBy(new Fields("actor", "location"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"));

    // DRPC query: read (actor, location) from the count state, look up each actor's age,
    // then per location compute the tuple count and the age sum and derive "avg" from them.
    topology
            .newDRPCStream("age_stats", drpc)
            .stateQuery(appearanceCounts, new TupleCollectionGet(), new Fields("actor", "location"))
            .stateQuery(ageLookup, new Fields("actor"), new MapGet(), new Fields("age"))
            .each(new Fields("actor", "location", "age"), new Print())
            .groupBy(new Fields("location"))
            .chainedAgg()
            .aggregate(new Count(), new Fields("count"))
            .aggregate(new Fields("age"), new Sum(), new Fields("sum"))
            .chainEnd()
            .each(new Fields("sum", "count"), new DivideAsDouble(), new Fields("avg"))
            .project(new Fields("location", "count", "avg"));

    return topology.build();
}
 
Example 41
Project: openbus   File: OpenbusProcessorFileTopology.java   Source Code and License 4 votes vote down vote up
/**
 * Builds the file-based web-log topology: raw log lines are read from
 * "data/webserverlogs.json", converted by WebServerLog2Json, filtered, and
 * counted per request and per user. Two further branches (OpenTSDB, HDFS)
 * are wired only when the matching configuration entry equals Constant.YES.
 *
 * @param conf topology configuration; Conf.PROP_OPENTSDB_USE and Conf.PROP_HDFS_USE are read from it
 * @return the built topology
 */
public static StormTopology buildTopology(Config conf) {
    TridentTopology topology = new TridentTopology();

    // Output field names declared for the WebServerLog2Json step.
    String[] webLogFieldNames = {
        "host", "log", "user", "datetime", "request", "status", "size",
        "referer", "userAgent", "session", "responseTime", "timestamp", "json"
    };
    List<String> fieldsWebLog = new ArrayList<String>();
    for (String fieldName : webLogFieldNames) {
        fieldsWebLog.add(fieldName);
    }

    SimpleFileStringSpout logSpout = new SimpleFileStringSpout("data/webserverlogs.json", "rawLogs");
    logSpout.setCycle(true);

    // Shared upstream: parse each raw line into the web-log fields, then filter.
    Stream logs = topology.newStream("spout", logSpout)
            .each(new Fields("rawLogs"), new WebServerLog2Json(), new Fields(fieldsWebLog))
            .each(new Fields(fieldsWebLog), new WebServerLogFilter());

    // Persistent count keyed by (request, cq, cf).
    logs.each(new Fields("request", "datetime"), new DatePartition(), new Fields("cq", "cf"))
            .groupBy(new Fields("request", "cq", "cf"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
            .newValuesStream()
            .each(new Fields("request", "cq", "cf", "count"), new LogFilter());

    // Persistent count keyed by (user, cq, cf).
    logs.each(new Fields("user", "datetime"), new DatePartition(), new Fields("cq", "cf"))
            .groupBy(new Fields("user", "cq", "cf"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
            .newValuesStream()
            .each(new Fields("user", "cq", "cf", "count"), new LogFilter());

    // Optional OpenTSDB branch.
    if (Constant.YES.equals(conf.get(Conf.PROP_OPENTSDB_USE))) {
        LOG.info("OpenTSDB: " + conf.get(Conf.PROP_OPENTSDB_USE));
        logs.groupBy(new Fields(fieldsWebLog))
                .aggregate(new Fields(fieldsWebLog), new WebServerLog2TSDB(), new Fields("count"))
                .each(new Fields("request", "count"), new LogFilter());
    }

    // Optional HDFS branch.
    if (Constant.YES.equals(conf.get(Conf.PROP_HDFS_USE))) {
        LOG.info("HDFS: " + conf.get(Conf.PROP_HDFS_USE));
        logs.each(new Fields(fieldsWebLog), new HDFSPersistence(), new Fields("result"))
                .each(new Fields("result"), new LogFilter());
    }

    return topology.build();
}
 
Example 42
Project: openbus   File: OpenbusProcessorTopology.java   Source Code and License 4 votes vote down vote up
/**
 * Builds the Avro web-log topology: messages read from the broker spout are
 * decoded, given a "timestamp" field, filtered, and then counted three times
 * (per request, per user and per session), each count persisted through its
 * own HBase-backed state factory.
 *
 * @param options source of the HBase table names and row ids, the Kafka/ZooKeeper
 *                settings for the spout, and the path of the Avro schema file
 * @return the built topology
 * @throws IOException if the Avro schema file cannot be read or parsed
 */
public static StormTopology buildTopology(AvroWebLogTopologyOptions options) throws IOException {

    TridentConfig configRequest = new TridentConfig(options.getHbaseRequestTable(),
                                                    options.getHbaseRequestTableRowId());
    StateFactory stateRequest = HBaseAggregateState.transactional(configRequest);

    TridentConfig configUser = new TridentConfig(options.getHbaseUserTable(),
                                                 options.getHbaseUserTableRowId());
    StateFactory stateUser = HBaseAggregateState.transactional(configUser);

    TridentConfig configSession = new TridentConfig(options.getHbaseSessionTable(),
                                                    options.getHbaseSessionTableRowId());
    StateFactory stateSession = HBaseAggregateState.transactional(configSession);

    BrokerSpout openbusBrokerSpout = new BrokerSpout(options.getKafkaTopic(),
                                                     options.getZookeeper(),
                                                     options.getKafkaClientID(),
                                                     options.isForceFromStart());

    // We need to know which fields will be produced after decoding the Avro messages.
    // The Avro schema is used for that (even though it is not needed to decode the
    // messages, because it is embedded in them).
    Schema avroSchema = new Schema.Parser().parse(new File(options.getAvroSchema()));
    List<String> avroFieldNames = new ArrayList<>();
    for (Schema.Field avroField : avroSchema.getFields()) {
        avroFieldNames.add(avroField.name());
    }

    TridentTopology topology = new TridentTopology();
    Stream stream = topology.newStream("spout", openbusBrokerSpout.getPartitionedTridentSpout())
            .each(new Fields("bytes"), new AvroLogDecoder(), new Fields(avroFieldNames))
            // this step adds a "timestamp" field:
            .each(new Fields("datetime"), new DateTimeTransformation(), new Fields("timestamp"))
            .each(new Fields(avroFieldNames), new WebServerLogFilter());

    // Same counting pipeline three times; only the key field and the backing state differ.
    persistCountsBy(stream, "request", stateRequest);
    persistCountsBy(stream, "user", stateUser);
    persistCountsBy(stream, "session", stateSession);

    return topology.build();
}

/**
 * Attaches one counting branch to {@code stream}: derives "cq" and "cf" from
 * the key field and "datetime" via DatePartition, groups by (keyField, cq, cf),
 * persists a Count under "count" in the given state, and passes the updated
 * values through LogFilter.
 *
 * For a local test run, a {@code new MemoryMapState.Factory()} can be passed
 * as {@code stateFactory} instead of an HBase-backed one.
 *
 * @param stream       filtered web-log stream to branch from
 * @param keyField     name of the field to count by (e.g. "request")
 * @param stateFactory factory of the state the counts are persisted in
 */
private static void persistCountsBy(Stream stream, String keyField, StateFactory stateFactory) {
    stream.each(new Fields(keyField, "datetime"), new DatePartition(), new Fields("cq", "cf"))
            .groupBy(new Fields(keyField, "cq", "cf"))
            .persistentAggregate(stateFactory, new Count(), new Fields("count"))
            .newValuesStream()
            .each(new Fields(keyField, "cq", "cf", "count"), new LogFilter());
}
 
Example 43
Project: jstorm   File: TridentFastWordCount.java   Source Code and License 4 votes vote down vote up
/**
 * Builds a word-count topology over a cycling fixed batch of sentences:
 * each sentence is split into words, the words are grouped, and a running
 * count per word is kept in an in-memory map state.
 *
 * The three parallelism hints are read from {@code conf} (declared outside
 * this method) with defaults 1 (spout), 2 (split) and 2 (count).
 *
 * @param drpc not used by this method
 * @return the built topology
 */
public static StormTopology buildTopology(LocalDRPC drpc) {
    // Batches of up to 3 sentences, replayed in a cycle.
    FixedBatchSpout spout = new FixedBatchSpout(
            new Fields("sentence"),
            3,
            new Values("the cow jumped over the moon"),
            new Values("the man went to the store and bought some candy"),
            new Values("four score and seven years ago"),
            new Values("how many apples can you eat"),
            new Values("to be or not to be the person"),
            new Values("marry had a little lamb whos fleese was white as snow"),
            new Values("and every where that marry went the lamb was sure to go"),
            new Values("one two three four five six seven eight nine ten"),
            new Values("this is a test of the emergency broadcast system this is only a test"),
            new Values("peter piper picked a peck of pickeled peppers"),
            new Values("JStorm is a distributed and fault-tolerant realtime computation system."),
            new Values("Inspired by Apache Storm, JStorm has been completely rewritten in Java and provides many more enhanced features."),
            new Values("JStorm has been widely used in many enterprise environments and proved robust and stable."),
            new Values("JStorm provides a distributed programming framework very similar to Hadoop MapReduce."),
            new Values("The developer only needs to compose his/her own pipe-lined computation logic by implementing the JStorm API"),
            new Values(" which is fully compatible with Apache Storm API"),
            new Values("and submit the composed Topology to a working JStorm instance."),
            new Values("Similar to Hadoop MapReduce, JStorm computes on a DAG (directed acyclic graph)."),
            new Values("Different from Hadoop MapReduce, a JStorm topology runs 24 * 7"),
            new Values("the very nature of its continuity abd 100% in-memory architecture "),
            new Values("has been proved a particularly suitable solution for streaming data and real-time computation."),
            new Values("JStorm guarantees fault-tolerance."),
            new Values("Whenever a worker process crashes, "),
            new Values("the scheduler embedded in the JStorm instance immediately spawns a new worker process to take the place of the failed one."),
            new Values(" The Acking framework provided by JStorm guarantees that every single piece of data will be processed at least once."));
    spout.setCycle(true);

    // Per-stage parallelism, configurable with fallbacks.
    int spoutParallelism = JStormUtils.parseInt(conf.get(TOPOLOGY_SPOUT_PARALLELISM_HINT), 1);
    int splitParallelism = JStormUtils.parseInt(conf.get(TOPOLOGY_SPLIT_PARALLELISM_HINT), 2);
    int countParallelism = JStormUtils.parseInt(conf.get(TOPOLOGY_COUNT_PARALLELISM_HINT), 2);

    TridentTopology topology = new TridentTopology();
    TridentState wordCounts = topology
            .newStream("spout1", spout)
            .parallelismHint(spoutParallelism)
            .each(new Fields("sentence"), new Split(), new Fields("word"))
            .parallelismHint(splitParallelism)
            .groupBy(new Fields("word"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
            .parallelismHint(countParallelism);

    return topology.build();
}
 
Example 44
Project: t4f-data   File: TridentWordCount3.java   Source Code and License 4 votes vote down vote up
/**
 * Builds the "trident-word-count" topology (split sentences into words and
 * keep a persistent count per word) and runs it.
 *
 * When {@code RUN_LOCALLY} is set, the topology is submitted to an in-process
 * {@link LocalCluster}, left running for 10 seconds, then killed; the cluster
 * is shut down in all cases. Otherwise the topology is submitted through
 * {@link StormSubmitter}.
 *
 * @param args ignored
 */
public static void main(String... args) throws AlreadyAliveException, InvalidTopologyException, TException,
        DRPCExecutionException {

    String topologyName = "trident-word-count";

    // Batches of up to 100 sentences, replayed in a cycle.
    FixedBatchSpout spout = new FixedBatchSpout(new Fields("sentence"), 100, //
            new Values("the cow jumped over the moon"), //
            new Values("the man went to the store and bought some candy"), //
            new Values("four score and seven years ago"), //
            new Values("how many apples can you eat"));
    spout.setCycle(true);

    TridentTopology tridentTopology = new TridentTopology();
    // The resulting TridentState is not queried in this example, so it is not kept.
    tridentTopology.newStream("spout1", spout) //
            .each(new Fields("sentence"), new SplitFunction(), new Fields("word")) //
            .groupBy(new Fields("word")) //
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count")) //
            .parallelismHint(6);

    StormTopology stormTopology = tridentTopology.build();

    Config topologyConf = new Config();
    topologyConf.setMaxSpoutPending(50);
    if (RUN_LOCALLY) {

        topologyConf.setDebug(false);
        topologyConf.setNumWorkers(2);

        LocalCluster cluster = new LocalCluster();
        try {
            cluster.submitTopology(topologyName, topologyConf, stormTopology);

            Utils.sleep(10000);

            cluster.killTopology(topologyName);
        } finally {
            // Always release the in-process cluster, even if submit or kill failed.
            cluster.shutdown();
        }

    } else {
        topologyConf.setNumWorkers(1);
        StormSubmitter.submitTopology(topologyName, topologyConf, stormTopology);
    }

}
 
Example 45
Project: t4f-data   File: TridentWordCount2.java   Source Code and License 4 votes vote down vote up
/**
 * Runs the "trident-word-count" topology on an in-process cluster and issues
 * one DRPC query against it.
 *
 * The topology keeps a persistent count per word and exposes a "words" DRPC
 * function that splits its argument into words, looks up each word's count,
 * drops missing counts and sums the rest. After a 5 second warm-up the query
 * "cat dog the man" is executed and its result printed; after another
 * 5 seconds the topology is killed. Both the local cluster and the local DRPC
 * server are shut down in all cases.
 *
 * @param args ignored
 */
public static void main(String... args) throws AlreadyAliveException, InvalidTopologyException, TException,
        DRPCExecutionException {

    String topologyName = "trident-word-count";

    // Batches of up to 100 sentences, replayed in a cycle.
    FixedBatchSpout spout = new FixedBatchSpout(new Fields("sentence"), 100, //
            new Values("the cow jumped over the moon"), //
            new Values("the man went to the store and bought some candy"), //
            new Values("four score and seven years ago"), //
            new Values("how many apples can you eat"));
    spout.setCycle(true);

    LocalDRPC localDrpc = new LocalDRPC();

    TridentTopology tridentTopology = new TridentTopology();

    TridentState wordCounts = tridentTopology.newStream("spout1", spout) //
            .parallelismHint(16)//
            .each(new Fields("sentence"), new SplitFunction(), new Fields("word")) //
            .groupBy(new Fields("word")) //
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count")) //
            .parallelismHint(2);

    // The DRPC stream only needs to be registered on the topology; the returned Stream is not used.
    tridentTopology //
            .newDRPCStream("words", localDrpc) //
            .each(new Fields("args"), new SplitFunction(), new Fields("word")) //
            .groupBy(new Fields("word")) //
            .stateQuery(wordCounts, new Fields("word"), new MapGet(), new Fields("count")) //
            .each(new Fields("count"), new FilterNull()) //
            .aggregate(new Fields("count"), new Sum(), new Fields("sum"));

    StormTopology stormTopology = tridentTopology.build();

    Config topologyConf = new Config();
    topologyConf.setMaxSpoutPending(50);

    topologyConf.setDebug(false);
    topologyConf.setNumWorkers(2);

    LocalCluster localCluster = new LocalCluster();
    try {
        localCluster.submitTopology(topologyName, topologyConf, stormTopology);

        // Give the topology time to process some batches before querying.
        Utils.sleep(5000);

        // Against a remote DRPC server the query would go through a client instead:
        // DRPCClient client = new DRPCClient("localhost", 3772);
        // String result = client.execute("words", "cat dog the man");

        String result = localDrpc.execute("words", "cat dog the man");
        System.out.println(result);

        Utils.sleep(5000);

        localCluster.killTopology(topologyName);
    } finally {
        // Always release both in-process services, even if submit, query or kill failed.
        try {
            localCluster.shutdown();
        } finally {
            localDrpc.shutdown();
        }
    }

}