storm.trident.operation.builtin.Sum Java Examples

The following examples show how to use storm.trident.operation.builtin.Sum. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TridentWordCount.java    From flink-perf with Apache License 2.0 7 votes vote down vote up
/**
 * Assembles the Trident word-count topology.
 *
 * <p>A cycling {@code FixedBatchSpout} supplies "sentence" tuples. Each sentence is passed
 * through {@code Split} to produce "word" tuples, which are grouped by word and aggregated
 * with {@code Count} into a {@code MemoryMapState}. A DRPC stream named "words" applies the
 * same {@code Split} to its "args", queries that state per word, filters the "count" field
 * with {@code FilterNull} and aggregates what is left with {@code Sum} into "sum".
 *
 * @param drpc DRPC handle handed to {@code newDRPCStream} for the "words" function
 * @return the topology produced by {@code TridentTopology.build()}
 */
public static StormTopology buildTopology(LocalDRPC drpc) {
  final int maxBatchSize = 3;
  final int parallelism = 16;

  FixedBatchSpout sentenceSpout = new FixedBatchSpout(
      new Fields("sentence"),
      maxBatchSize,
      new Values("the cow jumped over the moon"),
      new Values("the man went to the store and bought some candy"),
      new Values("four score and seven years ago"),
      new Values("how many apples can you eat"),
      new Values("to be or not to be the person"));
  sentenceSpout.setCycle(true);

  TridentTopology topology = new TridentTopology();

  // Ingest side: sentence -> word -> persisted count per word.
  TridentState wordCounts = topology
      .newStream("spout1", sentenceSpout)
      .parallelismHint(parallelism)
      .each(new Fields("sentence"), new Split(), new Fields("word"))
      .groupBy(new Fields("word"))
      .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
      .parallelismHint(parallelism);

  // Query side: the request args are split into words and their stored counts summed.
  topology
      .newDRPCStream("words", drpc)
      .each(new Fields("args"), new Split(), new Fields("word"))
      .groupBy(new Fields("word"))
      .stateQuery(wordCounts, new Fields("word"), new MapGet(), new Fields("count"))
      .each(new Fields("count"), new FilterNull())
      .aggregate(new Fields("count"), new Sum(), new Fields("sum"));

  return topology.build();
}
 
Example #2
Source File: TridentWordCount.java    From jstorm with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the word-count topology and its "words" DRPC query stream.
 *
 * <p>The spout stream is split into "word" tuples, grouped by word and counted into a
 * {@code MemoryMapState}. The DRPC stream splits its "args" the same way, reads each word's
 * count back from that state, applies {@code FilterNull} to "count" and aggregates the
 * remaining counts with {@code Sum} into a single "sum" field.
 *
 * @param drpc DRPC handle the "words" stream is registered against
 * @return the built topology
 */
public static StormTopology buildTopology(LocalDRPC drpc) {
    FixedBatchSpout source = cyclingSentenceSpout();

    TridentTopology topology = new TridentTopology();

    TridentState wordCounts = topology
            .newStream("spout1", source)
            .parallelismHint(16)
            .each(new Fields("sentence"), new Split(), new Fields("word"))
            .groupBy(new Fields("word"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
            .parallelismHint(16);

    topology
            .newDRPCStream("words", drpc)
            .each(new Fields("args"), new Split(), new Fields("word"))
            .groupBy(new Fields("word"))
            .stateQuery(wordCounts, new Fields("word"), new MapGet(), new Fields("count"))
            .each(new Fields("count"), new FilterNull())
            .aggregate(new Fields("count"), new Sum(), new Fields("sum"));

    return topology.build();
}

/**
 * Creates the spout over the five fixed sample sentences and enables cycling on it.
 *
 * @return a {@code FixedBatchSpout} declaring the single field "sentence"
 */
private static FixedBatchSpout cyclingSentenceSpout() {
    FixedBatchSpout sentences = new FixedBatchSpout(
            new Fields("sentence"),
            3,
            new Values("the cow jumped over the moon"),
            new Values("the man went to the store and bought some candy"),
            new Values("four score and seven years ago"),
            new Values("how many apples can you eat"),
            new Values("to be or not to be the person"));
    sentences.setCycle(true);
    return sentences;
}
 
Example #3
Source File: TridentMapExample.java    From jstorm with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the map/flatMap/filter/peek variant of the word-count topology.
 *
 * <p>The spout stream goes through the externally defined {@code split}, {@code toUpper} and
 * {@code theFilter} operations, is printed by a peek consumer, then grouped by "word" and
 * counted into a {@code MemoryMapState}. The "words" DRPC stream runs {@code split} over its
 * input, groups by "args", looks the counts up in that state, filters with
 * {@code FilterNull} and sums "count" into "sum".
 *
 * <p>NOTE(review): the spout declares its field as "word" although the values are whole
 * sentences; presumably {@code split} breaks them into single words -- confirm against the
 * definition of {@code split}.
 *
 * @param drpc DRPC handle the "words" stream is registered against
 * @return the built topology
 */
public static StormTopology buildTopology(LocalDRPC drpc) {
    FixedBatchSpout sentences = new FixedBatchSpout(
            new Fields("word"),
            3,
            new Values("the cow jumped over the moon"),
            new Values("the man went to the store and bought some candy"),
            new Values("four score and seven years ago"),
            new Values("how many apples can you eat"),
            new Values("to be or not to be the person"));
    sentences.setCycle(true);

    TridentTopology topology = new TridentTopology();

    // Writes the first field of every tuple that reaches the peek stage to stdout.
    Consumer printFirstField = new Consumer() {
        @Override
        public void accept(TridentTuple tuple) {
            System.out.println(tuple.getString(0));
        }
    };

    TridentState wordCounts = topology
            .newStream("spout1", sentences)
            .parallelismHint(16)
            .flatMap(split)
            .map(toUpper)
            .filter(theFilter)
            .peek(printFirstField)
            .groupBy(new Fields("word"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
            .parallelismHint(16);

    topology
            .newDRPCStream("words", drpc)
            .flatMap(split)
            .groupBy(new Fields("args"))
            .stateQuery(wordCounts, new Fields("args"), new MapGet(), new Fields("count"))
            .filter(new FilterNull())
            .aggregate(new Fields("count"), new Sum(), new Fields("sum"));

    return topology.build();
}
 
Example #4
Source File: SalesTopology.java    From storm-cassandra-cql with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the sales topology: tuples from a {@code SalesSpout} on the "sales" stream are
 * partition-persisted through a {@code CassandraCqlIncrementalStateFactory} configured with a
 * {@code Sum} aggregator and a {@code SalesMapper}.
 *
 * @return the built topology
 */
public static StormTopology buildTopology() {
    LOG.info("Building topology.");

    TridentTopology topology = new TridentTopology();
    SalesSpout salesSpout = new SalesSpout();
    Stream sales = topology.newStream("sales", salesSpout);

    // Collaborators are created in the same order the original argument list evaluated them.
    SalesMapper salesMapper = new SalesMapper();
    CassandraCqlIncrementalStateFactory<String, Number> stateFactory =
            new CassandraCqlIncrementalStateFactory<String, Number>(new Sum(), salesMapper);
    Fields persistedFields = new Fields("price", "state", "product");
    CassandraCqlIncrementalStateUpdater<String, Number> stateUpdater =
            new CassandraCqlIncrementalStateUpdater<String, Number>();

    sales.partitionPersist(stateFactory, persistedFields, stateUpdater);

    return topology.build();
}
 
Example #5
Source File: WordCountTopology.java    From storm-cassandra-cql with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a word-count topology whose counts are kept per ("word", "source") pair in a
 * non-transactional {@code CassandraCqlMapState}, plus a "words" DRPC stream that sums the
 * counts returned for the words in its arguments.
 *
 * <p>NOTE(review): the state is grouped by ("word", "source") but the DRPC query looks it up
 * by "word" alone -- confirm that the mapper / state implementation supports that lookup.
 *
 * @param drpc DRPC handle the "words" stream is registered against
 * @return the built topology
 */
@SuppressWarnings("unchecked")
public static StormTopology buildWordCountAndSourceTopology(LocalDRPC drpc) {
    LOG.info("Building topology.");
    TridentTopology topology = new TridentTopology();

    // Each sentence is tagged with one of two source labels.
    String firstSource = "spout1";
    String secondSource = "spout2";
    FixedBatchSpout taggedSentences = new FixedBatchSpout(
            new Fields("sentence", "source"),
            3,
            new Values("the cow jumped over the moon", firstSource),
            new Values("the man went to the store and bought some candy", firstSource),
            new Values("four score and four years ago", secondSource),
            new Values("how much wood can a wood chuck chuck", secondSource));
    taggedSentences.setCycle(true);

    TridentState wordCounts = topology
            .newStream("spout1", taggedSentences)
            .each(new Fields("sentence"), new Split(), new Fields("word"))
            .groupBy(new Fields("word", "source"))
            .persistentAggregate(
                    CassandraCqlMapState.nonTransactional(new WordCountAndSourceMapper()),
                    new IntegerCount(),
                    new Fields("count"))
            .parallelismHint(6);

    topology
            .newDRPCStream("words", drpc)
            .each(new Fields("args"), new Split(), new Fields("word"))
            .groupBy(new Fields("word"))
            .stateQuery(wordCounts, new Fields("word"), new MapGet(), new Fields("count"))
            .each(new Fields("count"), new FilterNull())
            .aggregate(new Fields("count"), new Sum(), new Fields("sum"));

    return topology.build();
}
 
Example #6
Source File: IncrementalStateTest.java    From storm-cassandra-cql with Apache License 2.0 5 votes vote down vote up
/**
 * Sets up the fixtures for the test: a {@code CassandraCqlIncrementalStateFactory} built from a
 * {@code Sum} aggregator and a {@code SalesMapper} and wired to {@code clientFactory}, and a
 * matching {@code CassandraCqlIncrementalStateUpdater}.
 */
public IncrementalStateTest() {
    // The no-arg superclass constructor is invoked implicitly.
    SalesMapper salesMapper = new SalesMapper();

    this.stateFactory =
            new CassandraCqlIncrementalStateFactory<String, Number>(new Sum(), salesMapper);
    this.stateFactory.setCqlClientFactory(clientFactory);

    this.stateUpdater = new CassandraCqlIncrementalStateUpdater<String, Number>();
}
 
Example #7
Source File: TridentReach.java    From flink-perf with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the "reach" DRPC topology.
 *
 * <p>Two static states are created from {@code TWEETERS_DB} and {@code FOLLOWERS_DB}. The
 * "reach" DRPC stream looks its "args" up in the first state, expands the result into
 * individual "tweeter" tuples, looks each tweeter up in the second state, expands that into
 * "follower" tuples, groups by follower, emits a "one" per group via {@code One} and finally
 * aggregates those with {@code Sum} into "reach".
 *
 * @param drpc DRPC handle the "reach" stream is registered against
 * @return the built topology
 */
public static StormTopology buildTopology(LocalDRPC drpc) {
  TridentTopology topology = new TridentTopology();

  TridentState urlToTweeters =
      topology.newStaticState(new StaticSingleKeyMapState.Factory(TWEETERS_DB));
  TridentState tweetersToFollowers =
      topology.newStaticState(new StaticSingleKeyMapState.Factory(FOLLOWERS_DB));

  topology
      .newDRPCStream("reach", drpc)
      // args -> "tweeters" -> one tuple per "tweeter"
      .stateQuery(urlToTweeters, new Fields("args"), new MapGet(), new Fields("tweeters"))
      .each(new Fields("tweeters"), new ExpandList(), new Fields("tweeter"))
      .shuffle()
      // "tweeter" -> "followers" -> one tuple per "follower"
      .stateQuery(tweetersToFollowers, new Fields("tweeter"), new MapGet(), new Fields("followers"))
      .each(new Fields("followers"), new ExpandList(), new Fields("follower"))
      // one "one" per follower group, then a global sum of those
      .groupBy(new Fields("follower"))
      .aggregate(new One(), new Fields("one"))
      .aggregate(new Fields("one"), new Sum(), new Fields("reach"));

  return topology.build();
}
 
Example #8
Source File: Part03_AdvancedPrimitives2.java    From trident-tutorial with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a topology with two streams over the same spout that demonstrate chained
 * aggregation and {@code newValuesStream}.
 *
 * @param spout spout feeding both the "aggregation" and the "further" stream
 * @return the built topology
 * @throws IOException declared by the signature; nothing in this body visibly throws it
 */
private static StormTopology advancedPrimitives(FeederBatchSpout spout) throws IOException {
    TridentTopology topology = new TridentTopology();

    // Chained aggregation: several aggregators run over the same "city" grouping. Here a
    // Count and a Sum of "age" are produced together, then fed to DivideAsDouble to derive
    // "mean_age", which is printed next to the city.
    topology.newStream("aggregation", spout)
            .groupBy(new Fields("city"))
            .chainedAgg()
            .aggregate(new Count(), new Fields("count"))
            .aggregate(new Fields("age"), new Sum(), new Fields("age_sum"))
            .chainEnd()
            .each(new Fields("age_sum", "count"), new DivideAsDouble(), new Fields("mean_age"))
            .each(new Fields("city", "mean_age"), new Print());

    // Persisting an aggregation and still processing its results: newValuesStream() turns
    // the persisted aggregate back into a stream, which is printed here.
    topology.newStream("further", spout)
            .groupBy(new Fields("city"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
            .newValuesStream()
            .each(new Fields("city", "count"), new Print());

    return topology.build();
}
 
Example #9
Source File: TridentReach.java    From jstorm with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the "reach" DRPC topology on top of two static single-key map states created from
 * {@code TWEETERS_DB} and {@code FOLLOWERS_DB}.
 *
 * <p>The DRPC stream resolves its "args" to "tweeters", expands them, resolves every
 * "tweeter" to "followers", expands those, groups by "follower", emits a "one" per group
 * and aggregates the ones with {@code Sum} into the "reach" field.
 *
 * @param drpc DRPC handle the "reach" stream is registered against
 * @return the built topology
 */
public static StormTopology buildTopology(LocalDRPC drpc) {
    TridentTopology topology = new TridentTopology();

    // Each factory is created immediately before the state that uses it.
    StaticSingleKeyMapState.Factory tweetersFactory = new StaticSingleKeyMapState.Factory(TWEETERS_DB);
    TridentState urlToTweeters = topology.newStaticState(tweetersFactory);

    StaticSingleKeyMapState.Factory followersFactory = new StaticSingleKeyMapState.Factory(FOLLOWERS_DB);
    TridentState tweetersToFollowers = topology.newStaticState(followersFactory);

    topology
            .newDRPCStream("reach", drpc)
            .stateQuery(urlToTweeters, new Fields("args"), new MapGet(), new Fields("tweeters"))
            .each(new Fields("tweeters"), new ExpandList(), new Fields("tweeter"))
            .shuffle()
            .stateQuery(tweetersToFollowers, new Fields("tweeter"), new MapGet(), new Fields("followers"))
            .each(new Fields("followers"), new ExpandList(), new Fields("follower"))
            .groupBy(new Fields("follower"))
            .aggregate(new One(), new Fields("one"))
            .aggregate(new Fields("one"), new Sum(), new Fields("reach"));

    return topology.build();
}
 
Example #10
Source File: DRPC.java    From storm-benchmark with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the DRPC "reach" benchmark topology.
 *
 * <p>Reads the DRPC server and port from {@code config} (keys {@code SERVER} and
 * {@code PORT}), stores them in the {@code server} / {@code port} fields and writes them back
 * as {@code Config.DRPC_SERVERS} / {@code Config.DRPC_PORT}. A Kafka-backed Trident spout
 * feeds a url -> user-set state; the DRPC stream registered under {@code FUNCTION} expands a
 * url into users, users into followers, and sums one per follower group into "reach".
 *
 * @param config benchmark configuration; must contain {@code SERVER} and {@code PORT}
 * @return the built topology
 * @throws IllegalArgumentException if {@code SERVER} or {@code PORT} is missing from config
 */
@Override
  public StormTopology getTopology(Config config) {

    // The server is validated and published before the port is even read, so a missing port
    // still leaves DRPC_SERVERS set in the config.
    Object serverSetting = config.get(SERVER);
    if (serverSetting == null) {
      throw new IllegalArgumentException("must set a drpc server");
    }
    server = (String) serverSetting;
    config.put(Config.DRPC_SERVERS, Lists.newArrayList(server));

    Object portSetting = config.get(PORT);
    if (portSetting == null) {
      throw new IllegalArgumentException("must set a drpc port");
    }
    port = Utils.getInt(portSetting);
    config.put(Config.DRPC_PORT, port);

    LOG.info("drpc server: " + server + "; drpc port: " + port);

    // Parallelism hints, each falling back to its DEFAULT_* constant.
    final int spoutParallelism = BenchmarkUtils.getInt(config, SPOUT_NUM, DEFAULT_SPOUT_NUM);
    final int pageParallelism = BenchmarkUtils.getInt(config, PAGE_NUM, DEFAULT_PAGE_BOLT_NUM);
    final int viewParallelism = BenchmarkUtils.getInt(config, VIEW_NUM, DEFAULT_VIEW_BOLT_NUM);
    final int userParallelism = BenchmarkUtils.getInt(config, USER_NUM, DEFAULT_USER_BOLT_NUM);
    final int followerParallelism =
        BenchmarkUtils.getInt(config, FOLLOWER_NUM, DEFAULT_FOLLOWER_BOLT_NUM);

    spout = new TransactionalTridentKafkaSpout(
        KafkaUtils.getTridentKafkaConfig(config, new SchemeAsMultiScheme(new StringScheme())));

    TridentTopology trident = new TridentTopology();

    // Ingest side: extract (url, user) from each Kafka message and keep the Distinct
    // aggregate of users per url as "user_set".
    TridentState urlToUsers = trident
        .newStream("drpc", spout)
        .parallelismHint(spoutParallelism)
        .shuffle()
        .each(new Fields(StringScheme.STRING_SCHEME_KEY),
            new Extract(Arrays.asList(Item.URL, Item.USER)),
            new Fields("url", "user"))
        .parallelismHint(pageParallelism)
        .groupBy(new Fields("url"))
        .persistentAggregate(new MemoryMapState.Factory(), new Fields("url", "user"),
            new Distinct(), new Fields("user_set"))
        .parallelismHint(viewParallelism);

    /* debug
     *  1. this proves that the aggregated result has successfully persisted
    urlToUsers.newValuesStream()
            .each(new Fields("url", "user_set"), new Print("(url, user_set)"), new Fields("url2", "user_set2"));
     */

    TridentState userToFollowers = trident.newStaticState(
        new StaticSingleKeyMapState.Factory(new PageViewGenerator().genFollowersDB()));

    /* debug
     *  2. this proves that MemoryMapState could be read correctly
    trident.newStream("urlToUsers", new PageViewSpout(false))
            .each(new Fields("page_view"), new Extract(Arrays.asList(Item.URL)), new Fields("url"))
            .each(new Fields("url"), new Print("url"), new Fields("url2"))
            .groupBy(new Fields("url2"))
            .stateQuery(urlToUsers, new Fields("url2"),  new MapGet(), new Fields("users"))
            .each(new Fields("users"), new Print("users"), new Fields("users2"));
     */

    /* debug
     *  3. this proves that StaticSingleKeyMapState could be read correctly
    trident.newStream("userToFollowers", new PageViewSpout(false))
            .each(new Fields("page_view"), new Extract(Arrays.asList(Item.USER)), new Fields("user"))
            .each(new Fields("user"), new Print("user"), new Fields("user2"))
            .stateQuery(userToFollowers, new Fields("user2"), new MapGet(), new Fields("followers"))
            .each(new Fields("followers"), new Print("followers"), new Fields("followers2"));
     */

    // Query side: url -> users -> followers -> one per follower group -> "reach".
    trident
        .newDRPCStream(FUNCTION, null)
        .each(new Fields("args"), new Print("args"), new Fields("url"))
        .groupBy(new Fields("url"))
        .stateQuery(urlToUsers, new Fields("url"), new MapGet(), new Fields("users"))
        .each(new Fields("users"), new Expand(), new Fields("user"))
        .parallelismHint(userParallelism)
        .groupBy(new Fields("user"))
        .stateQuery(userToFollowers, new Fields("user"), new MapGet(), new Fields("followers"))
        .each(new Fields("followers"), new Expand(), new Fields("follower"))
        .parallelismHint(followerParallelism)
        .groupBy(new Fields("follower"))
        .aggregate(new One(), new Fields("one"))
        .aggregate(new Fields("one"), new Sum(), new Fields("reach"));

    return trident.build();
  }
 
Example #11
Source File: Part04_BasicStateAndDRPC.java    From trident-tutorial with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a topology that keeps a per-"actor" count in a {@code MemoryMapState} and exposes it
 * through two DRPC functions: "count_per_actor" (single lookup on "args") and
 * "count_per_actors" (splits "args" on " ", looks every actor up and sums the counts).
 *
 * @param drpc  DRPC handle both DRPC streams are registered against
 * @param spout spout feeding the counted stream
 * @return the built topology
 * @throws IOException declared by the signature; nothing in this body visibly throws it
 */
private static StormTopology basicStateAndDRPC(LocalDRPC drpc, FeederBatchSpout spout) throws IOException {
    TridentTopology topology = new TridentTopology();

    // persistentAggregate stores the aggregation result in a data store so that other
    // applications -- or other topologies, via the returned TridentState -- can use it.
    //
    // Such state is commonly backed by a store like memcache or cassandra; a plain in-memory
    // map is used here.
    TridentState countState = topology
            .newStream("spout", spout)
            .groupBy(new Fields("actor"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"));

    // Ready-made state libraries exist as well. A memcached-backed variant would look like:
    /*
    List<InetSocketAddress> memcachedServerLocations = ImmutableList.of(new InetSocketAddress("some.memcached.server",12000));
    TridentState countStateMemcached =
            topology
                    .newStream("spout", spout)
                    .groupBy(new Fields("actor"))
                    .persistentAggregate(MemcachedState.transactional(memcachedServerLocations), new Count(), new Fields("count"));
    */

    // DRPC = Distributed Remote Procedure Call. Calls are issued through the DRPC client
    // library and take two Strings: the function name and the function arguments.
    //
    // To reach a DRPC stream defined below, use its name (e.g. "count_per_actor") as the
    // function name; the arguments arrive in the "args" field.

    /*
    topology
            .newDRPCStream("ping", drpc)
            .each(new Fields("args"), new Split(" "), new Fields("reply"))
            .each(new Fields("reply"), new RegexFilter("ping"))
            .project(new Fields("reply"));

    // You can apply usual processing primitives to DRPC streams as well
    topology
            .newDRPCStream("count", drpc)
            .each(new Fields("args"), new Split(" "), new Fields("split"))
            .each(new Fields("split"), new RegexFilter("a.*"))
            .groupBy(new Fields("split"))
            .aggregate(new Count(), new Fields("count"));   */

    // Querying the state created above: one lookup keyed by the raw "args".
    topology.newDRPCStream("count_per_actor", drpc)
            .stateQuery(countState, new Fields("args"), new MapGet(), new Fields("count"));

    // A more involved query: several actors in one call, their individual counts summed.
    topology.newDRPCStream("count_per_actors", drpc)
            .each(new Fields("args"), new Split(" "), new Fields("actor"))
            .groupBy(new Fields("actor"))
            .stateQuery(countState, new Fields("actor"), new MapGet(), new Fields("individual_count"))
            .each(new Fields("individual_count"), new FilterNull())
            .aggregate(new Fields("individual_count"), new Sum(), new Fields("count"));

    // See the main method for how these DRPC functions are invoked.

    return topology.build();
}
 
Example #12
Source File: Part05_AdvancedStateAndDRPC.java    From trident-tutorial with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a topology that combines a pre-populated static state (name -> age) with a
 * per-("actor", "location") count state, and exposes an "age_stats" DRPC function that
 * derives "count" and "avg" per location from them.
 *
 * @param drpc  DRPC handle the "age_stats" stream is registered against
 * @param spout spout feeding the counted stream
 * @return the built topology
 */
private static StormTopology externalState(LocalDRPC drpc, FeederBatchSpout spout) {
    TridentTopology topology = new TridentTopology();

    // Stand-in for an existing external data source: every state instance the factory makes
    // is a MemoryMapState filled with five fixed name/age pairs.
    StateFactory mockDatabase = new StateFactory() {
        @Override
        public State makeState(Map conf, IMetricsContext metrics, int partitionIndex, int numPartitions) {
            MemoryMapState<Integer> ages = new MemoryMapState<Integer>("name_to_age");
            // Keys and values are matched up by position.
            List<List<Object>> names = getKeys("ted", "mary", "jason", "tom", "chuck");
            ages.multiPut(names, ImmutableList.of(32, 21, 45, 52, 18));
            return ages;
        }
    };
    TridentState nameToAge = topology.newStaticState(mockDatabase);

    // Second state: appearance count per (actor, location).
    TridentState countState = topology
            .newStream("spout", spout)
            .groupBy(new Fields("actor", "location"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"));

    // Average age of the actors seen, per location: fetch (actor, location) from the count
    // state, attach each actor's age, then count and sum the ages per location and divide.
    topology.newDRPCStream("age_stats", drpc)
            .stateQuery(countState, new TupleCollectionGet(), new Fields("actor", "location"))
            .stateQuery(nameToAge, new Fields("actor"), new MapGet(), new Fields("age"))
            .each(new Fields("actor", "location", "age"), new Print())
            .groupBy(new Fields("location"))
            .chainedAgg()
            .aggregate(new Count(), new Fields("count"))
            .aggregate(new Fields("age"), new Sum(), new Fields("sum"))
            .chainEnd()
            .each(new Fields("sum", "count"), new DivideAsDouble(), new Fields("avg"))
            .project(new Fields("location", "count", "avg"));

    return topology.build();
}