storm.trident.TridentState Java Examples

The following examples show how to use storm.trident.TridentState. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TridentWordCount.java    From flink-perf with Apache License 2.0 7 votes vote down vote up
/**
 * Builds the word-count topology: sentences from a cycling fixed-batch spout are split into
 * words and counted per word into an in-memory map state; the "words" DRPC stream looks up
 * the stored count for each requested word and sums the non-null results.
 *
 * @param drpc DRPC handle the "words" query stream is attached to
 * @return the built topology
 */
public static StormTopology buildTopology(LocalDRPC drpc) {
  TridentTopology topology = new TridentTopology();

  // Sample sentences; the spout is created with a batch-size argument of 3 and cycling enabled.
  FixedBatchSpout sentenceSpout = new FixedBatchSpout(
      new Fields("sentence"),
      3,
      new Values("the cow jumped over the moon"),
      new Values("the man went to the store and bought some candy"),
      new Values("four score and seven years ago"),
      new Values("how many apples can you eat"),
      new Values("to be or not to be the person"));
  sentenceSpout.setCycle(true);

  // Write side: sentence -> words -> per-word persistent count.
  TridentState wordCounts = topology
      .newStream("spout1", sentenceSpout)
      .parallelismHint(16)
      .each(new Fields("sentence"), new Split(), new Fields("word"))
      .groupBy(new Fields("word"))
      .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
      .parallelismHint(16);

  // Read side: split the DRPC arguments into words, fetch each count, drop misses, sum.
  topology
      .newDRPCStream("words", drpc)
      .each(new Fields("args"), new Split(), new Fields("word"))
      .groupBy(new Fields("word"))
      .stateQuery(wordCounts, new Fields("word"), new MapGet(), new Fields("count"))
      .each(new Fields("count"), new FilterNull())
      .aggregate(new Fields("count"), new Sum(), new Fields("sum"));

  return topology.build();
}
 
Example #2
Source File: ESIndexUpdaterTest.java    From storm-trident-elasticsearch with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the test topology: sentences from a cycling fixed-batch spout are persisted through
 * an {@code ESIndexUpdater}, and the "search" DRPC stream queries that state and projects the
 * resulting "json" field.
 *
 * @return the built topology
 */
@Override
protected StormTopology buildTopology() {
    FixedBatchSpout sentenceSpout = new FixedBatchSpout(
            new Fields("sentence"),
            3,
            new Values("the cow jumped over the moon"),
            new Values("the man went to the store and bought some candy"),
            new Values("four score and seven years ago"),
            new Values("how many apples can you eat"),
            new Values("to be or not to be the person"));
    sentenceSpout.setCycle(true);

    ESIndexState.Factory<Tweet> indexFactory =
            new ESIndexState.Factory<>(getLocalClient(), Tweet.class);
    TridentTopology topology = new TridentTopology();

    // Write side: every sentence tuple is handed to the index updater.
    TridentState indexState = topology
            .newStream("tweets", sentenceSpout)
            .partitionPersist(
                    indexFactory,
                    new Fields("sentence"),
                    new ESIndexUpdater(new MyTridentTupleMapper()));

    // Read side: parse the DRPC arguments, query the index state, keep non-null hits as JSON.
    topology
            .newDRPCStream("search", drpc)
            .each(new Fields("args"), new ExtractSearchArgs(), new Fields("query", "indices", "types"))
            .groupBy(new Fields("query", "indices", "types"))
            .stateQuery(
                    indexState,
                    new Fields("query", "indices", "types"),
                    new QuerySearchIndexQuery(),
                    new Fields("tweet"))
            .each(new Fields("tweet"), new FilterNull())
            .each(new Fields("tweet"), new CreateJson(), new Fields("json"))
            .project(new Fields("json"));

    return topology.build();
}
 
Example #3
Source File: ClickThruAnalyticsTopology.java    From storm-example with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the click-through analytics topology. One branch of the input stream keeps a
 * per-campaign count of filtered, distinct click tuples; a second branch counts all tuples
 * per campaign, joins in the click-through count via a state query, and feeds both counts to
 * {@code CampaignEffectiveness}.
 *
 * @return the built topology
 */
public static StormTopology buildTopology() {
    LOG.info("Building topology.");
    TridentTopology topology = new TridentTopology();
    StateFactory clickThruStore = new MemoryMapState.Factory();
    ClickThruSpout clickSpout = new ClickThruSpout();
    Stream events = topology.newStream("clithru", clickSpout);

    // Branch 1: tuples passing the ("click", "true") filter, de-duplicated, counted per campaign.
    TridentState clickThruState = events
            .each(new Fields("username", "campaign", "product", "click"), new Filter("click", "true"))
            .each(new Fields("username", "campaign", "product", "click"), new Distinct())
            .groupBy(new Fields("campaign"))
            .persistentAggregate(clickThruStore, new Count(), new Fields("click_thru_count"));

    // Branch 2: count every tuple per campaign, then look up the matching click-through count.
    events
            .groupBy(new Fields("campaign"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("impression_count"))
            .newValuesStream()
            .stateQuery(clickThruState, new Fields("campaign"), new MapGet(), new Fields("click_thru_count"))
            .each(
                    new Fields("campaign", "impression_count", "click_thru_count"),
                    new CampaignEffectiveness(),
                    new Fields(""));

    return topology.build();
}
 
Example #4
Source File: TridentMapExample.java    From jstorm with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a word-count topology using the flatMap/map/filter/peek stream operations. Tuples
 * from a cycling fixed-batch spout pass through {@code split}, {@code toUpper} and
 * {@code theFilter}, are printed, and are then counted per "word" into an in-memory map
 * state. The "words" DRPC stream queries that state and sums the non-null counts.
 *
 * @param drpc DRPC handle the "words" query stream is attached to
 * @return the built topology
 */
public static StormTopology buildTopology(LocalDRPC drpc) {
    TridentTopology topology = new TridentTopology();

    FixedBatchSpout wordSpout = new FixedBatchSpout(
            new Fields("word"),
            3,
            new Values("the cow jumped over the moon"),
            new Values("the man went to the store and bought some candy"),
            new Values("four score and seven years ago"),
            new Values("how many apples can you eat"),
            new Values("to be or not to be the person"));
    wordSpout.setCycle(true);

    TridentState wordCounts = topology
            .newStream("spout1", wordSpout)
            .parallelismHint(16)
            .flatMap(split)
            .map(toUpper)
            .filter(theFilter)
            .peek(new Consumer() {
                // Prints the first field of every tuple that reaches this point.
                @Override
                public void accept(TridentTuple tuple) {
                    System.out.println(tuple.getString(0));
                }
            })
            .groupBy(new Fields("word"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
            .parallelismHint(16);

    topology
            .newDRPCStream("words", drpc)
            .flatMap(split)
            .groupBy(new Fields("args"))
            .stateQuery(wordCounts, new Fields("args"), new MapGet(), new Fields("count"))
            .filter(new FilterNull())
            .aggregate(new Fields("count"), new Sum(), new Fields("sum"));

    return topology.build();
}
 
Example #5
Source File: TridentWordCount.java    From jstorm with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the word-count topology. A cycling fixed-batch spout emits sentences, which are
 * split into words and counted per word into an in-memory map state. The "words" DRPC stream
 * splits its arguments, fetches the stored count per word, filters out nulls and sums them.
 *
 * @param drpc DRPC handle the "words" query stream is attached to
 * @return the built topology
 */
public static StormTopology buildTopology(LocalDRPC drpc) {
    TridentTopology topology = new TridentTopology();

    FixedBatchSpout sentenceSpout = new FixedBatchSpout(
            new Fields("sentence"),
            3,
            new Values("the cow jumped over the moon"),
            new Values("the man went to the store and bought some candy"),
            new Values("four score and seven years ago"),
            new Values("how many apples can you eat"),
            new Values("to be or not to be the person"));
    sentenceSpout.setCycle(true);

    // Counting pipeline.
    TridentState wordCounts = topology
            .newStream("spout1", sentenceSpout)
            .parallelismHint(16)
            .each(new Fields("sentence"), new Split(), new Fields("word"))
            .groupBy(new Fields("word"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
            .parallelismHint(16);

    // Query pipeline.
    topology
            .newDRPCStream("words", drpc)
            .each(new Fields("args"), new Split(), new Fields("word"))
            .groupBy(new Fields("word"))
            .stateQuery(wordCounts, new Fields("word"), new MapGet(), new Fields("count"))
            .each(new Fields("count"), new FilterNull())
            .aggregate(new Fields("count"), new Sum(), new Fields("sum"));

    return topology.build();
}
 
Example #6
Source File: ClusterTestTopology.java    From trident-tutorial with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a per-actor tweet-count topology with a "count_per_actor" DRPC query stream and
 * submits it to the cluster under the name given as the first command-line argument.
 *
 * @param args command-line arguments; {@code args[0]} is the topology name (required)
 * @throws IllegalArgumentException if no topology name is supplied
 * @throws Exception if topology submission fails
 */
public static void main(String[] args) throws Exception {
    // Fail with a clear message instead of an ArrayIndexOutOfBoundsException.
    if (args == null || args.length < 1) {
        throw new IllegalArgumentException("Missing required argument: <topology-name>");
    }
    String topologyName = args[0];

    Config conf = new Config();
    conf.setNumWorkers(8); // Our Vagrant environment has 8 workers

    FakeTweetsBatchSpout fakeTweets = new FakeTweetsBatchSpout(10);

    TridentTopology topology = new TridentTopology();

    // Persistent count of tweets grouped by the "actor" field.
    TridentState countState =
            topology
                    .newStream("spout", fakeTweets)
                    .groupBy(new Fields("actor"))
                    .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"));

    // DRPC stream that looks up the stored count using the request arguments as the key.
    topology
            .newDRPCStream("count_per_actor")
            .stateQuery(countState, new Fields("args"), new MapGet(), new Fields("count"));

    // Submits the topology
    StormSubmitter.submitTopology(topologyName, conf, topology.build());
}
 
Example #7
Source File: TopHashtagFollowerCountGrouping.java    From trident-tutorial with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a topology that counts hashtags per (followerClass, contentName) pair into a
 * Hazelcast-backed state and exposes a "hashtag_count" DRPC stream that queries the state
 * with a follower class and the request arguments as the key.
 *
 * @param spout Kafka spout supplying the raw tweet strings
 * @return the built topology
 * @throws IOException declared by the original signature
 */
public static StormTopology buildTopology(TransactionalTridentKafkaSpout spout) throws IOException {
    TridentTopology topology = new TridentTopology();

    // Parse tweets, keep only the filtered hashtag content, classify, then count per group.
    TridentState hashtagCounts = topology
            .newStream("tweets", spout)
            .each(new Fields("str"), new ParseTweet(), new Fields("text", "content", "user"))
            .project(new Fields("content", "user"))
            .each(new Fields("content"), new OnlyHashtags())
            .each(new Fields("user"), new OnlyEnglish())
            .each(
                    new Fields("content", "user"),
                    new ExtractFollowerClassAndContentName(),
                    new Fields("followerClass", "contentName"))
            .parallelismHint(3)
            .groupBy(new Fields("followerClass", "contentName"))
            .persistentAggregate(new HazelCastStateFactory(), new Count(), new Fields("count"))
            .parallelismHint(3);

    // The Constants function supplies the "followerClass" field used in the lookup key.
    topology
            .newDRPCStream("hashtag_count")
            .each(new Constants<String>("< 100", "< 10K", "< 100K", ">= 100K"), new Fields("followerClass"))
            .stateQuery(hashtagCounts, new Fields("followerClass", "args"), new MapGet(), new Fields("count"));

    return topology.build();
}
 
Example #8
Source File: TopHashtagByCountry.java    From trident-tutorial with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a topology that counts hashtags per (country, contentName) pair into an in-memory
 * map state and exposes a "location_hashtag_count" DRPC stream that reads every stored key,
 * fetches its count, and aggregates per country with {@code FirstNSortedAgg(3, "count", true)}.
 *
 * @param spout Kafka spout supplying the raw tweet strings
 * @return the built topology
 * @throws IOException declared by the original signature
 */
public static StormTopology buildTopology(TransactionalTridentKafkaSpout spout) throws IOException {
    TridentTopology topology = new TridentTopology();

    TridentState hashtagCounts = topology
            .newStream("tweets", spout)
            .each(new Fields("str"), new ParseTweet(), new Fields("status", "content", "user"))
            .project(new Fields("content", "user", "status"))
            .each(new Fields("content"), new OnlyHashtags())
            .each(new Fields("status"), new OnlyGeo())
            .each(new Fields("status", "content"), new ExtractLocation(), new Fields("country", "contentName"))
            .groupBy(new Fields("country", "contentName"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"));

    // First query enumerates the stored keys, second query resolves each key to its count.
    topology
            .newDRPCStream("location_hashtag_count")
            .stateQuery(hashtagCounts, new TupleCollectionGet(), new Fields("country", "contentName"))
            .stateQuery(hashtagCounts, new Fields("country", "contentName"), new MapGet(), new Fields("count"))
            .groupBy(new Fields("country"))
            .aggregate(
                    new Fields("contentName", "count"),
                    new FirstN.FirstNSortedAgg(3, "count", true),
                    new Fields("contentName", "count"));

    return topology.build();
}
 
Example #9
Source File: TopHashtagByFollowerClass.java    From trident-tutorial with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a topology that counts hashtags per (followerClass, contentName) pair into an
 * in-memory map state and exposes a "hashtag_count" DRPC stream that reads every stored key,
 * fetches its count, and aggregates per follower class with
 * {@code FirstNSortedAgg(1, "count", true)}.
 *
 * @param spout Kafka spout supplying the raw tweet strings
 * @return the built topology
 * @throws IOException declared by the original signature
 */
public static StormTopology buildTopology(TransactionalTridentKafkaSpout spout) throws IOException {
    TridentTopology topology = new TridentTopology();

    TridentState hashtagCounts = topology
            .newStream("tweets", spout)
            .each(new Fields("str"), new ParseTweet(), new Fields("text", "content", "user"))
            .project(new Fields("content", "user"))
            .each(new Fields("content"), new OnlyHashtags())
            .each(new Fields("user"), new OnlyEnglish())
            .each(
                    new Fields("content", "user"),
                    new ExtractFollowerClassAndContentName(),
                    new Fields("followerClass", "contentName"))
            .groupBy(new Fields("followerClass", "contentName"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"));

    // First query enumerates the stored keys, second query resolves each key to its count.
    topology
            .newDRPCStream("hashtag_count")
            .stateQuery(hashtagCounts, new TupleCollectionGet(), new Fields("followerClass", "contentName"))
            .stateQuery(hashtagCounts, new Fields("followerClass", "contentName"), new MapGet(), new Fields("count"))
            .groupBy(new Fields("followerClass"))
            .aggregate(
                    new Fields("contentName", "count"),
                    new FirstN.FirstNSortedAgg(1, "count", true),
                    new Fields("contentName", "count"));

    return topology.build();
}
 
Example #10
Source File: GlobalTop20Hashtags.java    From trident-tutorial with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a topology that counts hashtags per (followerClass, contentName) pair into an
 * in-memory map state and exposes a "top_hashtags" DRPC stream that reads every stored key,
 * fetches its count, and aggregates all results (no grouping) with
 * {@code FirstNSortedAgg(5, "count", true)}.
 *
 * @param spout Kafka spout supplying the raw tweet strings
 * @return the built topology
 * @throws IOException declared by the original signature
 */
public static StormTopology buildTopology(TransactionalTridentKafkaSpout spout) throws IOException {
    TridentTopology topology = new TridentTopology();

    TridentState hashtagCounts = topology
            .newStream("tweets", spout)
            .each(new Fields("str"), new ParseTweet(), new Fields("text", "content", "user"))
            .project(new Fields("content", "user"))
            .each(new Fields("content"), new OnlyHashtags())
            .each(new Fields("user"), new OnlyEnglish())
            .each(
                    new Fields("content", "user"),
                    new ExtractFollowerClassAndContentName(),
                    new Fields("followerClass", "contentName"))
            .groupBy(new Fields("followerClass", "contentName"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"));

    // NOTE(review): the aggregator is constructed with 5; confirm that is the intended "top N".
    topology
            .newDRPCStream("top_hashtags")
            .stateQuery(hashtagCounts, new TupleCollectionGet(), new Fields("followerClass", "contentName"))
            .stateQuery(hashtagCounts, new Fields("followerClass", "contentName"), new MapGet(), new Fields("count"))
            .aggregate(
                    new Fields("contentName", "count"),
                    new FirstN.FirstNSortedAgg(5, "count", true),
                    new Fields("contentName", "count"));

    return topology.build();
}
 
Example #11
Source File: IndexMapStateTest.java    From storm-trident-elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the test topology: sentences from a cycling fixed-batch spout are turned into
 * documents and aggregated per (index, type, id) into a non-transactional
 * {@code ESIndexMapState}; the "search" DRPC stream queries a separate static
 * {@code ESIndexState} and projects the resulting "json" field.
 *
 * @return the built topology
 */
@Override
public StormTopology buildTopology() {
    ESIndexMapState.Factory<Tweet> mapStateFactory =
            ESIndexMapState.nonTransactional(getLocalClient(), Tweet.class);

    FixedBatchSpout sentenceSpout = new FixedBatchSpout(
            new Fields("sentence"),
            3,
            new Values("the cow jumped over the moon"),
            new Values("the man went to the store and bought some candy"),
            new Values("four score and seven years ago"),
            new Values("how many apples can you eat"),
            new Values("to be or not to be the person"));
    sentenceSpout.setCycle(true);

    TridentTopology topology = new TridentTopology();

    // State used only by the DRPC query stream below.
    TridentState searchState =
            topology.newStaticState(new ESIndexState.Factory<>(getLocalClient(), Tweet.class));

    // Write side: sentence -> document -> (id, index, type) -> persistent aggregate.
    topology
            .newStream("tweets", sentenceSpout)
            .each(new Fields("sentence"), new DocumentBuilder(), new Fields("document"))
            .each(new Fields("document"), new ExtractDocumentInfo(), new Fields("id", "index", "type"))
            .groupBy(new Fields("index", "type", "id"))
            .persistentAggregate(mapStateFactory, new Fields("document"), new TweetBuilder(), new Fields("tweet"))
            .parallelismHint(1);

    // Read side: parse the DRPC arguments, query the static state, keep non-null hits as JSON.
    topology
            .newDRPCStream("search", drpc)
            .each(new Fields("args"), new ExtractSearchArgs(), new Fields("query", "indices", "types"))
            .groupBy(new Fields("query", "indices", "types"))
            .stateQuery(
                    searchState,
                    new Fields("query", "indices", "types"),
                    new QuerySearchIndexQuery(),
                    new Fields("tweet"))
            .each(new Fields("tweet"), new FilterNull())
            .each(new Fields("tweet"), new CreateJson(), new Fields("json"))
            .project(new Fields("json"));

    return topology.build();
}
 
Example #12
Source File: TridentReach.java    From jstorm with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the "reach" DRPC topology: the request argument is looked up in the tweeters state,
 * each tweeter is looked up in the followers state, followers are grouped so that each
 * distinct follower yields a single "one" value, and those values are summed into "reach".
 *
 * @param drpc DRPC handle the "reach" query stream is attached to
 * @return the built topology
 */
public static StormTopology buildTopology(LocalDRPC drpc) {
    TridentTopology topology = new TridentTopology();

    // Two static lookup states backed by the TWEETERS_DB and FOLLOWERS_DB constants.
    TridentState urlToTweeters =
            topology.newStaticState(new StaticSingleKeyMapState.Factory(TWEETERS_DB));
    TridentState tweetersToFollowers =
            topology.newStaticState(new StaticSingleKeyMapState.Factory(FOLLOWERS_DB));

    topology
            .newDRPCStream("reach", drpc)
            .stateQuery(urlToTweeters, new Fields("args"), new MapGet(), new Fields("tweeters"))
            .each(new Fields("tweeters"), new ExpandList(), new Fields("tweeter"))
            .shuffle()
            .stateQuery(tweetersToFollowers, new Fields("tweeter"), new MapGet(), new Fields("followers"))
            .each(new Fields("followers"), new ExpandList(), new Fields("follower"))
            .groupBy(new Fields("follower"))
            .aggregate(new One(), new Fields("one"))
            .aggregate(new Fields("one"), new Sum(), new Fields("reach"));

    return topology.build();
}
 
Example #13
Source File: TridentReach.java    From flink-perf with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the "reach" DRPC topology: the request argument is resolved to tweeters, each
 * tweeter to followers, followers are grouped so each distinct follower contributes a single
 * "one" value, and those values are summed into the "reach" field.
 *
 * @param drpc DRPC handle the "reach" query stream is attached to
 * @return the built topology
 */
public static StormTopology buildTopology(LocalDRPC drpc) {
  TridentTopology topology = new TridentTopology();

  // Static lookup states backed by the TWEETERS_DB and FOLLOWERS_DB constants.
  TridentState urlToTweeters =
      topology.newStaticState(new StaticSingleKeyMapState.Factory(TWEETERS_DB));
  TridentState tweetersToFollowers =
      topology.newStaticState(new StaticSingleKeyMapState.Factory(FOLLOWERS_DB));

  topology
      .newDRPCStream("reach", drpc)
      .stateQuery(urlToTweeters, new Fields("args"), new MapGet(), new Fields("tweeters"))
      .each(new Fields("tweeters"), new ExpandList(), new Fields("tweeter"))
      .shuffle()
      .stateQuery(tweetersToFollowers, new Fields("tweeter"), new MapGet(), new Fields("followers"))
      .each(new Fields("followers"), new ExpandList(), new Fields("follower"))
      .groupBy(new Fields("follower"))
      .aggregate(new One(), new Fields("one"))
      .aggregate(new Fields("one"), new Sum(), new Fields("reach"));

  return topology.build();
}
 
Example #14
Source File: TridentSequenceTopology.java    From storm-hdfs with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a topology that writes (sentence, key) tuples from a cycling fixed-batch spout to
 * HDFS sequence files through {@code HdfsState}.
 *
 * <p>Files are named with path "/trident", prefix "trident" and extension ".seq", rotated by
 * a {@code FileSizeRotationPolicy(5.0f, MB)}, and passed to a {@code MoveFileAction} targeting
 * "/dest2/" on rotation.
 *
 * @param hdfsUrl file-system URL handed to the HDFS state options
 * @return the built topology
 */
public static StormTopology buildTopology(String hdfsUrl){
    // Long keys use the upper-case 'L' suffix; a lower-case 'l' is easily misread as the digit 1.
    FixedBatchSpout spout = new FixedBatchSpout(new Fields("sentence", "key"), 1000,
            new Values("the cow jumped over the moon", 1L),
            new Values("the man went to the store and bought some candy", 2L),
            new Values("four score and seven years ago", 3L),
            new Values("how many apples can you eat", 4L),
            new Values("to be or not to be the person", 5L));
    spout.setCycle(true);

    TridentTopology topology = new TridentTopology();
    Stream stream = topology.newStream("spout1", spout);

    Fields hdfsFields = new Fields("sentence", "key");

    FileNameFormat fileNameFormat = new DefaultFileNameFormat()
            .withPath("/trident")
            .withPrefix("trident")
            .withExtension(".seq");

    FileRotationPolicy rotationPolicy = new FileSizeRotationPolicy(5.0f, FileSizeRotationPolicy.Units.MB);

    HdfsState.Options seqOpts = new HdfsState.SequenceFileOptions()
            .withFileNameFormat(fileNameFormat)
            .withSequenceFormat(new DefaultSequenceFormat("key", "sentence"))
            .withRotationPolicy(rotationPolicy)
            .withFsUrl(hdfsUrl)
            .addRotationAction(new MoveFileAction().toDestination("/dest2/"));

    StateFactory factory = new HdfsStateFactory().withOptions(seqOpts);

    // The resulting TridentState is not queried anywhere, so it is not kept in a local.
    stream.partitionPersist(factory, hdfsFields, new HdfsUpdater(), new Fields());

    return topology.build();
}
 
Example #15
Source File: RealTimeTextSearch.java    From trident-tutorial with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the real-time text search topology: parsed tweets are persisted through
 * {@code ElasticSearchStateUpdater}, and a "search" DRPC stream queries a static
 * {@code ElasticSearchStateFactory} state with the split request arguments.
 *
 * @param spout Kafka spout supplying the raw tweet strings
 * @return the built topology
 * @throws IOException declared by the original signature
 */
public static StormTopology buildTopology(TransactionalTridentKafkaSpout spout)
        throws IOException {
    TridentTopology topology = new TridentTopology();

    // Write side: parse each tweet, derive its id, keep only (tweetId, text), print the pair,
    // and store it using the ElasticSearchState implementation and its StateUpdater.
    // Check their implementations for details.
    topology
            .newStream("tweets", spout)
            .each(new Fields("str"), new ParseTweet(), new Fields("text", "content", "user"))
            .each(new Fields("text", "content"), new TweetIdExtractor(), new Fields("tweetId"))
            .project(new Fields("tweetId", "text"))
            .each(new Fields("tweetId", "text"), new Print())
            .partitionPersist(
                    new ElasticSearchStateFactory(),
                    new Fields("tweetId", "text"),
                    new ElasticSearchStateUpdater());

    // Read side: a DRPC stream queries the state where the tweets are stored. That requires a
    // QueryFunction implementation (TweetQuery) to access the ElasticSearchState.
    TridentState tweetIndex = topology.newStaticState(new ElasticSearchStateFactory());
    topology
            .newDRPCStream("search")
            // Split the request arguments into keywords...
            .each(new Fields("args"), new Split(" "), new Fields("keywords"))
            // ...and pass them as query parameters.
            .stateQuery(tweetIndex, new Fields("keywords"), new TweetQuery(), new Fields("ids"))
            .project(new Fields("ids"));

    return topology.build();
}
 
Example #16
Source File: TridentFileTopology.java    From storm-hdfs with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a topology that writes (sentence, key) tuples from a cycling fixed-batch spout to
 * delimited HDFS text files through {@code HdfsState}.
 *
 * <p>Files are named with path "/trident", prefix "trident" and extension ".txt", and rotated
 * by a {@code FileSizeRotationPolicy(5.0f, MB)}.
 *
 * @param hdfsUrl file-system URL handed to the HDFS state options
 * @return the built topology
 */
public static StormTopology buildTopology(String hdfsUrl){
    // Long keys use the upper-case 'L' suffix; a lower-case 'l' is easily misread as the digit 1.
    FixedBatchSpout spout = new FixedBatchSpout(new Fields("sentence", "key"), 1000,
            new Values("the cow jumped over the moon", 1L),
            new Values("the man went to the store and bought some candy", 2L),
            new Values("four score and seven years ago", 3L),
            new Values("how many apples can you eat", 4L),
            new Values("to be or not to be the person", 5L));
    spout.setCycle(true);

    TridentTopology topology = new TridentTopology();
    Stream stream = topology.newStream("spout1", spout);

    Fields hdfsFields = new Fields("sentence", "key");

    FileNameFormat fileNameFormat = new DefaultFileNameFormat()
            .withPath("/trident")
            .withPrefix("trident")
            .withExtension(".txt");

    RecordFormat recordFormat = new DelimitedRecordFormat()
            .withFields(hdfsFields);

    FileRotationPolicy rotationPolicy = new FileSizeRotationPolicy(5.0f, FileSizeRotationPolicy.Units.MB);

    HdfsState.Options options = new HdfsState.HdfsFileOptions()
            .withFileNameFormat(fileNameFormat)
            .withRecordFormat(recordFormat)
            .withRotationPolicy(rotationPolicy)
            .withFsUrl(hdfsUrl);

    StateFactory factory = new HdfsStateFactory().withOptions(options);

    // The resulting TridentState is not queried anywhere, so it is not kept in a local.
    stream.partitionPersist(factory, hdfsFields, new HdfsUpdater(), new Fields());

    return topology.build();
}
 
Example #17
Source File: WordCountTopology.java    From storm-cassandra-cql with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a word-count topology whose counts are keyed by (word, source) and persisted through
 * a non-transactional {@code CassandraCqlMapState}; the "words" DRPC stream looks up counts
 * for the requested words and sums the non-null results.
 *
 * @param drpc DRPC handle the "words" query stream is attached to
 * @return the built topology
 */
@SuppressWarnings("unchecked")
public static StormTopology buildWordCountAndSourceTopology(LocalDRPC drpc) {
    LOG.info("Building topology.");
    TridentTopology topology = new TridentTopology();

    // Each sentence is tagged with the name of the source it is attributed to.
    String firstSource = "spout1";
    String secondSource = "spout2";
    FixedBatchSpout sentenceSpout = new FixedBatchSpout(
            new Fields("sentence", "source"),
            3,
            new Values("the cow jumped over the moon", firstSource),
            new Values("the man went to the store and bought some candy", firstSource),
            new Values("four score and four years ago", secondSource),
            new Values("how much wood can a wood chuck chuck", secondSource));
    sentenceSpout.setCycle(true);

    TridentState wordCounts = topology
            .newStream("spout1", sentenceSpout)
            .each(new Fields("sentence"), new Split(), new Fields("word"))
            .groupBy(new Fields("word", "source"))
            .persistentAggregate(
                    CassandraCqlMapState.nonTransactional(new WordCountAndSourceMapper()),
                    new IntegerCount(),
                    new Fields("count"))
            .parallelismHint(6);

    // NOTE(review): the state is grouped by (word, source) but queried by "word" alone;
    // confirm the mapper resolves single-field keys as intended.
    topology
            .newDRPCStream("words", drpc)
            .each(new Fields("args"), new Split(), new Fields("word"))
            .groupBy(new Fields("word"))
            .stateQuery(wordCounts, new Fields("word"), new MapGet(), new Fields("count"))
            .each(new Fields("count"), new FilterNull())
            .aggregate(new Fields("count"), new Sum(), new Fields("sum"));

    return topology.build();
}
 
Example #18
Source File: GroupedStream.java    From jstorm with Apache License 2.0 4 votes vote down vote up
/**
 * Convenience overload that queries {@code state} without naming input fields; it delegates
 * to the four-argument {@code stateQuery} with {@code null} as the input fields.
 *
 * @param state state to query
 * @param function query function to apply
 * @param functionFields fields forwarded unchanged to the four-argument overload
 * @return the stream returned by the four-argument overload
 */
public Stream stateQuery(TridentState state, QueryFunction function, Fields functionFields) {
    Fields noInputFields = null;
    return stateQuery(state, noInputFields, function, functionFields);
}
 
Example #19
Source File: GroupedStream.java    From jstorm with Apache License 2.0 4 votes vote down vote up
/**
 * Convenience overload for a reducer aggregator without explicit input fields; it delegates
 * to the four-argument {@code persistentAggregate} with {@code null} as the input fields.
 *
 * @param spec state specification forwarded unchanged
 * @param agg reducer aggregator forwarded unchanged
 * @param functionFields fields forwarded unchanged
 * @return the state returned by the four-argument overload
 */
public TridentState persistentAggregate(StateSpec spec, ReducerAggregator agg, Fields functionFields) {
    Fields noInputFields = null;
    return persistentAggregate(spec, noInputFields, agg, functionFields);
}
 
Example #20
Source File: GroupedStream.java    From jstorm with Apache License 2.0 4 votes vote down vote up
/**
 * Convenience overload that wraps {@code stateFactory} in a new {@code StateSpec} and
 * delegates to the {@code StateSpec}-based overload for reducer aggregators.
 *
 * @param stateFactory factory wrapped in a {@code StateSpec}
 * @param agg reducer aggregator forwarded unchanged
 * @param functionFields fields forwarded unchanged
 * @return the state returned by the delegated overload
 */
public TridentState persistentAggregate(StateFactory stateFactory, ReducerAggregator agg, Fields functionFields) {
    StateSpec wrappedSpec = new StateSpec(stateFactory);
    return persistentAggregate(wrappedSpec, agg, functionFields);
}
 
Example #21
Source File: GroupedStream.java    From jstorm with Apache License 2.0 4 votes vote down vote up
/**
 * Queries {@code state} from this grouped stream: the underlying stream is first partitioned
 * by the grouping fields, then the query is issued on the partitioned stream.
 *
 * @param state state to query
 * @param inputFields fields forwarded unchanged to the stream-level query
 * @param function query function to apply
 * @param functionFields fields forwarded unchanged to the stream-level query
 * @return the stream returned by the stream-level {@code stateQuery}
 */
public Stream stateQuery(TridentState state, Fields inputFields, QueryFunction function, Fields functionFields) {
    Stream partitioned = _stream.partitionBy(_groupFields);
    return partitioned.stateQuery(state, inputFields, function, functionFields);
}
 
Example #22
Source File: GroupedStream.java    From jstorm with Apache License 2.0 4 votes vote down vote up
/**
 * Persists a reducer aggregation of this grouped stream into the state described by
 * {@code spec}.
 *
 * <p>The underlying stream is partitioned by the grouping fields and then passed to
 * {@code partitionPersist} with a {@code MapReducerAggStateUpdater} built from {@code agg},
 * the grouping fields and {@code inputFields}.
 *
 * @param spec state specification handed to {@code partitionPersist}
 * @param inputFields fields combined with the grouping fields (via
 *        {@code TridentUtils.fieldsUnion}) to form the persist input, and also given to the updater
 * @param agg reducer aggregator wrapped by the state updater
 * @param functionFields fields concatenated after the grouping fields (via
 *        {@code TridentUtils.fieldsConcat}) and passed as the final {@code partitionPersist} argument
 * @return the state returned by {@code partitionPersist}
 */
public TridentState persistentAggregate(StateSpec spec, Fields inputFields, ReducerAggregator agg, Fields functionFields) {
    return _stream.partitionBy(_groupFields).partitionPersist(spec, TridentUtils.fieldsUnion(_groupFields, inputFields),
            new MapReducerAggStateUpdater(agg, _groupFields, inputFields), TridentUtils.fieldsConcat(_groupFields, functionFields));
}
 
Example #23
Source File: GroupedStream.java    From jstorm with Apache License 2.0 4 votes vote down vote up
/**
 * Convenience overload that wraps {@code stateFactory} in a new {@code StateSpec} and
 * delegates to the {@code StateSpec}-based overload for reducer aggregators with input fields.
 *
 * @param stateFactory factory wrapped in a {@code StateSpec}
 * @param inputFields fields forwarded unchanged
 * @param agg reducer aggregator forwarded unchanged
 * @param functionFields fields forwarded unchanged
 * @return the state returned by the delegated overload
 */
public TridentState persistentAggregate(StateFactory stateFactory, Fields inputFields, ReducerAggregator agg, Fields functionFields) {
    StateSpec wrappedSpec = new StateSpec(stateFactory);
    return persistentAggregate(wrappedSpec, inputFields, agg, functionFields);
}
 
Example #24
Source File: GroupedStream.java    From jstorm with Apache License 2.0 4 votes vote down vote up
/**
 * Persists a combiner aggregation of this grouped stream into the state described by
 * {@code spec}.
 *
 * <p>{@code aggregate(inputFields, agg, functionFields)} is invoked first; its result is then
 * passed to {@code partitionPersist} with a {@code MapCombinerAggStateUpdater} built from
 * {@code agg}, the grouping fields and {@code functionFields}.
 *
 * @param spec state specification handed to {@code partitionPersist}
 * @param inputFields fields handed to the initial {@code aggregate} call
 * @param agg combiner aggregator used both for the initial aggregation and by the state updater
 * @param functionFields fields produced by the initial aggregation; combined with the grouping
 *        fields via {@code TridentUtils.fieldsUnion} for the persist input and via
 *        {@code TridentUtils.fieldsConcat} for the final {@code partitionPersist} argument
 * @return the state returned by {@code partitionPersist}
 */
public TridentState persistentAggregate(StateSpec spec, Fields inputFields, CombinerAggregator agg, Fields functionFields) {
    return aggregate(inputFields, agg, functionFields).partitionPersist(spec, TridentUtils.fieldsUnion(_groupFields, functionFields),
            new MapCombinerAggStateUpdater(agg, _groupFields, functionFields), TridentUtils.fieldsConcat(_groupFields, functionFields));
}
 
Example #25
Source File: GroupedStream.java    From jstorm with Apache License 2.0 4 votes vote down vote up
/**
 * Convenience overload that wraps {@code stateFactory} in a new {@code StateSpec} and
 * delegates to the {@code StateSpec}-based overload for combiner aggregators with input fields.
 *
 * @param stateFactory factory wrapped in a {@code StateSpec}
 * @param inputFields fields forwarded unchanged
 * @param agg combiner aggregator forwarded unchanged
 * @param functionFields fields forwarded unchanged
 * @return the state returned by the delegated overload
 */
public TridentState persistentAggregate(StateFactory stateFactory, Fields inputFields, CombinerAggregator agg, Fields functionFields) {
    StateSpec wrappedSpec = new StateSpec(stateFactory);
    return persistentAggregate(wrappedSpec, inputFields, agg, functionFields);
}
 
Example #26
Source File: GroupedStream.java    From jstorm with Apache License 2.0 4 votes vote down vote up
/**
 * Convenience overload for a combiner aggregator without explicit input fields; it delegates
 * to the four-argument {@code persistentAggregate} with {@code null} as the input fields.
 *
 * @param spec state specification forwarded unchanged
 * @param agg combiner aggregator forwarded unchanged
 * @param functionFields fields forwarded unchanged
 * @return the state returned by the four-argument overload
 */
public TridentState persistentAggregate(StateSpec spec, CombinerAggregator agg, Fields functionFields) {
    Fields noInputFields = null;
    return persistentAggregate(spec, noInputFields, agg, functionFields);
}
 
Example #27
Source File: GroupedStream.java    From jstorm with Apache License 2.0 4 votes vote down vote up
/**
 * Convenience overload that wraps {@code stateFactory} in a new {@code StateSpec} and
 * delegates to the {@code StateSpec}-based overload for combiner aggregators.
 *
 * @param stateFactory factory wrapped in a {@code StateSpec}
 * @param agg combiner aggregator forwarded unchanged
 * @param functionFields fields forwarded unchanged
 * @return the state returned by the delegated overload
 */
public TridentState persistentAggregate(StateFactory stateFactory, CombinerAggregator agg, Fields functionFields) {
    StateSpec wrappedSpec = new StateSpec(stateFactory);
    return persistentAggregate(wrappedSpec, agg, functionFields);
}
 
Example #28
Source File: TridentFastWordCount.java    From jstorm with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the fast word-count topology: a cycling fixed-batch spout of sample sentences is
 * split into words and counted per word into an in-memory map state.
 *
 * <p>The parallelism of the spout, split and count stages is read from {@code conf} under
 * the {@code TOPOLOGY_*_PARALLELISM_HINT} keys, with fallbacks of 1, 2 and 2 respectively.
 *
 * @param drpc not used by this method; kept so the signature stays unchanged for callers
 * @return the built topology
 */
public static StormTopology buildTopology(LocalDRPC drpc) {
    // The sample sentences are runtime data and are reproduced verbatim, typos included.
    FixedBatchSpout spout = new FixedBatchSpout(new Fields("sentence"), 3,
            new Values("the cow jumped over the moon"),
            new Values("the man went to the store and bought some candy"),
            new Values("four score and seven years ago"), new Values("how many apples can you eat"),
            new Values("to be or not to be the person"),
            new Values("marry had a little lamb whos fleese was white as snow"),
            new Values("and every where that marry went the lamb was sure to go"),
            new Values("one two three four five six seven eight nine ten"),
            new Values("this is a test of the emergency broadcast system this is only a test"),
            new Values("peter piper picked a peck of pickeled peppers"),
            new Values("JStorm is a distributed and fault-tolerant realtime computation system."),
            new Values(
                    "Inspired by Apache Storm, JStorm has been completely rewritten in Java and provides many more enhanced features."),
            new Values("JStorm has been widely used in many enterprise environments and proved robust and stable."),
            new Values("JStorm provides a distributed programming framework very similar to Hadoop MapReduce."),
            new Values(
                    "The developer only needs to compose his/her own pipe-lined computation logic by implementing the JStorm API"),
            new Values(" which is fully compatible with Apache Storm API"),
            new Values("and submit the composed Topology to a working JStorm instance."),
            new Values("Similar to Hadoop MapReduce, JStorm computes on a DAG (directed acyclic graph)."),
            new Values("Different from Hadoop MapReduce, a JStorm topology runs 24 * 7"),
            new Values("the very nature of its continuity abd 100% in-memory architecture "),
            new Values(
                    "has been proved a particularly suitable solution for streaming data and real-time computation."),
            new Values("JStorm guarantees fault-tolerance."), new Values("Whenever a worker process crashes, "),
            new Values(
                    "the scheduler embedded in the JStorm instance immediately spawns a new worker process to take the place of the failed one."),
            new Values(" The Acking framework provided by JStorm guarantees that every single piece of data will be processed at least once.") );
    spout.setCycle(true);

    // Per-stage parallelism, read from the configuration with the given fallbacks.
    int spoutParallelismHint = JStormUtils.parseInt(conf.get(TOPOLOGY_SPOUT_PARALLELISM_HINT), 1);
    int splitParallelismHint = JStormUtils.parseInt(conf.get(TOPOLOGY_SPLIT_PARALLELISM_HINT), 2);
    int countParallelismHint = JStormUtils.parseInt(conf.get(TOPOLOGY_COUNT_PARALLELISM_HINT), 2);

    TridentTopology topology = new TridentTopology();

    // The resulting TridentState is never queried here, so it is not kept in a local.
    topology.newStream("spout1", spout).parallelismHint(spoutParallelismHint)
            .each(new Fields("sentence"), new Split(), new Fields("word")).parallelismHint(splitParallelismHint)
            .groupBy(new Fields("word"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
            .parallelismHint(countParallelismHint);

    return topology.build();
}
 
Example #29
Source File: DRPC.java    From storm-benchmark with Apache License 2.0 4 votes vote down vote up
/**
   * Builds the DRPC benchmark topology that answers "reach" queries for a URL.
   *
   * <p>The DRPC server and port are read from {@code config} (both are mandatory), remembered in
   * the {@code server} / {@code port} fields and copied into {@code Config.DRPC_SERVERS} and
   * {@code Config.DRPC_PORT}. The topology then consists of:
   * <ul>
   *   <li>a stream fed by a {@code TransactionalTridentKafkaSpout} whose (url, user) pairs are
   *       aggregated per url into the {@code urlToUsers} state;</li>
   *   <li>a static {@code userToFollowers} state created from
   *       {@code PageViewGenerator.genFollowersDB()};</li>
   *   <li>a DRPC stream that looks up the users of the requested url, then the followers of each
   *       user, and finally aggregates the grouped followers into a single "reach" field.</li>
   * </ul>
   *
   * @param config benchmark configuration; must contain the {@code SERVER} and {@code PORT} entries
   * @return the built topology
   * @throws IllegalArgumentException if the DRPC server or the DRPC port is not configured
   */
  @Override
  public StormTopology getTopology(Config config) {

    // DRPC server: mandatory, checked before anything else is touched.
    final Object serverSetting = config.get(SERVER);
    if (serverSetting == null) {
      throw new IllegalArgumentException("must set a drpc server");
    }
    server = (String) serverSetting;
    config.put(Config.DRPC_SERVERS, Lists.newArrayList(server));

    // DRPC port: mandatory as well.
    final Object portSetting = config.get(PORT);
    if (portSetting == null) {
      throw new IllegalArgumentException("must set a drpc port");
    }
    port = Utils.getInt(portSetting);
    config.put(Config.DRPC_PORT, port);

    LOG.info("drpc server: " + server + "; drpc port: " + port);

    // Parallelism hints for the individual stages, falling back to the benchmark defaults.
    final int spoutParallelism = BenchmarkUtils.getInt(config, SPOUT_NUM, DEFAULT_SPOUT_NUM);
    final int pageParallelism = BenchmarkUtils.getInt(config, PAGE_NUM, DEFAULT_PAGE_BOLT_NUM);
    final int viewParallelism = BenchmarkUtils.getInt(config, VIEW_NUM, DEFAULT_VIEW_BOLT_NUM);
    final int userParallelism = BenchmarkUtils.getInt(config, USER_NUM, DEFAULT_USER_BOLT_NUM);
    final int followerParallelism =
            BenchmarkUtils.getInt(config, FOLLOWER_NUM, DEFAULT_FOLLOWER_BOLT_NUM);

    spout = new TransactionalTridentKafkaSpout(
            KafkaUtils.getTridentKafkaConfig(config, new SchemeAsMultiScheme(new StringScheme())));

    final TridentTopology topology = new TridentTopology();

    // State 1: url -> "user_set", aggregated from the Kafka-backed stream.
    final Fields urlAndUser = new Fields("url", "user");
    final MemoryMapState.Factory urlStateFactory = new MemoryMapState.Factory();
    final TridentState urlToUsers = topology
            .newStream("drpc", spout)
            .parallelismHint(spoutParallelism)
            .shuffle()
            .each(new Fields(StringScheme.STRING_SCHEME_KEY),
                    new Extract(Arrays.asList(Item.URL, Item.USER)),
                    urlAndUser)
            .parallelismHint(pageParallelism)
            .groupBy(new Fields("url"))
            .persistentAggregate(urlStateFactory, urlAndUser, new Distinct(), new Fields("user_set"))
            .parallelismHint(viewParallelism);

    /* debug
     *  1. this proves that the aggregated result has successfully persisted
     *
     *  urlToUsers.newValuesStream()
     *          .each(new Fields("url", "user_set"), new Print("(url, user_set)"), new Fields("url2", "user_set2"));
     */

    // State 2: static user -> followers lookup seeded from the generated followers database.
    final TridentState userToFollowers = topology.newStaticState(
            new StaticSingleKeyMapState.Factory(new PageViewGenerator().genFollowersDB()));

    /* debug
     *  2. this proves that MemoryMapState could be read correctly
     *
     *  trident.newStream("urlToUsers", new PageViewSpout(false))
     *          .each(new Fields("page_view"), new Extract(Arrays.asList(Item.URL)), new Fields("url"))
     *          .each(new Fields("url"), new Print("url"), new Fields("url2"))
     *          .groupBy(new Fields("url2"))
     *          .stateQuery(urlToUsers, new Fields("url2"),  new MapGet(), new Fields("users"))
     *          .each(new Fields("users"), new Print("users"), new Fields("users2"));
     */

    /* debug
     *  3. this proves that StaticSingleKeyMapState could be read correctly
     *
     *  trident.newStream("userToFollowers", new PageViewSpout(false))
     *          .each(new Fields("page_view"), new Extract(Arrays.asList(Item.USER)), new Fields("user"))
     *          .each(new Fields("user"), new Print("user"), new Fields("user2"))
     *          .stateQuery(userToFollowers, new Fields("user2"), new MapGet(), new Fields("followers"))
     *          .each(new Fields("followers"), new Print("followers"), new Fields("followers2"));
     */

    // DRPC query: url -> users -> followers -> one tuple per follower group -> summed "reach".
    topology.newDRPCStream(FUNCTION, null)
            .each(new Fields("args"), new Print("args"), new Fields("url"))
            .groupBy(new Fields("url"))
            .stateQuery(urlToUsers, new Fields("url"), new MapGet(), new Fields("users"))
            .each(new Fields("users"), new Expand(), new Fields("user"))
            .parallelismHint(userParallelism)
            .groupBy(new Fields("user"))
            .stateQuery(userToFollowers, new Fields("user"), new MapGet(), new Fields("followers"))
            .each(new Fields("followers"), new Expand(), new Fields("follower"))
            .parallelismHint(followerParallelism)
            .groupBy(new Fields("follower"))
            .aggregate(new One(), new Fields("one"))
            .aggregate(new Fields("one"), new Sum(), new Fields("reach"));

    return topology.build();
  }
 
Example #30
Source File: WordCountTrident.java — from the storm-hbase project (Apache License 2.0), 4 votes
/**
 * Builds a Trident topology that persists (word, count) tuples to the HBase table
 * "WordCount" and queries the same state back for every tuple.
 *
 * <p>The tuples come from a fixed, cycling spout. Each one is written through an
 * {@code HBaseUpdater}, then looked up again by "word" with an {@code HBaseQuery}; the
 * resulting "word" / "columnValue" pair is handed to a {@code PrintFunction}.
 *
 * @param hbaseRoot value passed to {@code HBaseState.Options.withConfigKey}
 * @return the built topology
 */
public static StormTopology buildTopology(String hbaseRoot){
    // Source: four fixed (word, count) tuples, with cycling enabled on the spout.
    final Fields wordAndCount = new Fields("word", "count");
    final FixedBatchSpout wordSpout = new FixedBatchSpout(wordAndCount, 4,
            new Values("storm", 1), new Values("trident", 1),
            new Values("needs", 1), new Values("javadoc", 1));
    wordSpout.setCycle(true);

    // Write side: "word" is both row key and column field, "count" is a counter, all in family "cf".
    final TridentHBaseMapper writeMapper = new SimpleTridentHBaseMapper()
            .withColumnFamily("cf")
            .withColumnFields(new Fields("word"))
            .withCounterFields(new Fields("count"))
            .withRowKeyField("word");

    // Read side: project only cf:count and convert rows with WordCountValueMapper.
    final HBaseValueMapper readMapper = new WordCountValueMapper();
    final HBaseProjectionCriteria countColumnOnly = new HBaseProjectionCriteria();
    countColumnOnly.addColumn(new HBaseProjectionCriteria.ColumnMetaData("cf", "count"));

    final HBaseState.Options stateOptions = new HBaseState.Options()
            .withConfigKey(hbaseRoot)
            .withDurability(Durability.SYNC_WAL)
            .withMapper(writeMapper)
            .withProjectionCriteria(countColumnOnly)
            .withRowToStormValueMapper(readMapper)
            .withTableName("WordCount");
    final StateFactory hbaseFactory = new HBaseStateFactory(stateOptions);

    final TridentTopology topology = new TridentTopology();
    final Stream words = topology.newStream("spout1", wordSpout);

    // Persist every tuple; no fields are emitted from the update.
    words.partitionPersist(hbaseFactory, wordAndCount, new HBaseUpdater(), new Fields());

    // Query the same factory as a static state and print what comes back.
    final TridentState hbaseState = topology.newStaticState(hbaseFactory);
    words.stateQuery(hbaseState, new Fields("word"), new HBaseQuery(),
                    new Fields("columnName", "columnValue"))
            .each(new Fields("word", "columnValue"), new PrintFunction(), new Fields());

    return topology.build();
}