storm.trident.TridentTopology Java Examples

The following examples show how to use storm.trident.TridentTopology. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example #1

Source File: TridentWordCount.java From flink-perf with Apache License 2.0

7 votes

/**
 * Builds a word-count Trident topology made of two parts: a stream fed by a
 * {@code FixedBatchSpout} of sentences (cycling enabled) whose words are counted into a
 * persistent state, and a "words" DRPC stream that looks those counts up and sums them.
 *
 * @param drpc DRPC handle passed to {@code newDRPCStream("words", drpc)}
 * @return the result of {@code TridentTopology.build()}
 */
public static StormTopology buildTopology(LocalDRPC drpc) {
  // Field sets shared by several steps of the two pipelines below.
  Fields sentenceField = new Fields("sentence");
  Fields wordField = new Fields("word");
  Fields countField = new Fields("count");

  FixedBatchSpout sentenceSpout = new FixedBatchSpout(
      sentenceField,
      3,
      new Values("the cow jumped over the moon"),
      new Values("the man went to the store and bought some candy"),
      new Values("four score and seven years ago"),
      new Values("how many apples can you eat"),
      new Values("to be or not to be the person"));
  sentenceSpout.setCycle(true);

  TridentTopology topology = new TridentTopology();

  // Streaming side: split each sentence into words and aggregate a count per word.
  TridentState wordCounts = topology
      .newStream("spout1", sentenceSpout)
      .parallelismHint(16)
      .each(sentenceField, new Split(), wordField)
      .groupBy(wordField)
      .persistentAggregate(new MemoryMapState.Factory(), new Count(), countField)
      .parallelismHint(16);

  // Query side: split the DRPC arguments, fetch each word's count, drop nulls, sum the rest.
  topology
      .newDRPCStream("words", drpc)
      .each(new Fields("args"), new Split(), wordField)
      .groupBy(wordField)
      .stateQuery(wordCounts, wordField, new MapGet(), countField)
      .each(countField, new FilterNull())
      .aggregate(countField, new Sum(), new Fields("sum"));

  return topology.build();
}

Example #2

Source File: TridentMinMaxOfDevicesTopology.java From jstorm with Apache License 2.0

6 votes

/**
 * Creates a topology whose stream carries two tuple fields, device-id and count
 * (both whole numbers), and derives result streams from it using the min and max
 * operations on the device-id and count values.
 *
 * @return the result of {@code TridentTopology.build()}
 */
public static StormTopology buildDevicesTopology() {
    final String idField = "device-id";
    final String countField = "count";
    final Fields deviceFields = new Fields(idField, countField);

    RandomNumberGeneratorSpout deviceSpout = new RandomNumberGeneratorSpout(deviceFields, 10, 1000);

    TridentTopology topology = new TridentTopology();

    // Every generated tuple is passed through a Debug operation first.
    Stream devices = topology
            .newStream("devicegen-spout", deviceSpout)
            .each(deviceFields, new Debug("##### devices"));

    // Two independent branches off the same stream: minimum by id, maximum by count.
    devices
            .minBy(idField)
            .each(deviceFields, new Debug("#### device with min id"));
    devices
            .maxBy(countField)
            .each(deviceFields, new Debug("#### device with max count"));

    return topology.build();
}

Example #3

Source File: TopHashtagByCountry.java From trident-tutorial with Apache License 2.0

6 votes

/**
 * Builds a topology that counts tweets per (country, contentName) pair and exposes the
 * counts through the "location_hashtag_count" DRPC stream, grouped by country and
 * reduced with {@code FirstN.FirstNSortedAgg(3, "count", true)}.
 *
 * @param spout source for the "tweets" stream
 * @return the result of {@code TridentTopology.build()}
 * @throws IOException declared by the signature; no statement in this body visibly throws it
 */
public static StormTopology buildTopology(TransactionalTridentKafkaSpout spout) throws IOException {
    // Field sets that more than one step refers to.
    Fields countryAndHashtag = new Fields("country", "contentName");
    Fields hashtagAndCount = new Fields("contentName", "count");
    Fields countField = new Fields("count");

    TridentTopology topology = new TridentTopology();

    // Ingest: parse tweets, filter them, extract a location, then count per group.
    TridentState hashtagCounts = topology
            .newStream("tweets", spout)
            .each(new Fields("str"), new ParseTweet(), new Fields("status", "content", "user"))
            .project(new Fields("content", "user", "status"))
            .each(new Fields("content"), new OnlyHashtags())
            .each(new Fields("status"), new OnlyGeo())
            .each(new Fields("status", "content"), new ExtractLocation(), countryAndHashtag)
            .groupBy(countryAndHashtag)
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), countField);

    // Query: read the stored keys, look up each count, then aggregate per country.
    topology
            .newDRPCStream("location_hashtag_count")
            .stateQuery(hashtagCounts, new TupleCollectionGet(), countryAndHashtag)
            .stateQuery(hashtagCounts, countryAndHashtag, new MapGet(), countField)
            .groupBy(new Fields("country"))
            .aggregate(hashtagAndCount, new FirstN.FirstNSortedAgg(3, "count", true), hashtagAndCount);

    return topology.build();
}

Example #4

Source File: TopHashtagFollowerCountGrouping.java From trident-tutorial with Apache License 2.0

6 votes

/**
 * Builds a topology that counts tweets per (followerClass, contentName) pair into a
 * state created by {@code HazelCastStateFactory}, and a "hashtag_count" DRPC stream that
 * queries that state with the keys (followerClass, args).
 *
 * @param spout source for the "tweets" stream
 * @return the result of {@code TridentTopology.build()}
 * @throws IOException declared by the signature; no statement in this body visibly throws it
 */
public static StormTopology buildTopology(TransactionalTridentKafkaSpout spout) throws IOException {
    Fields contentAndUser = new Fields("content", "user");
    Fields classAndHashtag = new Fields("followerClass", "contentName");
    Fields countField = new Fields("count");

    TridentTopology topology = new TridentTopology();

    // Ingest: parse, filter, classify, then count per (followerClass, contentName).
    TridentState hashtagCounts = topology
            .newStream("tweets", spout)
            .each(new Fields("str"), new ParseTweet(), new Fields("text", "content", "user"))
            .project(contentAndUser)
            .each(new Fields("content"), new OnlyHashtags())
            .each(new Fields("user"), new OnlyEnglish())
            .each(contentAndUser, new ExtractFollowerClassAndContentName(), classAndHashtag)
            .parallelismHint(3)
            .groupBy(classAndHashtag)
            .persistentAggregate(new HazelCastStateFactory(), new Count(), countField)
            .parallelismHint(3);

    // Query: emit the four follower-class constants, then look up (followerClass, args).
    topology
            .newDRPCStream("hashtag_count")
            .each(new Constants<String>("< 100", "< 10K", "< 100K", ">= 100K"), new Fields("followerClass"))
            .stateQuery(hashtagCounts, new Fields("followerClass", "args"), new MapGet(), countField);

    return topology.build();
}

Example #5

Source File: ClusterTestTopology.java From trident-tutorial with Apache License 2.0

6 votes

/**
 * Builds a per-actor count topology and submits it through {@code StormSubmitter}.
 *
 * @param args command-line arguments; {@code args[0]} is used as the topology name
 * @throws IllegalArgumentException if no topology name is supplied
 * @throws Exception propagated from topology construction or submission
 */
public static void main(String[] args) throws Exception {
    // Fail with a usage message instead of a bare ArrayIndexOutOfBoundsException.
    if (args == null || args.length == 0) {
        throw new IllegalArgumentException("Usage: ClusterTestTopology <topology-name>");
    }

    Config conf = new Config();

    // Submits the topology
    String topologyName = args[0];
    conf.setNumWorkers(8); // Our Vagrant environment has 8 workers

    FakeTweetsBatchSpout fakeTweets = new FakeTweetsBatchSpout(10);

    TridentTopology topology = new TridentTopology();

    // Ingest: group the spout's tuples by "actor" and keep a count per group.
    TridentState countState =
            topology
                    .newStream("spout", fakeTweets)
                    .groupBy(new Fields("actor"))
                    .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"));

    // Query: look up the count stored for the DRPC arguments.
    topology
            .newDRPCStream("count_per_actor")
            .stateQuery(countState, new Fields("args"), new MapGet(), new Fields("count"));

    StormSubmitter.submitTopology(topologyName, conf, topology.build());
}

Example #6

Source File: GlobalTop20Hashtags.java From trident-tutorial with Apache License 2.0

6 votes

/**
 * Builds a topology that counts tweets per (followerClass, contentName) pair and exposes
 * a "top_hashtags" DRPC stream that reads all stored keys, looks up their counts and
 * reduces them with {@code FirstN.FirstNSortedAgg(5, "count", true)}.
 *
 * @param spout source for the "tweets" stream
 * @return the result of {@code TridentTopology.build()}
 * @throws IOException declared by the signature; no statement in this body visibly throws it
 */
public static StormTopology buildTopology(TransactionalTridentKafkaSpout spout) throws IOException {
    Fields contentAndUser = new Fields("content", "user");
    Fields classAndHashtag = new Fields("followerClass", "contentName");
    Fields hashtagAndCount = new Fields("contentName", "count");
    Fields countField = new Fields("count");

    TridentTopology topology = new TridentTopology();

    // Ingest: parse, filter, classify, then count per (followerClass, contentName).
    TridentState hashtagCounts = topology
            .newStream("tweets", spout)
            .each(new Fields("str"), new ParseTweet(), new Fields("text", "content", "user"))
            .project(contentAndUser)
            .each(new Fields("content"), new OnlyHashtags())
            .each(new Fields("user"), new OnlyEnglish())
            .each(contentAndUser, new ExtractFollowerClassAndContentName(), classAndHashtag)
            .groupBy(classAndHashtag)
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), countField);

    // Query: no groupBy here, so the aggregation runs over all looked-up tuples together.
    // NOTE(review): the source file is named GlobalTop20Hashtags but the aggregator is
    // configured with 5 - confirm which number is intended.
    topology
            .newDRPCStream("top_hashtags")
            .stateQuery(hashtagCounts, new TupleCollectionGet(), classAndHashtag)
            .stateQuery(hashtagCounts, classAndHashtag, new MapGet(), countField)
            .aggregate(hashtagAndCount, new FirstN.FirstNSortedAgg(5, "count", true), hashtagAndCount);

    return topology.build();
}

Example #7

Source File: TopHashtagByFollowerClass.java From trident-tutorial with Apache License 2.0

6 votes

/**
 * Builds a topology that counts tweets per (followerClass, contentName) pair and exposes
 * a "hashtag_count" DRPC stream that groups the stored counts by followerClass and
 * reduces each group with {@code FirstN.FirstNSortedAgg(1, "count", true)}.
 *
 * @param spout source for the "tweets" stream
 * @return the result of {@code TridentTopology.build()}
 * @throws IOException declared by the signature; no statement in this body visibly throws it
 */
public static StormTopology buildTopology(TransactionalTridentKafkaSpout spout) throws IOException {
    Fields contentAndUser = new Fields("content", "user");
    Fields classAndHashtag = new Fields("followerClass", "contentName");
    Fields hashtagAndCount = new Fields("contentName", "count");
    Fields countField = new Fields("count");

    TridentTopology topology = new TridentTopology();

    // Ingest: parse, filter, classify, then count per (followerClass, contentName).
    TridentState hashtagCounts = topology
            .newStream("tweets", spout)
            .each(new Fields("str"), new ParseTweet(), new Fields("text", "content", "user"))
            .project(contentAndUser)
            .each(new Fields("content"), new OnlyHashtags())
            .each(new Fields("user"), new OnlyEnglish())
            .each(contentAndUser, new ExtractFollowerClassAndContentName(), classAndHashtag)
            .groupBy(classAndHashtag)
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), countField);

    // Query: read the stored keys, look up each count, then aggregate per followerClass.
    topology
            .newDRPCStream("hashtag_count")
            .stateQuery(hashtagCounts, new TupleCollectionGet(), classAndHashtag)
            .stateQuery(hashtagCounts, classAndHashtag, new MapGet(), countField)
            .groupBy(new Fields("followerClass"))
            .aggregate(hashtagAndCount, new FirstN.FirstNSortedAgg(1, "count", true), hashtagAndCount);

    return topology.build();
}

Example #8

Source File: TridentWordCount.java From jstorm with Apache License 2.0

6 votes

/**
 * Assembles the word-count topology: sentences from a {@code FixedBatchSpout} (with
 * cycling switched on) are split into words and counted into a persistent state, while
 * the "words" DRPC stream sums the stored counts for the words in its arguments.
 *
 * @param drpc DRPC handle passed to {@code newDRPCStream("words", drpc)}
 * @return the result of {@code TridentTopology.build()}
 */
public static StormTopology buildTopology(LocalDRPC drpc) {
    final Fields sentence = new Fields("sentence");
    final Fields word = new Fields("word");
    final Fields count = new Fields("count");

    FixedBatchSpout source = new FixedBatchSpout(sentence, 3,
            new Values("the cow jumped over the moon"),
            new Values("the man went to the store and bought some candy"),
            new Values("four score and seven years ago"),
            new Values("how many apples can you eat"),
            new Values("to be or not to be the person"));
    source.setCycle(true);

    TridentTopology topology = new TridentTopology();

    // Counting pipeline.
    TridentState countsByWord = topology
            .newStream("spout1", source)
            .parallelismHint(16)
            .each(sentence, new Split(), word)
            .groupBy(word)
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), count)
            .parallelismHint(16);

    // Lookup pipeline: null counts are filtered out before summing.
    topology
            .newDRPCStream("words", drpc)
            .each(new Fields("args"), new Split(), word)
            .groupBy(word)
            .stateQuery(countsByWord, word, new MapGet(), count)
            .each(count, new FilterNull())
            .aggregate(count, new Sum(), new Fields("sum"));

    return topology.build();
}

Example #9

Source File: TridentMapExample.java From jstorm with Apache License 2.0

6 votes

/**
 * Builds a word-count topology using the flatMap/map/filter/peek stream operations:
 * spout tuples go through {@code split}, {@code toUpper} and {@code theFilter}, each
 * surviving tuple's first value is printed, and the tuples are counted per "word". The
 * "words" DRPC stream splits its arguments, looks the counts up and sums them.
 *
 * @param drpc DRPC handle passed to {@code newDRPCStream("words", drpc)}
 * @return the result of {@code TridentTopology.build()}
 */
public static StormTopology buildTopology(LocalDRPC drpc) {
    final Fields wordField = new Fields("word");
    final Fields argsField = new Fields("args");
    final Fields countField = new Fields("count");

    FixedBatchSpout source = new FixedBatchSpout(wordField, 3,
            new Values("the cow jumped over the moon"),
            new Values("the man went to the store and bought some candy"),
            new Values("four score and seven years ago"),
            new Values("how many apples can you eat"),
            new Values("to be or not to be the person"));
    source.setCycle(true);

    // Prints the first value of every tuple that reaches the peek step.
    Consumer printFirstValue = new Consumer() {
        @Override
        public void accept(TridentTuple input) {
            System.out.println(input.getString(0));
        }
    };

    TridentTopology topology = new TridentTopology();

    TridentState countsByWord = topology
            .newStream("spout1", source)
            .parallelismHint(16)
            .flatMap(split)
            .map(toUpper)
            .filter(theFilter)
            .peek(printFirstValue)
            .groupBy(wordField)
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), countField)
            .parallelismHint(16);

    topology
            .newDRPCStream("words", drpc)
            .flatMap(split)
            .groupBy(argsField)
            .stateQuery(countsByWord, argsField, new MapGet(), countField)
            .filter(new FilterNull())
            .aggregate(countField, new Sum(), new Fields("sum"));

    return topology.build();
}

Example #10

Source File: TridentWindowingInmemoryStoreTopology.java From jstorm with Apache License 2.0

6 votes

/**
 * Builds a topology that splits sentences into words, applies a windowed
 * {@code CountAsAggregator} over the "word" field and logs every resulting tuple.
 *
 * @param windowStore  store factory passed to the {@code window} operation
 * @param windowConfig window configuration passed to the {@code window} operation
 * @return the result of {@code TridentTopology.build()}
 * @throws Exception declared by the signature
 */
public static StormTopology buildTopology(WindowsStoreFactory windowStore, WindowConfig windowConfig)
        throws Exception {
    final Fields sentence = new Fields("sentence");
    final Fields word = new Fields("word");

    FixedBatchSpout source = new FixedBatchSpout(sentence, 3,
            new Values("the cow jumped over the moon"),
            new Values("the man went to the store and bought some candy"),
            new Values("four score and seven years ago"),
            new Values("how many apples can you eat"),
            new Values("to be or not to be the person"));
    source.setCycle(true);

    // Logs each tuple emitted by the window step.
    Consumer logTuple = new Consumer() {
        @Override
        public void accept(TridentTuple input) {
            LOG.info("Received tuple: [{}]", input);
        }
    };

    TridentTopology topology = new TridentTopology();

    topology
            .newStream("spout1", source)
            .parallelismHint(16)
            .each(sentence, new Split(), word)
            .window(windowConfig, windowStore, word, new CountAsAggregator(), new Fields("count"))
            .peek(logTuple);

    return topology.build();
}

Example #11

Source File: TridentMinMaxOfDevicesTopology.java From jstorm with Apache License 2.0

6 votes

/**
 * Creates a topology demonstrating the min/max stream operations on tuples that
 * carry a vehicle field and a driver field, holding
 * {@link TridentMinMaxOfDevicesTopology.Vehicle} and
 * {@link TridentMinMaxOfDevicesTopology.Driver} values respectively.
 *
 * @return the result of {@code TridentTopology.build()}
 */
public static StormTopology buildVehiclesTopology() {
    final Fields driverOnly = new Fields(Driver.FIELD_NAME);
    final Fields vehicleOnly = new Fields(Vehicle.FIELD_NAME);
    final Fields vehicleAndDriver = new Fields(Vehicle.FIELD_NAME, Driver.FIELD_NAME);

    FixedBatchSpout vehicleSpout = new FixedBatchSpout(vehicleAndDriver, 10, Vehicle.generateVehicles(20));
    vehicleSpout.setCycle(true);

    TridentTopology topology = new TridentTopology();
    Stream vehicles = topology
            .newStream("spout1", vehicleSpout)
            .each(vehicleAndDriver, new Debug("##### vehicles"));

    // Minimum under SpeedComparator, then the driver field of that result.
    Stream slowest = vehicles
            .min(new SpeedComparator())
            .each(vehicleOnly, new Debug("#### slowest vehicle"));
    slowest
            .project(driverOnly)
            .each(driverOnly, new Debug("##### slowest driver"));

    // Maximum under SpeedComparator, then the driver field of that result.
    vehicles
            .max(new SpeedComparator())
            .each(vehicleOnly, new Debug("#### fastest vehicle"))
            .project(driverOnly)
            .each(driverOnly, new Debug("##### fastest driver"));

    // Maximum under EfficiencyComparator.
    vehicles
            .max(new EfficiencyComparator())
            .each(vehicleOnly, new Debug("#### efficient vehicle"));

    return topology.build();
}

Example #12

Source File: TridentSlidingCountWindowTest.java From jstorm with Apache License 2.0

6 votes

/**
 * Builds a windowed word-count topology over a {@code FixedLimitBatchSpout} and submits
 * it through {@code JStormUnitTestRunner}; tuples leaving the window step are handed to
 * a {@code ValidateConsumer}.
 */
@Test
public void testTridentSlidingCountWindow()
{
    WindowsStoreFactory windowsStoreFactory = new InMemoryWindowsStoreFactory();
    FixedLimitBatchSpout spout = new FixedLimitBatchSpout(SPOUT_LIMIT, new Fields("sentence"), SPOUT_BATCH_SIZE,
                new Values("the cow jumped over the moon"),
                new Values("the man went to the store and bought some candy"),
                new Values("four score and seven years ago"), new Values("how many apples can you eat"),
                new Values("to be or not to be the person"));

    TridentTopology tridentTopology = new TridentTopology();

    // The resulting stream is not needed afterwards; the topology object records the pipeline.
    tridentTopology.newStream("spout1", spout).parallelismHint(16)
                .each(new Fields("sentence"), new Split(), new Fields("word"))
                .window(windowConfig, windowsStoreFactory, new Fields("word"), new CountAsAggregator(), new Fields("count"))
                .peek(new ValidateConsumer());

    Map config = new HashMap();
    config.put(Config.TOPOLOGY_NAME, "TridentSlidingCountWindowTest");

    // Pass the config built above; previously null was passed, so the topology name
    // set here was never handed to the runner.
    JStormUnitTestRunner.submitTopology(tridentTopology.build(), config, 120, null);
}

Example #13

Source File: TridentWordCount.java From storm-benchmark with Apache License 2.0

6 votes

/**
 * Builds the benchmark word-count topology. The parallelism of the spout, split and
 * count stages is read from {@code config} via {@code BenchmarkUtils.getInt}, falling
 * back to the corresponding {@code DEFAULT_*} constants.
 *
 * @param config configuration used for the parallelism settings and the Kafka spout
 * @return the result of {@code TridentTopology.build()}
 */
@Override
  public StormTopology getTopology(Config config) {
    final int spoutParallelism = BenchmarkUtils.getInt(config, SPOUT_NUM, DEFAULT_SPOUT_NUM);
    final int splitParallelism = BenchmarkUtils.getInt(config, SPLIT_NUM, DEFAULT_SPLIT_BOLT_NUM);
    final int countParallelism = BenchmarkUtils.getInt(config, COUNT_NUM, DEFAULT_COUNT_BOLT_NUM);

    spout = new TransactionalTridentKafkaSpout(
        KafkaUtils.getTridentKafkaConfig(config, new SchemeAsMultiScheme(new StringScheme())));

    final Fields word = new Fields("word");
    TridentTopology trident = new TridentTopology();

    // Each stage is followed by its own parallelism hint.
    trident
        .newStream("wordcount", spout)
        .name("sentence")
        .parallelismHint(spoutParallelism)
        .shuffle()
        .each(new Fields(StringScheme.STRING_SCHEME_KEY), new WordSplit(), word)
        .parallelismHint(splitParallelism)
        .groupBy(word)
        .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
        .parallelismHint(countParallelism);

    return trident.build();
  }

Example #14

Source File: TridentMinMaxOfDevicesTest.java From jstorm with Apache License 2.0

6 votes

/**
 * Builds a topology that applies {@code minBy("device-id")} and {@code maxBy("count")}
 * to a stream from a {@code ShuffleValuesBatchSpout} and submits it through
 * {@code JStormUnitTestRunner}. The two branches are checked by {@code AssertMinDebug}
 * and {@code AssertMaxDebug}.
 */
@Test
public void testTridentMinMaxOfDevices()
{
    final String idField = "device-id";
    final String countField = "count";
    Fields deviceFields = new Fields(idField, countField);

    // Values 1..SPOUT_BATCH_SIZE; the same list is handed to the spout twice.
    List<Values> numbers = new ArrayList<Values>();
    for (int index = 0; index < SPOUT_BATCH_SIZE; index++)
    {
        numbers.add(new Values(index + 1));
    }
    ShuffleValuesBatchSpout deviceSpout = new ShuffleValuesBatchSpout(deviceFields, numbers, numbers);

    TridentTopology tridentTopology = new TridentTopology();
    Stream devices = tridentTopology
            .newStream("device-gen-spout", deviceSpout)
            .each(deviceFields, new Debug("#### devices"));
    devices.minBy(idField).each(deviceFields, new AssertMinDebug());
    devices.maxBy(countField).each(deviceFields, new AssertMaxDebug());

    Map config = new HashMap();
    config.put(Config.TOPOLOGY_NAME, "TridentMinMaxOfDevicesTest");

    // The test can pass if the two AssertDebug operations pass throughout the run.
    JStormUnitTestRunner.submitTopology(tridentTopology.build(), config, 120, null);
}

Example #15

Source File: TridentTumblingCountWindowTest.java From jstorm with Apache License 2.0

6 votes

/**
 * Builds a windowed word-count topology over a {@code FixedLimitBatchSpout} and submits
 * it through {@code JStormUnitTestRunner}; tuples leaving the window step are handed to
 * a {@code ValidateConsumer}.
 */
@Test
public void testTridentTumblingCountWindow()
{
    WindowsStoreFactory windowsStoreFactory = new InMemoryWindowsStoreFactory();
    FixedLimitBatchSpout spout = new FixedLimitBatchSpout(SPOUT_LIMIT, new Fields("sentence"), SPOUT_BATCH_SIZE,
                new Values("the cow jumped over the moon"),
                new Values("the man went to the store and bought some candy"),
                new Values("four score and seven years ago"), new Values("how many apples can you eat"),
                new Values("to be or not to be the person"));

    TridentTopology tridentTopology = new TridentTopology();

    // The resulting stream is not needed afterwards; the topology object records the pipeline.
    tridentTopology.newStream("spout1", spout).parallelismHint(16)
                .each(new Fields("sentence"), new Split(), new Fields("word"))
                .window(windowConfig, windowsStoreFactory, new Fields("word"), new CountAsAggregator(), new Fields("count"))
                .peek(new ValidateConsumer());

    Map config = new HashMap();
    config.put(Config.TOPOLOGY_NAME, "TridentTumblingCountWindowTest");

    // Pass the config built above; previously null was passed, so the topology name
    // set here was never handed to the runner.
    JStormUnitTestRunner.submitTopology(tridentTopology.build(), config, 120, null);
}

Example #16

Source File: TridentTumblingDurationWindowTest.java From jstorm with Apache License 2.0

6 votes

/**
 * Builds a windowed word-count topology over a {@code FixedLimitBatchSpout} and submits
 * it through {@code JStormUnitTestRunner}; tuples leaving the window step are handed to
 * a {@code ValidateConsumer}.
 */
@Test
public void testTridentTumblingDurationWindow()
{
        WindowsStoreFactory windowsStoreFactory = new InMemoryWindowsStoreFactory();
        FixedLimitBatchSpout spout = new FixedLimitBatchSpout(SPOUT_LIMIT, new Fields("sentence"), SPOUT_BATCH_SIZE,
                new Values("the cow jumped over the moon"),
                new Values("the man went to the store and bought some candy"),
                new Values("four score and seven years ago"), new Values("how many apples can you eat"),
                new Values("to be or not to be the person"));

        TridentTopology tridentTopology = new TridentTopology();

        // The resulting stream is not needed afterwards; the topology object records the pipeline.
        tridentTopology.newStream("spout1", spout).parallelismHint(16)
                .each(new Fields("sentence"), new Split(), new Fields("word"))
                .window(windowConfig, windowsStoreFactory, new Fields("word"), new CountAsAggregator(), new Fields("count"))
                .peek(new ValidateConsumer());

        Map config = new HashMap();
        config.put(Config.TOPOLOGY_NAME, "TridentTumblingDurationWindowTest");

        // Pass the config built above; previously null was passed, so the topology name
        // set here was never handed to the runner.
        JStormUnitTestRunner.submitTopology(tridentTopology.build(), config, 120, null);

}

Example #17

Source File: ClickThruAnalyticsTopology.java From storm-example with Apache License 2.0

6 votes

/**
 * Builds a topology with two aggregations over the same input stream: a per-campaign
 * count of tuples that pass {@code Filter("click", "true")} and {@code Distinct}, and a
 * per-campaign count of all tuples. The new values of the second are joined with the
 * first through a state query and passed to {@code CampaignEffectiveness}.
 *
 * @return the result of {@code TridentTopology.build()}
 */
public static StormTopology buildTopology() {
    LOG.info("Building topology.");

    final Fields campaign = new Fields("campaign");
    final Fields clickTuple = new Fields("username", "campaign", "product", "click");
    final Fields clickThruCount = new Fields("click_thru_count");

    TridentTopology topology = new TridentTopology();
    StateFactory clickThruMemory = new MemoryMapState.Factory();
    ClickThruSpout clickSpout = new ClickThruSpout();
    Stream clicksAndImpressions = topology.newStream("clithru", clickSpout);

    // Branch 1: count filtered, distinct tuples per campaign.
    TridentState clickThruState = clicksAndImpressions
            .each(clickTuple, new Filter("click", "true"))
            .each(clickTuple, new Distinct())
            .groupBy(campaign)
            .persistentAggregate(clickThruMemory, new Count(), clickThruCount);

    // Branch 2: count every tuple per campaign, then combine with branch 1's state.
    clicksAndImpressions
            .groupBy(campaign)
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("impression_count"))
            .newValuesStream()
            .stateQuery(clickThruState, campaign, new MapGet(), clickThruCount)
            .each(new Fields("campaign", "impression_count", "click_thru_count"),
                    new CampaignEffectiveness(),
                    new Fields(""));

    return topology.build();
}

Example #18

Source File: DrpcTopology.java From storm-example with Apache License 2.0

6 votes

/**
 * Runs the DRPC topology on a local cluster: builds the "drpc" stream, submits the
 * topology, executes one request for a sample board, logs the response, and then shuts
 * the local cluster and the local DRPC instance down.
 *
 * @param args unused
 * @throws Exception propagated from topology construction, submission or execution
 */
public static void main(String[] args) throws Exception {
    final LocalDRPC client = new LocalDRPC();
    try {
        final LocalCluster cluster = new LocalCluster();
        try {
            final Config conf = new Config();
            TridentTopology drpcTopology = new TridentTopology();

            drpcTopology.newDRPCStream("drpc", client)
                    .each(new Fields("args"), new ArgsFunction(), new Fields("gamestate"))
                    .each(new Fields("gamestate"), new GenerateBoards(), new Fields("children"))
                    .each(new Fields("children"), new ScoreFunction(), new Fields("board", "score", "player"))
                    .groupBy(new Fields("gamestate"))
                    .aggregate(new Fields("board", "score"), new FindBestMove(), new Fields("bestMove"))
                    .project(new Fields("bestMove"));

            cluster.submitTopology("drpcTopology", conf, drpcTopology.build());

            Board board = new Board();
            board.board[1][1] = "O";
            board.board[2][2] = "X";
            board.board[0][1] = "O";
            board.board[0][0] = "X";
            LOG.info("Determing best move for O on:" + board.toString());
            LOG.info("RECEIVED RESPONSE [" + client.execute("drpc", board.toKey()) + "]");
        } finally {
            // Release the in-process cluster even when submission or the request fails.
            cluster.shutdown();
        }
    } finally {
        // The DRPC instance is shut down after the cluster that uses it.
        client.shutdown();
    }
}

Example #19

Source File: TridentSlidingDurationWindowTest.java From jstorm with Apache License 2.0

6 votes

/**
 * Builds a windowed word-count topology over a {@code FixedLimitBatchSpout} and submits
 * it through {@code JStormUnitTestRunner}; tuples leaving the window step are handed to
 * a {@code ValidateConsumer}.
 */
@Test
public void testTridentSlidingDurationWindow()
{
    WindowsStoreFactory windowsStoreFactory = new InMemoryWindowsStoreFactory();
    FixedLimitBatchSpout spout = new FixedLimitBatchSpout(SPOUT_LIMIT, new Fields("sentence"), SPOUT_BATCH_SIZE,
                new Values("the cow jumped over the moon"),
                new Values("the man went to the store and bought some candy"),
                new Values("four score and seven years ago"), new Values("how many apples can you eat"),
                new Values("to be or not to be the person"));

    TridentTopology tridentTopology = new TridentTopology();

    // The resulting stream is not needed afterwards; the topology object records the pipeline.
    tridentTopology.newStream("spout1", spout).parallelismHint(16)
                .each(new Fields("sentence"), new Split(), new Fields("word"))
                .window(windowConfig, windowsStoreFactory, new Fields("word"), new CountAsAggregator(), new Fields("count"))
                .peek(new ValidateConsumer());

    Map config = new HashMap();
    config.put(Config.TOPOLOGY_NAME, "TridentSlidingDurationWindowTest");

    // Pass the config built above; previously null was passed, so the topology name
    // set here was never handed to the runner.
    JStormUnitTestRunner.submitTopology(tridentTopology.build(), config, 120, null);

}

Example #20

Source File: ScoringTopology.java From storm-example with Apache License 2.0

6 votes

/**
 * Builds the scoring topology: one {@code GameState} produced by
 * {@code GameState.playAtRandom} is enqueued on a local queue spout, and the stream
 * applies {@code isEndGame}, {@code ScoreFunction} and {@code ScoreUpdater} in turn.
 *
 * @return the result of {@code TridentTopology.build()}
 */
public static StormTopology buildTopology() {
    LOG.info("Building topology.");
    TridentTopology topology = new TridentTopology();

    GameState sampleLeaf = GameState.playAtRandom(new Board(), "X");
    LOG.info("SIMULATED LEAF NODE : [" + sampleLeaf.getBoard() + "] w/ state [" + sampleLeaf + "]");

    // Scoring Queue / Spout
    LocalQueueEmitter<GameState> emitter = new LocalQueueEmitter<GameState>("ScoringQueue");
    emitter.enqueue(sampleLeaf);
    LocalQueueSpout<GameState> scoringSpout = new LocalQueueSpout<GameState>(emitter);

    final Fields gameState = new Fields("gamestate");
    final Fields scoredBoard = new Fields("board", "score", "player");

    Stream scoring = topology.newStream("scoring", scoringSpout);
    scoring
            .each(gameState, new isEndGame())
            .each(gameState, new ScoreFunction(), scoredBoard)
            .each(scoredBoard, new ScoreUpdater(), new Fields());

    return topology.build();
}

Example #21

Source File: TridentTopologySource.java From jstorm with Apache License 2.0

6 votes

/**
 * Assigns a five-sentence {@code FixedBatchSpout} to {@code this.spout} and builds a
 * word-count topology on top of it, with a parallelism hint of 1 after each stage.
 *
 * @param config not read by this method
 * @return the result of {@code TridentTopology.build()}
 */
public StormTopology getTopology(Config config) {
    final Fields sentence = new Fields("sentence");
    final Fields word = new Fields("word");

    this.spout = new FixedBatchSpout(sentence, 20,
            new Values("one two"),
            new Values("two three"),
            new Values("three four"),
            new Values("four five"),
            new Values("five six"));

    TridentTopology trident = new TridentTopology();

    // Split sentences into words, then count per word.
    trident
            .newStream("wordcount", spout)
            .name("sentence")
            .parallelismHint(1)
            .shuffle()
            .each(sentence, new Split(), word)
            .parallelismHint(1)
            .groupBy(word)
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
            .parallelismHint(1);

    return trident.build();
}

Example #22

Source File: ESIndexUpdaterTest.java From storm-trident-elasticsearch with Apache License 2.0

6 votes

/**
 * Builds the test topology: sentences from a cycling {@code FixedBatchSpout} are
 * persisted through an {@code ESIndexState} using {@code ESIndexUpdater}, and the
 * "search" DRPC stream queries that state and projects the result to a "json" field.
 *
 * @return the result of {@code TridentTopology.build()}
 */
@Override
protected StormTopology buildTopology() {
    final Fields sentence = new Fields("sentence");
    final Fields searchKey = new Fields("query", "indices", "types");
    final Fields tweet = new Fields("tweet");
    final Fields json = new Fields("json");

    FixedBatchSpout sentenceSpout = new FixedBatchSpout(sentence, 3,
            new Values("the cow jumped over the moon"),
            new Values("the man went to the store and bought some candy"),
            new Values("four score and seven years ago"),
            new Values("how many apples can you eat"),
            new Values("to be or not to be the person"));
    sentenceSpout.setCycle(true);

    ESIndexState.Factory<Tweet> indexFactory = new ESIndexState.Factory<>(getLocalClient(), Tweet.class);
    TridentTopology topology = new TridentTopology();

    // Write path: persist every sentence through the index updater.
    TridentState indexState = topology
            .newStream("tweets", sentenceSpout)
            .partitionPersist(indexFactory, sentence, new ESIndexUpdater(new MyTridentTupleMapper()));

    // Read path: extract search arguments, query the state, drop nulls, render JSON.
    topology
            .newDRPCStream("search", drpc)
            .each(new Fields("args"), new ExtractSearchArgs(), searchKey)
            .groupBy(searchKey)
            .stateQuery(indexState, searchKey, new QuerySearchIndexQuery(), tweet)
            .each(tweet, new FilterNull())
            .each(tweet, new CreateJson(), json)
            .project(json);

    return topology.build();
}

Example #23

Source File: TridentTopologySource.java From flux with Apache License 2.0

6 votes

/**
 * Stores a five-sentence {@code FixedBatchSpout} in {@code this.spout} and wires it
 * into a word-count topology; every stage is given a parallelism hint of 1.
 *
 * @param config not read by this method
 * @return the result of {@code TridentTopology.build()}
 */
public StormTopology getTopology(Config config) {
    final Fields sentenceField = new Fields("sentence");
    final Fields wordField = new Fields("word");
    final Fields countField = new Fields("count");

    this.spout = new FixedBatchSpout(
            sentenceField,
            20,
            new Values("one two"),
            new Values("two three"),
            new Values("three four"),
            new Values("four five"),
            new Values("five six"));

    TridentTopology trident = new TridentTopology();

    trident
            .newStream("wordcount", spout)
            .name("sentence")
            .parallelismHint(1)
            .shuffle()
            // split stage
            .each(sentenceField, new Split(), wordField)
            .parallelismHint(1)
            // count stage
            .groupBy(wordField)
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), countField)
            .parallelismHint(1);

    return trident.build();
}

Example #24

Source File: MovingAvgLocalTopologyRunner.java From hadoop-arch-book with Apache License 2.0

5 votes

/**
 * Builds the moving-average topology (parse ticks into a "price" field, then aggregate
 * with {@code CalculateAverage}) and submits it to a {@code LocalCluster} under the
 * name "moving-avg".
 *
 * @param args unused
 * @throws Exception declared by the signature
 */
public static void main(String[] args)
    throws Exception {

  Config conf = new Config();
  LocalCluster cluster = new LocalCluster();

  TridentTopology topology = new TridentTopology();

  final Fields price = new Fields("price");

  // The resulting stream is not referenced again; the topology object records the pipeline.
  topology
    .newStream("ticks-spout", buildSpout())
    .each(new Fields("stock-ticks"), new TickParser(), price)
    .aggregate(price, new CalculateAverage(), new Fields("count"));

  cluster.submitTopology("moving-avg", conf, topology.build());
}

Example #25

Source File: TridentMinMaxOfVehiclesTopology.java From jstorm with Apache License 2.0

5 votes

/**
 * Creates a topology demonstrating the min/max stream operations on tuples that
 * carry a vehicle field and a driver field, holding
 * {@link TridentMinMaxOfVehiclesTopology.Vehicle} and
 * {@link TridentMinMaxOfVehiclesTopology.Driver} values respectively.
 *
 * @return the result of {@code TridentTopology.build()}
 */
public static StormTopology buildVehiclesTopology() {
    final Fields driverOnly = new Fields(Driver.FIELD_NAME);
    final Fields vehicleOnly = new Fields(Vehicle.FIELD_NAME);
    final Fields vehicleAndDriver = new Fields(Vehicle.FIELD_NAME, Driver.FIELD_NAME);

    FixedBatchSpout vehicleSpout = new FixedBatchSpout(vehicleAndDriver, 10, Vehicle.generateVehicles(20));
    vehicleSpout.setCycle(true);

    TridentTopology topology = new TridentTopology();
    Stream vehicles = topology
            .newStream("spout1", vehicleSpout)
            .each(vehicleAndDriver, new Debug("##### vehicles"));

    // Minimum under SpeedComparator, then the driver field of that result.
    Stream slowest = vehicles
            .min(new SpeedComparator())
            .each(vehicleOnly, new Debug("#### slowest vehicle"));
    slowest
            .project(driverOnly)
            .each(driverOnly, new Debug("##### slowest driver"));

    // Maximum under SpeedComparator, then the driver field of that result.
    vehicles
            .max(new SpeedComparator())
            .each(vehicleOnly, new Debug("#### fastest vehicle"))
            .project(driverOnly)
            .each(driverOnly, new Debug("##### fastest driver"));

    // minBy / maxBy on the vehicle field under EfficiencyComparator.
    vehicles
            .minBy(Vehicle.FIELD_NAME, new EfficiencyComparator())
            .each(vehicleOnly, new Debug("#### least efficient vehicle"));
    vehicles
            .maxBy(Vehicle.FIELD_NAME, new EfficiencyComparator())
            .each(vehicleOnly, new Debug("#### most efficient vehicle"));

    return topology.build();
}

Example #26

Source File: TridentReach.java From jstorm with Apache License 2.0

5 votes

/**
 * Assembles the "reach" DRPC topology.
 *
 * <p>The DRPC argument is looked up in a static state backed by
 * {@code TWEETERS_DB}, the resulting list is expanded into one tuple per
 * tweeter, each tweeter is looked up in a second static state backed by
 * {@code FOLLOWERS_DB}, and that list is expanded into one tuple per
 * follower. Tuples are then grouped by follower, aggregated with
 * {@code One}, and the "one" values are summed into the "reach" field.
 *
 * @param drpc the local DRPC instance the "reach" stream is bound to
 * @return the assembled topology
 */
public static StormTopology buildTopology(LocalDRPC drpc) {
    // Field declarations shared by the pipeline steps below.
    Fields args = new Fields("args");
    Fields tweeters = new Fields("tweeters");
    Fields tweeter = new Fields("tweeter");
    Fields followers = new Fields("followers");
    Fields follower = new Fields("follower");
    Fields one = new Fields("one");
    Fields reach = new Fields("reach");

    TridentTopology topology = new TridentTopology();
    TridentState tweetersByUrl = topology.newStaticState(new StaticSingleKeyMapState.Factory(TWEETERS_DB));
    TridentState followersByTweeter = topology.newStaticState(new StaticSingleKeyMapState.Factory(FOLLOWERS_DB));

    topology.newDRPCStream("reach", drpc)
            .stateQuery(tweetersByUrl, args, new MapGet(), tweeters)
            .each(tweeters, new ExpandList(), tweeter)
            .shuffle()
            .stateQuery(followersByTweeter, tweeter, new MapGet(), followers)
            .each(followers, new ExpandList(), follower)
            .groupBy(follower)
            .aggregate(new One(), one)
            .aggregate(one, new Sum(), reach);

    return topology.build();
}

Example #27

Source File: Part03_AdvancedPrimitives2.java From trident-tutorial with Apache License 2.0

5 votes

/**
 * Builds a topology with two streams over the same spout that illustrate
 * chained aggregations and post-processing of a persisted aggregation.
 *
 * @param spout the batch spout both streams read from
 * @return the assembled topology
 * @throws IOException declared by the signature; no statement in this body visibly throws it
 */
private static StormTopology advancedPrimitives(FeederBatchSpout spout) throws IOException {
    Fields city = new Fields("city");
    Fields count = new Fields("count");

    TridentTopology topology = new TridentTopology();

    // Several aggregations over the same grouping are expressed as a "chained"
    // aggregation: between chainedAgg() and chainEnd() we compute both a count
    // and the sum of "age". The two results are then combined into "mean_age".
    topology.newStream("aggregation", spout)
            .groupBy(city)
            .chainedAgg()
            .aggregate(new Count(), count)
            .aggregate(new Fields("age"), new Sum(), new Fields("age_sum"))
            .chainEnd()
            .each(new Fields("age_sum", "count"), new DivideAsDouble(), new Fields("mean_age"))
            .each(new Fields("city", "mean_age"), new Print());

    // To keep processing the results of a persisted aggregation, turn the
    // state back into a stream with newValuesStream().
    topology.newStream("further", spout)
            .groupBy(city)
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), count)
            .newValuesStream()
            .each(new Fields("city", "count"), new Print());

    return topology.build();
}

Example #28

Source File: Skeleton.java From trident-tutorial with Apache License 2.0

5 votes

/**
 * Builds a bare-bones topology: a "tweets" stream that passes the "str"
 * field of every tuple to {@code Print}, plus a "ping" DRPC stream with no
 * operations attached.
 *
 * @param spout the Kafka spout feeding the "tweets" stream
 * @return the assembled topology
 * @throws IOException declared by the signature; no statement in this body visibly throws it
 */
public static StormTopology buildTopology(TransactionalTridentKafkaSpout spout) throws IOException {
    TridentTopology topology = new TridentTopology();

    // Print the "str" field of each tuple coming from the spout.
    Fields rawMessage = new Fields("str");
    topology.newStream("tweets", spout).each(rawMessage, new Print());

    // DRPC stream declared without any processing steps.
    topology.newDRPCStream("ping");

    return topology.build();
}

Example #29

Source File: RealTimeTextSearch.java From trident-tutorial with Apache License 2.0

5 votes

/**
 * Builds a topology that stores tweets through
 * {@code ElasticSearchStateFactory} and exposes a "search" DRPC stream
 * that queries a state created from the same factory type.
 *
 * @param spout the Kafka spout feeding the "tweets" stream
 * @return the assembled topology
 * @throws IOException declared by the signature; no statement in this body visibly throws it
 */
public static StormTopology buildTopology(TransactionalTridentKafkaSpout spout)
        throws IOException {
    Fields idAndText = new Fields("tweetId", "text");
    Fields keywords = new Fields("keywords");
    Fields ids = new Fields("ids");

    TridentTopology topology = new TridentTopology();

    // Ingestion: parse each incoming tweet, derive its id, keep only the id and
    // the text, print them, and persist them with ElasticSearchStateUpdater.
    // See the ElasticSearchState implementation and its StateUpdater for details.
    topology.newStream("tweets", spout)
            .each(new Fields("str"), new ParseTweet(), new Fields("text", "content", "user"))
            .each(new Fields("text", "content"), new TweetIdExtractor(), new Fields("tweetId"))
            .project(idAndText)
            .each(idAndText, new Print())
            .partitionPersist(new ElasticSearchStateFactory(), idAndText, new ElasticSearchStateUpdater());

    // Querying: a DRPC stream reads from the state where the tweets are stored.
    // TweetQuery is the QueryFunction implementation that accesses it.
    TridentState tweetIndex = topology.newStaticState(new ElasticSearchStateFactory());
    topology.newDRPCStream("search")
            // split the DRPC arguments into keywords
            .each(new Fields("args"), new Split(" "), keywords)
            // and pass them as query parameters
            .stateQuery(tweetIndex, keywords, new TweetQuery(), ids)
            .project(ids);

    return topology.build();
}

Example #30

Source File: IndexMapStateTest.java From storm-trident-elasticsearch with Apache License 2.0

5 votes

/**
 * Builds the test topology: a cycling spout of fixed sentences is turned
 * into documents and aggregated into a non-transactional
 * {@code ESIndexMapState}, while a "search" DRPC stream queries a separate
 * static {@code ESIndexState} and returns the hits in a "json" field.
 *
 * <p>{@code drpc} and {@code getLocalClient()} are supplied by the
 * enclosing class, outside this method.
 *
 * @return the assembled topology
 */
@Override
public StormTopology buildTopology() {
    Fields document = new Fields("document");
    Fields tweet = new Fields("tweet");
    Fields json = new Fields("json");
    Fields searchKey = new Fields("query", "indices", "types");

    ESIndexMapState.Factory<Tweet> tweetStateFactory =
            ESIndexMapState.nonTransactional(getLocalClient(), Tweet.class);

    // Five fixed sentences, batch size 3, replayed in a cycle.
    FixedBatchSpout sentenceSpout = new FixedBatchSpout(
            new Fields("sentence"),
            3,
            new Values("the cow jumped over the moon"),
            new Values("the man went to the store and bought some candy"),
            new Values("four score and seven years ago"),
            new Values("how many apples can you eat"),
            new Values("to be or not to be the person"));
    sentenceSpout.setCycle(true);

    TridentTopology topology = new TridentTopology();

    TridentState searchState = topology.newStaticState(new ESIndexState.Factory<>(getLocalClient(), Tweet.class));

    // Indexing side: sentence -> document -> (id, index, type) -> persisted tweet.
    topology.newStream("tweets", sentenceSpout)
            .each(new Fields("sentence"), new DocumentBuilder(), document)
            .each(document, new ExtractDocumentInfo(), new Fields("id", "index", "type"))
            .groupBy(new Fields("index", "type", "id"))
            .persistentAggregate(tweetStateFactory, document, new TweetBuilder(), tweet)
            .parallelismHint(1);

    // Query side: DRPC args -> (query, indices, types) -> matching tweets -> JSON.
    topology.newDRPCStream("search", drpc)
            .each(new Fields("args"), new ExtractSearchArgs(), searchKey)
            .groupBy(searchKey)
            .stateQuery(searchState, searchKey, new QuerySearchIndexQuery(), tweet)
            .each(tweet, new FilterNull())
            .each(tweet, new CreateJson(), json)
            .project(json);

    return topology.build();
}