package tutorial.storm.trident; import backtype.storm.Config; import backtype.storm.LocalCluster; import backtype.storm.LocalDRPC; import backtype.storm.generated.StormTopology; import backtype.storm.tuple.Fields; import com.google.common.collect.ImmutableList; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import storm.trident.TridentState; import storm.trident.TridentTopology; import storm.trident.operation.builtin.Count; import storm.trident.operation.builtin.FilterNull; import storm.trident.operation.builtin.MapGet; import storm.trident.operation.builtin.Sum; import storm.trident.testing.FeederBatchSpout; import storm.trident.testing.MemoryMapState; import tutorial.storm.trident.operations.Split; import tutorial.storm.trident.testutil.FakeTweetGenerator; import java.io.IOException; /** * @author Enno Shioji ([email protected]) */ public class Part04_BasicStateAndDRPC { private static final Logger log = LoggerFactory.getLogger(Part04_BasicStateAndDRPC.class); public static void main(String[] args) throws Exception{ FakeTweetGenerator fakeTweets = new FakeTweetGenerator(); FeederBatchSpout testSpout = new FeederBatchSpout(ImmutableList.of("id", "text", "actor", "location", "date")); Config conf = new Config(); LocalCluster cluster = new LocalCluster(); LocalDRPC drpc = new LocalDRPC(); cluster.submitTopology("state_drpc", conf, basicStateAndDRPC(drpc, testSpout)); // You can use FeederBatchSpout to feed known values to the topology. Very useful for tests. testSpout.feed(fakeTweets.getNextTweetTuples("ted")); testSpout.feed(fakeTweets.getNextTweetTuples("ted")); testSpout.feed(fakeTweets.getNextTweetTuples("mary")); testSpout.feed(fakeTweets.getNextTweetTuples("jason")); // This is how you make DRPC calls. First argument must match the function name // System.out.println(drpc.execute("ping", "ping pang pong")); // System.out.println(drpc.execute("count", "america america ace ace ace item")); System.out.println(drpc.execute("count_per_actor", "ted")); // System.out.println(drpc.execute("count_per_actors", "ted mary pere jason")); // You can use a client library to make calls remotely // DRPCClient client = new DRPCClient("drpc.server.location", 3772); // System.out.println(client.execute("ping", "ping pang pong")); } private static StormTopology basicStateAndDRPC(LocalDRPC drpc, FeederBatchSpout spout) throws IOException { TridentTopology topology = new TridentTopology(); // persistentAggregate persists the result of aggregation into data stores, // which you can use from other applications. // You can also use it in other topologies by using the TridentState object returned. // // The state is commonly backed by a data store like memcache, cassandra etc. // Here we are simply using a hash map TridentState countState = topology .newStream("spout", spout) .groupBy(new Fields("actor")) .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count")); // There are a few ready-made state libraries that you can use // Below is an example to use memcached // List<InetSocketAddress> memcachedServerLocations = ImmutableList.of(new InetSocketAddress("some.memcached.server",12000)); // TridentState countStateMemcached = // topology // .newStream("spout", spout) // .groupBy(new Fields("actor")) // .persistentAggregate(MemcachedState.transactional(memcachedServerLocations), new Count(), new Fields("count")); // DRPC stands for Distributed Remote Procedure Call // You can issue calls using the DRPC client library // A DRPC call takes two Strings, function name and function arguments // // In order to call the DRPC defined below, you'd use "count_per_actor" as the function name // The function arguments will be available as "args" /* topology .newDRPCStream("ping", drpc) .each(new Fields("args"), new Split(" "), new Fields("reply")) .each(new Fields("reply"), new RegexFilter("ping")) .project(new Fields("reply")); // You can apply usual processing primitives to DRPC streams as well topology .newDRPCStream("count", drpc) .each(new Fields("args"), new Split(" "), new Fields("split")) .each(new Fields("split"), new RegexFilter("a.*")) .groupBy(new Fields("split")) .aggregate(new Count(), new Fields("count")); */ // More usefully, you can query the state you created earlier topology .newDRPCStream("count_per_actor", drpc) .stateQuery(countState, new Fields("args"), new MapGet(), new Fields("count")); // Here is a more complex example topology .newDRPCStream("count_per_actors", drpc) .each(new Fields("args"), new Split(" "), new Fields("actor")) .groupBy(new Fields("actor")) .stateQuery(countState, new Fields("actor"), new MapGet(), new Fields("individual_count")) .each(new Fields("individual_count"), new FilterNull()) .aggregate(new Fields("individual_count"), new Sum(), new Fields("count")); // For how to call DRPC calls, go back to the main method return topology.build(); } }