Java Code Examples for storm.kafka.StringScheme

The following are top voted examples for showing how to use storm.kafka.StringScheme. These examples are extracted from open source projects. You can vote up the examples you like and your votes will be used in our system to generate more good examples.
Example 1
Project: RealEstate-Streaming   File: PhoenixTest.java   Source Code and License 6 votes vote down vote up
private SpoutConfig constructKafkaSpoutConf() {
    // BrokerHosts hosts = new ZkHosts(topologyConfig.getProperty("kafka.zookeeper.host.port"));
    BrokerHosts hosts = new ZkHosts("localhost:2181");
    /*
    String topic = topologyConfig.getProperty("kafka.topic");
    String zkRoot = topologyConfig.getProperty("kafka.zkRoot");
    String consumerGroupId = topologyConfig.getProperty("kafka.consumer.group.id");
    */
    String topic = "addresses";
    String zkRoot = "";
    String consumerGroupId = "group1";

    SpoutConfig spoutConfig = new SpoutConfig(hosts, topic, zkRoot, consumerGroupId);
    spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());

    return spoutConfig;
}
 
Example 2
Project: Kafka-Storm-ElasticSearch   File: AuditActiveLoginsTopology.java   Source Code and License 6 votes vote down vote up
public StormTopology buildTopology(Properties properties) {
	
	// Load properties for the storm topology
	String kafkaTopic = properties.getProperty("kafka.topic");
	
	SpoutConfig kafkaConfig = new SpoutConfig(kafkaBrokerHosts, kafkaTopic, "",	"storm");
	kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
	TopologyBuilder builder = new TopologyBuilder();

	// Specific audit logs analysis bolts
	AuditLoginsCounterBolt loginCounterbolt = new AuditLoginsCounterBolt();
	AuditParserBolt auditParserBolt = new AuditParserBolt();
	
	// Elastic search bolt
	TupleMapper tupleMapper = new DefaultTupleMapper();
	ElasticSearchBolt elasticSearchBolt = new ElasticSearchBolt(tupleMapper);

	// Topology scheme: KafkaSpout -> auditParserBolt -> loginCounterBolt -> elasticSearchBolt
	builder.setSpout("KafkaSpout", new KafkaSpout(kafkaConfig), 1);
	builder.setBolt("ParseBolt", auditParserBolt, 1).shuffleGrouping("KafkaSpout");
	builder.setBolt("CountBolt", loginCounterbolt, 1).shuffleGrouping("ParseBolt");
	builder.setBolt("ElasticSearchBolt", elasticSearchBolt, 1)
	.fieldsGrouping("CountBolt", new Fields("id", "index", "type", "document"));

	return builder.createTopology();
}
 
Example 3
Project: storm-benchmark   File: TridentWordCount.java   Source Code and License 6 votes vote down vote up
@Override
  public StormTopology getTopology(Config config) {
    final int spoutNum = BenchmarkUtils.getInt(config, SPOUT_NUM, DEFAULT_SPOUT_NUM);
    final int splitNum = BenchmarkUtils.getInt(config, SPLIT_NUM, DEFAULT_SPLIT_BOLT_NUM);
    final int countNum = BenchmarkUtils.getInt(config, COUNT_NUM, DEFAULT_COUNT_BOLT_NUM);

    spout  = new TransactionalTridentKafkaSpout(
            KafkaUtils.getTridentKafkaConfig(config, new SchemeAsMultiScheme(new StringScheme())));

    TridentTopology trident = new TridentTopology();

    trident.newStream("wordcount", spout).name("sentence").parallelismHint(spoutNum).shuffle()
            .each(new Fields(StringScheme.STRING_SCHEME_KEY), new WordSplit(), new Fields("word"))
            .parallelismHint(splitNum)
            .groupBy(new Fields("word"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
            .parallelismHint(countNum);
/*    trident.newStream("wordcount", spout)
      .each(new Fields(StringScheme.STRING_SCHEME_KEY), new WordSplit(), new Fields("word"))
      .groupBy(new Fields("word"))
      .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"));*/


    return trident.build();
  }
 
Example 4
Project: storm-benchmark   File: PageViewCount.java   Source Code and License 6 votes vote down vote up
@Override
public StormTopology getTopology(Config config) {

  final int spoutNum = BenchmarkUtils.getInt(config, SPOUT_NUM, DEFAULT_SPOUT_NUM);
  final int viewBoltNum = BenchmarkUtils.getInt(config, VIEW_NUM, DEFAULT_VIEW_BOLT_NUM);
  final int cntBoltNum = BenchmarkUtils.getInt(config, COUNT_NUM, DEFAULT_COUNT_BOLT_NUM);

  spout = new KafkaSpout(KafkaUtils.getSpoutConfig(
          config, new SchemeAsMultiScheme(new StringScheme())));

  TopologyBuilder builder = new TopologyBuilder();
  builder.setSpout(SPOUT_ID, spout, spoutNum);
  builder.setBolt(VIEW_ID, new PageViewBolt(Item.URL, Item.ONE), viewBoltNum)
         .localOrShuffleGrouping(SPOUT_ID);
  builder.setBolt(COUNT_ID, new WordCount.Count(), cntBoltNum)
          .fieldsGrouping(VIEW_ID, new Fields(Item.URL.toString()));
  return builder.createTopology();
}
 
Example 5
Project: storm-benchmark   File: DataClean.java   Source Code and License 6 votes vote down vote up
@Override
public StormTopology getTopology(Config config) {
  final int spoutNum = BenchmarkUtils.getInt(config, SPOUT_NUM, DEFAULT_SPOUT_NUM);
  final int pvBoltNum = BenchmarkUtils.getInt(config, VIEW_NUM, DEFAULT_PV_BOLT_NUM);
  final int filterBoltNum = BenchmarkUtils.getInt(config, FILTER_NUM, DEFAULT_FILTER_BOLT_NUM);
  spout = new KafkaSpout(KafkaUtils.getSpoutConfig(
          config, new SchemeAsMultiScheme(new StringScheme())));

  TopologyBuilder builder = new TopologyBuilder();
  builder.setSpout(SPOUT_ID, spout, spoutNum);
  builder.setBolt(VIEW_ID, new PageViewBolt(Item.STATUS, Item.ALL), pvBoltNum)
          .localOrShuffleGrouping(SPOUT_ID);
  builder.setBolt(FILTER_ID, new FilterBolt<Integer>(404), filterBoltNum)
          .fieldsGrouping(VIEW_ID, new Fields(Item.STATUS.toString()));
  return builder.createTopology();
}
 
Example 6
Project: storm-benchmark   File: Grep.java   Source Code and License 6 votes vote down vote up
@Override
public StormTopology getTopology(Config config) {

  final int spoutNum = BenchmarkUtils.getInt(config, SPOUT_NUM, DEFAULT_SPOUT_NUM);
  final int matBoltNum = BenchmarkUtils.getInt(config, FM_NUM, DEFAULT_MAT_BOLT_NUM);
  final int cntBoltNum = BenchmarkUtils.getInt(config, CM_NUM, DEFAULT_CNT_BOLT_NUM);
  final String ptnString = (String) Utils.get(config, PATTERN_STRING, DEFAULT_PATTERN_STR);

  spout = new KafkaSpout(KafkaUtils.getSpoutConfig(config, new SchemeAsMultiScheme(new StringScheme())));

  TopologyBuilder builder = new TopologyBuilder();
  builder.setSpout(SPOUT_ID, spout, spoutNum);
  builder.setBolt(FM_ID, new FindMatchingSentence(ptnString), matBoltNum)
          .localOrShuffleGrouping(SPOUT_ID);
  builder.setBolt(CM_ID, new CountMatchingSentence(), cntBoltNum)
          .fieldsGrouping(FM_ID, new Fields(FindMatchingSentence.FIELDS));

  return builder.createTopology();
}
 
Example 7
Project: storm-benchmark   File: UniqueVisitor.java   Source Code and License 6 votes vote down vote up
@Override
public StormTopology getTopology(Config config) {

  final int spoutNum = BenchmarkUtils.getInt(config, SPOUT_NUM, DEFAULT_SPOUT_NUM);
  final int pvBoltNum = BenchmarkUtils.getInt(config, VIEW_NUM, DEFAULT_PV_BOLT_NUM);
  final int uvBoltNum = BenchmarkUtils.getInt(config, UNIQUER_NUM, DEFAULT_UV_BOLT_NUM);
  final int winLen = BenchmarkUtils.getInt(config, WINDOW_LENGTH, DEFAULT_WINDOW_LENGTH_IN_SEC);
  final int emitFreq = BenchmarkUtils.getInt(config, EMIT_FREQ, DEFAULT_EMIT_FREQ_IN_SEC);
  spout = new KafkaSpout(KafkaUtils.getSpoutConfig(
          config, new SchemeAsMultiScheme(new StringScheme())));

  TopologyBuilder builder = new TopologyBuilder();
  builder.setSpout(SPOUT_ID, spout, spoutNum);
  builder.setBolt(VIEW_ID, new PageViewBolt(Item.URL, Item.USER), pvBoltNum)
          .localOrShuffleGrouping(SPOUT_ID);
  builder.setBolt(UNIQUER_ID, new UniqueVisitorBolt(winLen, emitFreq), uvBoltNum)
          .fieldsGrouping(VIEW_ID, new Fields(Item.URL.toString()));
  return builder.createTopology();
}
 
Example 8
Project: AuditTopology-ES   File: AuditActiveLoginsTopology.java   Source Code and License 6 votes vote down vote up
public StormTopology buildTopology(Properties properties) {
	
	// Load properties for the storm topology
	String kafkaTopic = properties.getProperty("kafka.topic");
	
	SpoutConfig kafkaConfig = new SpoutConfig(kafkaBrokerHosts, kafkaTopic, "",	"storm");
	kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
	TopologyBuilder builder = new TopologyBuilder();

	// Specific audit logs analysis bolts
	AuditLoginsCounterBolt loginCounterbolt = new AuditLoginsCounterBolt();
	AuditParserBolt auditParserBolt = new AuditParserBolt();
	
	// Elastic search bolt
	TupleMapper tupleMapper = new DefaultTupleMapper();
	ElasticSearchBolt elasticSearchBolt = new ElasticSearchBolt(tupleMapper);

	// Topology scheme: KafkaSpout -> auditParserBolt -> loginCounterBolt -> elasticSearchBolt
	builder.setSpout("KafkaSpout", new KafkaSpout(kafkaConfig), 1);
	builder.setBolt("ParseBolt", auditParserBolt, 1).shuffleGrouping("KafkaSpout");
	builder.setBolt("CountBolt", loginCounterbolt, 1).shuffleGrouping("ParseBolt");
	builder.setBolt("ElasticSearchBolt", elasticSearchBolt, 1)
	.fieldsGrouping("CountBolt", new Fields("id", "index", "type", "document"));

	return builder.createTopology();
}
 
Example 9
Project: kafka-storm-hive   File: KafkaStormTopology.java   Source Code and License 6 votes vote down vote up
public static void main(String[] args) {
    TopologyBuilder builder = new TopologyBuilder();

    SpoutConfig spoutConf = new SpoutConfig(new ZkHosts("localhost:2181", "/brokers"), "test", "/kafkastorm", "KafkaSpout");
    spoutConf.scheme = new SchemeAsMultiScheme(new StringScheme());
    spoutConf.forceFromStart = true;

    builder.setSpout("KafkaSpout", new KafkaSpout(spoutConf), 3);
    builder.setBolt("KafkaBolt", new PrinterBolt(), 3).shuffleGrouping("KafkaSpout");

    Config conf = new Config();
    // conf.setDebug(true);

    LocalCluster cluster = new LocalCluster();
    cluster.submitTopology("kafka-test", conf, builder.createTopology());

    Utils.sleep(60000);
    cluster.shutdown();
}
 
Example 10
Project: storm-kafka-0.8-plus-test   File: SentenceAggregationTopology.java   Source Code and License 6 votes vote down vote up
public StormTopology buildTopology(LocalDRPC drpc) {
    TridentKafkaConfig kafkaConfig = new TridentKafkaConfig(brokerHosts, "storm-sentence", "storm");
    kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    TransactionalTridentKafkaSpout kafkaSpout = new TransactionalTridentKafkaSpout(kafkaConfig);
    TridentTopology topology = new TridentTopology();

    TridentState wordCounts = topology.newStream("kafka", kafkaSpout).shuffle().
            each(new Fields("str"), new WordSplit(), new Fields("word")).
            groupBy(new Fields("word")).
            persistentAggregate(new HazelCastStateFactory(), new Count(), new Fields("aggregates_words")).parallelismHint(2);


    topology.newDRPCStream("words", drpc)
            .each(new Fields("args"), new Split(), new Fields("word"))
            .groupBy(new Fields("word"))
            .stateQuery(wordCounts, new Fields("word"), new MapGet(), new Fields("count"))
            .each(new Fields("count"), new FilterNull())
            .aggregate(new Fields("count"), new Sum(), new Fields("sum"));

    return topology.build();
}
 
Example 11
Project: rb-bi   File: TridentKafkaSpout.java   Source Code and License 5 votes vote down vote up
/**
 * Constructor
 *
 * @param config Config file to read properties from
 * @param section Section of the kafka config file to read properties from.
 */
public TridentKafkaSpout(ConfigData config, String section) {
    _kafkaConfig = new TridentKafkaConfig(new ZkHosts(config.getZkHost()), config.getTopic(section), "stormKafka");
    _kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    _kafkaConfig.bufferSizeBytes = config.getFetchSizeKafka();
    _kafkaConfig.fetchSizeBytes = config.getFetchSizeKafka();
    _kafkaConfig.forceFromStart = false;
}
 
Example 12
Project: rb-bi   File: TridentKafkaSpoutNmsp.java   Source Code and License 5 votes vote down vote up
/**
 * Constructor
 *
 * @param config Config file to read properties from
 * @param section Section of the kafka config file to read properties from.
 */
public TridentKafkaSpoutNmsp(ConfigData config, String section) {
    _kafkaConfig = new TridentKafkaConfig(new ZkHosts(config.getZkHost()), config.getTopic(section), "stormKafka");
    _kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    _kafkaConfig.bufferSizeBytes = config.getFetchSizeKafkaNmsp();
    _kafkaConfig.fetchSizeBytes = config.getFetchSizeKafkaNmsp();
    _kafkaConfig.forceFromStart = false;
}
 
Example 13
Project: rb-bi   File: TridentKafkaSpoutLocation.java   Source Code and License 5 votes vote down vote up
/**
 * Constructor
 *
 * @param config Config file to read properties from
 * @param section Section of the kafka config file to read properties from.
 */
public TridentKafkaSpoutLocation(ConfigData config, String section) {
    _kafkaConfig = new TridentKafkaConfig(new ZkHosts(config.getZkHost()), config.getTopic(section), "stormKafka");
    _kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    _kafkaConfig.bufferSizeBytes = config.getFetchSizeKafkaLocation();
    _kafkaConfig.fetchSizeBytes = config.getFetchSizeKafkaLocation();
    _kafkaConfig.forceFromStart = false;
}
 
Example 14
Project: storm-kafka-examples   File: CounterTopology.java   Source Code and License 5 votes vote down vote up
/**
 * @param args
 * http://www.programcreek.com/java-api-examples/index.php?api=storm.kafka.KafkaSpout
 */
public static void main(String[] args) {
	try{
		//设置喷发节点并分配并发数,该并发数将会控制该对象在集群中的线程数(6个)
		String zkhost = "wxb-1:2181,wxb-2:2181,wxb-3:2181";
		String topic = "order";
		String groupId = "id";
		int spoutNum = 3;
		int boltNum = 1;
		ZkHosts zkHosts = new ZkHosts(zkhost);//kafaka所在的zookeeper
		SpoutConfig spoutConfig = new SpoutConfig(zkHosts, topic, "/order", groupId);  // create /order /id
		spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
		KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);

        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("spout", kafkaSpout, spoutNum);
		builder.setBolt("check", new CheckOrderBolt(), boltNum).shuffleGrouping("spout");
        builder.setBolt("counter", new CounterBolt(),boltNum).shuffleGrouping("check");

        Config config = new Config();
        config.setDebug(true);
        
        if(args!=null && args.length > 0) {
            config.setNumWorkers(2);
            StormSubmitter.submitTopology(args[0], config, builder.createTopology());
        } else {        
            config.setMaxTaskParallelism(2);

            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("Wordcount-Topology", config, builder.createTopology());

            Thread.sleep(500000);

            cluster.shutdown();
        }
	}catch (Exception e) {
		e.printStackTrace();
	}
}
 
Example 15
Project: RealEstate-Streaming   File: KafkaPhoenixTopology.java   Source Code and License 5 votes vote down vote up
private SpoutConfig constructKafkaSpoutConf() {
    // BrokerHosts hosts = new ZkHosts(topologyConfig.getProperty("kafka.zookeeper.host.port"));
    BrokerHosts hosts = new ZkHosts("localhost:2181");

    String topic = "properties";
    String zkRoot = "";
    String consumerGroupId = "group1";

    SpoutConfig spoutConfig = new SpoutConfig(hosts, topic, zkRoot, consumerGroupId);
    spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());

    return spoutConfig;
}
 
Example 16
Project: Big-Data-tutorial   File: FlightTopology.java   Source Code and License 5 votes vote down vote up
public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException {

		String zkIp = "localhost";

		String nimbusHost = "sandbox.hortonworks.com";

		String zookeeperHost = zkIp +":2181";

		ZkHosts zkHosts = new ZkHosts(zookeeperHost);
		List<String> zkServers = new ArrayList<String>();
		zkServers.add(zkIp);
		SpoutConfig kafkaConfig = new SpoutConfig(zkHosts, "spertus-flight-events", "/spertus-flights-events","flight_id");
		kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
		kafkaConfig.startOffsetTime = kafka.api.OffsetRequest.EarliestTime();
		kafkaConfig.zkServers = zkServers;
		kafkaConfig.zkRoot = "/spertus-flight-events";
		kafkaConfig.zkPort = 2181;
		kafkaConfig.forceFromStart = true;
		KafkaSpout kafkaSpout = new KafkaSpout(kafkaConfig);

		TopologyBuilder builder = new TopologyBuilder();

		builder.setSpout("flight-events", kafkaSpout, 1);
		builder.setBolt("flight-stats", new GetFlightStatsBolt(), 1).shuffleGrouping("flight-events");

		Map conf = new HashMap();
		conf.put(backtype.storm.Config.TOPOLOGY_WORKERS, 4);
		conf.put(backtype.storm.Config.TOPOLOGY_DEBUG, true);
		if (args != null && args.length > 0) {
			StormSubmitter.submitTopology(args[0], conf, builder.createTopology());
		}   else {
			LocalCluster cluster = new LocalCluster();
			cluster.submitTopology("flight-topology", conf, builder.createTopology());
		}
	}
 
Example 17
Project: LearnStorm   File: TridentKafkaWordCount.java   Source Code and License 5 votes vote down vote up
/**
 * Creates a transactional kafka spout that consumes any new data published to "test" topic.
 * <p/>
 * For more info on transactional spouts
 * see "Transactional spouts" section in
 * <a href="https://storm.apache.org/documentation/Trident-state"> Trident state</a> doc.
 *
 * @return a transactional trident kafka spout.
 */
private TransactionalTridentKafkaSpout createKafkaSpout() {
    ZkHosts hosts = new ZkHosts(zkUrl);
    TridentKafkaConfig config = new TridentKafkaConfig(hosts, KAFKA_TOPIC);
    config.scheme = new SchemeAsMultiScheme(new StringScheme());

    // Consume new data from the topic
    config.startOffsetTime = kafka.api.OffsetRequest.LatestTime();
    return new TransactionalTridentKafkaSpout(config);
}
 
Example 18
Project: realtime-event-processing   File: DocEventProcessingTopology.java   Source Code and License 5 votes vote down vote up
public static StormTopology buildTopology(Config conf, LocalDRPC drpc) {

        TridentTopology topology = new TridentTopology();

        //Kafka Spout
        BrokerHosts zk = new ZkHosts(conf.get(CrawlerConfig.KAFKA_CONSUMER_HOST_NAME) + ":" +conf.get(CrawlerConfig.KAFKA_CONSUMER_HOST_PORT));
        TridentKafkaConfig kafkaConfig = new TridentKafkaConfig(zk, (String) conf.get(CrawlerConfig.KAFKA_TOPIC_DOCUMENT_NAME));
        kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
        OpaqueTridentKafkaSpout spout = new OpaqueTridentKafkaSpout(kafkaConfig);

        //ElasticSearch Persistent State
        Settings esSettings = ImmutableSettings.settingsBuilder()
                .put("storm.elasticsearch.cluster.name", conf.get(CrawlerConfig.ELASTICSEARCH_CLUSTER_NAME))
                .put("storm.elasticsearch.hosts", conf.get(CrawlerConfig.ELASTICSEARCH_HOST_NAME) + ":" + conf.get(CrawlerConfig.ELASTICSEARCH_HOST_PORT))
                .build();
        StateFactory esStateFactory = new ESIndexState.Factory<JSONObject>(new ClientFactory.NodeClient(esSettings.getAsMap()), JSONObject.class);
        TridentState esStaticState = topology.newStaticState(esStateFactory);

        String esIndex = (String)(conf.get(CrawlerConfig.ELASTICSEARCH_INDEX_NAME));
        topology.newStream("docstream",spout)
                .each( new Fields("str"), new SplitDocStreamArgs(), new Fields("filename", "task", "user", "content"))
                .each( new Fields("filename", "task", "user"), new PrintFilter("Kafka"))
                .each( new Fields("filename","task","user","content"), new PrepareDocForElasticSearch(), new Fields("index","type","id","source") )
                .partitionPersist(esStateFactory, new Fields("index","type","id","source"), new ESIndexUpdater<String>(new ESTridentTupleMapper()), new Fields());

        return topology.build();
    }
 
Example 19
Project: StormSampleProject   File: SentimentAnalysisTopology.java   Source Code and License 5 votes vote down vote up
private static StormTopology createTopology()
{
    SpoutConfig kafkaConf = new SpoutConfig(
        new ZkHosts(Properties.getString("rts.storm.zkhosts")),
        KAFKA_TOPIC,
        "/kafka",
        "KafkaSpout");
    kafkaConf.scheme = new SchemeAsMultiScheme(new StringScheme());
    TopologyBuilder topology = new TopologyBuilder();

    topology.setSpout("kafka_spout", new KafkaSpout(kafkaConf), 4);

    topology.setBolt("twitter_filter", new TwitterFilterBolt(), 4)
            .shuffleGrouping("kafka_spout");

    topology.setBolt("text_filter", new TextFilterBolt(), 4)
            .shuffleGrouping("twitter_filter");

    topology.setBolt("stemming", new StemmingBolt(), 4)
            .shuffleGrouping("text_filter");

    topology.setBolt("positive", new PositiveSentimentBolt(), 4)
            .shuffleGrouping("stemming");
    topology.setBolt("negative", new NegativeSentimentBolt(), 4)
            .shuffleGrouping("stemming");

    topology.setBolt("join", new JoinSentimentsBolt(), 4)
            .fieldsGrouping("positive", new Fields("tweet_id"))
            .fieldsGrouping("negative", new Fields("tweet_id"));

    topology.setBolt("score", new SentimentScoringBolt(), 4)
            .shuffleGrouping("join");

    topology.setBolt("hdfs", new HDFSBolt(), 4)
            .shuffleGrouping("score");
    topology.setBolt("nodejs", new NodeNotifierBolt(), 4)
            .shuffleGrouping("score");

    return topology.createTopology();
}
 
Example 20
Project: storm-smoke-test   File: TridentConnectorUtil.java   Source Code and License 5 votes vote down vote up
public static OpaqueTridentKafkaSpout getTridentKafkaEmitter(String zkConnString, String topicName, Map topologyConfig) {
    BrokerHosts hosts = new ZkHosts(zkConnString);
    TridentKafkaConfig kafkaConfig = new TridentKafkaConfig(hosts, topicName);
    //topologyConfig.put("topology.spout.max.batch.size", 1);
    //kafkaConfig.forceFromStart = true;
    kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    return new OpaqueTridentKafkaSpout(kafkaConfig);
}
 
Example 21
Project: partial-key-grouping   File: WordCountPartialKeyGrouping.java   Source Code and License 5 votes vote down vote up
public static void main(String[] args) throws Exception {
    String kafkaZk = "zookeeper:2181"; // change it to your zookeeper server
    BrokerHosts brokerHosts = new ZkHosts(kafkaZk);

    SpoutConfig kafkaConfig = new SpoutConfig(brokerHosts, "name_of_kafka_topic", "", "test"); // change it to the name of your kafka topic
    kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    kafkaConfig.forceFromStart = true;

    TopologyBuilder builder = new TopologyBuilder();
    builder.setSpout("stream", new KafkaSpout(kafkaConfig), 1);
    builder.setBolt("split", new SplitterBolt(), 8).shuffleGrouping("stream");
    builder.setBolt("counter", new CounterBolt(), 10).customGrouping("split", new PartialKeyGrouping());
    builder.setBolt("aggregator", new AggregatorBolt(), 1).fieldsGrouping("counter", new Fields("word"));

    Config conf = new Config();
    conf.setDebug(false);
    conf.setMaxSpoutPending(100);
    // conf.setMessageTimeoutSecs(300); // optionally increase the timeout for tuples

    if (args != null && args.length > 0) {
        conf.setNumWorkers(10);
        StormSubmitter.submitTopology(args[0], conf, builder.createTopology());
    } else {
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("test", conf, builder.createTopology());
        Utils.sleep(15000000);
        cluster.killTopology("test");
        cluster.shutdown();
    }
}
 
Example 22
Project: StormTopology-AuditActiveLogins   File: AuditActiveLoginsTopology.java   Source Code and License 5 votes vote down vote up
public StormTopology buildTopology(Properties properties) {
	
	// Load properties for the storm topoology
	String kafkaTopic = properties.getProperty("kafka.topic");
	String hbaseTable = properties.getProperty("hbase.table.name");
	String hbaseColumnFamily = properties.getProperty("hbase.column.family");
	
	SpoutConfig kafkaConfig = new SpoutConfig(kafkaBrokerHosts, kafkaTopic, "",
			"storm");
	kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
	TopologyBuilder builder = new TopologyBuilder();
	
	SimpleHBaseMapper hBaseMapper = new SimpleHBaseMapper()
			.withRowKeyField("host-user")
			.withCounterFields(new Fields("count"))
			.withColumnFamily(hbaseColumnFamily);

	// HbaseBolt(tableName, hbaseMapper)
	HBaseBolt hbaseBolt = new HBaseBolt(hbaseTable, hBaseMapper);
	AuditLoginsCounterBolt loginCounterbolt = new AuditLoginsCounterBolt(hbaseTable);
	AuditBolt auditParserBolt = new AuditBolt();

	builder.setSpout("KafkaSpout", new KafkaSpout(kafkaConfig), 1);
	builder.setBolt("ParseBolt", auditParserBolt, 1).shuffleGrouping("KafkaSpout");
	builder.setBolt("CountBolt", loginCounterbolt, 1).shuffleGrouping("ParseBolt");
	builder.setBolt("HBaseBolt", hbaseBolt, 1).fieldsGrouping("CountBolt",
			new Fields("host-user"));

	return builder.createTopology();
}
 
Example 23
Project: flink-perf   File: KafkaThroughput.java   Source Code and License 5 votes vote down vote up
public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException, UnknownHostException, InterruptedException {
	final ParameterTool pt = ParameterTool.fromArgs(args);

	TopologyBuilder builder = new TopologyBuilder();
	BrokerHosts hosts = new ZkHosts(pt.getRequired("zookeeper"));
	SpoutConfig spoutConfig = new SpoutConfig(hosts, pt.getRequired("topic"), "/" + pt.getRequired("topic"), UUID.randomUUID().toString());
	spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
	KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);
	builder.setSpout("source", kafkaSpout, pt.getInt("sourceParallelism"));

	builder.setBolt("sink", new Throughput.Sink(pt), pt.getInt("sinkParallelism")).noneGrouping("source");

	Config conf = new Config();
	conf.setDebug(false);

	if (!pt.has("local")) {
		conf.setNumWorkers(pt.getInt("par", 2));

		StormSubmitter.submitTopologyWithProgressBar("kafka-spout-"+pt.get("name", "no_name"), conf, builder.createTopology());
	} else {
		conf.setMaxTaskParallelism(pt.getInt("par", 2));

		LocalCluster cluster = new LocalCluster();
		cluster.submitTopology("kafka-spout", conf, builder.createTopology());

		Thread.sleep(300000);

		cluster.shutdown();
	}
}
 
Example 24
Project: storm-kafka-examples   File: HdfsTopology.java   Source Code and License 4 votes vote down vote up
public static void main(String[] args) {
    try{
        String zkhost = "wxb-1:2181,wxb-2:2181,wxb-3:2181";
        String topic = "order";
        String groupId = "id";
        int spoutNum = 3;
        int boltNum = 1;
        ZkHosts zkHosts = new ZkHosts(zkhost);//kafaka所在的zookeeper
        SpoutConfig spoutConfig = new SpoutConfig(zkHosts, topic, "/order", groupId);  // create /order /id
        spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
        KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);

        // HDFS bolt
        // use "|" instead of "," for field delimiter
        RecordFormat format = new DelimitedRecordFormat()
                .withFieldDelimiter("|");

        // sync the filesystem after every 1k tuples
        SyncPolicy syncPolicy = new CountSyncPolicy(1000);

        // rotate files when they reach 5MB
        FileRotationPolicy rotationPolicy = new FileSizeRotationPolicy(5.0f, FileSizeRotationPolicy.Units.MB);
        // FileRotationPolicy rotationPolicy = new TimedRotationPolicy(1.0f, TimedRotationPolicy.TimeUnit.MINUTES);

        FileNameFormat fileNameFormat = new DefaultFileNameFormat()
                .withPath("/tmp/").withPrefix("order_").withExtension(".log");

        HdfsBolt hdfsBolt = new HdfsBolt()
                .withFsUrl("hdfs://wxb-1:8020")
                .withFileNameFormat(fileNameFormat)
                .withRecordFormat(format)
                .withRotationPolicy(rotationPolicy)
                .withSyncPolicy(syncPolicy);

        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("spout", kafkaSpout, spoutNum);
        builder.setBolt("check", new CheckOrderBolt(), boltNum).shuffleGrouping("spout");
        builder.setBolt("counter", new CounterBolt(),boltNum).shuffleGrouping("check");
        builder.setBolt("hdfs", hdfsBolt,boltNum).shuffleGrouping("counter");

        Config config = new Config();
        config.setDebug(true);

        if(args!=null && args.length > 0) {
            config.setNumWorkers(2);
            StormSubmitter.submitTopology(args[0], config, builder.createTopology());
        } else {
            config.setMaxTaskParallelism(2);

            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("Wordcount-Topology", config, builder.createTopology());

            Thread.sleep(500000);

            cluster.shutdown();
        }
    }catch (Exception e) {
        e.printStackTrace();
    }
}
 
Example 25
Project: web-crawler   File: WebCrawlerTopology.java   Source Code and License 4 votes vote down vote up
public static StormTopology buildTopology(Config conf, LocalDRPC localDrpc) {
    TridentTopology topology = new TridentTopology();

    //Kafka Spout
    BrokerHosts zk = new ZkHosts(conf.get(CrawlerConfig.KAFKA_CONSUMER_HOST_NAME) + ":" +conf.get(CrawlerConfig.KAFKA_CONSUMER_HOST_PORT));
    TridentKafkaConfig kafkaConfig = new TridentKafkaConfig(zk, (String) conf.get(CrawlerConfig.KAFKA_TOPIC_NAME));
    kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    OpaqueTridentKafkaSpout spout = new OpaqueTridentKafkaSpout(kafkaConfig);

    //ElasticSearch Persistent State
    Settings esSettings = ImmutableSettings.settingsBuilder()
            .put("storm.elasticsearch.cluster.name", conf.get(CrawlerConfig.ELASTICSEARCH_CLUSTER_NAME))
            .put("storm.elasticsearch.hosts", conf.get(CrawlerConfig.ELASTICSEARCH_HOST_NAME) + ":" + conf.get(CrawlerConfig.ELASTICSEARCH_HOST_PORT))
            .build();
    StateFactory esStateFactory = new ESIndexState.Factory<String>(new ClientFactory.NodeClient(esSettings.getAsMap()), String.class);
    TridentState esStaticState = topology.newStaticState(esStateFactory);

    //Topology
    topology.newStream("crawlKafkaSpout", spout).parallelismHint(5)
             //Splits url and depth information on receiving from Kafka
            .each(new Fields("str"), new SplitKafkaInput(), new Fields("url", "depth"))
            //Bloom Filter. Filters already crawled URLs
            .each(new Fields("url"), new URLFilter())
            //Download and Parse Webpage
            .each(new Fields("url"), new GetAdFreeWebPage(), new Fields("content_html", "title", "href"))//TODO Optimize
            //Add Href URls to Kafka queue
            .each(new Fields("href", "depth"), new KafkaProducerFilter())//TODO Replace with kafka persistent state.
            //Insert to Elasticsearch
            .each(new Fields("url", "content_html", "title"), new PrepareForElasticSearch(), new Fields("index", "type", "id", "source"))
            .partitionPersist(esStateFactory, new Fields("index", "type", "id", "source"), new ESIndexUpdater<String>(new ESTridentTupleMapper()))
    ;

    //DRPC
    topology.newDRPCStream("search", localDrpc)
            .each(new Fields("args"), new SplitDRPCArgs(), new Fields("query_input"))
            .each(new Fields("query_input"), new BingAutoSuggest(0), new Fields("query_preProcessed"))//TODO return List of expanded query
            .each(new Fields("query_preProcessed"), new PrepareSearchQuery(), new Fields("query", "indices", "types"))
            .groupBy(new Fields("query", "indices", "types"))
            .stateQuery(esStaticState, new Fields("query", "indices", "types"), new QuerySearchIndexQuery(), new Fields("results"))
    ;

    return topology.build();
}
 
Example 26
Project: Big-Data-tutorial   File: WeatherTopology.java   Source Code and License 4 votes vote down vote up
public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException {

		String zkIp = "localhost";

		String nimbusHost = "sandbox.hortonworks.com";

		String zookeeperHost = zkIp +":2181";

		ZkHosts zkHosts = new ZkHosts(zookeeperHost);
		List<String> zkServers = new ArrayList<String>();
		zkServers.add(zkIp);
		SpoutConfig kafkaConfig = new SpoutConfig(zkHosts, "spertus-weather-events", "/spertus-weather-events","test_id");
		kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
		kafkaConfig.startOffsetTime = kafka.api.OffsetRequest.EarliestTime();
		kafkaConfig.zkServers = zkServers;
		kafkaConfig.zkRoot = "/spertus-weather-events";
		kafkaConfig.zkPort = 2181;
		kafkaConfig.forceFromStart = true;
		KafkaSpout kafkaSpout = new KafkaSpout(kafkaConfig);

		TopologyBuilder builder = new TopologyBuilder();

		HdfsBolt hdfsBolt = new HdfsBolt().withFsUrl("hdfs://sandbox.hortonworks.com:8020")
				.withFileNameFormat(new DefaultFileNameFormat().withPath("/tmp/test"))
				.withRecordFormat(new DelimitedRecordFormat().withFieldDelimiter("|"))
				.withSyncPolicy(new CountSyncPolicy(10))
				.withRotationPolicy(new FileSizeRotationPolicy(5.0f, Units.MB));
		builder.setSpout("raw-weather-events", kafkaSpout, 1);
		builder.setBolt("filter-airports", new FilterAirportsBolt(), 1).shuffleGrouping("raw-weather-events");
		//        builder.setBolt("test-bolt", new TestBolt(), 1).shuffleGrouping("raw-weather-events");
		//        builder.setBolt("hdfs-bolt", hdfsBolt, 1).shuffleGrouping("raw-weather-events");


		Map conf = new HashMap();
		conf.put(backtype.storm.Config.TOPOLOGY_WORKERS, 4);
		conf.put(backtype.storm.Config.TOPOLOGY_DEBUG, true);
		if (args != null && args.length > 0) {
			StormSubmitter.submitTopology(args[0], conf, builder.createTopology());
		}   else {
			LocalCluster cluster = new LocalCluster();
			cluster.submitTopology("weather-topology", conf, builder.createTopology());
		}
	}
 
Example 27
Project: streaming-benchmarks   File: AdvertisingTopology.java   Source Code and License 4 votes vote down vote up
public static void main(String[] args) throws Exception {
    TopologyBuilder builder = new TopologyBuilder();

    Options opts = new Options();
    opts.addOption("conf", true, "Path to the config file.");

    CommandLineParser parser = new DefaultParser();
    CommandLine cmd = parser.parse(opts, args);
    String configPath = cmd.getOptionValue("conf");
    Map commonConfig = Utils.findAndReadConfigFile(configPath, true);

    String zkServerHosts = joinHosts((List<String>)commonConfig.get("zookeeper.servers"),
                                     Integer.toString((Integer)commonConfig.get("zookeeper.port")));
    String redisServerHost = (String)commonConfig.get("redis.host");
    String kafkaTopic = (String)commonConfig.get("kafka.topic");
    int kafkaPartitions = ((Number)commonConfig.get("kafka.partitions")).intValue();
    int workers = ((Number)commonConfig.get("storm.workers")).intValue();
    int ackers = ((Number)commonConfig.get("storm.ackers")).intValue();
    int cores = ((Number)commonConfig.get("process.cores")).intValue();
    int parallel = Math.max(1, cores/7);

    ZkHosts hosts = new ZkHosts(zkServerHosts);



    SpoutConfig spoutConfig = new SpoutConfig(hosts, kafkaTopic, "/" + kafkaTopic, UUID.randomUUID().toString());
    spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);

    builder.setSpout("ads", kafkaSpout, kafkaPartitions);
    builder.setBolt("event_deserializer", new DeserializeBolt(), parallel).shuffleGrouping("ads");
    builder.setBolt("event_filter", new EventFilterBolt(), parallel).shuffleGrouping("event_deserializer");
    builder.setBolt("event_projection", new EventProjectionBolt(), parallel).shuffleGrouping("event_filter");
    builder.setBolt("redis_join", new RedisJoinBolt(redisServerHost), parallel).shuffleGrouping("event_projection");
    builder.setBolt("campaign_processor", new CampaignProcessor(redisServerHost), parallel*2)
        .fieldsGrouping("redis_join", new Fields("campaign_id"));

    Config conf = new Config();

    if (args != null && args.length > 0) {
        conf.setNumWorkers(workers);
        conf.setNumAckers(ackers);
        StormSubmitter.submitTopologyWithProgressBar(args[0], conf, builder.createTopology());
    }
    else {

        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("test", conf, builder.createTopology());
        backtype.storm.utils.Utils.sleep(10000);
        cluster.killTopology("test");
        cluster.shutdown();
    }
}
 
Example 28
Project: yuzhouwan   File: ZkTopology.java   Source Code and License 4 votes vote down vote up
public static void main(String[] args) {

        //这个地方其实就是kafka配置文件里边的zookeeper.connect这个参数,可以去那里拿过来
        String brokerZkStr = "10.100.90.201:2181/kafka_online_sample";
        String brokerZkPath = "/brokers";
        ZkHosts zkHosts = new ZkHosts(brokerZkStr, brokerZkPath);

        String topic = "mars-wap";
        //以下:将offset汇报到哪个zk集群,相应配置
        String offsetZkServers = "10.199.203.169";
        String offsetZkPort = "2181";
        List<String> zkServersList = new ArrayList<>();
        zkServersList.add(offsetZkServers);
        //汇报offset信息的root路径
        String offsetZkRoot = "/stormExample";
        //存储该spout id的消费offset信息,譬如以topoName来命名
        String offsetZkId = "storm-example";

        SpoutConfig kafkaConfig = new SpoutConfig(zkHosts, topic, offsetZkRoot, offsetZkId);
        kafkaConfig.zkPort = Integer.parseInt(offsetZkPort);
        kafkaConfig.zkServers = zkServersList;
        kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());

        KafkaSpout spout = new KafkaSpout(kafkaConfig);

        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("spout", spout, 1);
        builder.setBolt("bolt", new EsBolt("storm/docs"), 1).shuffleGrouping("spout");

        Config config = new Config();
        config.put("es.index.auto.create", "true");

        if (args.length > 0) {
            try {
                StormSubmitter.submitTopology("storm-kafka-example", config, builder.createTopology());
            } catch (Exception e) {
                LOG.error("error: {}", e.getMessage());
            }
        } else {
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("test", config, builder.createTopology());
        }
    }
 
Example 29
Project: realtime-event-processing   File: URLEventProcessingTopology.java   Source Code and License 4 votes vote down vote up
public static StormTopology buildTopology(Config conf, LocalDRPC localDrpc) {
    TridentTopology topology = new TridentTopology();

    //Kafka Spout
    BrokerHosts zk = new ZkHosts(conf.get(CrawlerConfig.KAFKA_CONSUMER_HOST_NAME) + ":" +conf.get(CrawlerConfig.KAFKA_CONSUMER_HOST_PORT));
    TridentKafkaConfig kafkaConfig = new TridentKafkaConfig(zk, (String) conf.get(CrawlerConfig.KAFKA_TOPIC_NAME));
    kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    //kafkaConfig.ignoreZkOffsets=true;
    OpaqueTridentKafkaSpout spout = new OpaqueTridentKafkaSpout(kafkaConfig);

    //ElasticSearch Persistent State
    Settings esSettings = ImmutableSettings.settingsBuilder()
            .put("storm.elasticsearch.cluster.name", conf.get(CrawlerConfig.ELASTICSEARCH_CLUSTER_NAME))
            .put("storm.elasticsearch.hosts", conf.get(CrawlerConfig.ELASTICSEARCH_HOST_NAME) + ":" + conf.get(CrawlerConfig.ELASTICSEARCH_HOST_PORT))
            .build();
    StateFactory esStateFactory = new ESIndexState.Factory<JSONObject>(new ClientFactory.NodeClient(esSettings.getAsMap()), JSONObject.class);
    TridentState esStaticState = topology.newStaticState(esStateFactory);

    //Topology
    topology.newStream("crawlKafkaSpout", spout).parallelismHint(5)
            //Splits words on receiving from Kafka
            .each(new Fields("str"), new SplitFunction(), new Fields("url", "depth", "task", "user"))
            .each(new Fields("str"), new PrintFilter("Kafka"))
            //Bloom Filter, Filters already crawled URLs
            .each(new Fields("url", "task"), new URLFilter())
            //Download and Parse Webpage
            .each(new Fields("url"), new GetAdFreeWebPage(), new Fields("content_html", "title", "href"))
            //Sending URLs present in the page into the kafka queue.
            .each(new Fields("href", "depth", "task", "user"), new KafkaProducerFilter())
            //Insert to Elasticsearch
            .each(new Fields("url", "content_html", "title", "task", "user"), new PrepareForElasticSearch(), new Fields("index", "type", "id", "source"))
            .partitionPersist(esStateFactory, new Fields("index", "type", "id", "source"), new ESIndexUpdater<String>(new ESTridentTupleMapper()), new Fields())
            ;

    //DRPC
    topology.newDRPCStream("search", localDrpc)
            .each(new Fields("args"), new SplitDRPCArgs(), new Fields("query_input", "task"))
            .each(new Fields("query_input"), new BingAutoSuggest(0), new Fields("query_preProcessed"))
            .each(new Fields("query_preProcessed", "task"), new PrepareSearchQuery(), new Fields("query", "indices", "types"))
            .groupBy(new Fields("query", "indices", "types"))
            .stateQuery(esStaticState, new Fields("query", "indices", "types"), new QuerySearchIndexQuery(), new Fields("results"))
            ;

    return topology.build();
}
 
Example 30
Project: simple-kafka-storm-java   File: OpaqueTridentKafkaSpoutBuilder.java   Source Code and License 4 votes vote down vote up
public OpaqueTridentKafkaSpout build() {
	BrokerHosts zk = new ZkHosts(zookeeper);
	TridentKafkaConfig spoutConf = new TridentKafkaConfig(zk, topic);
	spoutConf.scheme = new SchemeAsMultiScheme(new StringScheme());
	return new OpaqueTridentKafkaSpout(spoutConf);
}
 
Example 31
Project: simple-kafka-storm-java   File: KafkaSpoutBuilder.java   Source Code and License 4 votes vote down vote up
public KafkaSpout build() {
	BrokerHosts hosts = new ZkHosts(zooKeeper);
	SpoutConfig spoutConfig = new SpoutConfig(hosts, topic, "/" + topic, UUID.randomUUID().toString());
	spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
	return new KafkaSpout(spoutConfig);
}
 
Example 32
Project: storm-benchmark   File: DRPC.java   Source Code and License 4 votes vote down vote up
@Override
  public StormTopology getTopology(Config config) {

    Object sObj = config.get(SERVER);
    if (null == sObj) {
      throw new IllegalArgumentException("must set a drpc server");
    }
    server = (String) sObj;
    config.put(Config.DRPC_SERVERS, Lists.newArrayList(server));

    Object pObj = config.get(PORT);
    if (null == pObj) {
      throw new IllegalArgumentException("must set a drpc port");
    }
    port = Utils.getInt(pObj);
    config.put(Config.DRPC_PORT, port);

    LOG.info("drpc server: " + server + "; drpc port: " + port);

    final int spoutNum = BenchmarkUtils.getInt(config, SPOUT_NUM, DEFAULT_SPOUT_NUM);
    final int pageNum = BenchmarkUtils.getInt(config, PAGE_NUM, DEFAULT_PAGE_BOLT_NUM);
    final int viewNum = BenchmarkUtils.getInt(config, VIEW_NUM, DEFAULT_VIEW_BOLT_NUM);
    final int userNum = BenchmarkUtils.getInt(config, USER_NUM, DEFAULT_USER_BOLT_NUM);
    final int followerNum = BenchmarkUtils.getInt(config, FOLLOWER_NUM, DEFAULT_FOLLOWER_BOLT_NUM);

    spout = new TransactionalTridentKafkaSpout(
            KafkaUtils.getTridentKafkaConfig(config, new SchemeAsMultiScheme(new StringScheme())));

    TridentTopology trident = new TridentTopology();
    TridentState urlToUsers =
            trident.newStream("drpc", spout).parallelismHint(spoutNum).shuffle()
            .each(new Fields(StringScheme.STRING_SCHEME_KEY), new Extract(Arrays.asList(Item.URL, Item.USER)),
                    new Fields("url", "user")).parallelismHint(pageNum)
            .groupBy(new Fields("url"))
            .persistentAggregate(new MemoryMapState.Factory(), new Fields("url", "user"), new Distinct(), new Fields("user_set"))
            .parallelismHint(viewNum);
/** debug
 *  1. this proves that the aggregated result has successfully persisted
    urlToUsers.newValuesStream()
            .each(new Fields("url", "user_set"), new Print("(url, user_set)"), new Fields("url2", "user_set2"));
 */
    PageViewGenerator generator = new PageViewGenerator();
    TridentState userToFollowers = trident.newStaticState(new StaticSingleKeyMapState.Factory(generator.genFollowersDB()));
/** debug
  * 2. this proves that MemoryMapState could be read correctly
   trident.newStream("urlToUsers", new PageViewSpout(false))
            .each(new Fields("page_view"), new Extract(Arrays.asList(Item.URL)), new Fields("url"))
            .each(new Fields("url"), new Print("url"), new Fields("url2"))
            .groupBy(new Fields("url2"))
            .stateQuery(urlToUsers, new Fields("url2"),  new MapGet(), new Fields("users"))
            .each(new Fields("users"), new Print("users"), new Fields("users2"));
*/
/** debug
 *  3. this proves that StaticSingleKeyMapState could be read correctly
    trident.newStream("userToFollowers", new PageViewSpout(false))
            .each(new Fields("page_view"), new Extract(Arrays.asList(Item.USER)), new Fields("user"))
            .each(new Fields("user"), new Print("user"), new Fields("user2"))
            .stateQuery(userToFollowers, new Fields("user2"), new MapGet(), new Fields("followers"))
            .each(new Fields("followers"), new Print("followers"), new Fields("followers2"));
 */
    trident.newDRPCStream(FUNCTION, null)
            .each(new Fields("args"), new Print("args"), new Fields("url"))
            .groupBy(new Fields("url"))
            .stateQuery(urlToUsers, new Fields("url"), new MapGet(), new Fields("users"))
            .each(new Fields("users"), new Expand(), new Fields("user")).parallelismHint(userNum)
            .groupBy(new Fields("user"))
            .stateQuery(userToFollowers, new Fields("user"), new MapGet(), new Fields("followers"))
            .each(new Fields("followers"), new Expand(), new Fields("follower")).parallelismHint(followerNum)
            .groupBy(new Fields("follower"))
            .aggregate(new One(), new Fields("one"))
            .aggregate(new Fields("one"), new Sum(), new Fields("reach"));
    return trident.build();
  }
 
Example 33
Project: storm-benchmark   File: KafkaWordCount.java   Source Code and License 4 votes vote down vote up
@Override
public StormTopology getTopology(Config config) {
  spout = new KafkaSpout(KafkaUtils.getSpoutConfig(config, new SchemeAsMultiScheme(new StringScheme())));
  return super.getTopology(config);
}
 
Example 34
Project: trident-tutorial   File: TestUtils.java   Source Code and License 4 votes vote down vote up
public static TransactionalTridentKafkaSpout testTweetSpout(BrokerHosts hosts) {
    TridentKafkaConfig kafkaConfig = new TridentKafkaConfig(hosts, "test", "storm");
    kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    return new TransactionalTridentKafkaSpout(kafkaConfig);
}