Java Code Examples for backtype.storm.spout.SchemeAsMultiScheme

The following are top voted examples showing how to use backtype.storm.spout.SchemeAsMultiScheme, extracted from open source projects. SchemeAsMultiScheme adapts a single-tuple Scheme to the MultiScheme interface that Storm's Kafka spout configurations expect.
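All of the examples below follow the same core pattern; a minimal sketch of it, with hypothetical topic, ZooKeeper root, and consumer-group names:

private KafkaSpout buildKafkaSpout() {
    // ZooKeeper quorum that the Kafka brokers register with (assumed local)
    BrokerHosts hosts = new ZkHosts("localhost:2181");
    // Topic to read, ZK root for storing consumer offsets, and consumer group id
    SpoutConfig spoutConfig = new SpoutConfig(hosts, "my-topic", "/my-topic", "my-group");
    // Adapt the single-tuple StringScheme to the MultiScheme the spout expects
    spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    return new KafkaSpout(spoutConfig);
}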
Example 1
Project: RealEstate-Streaming   File: PhoenixTest.java
private SpoutConfig constructKafkaSpoutConf() {
    // BrokerHosts hosts = new ZkHosts(topologyConfig.getProperty("kafka.zookeeper.host.port"));
    BrokerHosts hosts = new ZkHosts("localhost:2181");
    /*
    String topic = topologyConfig.getProperty("kafka.topic");
    String zkRoot = topologyConfig.getProperty("kafka.zkRoot");
    String consumerGroupId = topologyConfig.getProperty("kafka.consumer.group.id");
    */
    String topic = "addresses";
    String zkRoot = "";
    String consumerGroupId = "group1";

    SpoutConfig spoutConfig = new SpoutConfig(hosts, topic, zkRoot, consumerGroupId);
    spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());

    return spoutConfig;
}
 
Example 2
Project: StreamBench   File: StormOperatorCreator.java
@Override
    public WorkloadOperator<WithTime<String>> stringStreamFromKafkaWithTime(String zkConStr,
                                                                            String kafkaServers,
                                                                            String group,
                                                                            String topics,
                                                                            String offset,
                                                                            String componentId,
                                                                            int parallelism) {
        conf.setNumWorkers(parallelism);
        BrokerHosts hosts = new ZkHosts(zkConStr);
        SpoutConfig spoutConfig = new SpoutConfig(hosts, topics, "/" + topics, UUID.randomUUID().toString());
        spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
        spoutConfig.startOffsetTime = kafka.api.OffsetRequest.LatestTime();
        if (offset.endsWith("smallest")) {
            spoutConfig.startOffsetTime = kafka.api.OffsetRequest.EarliestTime();
        }
        spoutConfig.fetchSizeBytes = 1024;
        spoutConfig.bufferSizeBytes = 1024;
//        spoutConfig.ignoreZkOffsets = true;

        topologyBuilder.setSpout("spout", new KafkaSpout(spoutConfig), parallelism);
        topologyBuilder.setBolt("addTime", new WithTimeBolt<String>(), parallelism).localOrShuffleGrouping("spout");
        return new StormOperator<>(topologyBuilder, "addTime", parallelism);
    }
 
Example 3
Project: StreamBench   File: StormOperatorCreator.java
@Override
    public WorkloadOperator<Point> pointStreamFromKafka(String zkConStr, String kafkaServers, String group, String topics, String offset, String componentId, int parallelism) {
        conf.setNumWorkers(parallelism);
        BrokerHosts hosts = new ZkHosts(zkConStr);
        SpoutConfig spoutConfig = new SpoutConfig(hosts, topics, "/" + topics, UUID.randomUUID().toString());
        spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
        spoutConfig.startOffsetTime = kafka.api.OffsetRequest.LatestTime();
        if (offset.endsWith("smallest")) {
            spoutConfig.startOffsetTime = kafka.api.OffsetRequest.EarliestTime();
        }
        spoutConfig.fetchSizeBytes = 1024;
        spoutConfig.bufferSizeBytes = 1024;
//        spoutConfig.ignoreZkOffsets = true;

        topologyBuilder.setSpout("spout", new KafkaSpout(spoutConfig), parallelism);
        topologyBuilder.setBolt("extractPoint", new ExtractPointBolt(), parallelism).localOrShuffleGrouping("spout");
        return new StormOperator<>(topologyBuilder, "extractPoint", parallelism);
    }
 
Example 4
Project: StreamBench   File: StormOperatorCreator.java
@Override
    public WorkloadOperator<String> stringStreamFromKafka(String zkConStr,
                                                          String kafkaServers,
                                                          String group,
                                                          String topics,
                                                          String offset,
                                                          String componentId,
                                                          int parallelism) {
        conf.setNumWorkers(parallelism);
        BrokerHosts hosts = new ZkHosts(zkConStr);
        SpoutConfig spoutConfig = new SpoutConfig(hosts, topics, "/" + topics, UUID.randomUUID().toString());
        spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
        spoutConfig.startOffsetTime = kafka.api.OffsetRequest.LatestTime();
        spoutConfig.fetchSizeBytes = 1024;
        spoutConfig.bufferSizeBytes = 1024;
//        spoutConfig.ignoreZkOffsets = true;

        topologyBuilder.setSpout(componentId, new KafkaSpout(spoutConfig), parallelism);
        return new StormOperator<>(topologyBuilder, componentId, parallelism);
    }
 
Example 5
Project: StreamBench   File: TickTest.java
public static void main(String[] args) throws WorkloadException {
    TopologyBuilder builder = new TopologyBuilder();
    BrokerHosts hosts = new ZkHosts("localhost:2181");
    SpoutConfig spoutConfig = new SpoutConfig(hosts, "WordCount", "/" + "WordCount", UUID.randomUUID().toString());
    spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    spoutConfig.ignoreZkOffsets = true;

    builder.setSpout("spout", new KafkaSpout(spoutConfig));
    builder.setBolt("split", new SplitSentence()).shuffleGrouping("spout");
    builder.setBolt("counter", new CounterBolt(), 3).shuffleGrouping("split");
    builder.setBolt("aggregator", new AggregatorBolt(), 1)
            .fieldsGrouping("counter", Utils.DEFAULT_STREAM_ID, new Fields("word"))
            .allGrouping("counter", "tick");

    Config conf = new Config();
    conf.setDebug(true);
    conf.setNumWorkers(3);

    LocalCluster cluster = new LocalCluster();
    cluster.submitTopology("kafka-spout", conf, builder.createTopology());
}
 
Example 6
Project: StreamBench   File: AppTest.java
public static void main(String[] args) throws WorkloadException {
    TopologyBuilder builder = new TopologyBuilder();
    BrokerHosts hosts = new ZkHosts("localhost:2181");
    SpoutConfig spoutConfig = new SpoutConfig(hosts, "WordCount", "/" + "WordCount", UUID.randomUUID().toString());
    spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    spoutConfig.ignoreZkOffsets = true;

    builder.setSpout("spout", new KafkaSpout(spoutConfig));
    builder.setBolt("split", new SplitSentence()).shuffleGrouping("spout");
    builder.setBolt("counter", new CounterBolt(), 3).fieldsGrouping("split", new Fields("wordCountPair"));

    Config conf = new Config();
    conf.setDebug(true);
    conf.setNumWorkers(3);

    LocalCluster cluster = new LocalCluster();
    cluster.submitTopology("kafka-spout", conf, builder.createTopology());
}
 
Example 7
Project: storm-topology-examples   File: ConfigureKafkaSpout.java
public static void configureKafkaSpout(TopologyBuilder builder, String zkHostString, String kafkaTopic, 
                                       String kafkaStartOffset, int parallelismHint, String spoutName,
                                       String spoutScheme) {

    LOG.info("KAFKASPOUT: Configuring the KafkaSpout");

    // Configure the KafkaSpout
    SpoutConfig spoutConfig = new SpoutConfig(new ZkHosts(zkHostString),
            kafkaTopic,      // Kafka topic to read from
            "/" + kafkaTopic, // Root path in Zookeeper for the spout to store consumer offsets
            UUID.randomUUID().toString());  // ID for storing consumer offsets in Zookeeper
    try {
        spoutConfig.scheme = new SchemeAsMultiScheme(getSchemeFromClassName(spoutScheme));
    } catch(Exception e) {
        LOG.error("ERROR: Unable to create instance of scheme: " + spoutScheme);
        e.printStackTrace();
    }
    setKafkaOffset(spoutConfig, kafkaStartOffset);
    
    KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);

    // Add the spout and bolt to the topology
    builder.setSpout(spoutName, kafkaSpout, parallelismHint);

}
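The getSchemeFromClassName helper above is project code that is not shown here; a plausible sketch, assuming it does nothing more than instantiate the named Scheme class reflectively:

private static Scheme getSchemeFromClassName(String spoutScheme) throws Exception {
    // Hypothetical: load and instantiate the Scheme implementation by its class name
    return (Scheme) Class.forName(spoutScheme).newInstance();
}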
 
Example 8
Project: Kafka-Storm-ElasticSearch   File: AuditActiveLoginsTopology.java
public StormTopology buildTopology(Properties properties) {
	
	// Load properties for the storm topology
	String kafkaTopic = properties.getProperty("kafka.topic");
	
	SpoutConfig kafkaConfig = new SpoutConfig(kafkaBrokerHosts, kafkaTopic, "", "storm");
	kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
	TopologyBuilder builder = new TopologyBuilder();

	// Specific audit logs analysis bolts
	AuditLoginsCounterBolt loginCounterbolt = new AuditLoginsCounterBolt();
	AuditParserBolt auditParserBolt = new AuditParserBolt();
	
	// Elastic search bolt
	TupleMapper tupleMapper = new DefaultTupleMapper();
	ElasticSearchBolt elasticSearchBolt = new ElasticSearchBolt(tupleMapper);

	// Topology scheme: KafkaSpout -> auditParserBolt -> loginCounterBolt -> elasticSearchBolt
	builder.setSpout("KafkaSpout", new KafkaSpout(kafkaConfig), 1);
	builder.setBolt("ParseBolt", auditParserBolt, 1).shuffleGrouping("KafkaSpout");
	builder.setBolt("CountBolt", loginCounterbolt, 1).shuffleGrouping("ParseBolt");
	builder.setBolt("ElasticSearchBolt", elasticSearchBolt, 1)
	.fieldsGrouping("CountBolt", new Fields("id", "index", "type", "document"));

	return builder.createTopology();
}
 
Example 9
Project: storm-benchmark   File: TridentWordCount.java
@Override
  public StormTopology getTopology(Config config) {
    final int spoutNum = BenchmarkUtils.getInt(config, SPOUT_NUM, DEFAULT_SPOUT_NUM);
    final int splitNum = BenchmarkUtils.getInt(config, SPLIT_NUM, DEFAULT_SPLIT_BOLT_NUM);
    final int countNum = BenchmarkUtils.getInt(config, COUNT_NUM, DEFAULT_COUNT_BOLT_NUM);

    spout  = new TransactionalTridentKafkaSpout(
            KafkaUtils.getTridentKafkaConfig(config, new SchemeAsMultiScheme(new StringScheme())));

    TridentTopology trident = new TridentTopology();

    trident.newStream("wordcount", spout).name("sentence").parallelismHint(spoutNum).shuffle()
            .each(new Fields(StringScheme.STRING_SCHEME_KEY), new WordSplit(), new Fields("word"))
            .parallelismHint(splitNum)
            .groupBy(new Fields("word"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
            .parallelismHint(countNum);
/*    trident.newStream("wordcount", spout)
      .each(new Fields(StringScheme.STRING_SCHEME_KEY), new WordSplit(), new Fields("word"))
      .groupBy(new Fields("word"))
      .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"));*/


    return trident.build();
  }
 
Example 10
Project: storm-benchmark   File: PageViewCount.java
@Override
public StormTopology getTopology(Config config) {

  final int spoutNum = BenchmarkUtils.getInt(config, SPOUT_NUM, DEFAULT_SPOUT_NUM);
  final int viewBoltNum = BenchmarkUtils.getInt(config, VIEW_NUM, DEFAULT_VIEW_BOLT_NUM);
  final int cntBoltNum = BenchmarkUtils.getInt(config, COUNT_NUM, DEFAULT_COUNT_BOLT_NUM);

  spout = new KafkaSpout(KafkaUtils.getSpoutConfig(
          config, new SchemeAsMultiScheme(new StringScheme())));

  TopologyBuilder builder = new TopologyBuilder();
  builder.setSpout(SPOUT_ID, spout, spoutNum);
  builder.setBolt(VIEW_ID, new PageViewBolt(Item.URL, Item.ONE), viewBoltNum)
         .localOrShuffleGrouping(SPOUT_ID);
  builder.setBolt(COUNT_ID, new WordCount.Count(), cntBoltNum)
          .fieldsGrouping(VIEW_ID, new Fields(Item.URL.toString()));
  return builder.createTopology();
}
 
Example 11
Project: storm-benchmark   File: DataClean.java
@Override
public StormTopology getTopology(Config config) {
  final int spoutNum = BenchmarkUtils.getInt(config, SPOUT_NUM, DEFAULT_SPOUT_NUM);
  final int pvBoltNum = BenchmarkUtils.getInt(config, VIEW_NUM, DEFAULT_PV_BOLT_NUM);
  final int filterBoltNum = BenchmarkUtils.getInt(config, FILTER_NUM, DEFAULT_FILTER_BOLT_NUM);
  spout = new KafkaSpout(KafkaUtils.getSpoutConfig(
          config, new SchemeAsMultiScheme(new StringScheme())));

  TopologyBuilder builder = new TopologyBuilder();
  builder.setSpout(SPOUT_ID, spout, spoutNum);
  builder.setBolt(VIEW_ID, new PageViewBolt(Item.STATUS, Item.ALL), pvBoltNum)
          .localOrShuffleGrouping(SPOUT_ID);
  builder.setBolt(FILTER_ID, new FilterBolt<Integer>(404), filterBoltNum)
          .fieldsGrouping(VIEW_ID, new Fields(Item.STATUS.toString()));
  return builder.createTopology();
}
 
Example 12
Project: storm-benchmark   File: Grep.java
@Override
public StormTopology getTopology(Config config) {

  final int spoutNum = BenchmarkUtils.getInt(config, SPOUT_NUM, DEFAULT_SPOUT_NUM);
  final int matBoltNum = BenchmarkUtils.getInt(config, FM_NUM, DEFAULT_MAT_BOLT_NUM);
  final int cntBoltNum = BenchmarkUtils.getInt(config, CM_NUM, DEFAULT_CNT_BOLT_NUM);
  final String ptnString = (String) Utils.get(config, PATTERN_STRING, DEFAULT_PATTERN_STR);

  spout = new KafkaSpout(KafkaUtils.getSpoutConfig(config, new SchemeAsMultiScheme(new StringScheme())));

  TopologyBuilder builder = new TopologyBuilder();
  builder.setSpout(SPOUT_ID, spout, spoutNum);
  builder.setBolt(FM_ID, new FindMatchingSentence(ptnString), matBoltNum)
          .localOrShuffleGrouping(SPOUT_ID);
  builder.setBolt(CM_ID, new CountMatchingSentence(), cntBoltNum)
          .fieldsGrouping(FM_ID, new Fields(FindMatchingSentence.FIELDS));

  return builder.createTopology();
}
 
Example 13
Project: storm-benchmark   File: UniqueVisitor.java
@Override
public StormTopology getTopology(Config config) {

  final int spoutNum = BenchmarkUtils.getInt(config, SPOUT_NUM, DEFAULT_SPOUT_NUM);
  final int pvBoltNum = BenchmarkUtils.getInt(config, VIEW_NUM, DEFAULT_PV_BOLT_NUM);
  final int uvBoltNum = BenchmarkUtils.getInt(config, UNIQUER_NUM, DEFAULT_UV_BOLT_NUM);
  final int winLen = BenchmarkUtils.getInt(config, WINDOW_LENGTH, DEFAULT_WINDOW_LENGTH_IN_SEC);
  final int emitFreq = BenchmarkUtils.getInt(config, EMIT_FREQ, DEFAULT_EMIT_FREQ_IN_SEC);
  spout = new KafkaSpout(KafkaUtils.getSpoutConfig(
          config, new SchemeAsMultiScheme(new StringScheme())));

  TopologyBuilder builder = new TopologyBuilder();
  builder.setSpout(SPOUT_ID, spout, spoutNum);
  builder.setBolt(VIEW_ID, new PageViewBolt(Item.URL, Item.USER), pvBoltNum)
          .localOrShuffleGrouping(SPOUT_ID);
  builder.setBolt(UNIQUER_ID, new UniqueVisitorBolt(winLen, emitFreq), uvBoltNum)
          .fieldsGrouping(VIEW_ID, new Fields(Item.URL.toString()));
  return builder.createTopology();
}
 
Example 14
Project: AuditTopology-ES   File: AuditActiveLoginsTopology.java
public StormTopology buildTopology(Properties properties) {
	
	// Load properties for the storm topology
	String kafkaTopic = properties.getProperty("kafka.topic");
	
	SpoutConfig kafkaConfig = new SpoutConfig(kafkaBrokerHosts, kafkaTopic, "", "storm");
	kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
	TopologyBuilder builder = new TopologyBuilder();

	// Specific audit logs analysis bolts
	AuditLoginsCounterBolt loginCounterbolt = new AuditLoginsCounterBolt();
	AuditParserBolt auditParserBolt = new AuditParserBolt();
	
	// Elastic search bolt
	TupleMapper tupleMapper = new DefaultTupleMapper();
	ElasticSearchBolt elasticSearchBolt = new ElasticSearchBolt(tupleMapper);

	// Topology scheme: KafkaSpout -> auditParserBolt -> loginCounterBolt -> elasticSearchBolt
	builder.setSpout("KafkaSpout", new KafkaSpout(kafkaConfig), 1);
	builder.setBolt("ParseBolt", auditParserBolt, 1).shuffleGrouping("KafkaSpout");
	builder.setBolt("CountBolt", loginCounterbolt, 1).shuffleGrouping("ParseBolt");
	builder.setBolt("ElasticSearchBolt", elasticSearchBolt, 1)
	.fieldsGrouping("CountBolt", new Fields("id", "index", "type", "document"));

	return builder.createTopology();
}
 
Example 15
Project: storm-kafka-hdfs-starter   File: ConfigureKafkaSpout.java
public static void configureKafkaSpout(TopologyBuilder builder, String zkHostString, String kafkaTopic, String kafkaStartOffset) {

        // Configure the KafkaSpout
        SpoutConfig spoutConfig = new SpoutConfig(new ZkHosts(zkHostString),
                kafkaTopic,      // Kafka topic to read from
                "/" + kafkaTopic, // Root path in Zookeeper for the spout to store consumer offsets
                UUID.randomUUID().toString());  // ID for storing consumer offsets in Zookeeper
        //spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
        spoutConfig.scheme = new SchemeAsMultiScheme(new JsonScheme());

        // Allow for passing in an offset time
        // startOffsetTime has a bug that ignores the special -2 value
        if ("-2".equals(kafkaStartOffset)) { // string equality, not == reference comparison
            spoutConfig.forceFromStart = true;
        } else if (kafkaStartOffset != null) {
            spoutConfig.startOffsetTime = Long.parseLong(kafkaStartOffset);
        }
        KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);

        // Add the spout and bolt to the topology
        builder.setSpout("kafkaspout", kafkaSpout, 1);

    }
 
Example 16
Project: opensoc-streaming   File: TopologyRunner.java
private boolean initializeKafkaSpout(String name) {
	try {

		BrokerHosts zk = new ZkHosts(config.getString("kafka.zk"));
		String input_topic = config.getString("spout.kafka.topic");
		SpoutConfig kafkaConfig = new SpoutConfig(zk, input_topic, "",
				input_topic);
		kafkaConfig.scheme = new SchemeAsMultiScheme(new RawScheme());
		kafkaConfig.forceFromStart = Boolean.valueOf("True");
		kafkaConfig.startOffsetTime = -1;

		builder.setSpout(name, new KafkaSpout(kafkaConfig),
				config.getInt("spout.kafka.parallelism.hint")).setNumTasks(
				config.getInt("spout.kafka.num.tasks"));

	} catch (Exception e) {
		e.printStackTrace();
		System.exit(0);
	}

	return true;
}
 
Example 17
Project: storm-sample   File: TruckEventProcessorKafkaTopology.java
/**
 * Constructs the Kafka spout configuration from the topology properties.
 * @return the configured SpoutConfig
 */
private SpoutConfig constructKafkaSpoutConf() {
	BrokerHosts hosts = new ZkHosts(topologyConfig.getProperty("kafka.zookeeper.host.port"));
	String topic = topologyConfig.getProperty("kafka.topic");
	String zkRoot = topologyConfig.getProperty("kafka.zkRoot");
	String consumerGroupId = topologyConfig.getProperty("kafka.consumer.group.id");
	
	SpoutConfig spoutConfig = new SpoutConfig(hosts, topic, zkRoot, consumerGroupId);
	
	/* Custom TruckScheme that takes a Kafka message containing a single truckEvent
	 * and emits a 2-tuple of driverId and truckEvent. The driverId is needed
	 * for a fieldsGrouping so that all of a driver's events go to the same bolt */
	spoutConfig.scheme = new SchemeAsMultiScheme(new TruckScheme2());
	
	return spoutConfig;
}
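TruckScheme2 is specific to this project, but custom schemes of this kind are small. A hedged sketch, assuming the Kafka payload is a comma-separated driverId,truckEvent pair (the real format may differ; imports from backtype.storm.spout, backtype.storm.tuple, java.util, and java.nio.charset omitted, as in the listings above):

public class TruckEventScheme implements Scheme {

    @Override
    public List<Object> deserialize(byte[] ser) {
        // Assumed payload layout: "<driverId>,<truckEvent>"
        String raw = new String(ser, StandardCharsets.UTF_8);
        String[] parts = raw.split(",", 2);
        return new Values(parts[0], parts[1]);
    }

    @Override
    public Fields getOutputFields() {
        // Naming the first field lets downstream bolts do a fieldsGrouping on driverId
        return new Fields("driverId", "truckEvent");
    }
}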
 
Example 18
Project: kafka-storm-hive   File: KafkaStormTopology.java
public static void main(String[] args) {
    TopologyBuilder builder = new TopologyBuilder();

    SpoutConfig spoutConf = new SpoutConfig(new ZkHosts("localhost:2181", "/brokers"), "test", "/kafkastorm", "KafkaSpout");
    spoutConf.scheme = new SchemeAsMultiScheme(new StringScheme());
    spoutConf.forceFromStart = true;

    builder.setSpout("KafkaSpout", new KafkaSpout(spoutConf), 3);
    builder.setBolt("KafkaBolt", new PrinterBolt(), 3).shuffleGrouping("KafkaSpout");

    Config conf = new Config();
    // conf.setDebug(true);

    LocalCluster cluster = new LocalCluster();
    cluster.submitTopology("kafka-test", conf, builder.createTopology());

    Utils.sleep(60000);
    cluster.shutdown();
}
 
Example 19
Project: kafka-storm-hive   File: HDFSSequenceTopology.java
public static StormTopology buildTopology(String hdfsUrl) {
    TridentKafkaConfig tridentKafkaConfig = new TridentKafkaConfig(new ZkHosts(ZKHOST, "/brokers"), KAFKA_TOPIC);
    tridentKafkaConfig.scheme = new SchemeAsMultiScheme(new RawScheme());
    tridentKafkaConfig.startOffsetTime = -1; // forceStartOffsetTime(-1); //Read latest messages from Kafka

    TransactionalTridentKafkaSpout tridentKafkaSpout = new TransactionalTridentKafkaSpout(tridentKafkaConfig);

    TridentTopology topology = new TridentTopology();

    Stream stream = topology.newStream("stream", tridentKafkaSpout);

    FileNameFormat fileNameFormat = new DefaultFileNameFormat().withPath(HDFS_OUT_PATH).withPrefix("trident").withExtension(".txt");
    FileRotationPolicy rotationPolicy = new FileSizeCountRotationPolicy(5.0f, FileSizeRotationPolicy.Units.MB, 10);
    HdfsState.Options seqOpts = new HdfsState.HdfsFileOptions().withFileNameFormat(fileNameFormat)
            .withRecordFormat(new DelimitedRecordFormat().withFieldDelimiter("|").withFields(new Fields("json")))
            .withRotationPolicy(rotationPolicy).withFsUrl(hdfsUrl)
            // .addRotationAction(new MoveFileAction().toDestination(HDFS_ROTATE_PATH));
            // .addRotationAction(new AddSuffixFileAction().withSuffix("-processed"));
            .addRotationAction(new MD5FileAction());
    StateFactory factory = new HdfsStateFactory().withOptions(seqOpts);

    stream.each(new Fields("bytes"), new JacksonJsonParser(), new Fields("json")).partitionPersist(factory, new Fields("json"),
            new HdfsUpdater(), new Fields());

    return topology.build();
}
 
Example 20
Project: storm-kafka-0.8-plus-test   File: SentenceAggregationTopology.java
public StormTopology buildTopology(LocalDRPC drpc) {
    TridentKafkaConfig kafkaConfig = new TridentKafkaConfig(brokerHosts, "storm-sentence", "storm");
    kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    TransactionalTridentKafkaSpout kafkaSpout = new TransactionalTridentKafkaSpout(kafkaConfig);
    TridentTopology topology = new TridentTopology();

    TridentState wordCounts = topology.newStream("kafka", kafkaSpout).shuffle().
            each(new Fields("str"), new WordSplit(), new Fields("word")).
            groupBy(new Fields("word")).
            persistentAggregate(new HazelCastStateFactory(), new Count(), new Fields("aggregates_words")).parallelismHint(2);


    topology.newDRPCStream("words", drpc)
            .each(new Fields("args"), new Split(), new Fields("word"))
            .groupBy(new Fields("word"))
            .stateQuery(wordCounts, new Fields("word"), new MapGet(), new Fields("count"))
            .each(new Fields("count"), new FilterNull())
            .aggregate(new Fields("count"), new Sum(), new Fields("sum"));

    return topology.build();
}
 
Example 21
Project: storm-kafka-0.8-plus-test   File: TestTopologyStaticHosts.java
public static void main(String[] args) throws Exception {

        GlobalPartitionInformation hostsAndPartitions = new GlobalPartitionInformation();
        hostsAndPartitions.addPartition(0, new Broker("localhost", 9092));
        BrokerHosts brokerHosts = new StaticHosts(hostsAndPartitions);

        SpoutConfig kafkaConfig = new SpoutConfig(brokerHosts, "storm-sentence", "", "storm");
        kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());

        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("words", new KafkaSpout(kafkaConfig), 10);
        builder.setBolt("print", new PrinterBolt()).shuffleGrouping("words");
        LocalCluster cluster = new LocalCluster();
        Config config = new Config();
        cluster.submitTopology("kafka-test", config, builder.createTopology());

        Thread.sleep(600000);

    }
 
Example 22
Project: rb-bi   File: TridentKafkaSpout.java
/**
 * Constructor
 *
 * @param config Config file to read properties from
 * @param section Section of the kafka config file to read properties from.
 */
public TridentKafkaSpout(ConfigData config, String section) {
    _kafkaConfig = new TridentKafkaConfig(new ZkHosts(config.getZkHost()), config.getTopic(section), "stormKafka");
    _kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    _kafkaConfig.bufferSizeBytes = config.getFetchSizeKafka();
    _kafkaConfig.fetchSizeBytes = config.getFetchSizeKafka();
    _kafkaConfig.forceFromStart = false;
}
 
Example 23
Project: rb-bi   File: TridentKafkaSpoutNmsp.java
/**
 * Constructor
 *
 * @param config Config file to read properties from
 * @param section Section of the kafka config file to read properties from.
 */
public TridentKafkaSpoutNmsp(ConfigData config, String section) {
    _kafkaConfig = new TridentKafkaConfig(new ZkHosts(config.getZkHost()), config.getTopic(section), "stormKafka");
    _kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    _kafkaConfig.bufferSizeBytes = config.getFetchSizeKafkaNmsp();
    _kafkaConfig.fetchSizeBytes = config.getFetchSizeKafkaNmsp();
    _kafkaConfig.forceFromStart = false;
}
 
Example 24
Project: rb-bi   File: TridentKafkaSpoutLocation.java
/**
 * Constructor
 *
 * @param config Config file to read properties from
 * @param section Section of the kafka config file to read properties from.
 */
public TridentKafkaSpoutLocation(ConfigData config, String section) {
    _kafkaConfig = new TridentKafkaConfig(new ZkHosts(config.getZkHost()), config.getTopic(section), "stormKafka");
    _kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    _kafkaConfig.bufferSizeBytes = config.getFetchSizeKafkaLocation();
    _kafkaConfig.fetchSizeBytes = config.getFetchSizeKafkaLocation();
    _kafkaConfig.forceFromStart = false;
}
 
Example 25
Project: storm-kafka-examples   File: CounterTopology.java
/**
 * @param args
 * http://www.programcreek.com/java-api-examples/index.php?api=storm.kafka.KafkaSpout
 */
public static void main(String[] args) {
	try{
		// Set up the spout and assign its parallelism, which controls the number of threads this component runs across the cluster (6)
		String zkhost = "wxb-1:2181,wxb-2:2181,wxb-3:2181";
		String topic = "order";
		String groupId = "id";
		int spoutNum = 3;
		int boltNum = 1;
		ZkHosts zkHosts = new ZkHosts(zkhost); // the ZooKeeper ensemble that Kafka registers with
		SpoutConfig spoutConfig = new SpoutConfig(zkHosts, topic, "/order", groupId);  // create /order /id
		spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
		KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);

        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("spout", kafkaSpout, spoutNum);
		builder.setBolt("check", new CheckOrderBolt(), boltNum).shuffleGrouping("spout");
        builder.setBolt("counter", new CounterBolt(),boltNum).shuffleGrouping("check");

        Config config = new Config();
        config.setDebug(true);
        
        if(args!=null && args.length > 0) {
            config.setNumWorkers(2);
            StormSubmitter.submitTopology(args[0], config, builder.createTopology());
        } else {        
            config.setMaxTaskParallelism(2);

            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("Wordcount-Topology", config, builder.createTopology());

            Thread.sleep(500000);

            cluster.shutdown();
        }
	}catch (Exception e) {
		e.printStackTrace();
	}
}
 
Example 26
Project: erad2016-streamprocessing   File: SentimentAnalysisTopology.java
private static KafkaSpout createKafkaSpout() {
    String zkConnString = Properties.getString("sa.storm.zkhosts");
    String topicName = Properties.getString("sa.storm.kafka_topic");

    BrokerHosts hosts = new ZkHosts(zkConnString);
    SpoutConfig spoutConfig = new SpoutConfig(hosts, topicName, "/" + topicName, UUID.randomUUID().toString());
    spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    return new KafkaSpout(spoutConfig);
}
 
Example 27
Project: RealEstate-Streaming   File: KafkaPhoenixTopology.java
private SpoutConfig constructKafkaSpoutConf() {
    // BrokerHosts hosts = new ZkHosts(topologyConfig.getProperty("kafka.zookeeper.host.port"));
    BrokerHosts hosts = new ZkHosts("localhost:2181");

    String topic = "properties";
    String zkRoot = "";
    String consumerGroupId = "group1";

    SpoutConfig spoutConfig = new SpoutConfig(hosts, topic, zkRoot, consumerGroupId);
    spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());

    return spoutConfig;
}
 
Example 28
Project: storm-demo   File: LogStatisticsTopology.java
private static KafkaSpout makeKafkaSpout(String topic, String client_id) {
    BrokerHosts brokerHosts = new ZkHosts(ServerConfig.getZK());

    SpoutConfig kafkaSpoutConfig = new SpoutConfig(brokerHosts, topic, "/" + topic, client_id);
    kafkaSpoutConfig.bufferSizeBytes = 1024 * 1024 * 4;
    kafkaSpoutConfig.fetchSizeBytes = 1024 * 1024 * 4;
    kafkaSpoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());

    return new KafkaSpout(kafkaSpoutConfig);
}
 
Example 29
Project: storm-kafka-hdfs-example   File: KafkaSpoutConfigBuilder.java
public KafkaSpout getKafkaSpout() {

        LOG.info("KAFKASPOUT: Configuring the Kafka Spout");

        // Create the initial spoutConfig
        SpoutConfig spoutConfig = new SpoutConfig(new ZkHosts(zookeeperConnectionString),
                kafkaTopic,      // Kafka topic to read from
                "/" + kafkaTopic, // Root path in Zookeeper for the spout to store consumer offsets
                UUID.randomUUID().toString());  // ID for storing consumer offsets in Zookeeper

        // Set the scheme
        try {
            spoutConfig.scheme = new SchemeAsMultiScheme(getSchemeFromClassName(spoutSchemeClass));
        } catch(Exception e) {
            LOG.error("ERROR: Unable to create instance of scheme: " + spoutSchemeClass);
            e.printStackTrace();
        }

        // Set the offset
        setKafkaOffset(spoutConfig, kafkaStartOffset);

        // Create the kafkaSpout
        return new KafkaSpout(spoutConfig);

    }
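setKafkaOffset is another project helper that is not shown; a hypothetical sketch, assuming it maps the conventional "-2" and "-1" arguments to Kafka's earliest and latest offsets and otherwise parses an explicit offset time:

private static void setKafkaOffset(SpoutConfig spoutConfig, String kafkaStartOffset) {
    if ("-2".equals(kafkaStartOffset)) {
        // Assumed convention: -2 means replay from the earliest available offset
        spoutConfig.startOffsetTime = kafka.api.OffsetRequest.EarliestTime();
    } else if ("-1".equals(kafkaStartOffset)) {
        // Assumed convention: -1 means start from the latest offset
        spoutConfig.startOffsetTime = kafka.api.OffsetRequest.LatestTime();
    } else if (kafkaStartOffset != null) {
        // Otherwise treat the argument as an explicit offset time
        spoutConfig.startOffsetTime = Long.parseLong(kafkaStartOffset);
    }
}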
 
Example 30
Project: iot-masterclass   File: TruckEventProcessorKafkaTopology.java
private SpoutConfig constructKafkaSpoutConf() {
  BrokerHosts hosts = new ZkHosts(topologyConfig.getProperty("kafka.zookeeper.host.port"));
  String topic = topologyConfig.getProperty("kafka.topic");
  String zkRoot = topologyConfig.getProperty("kafka.zkRoot");
  String consumerGroupId = topologyConfig.getProperty("kafka.consumer.group.id");

  SpoutConfig spoutConfig = new SpoutConfig(hosts, topic, zkRoot, consumerGroupId);

/* Custom TruckScheme that takes a Kafka message containing a single truckEvent
 * and emits a 2-tuple of driverId and truckEvent. The driverId is needed
 * for a fieldsGrouping so that all of a driver's events go to the same bolt */
  spoutConfig.scheme = new SchemeAsMultiScheme(new TruckScheme2());

  return spoutConfig;
}
 
Example 31
Project: iot-lab   File: TruckEventProcessorKafkaTopology.java
private SpoutConfig constructKafkaSpoutConf() {
  BrokerHosts hosts = new ZkHosts(topologyConfig.getProperty("kafka.zookeeper.host.port"));
  String topic = topologyConfig.getProperty("kafka.topic");
  String zkRoot = topologyConfig.getProperty("kafka.zkRoot");
  String consumerGroupId = topologyConfig.getProperty("kafka.consumer.group.id");

  SpoutConfig spoutConfig = new SpoutConfig(hosts, topic, zkRoot, consumerGroupId);

/* Custom TruckScheme that takes a Kafka message containing a single truckEvent
 * and emits a 2-tuple of driverId and truckEvent. The driverId is needed
 * for a fieldsGrouping so that all of a driver's events go to the same bolt */
  spoutConfig.scheme = new SchemeAsMultiScheme(new TruckScheme2());

  return spoutConfig;
}
 
Example 32
Project: Big-Data-tutorial   File: FlightTopology.java
public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException {

		String zkIp = "localhost";

		String nimbusHost = "sandbox.hortonworks.com";

		String zookeeperHost = zkIp +":2181";

		ZkHosts zkHosts = new ZkHosts(zookeeperHost);
		List<String> zkServers = new ArrayList<String>();
		zkServers.add(zkIp);
		SpoutConfig kafkaConfig = new SpoutConfig(zkHosts, "spertus-flight-events", "/spertus-flights-events","flight_id");
		kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
		kafkaConfig.startOffsetTime = kafka.api.OffsetRequest.EarliestTime();
		kafkaConfig.zkServers = zkServers;
		kafkaConfig.zkRoot = "/spertus-flight-events";
		kafkaConfig.zkPort = 2181;
		kafkaConfig.forceFromStart = true;
		KafkaSpout kafkaSpout = new KafkaSpout(kafkaConfig);

		TopologyBuilder builder = new TopologyBuilder();

		builder.setSpout("flight-events", kafkaSpout, 1);
		builder.setBolt("flight-stats", new GetFlightStatsBolt(), 1).shuffleGrouping("flight-events");

		Map conf = new HashMap();
		conf.put(backtype.storm.Config.TOPOLOGY_WORKERS, 4);
		conf.put(backtype.storm.Config.TOPOLOGY_DEBUG, true);
		if (args != null && args.length > 0) {
			StormSubmitter.submitTopology(args[0], conf, builder.createTopology());
		}   else {
			LocalCluster cluster = new LocalCluster();
			cluster.submitTopology("flight-topology", conf, builder.createTopology());
		}
	}
 
Example 33
Project: LearnStorm   File: LogAnalyzer.java
private SpoutConfig constructKafkaSpoutConf() {
		final BrokerHosts hosts = new ZkHosts(topologyConfig.getProperty("kafka.zookeeper.host.port"));
		final String topic = topologyConfig.getProperty("kafka.topic");
		final String zkRoot = topologyConfig.getProperty("kafka.zkRoot");
//		String consumerGroupId = UUID.randomUUID().toString();
		final SpoutConfig spoutConfig = new SpoutConfig(hosts, topic, zkRoot, CONSUMER_GROUP_ID);
		spoutConfig.startOffsetTime = System.currentTimeMillis();
		spoutConfig.scheme = new SchemeAsMultiScheme(logScheme);
		spoutConfig.retryInitialDelayMs = 10000;	// 10 seconds
		spoutConfig.retryDelayMultiplier = 1.1;		// 10, 11, 12.1, 13.31, 14.641... 
		spoutConfig.retryDelayMaxMs = 590000;		// about 10 minutes
		return spoutConfig;
	}
 
Example 34
Project: LearnStorm   File: ApLogAnalyzer.java
private SpoutConfig constructKafkaSpoutConf() {
		final BrokerHosts hosts = new ZkHosts(topologyConfig.getProperty("kafka.zookeeper.host.port"));
		final String topic = topologyConfig.getProperty("kafka.topic");
		final String zkRoot = topologyConfig.getProperty("kafka.zkRoot");
//		String consumerGroupId = UUID.randomUUID().toString();
		final SpoutConfig spoutConfig = new SpoutConfig(hosts, topic, zkRoot, CONSUMER_GROUP_ID);
		spoutConfig.startOffsetTime = System.currentTimeMillis();
		spoutConfig.scheme = new SchemeAsMultiScheme(apLogScheme);
		spoutConfig.retryInitialDelayMs = 10000;	// 10 seconds
		spoutConfig.retryDelayMultiplier = 1.1;		// 10, 11, 12.1, 13.31, 14.641... 
		spoutConfig.retryDelayMaxMs = 590000;		// about 10 minutes
		return spoutConfig;
	}
 
Example 35
Project: LearnStorm   File: TridentKafkaWordCount.java
/**
 * Creates a transactional kafka spout that consumes any new data published to "test" topic.
 * <p/>
 * For more info on transactional spouts
 * see "Transactional spouts" section in
 * <a href="https://storm.apache.org/documentation/Trident-state"> Trident state</a> doc.
 *
 * @return a transactional trident kafka spout.
 */
private TransactionalTridentKafkaSpout createKafkaSpout() {
    ZkHosts hosts = new ZkHosts(zkUrl);
    TridentKafkaConfig config = new TridentKafkaConfig(hosts, KAFKA_TOPIC);
    config.scheme = new SchemeAsMultiScheme(new StringScheme());

    // Consume new data from the topic
    config.startOffsetTime = kafka.api.OffsetRequest.LatestTime();
    return new TransactionalTridentKafkaSpout(config);
}
 
Example 36
Project: docker-kafka-storm   File: WordCountTopology.java
/**
 * WordCountTopology with Kafka
 *
 * @return      StormTopology Object
 */
public StormTopology buildTopology(String TOPIC) {

    SpoutConfig kafkaConf = new SpoutConfig(brokerHosts, TOPIC, "", "storm");
    kafkaConf.scheme = new SchemeAsMultiScheme(new StringScheme());

    TopologyBuilder builder = new TopologyBuilder();

    builder.setSpout(KAFKA_SPOUT_ID, new KafkaSpout(kafkaConf));
    builder.setBolt(SPLITTER_BOLT_ID, new SplitterBolt(), 4).shuffleGrouping(KAFKA_SPOUT_ID);
    builder.setBolt(COUNTER_BOLT_ID, new CounterBolt(), 4).fieldsGrouping(SPLITTER_BOLT_ID, new Fields("word"));
    builder.setBolt(RANKER_BOLT_ID, new RankerBolt()).globalGrouping(COUNTER_BOLT_ID);

    return builder.createTopology();
}
 
Example 37
Project: cdc   File: KafkaEsTopology.java
public static void main(String[] args) throws Exception {

    TopologyBuilder builder = new TopologyBuilder();

    Map<String, String> env = System.getenv();
    for (String envName : env.keySet()) {
      LOG.info(envName + "=" + env.get(envName));
    }

    String esHost = "elasticsearch";
    LOG.info("Elasticsearch Host: " + esHost);

    Map esConf = new HashMap();
    esConf.put("es.nodes", esHost);
    esConf.put("es.storm.bolt.flush.entries.size", "100");
    esConf.put("es.batch.size.entries", "100");
    esConf.put("es.input.json", "true");

    String zkConnString = "kafka:2181";
    String topicName = "maxwell";
    BrokerHosts hosts = new ZkHosts(zkConnString);
    SpoutConfig spoutConfig = new SpoutConfig(hosts, topicName, "", "storm");
    spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);

    builder.setSpout("kafka", kafkaSpout, 1);
    builder.setBolt("binlog-expander", new BinlogPayloadExpander(), 1).shuffleGrouping("kafka");
    builder.setBolt("es-bolt", new EsBolt("maxwell/BINLOG", esConf), 1).shuffleGrouping("binlog-expander");

    Config conf = new Config();
    conf.put(Config.TOPOLOGY_DEBUG, true);
    conf.setNumWorkers(1);

    StormSubmitter.submitTopologyWithProgressBar(args[0], conf, builder.createTopology());
  }
 
Example 38
Project: realtime-event-processing   File: DocEventProcessingTopology.java
public static StormTopology buildTopology(Config conf, LocalDRPC drpc) {

        TridentTopology topology = new TridentTopology();

        //Kafka Spout
        BrokerHosts zk = new ZkHosts(conf.get(CrawlerConfig.KAFKA_CONSUMER_HOST_NAME) + ":" +conf.get(CrawlerConfig.KAFKA_CONSUMER_HOST_PORT));
        TridentKafkaConfig kafkaConfig = new TridentKafkaConfig(zk, (String) conf.get(CrawlerConfig.KAFKA_TOPIC_DOCUMENT_NAME));
        kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
        OpaqueTridentKafkaSpout spout = new OpaqueTridentKafkaSpout(kafkaConfig);

        //ElasticSearch Persistent State
        Settings esSettings = ImmutableSettings.settingsBuilder()
                .put("storm.elasticsearch.cluster.name", conf.get(CrawlerConfig.ELASTICSEARCH_CLUSTER_NAME))
                .put("storm.elasticsearch.hosts", conf.get(CrawlerConfig.ELASTICSEARCH_HOST_NAME) + ":" + conf.get(CrawlerConfig.ELASTICSEARCH_HOST_PORT))
                .build();
        StateFactory esStateFactory = new ESIndexState.Factory<JSONObject>(new ClientFactory.NodeClient(esSettings.getAsMap()), JSONObject.class);
        TridentState esStaticState = topology.newStaticState(esStateFactory);

        String esIndex = (String)(conf.get(CrawlerConfig.ELASTICSEARCH_INDEX_NAME));
        topology.newStream("docstream",spout)
                .each( new Fields("str"), new SplitDocStreamArgs(), new Fields("filename", "task", "user", "content"))
                .each( new Fields("filename", "task", "user"), new PrintFilter("Kafka"))
                .each( new Fields("filename","task","user","content"), new PrepareDocForElasticSearch(), new Fields("index","type","id","source") )
                .partitionPersist(esStateFactory, new Fields("index","type","id","source"), new ESIndexUpdater<String>(new ESTridentTupleMapper()), new Fields());

        return topology.build();
    }
 
Example 39
Project: StormSampleProject   File: SentimentAnalysisTopology.java
private static StormTopology createTopology()
{
    SpoutConfig kafkaConf = new SpoutConfig(
        new ZkHosts(Properties.getString("rts.storm.zkhosts")),
        KAFKA_TOPIC,
        "/kafka",
        "KafkaSpout");
    kafkaConf.scheme = new SchemeAsMultiScheme(new StringScheme());
    TopologyBuilder topology = new TopologyBuilder();

    topology.setSpout("kafka_spout", new KafkaSpout(kafkaConf), 4);

    topology.setBolt("twitter_filter", new TwitterFilterBolt(), 4)
            .shuffleGrouping("kafka_spout");

    topology.setBolt("text_filter", new TextFilterBolt(), 4)
            .shuffleGrouping("twitter_filter");

    topology.setBolt("stemming", new StemmingBolt(), 4)
            .shuffleGrouping("text_filter");

    topology.setBolt("positive", new PositiveSentimentBolt(), 4)
            .shuffleGrouping("stemming");
    topology.setBolt("negative", new NegativeSentimentBolt(), 4)
            .shuffleGrouping("stemming");

    topology.setBolt("join", new JoinSentimentsBolt(), 4)
            .fieldsGrouping("positive", new Fields("tweet_id"))
            .fieldsGrouping("negative", new Fields("tweet_id"));

    topology.setBolt("score", new SentimentScoringBolt(), 4)
            .shuffleGrouping("join");

    topology.setBolt("hdfs", new HDFSBolt(), 4)
            .shuffleGrouping("score");
    topology.setBolt("nodejs", new NodeNotifierBolt(), 4)
            .shuffleGrouping("score");

    return topology.createTopology();
}
 
Example 40
Project: storm-smoke-test   File: TridentConnectorUtil.java
public static OpaqueTridentKafkaSpout getTridentKafkaEmitter(String zkConnString, String topicName, Map topologyConfig) {
    BrokerHosts hosts = new ZkHosts(zkConnString);
    TridentKafkaConfig kafkaConfig = new TridentKafkaConfig(hosts, topicName);
    //topologyConfig.put("topology.spout.max.batch.size", 1);
    //kafkaConfig.forceFromStart = true;
    kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    return new OpaqueTridentKafkaSpout(kafkaConfig);
}
 
Example 41
Project: storm-smoke-test   File: ConnectorUtil.java
public static KafkaSpout getKafkaSpout(String zkConnString, String topicName) {
    BrokerHosts hosts = new ZkHosts(zkConnString);

    SpoutConfig spoutConfig = new SpoutConfig(hosts, topicName, "/" + topicName, UUID.randomUUID().toString());
    //spoutConfig.forceFromStart = true;
    spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    return new KafkaSpout(spoutConfig);
}
 
Example 42
Project: partial-key-grouping   File: WordCountPartialKeyGrouping.java
public static void main(String[] args) throws Exception {
    String kafkaZk = "zookeeper:2181"; // change it to your zookeeper server
    BrokerHosts brokerHosts = new ZkHosts(kafkaZk);

    SpoutConfig kafkaConfig = new SpoutConfig(brokerHosts, "name_of_kafka_topic", "", "test"); // change it to the name of your kafka topic
    kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    kafkaConfig.forceFromStart = true;

    TopologyBuilder builder = new TopologyBuilder();
    builder.setSpout("stream", new KafkaSpout(kafkaConfig), 1);
    builder.setBolt("split", new SplitterBolt(), 8).shuffleGrouping("stream");
    builder.setBolt("counter", new CounterBolt(), 10).customGrouping("split", new PartialKeyGrouping());
    builder.setBolt("aggregator", new AggregatorBolt(), 1).fieldsGrouping("counter", new Fields("word"));

    Config conf = new Config();
    conf.setDebug(false);
    conf.setMaxSpoutPending(100);
    // conf.setMessageTimeoutSecs(300); // optionally increase the timeout for tuples

    if (args != null && args.length > 0) {
        conf.setNumWorkers(10);
        StormSubmitter.submitTopology(args[0], conf, builder.createTopology());
    } else {
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("test", conf, builder.createTopology());
        Utils.sleep(15000000);
        cluster.killTopology("test");
        cluster.shutdown();
    }
}
 
Example 43
Project: incubator-storm   File: KafkaUtilsTest.java
@Test
public void generateTuplesWithValueSchemeAndKeyValueMessage() {
    config.scheme = new SchemeAsMultiScheme(new StringScheme());
    String value = "value";
    String key = "key";
    createTopicAndSendMessage(key, value);
    ByteBufferMessageSet messageAndOffsets = getLastMessage();
    for (MessageAndOffset msg : messageAndOffsets) {
        Iterable<List<Object>> lists = KafkaUtils.generateTuples(config, msg.message());
        assertEquals(value, lists.iterator().next().get(0));
    }
}
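This test exercises the plain value scheme; storm-kafka also ships a key-aware variant. A one-line sketch of that alternative configuration:

// Carry the Kafka message key into the emitted tuple as well
config.scheme = new KeyValueSchemeAsMultiScheme(new StringKeyValueScheme());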
 
Example 44
Project: recsys-online   File: Recsys.java
public static void main(String[] args) throws AlreadyAliveException,
			InvalidTopologyException {
		logger.info("begin to running recsys.");
		BrokerHosts brokerHosts = new ZkHosts(Constants.kafka_zk_address);
		SpoutConfig spoutConfig = new SpoutConfig(brokerHosts, Constants.kafka_topic, Constants.kafka_zk_root, Constants.kafka_id);

		Config conf = new Config();
		Map<String, String> map = new HashMap<String, String>();
		map.put("metadata.broker.list", Constants.kakfa_broker_list);
		map.put("serializer.class", "kafka.serializer.StringEncoder");
		conf.put("kafka.broker.properties", map);
//		conf.put("topic", "topic2");

		spoutConfig.scheme = new SchemeAsMultiScheme(new MessageScheme());
		TopologyBuilder builder = new TopologyBuilder();
		builder.setSpout("spout", new KafkaSpout(spoutConfig));
		builder.setBolt("bolt", new HBaseStoreBolt()).shuffleGrouping("spout");
//		builder.setBolt("kafkabolt", new KafkaBolt<String, Integer>()).shuffleGrouping("bolt");

		if (!islocal) {
			conf.setNumWorkers(3);
			StormSubmitter.submitTopology(Constants.storm_topology_name, conf, builder.createTopology());
		} else {
			LocalCluster cluster = new LocalCluster();
			cluster.submitTopology(Constants.storm_topology_name, conf, builder.createTopology());
			Utils.sleep(100000);
			cluster.killTopology(Constants.storm_topology_name);
			cluster.shutdown();
		}
		logger.info("run recsys finish.");
	}
 
Example 45
Project: StormTopology-AuditActiveLogins   File: AuditActiveLoginsTopology.java
public StormTopology buildTopology(Properties properties) {
	
	// Load properties for the storm topology
	String kafkaTopic = properties.getProperty("kafka.topic");
	String hbaseTable = properties.getProperty("hbase.table.name");
	String hbaseColumnFamily = properties.getProperty("hbase.column.family");
	
	SpoutConfig kafkaConfig = new SpoutConfig(kafkaBrokerHosts, kafkaTopic, "",
			"storm");
	kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
	TopologyBuilder builder = new TopologyBuilder();
	
	SimpleHBaseMapper hBaseMapper = new SimpleHBaseMapper()
			.withRowKeyField("host-user")
			.withCounterFields(new Fields("count"))
			.withColumnFamily(hbaseColumnFamily);

	// HbaseBolt(tableName, hbaseMapper)
	HBaseBolt hbaseBolt = new HBaseBolt(hbaseTable, hBaseMapper);
	AuditLoginsCounterBolt loginCounterbolt = new AuditLoginsCounterBolt(hbaseTable);
	AuditBolt auditParserBolt = new AuditBolt();

	builder.setSpout("KafkaSpout", new KafkaSpout(kafkaConfig), 1);
	builder.setBolt("ParseBolt", auditParserBolt, 1).shuffleGrouping("KafkaSpout");
	builder.setBolt("CountBolt", loginCounterbolt, 1).shuffleGrouping("ParseBolt");
	builder.setBolt("HBaseBolt", hbaseBolt, 1).fieldsGrouping("CountBolt",
			new Fields("host-user"));

	return builder.createTopology();
}
 
Example 46
Project: kappaeg   File: CDRStormTopology.java
private KafkaSpout constructCDRKafkaSpout() {
    BrokerHosts zkhosts = new ZkHosts(globalconfigs.getProperty("cdrstorm.kafkaspout.zkhosts"));
    String topic = globalconfigs.getProperty("cdr.kafkatopic");
    String zkRoot = globalconfigs.getProperty("cdrstorm.kafkaspout.zkroot");
    String consumerGroupId = globalconfigs.getProperty("cdrstorm.kafkaspout.cdr.consumergroupid");
    SpoutConfig spoutConfig = new SpoutConfig(zkhosts, topic, zkRoot, consumerGroupId);
    spoutConfig.scheme = new SchemeAsMultiScheme(new CDRScheme());
    KafkaSpout kafkaspout = new KafkaSpout(spoutConfig);
    return kafkaspout;
}
 
Example 47
Project: kappaeg   File: CDRStormTopology.java
private KafkaSpout constructTwitterKafkaSpout() {
    BrokerHosts zkhosts = new ZkHosts(globalconfigs.getProperty("cdrstorm.kafkaspout.zkhosts"));
    String topic = globalconfigs.getProperty("twitter4j.kafkatopic");
    String zkRoot = globalconfigs.getProperty("cdrstorm.kafkaspout.zkroot");
    String consumerGroupId = globalconfigs.getProperty("cdrstorm.kafkaspout.cdr.consumergroupid");
    SpoutConfig spoutConfig = new SpoutConfig(zkhosts, topic, zkRoot, consumerGroupId);
    
    //Create scheme for Twitter
    spoutConfig.scheme = new SchemeAsMultiScheme(new TwitterScheme());
    
    KafkaSpout kafkaspout = new KafkaSpout(spoutConfig);
    return kafkaspout;
}
 
Example 48
Project: flink-perf   File: KafkaThroughput.java
public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException, UnknownHostException, InterruptedException {
	final ParameterTool pt = ParameterTool.fromArgs(args);

	TopologyBuilder builder = new TopologyBuilder();
	BrokerHosts hosts = new ZkHosts(pt.getRequired("zookeeper"));
	SpoutConfig spoutConfig = new SpoutConfig(hosts, pt.getRequired("topic"), "/" + pt.getRequired("topic"), UUID.randomUUID().toString());
	spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
	KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);
	builder.setSpout("source", kafkaSpout, pt.getInt("sourceParallelism"));

	builder.setBolt("sink", new Throughput.Sink(pt), pt.getInt("sinkParallelism")).noneGrouping("source");

	Config conf = new Config();
	conf.setDebug(false);

	if (!pt.has("local")) {
		conf.setNumWorkers(pt.getInt("par", 2));

		StormSubmitter.submitTopologyWithProgressBar("kafka-spout-"+pt.get("name", "no_name"), conf, builder.createTopology());
	} else {
		conf.setMaxTaskParallelism(pt.getInt("par", 2));

		LocalCluster cluster = new LocalCluster();
		cluster.submitTopology("kafka-spout", conf, builder.createTopology());

		Thread.sleep(300000);

		cluster.shutdown();
	}
}
 
Example 49
Project: storm-kafka-0.8-plus-test   File: KafkaSpoutTestTopology.java
public StormTopology buildTopology() {
    SpoutConfig kafkaConfig = new SpoutConfig(brokerHosts, "storm-sentence", "", "storm");
    kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    TopologyBuilder builder = new TopologyBuilder();
    builder.setSpout("words", new KafkaSpout(kafkaConfig), 10);
    builder.setBolt("print", new PrinterBolt()).shuffleGrouping("words");
    return builder.createTopology();
}
 
Example 50
Project: storm-kafka-0.8-plus   File: KafkaUtilsTest.java
@Test
public void generateTuplesWithValueSchemeAndKeyValueMessage() {
    config.scheme = new SchemeAsMultiScheme(new StringScheme());
    String value = "value";
    String key = "key";
    createTopicAndSendMessage(key, value);
    ByteBufferMessageSet messageAndOffsets = getLastMessage();
    for (MessageAndOffset msg : messageAndOffsets) {
        Iterable<List<Object>> lists = KafkaUtils.generateTuples(config, msg.message());
        assertEquals(value, lists.iterator().next().get(0));
    }
}
 
Example 51
Project: storm-kafka-examples   File: HdfsTopology.java
public static void main(String[] args) {
    try{
        String zkhost = "wxb-1:2181,wxb-2:2181,wxb-3:2181";
        String topic = "order";
        String groupId = "id";
        int spoutNum = 3;
        int boltNum = 1;
        ZkHosts zkHosts = new ZkHosts(zkhost); // the ZooKeeper ensemble that Kafka registers with
        SpoutConfig spoutConfig = new SpoutConfig(zkHosts, topic, "/order", groupId);  // create /order /id
        spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
        KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);

        // HDFS bolt
        // use "|" instead of "," for field delimiter
        RecordFormat format = new DelimitedRecordFormat()
                .withFieldDelimiter("|");

        // sync the filesystem after every 1k tuples
        SyncPolicy syncPolicy = new CountSyncPolicy(1000);

        // rotate files when they reach 5MB
        FileRotationPolicy rotationPolicy = new FileSizeRotationPolicy(5.0f, FileSizeRotationPolicy.Units.MB);
        // FileRotationPolicy rotationPolicy = new TimedRotationPolicy(1.0f, TimedRotationPolicy.TimeUnit.MINUTES);

        FileNameFormat fileNameFormat = new DefaultFileNameFormat()
                .withPath("/tmp/").withPrefix("order_").withExtension(".log");

        HdfsBolt hdfsBolt = new HdfsBolt()
                .withFsUrl("hdfs://wxb-1:8020")
                .withFileNameFormat(fileNameFormat)
                .withRecordFormat(format)
                .withRotationPolicy(rotationPolicy)
                .withSyncPolicy(syncPolicy);

        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("spout", kafkaSpout, spoutNum);
        builder.setBolt("check", new CheckOrderBolt(), boltNum).shuffleGrouping("spout");
        builder.setBolt("counter", new CounterBolt(),boltNum).shuffleGrouping("check");
        builder.setBolt("hdfs", hdfsBolt,boltNum).shuffleGrouping("counter");

        Config config = new Config();
        config.setDebug(true);

        if(args!=null && args.length > 0) {
            config.setNumWorkers(2);
            StormSubmitter.submitTopology(args[0], config, builder.createTopology());
        } else {
            config.setMaxTaskParallelism(2);

            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("Wordcount-Topology", config, builder.createTopology());

            Thread.sleep(500000);

            cluster.shutdown();
        }
    }catch (Exception e) {
        e.printStackTrace();
    }
}
 
Example 52
Project: web-crawler   File: WebCrawlerTopology.java
public static StormTopology buildTopology(Config conf, LocalDRPC localDrpc) {
    TridentTopology topology = new TridentTopology();

    //Kafka Spout
    BrokerHosts zk = new ZkHosts(conf.get(CrawlerConfig.KAFKA_CONSUMER_HOST_NAME) + ":" +conf.get(CrawlerConfig.KAFKA_CONSUMER_HOST_PORT));
    TridentKafkaConfig kafkaConfig = new TridentKafkaConfig(zk, (String) conf.get(CrawlerConfig.KAFKA_TOPIC_NAME));
    kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    OpaqueTridentKafkaSpout spout = new OpaqueTridentKafkaSpout(kafkaConfig);

    //ElasticSearch Persistent State
    Settings esSettings = ImmutableSettings.settingsBuilder()
            .put("storm.elasticsearch.cluster.name", conf.get(CrawlerConfig.ELASTICSEARCH_CLUSTER_NAME))
            .put("storm.elasticsearch.hosts", conf.get(CrawlerConfig.ELASTICSEARCH_HOST_NAME) + ":" + conf.get(CrawlerConfig.ELASTICSEARCH_HOST_PORT))
            .build();
    StateFactory esStateFactory = new ESIndexState.Factory<String>(new ClientFactory.NodeClient(esSettings.getAsMap()), String.class);
    TridentState esStaticState = topology.newStaticState(esStateFactory);

    //Topology
    topology.newStream("crawlKafkaSpout", spout).parallelismHint(5)
             //Splits url and depth information on receiving from Kafka
            .each(new Fields("str"), new SplitKafkaInput(), new Fields("url", "depth"))
            //Bloom Filter. Filters already crawled URLs
            .each(new Fields("url"), new URLFilter())
            //Download and Parse Webpage
            .each(new Fields("url"), new GetAdFreeWebPage(), new Fields("content_html", "title", "href"))//TODO Optimize
            //Add Href URls to Kafka queue
            .each(new Fields("href", "depth"), new KafkaProducerFilter())//TODO Replace with kafka persistent state.
            //Insert to Elasticsearch
            .each(new Fields("url", "content_html", "title"), new PrepareForElasticSearch(), new Fields("index", "type", "id", "source"))
            .partitionPersist(esStateFactory, new Fields("index", "type", "id", "source"), new ESIndexUpdater<String>(new ESTridentTupleMapper()))
    ;

    //DRPC
    topology.newDRPCStream("search", localDrpc)
            .each(new Fields("args"), new SplitDRPCArgs(), new Fields("query_input"))
            .each(new Fields("query_input"), new BingAutoSuggest(0), new Fields("query_preProcessed"))//TODO return List of expanded query
            .each(new Fields("query_preProcessed"), new PrepareSearchQuery(), new Fields("query", "indices", "types"))
            .groupBy(new Fields("query", "indices", "types"))
            .stateQuery(esStaticState, new Fields("query", "indices", "types"), new QuerySearchIndexQuery(), new Fields("results"))
    ;

    return topology.build();
}
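
Since buildTopology takes a LocalDRPC handle, a local-mode caller presumably wires it up along these lines (a sketch; the topology name and query string are illustrative, and conf would still need the CrawlerConfig entries populated):

// Hedged usage sketch: run the crawler topology locally and issue a DRPC search.
Config conf = new Config();
LocalDRPC drpc = new LocalDRPC();
LocalCluster cluster = new LocalCluster();
cluster.submitTopology("web-crawler", conf, WebCrawlerTopology.buildTopology(conf, drpc));
String results = drpc.execute("search", "storm trident");  // "search" matches the newDRPCStream name above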
 
Example 53
Project: Big-Data-tutorial   File: WeatherTopology.java   Source Code and License 4 votes
public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException {

    String zkIp = "localhost";
    String nimbusHost = "sandbox.hortonworks.com";
    String zookeeperHost = zkIp + ":2181";

    ZkHosts zkHosts = new ZkHosts(zookeeperHost);
    List<String> zkServers = new ArrayList<String>();
    zkServers.add(zkIp);
    SpoutConfig kafkaConfig = new SpoutConfig(zkHosts, "spertus-weather-events", "/spertus-weather-events", "test_id");
    kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    kafkaConfig.startOffsetTime = kafka.api.OffsetRequest.EarliestTime();
    kafkaConfig.zkServers = zkServers;
    kafkaConfig.zkRoot = "/spertus-weather-events";
    kafkaConfig.zkPort = 2181;
    kafkaConfig.forceFromStart = true;
    KafkaSpout kafkaSpout = new KafkaSpout(kafkaConfig);

    TopologyBuilder builder = new TopologyBuilder();

    HdfsBolt hdfsBolt = new HdfsBolt().withFsUrl("hdfs://sandbox.hortonworks.com:8020")
            .withFileNameFormat(new DefaultFileNameFormat().withPath("/tmp/test"))
            .withRecordFormat(new DelimitedRecordFormat().withFieldDelimiter("|"))
            .withSyncPolicy(new CountSyncPolicy(10))
            .withRotationPolicy(new FileSizeRotationPolicy(5.0f, Units.MB));
    builder.setSpout("raw-weather-events", kafkaSpout, 1);
    builder.setBolt("filter-airports", new FilterAirportsBolt(), 1).shuffleGrouping("raw-weather-events");
    // builder.setBolt("test-bolt", new TestBolt(), 1).shuffleGrouping("raw-weather-events");
    // builder.setBolt("hdfs-bolt", hdfsBolt, 1).shuffleGrouping("raw-weather-events");

    Map<String, Object> conf = new HashMap<>();
    conf.put(backtype.storm.Config.TOPOLOGY_WORKERS, 4);
    conf.put(backtype.storm.Config.TOPOLOGY_DEBUG, true);
    if (args != null && args.length > 0) {
        StormSubmitter.submitTopology(args[0], conf, builder.createTopology());
    } else {
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("weather-topology", conf, builder.createTopology());
    }
}
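
A plain HashMap works here because backtype.storm.Config is itself a Map of settings, but using Config directly is the more common form (a sketch):

// Equivalent, more idiomatic configuration via backtype.storm.Config.
Config conf = new Config();
conf.setNumWorkers(4);  // same effect as putting TOPOLOGY_WORKERS
conf.setDebug(true);    // same effect as putting TOPOLOGY_DEBUG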
 
Example 54
Project: streaming-benchmarks   File: AdvertisingTopology.java   Source Code and License 4 votes
public static void main(String[] args) throws Exception {
    TopologyBuilder builder = new TopologyBuilder();

    Options opts = new Options();
    opts.addOption("conf", true, "Path to the config file.");

    CommandLineParser parser = new DefaultParser();
    CommandLine cmd = parser.parse(opts, args);
    String configPath = cmd.getOptionValue("conf");
    Map commonConfig = Utils.findAndReadConfigFile(configPath, true);

    String zkServerHosts = joinHosts((List<String>)commonConfig.get("zookeeper.servers"),
                                     Integer.toString((Integer)commonConfig.get("zookeeper.port")));
    String redisServerHost = (String)commonConfig.get("redis.host");
    String kafkaTopic = (String)commonConfig.get("kafka.topic");
    int kafkaPartitions = ((Number)commonConfig.get("kafka.partitions")).intValue();
    int workers = ((Number)commonConfig.get("storm.workers")).intValue();
    int ackers = ((Number)commonConfig.get("storm.ackers")).intValue();
    int cores = ((Number)commonConfig.get("process.cores")).intValue();
    int parallel = Math.max(1, cores/7);

    ZkHosts hosts = new ZkHosts(zkServerHosts);

    SpoutConfig spoutConfig = new SpoutConfig(hosts, kafkaTopic, "/" + kafkaTopic, UUID.randomUUID().toString());
    spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);

    builder.setSpout("ads", kafkaSpout, kafkaPartitions);
    builder.setBolt("event_deserializer", new DeserializeBolt(), parallel).shuffleGrouping("ads");
    builder.setBolt("event_filter", new EventFilterBolt(), parallel).shuffleGrouping("event_deserializer");
    builder.setBolt("event_projection", new EventProjectionBolt(), parallel).shuffleGrouping("event_filter");
    builder.setBolt("redis_join", new RedisJoinBolt(redisServerHost), parallel).shuffleGrouping("event_projection");
    builder.setBolt("campaign_processor", new CampaignProcessor(redisServerHost), parallel*2)
        .fieldsGrouping("redis_join", new Fields("campaign_id"));

    Config conf = new Config();

    if (args != null && args.length > 0) {
        conf.setNumWorkers(workers);
        conf.setNumAckers(ackers);
        StormSubmitter.submitTopologyWithProgressBar(args[0], conf, builder.createTopology());
    } else {
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("test", conf, builder.createTopology());
        backtype.storm.utils.Utils.sleep(10000);
        cluster.killTopology("test");
        cluster.shutdown();
    }
}
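
joinHosts is a project-local helper that is not shown here; presumably it turns the server list and port into a ZooKeeper connect string. A hypothetical sketch of that behavior:

// Hypothetical sketch of the project-local joinHosts helper:
// ["zk1", "zk2"] plus "2181" becomes "zk1:2181,zk2:2181".
private static String joinHosts(List<String> hosts, String port) {
    StringBuilder joined = new StringBuilder();
    for (String host : hosts) {
        if (joined.length() > 0) {
            joined.append(",");
        }
        joined.append(host).append(":").append(port);
    }
    return joined.toString();
}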
 
Example 55
Project: yuzhouwan   File: ZkTopology.java   Source Code and License 4 votes
public static void main(String[] args) {

    // This is simply the zookeeper.connect value from the Kafka broker configuration; it can be copied from there.
    String brokerZkStr = "10.100.90.201:2181/kafka_online_sample";
    String brokerZkPath = "/brokers";
    ZkHosts zkHosts = new ZkHosts(brokerZkStr, brokerZkPath);

    String topic = "mars-wap";
    // Below: the ZooKeeper cluster that consumer offsets are reported to, and its settings.
    String offsetZkServers = "10.199.203.169";
    String offsetZkPort = "2181";
    List<String> zkServersList = new ArrayList<>();
    zkServersList.add(offsetZkServers);
    // Root path under which offset information is reported.
    String offsetZkRoot = "/stormExample";
    // Id under which this spout's consumed offsets are stored, e.g. named after the topology.
    String offsetZkId = "storm-example";

    SpoutConfig kafkaConfig = new SpoutConfig(zkHosts, topic, offsetZkRoot, offsetZkId);
    kafkaConfig.zkPort = Integer.parseInt(offsetZkPort);
    kafkaConfig.zkServers = zkServersList;
    kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());

    KafkaSpout spout = new KafkaSpout(kafkaConfig);

    TopologyBuilder builder = new TopologyBuilder();
    builder.setSpout("spout", spout, 1);
    builder.setBolt("bolt", new EsBolt("storm/docs"), 1).shuffleGrouping("spout");

    Config config = new Config();
    config.put("es.index.auto.create", "true");

    if (args.length > 0) {
        try {
            StormSubmitter.submitTopology("storm-kafka-example", config, builder.createTopology());
        } catch (Exception e) {
            LOG.error("error: {}", e.getMessage());
        }
    } else {
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("test", config, builder.createTopology());
    }
}
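
EsBolt comes from the elasticsearch-hadoop Storm integration and is configured through string properties such as es.index.auto.create above; the target cluster address is normally supplied the same way (a sketch; the address and port are illustrative):

// Point the elasticsearch-hadoop connector at the ES cluster (address and port are illustrative).
config.put("es.nodes", "10.100.90.202");
config.put("es.port", "9200");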
 
Example 56
Project: realtime-event-processing   File: URLEventProcessingTopology.java   Source Code and License 4 votes
public static StormTopology buildTopology(Config conf, LocalDRPC localDrpc) {
    TridentTopology topology = new TridentTopology();

    //Kafka Spout
    BrokerHosts zk = new ZkHosts(conf.get(CrawlerConfig.KAFKA_CONSUMER_HOST_NAME) + ":" + conf.get(CrawlerConfig.KAFKA_CONSUMER_HOST_PORT));
    TridentKafkaConfig kafkaConfig = new TridentKafkaConfig(zk, (String) conf.get(CrawlerConfig.KAFKA_TOPIC_NAME));
    kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    // kafkaConfig.ignoreZkOffsets = true;
    OpaqueTridentKafkaSpout spout = new OpaqueTridentKafkaSpout(kafkaConfig);

    //ElasticSearch Persistent State
    Settings esSettings = ImmutableSettings.settingsBuilder()
            .put("storm.elasticsearch.cluster.name", conf.get(CrawlerConfig.ELASTICSEARCH_CLUSTER_NAME))
            .put("storm.elasticsearch.hosts", conf.get(CrawlerConfig.ELASTICSEARCH_HOST_NAME) + ":" + conf.get(CrawlerConfig.ELASTICSEARCH_HOST_PORT))
            .build();
    StateFactory esStateFactory = new ESIndexState.Factory<JSONObject>(new ClientFactory.NodeClient(esSettings.getAsMap()), JSONObject.class);
    TridentState esStaticState = topology.newStaticState(esStateFactory);

    //Topology
    topology.newStream("crawlKafkaSpout", spout).parallelismHint(5)
            //Splits words on receiving from Kafka
            .each(new Fields("str"), new SplitFunction(), new Fields("url", "depth", "task", "user"))
            .each(new Fields("str"), new PrintFilter("Kafka"))
            //Bloom Filter, Filters already crawled URLs
            .each(new Fields("url", "task"), new URLFilter())
            //Download and Parse Webpage
            .each(new Fields("url"), new GetAdFreeWebPage(), new Fields("content_html", "title", "href"))
            //Sending URLs present in the page into the kafka queue.
            .each(new Fields("href", "depth", "task", "user"), new KafkaProducerFilter())
            //Insert to Elasticsearch
            .each(new Fields("url", "content_html", "title", "task", "user"), new PrepareForElasticSearch(), new Fields("index", "type", "id", "source"))
            .partitionPersist(esStateFactory, new Fields("index", "type", "id", "source"), new ESIndexUpdater<String>(new ESTridentTupleMapper()), new Fields())
            ;

    //DRPC
    topology.newDRPCStream("search", localDrpc)
            .each(new Fields("args"), new SplitDRPCArgs(), new Fields("query_input", "task"))
            .each(new Fields("query_input"), new BingAutoSuggest(0), new Fields("query_preProcessed"))
            .each(new Fields("query_preProcessed", "task"), new PrepareSearchQuery(), new Fields("query", "indices", "types"))
            .groupBy(new Fields("query", "indices", "types"))
            .stateQuery(esStaticState, new Fields("query", "indices", "types"), new QuerySearchIndexQuery(), new Fields("results"))
            ;

    return topology.build();
}
 
Example 57
Project: simple-kafka-storm-java   File: OpaqueTridentKafkaSpoutBuilder.java   Source Code and License 4 votes
public OpaqueTridentKafkaSpout build() {
	BrokerHosts zk = new ZkHosts(zookeeper);
	TridentKafkaConfig spoutConf = new TridentKafkaConfig(zk, topic);
	spoutConf.scheme = new SchemeAsMultiScheme(new StringScheme());
	return new OpaqueTridentKafkaSpout(spoutConf);
}
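
A builder like this would then feed a Trident stream. A usage sketch; the constructor arguments are an assumption, since only the build() method is shown:

// Hedged usage sketch for OpaqueTridentKafkaSpoutBuilder.
TridentTopology topology = new TridentTopology();
OpaqueTridentKafkaSpout spout =
        new OpaqueTridentKafkaSpoutBuilder("zk-host:2181", "events").build();  // constructor shape is assumed
topology.newStream("kafka-opaque", spout);  // tuples carry the "str" field from StringScheme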
 
Example 58
Project: simple-kafka-storm-java   File: KafkaSpoutBuilder.java   Source Code and License 4 votes
public KafkaSpout build() {
	BrokerHosts hosts = new ZkHosts(zooKeeper);
	SpoutConfig spoutConfig = new SpoutConfig(hosts, topic, "/" + topic, UUID.randomUUID().toString());
	spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
	return new KafkaSpout(spoutConfig);
}
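
Note that UUID.randomUUID() as the consumer id gives the spout a fresh ZooKeeper offset path on every restart, so it never resumes where it left off. For resumable consumption a stable id would be used instead (a sketch; the id is illustrative):

// A fixed consumer id lets the spout resume from offsets already stored in ZooKeeper.
SpoutConfig spoutConfig = new SpoutConfig(hosts, topic, "/" + topic, "orders-consumer");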
 
Example 59
Project: incubator-storm   File: KafkaUtilsTest.java   Source Code and License 4 votes
@Test
public void generateTuplesWithValueScheme() {
    config.scheme = new SchemeAsMultiScheme(new StringScheme());
    runGetValueOnlyTuplesTest();
}
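
The test relies on StringScheme decoding each raw Kafka payload as UTF-8 into a single-field tuple named "str". A conceptual sketch of that behavior, not the exact storm-kafka source:

// Conceptual sketch of StringScheme.deserialize: bytes -> one-field tuple ("str").
public List<Object> deserialize(byte[] bytes) {
    try {
        return new Values(new String(bytes, "UTF-8"));
    } catch (UnsupportedEncodingException e) {
        throw new RuntimeException(e);
    }
}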
 
Example 60
Project: storm-benchmark   File: DRPC.java   Source Code and License 4 votes
@Override
public StormTopology getTopology(Config config) {

    Object sObj = config.get(SERVER);
    if (null == sObj) {
      throw new IllegalArgumentException("must set a drpc server");
    }
    server = (String) sObj;
    config.put(Config.DRPC_SERVERS, Lists.newArrayList(server));

    Object pObj = config.get(PORT);
    if (null == pObj) {
      throw new IllegalArgumentException("must set a drpc port");
    }
    port = Utils.getInt(pObj);
    config.put(Config.DRPC_PORT, port);

    LOG.info("drpc server: " + server + "; drpc port: " + port);

    final int spoutNum = BenchmarkUtils.getInt(config, SPOUT_NUM, DEFAULT_SPOUT_NUM);
    final int pageNum = BenchmarkUtils.getInt(config, PAGE_NUM, DEFAULT_PAGE_BOLT_NUM);
    final int viewNum = BenchmarkUtils.getInt(config, VIEW_NUM, DEFAULT_VIEW_BOLT_NUM);
    final int userNum = BenchmarkUtils.getInt(config, USER_NUM, DEFAULT_USER_BOLT_NUM);
    final int followerNum = BenchmarkUtils.getInt(config, FOLLOWER_NUM, DEFAULT_FOLLOWER_BOLT_NUM);

    spout = new TransactionalTridentKafkaSpout(
            KafkaUtils.getTridentKafkaConfig(config, new SchemeAsMultiScheme(new StringScheme())));

    TridentTopology trident = new TridentTopology();
    TridentState urlToUsers =
            trident.newStream("drpc", spout).parallelismHint(spoutNum).shuffle()
            .each(new Fields(StringScheme.STRING_SCHEME_KEY), new Extract(Arrays.asList(Item.URL, Item.USER)),
                    new Fields("url", "user")).parallelismHint(pageNum)
            .groupBy(new Fields("url"))
            .persistentAggregate(new MemoryMapState.Factory(), new Fields("url", "user"), new Distinct(), new Fields("user_set"))
            .parallelismHint(viewNum);
    /* debug 1: proves that the aggregated result has been persisted successfully.
    urlToUsers.newValuesStream()
            .each(new Fields("url", "user_set"), new Print("(url, user_set)"), new Fields("url2", "user_set2"));
    */
    PageViewGenerator generator = new PageViewGenerator();
    TridentState userToFollowers = trident.newStaticState(new StaticSingleKeyMapState.Factory(generator.genFollowersDB()));
    /* debug 2: proves that the MemoryMapState can be read back correctly.
    trident.newStream("urlToUsers", new PageViewSpout(false))
            .each(new Fields("page_view"), new Extract(Arrays.asList(Item.URL)), new Fields("url"))
            .each(new Fields("url"), new Print("url"), new Fields("url2"))
            .groupBy(new Fields("url2"))
            .stateQuery(urlToUsers, new Fields("url2"), new MapGet(), new Fields("users"))
            .each(new Fields("users"), new Print("users"), new Fields("users2"));
    */
    /* debug 3: proves that the StaticSingleKeyMapState can be read correctly.
    trident.newStream("userToFollowers", new PageViewSpout(false))
            .each(new Fields("page_view"), new Extract(Arrays.asList(Item.USER)), new Fields("user"))
            .each(new Fields("user"), new Print("user"), new Fields("user2"))
            .stateQuery(userToFollowers, new Fields("user2"), new MapGet(), new Fields("followers"))
            .each(new Fields("followers"), new Print("followers"), new Fields("followers2"));
    */
    trident.newDRPCStream(FUNCTION, null)
            .each(new Fields("args"), new Print("args"), new Fields("url"))
            .groupBy(new Fields("url"))
            .stateQuery(urlToUsers, new Fields("url"), new MapGet(), new Fields("users"))
            .each(new Fields("users"), new Expand(), new Fields("user")).parallelismHint(userNum)
            .groupBy(new Fields("user"))
            .stateQuery(userToFollowers, new Fields("user"), new MapGet(), new Fields("followers"))
            .each(new Fields("followers"), new Expand(), new Fields("follower")).parallelismHint(followerNum)
            .groupBy(new Fields("follower"))
            .aggregate(new One(), new Fields("one"))
            .aggregate(new Fields("one"), new Sum(), new Fields("reach"));
    return trident.build();
}
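
Once the topology is running against the configured DRPC server, the reach computation is invoked through the DRPC client with a URL argument (a sketch; host, port, and URL are illustrative):

// Hedged sketch: invoking the reach DRPC function defined above.
DRPCClient client = new DRPCClient("drpc-host", 3772);
String reach = client.execute(FUNCTION, "http://example.com/page1");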