org.apache.storm.spout.SchemeAsMultiScheme Java Examples

The following examples show how to use org.apache.storm.spout.SchemeAsMultiScheme. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: SingleTopology.java    From nightwatch with GNU Lesser General Public License v3.0 6 votes vote down vote up
private static TopologyBuilder buildTopology() throws Exception {
        TopologyBuilder builder = new TopologyBuilder();
        String topicName = Configuration.getConfig().getString("rtc.mq.spout.topic");
        String groupName = Configuration.getConfig().getString("rtc.mq.spout.group");
        BrokerHosts hosts = new ZkHosts(Configuration.getConfig().getString("rtc.zk.hosts"));
        SpoutConfig spoutConfig = new SpoutConfig(hosts, topicName, "/consumers", groupName);
        spoutConfig.startOffsetTime = kafka.api.OffsetRequest.LatestTime();
        spoutConfig.zkServers = Arrays.asList(Configuration.getConfig().getString("rtc.storm.zkServers").split(","));
        spoutConfig.zkPort = Configuration.getConfig().getInt("rtc.storm.zkPort");
        spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
        KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);
        builder.setSpout("MQSpout", kafkaSpout, Configuration.getConfig().getInt("rtc.storm.spout.parallelismHint")).setNumTasks(Configuration.getConfig().getInt("rtc.storm.spout.task"));
        builder.setBolt("ExtractBolt", new ExtractBolt(), Configuration.getConfig().getInt("rtc.storm.extract.bolt.parallelismHint")).setNumTasks(Configuration.getConfig().getInt("rtc.storm.extract.bolt.task")).shuffleGrouping("MQSpout");
        builder.setBolt("Statistic", new StatisticBolt(), Configuration.getConfig().getInt("rtc.storm.statistic.bolt.parallelismHint")).setNumTasks(Configuration.getConfig().getInt("rtc.storm.statistic.bolt.task")).fieldsGrouping("ExtractBolt", new Fields(new String[]{"hashKeys"}));
//        builder.setBolt("Alarm", new AlarmBolt(), Configuration.getConfig().getInt("rtc.storm.alarm.bolt.parallelismHint")).setNumTasks(Configuration.getConfig().getInt("rtc.storm.alarm.bolt.task")).fieldsGrouping("Statistic", new Fields(new String[]{"EventName"}));
        return builder;
    }
 
Example #2
Source File: ClusterTopology.java    From nightwatch with GNU Lesser General Public License v3.0 6 votes vote down vote up
private static TopologyBuilder buildTopology() throws Exception {
        TopologyBuilder builder = new TopologyBuilder();
        String topicName = Configuration.getConfig().getString("rtc.mq.spout.topic");
        String groupName = Configuration.getConfig().getString("rtc.mq.spout.group");
        BrokerHosts hosts = new ZkHosts(Configuration.getConfig().getString("rtc.zk.hosts"));
        SpoutConfig spoutConfig = new SpoutConfig(hosts, topicName, "/consumers", groupName);

        spoutConfig.startOffsetTime = kafka.api.OffsetRequest.LatestTime();
        spoutConfig.zkServers = Arrays.asList(Configuration.getConfig().getString("rtc.storm.zkServers").split(","));
        spoutConfig.zkPort = Configuration.getConfig().getInt("rtc.storm.zkPort");
        spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
        KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);
        builder.setSpout("MQSpout", kafkaSpout, Configuration.getConfig().getInt("rtc.storm.spout.parallelismHint")).setNumTasks(Configuration.getConfig().getInt("rtc.storm.spout.task"));
        builder.setBolt("ExtractBolt", new ExtractBolt(), Configuration.getConfig().getInt("rtc.storm.extract.bolt.parallelismHint")).setNumTasks(Configuration.getConfig().getInt("rtc.storm.extract.bolt.task")).shuffleGrouping("MQSpout");
        builder.setBolt("Statistic", new StatisticBolt(), Configuration.getConfig().getInt("rtc.storm.statistic.bolt.parallelismHint")).setNumTasks(Configuration.getConfig().getInt("rtc.storm.statistic.bolt.task")).fieldsGrouping("ExtractBolt", new Fields(new String[]{"hashKeys"}));
//        builder.setBolt("Alarm", new AlarmBolt(), Configuration.getConfig().getInt("rtc.storm.alarm.bolt.parallelismHint")).setNumTasks(Configuration.getConfig().getInt("rtc.storm.alarm.bolt.task")).fieldsGrouping("Statistic", new Fields(new String[]{"EventName"}));
        return builder;
    }
 
Example #3
Source File: KafkaSpoutBuilder.java    From storm_spring_boot_demo with MIT License 6 votes vote down vote up
@Bean("kafkaSpout")
public KafkaSpout buildSpout() {
    super.setId("kafkaSpout");
    /**
     *
     * new ZkHosts(brokerZkStr):brokerZkStr逗号分隔,kafka的zookeeper集群
     * topic:storm订阅的topic,即从哪个topic读取消息
     * spout会根据config的zkRoot和id两个参数在zookeeper上为每个kafka分区创建保存kafka偏移量的路径,如:/zkRoot/id/partitionId。
     * {@link PartitionManager#committedPath()}
     * zkRoot:偏移量保存的zk根路径
     * id:如果重新运行,希望获取同样的偏移量,则设置为固定的ID
     * PS:kafka新版本已经不将偏移量保存在zookeeper了。而且也不推荐将offset写入zk(低效)。
     */
    SpoutConfig spoutConf = new SpoutConfig(new ZkHosts(brokerZkStr), topic, zkRoot, "kafkaSpout");
    spoutConf.scheme = new SchemeAsMultiScheme(new StringScheme());
    return new KafkaSpout(spoutConf);
}
 
Example #4
Source File: KafkaStormWordCountTopology.java    From Building-Data-Streaming-Applications-with-Apache-Kafka with MIT License 5 votes vote down vote up
public static void main(String[] args) throws Exception {

        String zkConnString = "localhost:2181";
        String topic = "words";
        BrokerHosts hosts = new ZkHosts(zkConnString);

        SpoutConfig kafkaSpoutConfig = new SpoutConfig(hosts, topic, "/" + topic,
                "wordcountID");
        kafkaSpoutConfig.startOffsetTime = kafka.api.OffsetRequest.EarliestTime();
        kafkaSpoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());

        TopologyBuilder topologyBuilder = new TopologyBuilder();
        topologyBuilder.setSpout("kafkaspout", new KafkaSpout(kafkaSpoutConfig));
        topologyBuilder.setBolt("stringsplit", new StringToWordsSpliterBolt()).shuffleGrouping("kafkaspout");
        topologyBuilder.setBolt("counter", new WordCountCalculatorBolt()).shuffleGrouping("stringsplit");

        Config config = new Config();
        config.setDebug(true);
        if (args != null && args.length > 1) {
            config.setNumWorkers(3);
            StormSubmitter.submitTopology(args[1], config, topologyBuilder.createTopology());
        } else {
            // Cap the maximum number of executors that can be spawned
            // for a component to 3
            config.setMaxTaskParallelism(3);
            // LocalCluster is used to run locally
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("KafkaLocal", config, topologyBuilder.createTopology());
            // sleep
            try {
                Thread.sleep(10000);
            } catch (InterruptedException e) {
                // TODO Auto-generated catch block
                cluster.killTopology("KafkaToplogy");
                cluster.shutdown();
            }

            cluster.shutdown();
        }
    }
 
Example #5
Source File: PirkTopology.java    From incubator-retired-pirk with Apache License 2.0 5 votes vote down vote up
public static void runPirkTopology() throws PIRException
{
  // Set up Kafka parameters
  logger.info("Configuring Kafka.");
  String zkRoot = "/" + kafkaTopic + "_pirk_storm";
  BrokerHosts zkHosts = new ZkHosts(brokerZk);
  SpoutConfig kafkaConfig = new SpoutConfig(zkHosts, kafkaTopic, zkRoot, kafkaClientId);
  kafkaConfig.ignoreZkOffsets = forceFromStart;

  // Create conf
  logger.info("Retrieving Query and generating Storm conf.");
  Config conf = createStormConf();
  Query query = StormUtils.getQuery(useHdfs, hdfsUri, queryFile);
  conf.put(StormConstants.N_SQUARED_KEY, query.getNSquared().toString());
  conf.put(StormConstants.QUERY_INFO_KEY, query.getQueryInfo().toMap());

  // Configure this for different types of input data on Kafka.
  kafkaConfig.scheme = new SchemeAsMultiScheme(new PirkHashScheme(conf));

  // Create topology
  StormTopology topology = getPirkTopology(kafkaConfig);

  // Run topology
  logger.info("Submitting Pirk topology to Storm...");
  try
  {
    StormSubmitter.submitTopologyWithProgressBar(topologyName, conf, topology);
  } catch (AlreadyAliveException | InvalidTopologyException | AuthorizationException e)
  {
    throw new PIRException(e);
  }

}
 
Example #6
Source File: KafkaStormIntegrationTest.java    From incubator-retired-pirk with Apache License 2.0 5 votes vote down vote up
private SpoutConfig setUpTestKafkaSpout(Config conf)
{
  ZkHosts zkHost = new ZkHosts(zookeeperLocalCluster.getConnectString());

  SpoutConfig kafkaConfig = new SpoutConfig(zkHost, topic, "/pirk_test_root", "pirk_integr_test_spout");
  kafkaConfig.scheme = new SchemeAsMultiScheme(new PirkHashScheme(conf));
  logger.info("KafkaConfig initialized...");

  return kafkaConfig;
}
 
Example #7
Source File: IPFraudDetectionTopology.java    From Building-Data-Streaming-Applications-with-Apache-Kafka with MIT License 4 votes vote down vote up
public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException, AuthorizationException {
    Intialize(args[0]);
    logger.info("Successfully loaded Configuration ");


    BrokerHosts hosts = new ZkHosts(zkhost);
    SpoutConfig spoutConfig = new SpoutConfig(hosts, inputTopic, "/" + KafkaBroker, consumerGroup);
    spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    spoutConfig.startOffsetTime = kafka.api.OffsetRequest.EarliestTime();
    KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);
    String[] partNames = {"status_code"};
    String[] colNames = {"date", "request_url", "protocol_type", "status_code"};

    DelimitedRecordHiveMapper mapper = new DelimitedRecordHiveMapper().withColumnFields(new Fields(colNames))
            .withPartitionFields(new Fields(partNames));


    HiveOptions hiveOptions;
    //make sure you change batch size and all paramtere according to requirement
    hiveOptions = new HiveOptions(metaStoreURI, dbName, tblName, mapper).withTxnsPerBatch(250).withBatchSize(2)
            .withIdleTimeout(10).withCallTimeout(10000000);

    logger.info("Creating Storm Topology");
    TopologyBuilder builder = new TopologyBuilder();

    builder.setSpout("KafkaSpout", kafkaSpout, 1);

    builder.setBolt("frauddetect", new FraudDetectorBolt()).shuffleGrouping("KafkaSpout");
    builder.setBolt("KafkaOutputBolt",
            new IPFraudKafkaBolt(zkhost, "kafka.serializer.StringEncoder", KafkaBroker, outputTopic), 1)
            .shuffleGrouping("frauddetect");

    builder.setBolt("HiveOutputBolt", new IPFraudHiveBolt(), 1).shuffleGrouping("frauddetect");
    builder.setBolt("HiveBolt", new HiveBolt(hiveOptions)).shuffleGrouping("HiveOutputBolt");

    Config conf = new Config();
    if (args != null && args.length > 1) {
        conf.setNumWorkers(3);
        logger.info("Submiting  topology to storm cluster");

        StormSubmitter.submitTopology(args[1], conf, builder.createTopology());
    } else {
        // Cap the maximum number of executors that can be spawned
        // for a component to 3
        conf.setMaxTaskParallelism(3);
        // LocalCluster is used to run locally
        LocalCluster cluster = new LocalCluster();
        logger.info("Submitting  topology to local cluster");
        cluster.submitTopology("KafkaLocal", conf, builder.createTopology());
        // sleep
        try {
            Thread.sleep(10000);
        } catch (InterruptedException e) {
            // TODO Auto-generated catch block
            logger.error("Exception ocuured" + e);
            cluster.killTopology("KafkaToplogy");
            logger.info("Shutting down cluster");
            cluster.shutdown();
        }
        cluster.shutdown();

    }

}
 
Example #8
Source File: AdvertisingTopology.java    From streaming-benchmarks with Apache License 2.0 4 votes vote down vote up
public static void main(String[] args) throws Exception {
    TopologyBuilder builder = new TopologyBuilder();

    Options opts = new Options();
    opts.addOption("conf", true, "Path to the config file.");

    CommandLineParser parser = new DefaultParser();
    CommandLine cmd = parser.parse(opts, args);
    String configPath = cmd.getOptionValue("conf");
    Map commonConfig = Utils.findAndReadConfigFile(configPath, true);

    String zkServerHosts = joinHosts((List<String>)commonConfig.get("zookeeper.servers"),
                                     Integer.toString((Integer)commonConfig.get("zookeeper.port")));
    String redisServerHost = (String)commonConfig.get("redis.host");
    String kafkaTopic = (String)commonConfig.get("kafka.topic");
    int kafkaPartitions = ((Number)commonConfig.get("kafka.partitions")).intValue();
    int workers = ((Number)commonConfig.get("storm.workers")).intValue();
    int ackers = ((Number)commonConfig.get("storm.ackers")).intValue();
    int cores = ((Number)commonConfig.get("process.cores")).intValue();
    int parallel = Math.max(1, cores/7);

    ZkHosts hosts = new ZkHosts(zkServerHosts);



    SpoutConfig spoutConfig = new SpoutConfig(hosts, kafkaTopic, "/" + kafkaTopic, UUID.randomUUID().toString());
    spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);

    builder.setSpout("ads", kafkaSpout, kafkaPartitions);
    builder.setBolt("event_deserializer", new DeserializeBolt(), parallel).shuffleGrouping("ads");
    builder.setBolt("event_filter", new EventFilterBolt(), parallel).shuffleGrouping("event_deserializer");
    builder.setBolt("event_projection", new EventProjectionBolt(), parallel).shuffleGrouping("event_filter");
    builder.setBolt("redis_join", new RedisJoinBolt(redisServerHost), parallel).shuffleGrouping("event_projection");
    builder.setBolt("campaign_processor", new CampaignProcessor(redisServerHost), parallel*2)
        .fieldsGrouping("redis_join", new Fields("campaign_id"));

    Config conf = new Config();

    if (args != null && args.length > 0) {
        conf.setNumWorkers(workers);
        conf.setNumAckers(ackers);
        StormSubmitter.submitTopologyWithProgressBar(args[0], conf, builder.createTopology());
    }
    else {

        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("test", conf, builder.createTopology());
        org.apache.storm.utils.Utils.sleep(10000);
        cluster.killTopology("test");
        cluster.shutdown();
    }
}